## ----setup, include=FALSE------------------------------------------------ # General purpose packages (data handling, pretty plotting, ...) library(tidyverse) library(latex2exp) # Latex in ggplot2 labels cbPalette <- c( "#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") # colour-blind friendly palette theme_set(theme_minimal()) # Set a less verbose standard theme bgCol <- "#FAFAFA" theme_set(theme_minimal()) # Set a less verbose standard theme library(rgl) # 3D plotting knit_hooks$set(rgl = hook_rgl) # Packages for actual computation ## ----mesh-prep, echo=FALSE----------------------------------------------- # For creation see the code of lecture 10 meshes <- readRDS("~/Documents/PhD/stat-learning-for-bigdata/lectures/presentations/lasso_elnet_meshes.Rds") # Create axes x0 <- 1.5 XYZ_axes <- matrix(c( -x0, 0, 0, x0, 0, 0, 0, -x0, 0, 0, x0, 0, 0, 0, -x0, 0, 0, x0 ), ncol = 3, byrow = TRUE) # Create axes labels x0_lab <- 1.7 XYZ_labels <- matrix(c( x0_lab, 0, 0, 0, x0_lab, 0, 0, 0, x0_lab ), ncol = 3, byrow = TRUE) ## ----elnet-group-lasso, fig.width=4.5, fig.height=1.6, fig.align="center", rgl=TRUE, echo=FALSE, dev="png", dpi=300, results=FALSE, warning=FALSE, message=FALSE, cache=TRUE---- bg3d(color = bgCol) mfrow3d(nr = 1, nc = 3) text3d( c(-0.5, 2.1, 1), texts = "Lasso", cex = 1.6) segments3d(XYZ_axes) plotmath3d(XYZ_labels, text = c( TeX("$\\beta_1$"), TeX("$\\beta_2$"), TeX("$\\beta_3$")), fixedSize = FALSE, cex = 0.3) triangles3d( meshes$XYZ_lasso[as.vector(t(meshes$XYZ_lasso_tris)),], col = cbPalette[3]) rgl.viewpoint(theta = 30, phi = 15, zoom = 0.6) next3d() text3d( c(0.1, 2.1, 1), texts = "Elastic net (α = 0.7)", cex = 1.6) segments3d(XYZ_axes) plotmath3d(XYZ_labels, text = c( TeX("$\\beta_1$"), TeX("$\\beta_2$"), TeX("$\\beta_3$")), fixedSize = FALSE, cex = 0.3) triangles3d( meshes$XYZ_elnet[as.vector(t(meshes$XYZ_elnet_tris)),], col = cbPalette[3]) rgl.viewpoint(theta = 30, phi = 15, zoom = 0.6) next3d() text3d( c(0.1, 2.1, 1), texts = "Group lasso ({β₁, β₃}, {β₂})", cex = 1.6) segments3d(XYZ_axes) plotmath3d(XYZ_labels, text = c( TeX("$\\beta_1$"), TeX("$\\beta_2$"), TeX("$\\beta_3$")), fixedSize = FALSE, cex=0.3) triangles3d( meshes$XYZ_group_lasso[as.vector(t(meshes$XYZ_group_lasso_tris)),], col = cbPalette[3]) rgl.viewpoint(theta = 30, phi = 15, zoom = 0.6) ## ----scad-penalty, fig.width=4, fig.height=1.6, fig.align="center", echo=FALSE, results=FALSE, warning=FALSE, message=FALSE, cache=TRUE---- scad_penalty <- function(theta, lambda, a) { if (any(theta < 0)) { stop("Value error: theta >= 0 not fulfilled") } if (lambda < 0) { stop("Value error: lambda >= 0 not fulfilled") } if (a <= 2) { stop("Value error: a > 2 not fulfilled") } ifelse( theta <= lambda, lambda * theta, ifelse( theta <= a * lambda, -(theta ^ 2 - 2 * a * lambda * theta + lambda ^ 2) / (2 * (a - 1)), (a + 1) * lambda ^ 2 / 2)) } beta_seq <- seq(-10, 10, by = 0.1) lambda <- 2 a <- 3.7 data_plot <- tibble( beta = rep.int(beta_seq, 2), pabsbeta = c(lambda * abs(beta_seq), scad_penalty(abs(beta_seq), lambda, a)), type = as.factor(rep(c("Lasso", "SCAD"), each = length(beta_seq)))) p1 <- ggplot() + geom_line( aes(beta, pabsbeta), data = data_plot %>% filter(type == "Lasso"), size = 0.4) + scale_x_continuous(TeX("$\\beta$"), expand = c(0, 0)) + scale_y_continuous(TeX("$\\lambda |\\beta |$")) + ggtitle("Lasso") + theme( plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text = element_text(size = 8)) p2 <- ggplot() + geom_vline( aes(xintercept = c(-a * lambda, -lambda, lambda, a * lambda)), colour = "red", linetype = "dashed", size = 0.4) + geom_line( aes(beta, pabsbeta), data = data_plot %>% filter(type == "SCAD"), size = 0.4) + scale_x_continuous(TeX("$\\beta$"), expand = c(0, 0)) + scale_y_continuous(TeX("$p_{2, 3.5}(|\\beta |)$")) + ggtitle("SCAD") + theme( plot.title = element_text(size = 8), axis.title = element_text(size = 8), axis.text = element_text(size = 8)) ggpubr::ggarrange( p1, p2) ## ----scad-shrinkage, fig.width=4.5, fig.height=1.8, fig.align="center", echo=FALSE, results=FALSE, warning=FALSE, message=FALSE, cache=TRUE---- soft_thresholding <- function(theta, lambda) { dif <- abs(theta) - lambda sign(theta) * dif * (dif > 0) } scad_solution <- function(theta, lambda, a) { if (lambda < 0) { stop("Value error: lambda >= 0 not fulfilled") } if (a <= 2) { stop("Value error: a > 2 not fulfilled") } ifelse( abs(theta) <= 2 * lambda, soft_thresholding(theta, lambda), ifelse( abs(theta) <= a * lambda, ((a - 1) * theta - sign(theta) * a * lambda) / (a - 2), theta)) } bnd <- 10 beta_seq <- seq(-bnd, bnd, by = 0.1) lambda <- 2 a <- 3.7 data_plot <- tibble( beta = rep.int(beta_seq, 4), beta_pen = c( beta_seq, beta_seq / (1 + lambda), soft_thresholding(beta_seq, lambda), scad_solution(beta_seq, lambda, a)), type = as.factor( rep(c("Identity", "Ridge", "Lasso", "SCAD"), each = length(beta_seq)))) p <- ggplot() + geom_segment( aes(x, y, xend=xend, yend=yend), data = tibble( x = c(-bnd, 0), y = c(0, -bnd), xend = c(bnd, 0), yend = c(0, bnd)), arrow = arrow(length = unit(2, "mm")), size = 0.3) + scale_x_continuous(TeX("$\\beta$"), expand = c(0, 0)) + scale_y_continuous(name = NULL) + scale_colour_manual(values = cbPalette[c(1, 3)], guide = FALSE) + scale_linetype_manual(values = c("dashed", "solid"), guide = FALSE) + coord_fixed() + theme( panel.grid = element_blank(), plot.title = element_text(size = 8), axis.title = element_text(size = 6), axis.text = element_text(size = 6)) p1 <- p + geom_line( aes(beta, beta_pen, colour = type, linetype = type), data = data_plot %>% filter(type %in% c("Identity", "Ridge")), size = 0.4) + ggtitle("Ridge") p2 <- p + geom_line( aes(beta, beta_pen, colour = type, linetype = type), data = data_plot %>% filter(type %in% c("Identity", "Lasso")), size = 0.4) + ggtitle("Lasso") p3 <- p + geom_line( aes(beta, beta_pen, colour = type, linetype = type), data = data_plot %>% filter(type %in% c("Identity", "SCAD")), size = 0.4) + ggtitle("SCAD") ggpubr::ggarrange( p1, p2, p3, ncol = 3) ## ----sparse-multi-class-logistic, fig.width=4.5, fig.height=2, fig.align="center", echo=FALSE, results=FALSE, warning=FALSE, message=FALSE, cache=TRUE---- # These are greyscale images of handwritten digits. Each image has dimensions # 16 x 16 and results therefore in a 256 dimensional predictor space # The goal is to predict the digit from the image data_train <- ElemStatLearn::zip.train colnames(data_train) <- c("digit", sprintf("x%d", 1:256)) y_train <- data_train[,1] X_train <- data_train[,-1] # Prepare the class means for plotting clm_plot <- as_tibble(data_train) %>% group_by(digit) %>% summarise_all(mean) %>% gather(r, cs, -digit) %>% mutate( r = as.numeric(str_remove(r, "x")) - 1, x = r %% 16, y = 16 - r %/% 16) %>% select(digit, x, y, cs) # Run multinomial fit ## Very slow!! # cv_fit <- glmnet::cv.glmnet( # X_train, y_train, family = "multinomial", alpha = 1) # saveRDS(cv_fit, # "~/Documents/PhD/stat-learning-for-bigdata/lectures/presentations/multinomial-fit.Rds") cv_fit <- readRDS("~/Documents/PhD/stat-learning-for-bigdata/lectures/presentations/multinomial-fit.Rds") mclog_coefs <- glmnet::coef.glmnet(cv_fit, s = cv_fit$lambda.1se) mclog_plot <- do.call(bind_rows, lapply(1:length(mclog_coefs), function(i) { cs <- mclog_coefs[[i]] tibble( digit = rep.int(i - 1, length(cs[-1,])), r = as.numeric(str_remove(names(cs[-1,]), "x")) - 1, cs = cs[-1,]) %>% filter(abs(cs) > 0) %>% mutate( x = r %% 16, y = 16 - r %/% 16) %>% select(digit, x, y, cs) })) ggplot() + geom_tile( aes(x, y, fill = cs), data = clm_plot, width = 1, height = 1, alpha = 0.7) + geom_tile( aes(x, y), data = mclog_plot %>% filter(cs > 0), width = 1, height = 1, fill = cbPalette[2], alpha = 0.9) + geom_tile( aes(x, y), data = mclog_plot %>% filter(cs < 0), width = 1, height = 1, fill = cbPalette[3], alpha = 0.9) + facet_wrap(~ digit, ncol = 5) + theme_void() + coord_fixed() + scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0)) + scale_fill_gradient(low = "black", high = "white", guide = FALSE)