scRNA-seq 数据基础分析（2025修订）｜第十一讲：改造单细胞图形-VlnPlot

1. 准备工作 scRNA <- readRDS ( 'pbmcrenamed.rds' ) # 查看一下数据细胞类型及组别：DimPlot (scRNA, split.by = 'group' ) # 共有三个组：unique (scRNA $ group) # [1] "C57" "AS1" "P3" # 共包含九种细胞类型：unique (scRNA $ celltype) # [1] VSMC T cell Fibro EC Myeloid cells # [6] B cell Mono Macro Neut # Levels: VSMC T cell Macro B cell Fibro Myeloid cells Mono Neut EC # 查看高表达基因：library (dplyr) # 按照表达量给基因排序：myorder <- apply ( as.data.frame (scRNA @ assays $ RNA @ data), 1 , function (x){ sum (x)}) %>% order (., decreasing = T) # 看一下总表达量最高的5个基因：rownames (scRNA)[myorder][ 1 : 5 ] # [1] "Gm42418" "Tmsb4x" "Malat1" "Actb" "mt-Atp6" # 浅画一下VlnPlot：VlnPlot (scRNA, features = 'Gm42418' ) 2. 调整VlnPlot参数 library (patchwork) # 调整点的大小: VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0.5 ) | # 默认为1 VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 ) # 点的大小为0时即关闭点的展示 # 调整颜色 library (ggsci) ori_p <- VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 ) # 离散型颜色 ori_p | VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 , cols = ggsci :: pal_npg ( 10 ) ) # 渐变颜色 ori_p | VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 , cols = ggsci :: pal_gsea ( 10 ) ) # 降序,按照表达均值排序 ori_p | VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 , sort = T) # 展示不同分组变量 ori_p | VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 , group.by = 'group' ) # 把每个celltype分成不同组别展示 ori_p | VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 , split.by = 'group' ) # 绘制多个基因 he_gene <- rownames (scRNA)[myorder][ 1 : 10 ] VlnPlot (scRNA, features = he_gene, pt.size = 0 ) # 多个基因更适合堆积小提琴图：VlnPlot (scRNA, features = he_gene, pt.size = 0 , stack = T ) 3. ggplot2直接中的DIY 3.1 VlnPlot源码 # Vlnplot源码(注意下面的函数名不要加括号)：VlnPlot # function (object, features, cols = NULL, pt.size = NULL, alpha = 1, # idents = NULL, sort = FALSE, assay = NULL, group.by = NULL, # split.by = NULL, adjust = 1, y.max = NULL, same.y.lims = FALSE, # log = FALSE, ncol = NULL, slot = deprecated, layer = NULL, # split.plot = FALSE, stack = FALSE, combine = TRUE, fill.by = "feature", # flip = FALSE, add.noise = TRUE, raster = NULL) # { # if (is_present(arg = slot)) { # deprecate_soft(when = "5.0.0", what = "VlnPlot(slot = )", # with = "VlnPlot(layer = )") # layer <- slot %||% layer # } # layer.set <- suppressWarnings(Layers(object = object, search = layer %||% # "data")) # if (is.null(layer) && length(layer.set) == 1 && layer.set == # "scale.data") { # warning("Default search for \"data\" layer yielded no results; utilizing \"scale.data\" layer instead.") # } # assay.name <- DefaultAssay(object) # if (is.null(layer.set) & is.null(layer)) { # warning("Default search for \"data\" layer in \"", assay.name, # "\" assay yielded no results; utilizing \"counts\" layer instead.", # call. = FALSE, immediate. = TRUE) # layer.set <- Layers(object = object, search = "counts") # } # if (is.null(layer.set)) { # stop("layer \"", layer, "\" is not found in assay: \"", # assay.name, "\"") # } # else { # layer <- layer.set # } # if (!is.null(x = split.by) & getOption(x = "Seurat.warn.vlnplot.split", # default = TRUE)) { # message("The default behaviour of split.by has changed.\n", # "Separate violin plots are now plotted side-by-side.\n", # "To restore the old behaviour of a single split violin,\n", # "set split.plot = TRUE.\n \nThis message will be shown once per session.") # options(Seurat.warn.vlnplot.split = FALSE) # } # return(ExIPlot(object = object, type = ifelse(test = split.plot, # yes = "splitViolin", no = "violin"), features = features, # idents = idents, ncol = ncol, sort = sort, assay = assay, # y.max = y.max, same.y.lims = same.y.lims, adjust = adjust, # pt.size = pt.size, alpha = alpha, cols = cols, group.by = group.by, # split.by = split.by, log = log, layer = layer, stack = stack, # combine = combine, fill.by = fill.by, flip = flip, add.noise = add.noise, # raster = raster)) # } # <bytecode: 0x557c8422ee38> # <environment: namespace:Seurat> # 这个代码框不要运行！

# 最终的绘图函数为"ExIPlot"下的"SingleExIPlot"函数 ExIPlot <- function ( object, features, type = 'violin' , idents = NULL , ncol = NULL , sort = FALSE , assay = NULL , y.max = NULL , same.y.lims = FALSE , adjust = 1 , cols = NULL , pt.size = 0 , alpha = 1 , group.by = NULL , split.by = NULL , log = FALSE , slot = deprecated , layer = 'data' , stack = FALSE , combine = TRUE , fill.by = NULL , flip = FALSE , add.noise = TRUE , raster = NULL ) { if ( is_present ( arg = slot)) { layer <- layer %||% slot } assay <- assay %||% DefaultAssay ( object = object) DefaultAssay ( object = object) <- assay cells <- Cells ( x = object, assay = NULL ) if ( isTRUE ( x = stack)) { if ( ! is.null ( x = ncol)) { warning ( "'ncol' is ignored with 'stack' is TRUE" , call. = FALSE , immediate. = TRUE ) } if ( ! is.null ( x = y.max)) { warning ( "'y.max' is ignored when 'stack' is TRUE" , call. = FALSE , immediate. = TRUE ) } } else { ncol <- ncol %||% ifelse ( test = length ( x = features) > 9 , yes = 4 , no = min ( length ( x = features), 3 ) ) } if ( ! is.null ( x = idents)) { cells <- intersect ( x = names ( x = Idents ( object = object)[ Idents ( object = object) %in% idents]), y = cells ) } data <- FetchData ( object = object, vars = features, slot = layer, cells = cells) pt.size <- pt.size %||% AutoPointSize ( data = object) features <- colnames ( x = data) data <- data[cells, , drop = FALSE ] idents <- if ( is.null ( x = group.by)) { Idents ( object = object)[cells] } else { object[[group.by, drop = TRUE ]][cells] } if ( ! is.factor ( x = idents)) { idents <- factor ( x = idents) } if ( is.null ( x = split.by)) { split <- NULL } else { split <- FetchData (object,split.by)[cells,split.by] if ( ! is.factor ( x = split)) { split <- factor ( x = split) } if ( is.null ( x = cols)) { cols <- hue_pal ( length ( x = levels ( x = idents))) cols <- Interleave (cols, InvertHex ( hexadecimal = cols)) } else if ( length ( x = cols) == 1 && cols == 'interaction' ) { split <- interaction (idents, split) cols <- hue_pal ( length ( x = levels ( x = idents))) } else { cols <- Col2Hex (cols) } if ( length ( x = cols) < length ( x = levels ( x = split))) { cols <- Interleave (cols, InvertHex ( hexadecimal = cols)) } cols <- rep_len ( x = cols, length.out = length ( x = levels ( x = split))) names ( x = cols) <- levels ( x = split) if (( length ( x = cols) > 2 ) & (type == "splitViolin" )) { warning ( "Split violin is only supported for <3 groups, using multi-violin." ) type <- "violin" } } if (same.y.lims && is.null ( x = y.max)) { y.max <- max (data) } if ( isTRUE ( x = stack)) { return ( MultiExIPlot ( type = type, data = data, idents = idents, split = split, sort = sort, same.y.lims = same.y.lims, adjust = adjust, cols = cols, pt.size = pt.size, log = log, fill.by = fill.by, add.noise = add.noise, flip = flip )) } plots <- lapply ( X = features, FUN = function (x) { return ( SingleExIPlot ( type = type, data = data[, x, drop = FALSE ], idents = idents, split = split, sort = sort, y.max = y.max, adjust = adjust, cols = cols, pt.size = pt.size, alpha = alpha, log = log, add.noise = add.noise, raster = raster )) } ) label.fxn <- switch ( EXPR = type, 'violin' = if (stack) { xlab } else { ylab }, "splitViolin" = if (stack) { xlab } else { ylab }, 'ridge' = xlab, stop ( "Unknown ExIPlot type " , type, call. = FALSE ) ) for (i in 1 : length ( x = plots)) { key <- paste0 ( unlist ( x = strsplit ( x = features[i], split = '_' ))[ 1 ], '_' ) obj <- names ( x = which ( x = Key ( object = object) == key)) if ( length ( x = obj) == 1 ) { if ( inherits ( x = object[[obj]], what = 'DimReduc' )) { plots[[i]] <- plots[[i]] + label.fxn ( label = 'Embeddings Value' ) } else if ( inherits ( x = object[[obj]], what = 'Assay' ) || inherits ( x = object[[obj]], what = 'Assay5' )) { next } else { warning ( "Unknown object type " , class ( x = object), immediate. = TRUE , call. = FALSE ) plots[[i]] <- plots[[i]] + label.fxn ( label = NULL ) } } else if ( ! features[i] %in% rownames ( x = object)) { plots[[i]] <- plots[[i]] + label.fxn ( label = NULL ) } } if (combine) { plots <- wrap_plots (plots, ncol = ncol) if ( length ( x = features) > 1 ) { plots <- plots & NoLegend } } return (plots) } # 这个代码框不要运行！

# SingleExIPlot函数源码 SingleExIPlot <- function ( data, idents, split = NULL , type = 'violin' , sort = FALSE , y.max = NULL , adjust = 1 , pt.size = 0 , alpha = 1 , cols = NULL , seed.use = 42 , log = FALSE , add.noise = TRUE , raster = NULL ) { if ( ! is.null ( x = raster) && isTRUE ( x = raster)){ if ( ! PackageCheck ( 'ggrastr' , error = FALSE )) { stop ( "Please install ggrastr from CRAN to enable rasterization." ) } } if ( PackageCheck ( 'ggrastr' , error = FALSE )) { # Set rasterization to true if ggrastr is installed and # number of points exceeds 100,000 if (( nrow ( x = data) > 1e5 ) & is.null ( x = raster)){ message ( "Rasterizing points since number of points exceeds 100,000." , " \n To disable this behavior set `raster=FALSE`" ) # change raster to TRUE raster <- TRUE } } if ( ! is.null ( x = seed.use)) { set.seed ( seed = seed.use) } if ( ! is.data.frame ( x = data) || ncol ( x = data) != 1 ) { stop ( "'SingleExIPlot requires a data frame with 1 column" ) } feature <- colnames ( x = data) data $ ident <- idents if (( is.character ( x = sort) && nchar ( x = sort) > 0 ) || sort) { data $ ident <- factor ( x = data $ ident, levels = names ( x = rev ( x = sort ( x = tapply ( X = data[, feature], INDEX = data $ ident, FUN = mean ), decreasing = grepl ( pattern = paste0 ( '^' , tolower ( x = sort)), x = 'decreasing' ) ))) ) } if (log) { noise <- rnorm ( n = length ( x = data[, feature])) / 200 data[, feature] <- data[, feature] + 1 } else { noise <- rnorm ( n = length ( x = data[, feature])) / 100000 } if ( ! add.noise) { noise <- noise * 0 } if ( all (data[, feature] == data[, feature][ 1 ])) { warning ( paste0 ( "All cells have the same value of " , feature, "." )) } else { data[, feature] <- data[, feature] + noise } axis.label <- 'Expression Level' y.max <- y.max %||% max (data[, feature][ is.finite ( x = data[, feature])]) if (type == 'violin' && ! is.null ( x = split)) { data $ split <- split vln.geom <- geom_violin fill <- 'split' } else if (type == 'splitViolin' && ! is.null ( x = split )) { data $ split <- split vln.geom <- geom_split_violin fill <- 'split' type <- 'violin' } else { vln.geom <- geom_violin fill <- 'ident' } switch ( EXPR = type, 'violin' = { x <- 'ident' y <- paste0 ( "`" , feature, "`" ) xlab <- 'Identity' ylab <- axis.label geom <- list ( vln.geom ( scale = 'width' , adjust = adjust, trim = TRUE ), theme ( axis.text.x = element_text ( angle = 45 , hjust = 1 )) ) if ( is.null ( x = split)) { if ( isTRUE ( x = raster)) { jitter <- ggrastr :: rasterize ( geom_jitter ( height = 0 , size = pt.size, alpha = alpha, show.legend = FALSE )) } else { jitter <- geom_jitter ( height = 0 , size = pt.size, alpha = alpha, show.legend = FALSE ) } } else { if ( isTRUE ( x = raster)) { jitter <- ggrastr :: rasterize ( geom_jitter ( position = position_jitterdodge ( jitter.width = 0.4 , dodge.width = 0.9 ), size = pt.size, alpha = alpha, show.legend = FALSE )) } else { jitter <- geom_jitter ( position = position_jitterdodge ( jitter.width = 0.4 , dodge.width = 0.9 ), size = pt.size, alpha = alpha, show.legend = FALSE ) } } log.scale <- scale_y_log10 axis.scale <- ylim }, 'ridge' = { x <- paste0 ( "`" , feature, "`" ) y <- 'ident' xlab <- axis.label ylab <- 'Identity' geom <- list ( geom_density_ridges ( scale = 4 ), theme_ridges , scale_y_discrete ( expand = c ( 0.01 , 0 )), scale_x_continuous ( expand = c ( 0 , 0 )) ) jitter <- geom_jitter ( width = 0 , size = pt.size, alpha = alpha, show.legend = FALSE ) log.scale <- scale_x_log10 axis.scale <- function (...) { invisible ( x = NULL ) } }, stop ( "Unknown plot type: " , type) ) plot <- ggplot ( data = data, mapping = aes_string ( x = x, y = y, fill = fill)[ c ( 2 , 3 , 1 )] ) + labs ( x = xlab, y = ylab, title = feature, fill = NULL ) + theme_cowplot + theme ( plot.title = element_text ( hjust = 0.5 )) plot <- do.call ( what = '+' , args = list (plot, geom)) plot <- plot + if (log) { log.scale } else { axis.scale ( min (data[, feature]), y.max) } if (pt.size > 0 ) { plot <- plot + jitter } if ( ! is.null ( x = cols)) { if ( ! is.null ( x = split)) { idents <- unique ( x = as.vector ( x = data $ ident)) splits <- unique ( x = as.vector ( x = data $ split)) labels <- if ( length ( x = splits) == 2 ) { splits } else { unlist ( x = lapply ( X = idents, FUN = function (pattern, x) { x.mod <- gsub ( pattern = paste0 (pattern, '.' ), replacement = paste0 (pattern, ': ' ), x = x, fixed = TRUE ) x.keep <- grep ( pattern = ': ' , x = x.mod, fixed = TRUE ) x.return <- x.mod[x.keep] names ( x = x.return) <- x[x.keep] return (x.return) }, x = unique ( x = as.vector ( x = data $ split)) )) } if ( is.null ( x = names ( x = labels))) { names ( x = labels) <- labels } } else { labels <- levels ( x = droplevels (data $ ident)) } plot <- plot + scale_fill_manual ( values = cols, labels = labels) } return (plot) } # 这个代码框不要运行！

# 其中最为核心的绘图区域使用的R包为ggplot2, 所以接下来我们基于ggplot2进行后续的优化 plot <- ggplot ( data = data, mapping = aes_string ( x = x, y = y, fill = fill)[ c ( 2 , 3 , 1 )] ) 3.2 数据整理 # ggplot2的图片绘制需要一个长格式的数据，我们以上面的高表达基因为例，整理出一个长格式数据框 # 取出表达矩阵：data4plot <- scRNA @ assays $ RNA @ data[he_gene[ 1 : 5 ],] %>% as.data.frame %>% t head (data4plot) # Gm42418 Tmsb4x Malat1 Actb mt-Atp6 # AAAGGGCTCGCAGAGA-1_1 3.125663 5.191950 5.443660 4.880147 4.500693 # AACACACAGCATCCCG-1_1 5.717602 4.747414 4.719490 4.166542 5.093166 # AACAGGGTCAACACCA-1_1 4.716741 4.179333 5.849399 3.356926 3.870171 # AACCCAAAGGCCTTGC-1_1 5.104691 3.290715 6.404316 4.364196 3.569047 # AACCCAAAGTGGACGT-1_1 5.853611 7.403626 3.399781 5.316662 3.897164 # AACCCAAGTCCGAAGA-1_1 4.562551 6.145295 4.913077 5.337041 3.646919 # 添加注释信息：data4plot <- cbind (data4plot,scRNA @ meta.data[, c ( 'group' , 'celltype' )]) # 查看一下表头：head (data4plot) # Gm42418 Tmsb4x Malat1 Actb mt-Atp6 group # AAAGGGCTCGCAGAGA-1_1 3.125663 5.191950 5.443660 4.880147 4.500693 C57 # AACACACAGCATCCCG-1_1 5.717602 4.747414 4.719490 4.166542 5.093166 C57 # AACAGGGTCAACACCA-1_1 4.716741 4.179333 5.849399 3.356926 3.870171 C57 # AACCCAAAGGCCTTGC-1_1 5.104691 3.290715 6.404316 4.364196 3.569047 C57 # AACCCAAAGTGGACGT-1_1 5.853611 7.403626 3.399781 5.316662 3.897164 C57 # AACCCAAGTCCGAAGA-1_1 4.562551 6.145295 4.913077 5.337041 3.646919 C57 # celltype # AAAGGGCTCGCAGAGA-1_1 VSMC # AACACACAGCATCCCG-1_1 T cell # AACAGGGTCAACACCA-1_1 Fibro # AACCCAAAGGCCTTGC-1_1 EC # AACCCAAAGTGGACGT-1_1 Myeloid cells # AACCCAAGTCCGAAGA-1_1 T cell 3.3 重现VlnPlot # 待复刻的图：plot0 <- VlnPlot (scRNA, features = 'Gm42418' , pt.size = 0 ) plot0 library (ggplot2) library (ggthemes) # 最简单的复刻：plot1 <- ggplot ( data = data4plot, aes ( x = celltype, y = Gm42418, fill = celltype)) + # 确定坐标与颜色填充 geom_violin # 绘制小提琴图 plot1 # 显然这个还不是很像 # 稍作修改 plot2 <- plot1 + labs ( x = 'Identity' , y = 'Expression Level' , title = 'Gm42418' , fill = NULL ) + # 修改标题 geom_violin + theme_bw + # 去灰色背景 theme_few + # 去网格 theme_classic + theme ( plot.title = element_text ( hjust = 0.5 , face = 'bold' , size= 15 ) ) + # 加粗标题并调整大小 theme ( axis.text.x.bottom = element_text ( angle = 45 , hjust = 1 , vjust = 1 , size = 12 ), # 调整x轴坐标位置、字体、大小 axis.text.y.left = element_text ( size= 10 )) # y轴坐标字体调整 plot2 # 最后看一下，第一张图和第三张图基本一致 library (patchwork) plot0 / plot1 / plot2 # 需要横向排列则是plot0|plot1|plot2 3.4 重现stack vlnplot # 原本VlnPlot中的stack=True选项依赖MultiExIPlo进行可视化：# 再看一眼原图 ori_plot <- VlnPlot (scRNA, features = he_gene, pt.size = 0 , stack = T ) ori_plot # 整理数据 library (reshape2) data4stack <- as.data.frame (scRNA[[ "RNA" ]] @ data[he_gene,]) data4stack $ Gene <- factor (he_gene, levels = he_gene) data4stack[, 1 : 4 ] # AAAGGGCTCGCAGAGA-1_1 AACACACAGCATCCCG-1_1 AACAGGGTCAACACCA-1_1 # Gm42418 3.125663 5.717602 4.716741 # Tmsb4x 5.191950 4.747414 4.179333 # Malat1 5.443660 4.719490 5.849399 # Actb 4.880147 4.166542 3.356926 # mt-Atp6 4.500693 5.093166 3.870171 # mt-Co1 4.223976 4.516079 3.615016 # Fth1 4.598321 1.751268 3.450555 # mt-Co2 4.455747 4.661190 3.522394 # mt-Co3 3.796612 5.022522 3.435546 # Tpt1 3.334123 4.462619 4.313646 # AACCCAAAGGCCTTGC-1_1 # Gm42418 5.104691 # Tmsb4x 3.290715 # Malat1 6.404316 # Actb 4.364196 # mt-Atp6 3.569047 # mt-Co1 3.965073 # Fth1 2.903693 # mt-Co2 3.965073 # mt-Co3 3.786538 # Tpt1 2.903693 # 把宽格式的数据转换为长格式数据 data4stack <- melt (data4stack, id= "Gene" ) head (data4stack) # Gene variable value # 1 Gm42418 AAAGGGCTCGCAGAGA-1_1 3.125663 # 2 Tmsb4x AAAGGGCTCGCAGAGA-1_1 5.191950 # 3 Malat1 AAAGGGCTCGCAGAGA-1_1 5.443660 # 4 Actb AAAGGGCTCGCAGAGA-1_1 4.880147 # 5 mt-Atp6 AAAGGGCTCGCAGAGA-1_1 4.500693 # 6 mt-Co1 AAAGGGCTCGCAGAGA-1_1 4.223976 colnames (data4stack)[ c ( 2 , 3 )] = c ( "Cell" , "Expression" ) head (data4stack) # Gene Cell Expression # 1 Gm42418 AAAGGGCTCGCAGAGA-1_1 3.125663 # 2 Tmsb4x AAAGGGCTCGCAGAGA-1_1 5.191950 # 3 Malat1 AAAGGGCTCGCAGAGA-1_1 5.443660 # 4 Actb AAAGGGCTCGCAGAGA-1_1 4.880147 # 5 mt-Atp6 AAAGGGCTCGCAGAGA-1_1 4.500693 # 6 mt-Co1 AAAGGGCTCGCAGAGA-1_1 4.223976 # 提取细胞类型 my_meta = scRNA @ meta.data[, c ( 'group' , 'celltype' )] my_meta $ Cell <- rownames (my_meta) head (my_meta) # group celltype Cell # AAAGGGCTCGCAGAGA-1_1 C57 VSMC AAAGGGCTCGCAGAGA-1_1 # AACACACAGCATCCCG-1_1 C57 T cell AACACACAGCATCCCG-1_1 # AACAGGGTCAACACCA-1_1 C57 Fibro AACAGGGTCAACACCA-1_1 # AACCCAAAGGCCTTGC-1_1 C57 EC AACCCAAAGGCCTTGC-1_1 # AACCCAAAGTGGACGT-1_1 C57 Myeloid cells AACCCAAAGTGGACGT-1_1 # AACCCAAGTCCGAAGA-1_1 C57 T cell AACCCAAGTCCGAAGA-1_1 # 根据Cell列合并data4stack和my_meta，最终会在data4stack增加上细胞类型和分组的信息 data4stack = inner_join (data4stack,my_meta, by= "Cell" ) # 瞅一眼数据 head (data4stack) # Gene Cell Expression group celltype # 1 Gm42418 AAAGGGCTCGCAGAGA-1_1 3.125663 C57 VSMC # 2 Tmsb4x AAAGGGCTCGCAGAGA-1_1 5.191950 C57 VSMC # 3 Malat1 AAAGGGCTCGCAGAGA-1_1 5.443660 C57 VSMC # 4 Actb AAAGGGCTCGCAGAGA-1_1 4.880147 C57 VSMC # 5 mt-Atp6 AAAGGGCTCGCAGAGA-1_1 4.500693 C57 VSMC # 6 mt-Co1 AAAGGGCTCGCAGAGA-1_1 4.223976 C57 VSMC copy_stack <- ggplot (data4stack, aes ( x = Expression, y = celltype, fill = Gene)) + geom_violin + labs ( x = "Expression Level" , y = "Identity" ) + facet_grid (. ~ Gene, scales = "free_x" , space = 'free_x' ) + # 按照基因拆分做stack，并使每个小提琴图的坐标按照真实极值生成，而非全部统一 theme_bw + # 去灰色背景 theme_few + # 去网格 theme ( panel.spacing.x = unit ( 0 , "cm" )) + # 去间隙 theme ( strip.background = element_blank , # 去Gene标题的背景 strip.text.x = element_text ( angle = - 90 , hjust = 1 , size= 15 , face= 'bold' ) # 旋转Gene标题的角度 ) ori_plot / copy_stack 3.5 重现拆分的VlnPlot # VlnPlot中能够做到按照一定的分类变量拆分小提琴，我们同样可以在ggplot2中重现这一过程 VlnPlot (scRNA, features = he_gene[ 1 ], split.by = 'group' ) # 用ggplot拆一下：ggplot ( data = data4plot, aes ( x = celltype, y = Gm42418, fill = group)) + # 同样是更改fill来实现 geom_violin + # 绘制小提琴图 labs ( x = 'Identity' , y = 'Expression Level' , title = 'Gm42418' , fill = NULL ) + # 设置坐标与标题的字符串 geom_violin + # 添加小提琴图 theme_bw + # 去灰色背景 theme_few + # 去网格 theme_classic + theme ( plot.title = element_text ( hjust = 0.5 , face = 'bold' , size= 15 ) ) + # 加粗标题并调整大小 theme ( axis.text.x.bottom = element_text ( angle = 45 , hjust = 1 , vjust = 1 , size = 12 ), # 横坐标斜体45° axis.text.y.left = element_text ( size= 10 )) 4. ggplot2改造 4.1 VlnPlot颜色修改 plot1 <- ggplot ( data = data4plot, aes ( x = celltype, y = Gm42418, fill = celltype)) + geom_violin + # 绘制小提琴图 labs ( x = 'Identity' , y = 'Expression Level' , title = 'Gm42418' , fill = NULL ) + # 设置坐标与标题的字符串 geom_violin + # 添加小提琴图 theme_bw + # 去灰色背景 theme_few + # 去网格 theme_classic + theme ( plot.title = element_text ( hjust = 0.5 , face = 'bold' , size= 15 ) ) + # 加粗标题并调整大小 theme ( axis.text.x.bottom = element_text ( angle = 45 , hjust = 1 , vjust = 1 , size = 12 ), # 调整x轴坐标角度、位置、大小 axis.text.y.left = element_text ( size= 10 )) # 调整y轴提提大小 # 用ggsci加一个柳叶刀配色 plot2 <- plot1 + scale_fill_manual ( values = ggsci :: pal_nejm ( 9 )) # 颜色会比默认配色好看很多 plot1 / plot2 # 如果觉得图注多余，可以隐藏。

legend.position = "right"（默认）：图例位于图的右侧。# 其他："left"左侧；"top"顶部；

"bottom"底部 plot2 + theme ( legend.position = "none" ) # 或者自定义图注的位置 # x = 0 表示最左边，x = 1 表示最右边;y = 0 表示底部，y = 1 表示顶部 plot2 + theme ( legend.position = c ( - 0.05 , - 0.05 )) # 调整的舒服一些 plot2 + theme ( legend.position = c ( 0.3 , 1 ), legend.background = element_blank , # 让图注背景框透明 legend.direction = 'horizontal' , plot.title = element_text ( hjust = 0.8 , face = 'bold' , size= 15 )) # 调整标题位置(水平对齐，0 = 左对齐，1 = 右对齐，0.5 = 居中。

这里 0.8 表示标题稍微向右偏移)、字体、大小 4.2 stack vlnplot颜色修改 # 添加ggsci配色 p1 <- copy_stack + scale_fill_manual ( values = c (ggsci :: pal_nejm ( 8 ),ggsci :: pal_aaas ( 2 ))) # 把颜色填充由Gene改为Celltype：p2 <- ggplot (data4stack, aes ( x = Expression, y = celltype, fill = celltype)) + # 改动fill即可 geom_violin + labs ( x = "Expression Level" , y = "Identity" ) + facet_grid (. ~ Gene, scales = "free_x" , space = 'free_x' ) + # 按照基因拆分做stack，并使每个小提琴图的坐标按照真实极值生成，而非全部统一 theme_bw + # 去灰色背景 theme_few + # 去网格 theme ( panel.spacing.x = unit ( 0 , "cm" )) + # 去间隙 theme ( strip.background = element_blank , # 去Gene标题的背景 strip.text.x = element_text ( angle = - 90 , hjust = 1 , size= 15 , face= 'bold' ) # 旋转Gene标题的角度 ) # 把颜色填充由Gene改为Celltype后，重新对每个Celltype赋予新的颜色 p3 <- p2 + scale_fill_manual ( values = c (ggsci :: pal_nejm ( 8 ),ggsci :: pal_aaas ( 2 ))) # 对比一下四个图 copy_stack | p1 | p2 | p3 # 交换x/y轴 copy_stack <- ggplot (data4stack, aes ( x = celltype, y = Expression , fill = Gene)) + geom_violin + labs ( x = "Expression Level" , y = "Identity" ) + facet_grid (Gene ~ ., scales = "free_y" , space = 'free_y' ) + theme_bw + # 去灰色背景 theme_few + # 去网格 theme ( panel.spacing.x = unit ( 0 , "cm" )) + # 去间隙 theme ( strip.background = element_blank , # 去Gene标题的背景 strip.text.x = element_text ( angle = - 90 , hjust = 1 , size= 15 , face= 'bold' ) # 旋转Gene标题的角度 ) copy_stack 4.3 拆分的VlnPlot颜色修改 # 此时需要自定义颜色应该输入group对应的三个颜色 ggplot ( data = data4plot, aes ( x = celltype, y = Gm42418, fill = group)) + geom_violin + # 绘制小提琴图 labs ( x = 'Identity' , y = 'Expression Level' , title = 'Gm42418' , fill = NULL ) + # 设置坐标与标题的字符串 geom_violin + # 添加小提琴图 theme_bw + # 去灰色背景 theme_few + # 去网格 theme_classic + theme ( plot.title = element_text ( hjust = 0.5 , face = 'bold' , size= 15 ) ) + # 加粗标题并调整大小 theme ( axis.text.x.bottom = element_text ( angle = 45 , hjust = 1 , vjust = 1 , size = 12 ), axis.text.y.left = element_text ( size= 10 )) + # 横坐标斜体45° scale_fill_manual ( values = ggsci :: pal_aaas ( 3 )) 4.4 坐标轴箭头调整 # 基于前面plot2图形调整 plot2 # 空心箭头坐标轴：plot2 + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.5 , 'cm' )))) # 调整箭头大小：plot2 + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 2 , 'cm' )))) # 调整箭头形状：plot2 + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.5 , 'cm' ), type = 'closed' ))) # 很多时候，简单的小提琴无法帮助我们直接的观察到基因的表达差异，例如下面这张图：# 4.4 小提琴+箱线图 # 显然这样能够看出C57的中位数要低于AS1组 my_box <- ggplot (data4plot, aes (group,Gm42418, fill= group)) + geom_violin ( alpha= 0.4 ) + stat_boxplot ( geom= "errorbar" , position = position_dodge ( width = 0.1 ), width= 0.1 ) + # 加上误差棒 geom_boxplot ( alpha= 0.5 , outlier.size= 0 , size= 0.3 , width= 0.3 ) + # 加上箱线图 scale_fill_manual ( values = c ( ' , ' , ' )) + # 填充颜色 theme_bw my_box 4.5 海盗船图 # 海盗船图：if ( ! require (ggpirate))devtools :: install_github ( "mikabr/ggpirate" ) ggplot (data4plot, aes ( x = group, y = Gm42418, fill = group)) + geom_pirate ( aes ( x = group, y = Gm42418, fill = group), alpha = 0.4 ) + scale_fill_manual ( values = c ( ' , ' , ' )) + # 填充颜色 theme_bw # 加上显著性：library (ggsignif) # 设置比较组 compaired <- list ( c ( "C57" , "AS1" ), c ( "C57" , "P3" ), c ( "AS1" , "P3" )) my_box + geom_signif ( comparisons = compaired, step_increase = 0.1 , map_signif_level = F, test = 'wilcox.test' ) 4.6 点图与蜂群图 # 加上点图 my_box + geom_dotplot ( binaxis= 'y' , # y轴上对应了点的表达量 stackdir= 'center' , dotsize = . 5 , fill= "red" , alpha= 0.3 ) # 蜂群图 library (ggbeeswarm) ggplot (data4plot, aes (group, Gm42418, fill = group)) + geom_beeswarm ( alpha = 0.4 ) # 蜂群图+箱线图 ggplot (data4plot, aes (group, Gm42418, fill = group)) + geom_beeswarm ( alpha = 0.4 ) + # 蜂群图 stat_boxplot ( geom = "errorbar" , position = position_dodge ( width = 0.1 ), width = 0.1 ) + # 加上误差棒 geom_boxplot ( alpha = 0.5 , outlier.size = 0 , size = 0.3 , width = 0.3 ) + # 加上箱线图 scale_fill_manual ( values = c ( ' , ' , ' )) + # 填充颜色 theme_bw 4.7 直方图 ggplot (data4plot, aes ( x = Gm42418, fill = celltype)) + geom_histogram ( position = "identity" , alpha = 0.4 , bins = 30 ) + # 直方图是ggplot2最基础的功能 theme_bw + facet_grid (group ~ .) # 仅按照group拆分 # 分面进行对比：ggplot (data4plot, aes ( x = Gm42418, fill = celltype)) + geom_histogram ( position = "identity" , alpha = 0.4 , bins = 30 ) + theme_bw + facet_grid (group ~ celltype) # groupXcelltype式拆分 4.8 劈开小提琴 # 劈开小提琴图，插入箱线图 library (gghalves) my_box <- ggplot (data4plot, aes (group, Gm42418, fill = group)) + geom_half_violin ( aes ( x = group, y = Gm42418), scale = "width" , side = "r" , position = position_nudge ( x = 0.1 )) + # 右半边小提琴图，通过position_nudge调整位置 geom_half_violin ( aes ( x = group, y = Gm42418), scale = "width" , side = "l" , position = position_nudge ( x = - 0.1 )) + # 左半边小提琴图，通过position_nudge调整位置 geom_boxplot ( width = 0.1 , outlier.shape = NA ) + # 中间的箱线图 scale_fill_manual ( values = c ( ' , ' , ' )) + # 填充颜色 theme_bw my_box # 让两半边分别展示不同的分组：my_box <- ggplot (dplyr :: filter (data4plot,group %in% c ( 'C57' , 'AS1' )), aes (celltype, Gm42418, fill = group)) + geom_half_violin ( data = dplyr :: filter (data4plot,group == 'C57' ) , aes ( x = celltype, y = Gm42418), scale = "width" , side = "r" , position = position_nudge ( x = 0.1 )) + # 右半边小提琴图，通过position_nudge调整位置 geom_half_violin ( data = dplyr :: filter (data4plot,group == 'AS1' ), aes ( x = celltype, y = Gm42418), scale = "width" , side = "l" , position = position_nudge ( x = - 0.1 )) + # 左半边小提琴图，通过position_nudge调整位置 geom_boxplot ( width = 0.1 , outlier.shape = NA ) + # 中间的箱线图 scale_fill_manual ( values = c ( ' , ' )) + # 填充颜色 theme_bw my_box