scRNA-seq 数据基础分析（2025修订）｜第十讲：改造单细胞图形-DimPlot

1. 准备工作 library (Seurat) library (ggsci) sce <- readRDS ( "./iri.intergrate.harmony.rename.rds" ) # 选取部分细胞用于后续分析 sel <- c ( "PTS1" , "PTS2" , "NewPT1" , "DTL-ATL" , "MTAL" , "DCT" , "CNT" , "PC2" , "ICA" , "EC1" , "Fib" , "Mø" ) sce <- subset (sce, Celltype %in% sel) # 指定因子展示顺序 sce @ meta.data $ Celltype <- factor (sce @ meta.data $ Celltype, levels = sel) Idents (sce) <- factor ( Idents (sce), levels = sel) # DimPlot的完整参数 ?DimPlot 2. 调整DimPlot参数 2.1 cols: 修改颜色 mycol <- c ( pal_d3 ( 7 ), pal_aaas ( 7 ), pal_uchicago ( 7 ), pal_jama ( 7 )) DimPlot (sce) DimPlot (sce, cols = mycol) 2.2 pt.size: 修改点的大小 DimPlot (sce, pt.size = 0.01 ) | DimPlot (sce, pt.size = 1 ) 2.3 reduction: 降维 DimPlot (sce, reduction = 'umap' ) | DimPlot (sce, reduction = 'tsne' ) | DimPlot (sce, reduction = 'pca' ) 2.4 label相关 DimPlot (sce, label = F) | DimPlot (sce, label = T) # 筛选数量最多的五种细胞类型做展示 suppressMessages ( library (dplyr)) mycelltype <- table ( Idents (sce)) %>% sort ( decreasing = T) %>% names (.) %>% .[ c ( 1 : 5 )] mycelltype # [1] "NewPT1" "MTAL" "PTS2" "EC1" "PTS1" sce_sub <- subset (sce, idents= mycelltype) DimPlot (sce_sub, label = F) | DimPlot (sce, label = T) # 修改标签大小 # 一样选的比较极端，大家可以自行调整 DimPlot (sce, label = T, label.size = 2.5 ) | DimPlot (sce, label = T, label.size = 5 ) # 增加标签边框 DimPlot (sce, label = T, label.box = T) | DimPlot (sce, label = T, label.box = F) # repel参数控制细胞聚类标签的排布方式的 DimPlot (sce, label = T, label.box = T, repel = T) | DimPlot (sce, label = T, label.box = T, repel = F) # 高亮部分细胞 DimPlot (sce, label = T) | DimPlot (sce, label = T, cells.highlight = c ( 'PTS1' , 'PTS2' )) | DimPlot (sce, label = T, cells.highlight = list ( PTS1_and_PTS2= colnames (sce)[sce $ Celltype %in% c ( 'PTS1' , 'PTS2' )])) # 高亮部分细胞,同时修改点的大小 DimPlot (sce, label = T) | DimPlot (sce, label = T, cells.highlight = list ( PTS1_and_PTS2= colnames (sce)[sce $ Celltype %in% c ( 'PTS1' , 'PTS2' )]), sizes.highlight= 0.2 ) | DimPlot (sce, label = T, cells.highlight = list ( PTS1_and_PTS2= colnames (sce)[sce $ Celltype %in% c ( 'PTS1' , 'PTS2' )]), sizes.highlight= 2 ) 2.5 raster: 是否栅格化 # 即把矢量图像素化 DimPlot (sce, raster = F) | DimPlot (sce, raster = T) pdf ( file = "../result_dimplot/p1.pdf" ) DimPlot (sce, raster = F) dev.off # png # 2 pdf ( file = "../result_dimplot/p2.pdf" ) DimPlot (sce, raster = T) dev.off # png # 2 file.size ( "../result_dimplot/p1.pdf" ) # [1] 1585135 file.size ( "../result_dimplot/p2.pdf" ) # [1] 35107 # reaster.dpi:设定栅格化的分辨率 DimPlot (sce, raster = T, raster.dpi = c ( 512 , 512 )) | DimPlot (sce, raster = T, raster.dpi = c ( 200 , 200 )) 3. ggplot2直接中的DIY 3.1 数据整理 class ( DimPlot (sce)) # [1] "patchwork" "gg" "ggplot" # 这也就意味着，我们可以通过ggplot完成对DimPlot降维图的改造，大家可以先读一下DimPlot的源码了解一下：print (DimPlot) # function (object, dims = c(1, 2), cells = NULL, cols = NULL, # pt.size = NULL, reduction = NULL, group.by = NULL, split.by = NULL, # shape.by = NULL, order = NULL, shuffle = FALSE, seed = 1, # label = FALSE, label.size = 4, label.color = "black", label.box = FALSE, # repel = FALSE, alpha = 1, cells.highlight = NULL, cols.highlight = " # sizes.highlight = 1, na.value = "grey50", ncol = NULL, combine = TRUE, # raster = NULL, raster.dpi = c(512, 512)) # { # if (!is_integerish(x = dims, n = 2L, finite = TRUE) || !all(dims > # 0L)) { # abort(message = "'dims' must be a two-length integer vector") # } # reduction <- reduction %||% DefaultDimReduc(object = object) # cells <- cells %||% Cells(x = object, assay = DefaultAssay(object = object[[reduction]])) # dims <- paste0(Key(object = object[[reduction]]), dims) # orig.groups <- group.by # group.by <- group.by %||% "ident" # data <- FetchData(object = object, vars = c(dims, group.by), # cells = cells, clean = "project") # group.by <- colnames(x = data)[3:ncol(x = data)] # for (group in group.by) { # if (!is.factor(x = data[, group])) { # data[, group] <- factor(x = data[, group]) # } # } # if (!is.null(x = shape.by)) { # data[, shape.by] <- object[[shape.by, drop = TRUE]] # } # if (!is.null(x = split.by)) { # split <- FetchData(object = object, vars = split.by, # clean = TRUE)[split.by] # data <- data[rownames(split), ] # data[, split.by] <- split # } # if (isTRUE(x = shuffle)) { # set.seed(seed = seed) # data <- data[sample(x = 1:nrow(x = data)), ] # } # plots <- lapply(X = group.by, FUN = function(x) { # plot <- SingleDimPlot(data = data[, c(dims, x, split.by, # shape.by)], dims = dims, col.by = x, cols = cols, # pt.size = pt.size, shape.by = shape.by, order = order, # alpha = alpha, label = FALSE, cells.highlight = cells.highlight, # cols.highlight = cols.highlight, sizes.highlight = sizes.highlight, # na.value = na.value, raster = raster, raster.dpi = raster.dpi) # if (label) { # plot <- LabelClusters(plot = plot, id = x, repel = repel, # size = label.size, split.by = split.by, box = label.box, # color = label.color) # } # if (!is.null(x = split.by)) { # plot <- plot + FacetTheme + facet_wrap(facets = vars(!!sym(x = split.by)), # ncol = if (length(x = group.by) > 1 || is.null(x = ncol)) { # length(x = unique(x = data[, split.by])) # } # else { # ncol # }) # } # plot <- if (is.null(x = orig.groups)) { # plot + labs(title = NULL) # } # else { # plot + CenterTitle # } # }) # if (!is.null(x = split.by)) { # ncol <- 1 # } # if (combine) { # plots <- wrap_plots(plots, ncol = orig.groups %iff% ncol) # } # return(plots) # } # <bytecode: 0x557cc27befc8> # <environment: namespace:Seurat> print (SingleDimPlot) # function (data, dims, col.by = NULL, cols = NULL, pt.size = NULL, # shape.by = NULL, alpha = 1, alpha.by = NULL, order = NULL, # label = FALSE, repel = FALSE, label.size = 4, cells.highlight = NULL, # cols.highlight = " sizes.highlight = 1, na.value = "grey50", # raster = NULL, raster.dpi = NULL) # { # if ((nrow(x = data) > 1e+05) & is.null(x = raster)) { # message("Rasterizing points since number of points exceeds 100,000.", # "\nTo disable this behavior set `raster=FALSE`") # } # raster <- raster %||% (nrow(x = data) > 1e+05) # pt.size <- pt.size %||% AutoPointSize(data = data, raster = raster) # if (!is.null(x = cells.highlight) && pt.size != AutoPointSize(data = data, # raster = raster) && sizes.highlight != pt.size && isTRUE(x = raster)) { # warning("When `raster = TRUE` highlighted and non-highlighted cells must be the same size. Plot will use the value provided to 'sizes.highlight'.") # } # if (!is.null(x = raster.dpi)) { # if (!is.numeric(x = raster.dpi) || length(x = raster.dpi) != # 2) # stop("'raster.dpi' must be a two-length numeric vector") # } # if (length(x = dims) != 2) { # stop("'dims' must be a two-length vector") # } # if (!is.data.frame(x = data)) { # data <- as.data.frame(x = data) # } # if (is.character(x = dims) && !all(dims %in% colnames(x = data))) { # stop("Cannot find dimensions to plot in data") # } # else if (is.numeric(x = dims)) { # dims <- colnames(x = data)[dims] # } # if (!is.null(x = cells.highlight)) { # if (inherits(x = cells.highlight, what = "data.frame")) { # stop("cells.highlight cannot be a dataframe. ", "Please supply a vector list") # } # highlight.info <- SetHighlight(cells.highlight = cells.highlight, # cells.all = rownames(x = data), sizes.highlight = sizes.highlight %||% # pt.size, cols.highlight = cols.highlight, col.base = cols[1] %||% # " pt.size = pt.size, raster = raster) # order <- highlight.info$plot.order # data$highlight <- highlight.info$highlight # col.by <- "highlight" # pt.size <- highlight.info$size # cols <- highlight.info$color # } # if (!is.null(x = order) && !is.null(x = col.by)) { # if (typeof(x = order) == "logical") { # if (order) { # data <- data[order(!is.na(x = data[, col.by]), # data[, col.by]), ] # } # } # else { # order <- rev(x = c(order, setdiff(x = unique(x = data[, # col.by]), y = order))) # data[, col.by] <- factor(x = data[, col.by], levels = order) # new.order <- order(x = data[, col.by]) # data <- data[new.order, ] # if (length(x = pt.size) == length(x = new.order)) { # pt.size <- pt.size[new.order] # } # } # } # if (!is.null(x = col.by) && !col.by %in% colnames(x = data)) { # warning("Cannot find ", col.by, " in plotting data, not coloring plot") # col.by <- NULL # } # else { # col.index <- match(x = col.by, table = colnames(x = data)) # if (grepl(pattern = "^\\d", x = col.by)) { # col.by <- paste0("x", col.by) # } # else if (grepl(pattern = "-", x = col.by)) { # col.by <- gsub(pattern = "-", replacement = ".", # x = col.by) # } # colnames(x = data)[col.index] <- col.by # } # if (!is.null(x = shape.by) && !shape.by %in% colnames(x = data)) { # warning("Cannot find ", shape.by, " in plotting data, not shaping plot") # } # if (!is.null(x = alpha.by) && !alpha.by %in% colnames(x = data)) { # warning("Cannot find alpha variable ", alpha.by, " in data, setting to NULL", # call. = FALSE, immediate. = TRUE) # alpha.by <- NULL # } # plot <- ggplot(data = data) # plot <- if (isTRUE(x = raster)) { # plot + geom_scattermore(mapping = aes_string(x = dims[1], # y = dims[2], color = paste0("`", col.by, "`"), shape = shape.by, # alpha = alpha.by), pointsize = pt.size, alpha = alpha, # pixels = raster.dpi) # } # else { # plot + geom_point(mapping = aes_string(x = dims[1], y = dims[2], # color = paste0("`", col.by, "`"), shape = shape.by, # alpha = alpha.by), size = pt.size, alpha = alpha) # } # plot <- plot + guides(color = guide_legend(override.aes = list(size = 3, # alpha = 1))) + labs(color = NULL, title = col.by) + CenterTitle # if (label && !is.null(x = col.by)) { # plot <- LabelClusters(plot = plot, id = col.by, repel = repel, # size = label.size) # } # if (!is.null(x = cols)) { # if (length(x = cols) == 1 && (is.numeric(x = cols) || # cols %in% rownames(x = brewer.pal.info))) { # scale <- scale_color_brewer(palette = cols, na.value = na.value) # } # else if (length(x = cols) == 1 && (cols %in% c("alphabet", # "alphabet2", "glasbey", "polychrome", "stepped"))) { # colors <- DiscretePalette(length(unique(data[[col.by]])), # palette = cols) # scale <- scale_color_manual(values = colors, na.value = na.value) # } # else { # scale <- scale_color_manual(values = cols, na.value = na.value) # } # plot <- plot + scale # } # plot <- plot + theme_cowplot # return(plot) # } # <bytecode: 0x557cc334eea8> # <environment: namespace:Seurat> mydata <- sce @ reductions $ umap @ cell.embeddings # mydata[,3] <- as.data.frame(pbmc$celltype) mymeta <- as.data.frame (sce @ meta.data) mydata <- cbind (mydata,mymeta) head (mydata) # umap_1 umap_2 orig.ident nCount_RNA nFeature_RNA # IRI4h1_AAACCTGAGATTACCC 7.387328 -4.849914 IRI4h1 2490 564 # IRI4h1_AAACCTGAGTGTTAGA -2.336923 -12.271916 IRI4h1 514 320 # IRI4h1_AAACCTGCACCAACCG -3.402302 6.163878 IRI4h1 1171 392 # IRI4h1_AAACCTGCAGCCTGTG -3.669633 2.410121 IRI4h1 1712 530 # IRI4h1_AAACCTGCAGGGAGAG 1.870005 5.571418 IRI4h1 891 503 # IRI4h1_AAACCTGGTAGCGCTC -7.298200 -10.822622 IRI4h1 1121 521 # RNA_snn_res.0.4 seurat_clusters Celltype # IRI4h1_AAACCTGAGATTACCC 6 6 NewPT1 # IRI4h1_AAACCTGAGTGTTAGA 3 3 EC1 # IRI4h1_AAACCTGCACCAACCG 2 2 DCT # IRI4h1_AAACCTGCAGCCTGTG 1 1 MTAL # IRI4h1_AAACCTGCAGGGAGAG 10 ICA # IRI4h1_AAACCTGGTAGCGCTC 7 7 Fib 3.2 重现dimplot library (ggplot2) p <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + geom_point ( size = 0.15 ) p # 去掉灰色背景 p + theme_bw # 去掉网格线 library (ggthemes) p + theme_bw + theme_few # 修改坐标轴，把边框去掉一半，这下坐标轴就一样了 p + theme_bw + theme_few + theme_classic # 修改点的大小，这样主图就基本就一模一样啦：ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic | DimPlot (sce, pt.size = 0.1 ) 3.3 开始改造 3.3.1 添加置信区间 # 加一个光秃秃的置信区间 p <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic p p + stat_ellipse p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , alpha= 1 / 5 ) library (ggsci) mycol <- c ( pal_d3 ( 7 ), pal_aaas ( 7 ), pal_uchicago ( 7 ), pal_jama ( 7 )) p <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + scale_color_manual ( values = mycol) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic p p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 1 # 置信区间的透明度，=1时会将点覆盖 ) + scale_fill_manual ( values = mycol) # 修改置信区间线条类型 (p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 1 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 3 , size= 1 , alpha= 0.2 # 置信区间的透明度，=1时会将点覆盖 ) + scale_fill_manual ( values = mycol) ) / ( p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 4 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 5 , size= 1 , alpha= 0.2 # 置信区间的透明度，=1时会将点覆盖 ) + scale_fill_manual ( values = mycol) ) # 置信区间椭圆计算方式 # type 参数决定了椭圆的计算方式，即如何估计数据的分布形状。

可选的参数值如下：# "t"（默认值）：计算基于t分布的置信椭圆，适用于小样本（n<30）。使用Mahalanobis距离计算椭圆边界点。计算的置信区间较宽，适合数据点较少的情况 # "norm"（正态分布）：假设数据服从多元正态分布，使用协方差矩阵计算椭圆。计算方式基于 MASS::cov.trob 计算协方差，适用于大样本数据。# "euclid"（欧几里得距离）：生成基于标准偏差的等距椭圆，而非置信区间。

计算方式为：以数据均值为中心，沿x和y轴分别按照个标准差绘制椭圆。适用于标准化数据（如 PCA 结果）。

p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度，=1时会将点覆盖 type= 't' ) + scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度，=1时会将点覆盖 type= 'norm' ) + normal distribution 分布 scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度，=1时会将点覆盖 type= 'euclid' ) + scale_fill_manual ( values = mycol) # 调整置信度（置信区间包含真实总体参数的概率，通常用百分比表示（如 95%、99%）） p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度，=1时会将点覆盖 type= 't' , level = 0.95 ) + scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度，=1时会将点覆盖 type= 't' , level = 0.5 ) + scale_fill_manual ( values = mycol) # type=euclid时，level的值就是置信区间的半径,看看效果：p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度 type= 'euclid' , level = 1 ) + scale_fill_manual ( values = mycol) | p + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 , # 置信区间的透明度 type= 'euclid' , level = 3 ) + scale_fill_manual ( values = mycol) 3.3.2 坐标轴调整 # 原图 p <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + scale_color_manual ( values = mycol) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) + # 置信区间的透明度，=1时会将点覆盖 scale_fill_manual ( values = mycol) p # 增加坐标轴两端箭头 p | (p + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.5 , 'cm' )))) ) # 隐藏坐标轴刻度和坐标轴文本 p | p + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) # 箭头大小调整 p + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.1 , 'cm' )))) | p + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 1 , 'cm' )))) # 修改箭头的形状 p + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.5 , 'cm' ), type = 'open' ))) | p + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.5 , 'cm' ), type = 'closed' ))) # 缩短坐标轴长度 line.x.data <- data.frame ( x= c ( - 15 , - 10 ), y= c ( - 15 , - 15 )) line.y.data <- data.frame ( x= c ( - 15 , - 15 ), y= c ( - 15 , - 10 )) ggplot (mydata, aes ( x= umap_1, y= umap_2)) + scale_color_manual ( values = mycol) + geom_point ( data = mydata, size= 0.1 , aes ( col= Celltype)) + geom_line ( data = line.x.data, aes ( x = x, y = y), arrow = ggplot2 :: arrow ( length = unit ( 0.2 , "cm" ), type = 'closed' )) + geom_line ( data = line.y.data, aes ( x = x, y = y), arrow = ggplot2 :: arrow ( length = unit ( 0.2 , "cm" ), type = 'closed' )) + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) + theme ( panel.border = element_blank , axis.title = element_blank , axis.text = element_blank , axis.ticks = element_blank , panel.background = element_rect ( fill = 'white' ), plot.background= element_rect ( fill= "white" )) 3.3.3 加上文本标签 # 生成标签的数据 my.label <- data.frame for (runcell in unique (mydata $ Celltype)) { umap_1 <- dplyr :: filter (mydata,Celltype == runcell) %>% pull (umap_1) %>% mean umap_2 <- dplyr :: filter (mydata,Celltype == runcell) %>% pull (umap_2) %>% mean my.label <- rbind (my.label, cbind (umap_1,umap_2)) } rownames (my.label) <- unique (mydata $ Celltype) my.label $ Celltype <- rownames (my.label) my.label # umap_1 umap_2 Celltype # NewPT1 6.827872 -1.2620147 NewPT1 # EC1 -2.075786 -11.0531261 EC1 # DCT -4.130859 5.7925009 DCT # MTAL -4.155011 1.1480176 MTAL # ICA 1.866516 5.4740464 ICA # Fib -6.432889 -9.7453412 Fib # PTS2 7.218748 0.8346768 PTS2 # PTS1 8.553653 2.6910735 PTS1 # Mø -2.993166 -7.0425605 Mø # PC2 -10.724453 5.2327600 PC2 # DTL-ATL -1.891181 -2.5475451 DTL-ATL # CNT -7.711035 7.0706413 CNT # 跟celltype一个颜色看不清楚 p <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) p + geom_text ( aes ( x = umap_1, y = umap_2, label= Celltype), fontface= "bold" , data = my.label) # 貌似图例有点问题，我们来修改一下：p + geom_text ( aes ( x = umap_1, y = umap_2, label= Celltype), fontface= "bold" , data = my.label, show.legend = F) p + geom_text ( aes ( x = umap_1 + 1 , y = umap_2 + 1 , label= Celltype), fontface= "bold" , data = my.label, show.legend = F) # 修改geom_text字体颜色，使其更加明显 p + geom_text ( aes ( x = umap_1, y = umap_2, label= Celltype), color= 'black' , fontface= "bold" , data = my.label, show.legend = F) # 添加带有边框的文本标签 p + geom_label ( aes ( x = umap_1, y = umap_2, label= Celltype), fontface= "bold" , data = my.label, show.legend = F) # 填充距离为 0.5 行高，即标签与数据点之间会保留 0.5 行高的空白区域，避免标签与数据点过于接近 library (ggrepel) p + geom_label_repel ( aes ( x = umap_1, y = umap_2, label= Celltype), fontface= "bold" , data = my.label, point.padding= unit ( 0.5 , "lines" ), show.legend = F) my.label[, 1 : 2 ] <- my.label[, 1 : 2 ] + 1 p + geom_label_repel ( aes ( x = umap_1, y = umap_2, label= Celltype), fontface= "bold" , data = my.label, point.padding= unit ( 0.5 , "lines" ), show.legend = F) 3.3.4 分面 DimPlot (sce, split.by = 'orig.ident' ) p <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + scale_color_manual ( values = mycol) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.1 , 'cm' )))) p p + facet_grid ( ~ orig.ident) p + facet_grid ( orig.ident ~ . ) p + facet_grid (orig.ident ~ Celltype) p + facet_grid (. ~ orig.ident + Celltype) # 如果想要更改split后的顺序：p + facet_grid (. ~ orig.ident) levels (mydata $ orig.ident) # NULL unique (mydata $ orig.ident) # [1] "IRI4h1" "IRI4h2" "IRI4h3" "IRI12h1b1" "IRI12h1b2" "IRI12h2" # [7] "IRI12h3" mydata $ orig.ident <- factor (mydata $ orig.ident, levels = c ( "IRI4h3" , "IRI4h2" , "IRI4h1" , "IRI12h3" , "IRI12h2" , "IRI12h1b2" , "IRI12h1b1" )) levels (mydata $ orig.ident) # [1] "IRI4h3" "IRI4h2" "IRI4h1" "IRI12h3" "IRI12h2" "IRI12h1b2" # [7] "IRI12h1b1" p2 <- ggplot (mydata, aes ( x= umap_1, y= umap_2, col= Celltype)) + scale_color_manual ( values = mycol) + geom_point ( size= 0.1 ) + theme_bw + theme_few + theme_classic + stat_ellipse ( aes ( fill= Celltype), geom = "polygon" , linetype = 2 , size= 1 , alpha= 0.2 ) + scale_fill_manual ( values = mycol) + theme ( axis.ticks = element_blank , axis.text.y = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank ) + theme ( axis.line = element_line ( arrow = arrow ( length = unit ( 0.1 , 'cm' )))) (p + facet_grid (. ~ orig.ident )) / (p2 + facet_grid (. ~ orig.ident )) 4. 3D降维图与动图 4.1 数据整理 library (Seurat) set.seed ( 20230105 ) dim (sce @ reductions $ umap) dim (sce @ reductions $ tsne) DimPlot (sce) # 重新计算umap和tsne sce <- RunTSNE (sce, reduction = "pca" , dims = 1 : 20 , dim.embed= 3 ) sce <- RunUMAP (sce, reduction = "pca" , dims = 1 : 20 , n.components = 3 ) # 保存rds # saveRDS(sce,"./iri.intergrate.harmony.rename.3D.rds") 4.2 ggplot2的3d降维图 if ( ! require (gg3D))devtools :: install_github ( "AckerDWM/gg3D" ) sce <- readRDS ( "./iri.intergrate.harmony.rename.3D.rds" ) dim (sce @ reductions $ umap) # [1] 27653 3 dim (sce @ reductions $ tsne) # [1] 27653 3 tsne <- sce @ reductions $ tsne <- Embeddings ( object = sce[[ "tsne" ]]) umap <- sce @ reductions $ umap <- Embeddings ( object = sce[[ "umap" ]]) umap43d <- cbind (umap,sce @ meta.data) head (umap43d) # umap_1 umap_2 umap_3 orig.ident nCount_RNA # IRI4h1_AAACCTGAGATTACCC 3.559075 -1.740733 2.9102795 IRI4h1 2490 # IRI4h1_AAACCTGAGTGTTAGA -4.556370 -5.701739 -0.4349645 IRI4h1 514 # IRI4h1_AAACCTGCACCAACCG -3.213806 4.902372 3.5176237 IRI4h1 1171 # IRI4h1_AAACCTGCAGCCTGTG -5.149614 6.129024 -2.5434456 IRI4h1 1712 # IRI4h1_AAACCTGCAGGGAGAG -6.157923 -1.452615 -1.0119777 IRI4h1 891 # IRI4h1_AAACCTGGTAGCGCTC -3.686494 -5.441414 -4.7313709 IRI4h1 1121 # nFeature_RNA_snn_res.0.4 seurat_clusters Celltype # IRI4h1_AAACCTGAGATTACCC 564 6 6 NewPT1 # IRI4h1_AAACCTGAGTGTTAGA 320 3 3 EC1 # IRI4h1_AAACCTGCACCAACCG 392 2 2 DCT # IRI4h1_AAACCTGCAGCCTGTG 530 1 1 MTAL # IRI4h1_AAACCTGCAGGGAGAG 503 10 ICA # IRI4h1_AAACCTGGTAGCGCTC 521 7 7 Fib ggplot (umap43d, aes ( x= umap_1, y= umap_2, z= umap_3, color= Celltype)) + theme ( axis.ticks = element_blank , axis.text = element_blank ) + axes_3D ( theta= 0 , phi= 90 ) + 3D 图形中添加坐标轴 stat_3D ( theta= 0 , phi= 90 , size = 0.1 ) + 3D 散点（或者统计变换的 3D 点） labs_3D ( theta= 0 , phi= 90 , 3D 图形的坐标轴添加标签 hjust= c ( 1 , 0 , 0 ), vjust= c ( 1.5 , 1 , - . 2 ), labs= c ( "UMAP_1" , "UMAP_2" , "UMAP_3" )) + theme_void # 修改配色 ggplot (umap43d, aes ( x= umap_1, y= umap_2, z= umap_3, color= Celltype)) + theme ( axis.ticks = element_blank , axis.text = element_blank ) + scale_color_manual ( values = mycol) + axes_3D ( theta= 0 , phi= 90 ) + stat_3D ( theta= 0 , phi= 90 , size = 0.1 ) + labs_3D ( theta= 0 , phi= 90 , hjust= c ( 1 , 0 , 0 ), vjust= c ( 1.5 , 1 , - . 2 ), labs= c ( "UMAP_1" , "UMAP_2" , "UMAP_3" )) + theme_void 4.3 scatterplot3d library (scatterplot3d) # col 控制点的颜色 umap43d $ celltype.col <- umap43d $ Celltype head (umap43d) # umap_1 umap_2 umap_3 orig.ident nCount_RNA # IRI4h1_AAACCTGAGATTACCC 3.559075 -1.740733 2.9102795 IRI4h1 2490 # IRI4h1_AAACCTGAGTGTTAGA -4.556370 -5.701739 -0.4349645 IRI4h1 514 # IRI4h1_AAACCTGCACCAACCG -3.213806 4.902372 3.5176237 IRI4h1 1171 # IRI4h1_AAACCTGCAGCCTGTG -5.149614 6.129024 -2.5434456 IRI4h1 1712 # IRI4h1_AAACCTGCAGGGAGAG -6.157923 -1.452615 -1.0119777 IRI4h1 891 # IRI4h1_AAACCTGGTAGCGCTC -3.686494 -5.441414 -4.7313709 IRI4h1 1121 # nFeature_RNA_snn_res.0.4 seurat_clusters Celltype # IRI4h1_AAACCTGAGATTACCC 564 6 6 NewPT1 # IRI4h1_AAACCTGAGTGTTAGA 320 3 3 EC1 # IRI4h1_AAACCTGCACCAACCG 392 2 2 DCT # IRI4h1_AAACCTGCAGCCTGTG 530 1 1 MTAL # IRI4h1_AAACCTGCAGGGAGAG 503 10 ICA # IRI4h1_AAACCTGGTAGCGCTC 521 7 7 Fib # celltype.col # IRI4h1_AAACCTGAGATTACCC NewPT1 # IRI4h1_AAACCTGAGTGTTAGA EC1 # IRI4h1_AAACCTGCACCAACCG DCT # IRI4h1_AAACCTGCAGCCTGTG MTAL # IRI4h1_AAACCTGCAGGGAGAG ICA # IRI4h1_AAACCTGGTAGCGCTC Fib for (i in 1 : 12 ) { umap43d $ celltype.col <- gsub ( unique (umap43d $ celltype.col)[i],mycol[i],umap43d $ celltype.col) } head (umap43d) # umap_1 umap_2 umap_3 orig.ident nCount_RNA # IRI4h1_AAACCTGAGATTACCC 3.559075 -1.740733 2.9102795 IRI4h1 2490 # IRI4h1_AAACCTGAGTGTTAGA -4.556370 -5.701739 -0.4349645 IRI4h1 514 # IRI4h1_AAACCTGCACCAACCG -3.213806 4.902372 3.5176237 IRI4h1 1171 # IRI4h1_AAACCTGCAGCCTGTG -5.149614 6.129024 -2.5434456 IRI4h1 1712 # IRI4h1_AAACCTGCAGGGAGAG -6.157923 -1.452615 -1.0119777 IRI4h1 891 # IRI4h1_AAACCTGGTAGCGCTC -3.686494 -5.441414 -4.7313709 IRI4h1 1121 # nFeature_RNA_snn_res.0.4 seurat_clusters Celltype # IRI4h1_AAACCTGAGATTACCC 564 6 6 NewPT1 # IRI4h1_AAACCTGAGTGTTAGA 320 3 3 EC1 # IRI4h1_AAACCTGCACCAACCG 392 2 2 DCT # IRI4h1_AAACCTGCAGCCTGTG 530 1 1 MTAL # IRI4h1_AAACCTGCAGGGAGAG 503 10 ICA # IRI4h1_AAACCTGGTAGCGCTC 521 7 7 Fib # celltype.col # IRI4h1_AAACCTGAGATTACCC # IRI4h1_AAACCTGAGTGTTAGA # IRI4h1_AAACCTGCACCAACCG # IRI4h1_AAACCTGCAGCCTGTG # IRI4h1_AAACCTGCAGGGAGAG # IRI4h1_AAACCTGGTAGCGCTC p1 <- scatterplot3d (umap43d[, 1 : 3 ], color = umap43d $ celltype.col, main= "scatterplot3d.umap" , pch = 16 , cex.symbols = 0.2 ) # 形状为实心圆，点的大小是默认的0.2倍 legend (p1 $ xyz.convert ( 8.5 , 2.5 , 5 ), legend = unique (umap43d $ Celltype), col = unique (umap43d $ celltype.col), pch = 16 ) # legend形状为实心圆 # pch 控制点的形状 umap43d $ celltype.pch <- umap43d $ Celltype for (i in 1 : 12 ) { umap43d $ celltype.pch <- gsub ( unique (umap43d $ celltype.pch)[i],( 1 : 12 )[i],umap43d $ celltype.pch) } head (umap43d) # umap_1 umap_2 umap_3 orig.ident nCount_RNA # IRI4h1_AAACCTGAGATTACCC 3.559075 -1.740733 2.9102795 IRI4h1 2490 # IRI4h1_AAACCTGAGTGTTAGA -4.556370 -5.701739 -0.4349645 IRI4h1 514 # IRI4h1_AAACCTGCACCAACCG -3.213806 4.902372 3.5176237 IRI4h1 1171 # IRI4h1_AAACCTGCAGCCTGTG -5.149614 6.129024 -2.5434456 IRI4h1 1712 # IRI4h1_AAACCTGCAGGGAGAG -6.157923 -1.452615 -1.0119777 IRI4h1 891 # IRI4h1_AAACCTGGTAGCGCTC -3.686494 -5.441414 -4.7313709 IRI4h1 1121 # nFeature_RNA_snn_res.0.4 seurat_clusters Celltype # IRI4h1_AAACCTGAGATTACCC 564 6 6 NewPT1 # IRI4h1_AAACCTGAGTGTTAGA 320 3 3 EC1 # IRI4h1_AAACCTGCACCAACCG 392 2 2 DCT # IRI4h1_AAACCTGCAGCCTGTG 530 1 1 MTAL # IRI4h1_AAACCTGCAGGGAGAG 503 10 ICA # IRI4h1_AAACCTGGTAGCGCTC 521 7 7 Fib # celltype.col celltype.pch # IRI4h1_AAACCTGAGATTACCC 1 # IRI4h1_AAACCTGAGTGTTAGA 2 # IRI4h1_AAACCTGCACCAACCG 3 # IRI4h1_AAACCTGCAGCCTGTG 4 # IRI4h1_AAACCTGCAGGGAGAG 5 # IRI4h1_AAACCTGGTAGCGCTC 6 p2 <- scatterplot3d (umap43d[, 1 : 3 ], color = umap43d $ celltype.col, main= "scatterplot3d.umap" , pch = as.numeric (umap43d $ celltype.pch), cex.symbols = 0.4 ) legend (p2 $ xyz.convert ( 8.5 , 2.5 , 5 ), legend = unique (umap43d $ Celltype), col = unique (umap43d $ celltype.col), pch = as.numeric (umap43d $ celltype.pch)) 4.4 plotly的3d降维图 if ( ! require (plotly)) install.packages (plotly) tsne <- as.data.frame (tsne) tsne .3 d <- plot_ly (tsne, x = ~ tSNE_1, y = ~ tSNE_2, z = ~ tSNE_3, color = sce $ Celltype, colors = mycol, marker = list ( size = 1 )) %>% # 这里的 size 控制散点的大小 layout ( legend = list ( itemsizing = "constant" )) tsne .3 d umap <- as.data.frame (umap) umap .3 d <- plot_ly (umap, x = ~ umap_1, y = ~ umap_2, z = ~ umap_3, color = sce $ Celltype, colors = mycol, size= 2 , marker = list ( size = 1 )) %>% # 这里的 size 控制散点的大小 layout ( legend = list ( itemsizing = "constant" )) umap .3 d 5. 一些造好的轮子 # 1 plot1cell # 更多功能可以看这里：https://github.com/HaojiaWu/plot1cell if ( ! require (devtools)) install.packages ( 'devtools' ) if ( ! require (biomaRt)) install.packages ( 'biomaRt' ) if ( ! require (XML)) install.packages ( 'XML' ) if ( ! require (biomaRt))devtools :: install_github ( 'alyst/biomaRt' , dependencies = T) if ( ! require (GenomeInfoDb)) install.packages ( 'GenomeInfoDb' ) if ( ! require ( "BiocManager" , quietly = TRUE )) install.packages ( "BiocManager" ) if ( ! require (EnsDb.Hsapiens.v86))BiocManager :: install ( 'EnsDb.Hsapiens.v86' ) if ( ! require (GEOquery))BiocManager :: install ( 'GEOquery' ) if ( ! require (simplifyEnrichment))BiocManager :: install ( 'simplifyEnrichment' ) if ( ! require (ComplexHeatmap))BiocManager :: install ( 'ComplexHeatmap' ) if ( ! require (DoubletFinder))devtools :: install_github ( "chris-mcginnis-ucsf/DoubletFinder" ) if ( ! require (loomR))devtools :: install_github ( "mojaveazure/loomR" ) if ( ! require (hdf5r))devtools :: install_github ( "Novartis/hdf5r" ) if ( ! require (plot1cell)) devtools :: install_github ( "TheHumphreysLab/plot1cell" ) sce <- readRDS ( "./iri.intergrate.harmony.rename.rds" ) # 选取部分细胞用于后续分析 sel <- c ( "PTS1" , "PTS2" , "NewPT1" , "DTL-ATL" , "MTAL" , "DCT" , "CNT" , "PC2" , "ICA" , "EC1" , "Fib" , "Mø" ) sce <- subset (sce, Celltype %in% sel) # 指定因子展示顺序 sce @ meta.data $ Celltype <- factor (sce @ meta.data $ Celltype, levels = sel) Idents (sce) <- factor ( Idents (sce), levels = sel) circ_data <- prepare_circlize_data (sce, scale = 0.8 ) set.seed ( 1234 ) cluster_colors <- rand_color ( length ( levels (sce))) seurat_clusters_colors <- rand_color ( length ( names ( table (sce $ seurat_clusters)))) rep_colors <- rand_color ( length ( names ( table (sce $ orig.ident)))) plot_circlize (circ_data, do.label = T, pt.size = 0.01 , col.use = cluster_colors , bg.color = 'white' , kde2d.n = 200 , repel = T, label.cex = 0.6 ) add_track (circ_data, group = "seurat_clusters" , colors = seurat_clusters_colors, track_num = 2 ) # 由内向外数 add_track (circ_data, group = "orig.ident" , colors = rep_colors, track_num = 3 ) if ( ! require (scRNAtoolVis))devtools :: install_github ( "junjunlab/scRNAtoolVis" ) test <- system.file ( "extdata" , "seuratTest.RDS" , package = "scRNAtoolVis" ) # tmp <- readRDS(test) # 最简单的绘图 clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = T, clusterCol= "Celltype" ) # 改变箭头形状 clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = T, arrowType = 'open' , clusterCol= "Celltype" ) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = F, groupFacet = "Celltype" , aspect.ratio = 1 , relLength = 0.5 , clusterCol= "Celltype" , show.legend= F) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = F, groupFacet = "Celltype" , aspect.ratio = 1 , relLength = 0.5 , clusterCol= "Celltype" , show.legend= F, axes = "one" ) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = F, groupFacet = "Celltype" , aspect.ratio = 1 , relLength = 0.5 , lineTextcol = 'grey50' , show.legend = F, clusterCol= "Celltype" ) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = T, keySize = 8 , clusterCol= "Celltype" ) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = T, cellLabel = T, cellLabelSize = 4 , clusterCol= "Celltype" ) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , groupFacet = 'orig.ident' , noSplit = F, cellLabel = T, cellLabelSize = 3 , show.legend = F, aspect.ratio = 1 , themebg = 'bwCorner' , clusterCol= "Celltype" ) clusterCornerAxes ( object = sce, reduction = 'umap' , pSize= 0.1 , noSplit = T, cornerTextSize = 3.5 , themebg = 'bwCorner' , addCircle = TRUE , cicAlpha = 0.2 , nbin = 200 , cicDelta= 0.1 , clusterCol= "Celltype" )