第 13/16 章

第十二讲:改造单细胞图形-FeaturePlot

# 1. Setup ----
library(Seurat)
library(dplyr)
library(patchwork)

# Load the pbmc.rds object saved in Lecture 3 (single-sample analysis); it is
# the input for every plot below.
scRNA <- readRDS("../result_seuratV5/pbmc.rds")
DimPlot(scRNA)

# Positive markers only, for every identity class in the object.
scRNA.markers <- FindAllMarkers(
  scRNA,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

# Keep the top 3 genes of each cluster, ranked by avg_log2FC.
top3gene <- scRNA.markers %>%
  group_by(cluster) %>%
  top_n(n = 3, wt = avg_log2FC)
top3gene
# # A tibble: 27 × 7
# # Groups:   cluster [9]
#        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster      gene
#        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>        <chr>
#  1 1.17e- 83       2.37 0.435 0.108 1.60e- 79 Naive CD4 T  CCR7
#  2 3.28e- 49       2.10 0.333 0.103 4.50e- 45 Naive CD4 T  LEF1
#  3 9.31e- 44       2.02 0.328 0.11  1.28e- 39 Naive CD4 T  PRKCQ-AS1
#  4 0               6.64 0.975 0.121 0         CD14+ Mono   S100A8
#  5 3.10e-139       7.28 0.3   0.004 4.25e-135 CD14+ Mono   FOLR3
#  6 1.63e-121       6.74 0.277 0.006 2.23e-117 CD14+ Mono   S100A12
#  7 5.53e- 61       1.65 0.657 0.245 7.58e- 57 Memory CD4 T CD2
#  8 2.61e- 59       2.11 0.424 0.111 3.58e- 55 Memory CD4 T AQP3
#  9 1.94e- 35       1.90 0.267 0.069 2.66e- 31 Memory CD4 T CD40LG
# 10 0               6.91 0.936 0.041 0         B            CD79A
# # ℹ 17 more rows

# 2. Tuning FeaturePlot parameters ----
# Baseline FeaturePlot; CD79A is a B-cell marker gene.
FeaturePlot(scRNA, features = "CD79A", label = TRUE)

# Side by side with the earlier DimPlot result (`|` is the patchwork operator).
DimPlot(scRNA) | FeaturePlot(scRNA, features = "CD79A", label = TRUE)

# Several genes at once, one panel per gene.
FeaturePlot(scRNA, c("CD79A", "CD8A"), blend = FALSE)

# Co-expression of two genes.
# blend = TRUE switches on colour blending; this mode accepts exactly two genes.
FeaturePlot(scRNA, c("CD79A", "CD8A"), blend = TRUE)

# Passing more than two genes produces the error shown below.
# try({FeaturePlot(scRNA,c('CD79A','CD8A','CCR7'),blend = T)})
# Error in FeaturePlot(scRNA, c("CD79A", "CD8A", "CCR7"), blend = T) :
#   Blending feature plots only works with two features

# 3.
# DIY directly with ggplot2 ----

# 3.1 Data preparation ----
library(dplyr)
library(ggplot2)
library(ggthemes)
library(ggnewscale)

# UMAP coordinates, one row per cell.
mydata <- scRNA@reductions$umap@cell.embeddings
head(mydata)
#                     umap_1    umap_2
# AAACATACAACCAC-1 -3.304555 -3.914703
# AAACATTGAGCTAC-1 -5.620712 10.682330
# AAACATTGATCAGC-1 -5.630144 -6.400627
# AAACCGTGCTTCCG-1  9.145644  5.535673
# AAACCGTGTATGCG-1 -5.354362  2.053312
# AAACGCACTGGTAC-1 -3.218432 -6.085276

# Expression of the three genes of interest, transposed to cells x genes.
# The rows are selected before converting to a dense matrix so that the whole
# expression matrix is never densified.
marker_genes <- c("CD79A", "CD8A", "CCR7")
myexpr <- GetAssayData(scRNA, layer = "data")[marker_genes, , drop = FALSE] %>%
  as.matrix() %>%
  t()
head(myexpr)
#                     CD79A     CD8A     CCR7
# AAACATACAACCAC-1 0.000000 1.635873 1.635873
# AAACATTGAGCTAC-1 1.962726 0.000000 0.000000
# AAACATTGATCAGC-1 0.000000 0.000000 0.000000
# AAACCGTGCTTCCG-1 0.000000 0.000000 0.000000
# AAACCGTGTATGCG-1 0.000000 0.000000 0.000000
# AAACGCACTGGTAC-1 0.000000 0.000000 0.000000

# One data frame holding coordinates, expression values and cell identity.
mydata <- cbind(mydata, myexpr) %>%
  as.data.frame()
mydata$celltype <- Idents(scRNA)
head(mydata)
#                     umap_1    umap_2    CD79A     CD8A     CCR7     celltype
# AAACATACAACCAC-1 -3.304555 -3.914703 0.000000 1.635873 1.635873 Memory CD4 T
# AAACATTGAGCTAC-1 -5.620712 10.682330 1.962726 0.000000 0.000000            B
# AAACATTGATCAGC-1 -5.630144 -6.400627 0.000000 0.000000 0.000000 Memory CD4 T
# AAACCGTGCTTCCG-1  9.145644  5.535673 0.000000 0.000000 0.000000   CD14+ Mono
# AAACCGTGTATGCG-1 -5.354362  2.053312 0.000000 0.000000 0.000000           NK
# AAACGCACTGGTAC-1 -3.218432 -6.085276 0.000000 0.000000 0.000000 Memory CD4 T

# 3.2 Reproducing FeaturePlot ----
# A single gene. The legend title names the gene that is actually mapped to
# colour (CD79A).
# NOTE(review): the `high` colour literal was lost from the source text;
# "purple" follows the tutorial's description of the CD79A gradient as running
# from grey to purple.
p <- ggplot(mydata, aes(x = umap_1, y = umap_2)) +
  geom_point(aes(color = CD79A), size = 1) +
  scale_color_gradient(
    "CD79A",
    low = alpha("grey", 0.1),
    high = alpha("purple", 1)
  )
p

# Add 2D density contours.
# NOTE(review): colour is mapped to CD8A here while the points show CD79A;
# kept as written in the tutorial - confirm this is intended.
p + stat_density2d(aes(colour = CD8A))

# Three genes in one plot.
# By default ggplot2 supports a single colour scale per plot: if a later
# geom_* layer uses aes(color = ...) again, it reuses the existing colour
# scale. new_scale("color") from the ggnewscale package makes several colour
# scales possible in the same plot. Here scale_color_gradient() has already
# mapped CD79A from grey to purple; calling new_scale("color") lets the
# following layers define aes(color = ...) again and attach their own
# scale_color_* rule.
# NOTE(review): the CCR7 `high` colour literal was lost from the source text;
# "blue" is a placeholder - replace it with the intended colour.
p <- ggplot(mydata, aes(x = umap_1, y = umap_2)) +
  geom_point(aes(color = CD79A), size = 1) +
  scale_color_gradient(
    "CD79A",
    low = alpha("grey", 0.1),
    high = alpha("purple", 1)
  ) +
  new_scale("color") +
  geom_point(aes(color = CD8A), size = 1) +
  scale_color_gradient(
    "CD8A",
    low = alpha("grey", 0.1),
    high = alpha("red", 1)
  ) +
  new_scale("color") +
  geom_point(aes(color = CCR7), size = 1) +
  scale_color_gradient(
    "CCR7",
    low = alpha("grey", 0.1),
    high = alpha("blue", 1)
  )
p

# Change the background. These are complete themes, so each one replaces the
# previous one and the last (theme_classic) determines the final look.
p <- p +
  theme_bw() + # drops the grey background
  theme_few() + # drops the grid lines
  theme_classic() # drops the top and right border lines
p

# Add one confidence ellipse per cell type. The ellipse is drawn as a path,
# which has no fill, so the cell type is supplied through `group`.
p <- p + stat_ellipse(aes(group = celltype))
p

# Build the label table: the mean UMAP coordinate (umap_1, umap_2) of every
# cell type, stored in my.label and used below to annotate the plot. Cell types
# are kept in order of first appearance in mydata.
cell_types <- as.character(unique(mydata$celltype))
centroid <- function(axis) {
  vapply(
    cell_types,
    function(ct) mean(mydata[[axis]][mydata$celltype == ct]),
    numeric(1)
  )
}
my.label <- data.frame(
  umap_1 = centroid("umap_1"),
  umap_2 = centroid("umap_2"),
  row.names = cell_types
)
my.label$celltype <- rownames(my.label)
my.label
#                 umap_1     umap_2     celltype
# Memory CD4 T -4.203053 -5.8672066 Memory CD4 T
# B            -4.050225 11.3856252            B
# CD14+ Mono    9.963179  3.5813161   CD14+ Mono
# NK           -6.006065  1.7420971           NK
# CD8 T        -4.693666 -1.3874622        CD8 T
# Naive CD4 T  -1.245441 -5.6826215  Naive CD4 T
# FCGR3A+ Mono  8.431078  7.3155198 FCGR3A+ Mono
# DC            6.634143  3.7099839           DC
# Platelet      5.470041  0.4560609     Platelet

# Add the cell-type labels at the centroids.
p <- p +
  geom_text(
    aes(x = umap_1, y = umap_2, label = celltype),
    data = my.label,
    color = "black",
    fontface = "bold",
    show.legend = FALSE
  )
p

# One facet per cell type.
p + facet_grid(~celltype)

← 上一章 下一章 →