本期测试的数据来源于数据集 GSE139107。
前期的数据处理可以参考我们之前推送的DimPlot优化内容改造单细胞降维图|1.DimPlot的探索 1. 准备工作 library (plotrix) library (dplyr) library (ggsci) library (Seurat) sce <- readRDS ( "iri.intergrate.harmony.rds" ) # 根据推文中所有样本的整合后的rds中已经包含了注释后的细胞名称,我们可以根据相同的barcode将新的rds中也修改成和原来一样的细胞名称 sce_origin <- readRDS ( "iri.intergrate.rds" ) sce @ meta.data $ Celltype <- sce_origin @ meta.data[ colnames (sce), "celltype" ] sce @ meta.data $ Celltype <- factor (sce @ meta.data $ Celltype, levels= levels (sce_origin)) Idents (sce) <- "Celltype" # 保存重新命名之后的rds # saveRDS(sce,"./iri.intergrate.harmony.rename.rds") library (plotrix) library (dplyr) library (ggsci) library (Seurat) # 读取添加了注释后细胞标签的rds sce <- readRDS ( "./iri.intergrate.harmony.rename.rds" ) # 选取部分细胞用于后续分析 sel <- c ( "PTS1" , "PTS2" , "NewPT1" , "MTAL" , "DCT" , "EC1" ) sce <- subset (sce, Celltype %in% sel) # 指定因子展示顺序 sce @ meta.data $ Celltype <- factor (sce @ meta.data $ Celltype, levels = sel) Idents (sce) <- factor ( Idents (sce), levels = sel) head (sce @ meta.data) # orig.ident nCount_RNA nFeature_RNA_snn_res.0.4 # IRI4h1_AAACCTGAGATTACCC IRI4h1 2490 564 6 # IRI4h1_AAACCTGAGTGTTAGA IRI4h1 514 320 3 # IRI4h1_AAACCTGCACCAACCG IRI4h1 1171 392 2 # IRI4h1_AAACCTGCAGCCTGTG IRI4h1 1712 530 1 # IRI4h1_AAACCTGGTGATGTGG IRI4h1 3062 696 6 # IRI4h1_AAACCTGGTTACGTCA IRI4h1 1996 556 0 # seurat_clusters Celltype # IRI4h1_AAACCTGAGATTACCC 6 NewPT1 # IRI4h1_AAACCTGAGTGTTAGA 3 EC1 # IRI4h1_AAACCTGCACCAACCG 2 DCT # IRI4h1_AAACCTGCAGCCTGTG 1 MTAL # IRI4h1_AAACCTGGTGATGTGG 6 NewPT1 # IRI4h1_AAACCTGGTTACGTCA 0 PTS2 unique (sce $ orig.ident) # [1] "IRI4h1" "IRI4h2" "IRI4h3" "IRI12h1b1" "IRI12h1b2" "IRI12h2" # [7] "IRI12h3" unique (sce $ Celltype) # [1] NewPT1 EC1 DCT MTAL PTS2 PTS1 # Levels: PTS1 PTS2 NewPT1 MTAL DCT EC1 table (sce $ orig.ident) # # IRI12h1b1 IRI12h1b2 IRI12h2 IRI12h3 IRI4h1 IRI4h2 IRI4h3 # 3286 3505 2780 2670 4804 143446 table (sce $ Celltype) # # PTS1 PTS2 NewPT1 MTAL DCT EC1 # 2646 4134 5305 4609 2322 2909 2. 饼图 # 提取细胞标签 mynames <- table (sce $ Celltype) %>% names mynames # [1] "PTS1" "PTS2" "NewPT1" "MTAL" "DCT" "EC1" # 提取细胞数目 myratio <- table (sce $ Celltype) %>% as.numeric myratio # [1] 2646 4134 5305 4609 2322 2909 # 计算细胞占比,每个细胞数目/总细胞数目 pielabel <- paste0 (mynames, " (" , round (myratio / sum (myratio) * 100 , 2 ), "%)" ) pielabel # [1] "PTS1 (12.07%)" "PTS2 (18.86%)" "NewPT1 (24.2%)" "MTAL (21.02%)" # [5] "DCT (10.59%)" "EC1 (13.27%)" cols <- pal_npg ( "nrc" )( 10 ) cols # [1] " " " " " " # [7] " " " " pie (myratio, labels= pielabel, radius = 1.0 , 参数用于控制饼图的半径大小,表示相对于图形区域的比例:默认值 radius = 1,即饼图占据完整的绘图区域(不会超出,但可能贴近边界)。
radius < 1 会缩小饼图,使其不占满整个绘图区域。
radius > 1 会扩大饼图,可能导致部分图形超出绘图区域并被截断 clockwise= F, 参数用于控制扇形区域的绘制方向,clockwise=F逆时针 main = "Celltype" , col = cols) # 绘制 3D 图,family 要设置你系统支持的中文字体库 pie3D (myratio, labels = pielabel, explode = 0.1 , 0 ~ 1 之间) main = "Cell Proption" , height = 0.3 , 3D 饼图的厚度,数值越大,立体效果越明显 labelcex = 1 ) 1,值越大字体越大 3. 甜甜圈图 # 并没有直接画甜甜圈图的R包,所以在饼图源代码的基础上改改 doughnut <- function (x, labels = names (x), edges = 200 , outer.radius = 0.8 , inner.radius= 0.6 , clockwise = FALSE , init.angle = if (clockwise) 90 else 0 , density = NULL , angle = 45 , col = NULL , border = FALSE , lty = NULL , main = NULL , ...) { if ( ! is.numeric (x) || any ( is.na (x) | x < 0 )) stop ( "'x' values must be positive." ) if ( is.null (labels)) labels <- as.character ( seq_along (x)) else labels <- as.graphicsAnnot (labels) x <- c ( 0 , cumsum (x) / sum (x)) dx <- diff (x) nx <- length (dx) plot.new pin <- par ( "pin" ) xlim <- ylim <- c ( - 1 , 1 ) if (pin[ 1 L ] > pin[ 2 L ]) xlim <- (pin[ 1 L ] / pin[ 2 L ]) * xlim else ylim <- (pin[ 2 L ] / pin[ 1 L ]) * ylim plot.window (xlim, ylim, "" , asp = 1 ) if ( is.null (col)) col <- if ( is.null (density)) palette else par ( "fg" ) col <- rep (col, length.out = nx) border <- rep (border, length.out = nx) lty <- rep (lty, length.out = nx) angle <- rep (angle, length.out = nx) density <- rep (density, length.out = nx) twopi <- if (clockwise) - 2 * pi else 2 * pi t2xy <- function (t, radius) { t2p <- twopi * t + init.angle * pi / 180 list ( x = radius * cos (t2p), y = radius * sin (t2p)) } for (i in 1 L : nx) { n <- max ( 2 , floor (edges * dx[i])) P <- t2xy ( seq.int (x[i], x[i + 1 ], length.out = n), outer.radius) polygon ( c (P $ x, 0 ), c (P $ y, 0 ), density = density[i], angle = angle[i], border = border[i], col = col[i], lty = lty[i]) Pout <- t2xy ( mean (x[i + 0 : 1 ]), outer.radius) lab <- as.character (labels[i]) if ( ! is.na (lab) && nzchar (lab)) { lines ( c ( 1 , 1.05 ) * Pout $ x, c ( 1 , 1.05 ) * Pout $ y) text ( 1.1 * Pout $ x, 1.1 * Pout $ y, labels[i], xpd = TRUE , adj = ifelse (Pout $ x < 0 , 1 , 0 ), ...) } Pin <- t2xy ( seq.int ( 0 , 1 , length.out = n * nx), inner.radius) polygon (Pin $ x, Pin $ y, density = density[i], angle = angle[i], border = border[i], col = "white" , lty = lty[i]) } title ( main = main, ...) invisible ( NULL ) } # 绘图 df <- table (sce $ Celltype) %>% as.data.frame df # Var1 Freq # 1 PTS1 2646 # 2 PTS2 4134 # 3 NewPT1 5305 # 4 MTAL 4609 # 5 DCT 2322 # 6 EC1 2909 labs <- paste0 (df $ Var1, " (" , round (df $ Freq / sum (df $ Freq) * 100 , 2 ), "%)" ) labs # [1] "PTS1 (12.07%)" "PTS2 (18.86%)" "NewPT1 (24.2%)" "MTAL (21.02%)" # [5] "DCT (10.59%)" "EC1 (13.27%)" doughnut ( df $ Freq, labels= labs, init.angle= 0 , # 设置初始角度 col = cols , # 设置颜色 border= "white" , # 边框颜色 inner.radius= 0.4 , # 内环大小 cex = 1 ) # 字体大小 4. 堆积柱状图 library (ggplot2) cellnum <- table (sce $ Celltype,sce $ orig.ident) cell.prop <- as.data.frame ( prop.table (cellnum)) colnames (cell.prop) <- c ( "Celltype" , "Group" , "Proportion" ) cell.prop # Celltype Group Proportion # 1 PTS1 IRI12h1b1 0.014139111 # 2 PTS2 IRI12h1b1 0.020250855 # 3 NewPT1 IRI12h1b1 0.035393387 # 4 MTAL IRI12h1b1 0.038677309 # 5 DCT IRI12h1b1 0.016009122 # 6 EC1 IRI12h1b1 0.025404789 # 7 PTS1 IRI12h1b2 0.016693273 # 8 PTS2 IRI12h1b2 0.021026226 # 9 NewPT1 IRI12h1b2 0.041778791 # 10 MTAL IRI12h1b2 0.039452680 # 11 DCT IRI12h1b2 0.016966933 # 12 EC1 IRI12h1b2 0.023945268 # 13 PTS1 IRI12h2 0.012862030 # 14 PTS2 IRI12h2 0.022576967 # 15 NewPT1 IRI12h2 0.025906499 # 16 MTAL IRI12h2 0.029509692 # 17 DCT IRI12h2 0.013500570 # 18 EC1 IRI12h2 0.022440137 # 19 PTS1 IRI12h3 0.014367161 # 20 PTS2 IRI12h3 0.020114025 # 21 NewPT1 IRI12h3 0.023854048 # 22 MTAL IRI12h3 0.030239453 # 23 DCT IRI12h3 0.011904219 # 24 EC1 IRI12h3 0.021299886 # 25 PTS1 IRI4h1 0.031790194 # 26 PTS2 IRI4h1 0.056647662 # 27 NewPT1 IRI4h1 0.054640821 # 28 MTAL IRI4h1 0.033112885 # 29 DCT IRI4h1 0.023397948 # 30 EC1 IRI4h1 0.019521095 # 31 PTS1 IRI4h2 0.003466363 # 32 PTS2 IRI4h2 0.007616876 # 33 NewPT1 IRI4h2 0.028734322 # 34 MTAL IRI4h2 0.009897377 # 35 DCT IRI4h2 0.006066135 # 36 EC1 IRI4h2 0.009623717 # 37 PTS1 IRI4h3 0.027366021 # 38 PTS2 IRI4h3 0.040319270 # 39 NewPT1 IRI4h3 0.031653364 # 40 MTAL IRI4h3 0.029327252 # 41 DCT IRI4h3 0.018061574 # 42 EC1 IRI4h3 0.010444698 p.bar <- ggplot (cell.prop, aes (Group,Proportion, fill= Celltype)) + geom_bar ( stat= "identity" , position= "fill" ) + scale_fill_manual ( values= cols) + ggtitle ( "cell portation" ) + theme_bw + theme ( axis.ticks.length= unit ( 0.5 , 'cm' )) + guides ( fill= guide_legend ( title= NULL )) p.bar 5. 箱线图 cellnum <- table (sce $ Celltype,sce $ orig.ident) cellnum # # IRI12h1b1 IRI12h1b2 IRI12h2 IRI12h3 IRI4h1 IRI4h2 IRI4h3 # PTS1 310 366 282 315 697 76 600 # PTS2 444 461 495 441 1242 167 884 # NewPT1 776 916 568 523 1198 630 694 # MTAL 848 865 647 663 726 217 643 # DCT 351 372 296 261 5133 396 # EC1 557 525 492 467 428 211 229 for (i in 1 : ncol (cellnum)) { cellnum[,i] <- cellnum[,i] / sum (cellnum[,i]) } cellnum # # IRI12h1b1 IRI12h1b2 IRI12h2 IRI12h3 IRI4h1 IRI4h2 # PTS1 0.09433962 0.10442225 0.10143885 0.11797753 0.14508743 0.05299861 # PTS2 0.13511869 0.13152639 0.17805755 0.16516854 0.25853455 0.11645746 # NewPT1 0.23615338 0.26134094 0.20431655 0.19588015 0.24937552 0.43933054 # MTAL 0.25806452 0.24679030 0.23273381 0.24831461 0.15112406 0.15132497 # DCT 0.10681680 0.10613409 0.10647482 0.09775281 0.10678601 0.09274756 # EC1 0.16950700 0.14978602 0.17697842 0.17490637 0.08909242 0.14714086 # # IRI4h3 # PTS1 0.17411492 # PTS2 0.25652931 # NewPT1 0.20139292 # MTAL 0.18659315 # DCT 0.11491584 # EC1 0.06645386 cellnum <- as.data.frame (cellnum) cellnum # Var1 Var2 Freq # 1 PTS1 IRI12h1b1 0.09433962 # 2 PTS2 IRI12h1b1 0.13511869 # 3 NewPT1 IRI12h1b1 0.23615338 # 4 MTAL IRI12h1b1 0.25806452 # 5 DCT IRI12h1b1 0.10681680 # 6 EC1 IRI12h1b1 0.16950700 # 7 PTS1 IRI12h1b2 0.10442225 # 8 PTS2 IRI12h1b2 0.13152639 # 9 NewPT1 IRI12h1b2 0.26134094 # 10 MTAL IRI12h1b2 0.24679030 # 11 DCT IRI12h1b2 0.10613409 # 12 EC1 IRI12h1b2 0.14978602 # 13 PTS1 IRI12h2 0.10143885 # 14 PTS2 IRI12h2 0.17805755 # 15 NewPT1 IRI12h2 0.20431655 # 16 MTAL IRI12h2 0.23273381 # 17 DCT IRI12h2 0.10647482 # 18 EC1 IRI12h2 0.17697842 # 19 PTS1 IRI12h3 0.11797753 # 20 PTS2 IRI12h3 0.16516854 # 21 NewPT1 IRI12h3 0.19588015 # 22 MTAL IRI12h3 0.24831461 # 23 DCT IRI12h3 0.09775281 # 24 EC1 IRI12h3 0.17490637 # 25 PTS1 IRI4h1 0.14508743 # 26 PTS2 IRI4h1 0.25853455 # 27 NewPT1 IRI4h1 0.24937552 # 28 MTAL IRI4h1 0.15112406 # 29 DCT IRI4h1 0.10678601 # 30 EC1 IRI4h1 0.08909242 # 31 PTS1 IRI4h2 0.05299861 # 32 PTS2 IRI4h2 0.11645746 # 33 NewPT1 IRI4h2 0.43933054 # 34 MTAL IRI4h2 0.15132497 # 35 DCT IRI4h2 0.09274756 # 36 EC1 IRI4h2 0.14714086 # 37 PTS1 IRI4h3 0.17411492 # 38 PTS2 IRI4h3 0.25652931 # 39 NewPT1 IRI4h3 0.20139292 # 40 MTAL IRI4h3 0.18659315 # 41 DCT IRI4h3 0.11491584 # 42 EC1 IRI4h3 0.06645386 # 计算关注的组间显著性 library (reshape2) library (ggplot2) library (ggsci) library (ggsignif) library (ggpubr) colnames (cellnum) <- c ( 'Celltype' , 'Sample' , 'Freq' ) cellnum $ Group <- c ( rep ( "IRI12h" , times= 24 ), rep ( "IRI4h" , times= 18 )) cellnum # Celltype Sample Freq Group # 1 PTS1 IRI12h1b1 0.09433962 IRI12h # 2 PTS2 IRI12h1b1 0.13511869 IRI12h # 3 NewPT1 IRI12h1b1 0.23615338 IRI12h # 4 MTAL IRI12h1b1 0.25806452 IRI12h # 5 DCT IRI12h1b1 0.10681680 IRI12h # 6 EC1 IRI12h1b1 0.16950700 IRI12h # 7 PTS1 IRI12h1b2 0.10442225 IRI12h # 8 PTS2 IRI12h1b2 0.13152639 IRI12h # 9 NewPT1 IRI12h1b2 0.26134094 IRI12h # 10 MTAL IRI12h1b2 0.24679030 IRI12h # 11 DCT IRI12h1b2 0.10613409 IRI12h # 12 EC1 IRI12h1b2 0.14978602 IRI12h # 13 PTS1 IRI12h2 0.10143885 IRI12h # 14 PTS2 IRI12h2 0.17805755 IRI12h # 15 NewPT1 IRI12h2 0.20431655 IRI12h # 16 MTAL IRI12h2 0.23273381 IRI12h # 17 DCT IRI12h2 0.10647482 IRI12h # 18 EC1 IRI12h2 0.17697842 IRI12h # 19 PTS1 IRI12h3 0.11797753 IRI12h # 20 PTS2 IRI12h3 0.16516854 IRI12h # 21 NewPT1 IRI12h3 0.19588015 IRI12h # 22 MTAL IRI12h3 0.24831461 IRI12h # 23 DCT IRI12h3 0.09775281 IRI12h # 24 EC1 IRI12h3 0.17490637 IRI12h # 25 PTS1 IRI4h1 0.14508743 IRI4h # 26 PTS2 IRI4h1 0.25853455 IRI4h # 27 NewPT1 IRI4h1 0.24937552 IRI4h # 28 MTAL IRI4h1 0.15112406 IRI4h # 29 DCT IRI4h1 0.10678601 IRI4h # 30 EC1 IRI4h1 0.08909242 IRI4h # 31 PTS1 IRI4h2 0.05299861 IRI4h # 32 PTS2 IRI4h2 0.11645746 IRI4h # 33 NewPT1 IRI4h2 0.43933054 IRI4h # 34 MTAL IRI4h2 0.15132497 IRI4h # 35 DCT IRI4h2 0.09274756 IRI4h # 36 EC1 IRI4h2 0.14714086 IRI4h # 37 PTS1 IRI4h3 0.17411492 IRI4h # 38 PTS2 IRI4h3 0.25652931 IRI4h # 39 NewPT1 IRI4h3 0.20139292 IRI4h # 40 MTAL IRI4h3 0.18659315 IRI4h # 41 DCT IRI4h3 0.11491584 IRI4h # 42 EC1 IRI4h3 0.06645386 IRI4h unique (cellnum $ Group) # [1] "IRI12h" "IRI4h" ggplot (cellnum, aes ( x= Celltype, y= Freq, fill= Group)) + geom_boxplot + # 箱线图 theme_bw + # 设置主题 theme ( panel.grid = element_blank , axis.text.x.bottom = element_text ( angle = 45 , hjust = 1 , vjust = 1 )) + # 调整x轴倾斜角度 scale_fill_manual ( values = pal_npg ( "nrc" )( 10 )) + # 调整箱子颜色 stat_compare_means ( aes ( group = Group), method = "t.test" , label = "p.signif" ) # 统计检验方法,可改为 "wilcox.test" ggplot (cellnum, aes ( x= Group, y= Freq, fill= Group)) + geom_boxplot + # 绘制箱线图 theme_bw + # 主题设置 theme ( panel.grid = element_blank , axis.text.x = element_text ( angle = 45 , hjust = 1 , vjust = 1 )) + # x轴标签角度调整 scale_fill_manual ( values = pal_npg ( "nrc" )( 10 )) + facet_wrap ( ~ Celltype) + # 颜色填充 geom_signif ( comparisons = list ( c ( "IRI12h" , "IRI4h" )), # 需要比较的组 map_signif_level = TRUE , # 自动映射显著性水平 test = "t.test" , # 统计检验方法,可改为 "wilcox.test" step_increase = 0.2 , # 调整 p 值标记的高度,防止与箱线图重叠 tip_length = 0.03 , # 调整误差线长度 textsize = 3 )