scRNA-seq 数据基础分析（2025修订）｜第十四讲：改造单细胞图形-DoHeatmap

1、加载数据 suppressMessages ({ library (Seurat) library (ggplot2) library (dplyr) library (tidyr) library (aplot) library (pheatmap) library (circlize) library (ComplexHeatmap) library (dendextend) }) 本期测试的数据来源于数据集 GSE139107。

前期的数据处理可以参考我们之前推送的DimPlot优化内容改造单细胞降维图|1.DimPlot的探索 sce <- readRDS ( "./iri.intergrate.rds" ) sce # An object of class Seurat # 29133 features across 126578 samples within 2 assays # Active assay: RNA (27133 features, 0 variable features) # 2 layers present: counts, data # 1 other assay present: integrated # 3 dimensional reductions calculated: pca, umap, tsne DimPlot (sce, raster= FALSE ) levels (sce) # [1] "PTS1" "PTS2" "PTS3" "NewPT1" "NewPT2" "DTL-ATL" "MTAL" # [8] "CTAL1" "CTAL2" "MD" "DCT" "DCT-CNT" "CNT" "PC1" # [15] "PC2" "ICA" "ICB" "Uro" "Pod" "PEC" "EC1" # [22] "EC2" "Fib" "Per" "Mø" "Tcell" sub_celltype <- c ( "MD" , "Uro" , "Pod" , "PEC" , "EC2" , "Per" , "Tcell" ) sub # standardGeneric for "sub" defined from package "base" # # function (pattern, replacement, x, ignore.case = FALSE, perl = FALSE, # fixed = FALSE, useBytes = FALSE) # standardGeneric("sub") # <environment: 0x557cc51c4b90> # Methods may be defined for arguments: pattern, replacement, x, ignore.case, perl, fixed, useBytes # Use showMethods(sub) for currently available ones. sub <- subset (sce, subset= celltype %in% sub_celltype) table ( Idents (sub)) # # MD Uro Pod PEC2 Per Tcell # 155 1167 772 518 434 397 619 sub <- ScaleData (sub) sub.markers <- FindAllMarkers (sub, only.pos = TRUE ) top <- sub.markers %>% group_by (cluster) %>% dplyr :: filter (avg_log2FC > 1 ) %>% slice_head ( n = 5 ) %>% ungroup marker <- top $ gene marker # [1] "Slc12a1" "Nos1" "Tenm2" "Enox1" "Erbb4" "Crybg1" # [7] "Naaladl2" "Gpc5" "Grip1" "Gsdmc2" "Magi2" "Srgap1" # [13] "Ptpro" "Robo2" "Wt1" "Ncam1" "Akap12" "Bicc1" # [19] "Cp" "Cdh6" "Ptprb" "Prex2" "Arhgap31" "Heg1" # [25] "Shank3" "Ebf1" "Cacna1c" "Slit3" "Stac" "Dgkb" # [31] "Gm2682" "Arhgap45" "Arhgap15" "Skap1" "Mir142hg" 2、DoHeatmap参数 DoHeatmap ( object, features = NULL , cells = NULL , group.by = "ident" , group.bar = TRUE , group.colors = NULL , disp.min = - 2.5 , (下面的所有值都被剪裁) disp.max = NULL , (以上所有值都被剪切) ; 如果槽是 “scale.data”，默认值为 2.5，否则为 6 slot = "scale.data" , scale.data，也可以选择 data assay = NULL , label = TRUE , TRUE size = 5.5 , hjust = 0 , vjust = 0 , angle = 45 , raster = TRUE , draw.lines = TRUE , lines.width = NULL , group.bar.height = 0.02 , combine = TRUE ) 3、DoHeatmap及其自带参数美化 # 默认参数效果 DoHeatmap (sub, features = marker) # 修改参数展示 # 一般比较常用的调整参数包括以下：cells / group.by / group.colors / angle / lines.width / size # （1）挑选部分细胞类型展示 sub_sub <- subset (sub, idents= c ( "Per" , "Tcell" )) cells <- colnames (sub_sub) DoHeatmap (sub, features = marker, cells= cells) # 这样展示可能会导致被剔除的细胞标签还显示在上面，可以直接用挑选后的seurat对象绘图 DoHeatmap (sub_sub, features = marker) # （2）按照指定列作为分组信息展示，例如使用Group分组 DoHeatmap (sub, features = marker, group.by = "Group" ) # （3）修改分组颜色 DoHeatmap (sub, features = marker, group.colors = rainbow ( 7 )) # （4）调整字体显示角度 DoHeatmap (sub, features = marker, angle= 90 ) # （5）增加分割线的宽度 DoHeatmap (sub, features = marker, lines.width= 50 ) # （6）修改文本字体大小 DoHeatmap (sub, features = marker, size = 10 ) 4、ggplot2复现DoHeatmap函数源码 DoHeatmap <- function ( object, features = NULL , cells = NULL , group.by = 'ident' , group.bar = TRUE , group.colors = NULL , disp.min = - 2.5 , disp.max = NULL , slot = 'scale.data' , assay = NULL , label = TRUE , size = 5.5 , hjust = 0 , vjust = 0 , angle = 45 , raster = TRUE , draw.lines = TRUE , lines.width = NULL , group.bar.height = 0.02 , combine = TRUE ) { assay <- assay %||% DefaultAssay ( object = object) DefaultAssay ( object = object) <- assay cells <- cells %||% colnames ( x = object[[assay]]) if ( is.numeric ( x = cells)) { cells <- colnames ( x = object)[cells] } features <- features %||% VariableFeatures ( object = object) features <- rev ( x = unique ( x = features)) disp.max <- disp.max %||% ifelse ( test = slot == 'scale.data' , yes = 2.5 , no = 6 ) # make sure features are present possible.features <- rownames ( x = GetAssayData ( object = object, slot = slot)) if ( any ( ! features %in% possible.features)) { bad.features <- features[ ! features %in% possible.features] features <- features[features %in% possible.features] if ( length ( x = features) == 0 ) { stop ( "No requested features found in the " , slot, " slot for the " , assay, " assay." ) } warning ( "The following features were omitted as they were not found in the " , slot, " slot for the " , assay, " assay: " , paste (bad.features, collapse = ", " )) } data <- as.data.frame ( x = as.matrix ( x = t ( x = GetAssayData ( object = object, slot = slot)[features, cells, drop = FALSE ]))) object <- suppressMessages ( expr = StashIdent ( object = object, save.name = 'ident' )) group.by <- group.by %||% 'ident' groups.use <- object[[group.by]][cells, , drop = FALSE ] # group.use <- switch( # EXPR = group.by, # 'ident' = Idents(object = object), # object[[group.by, drop = TRUE]] # ) # group.use <- factor(x = group.use[cells]) plots <- vector ( mode = 'list' , length = ncol ( x = groups.use)) for (i in 1 : ncol ( x = groups.use)) { data.group <- data group.use <- groups.use[, i, drop = TRUE ] if ( ! is.factor ( x = group.use)) { group.use <- factor ( x = group.use) } names ( x = group.use) <- cells if (draw.lines) { # create fake cells to serve as the white lines, fill with NAs lines.width <- lines.width %||% ceiling ( x = nrow ( x = data.group) * 0.0025 ) placeholder.cells <- sapply ( X = 1 : ( length ( x = levels ( x = group.use)) * lines.width), FUN = function (x) { return ( RandomName ( length = 20 )) } ) placeholder.groups <- rep ( x = levels ( x = group.use), times = lines.width) group.levels <- levels ( x = group.use) names ( x = placeholder.groups) <- placeholder.cells group.use <- as.vector ( x = group.use) names ( x = group.use) <- cells group.use <- factor ( x = c (group.use, placeholder.groups), levels = group.levels) na.data.group <- matrix ( data = NA , nrow = length ( x = placeholder.cells), ncol = ncol ( x = data.group), dimnames = list (placeholder.cells, colnames ( x = data.group)) ) data.group <- rbind (data.group, na.data.group) } lgroup <- length ( levels (group.use)) plot <- SingleRasterMap ( data = data.group, raster = raster, disp.min = disp.min, disp.max = disp.max, feature.order = features, cell.order = names ( x = sort ( x = group.use)), group.by = group.use ) if (group.bar) { # TODO : Change group.bar to annotation.bar default.colors <- c ( hue_pal ( length ( x = levels ( x = group.use)))) if ( ! is.null ( x = names ( x = group.colors))) { cols <- unname ( obj = group.colors[ levels ( x = group.use)]) } else { cols <- group.colors[ 1 : length ( x = levels ( x = group.use))] %||% default.colors } if ( any ( is.na ( x = cols))) { cols[ is.na ( x = cols)] <- default.colors[ is.na ( x = cols)] cols <- Col2Hex (cols) col.dups <- sort ( x = unique ( x = which ( x = duplicated ( x = substr ( x = cols, start = 1 , stop = 7 ))))) through <- length ( x = default.colors) while ( length ( x = col.dups) > 0 ) { pal.max <- length ( x = col.dups) + through cols.extra <- hue_pal (pal.max)[(through + 1 ) : pal.max] cols[col.dups] <- cols.extra col.dups <- sort ( x = unique ( x = which ( x = duplicated ( x = substr ( x = cols, start = 1 , stop = 7 ))))) } } group.use2 <- sort ( x = group.use) if (draw.lines) { na.group <- RandomName ( length = 20 ) levels ( x = group.use2) <- c ( levels ( x = group.use2), na.group) group.use2[placeholder.cells] <- na.group cols <- c (cols, " ) } pbuild <- ggplot_build ( plot = plot) names ( x = cols) <- levels ( x = group.use2) # scale theight of the bar y.range <- diff ( x = pbuild $ layout $ panel_params[[ 1 ]] $ y.range) y.pos <- max (pbuild $ layout $ panel_params[[ 1 ]] $ y.range) + y.range * 0.015 y.max <- y.pos + group.bar.height * y.range x.min <- min (pbuild $ layout $ panel_params[[ 1 ]] $ x.range) + 0.1 x.max <- max (pbuild $ layout $ panel_params[[ 1 ]] $ x.range) - 0.1 plot <- plot + annotation_raster ( raster = t ( x = cols[group.use2]), xmin = x.min, xmax = x.max, ymin = y.pos, ymax = y.max ) + coord_cartesian ( ylim = c ( 0 , y.max), clip = 'off' ) + scale_color_manual ( values = cols[ - length ( x = cols)], name = "Identity" , na.translate = FALSE ) if (label) { x.max <- max (pbuild $ layout $ panel_params[[ 1 ]] $ x.range) # Attempt to pull xdivs from x.major in ggplot2 < 3.3.0; if NULL, pull from the >= 3.3.0 slot x.divs <- pbuild $ layout $ panel_params[[ 1 ]] $ x.major %||% attr ( x = pbuild $ layout $ panel_params[[ 1 ]] $ x $ get_breaks , which = "pos" ) x <- data.frame ( group = sort ( x = group.use), x = x.divs) label.x.pos <- tapply ( X = x $ x, INDEX = x $ group, FUN = function (y) { if ( isTRUE ( x = draw.lines)) { mean ( x = y[ - length ( x = y)]) } else { mean ( x = y) } }) label.x.pos <- data.frame ( group = names ( x = label.x.pos), label.x.pos) plot <- plot + geom_text ( stat = "identity" , data = label.x.pos, aes_string ( label = 'group' , x = 'label.x.pos' ), y = y.max + y.max * 0.03 * 0.5 + vjust, angle = angle, hjust = hjust, size = size ) plot <- suppressMessages (plot + coord_cartesian ( ylim = c ( 0 , y.max + y.max * 0.002 * max ( nchar ( x = levels ( x = group.use))) * size), clip = 'off' ) ) } } plot <- plot + theme ( line = element_blank ) plots[[i]] <- plot } if (combine) { plots <- wrap_plots (plots) } return (plots) } 从中可以看到，DoHeatmap函数主要依赖于下面这段代码绘图，使用到的主要绘图函数还是基于ggplot2 接下来我们尝试用“ggplot2”还原DoHeatmap的结果 # DoHeatmap绘图横坐标顺序获取 p1 <- DoHeatmap (sub, features = marker) gg_build <- ggplot_build (p1) x_order <- gg_build $ layout $ panel_params[[ 1 ]] $ x $ get_labels x_order <- x_order[ ! (x_order %in% setdiff (x_order, colnames (sub)))] # 目标基因表达矩阵获取 data1 <- as.data.frame (Seurat :: GetAssayData (sub, layer = "data" )) rownames (data1) <- rownames (sub) colnames (data1) <- colnames (sub) data1 <- data1[marker,x_order] data1 <- as.data.frame ( scale (data1)) data2 <- data1 data2 $ feature <- rownames (data2) data2 <- data2 %>% pivot_longer ( cols = colnames (data1), names_to = "variable" , values_to = "value" ) data2 <- as.data.frame (data2 ) data2 $ variable <- factor (data2 $ variable, levels= x_order) data2 $ feature <- factor (data2 $ feature, levels= rev (marker)) p2 <- ggplot (data2, aes ( x = variable, y = feature, fill = value)) + geom_tile + scale_fill_gradient2 + labs ( x = "Cell ID" , y = "Gene" ) + theme ( axis.text.x = element_blank , axis.text.y = element_text ( size = 14 , color = "black" ), axis.title.x = element_text ( size = 16 , color = "black" ), axis.title.y = element_text ( size = 16 , color = "black" ) ) p2 anno1 <- sub @ meta.data[x_order,] anno1 $ Cell <- rownames (anno1) anno1 $ other <- "anno" anno1 $ Cell <- factor (anno1 $ Cell, levels= x_order) anno1 $ celltype <- factor (anno1 $ celltype, levels= c ( "MD" , "Uro" , "Pod" , "PEC" , "EC2" , "Per" , "Tcell" )) # 绘制机械分群注释条 p2_anno1 <- ggplot (anno1, aes ( x= Cell, y= other, fill = celltype)) + geom_tile + scale_y_discrete ( position = "right" ) + scale_fill_manual ( values = c ( "MD" = " , "Uro" = " , "Pod" = " , "PEC" = " , "EC2" = " , "Per" = " , "Tcell" = " )) + theme_bw + theme ( panel.grid.major= element_blank , panel.border = element_blank ) + theme ( axis.text.x = element_blank , axis.text.y = element_blank , axis.ticks.x= element_blank , axis.ticks.y = element_blank ) + labs ( x= "" , y= "" ) p2_anno1 # 合并heatmap和机械分群注释条 p2 %>% insert_top (p2_anno1, height = 0.05 ) 5、pheatmap复现DoHeatmap epithelial cells; Per:pericytes; Pod:podocytes; MD:macula densa；

Uro:urothelium anno2 <- anno1 %>% mutate ( group = case_when ( celltype %in% c ( "Tcell" ) ~ "Immune" , celltype %in% c ( "EC2" , "PEC" , "Per" , "Uro" , "MD" , "Pod" ) ~ "other" )) anno2_color <- list ( celltype = c ( "MD" = " , "Uro" = " , "Pod" = " , "PEC" = " , "EC2" = " , "Per" = " , "Tcell" = " ), group = c ( "Immune" = " , "other" = " )) pheatmap (data1, cluster_rows = F, cluster_cols = F, show_colnames = F, color = colorRampPalette ( c ( "white" , " ))( 100 ), annotation_col = anno2[, c ( "celltype" , "group" )], annotation_colors = anno2_color ) pheatmap (data1, cluster_rows = F, cluster_cols = F, show_colnames = F, color = colorRampPalette ( c ( "white" , " ))( 100 ), annotation_col = anno2[, c ( "celltype" , "group" )], annotation_colors = anno2_color, gaps_col = c ( 155 , 1322 , 2094 , 2612 , 3046 , 3443 , 4062 ), gaps_row = c ( 5 , 10 , 15 , 20 , 25 , 30 ) ) 6、complexheatmap复现DoHeatmap 6.1 计算差异基因 sub1 <- subset (sce, subset= celltype %in% c ( "MD" , "PEC" )) sub1 <- ScaleData (sub1) sub.markers1 <- FindAllMarkers (sub1, only.pos = TRUE ) # write.table(sub.markers1,"MD_PEC.diffgene.xls") top1 <- sub.markers1 %>% group_by (cluster) %>% dplyr :: filter (avg_log2FC > 1 ) %>% slice_head ( n = 50 ) %>% ungroup marker1 <- top1 $ gene marker1 # [1] "Slc12a1" "Thsd4" "Robo2" "Mecom" "Erbb4" "Enox1" # [7] "Nos1" "Slit2" "Tenm2" "Pde10a" "Tshr" "Stk32b" # [13] "Col4a3" "Esrrg" "Prkd1" "Flvcr1" "Kcnc2" "Nadk2" # [19] "Ndst4" "Slc16a5" "Cacna1d" "Plcb1" "Cables1" "Tmem117" # [25] "Wdr72" "Pappa2" "Slc5a1" "Stox2" "Pappa" "Efhd1" # [31] "Tmem116" "Cntn5" "Enpp1" "Neat1" "Prdm16" "Dock8" # [37] "Vav3" "Paqr5" "Pla2g3" "Pantr1" "Ranbp3l" "Syne1" # [43] "Rap1gap2" "Car15" "Sgms2" "Unc5d" "Ppp1r1a" "Ksr2" # [49] "Blnk" "Tbxas1" "Ncam1" "Akap12" "Cp" "Ext1" # [55] "Bicc1" "Kcnma1" "Glis3" "Slc6a6" "Pkhd1" "Ror1" # [61] "Tshz2" "Fmn1" "Rbpms" "Gpc6" "Palld" "Arid1b" # [67] "Chrm3" "Igf1r" "Sdk1" "Bnc2" "Wwc1" "Cdh6" # [73] "Tnc" "Spns2" "Bmp6" "Col18a1" "Wt1" "Pax8" # [79] "Dcdc2a" "Prkg1" "Crim1" "Ptpn13" "Rai14" "Rcan2" # [85] "Cald1" "Nav2" "Tbc1d4" "Pard3b" "Robo1" "Chn2" # [91] "Pakap" "Mical2" "Zbtb20" "Actn1" "Fam19a1" "Tns1" # [97] "Arhgap28" "Agap1" "Nfia" "Hspa12a" marker1_MD <- marker1[ 1 : 50 ] marker1_PEC <- marker1[ 51 : 100 ] DoHeatmap (sub1,marker1) data7 <- as.data.frame (Seurat :: GetAssayData (sub1, layer = "data" )) rownames (data7) <- rownames (sub1) colnames (data7) <- colnames (sub1) data7 <- data7[marker1, intersect (x_order, colnames (sub1))] data7 <- scale (data7) 6.2 complexheatmap绘图这里我们可以在初始热图的基础上，添加更多的信息，例如我们可以加上基因通路富集分析的结果。

没有这部分知识的同学可以看这里：基因集富集分析和差异分析的热图如何用富集分析词云做注释 suppressMessages ( library (org.Mm.eg.db)) suppressMessages ( library (clusterProfiler)) ha = HeatmapAnnotation ( foo = anno_block ( gp = gpar ( fill = 2 : 3 ), labels = c ( "PEC" , "MD" ), labels_gp = gpar ( col = "white" , fontsize = 10 )) ) gene.df <- bitr ( gsub ( ' \\ . \\ d$' , '' ,marker1), fromType= "SYMBOL" , toType= c ( "ENTREZID" , "ENSEMBL" ), OrgDb = org.Mm.eg.db) gene can be mapped....”，建议修改参数为T，参考https://www.jianshu.com/p/2d0e371bb7fe ekegg <- enrichKEGG ( unique (gene.df $ ENTREZID), organism= 'mmu' , pvalueCutoff= 0.05 , pAdjustMethod= 'BH' , minGSSize= 10 , maxGSSize= 500 , use_internal_data= F) ekegg <- setReadable (ekegg, 'org.Mm.eg.db' , 'ENTREZID' ) my.kegg.res <- as.data.frame (ekegg @ result) head (my.kegg.res) # category subcategory ID # mmu04970 Organismal Systems Digestive system mmu04970 # mmu04270 Organismal Systems Circulatory system mmu04270 # mmu04730 Organismal Systems Nervous system mmu04730 # mmu04911 Organismal Systems Endocrine system mmu04911 # mmu04713 Organismal Systems Environmental adaptation mmu04713 # mmu04973 Organismal Systems Digestive system mmu04973 # Description # mmu04970 Salivary secretion - Mus musculus (house mouse) # mmu04270 Vascular smooth muscle contraction - Mus musculus (house mouse) # mmu04730 Long-term depression - Mus musculus (house mouse) # mmu04911 Insulin secretion - Mus musculus (house mouse) # mmu04713 Circadian entrainment - Mus musculus (house mouse) # mmu04973 Carbohydrate digestion and absorption - Mus musculus (house mouse) # GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue # mmu04970 5/50 87/9575 0.05747126 11.005747 6.792365 8.471584e-05 # mmu04270 6/50 144/9575 0.04166667 7.979167 6.113715 9.606816e-05 # mmu04730 4/50 60/9575 0.06666667 12.766667 6.624055 2.587172e-04 # mmu04911 4/50 86/9575 0.04651163 8.906977 5.336404 1.019846e-03 # mmu04713 4/50 99/9575 0.04040404 7.737374 4.881968 1.720174e-03 # mmu04973 3/50 49/9575 0.06122449 11.724490 5.452788 2.084891e-03 # p.adjust qvalue geneID Count # mmu04970 0.007781521 0.006674209 Nos1/Plcb1/Kcnma1/Chrm3/Prkg1 5 # mmu04270 0.007781521 0.006674209 Cacna1d/Plcb1/Pla2g3/Kcnma1/Prkg1/Cald1 6 # mmu04730 0.013970730 0.011982693 Nos1/Plcb1/Igf1r/Prkg1 4 # mmu04911 0.041303769 0.035426235 Cacna1d/Plcb1/Kcnma1/Chrm3 4 # mmu04713 0.055733624 0.047802718 Nos1/Cacna1d/Plcb1/Prkg1 4 # mmu04973 0.056292061 0.048281689 Cacna1d/Plcb1/Slc5a1 3 marker.gene <- top_n (my.kegg.res, 1 , GeneRatio) %>% pull (geneID) %>% .[ 1 ] %>% strsplit (., '/' ) %>% unlist marker.gene # [1] "Cacna1d" "Plcb1" "Pla2g3" "Kcnma1" "Prkg1" "Cald1" Heatmap (data7, column_km = 2 , border = TRUE , top_annotation = ha, right_annotation = rowAnnotation ( most.enrich.gene= anno_mark ( at = ( 1 : length (marker1))[marker1 %in% marker.gene], labels = marker.gene)), show_row_names = F, show_column_names = F ) 6.3 制作文本注释 up.gene.df <- bitr ( gsub ( ' \\ . \\ d$' , '' ,marker1_MD), fromType= "SYMBOL" , toType= c ( "ENTREZID" , "ENSEMBL" ), OrgDb = org.Mm.eg.db) down.gene.df <- bitr ( gsub ( ' \\ . \\ d$' , '' ,marker1_PEC), fromType= "SYMBOL" , toType= c ( "ENTREZID" , "ENSEMBL" ), OrgDb = org.Mm.eg.db) up.ekegg <- enrichKEGG ( unique (up.gene.df $ ENTREZID), organism= 'mmu' , pvalueCutoff= 0.05 , pAdjustMethod= 'BH' , minGSSize= 10 , maxGSSize= 500 , use_internal_data= F) up.ekegg <- setReadable (up.ekegg, 'org.Mm.eg.db' , 'ENTREZID' ) my.up.ekegg.res <- as.data.frame (up.ekegg @ result) head (my.up.ekegg.res) # category subcategory ID # mmu04973 Organismal Systems Digestive system mmu04973 # mmu04713 Organismal Systems Environmental adaptation mmu04713 # mmu04925 Organismal Systems Endocrine system mmu04925 # mmu04024 Environmental Information Processing Signal transduction mmu04024 # mmu04020 Environmental Information Processing Signal transduction mmu04020 # mmu04926 Organismal Systems Endocrine system mmu04926 # Description # mmu04973 Carbohydrate digestion and absorption - Mus musculus (house mouse) # mmu04713 Circadian entrainment - Mus musculus (house mouse) # mmu04925 Aldosterone synthesis and secretion - Mus musculus (house mouse) # mmu04024 cAMP signaling pathway - Mus musculus (house mouse) # mmu04020 Calcium signaling pathway - Mus musculus (house mouse) # mmu04926 Relaxin signaling pathway - Mus musculus (house mouse) # GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue # mmu04973 3/28 49/9575 0.06122449 20.936589 7.576801 0.0003771123 # mmu04713 3/28 99/9575 0.03030303 10.362554 5.070977 0.0029116826 # mmu04925 3/28 104/9575 0.02884615 9.864354 4.922187 0.0033476944 # mmu04024 4/28 224/9575 0.01785714 6.106505 4.188042 0.0038406248 # mmu04020 4/28 255/9575 0.01568627 5.364146 3.825196 0.0060811968 # mmu04926 3/28 130/9575 0.02307692 7.891484 4.284259 0.0062518856 # p.adjust qvalue geneID Count # mmu04973 0.04789326 0.03652035 Cacna1d/Plcb1/Slc5a1 3 # mmu04713 0.12193984 0.09298355 Nos1/Cacna1d/Plcb1 3 # mmu04925 0.12193984 0.09298355 Prkd1/Cacna1d/Plcb1 3 # mmu04024 0.12193984 0.09298355 Pde10a/Tshr/Cacna1d/Vav3 4 # mmu04020 0.12546150 0.09566894 Erbb4/Nos1/Cacna1d/Plcb1 4 # mmu04926 0.12546150 0.09566894 Nos1/Col4a3/Plcb1 3 up.pathway <- top_n (my.up.ekegg.res, - 10 , pvalue) down.gene.df <- bitr ( gsub ( ' \\ . \\ d$' , '' ,marker1_PEC), fromType= "SYMBOL" , toType= c ( "ENTREZID" , "ENSEMBL" ), OrgDb = org.Mm.eg.db) down.ekegg <- enrichKEGG ( unique (down.gene.df $ ENTREZID), organism= 'mmu' , pvalueCutoff= 0.05 , pAdjustMethod= 'BH' , minGSSize= 10 , maxGSSize= 500 , use_internal_data= F) down.ekegg <- setReadable (down.ekegg, 'org.Mm.eg.db' , 'ENTREZID' ) my.down.ekegg.res <- as.data.frame (down.ekegg @ result) head (my.down.ekegg.res) # category subcategory ID # mmu04970 Organismal Systems Digestive system mmu04970 # mmu04270 Organismal Systems Circulatory system mmu04270 # mmu04730 Organismal Systems Nervous system mmu04730 # mmu04913 Organismal Systems Endocrine system mmu04913 # mmu04510 Cellular Processes Cellular community - eukaryotes mmu04510 # mmu05202 Human Diseases Cancer: overview mmu05202 # Description # mmu04970 Salivary secretion - Mus musculus (house mouse) # mmu04270 Vascular smooth muscle contraction - Mus musculus (house mouse) # mmu04730 Long-term depression - Mus musculus (house mouse) # mmu04913 Ovarian steroidogenesis - Mus musculus (house mouse) # mmu04510 Focal adhesion - Mus musculus (house mouse) # mmu05202 Transcriptional misregulation in cancer - Mus musculus (house mouse) # GeneRatio BgRatio RichFactor FoldEnrichment zScore pvalue # mmu04970 3/22 87/9575 0.03448276 15.007837 6.298419 0.0009848422 # mmu04270 3/22 144/9575 0.02083333 9.067235 4.680748 0.0041602216 # mmu04730 2/22 60/9575 0.03333333 14.507576 5.036592 0.0082294941 # mmu04913 2/22 64/9575 0.03125000 13.600852 4.853611 0.0093214491 # mmu04510 3/22 202/9575 0.01485149 6.463771 3.766319 0.0106002026 # mmu05202 3/226/9575 0.01327434 5.777353 3.487764 0.0143489610 # p.adjust qvalue geneID Count # mmu04970 0.07484801 0.06738394 Kcnma1/Chrm3/Prkg1 3 # mmu04270 0.15808842 0.14232337 Kcnma1/Prkg1/Cald1 3 # mmu04730 0.16112308 0.14505540 Igf1r/Prkg1 2 # mmu04913 0.16112308 0.14505540 Igf1r/Bmp6 2 # mmu04510 0.16112308 0.14505540 Igf1r/Tnc/Actn1 3 # mmu05202 0.17688661 0.15924695 Igf1r/Wt1/Pax8 3 down.pathway <- top_n (my.down.ekegg.res, - 10 , pvalue) text = list ( 'up' = up.pathway $ Description, 'down' = down.pathway $ Description) text # $up # [1] "Carbohydrate digestion and absorption - Mus musculus (house mouse)" # [2] "Circadian entrainment - Mus musculus (house mouse)" # [3] "Aldosterone synthesis and secretion - Mus musculus (house mouse)" # [4] "cAMP signaling pathway - Mus musculus (house mouse)" # [5] "Calcium signaling pathway - Mus musculus (house mouse)" # [6] "Relaxin signaling pathway - Mus musculus (house mouse)" # [7] "Nicotinate and nicotinamide metabolism - Mus musculus (house mouse)" # [8] "Vascular smooth muscle contraction - Mus musculus (house mouse)" # [9] "Adrenergic signaling in cardiomyocytes - Mus musculus (house mouse)" # [10] "Long-term depression - Mus musculus (house mouse)" # # $down # [1] "Salivary secretion - Mus musculus (house mouse)" # [2] "Vascular smooth muscle contraction - Mus musculus (house mouse)" # [3] "Long-term depression - Mus musculus (house mouse)" # [4] "Ovarian steroidogenesis - Mus musculus (house mouse)" # [5] "Focal adhesion - Mus musculus (house mouse)" # [6] "Transcriptional misregulation in cancer - Mus musculus (house mouse)" # [7] "Insulin secretion - Mus musculus (house mouse)" # [8] "Adherens junction - Mus musculus (house mouse)" # [9] "Pancreatic secretion - Mus musculus (house mouse)" # [10] "Thyroid hormone signaling pathway - Mus musculus (house mouse)" library (ggsci) Heatmap (data7, column_km = 2 , border = TRUE , top_annotation = ha, right_annotation = rowAnnotation ( most.enrich.gene= anno_mark ( at = ( 1 : length (marker1))[marker1 %in% marker.gene], labels = marker.gene)), show_row_names = F, show_column_names = F, left_annotation = rowAnnotation ( textbox = anno_textbox ( rep ( c ( "up" , "down" ), each= 50 ), text, by = "anno_block" , add_new_line = T, gp = gpar ( col = c ( pal_npg ( 10 ), pal_nejm ( 10 ))), round_corners = TRUE ) ) ) 7、拓展：基因表达均值热图 7.1 pheatmap avg <- AverageExpression (sub, features = marker)[[ "RNA" ]] avg <- as.data.frame (avg) anno3 <- as.data.frame (sub_celltype) rownames (anno3) <- sub_celltype anno3 $ group <- c ( "other" , "other" , "other" , "other" , "other" , "other" , "Immune" ) anno3_color <- list ( sub_celltype = c ( "MD" = " , "Uro" = " , "Pod" = " , "PEC" = " , "EC2" = " , "Per" = " , "Tcell" = " ), group = c ( "Immune" = " , "other" = " )) pheatmap (avg, cluster_rows = F, cluster_cols = F, show_colnames = T, color = colorRampPalette ( c ( "navy" , "white" , "firebrick3" ))( 50 ), annotation_col = anno3, annotation_colors = anno3_color, scale = "row" ) 7.2 pheatmap美化 # 修改边框颜色 # 修改方格长方形为正方形 # 增加间隔 pheatmap (avg, cluster_rows = F, cluster_cols = F, show_colnames = T, color = colorRampPalette ( c ( "navy" , "white" , "firebrick3" ))( 50 ), annotation_col = anno3, treeheight_row = 50 , annotation_colors = anno3_color, scale = "row" , border_color = "white" , # 设置方格边框 gaps_row = c ( 5 , 10 , 15 , 20 , 25 , 30 ), cellwidth = 17 , cellheight = 17 ) 7.3 增加散点图或者柱状图 ha_column1 = HeatmapAnnotation ( points = anno_points ( rnorm ( 35 )), annotation_name_side = "left" ) ht1 = Heatmap ( scale ( t (avg)), name = "ht1" , column_title = "Heatmap 1" , km= 3 , col = colorRamp2 ( c ( - 2 , 0 , 2 ), c ( "navy" , "white" , "firebrick3" )), top_annotation = ha_column1, row_names_side = "left" ) ha_column2 = HeatmapAnnotation ( type = rep ( c ( "MD" , "Uro" , "Pod" , "PEC" , "EC2" , "Per" , "Tcell" ), each= 5 ), col = list ( type = c ( "MD" = " , "Uro" = " , "Pod" = " , "PEC" = " , "EC2" = " , "Per" = " , "Tcell" = " ))) ht2 = Heatmap ( scale ( t (avg)), name = "ht2" , column_title = "Heatmap 2" , column_km = 6 , col = colorRamp2 ( c ( - 2 , 0 , 2 ), c ( "navy" , "white" , "firebrick3" )), bottom_annotation = ha_column2) ht_list = ht1 + ht2 + rowAnnotation ( bar = anno_barplot ( rowMeans ( scale ( t (avg))), width = unit ( 2 , "cm" ))) draw (ht_list, row_title = "Heatmap list" , column_title = "Heatmap list" ) 7.4 circlize圈形热图 col_fun = colorRamp2 ( c ( - 2 , 0 , 2 ), c ( "navy" , "white" , "firebrick3" )) circos.clear data3 <- as.data.frame ( scale ( t (avg))) split <- rownames (data3) split <- factor (split, levels = rownames (data3)) circos.heatmap (data3, col = col_fun, split= split, bg.border = "red" , bg.lwd = 2 , bg.lty = 2 , show.sector.labels = TRUE , cluster = F, ) lgd = Legend ( title = "avg" , col_fun = col_fun) grid.draw (lgd) circos.clear data4 <- as.data.frame ( scale (avg)) split <- rep ( 1 : 7 , each= 5 ) split <- factor (split, levels = 1 : 7 ) circos.clear circos.heatmap (data4, col = col_fun, split= split, rownames.side = "outside" , bg.border = "red" , bg.lwd = 2 , bg.lty = 2 , cluster = F, ) lgd = Legend ( title = "avg" , col_fun = col_fun) grid.draw (lgd) circos.clear circos.heatmap (data4, col = col_fun, dend.side = "inside" , rownames.side = "outside" , rownames.col = 1 : nrow (avg) %% 10 + 1 , rownames.cex = runif ( nrow (avg), min = 0.3 , max = 1.5 ), rownames.font = 1 : nrow (avg) %% 4 + 1 ) lgd = Legend ( title = "avg" , col_fun = col_fun) grid.draw (lgd) circos.clear dend_col <- structure ( rainbow ( 7 ), names = 1 : 7 ) circos.clear circos.heatmap (data4, col = col_fun, split= split, rownames.side = "outside" , bg.border = "red" , bg.lwd = 2 , bg.lty = 2 , dend.side = "inside" , dend.track.height = 0.2 , dend.callback = function (dend, m, si) { color_branches (dend, k = 1 , col = dend_col[si]) } ) lgd = Legend ( title = "avg" , col_fun = col_fun) grid.draw (lgd)