第 4/6 章

四、外部数据实操

接下来,我们从GEO数据库下载真实单细胞转录组测序数据,并使用R语言Slingshot包进行单细胞转录组测序 拟时序分析;全流程包括:原始数据下载———测序数据预处理———降维聚类———细胞注释———利用关键细胞进行拟时序分析。4.1 单细胞数据下载:GSE163973 该数据集主要探讨了皮肤纤维化疾病中的成纤维细胞异质性,特别是以瘢痕疙瘩(KD)为研究模型。

该研究使用真皮进行 scRNA-seq 分析,因为瘢痕疙瘩代表皮肤真皮纤维化疾病。经过严格的质量控制,我们获得了 40655 个细胞的转录组(瘢痕疙瘩(nsample=3):21488;正常疤痕(nsample=3):19167)。我们下载该数据集中每个样本(其中包含3个KD样本,3个NC样本)的原始文件,即barcodes.tsv、genes.tsv和matrix.mtx文件。

4.2 测序数据预处理:接下来,我们使用R语言加载数据并构建疾病组与正常组样本的Seurat对象:4.2.1 载入相关的包 if ( ! require (Seurat)) install.packages ( "Seurat" ) if ( ! require (dplyr)) install.packages ( "dplyr" ) if ( ! require (ggplot2)) install.packages ( "ggplot2" ) if ( ! require (scran)) install.packages ( "scran" ) if ( ! require (patchwork)) install.packages ( "patchwork" ) if ( ! require (tidyr)) install.packages ( "tidyr" ) if ( ! require (scater)) install.packages ( "scater" ) if ( ! require (stringr)) install.packages ( "stringr" ) if ( ! require (tibble)) install.packages ( "tibble" ) if ( ! require (SingleCellExperiment)) install.packages ( "SingleCellExperiment" ) library (hdf5r) library (Matrix) library (openxlsx) library (clustree) library (paletteer) 4.2.2 处理各组样本 rm ( list= ls ) normal_scar <- Read10X ( data.dir = "./00_rawdata/NS1/" ) class (normal_scar) normal_scar_obj <- CreateSeuratObject ( counts = normal_scar, project = "NS1" ) normal_scar_obj[[ "percent.mt" ]] <- PercentageFeatureSet (normal_scar_obj, pattern = "^MT-" ) normal_scar_obj <- subset (normal_scar_obj, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10 ) VlnPlot1 <- VlnPlot (normal_scar_obj, features = c ( "nFeature_RNA" , "nCount_RNA" , "percent.mt" ), ncol = 3 ) for (sample in c ( "NS2" , "NS3" )) { normal_scar1 <- Read10X ( data.dir = paste0 ( "./00_rawdata/" ,sample, "/" )) class (normal_scar1) normal_scar_obj1 <- CreateSeuratObject ( counts = normal_scar1, project = sample) normal_scar_obj1[[ "percent.mt" ]] <- PercentageFeatureSet (normal_scar_obj1, pattern = "^MT-" ) normal_scar_obj1 <- subset (normal_scar_obj1, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10 ) normal_scar_obj <- merge (normal_scar_obj,normal_scar_obj1) } saveRDS (normal_scar_obj, file = "normal_Object.rds" ) rm ( list= ls ) keloid_data <- Read10X ( data.dir = "./00_rawdata/KL1/" ) keloid_obj <- CreateSeuratObject ( counts = keloid_data, project = "KL1" ) keloid_obj[[ "percent.mt" ]] <- PercentageFeatureSet (keloid_obj, pattern = "^MT-" ) keloid_obj <- subset (keloid_obj, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10 ) VlnPlot1 <- VlnPlot (keloid_obj, features = c ( "nFeature_RNA" , "nCount_RNA" , "percent.mt" ), ncol = 3 ) for (sample in c ( "KL2" , "KL3" )) { keloid_data1 <- Read10X ( data.dir = paste0 ( "./00_rawdata/" ,sample, "/" )) class (keloid_data1) keloid_data_obj1 <- CreateSeuratObject ( counts = keloid_data1, project = sample) keloid_data_obj1[[ "percent.mt" ]] <- PercentageFeatureSet (keloid_data_obj1, pattern = "^MT-" ) keloid_data_obj1 <- subset (keloid_data_obj1, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10 ) keloid_obj <- merge (keloid_obj,keloid_data_obj1) } saveRDS (keloid_obj, file = "keloid_Object.rds" ) 4.2.3 合并两组数据,构建seurat_object rm ( list= ls ) normal_object <- readRDS ( "../00_rawdata/normal_Object.rds" ) keloid_object <- readRDS ( "../00_rawdata/keloid_Object.rds" ) seurat_object <- merge (keloid_object, y = normal_object) seurat_object[[ "percent.mt" ]] <- PercentageFeatureSet (seurat_object, pattern = "^MT-" ) VlnPlot1 <- VlnPlot (seurat_object, features = c ( "nFeature_RNA" , "nCount_RNA" , "percent.mt" ), ncol = 3 ) 4.2.4 数据标准化 seurat_object <- NormalizeData (seurat_object, normalization.method = "LogNormalize" , scale.factor = 10000 ) seurat_object <- FindVariableFeatures (seurat_object, selection.method = "vst" , nfeatures = 2000 ) top10 <- head ( VariableFeatures (seurat_object), 10 ) plot1 <- VariableFeaturePlot (seurat_object) plot2 <- LabelPoints ( plot = plot1, points = top10, repel = TRUE ) variable_genes <- plot1 + plot2 seurat_object <- ScaleData (seurat_object, features = VariableFeatures (seurat_object)) seurat_object <- RunPCA (seurat_object, features = VariableFeatures ( object = seurat_object)) dims = 1:15, cells = 1000, balanced = TRUE) seurat_object <- JackStraw (seurat_object, num.replicate = 100 ) seurat_object <- ScoreJackStraw (seurat_object, dims = 1 : 20 ) PCA_Score <- JackStrawPlot (seurat_object, dims = 1 : 20 ) p <- DimPlot (seurat_object, reduction = 'pca' , group.by = 'orig.ident' ) 4.3 细胞聚类 seurat_object <- RunUMAP (seurat_object, reduction = "pca" , dims = 1 : 20 ) seurat_object <- FindNeighbors (seurat_object, reduction = "pca" , dims = 1 : 20 ) seurat_object <- FindClusters (seurat_object, resolution = 0.3 ) head ( Idents (seurat_object), 5 ) plot <- DimPlot (seurat_object, reduction = "umap" , label = TRUE , group.by = "RNA_snn_res.0.3" ) 4.3.1 细胞簇注释 seurat_object <- JoinLayers (seurat_object) seurat_object.markers <- FindAllMarkers (seurat_object, group.by = "RNA_snn_res.0.3" , min.pct = 0.5 , logfc.threshold = 0.5 , only.pos = T) top10 <- seurat_object.markers %>% group_by (cluster) %>% dplyr :: filter (avg_log2FC > 1 ) %>% slice_head ( n = 5 ) %>% ungroup mesenchymal_stem_cell <- c ( "THY1" , "TAGLN" , "COL1A2" , "DCN" , "PECAM1" , "CDH5" , "VWF" , "KRT5" , "KRT14" , "KRT1" , "CD68" , "CSF1R" , "CYBB" , "CPA3" , "HPGDS" , "TPSB2" , "MLANA" , "CD3D" , "CD3E" , "KRT19" , "KRT7" , "KRT8" ) 4.3.2 定义细胞类型 new.cluster.ids <- c ( "Endothelial cells" , "Fibroblast cells" , "Fibroblast cells" , "Epidermal stem cells" , "Fibroblast cells" , "Fibroblast cells" , "Endothelial cells" , "Epidermal stem cells" , "Macrophages" , "Fibroblast cells" , "Endothelial cells" , "Mast cells" , "T cells" , "Melanocytes" , "Endothelial cells" , "Epidermal stem cells" , "Endothelial cells" , "Keratinocyte" ) names (new.cluster.ids) <- levels (seurat_object) new.cluster.ids seurat_object <- RenameIdents (seurat_object,new.cluster.ids) seurat_obj_Annotated <- seurat_object saveRDS (seurat_obj_Annotated, file = "seurat_obj_Annotated.rds" ) 4.3.3 可视化注释结果 rm ( list= ls ) seurat_object <- readRDS ( "seurat_obj_Annotated.rds" ) seurat_object <- JoinLayers (seurat_object) seurat_object.markers <- FindAllMarkers (seurat_object, min.pct = 0.5 , logfc.threshold = 1 , only.pos = T) plot <- DimPlot (seurat_object, reduction = "umap" , label = TRUE ) 4.3.4 显著表达基因可视化 mesenchymal_stem_cell <- c ( "SELE" , "DCN" , "LGALS7" , "CD86" , "TPSAB1" , "PTPRC" , "SOX10" , "DCD" ) plot <- VlnPlot (seurat_object, features = mesenchymal_stem_cell, ncol = 2 ) image 4.4 关键细胞 在下游分析中,我们可以选择一些关键细胞群进行拟时序分析,关键细胞可以通过生物标志物鉴定,也可以根据现有研究来定义,在这里我们选择”Fibroblast cells”,“Endothelial cells”,“Epidermal stem cells”做进一步的研究分析:seurat_object @ meta.data $ cell_type <- seurat_object @ active.ident seurat_object $ group <- ifelse ( grepl ( "^KL" , seurat_object $ orig.ident), "KD" , ifelse ( grepl ( "^NS" , seurat_object $ orig.ident), "NC" , "unknown" )) metadata <- data.frame (seurat_object @ meta.data) unique (seurat_object $ cell_type) subset_clusters <- subset (seurat_object, idents = c ( "Fibroblast cells" , "Endothelial cells" , "Epidermal stem cells" )) saveRDS (subset_clusters, file = "subset_clusters.rds" ) 4.5 拟时序分析 在这里,我们基于上面筛选的关键细胞簇进行后续拟时序分析:4.5.1 载入相关包 rm ( list= ls ) library (Seurat) library (SingleCellExperiment) library (tidyverse) library (RColorBrewer) 4.5.2 可视化细胞簇 subset_clusters <- readRDS ( "subset_clusters.rds" ) plot <- DimPlot (subset_clusters, reduction = "umap" , label = TRUE ) mesenchymal_stem_cell <- c ( "PECAM1" , "CDH5" , "VWF" , "THY1" , "TAGLN" , "COL1A2" , "DCN" , "KRT5" , "KRT14" , "KRT1" ) plot <- DotPlot (subset_clusters, features = mesenchymal_stem_cell) + RotatedAxis + scale_color_gradientn ( colours = c ( ' , ' , ' , ' )) seurat_object <- subset_clusters 4.5.3 安装slingshot if ( ! require ( "BiocManager" , quietly = TRUE )) install.packages ( "BiocManager" ) BiocManager :: install ( "slingshot" ) 接下啦,我们从现有的Seurat对象中,根据cluster_list指定的簇标识符,提取对应的细胞,并将这部分数据转换成SingleCellExperiment对象,以便进行进一步的分析或操作。

library (slingshot) cluster_list <- unique (seurat_object $ cell_type) select_cluster = as.SingleCellExperiment ( subset (seurat_object, idents = cluster_list), assay = 'RNA' ) select_cluster colnames ( colData (select_cluster)) 结果展示:> select_cluster class: SingleCellExperiment dim: 33538382 metadata(0): assays(2): counts logcounts rownames(33538): MIR1302-2HG FAM138A ... AC213203.1 FAM231C rowData names(0): colnames(38382): AAACCCAAGAACAGGA-1_1_1 AAACCCAAGATCCTAC-1_1_1 ... TTTGTTGTCGAACCAT-1_2 TTTGTTGTCTTCTAAC-1_2 colData names(9): orig.ident nCount_RNA ... group ident reducedDimNames(2): PCA UMAP mainExpName: RNA altExpNames(0): > > colnames(colData(select_cluster)) [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "percent.mt" "RNA_snn_res.0.3" [6] "seurat_clusters" "cell_type" "group" "ident" > 4.5.4 轨迹推断 sim <- slingshot (select_cluster, clusterLabels = 'cell_type' , reducedDim = 'UMAP' , start.clus= "Epidermal stem cells" , # 选择起点 end.clus = NULL # 这里我们不指定终点 ) 这里我们定义”Epidermal stem cells”为细胞发育起点,并且不规定终点,让算法进行模拟计算。

slingshot函数,用于单细胞数据分析中的谱系推断,帮助从高维数据中推测细胞发展轨迹。下面是各个参数的含义和作用:1. data 类型:数据对象,包含用于谱系推断的坐标矩阵。说明:这可以是 matrix、SingleCellExperiment、SlingshotDataSet 或 PseudotimeOrdering 类型的数据对象,通常是高维数据的降维结果(如PCA、t-SNE、UMAP等)。

2. clusterLabels 类型:向量或矩阵 说明:每个细胞的簇分配标签。如果是向量,则每个细胞对应一个标签;如果是矩阵,则为每个细胞与簇的加权关系,其中“-1”表示未分配的细胞。3. reducedDim 类型:字符或矩阵(可选) 说明:指定要使用的降维数据。如果提供了该参数,则可以指定哪一个降维结果(例如PCA、UMAP或t-SNE)用于谱系推断。

4. start.clus 类型:字符(可选) 说明:指定谱系推断的起始簇,通常是谱系树的根簇。5. end.clus 类型:字符(可选) 说明:指定强制作为叶节点的簇,通常是谱系树的终点簇。6. dist.method 类型:字符(可选) 说明:指定计算簇之间距离的方法,默认值为“slingshot”。可以使用不同的距离度量方法。

4.5.5 可视化计算结果 colnames ( colData (sim)) SlingshotDataSet (sim) 结果展示:> colnames(colData(sim)) [1] "orig.ident" "nCount_RNA" "nFeature_RNA" "percent.mt" "RNA_snn_res.0.3" [6] "seurat_clusters" "cell_type" "group" "ident" "slingshot" [11] "slingPseudotime_1" > SlingshotDataSet(sim) class: SlingshotDataSet Samples Dimensions 38382 2 lineages: 1 Lineage1: Epidermal stem cells Endothelial cells Fibroblast cells curves: 1 Curve1: Length: 48.537 Samples: 38382 > plot ( reducedDims (sim) $ UMAP, pch= 16 , asp = 1 ) lines ( SlingshotDataSet (sim), lwd= 2 , col= brewer.pal ( 9 , "Set1" )) legend ( "right" , legend = paste0 ( "lineage" , 1 ), col = unique ( brewer.pal ( 6 , "Set1" )), inset= 0.8 , pch = 16 ) 美化图形,添加渐变色。

colors <- colorRampPalette ( brewer.pal ( 11 , 'Spectral' )[ - 6 ])( 100 ) plotcol <- colors[ cut (sim $ slingPseudotime_1, breaks= 100 )] plotcol[ is.na (plotcol)] <- "lightgrey" plot ( reducedDims (sim) $ UMAP, col = plotcol, pch= 16 , asp = 1 ) lines ( SlingshotDataSet (sim), lwd= 2 , col= brewer.pal ( 9 , "Set1" )) legend ( "right" , legend = paste0 ( "lineage" , 1 ), col = unique ( brewer.pal ( 6 , "Set1" )), inset= 0.8 , pch = 16 ) 从模拟计算结果我们可以看到,3种关键细胞,由”Epidermal stem cells”向”Endothelial cells”,再向”Fibroblast cells”分化。

4.6 探究分化过程中的基因表达 4.6.1 tradeSeq 提供了一种灵活的回归模型拟合方法,可用于查找沿轨迹中的一个或多个谱系差异表达的基因。基于拟合模型,发现表达与伪时间相关的基因,或沿轨迹差异表达(在特定区域)的基因。它为每个基因拟合一个负二项式广义加性模型 (GAM),并对 GAM 的参数进行推理。

if ( ! require ( "BiocManager" , quietly = TRUE )) install.packages ( "BiocManager" ) BiocManager :: install ( "tradeSeq" ) 4.6.2 数据准备 counts <- sim @ assays @ data $ counts crv <- SlingshotDataSet (sim) counts:从模拟数据对象sim中提取基因表达矩阵(计数数据)。

sim@assays @data$counts提取计数矩阵,每行代表一个基因,每列代表一个细胞。crv:将sim转换为SlingshotDataSet对象。Slingshot是一种分析单细胞发育轨迹的方法,SlingshotDataSet对象描述了细胞间的发育轨迹及其分支结构。

4.6.3 确定最佳模型的参数 k set.seed ( 1234 ) icMat <- evaluateK ( counts = counts, sds = crv, k = 3 : 5 , nGenes = 20 , verbose = T) evaluateK:评估不同模型中参数k的最佳值,k表示基因表达模型中的平滑参数数量。参数说明:counts:输入的基因表达矩阵,用于拟合模型。

sds:发育轨迹数据(SlingshotDataSet对象),定义了细胞的轨迹和分支信息。k = 3:10:尝试从3到10个不同的平滑参数数量。nGenes = 200:随机选择200个基因进行评估,加快分析速度。verbose = T:显示详细的日志信息,方便跟踪运行状态。输出:icMat:存储不同k值下模型的评估结果,通常基于信息准则(如AIC、BIC)选择最优k值。

这一计算时间较长,结果展示如下:下图中,我们需要找到快速下降曲线的转折点;如果这个点小于6,则选择对应的数字;如果这个点大于等于6,则选择6。

这里我们选择 nknots = 6,进行下一步计算:内容解读可参考:https://statomics.github.io/tradeSeq/articles/tradeSeq.html 4.6.4 计算伪时间 pseudotime <- slingPseudotime (crv, na = FALSE ) slingPseudotime(crv, na = FALSE):计算每个细胞的伪时间值。

crv 是一个 SlingshotDataSet 对象,包含了细胞的发育轨迹和分支结构。伪时间(Pseudotime) 是一种用于量化细胞在发育过程中所处“时间”点的概念。尽管我们没有真实的时间轴,伪时间可以帮助揭示细胞的发育或分化状态。该函数计算每个细胞沿轨迹的相对位置(伪时间),并将这些值存储在 pseudotime 中。na = FALSE:表示如果在计算过程中有缺失值,是否应跳过这些细胞。

设为FALSE时,缺失值将被处理(默认行为)。4.6.5 计算细胞权重 cellWeights <- slingCurveWeights (crv) slingCurveWeights(crv):计算每个细胞在各个轨迹曲线上的权重。在发育轨迹分析中,细胞的表达模式可以影响它们在特定轨迹上的位置。该函数计算每个细胞在各个发育轨迹(或曲线)上的“权重”,即它们在不同轨迹分支的概率分配。

4.6.6 拟合广义加性模型(GAM) top_genes <- head ( order ( rowVars (counts), decreasing = TRUE ), 200 ) counts_subset <- counts[top_genes, ] system.time ({ sce <- fitGAM ( counts = counts_subset, pseudotime = pseudotime, cellWeights = cellWeights, nknots = 6 , verbose = FALSE ) }) fitGAM:拟合一个广义加性模型(GAM)来描述基因在伪时间中的表达变化。

counts:基因表达矩阵,其中行是基因,列是细胞。pseudotime:之前计算的每个细胞的伪时间值。它提供了每个细胞在发育轨迹中的相对时间点,作为GAM拟合的一个自变量。cellWeights:每个细胞在不同轨迹曲线上的权重,作为GAM拟合中的额外信息,用于提升模型的准确性。nknots = 6:指定在拟合GAM时使用的“结点”数量。结点数量影响模型的灵活性,通常越多结点,模型越灵活,但也可能导致过拟合。这里指定了6个结点。

4.6.7 基因表达与拟时序的相关性 assoRes <- associationTest (sce) head (assoRes) > head(assoRes) waldStat df pvalue meanLogFC COL1A1 76617.452 2 0 2.5987026 MALAT1 6995.086 2 0 0.8395357 COL1A2 83287.236 2 0 2.6722302 COL3A1 65739.148 2 0 2.6731177 RPLP1 41623.380 2 0 0.8002227 RPS18 25055.523 2 0 0.5648929 > 4.6.8 起止点相关性最高的基因 startRes <- startVsEndTest (sce) head (startRes) > head (startRes) waldStat df pvalue logFClineage1 COL1A1 58068.684 1 0 6.8558583 MALAT1 4258.177 1 0 0.7492381 COL1A2 63639.221 1 0 6.8190368 COL3A1 50532.819 1 0 6.7238449 RPLP1 22381.552 1 0 - 0.6871365 RPS18 22745.007 1 0 - 0.6963100 startVsEndTest 是一个用于评估单细胞转录组数据中发育轨迹(lineage)起始点(start)和终止点(end)之间差异表达的函数。

该函数的主要目的是通过比较发育轨迹起始点与终止点的基因表达差异,来识别在发育过程中可能有重要作用的基因。startVsEndTest 的结果是一个包含差异表达基因(DEGs)分析的输出,通常包括统计量(如Wald统计量)、自由度、p值、对数变化值(logFC)等。你提供的输出是一个数据框,其中包含以下列:waldStat:Wald统计量用于检验基因在起始点和终止点之间是否存在显著的表达变化。

它是一个用于假设检验的统计量,通常用于广义线性模型中。数值越大,表示该基因在轨迹的起始和终止点之间的差异越显著。df:这是自由度(degrees of freedom),通常用于检验统计显著性的分布。由于每个基因的表达模型是基于伪时间拟合的,因此它的自由度是1,表示每个基因有一个独立的估计参数(即线性模型中的斜率)。pvalue:p值用于检验基因表达的差异是否显著。p值越小,表示该基因在起始点和终止点之间的表达差异越显著。

logFClineage1:logFClineage1 是基因在发育轨迹的第1条线路(lineage 1)中起始和终止点之间的对数变化(log2 fold change)。这是基因在起始点和终止点之间表达变化的对数变动。

4.6.9 按照相关性排序 oStart <- order (startRes $ waldStat, decreasing = TRUE ) # 挑选相关性最强的基因,并可视化 sigGeneStart <- names (sce)[oStart[ 1 ]] # > sigGeneStart # [1] "DCN" plotSmoothers (sce, counts, gene = sigGeneStart) 在UMAP图中展示基因的差异表达:plotGeneCount (crv, counts, gene = sigGeneStart) 4.6.10 使用ggplots可视化基因表达趋势 coldata <- data.frame ( celltype = sim @ colData $ cell_type, plotcol = sim @ colData $ group) rownames (coldata) = colnames (sim) # sce中对应信息取出 filter_coldata <- coldata[ colnames (sce),] # 添加拟时序信息 filter_coldata $ Pseudotime = sce $ crv $ pseudotime.Lineage1 # top6 genes top6 <- names (sce)[oStart[ 1 : 6 ]] top6_exp = sce @ assays @ data $ counts[top6,] top6_exp = log2 (top6_exp + 1 ) %>% t # 获得最终的清洁数据 plt_data = cbind (filter_coldata, top6_exp) colnames (plt_data) 结果展示:> head (plt_data) celltype plotcol Pseudotime DCN CD63 COL1A2 CCDC80 COL6A2 AAACCCAAGAACAGGA - 1_1_1 Endothelial cells KD 17.6562951 0.000000 2.807355 1.000000 0.000000 0.000000 AAACCCAAGATCCTAC - 1_1_1 Fibroblast cells KD 44.1862229 8.266787 5.754888 8.471675 6.672425 4.954196 AAACCCAAGCCTGACC - 1_1_1 Endothelial cells KD 18.1090814 0.000000 3.459432 0.000000 0.000000 0.000000 AAACCCAAGCGCAATG - 1_1_1 Endothelial cells KD 21.4856377 0.000000 1.584963 0.000000 0.000000 0.000000 AAACCCACAAACTCGT - 1_1_1 Endothelial cells KD 21.1379977 0.000000 1.584963 1.000000 0.000000 0.000000 AAACCCACAAGACGGT - 1_1_1 Epidermal stem cells KD 0.9835969 0.000000 2.000000 0.000000 0.000000 0.000000 CST3 AAACCCAAGAACAGGA - 1_1_1 3.321928 AAACCCAAGATCCTAC - 1_1_1 6.714246 AAACCCAAGCCTGACC - 1_1_1 3.906891 AAACCCAAGCGCAATG - 1_1_1 4.169925 AAACCCACAAACTCGT - 1_1_1 2.584963 AAACCCACAAGACGGT - 1_1_1 2.321928 > colnames (plt_data) [ 1 ] "celltype" "plotcol" "Pseudotime" "DCN" "CD63" "COL1A2" "CCDC80" "COL6A2" [ 9 ] "CST3" > 4.6.11 绘图 library (ggplot2) library (ggsci) library (ggpubr) library (RColorBrewer) getPalette = colorRampPalette ( brewer.pal ( 12 , "Paired" )) mycolors = getPalette ( length ( unique (plt_data $ celltype))) plt_list = list for (gene in top6) { print (gene) p = ggscatter ( data = plt_data, x = 'Pseudotime' , y = gene, color = 'celltype' , size = 0.6 ) + geom_smooth ( se = F, color = 'orange' ) + theme_bw + scale_color_manual ( values = mycolors) + theme ( legend.position = 'none' ) plt_list[[gene]] = p } library (patchwork) wrap_plots (plt_list) ggsave ( filename = 'top6_genes.pdf' , width = 9 , height = 5 )

← 上一章 下一章 →