第 4/8 章

五、nichenetr

测试数据来源于: Medaglia et al. 等人提供的NICHE-seq数据,用于研究淋巴细胞性脑膜脊髓炎病毒( lymphocytic choriomeningitis virus, LCVM)注射后72小时在腹股沟淋巴结中是否会对T Cell的细胞通讯产生影响。NicheNet 会选择一定的基因集用于分析,因此在做 NicheNet 分析前需要确定一些受体细胞的特定基因集(通常是一些差异基因)。

1、准备工作 1.1 安装并加载R包 # 安装R包 # install.packages("mlrMBO") # if(!require(nichenetr))devtools::install_github("saeyslab/nichenetr") # 加载R包 library (nichenetr) library (Seurat) library (tidyverse) # 查看R包版本 packageVersion ( "Seurat" ) # [1] '5.0.3' 1.2 表达矩阵获取 # 读入数据 # 这个网址可能打不开 # seuratObj = readRDS(url("https://zenodo.org/record/3531889/files/seuratObj.rds")) # 我把他下载到了本地:Sys.time # [1] "2025-05-15 11:16:14 CST" rm ( list = ls ) gc # used (Mb) gc trigger (Mb) max used (Mb) # Ncells 6995062 373.6 13029127 695.9 13029127 695.9 # Vcells 13767826 105.1 84017254 641.1 205115830 1565.0 setwd ( "/data/07_scRNA-seq_cell_communication/" ) seuratObj = readRDS ( "data/seuratObj.rds" ) # 如遇报错"no slot of name "images" for this object of class "Seurat"",可以使用UpdateSeuratObject更细seurat对象 seuratObj <- UpdateSeuratObject (seuratObj) seuratObj # An object of class Seurat # 13541 features across 5027 samples within 1 assay # Active assay: RNA (13541 features, 1575 variable features) # 3 layers present: counts, data, scale.data # 4 dimensional reductions calculated: cca, cca.aligned, tsne, pca # 查看数据基本信息 # 注释信息 head (seuratObj @ meta.data) # nGene nUMI orig.ident aggregate res.0.6 celltype nCount_RNA # W380370 880 1611 LN_SS 1 CD8 T 1607 # W380372 541 891 LN_SS 0 CD4 T 885 # W3803742 1229 LN_SS 0 CD4 T 1223 # W380378 847 1546 LN_SS 1 CD8 T 1537 # W380379 839 1606 LN_SS 0 CD4 T 1603 # W380381 517 844 LN_SS 0 CD4 T 840 # nFeature_RNA # W380370 876 # W380372 536 # W380374 737 # W380378 838 # W380379 836 # W380381 513 # celltype数量 table (seuratObj @ meta.data $ celltype) # # B CD4 T CD8 T DC Mono NK Treg # 382 2562 1645 18 90 131 199 # 降维图 DimPlot (seuratObj, reduction = "tsne" ) # 查看分组信息,这里分组变量为"aggregate" seuratObj @ meta.data $ aggregate %>% table # . # LCMV SS # 3886 1141 table (seuratObj @ meta.data $ celltype, seuratObj @ meta.data $ aggregate) # # LCMV SS # B 344 38 # CD4 T 1961 601 # CD8 T 1252 393 # DC 14 4 # Mono 75 15 # NK 94 37 # Treg 146 53 DimPlot (seuratObj, reduction = "tsne" , group.by = "aggregate" ) 1.3 下载先验模型 目前支持的物种为小鼠 mouse 与人类 human # 定义分析物种 organism = "mouse" # 在线读取库文件,可能连接比较困难 if (F){ if (organism == "human" ){ lr_network = readRDS ( url ( "https://zenodo.org/record/7074291/files/lr_network_human_21122021.rds" )) ligand_target_matrix = readRDS ( url ( "https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final.rds" )) weighted_networks = readRDS ( url ( "https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final.rds" )) } else if (organism == "mouse" ){ lr_network = readRDS ( url ( "https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds" )) ligand_target_matrix = readRDS ( url ( "https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds" )) weighted_networks = readRDS ( url ( "https://zenodo.org/record/7074291/files/weighted_networks_nsga2r_final_mouse.rds" )) } } # 因此我下了个本地版的 # 根据物种加载数据库 # 数据库包括:配体-受体网络 (lr_network)、配体-靶基因关联矩阵 (ligand_target_matrix) 和加权网络 (weighted_networks),这些是 NicheNet 核心分析所需的数据库 if (organism == "human" ){ lr_network = readRDS ( "data/lr_network_human_21122021.rds" ) ligand_target_matrix = readRDS ( "data/ligand_target_matrix_nsga2r_final.rds" ) weighted_networks = readRDS ( "data/weighted_networks_nsga2r_final.rds" ) } else if (organism == "mouse" ){ lr_network = readRDS ( "data/lr_network_mouse_21122021.rds" ) ligand_target_matrix = readRDS ( "data/ligand_target_matrix_nsga2r_final_mouse.rds" ) weighted_networks = readRDS ( "data/weighted_networks_nsga2r_final_mouse.rds" ) } # 查看三个文件前几行 head (lr_network) # # A tibble: 6 × 4 # from to database source # <chr> <chr> <chr> <chr> # 1 2300002M23Rik Ddr1 omnipath # 2 2610528A11Rik Gpr15 omnipath # 3 9530003J23Rik Itgal omnipath # 4 a Atrn omnipath # 5 a F11r omnipath # 6 a Mc1r omnipath head (ligand_target_matrix[, 1 : 6 ]) # 2300002M23Rik 2610528A11Rik 9530003J23Rik a # 0610005C13Rik 0.000000e+00 0.000000e+00 1.311297e-05 0.000000e+00 # 0610009B22Rik 0.000000e+00 0.000000e+00 1.269301e-05 0.000000e+00 # 0610009L18Rik 8.872902e-05 4.977197e-05 2.581909e-04 7.570125e-05 # 0610010F05Rik 2.194046e-03 1.111556e-03 3.142374e-03 1.631658e-03 # 0610010K14Rik 2.271606e-03 9.360769e-04 3.546140e-03 1.697713e-03 # 0610012G03Rik 6.619036e-04 2.958259e-04 9.295219e-04 4.211287e-04 # A2m Aanat # 0610005C13Rik 1.390053e-05 0.000000e+00 # 0610009B22Rik 1.345536e-05 0.000000e+00 # 0610009L18Rik 9.802264e-05 6.938897e-05 # 0610010F05Rik 2.585820e-03 1.829664e-03 # 0610010K14Rik 2.632082e-03 1.658516e-03 # 0610012G03Rik 6.271871e-04 4.077356e-04 head (weighted_networks) # $lr_sig # # A tibble: 3,865,137 × 3 # from to weight # <chr> <chr> <dbl> # 1 0610010F05Rik App 0.110 # 2 0610010F05Rik Cat 0.0673 # 3 0610010F05Rik H1f2 0.0660 # 4 0610010F05Rik Lrrc49 0.0829 # 5 0610010F05Rik Nicn1 0.0864 # 6 0610010F05Rik Srpk1 0.123 # 7 0610010F05Rik Ttll1 0.0855 # 8 0610010K14Rik Akap8 0.131 # 9 0610010K14Rik Ash2l 0.312 # 10 0610010K14Rik Bptf 0.146 # # ℹ 3,865,127 more rows # # $gr # # A tibble: 4,364,411 × 3 # from to weight # <chr> <chr> <dbl> # 1 0610010K14Rik 0610010K14Rik 0.121 # 2 0610010K14Rik 2510039O18Rik 0.121 # 3 0610010K14Rik 2610021A01Rik 0.0256 # 4 0610010K14Rik 9130401M01Rik 0.0263 # 5 0610010K14Rik Alg1 0.127 # 6 0610010K14Rik Alox12 0.128 # 7 0610010K14Rik Ambra1 0.122 # 8 0610010K14Rik Ankib1 0.127 # 9 0610010K14Rik Anxa1 0.125 # 10 0610010K14Rik Arhgap1 0.125 # # ℹ 4,364,401 more rows # 预处理 # 去除 lr_network 数据框中 from 和 to 列组合重复的行,保留唯一的 from 和 to 组合 lr_network = lr_network %>% distinct (from, to) head (lr_network) # # A tibble: 6 × 2 # from to # <chr> <chr> # 1 2300002M23Rik Ddr1 # 2 2610528A11Rik Gpr15 # 3 9530003J23Rik Itgal # 4 a Atrn # 5 a F11r # 6 a Mc1r weighted_networks_lr = weighted_networks $ lr_sig %>% inner_join (lr_network, by = c ( "from" , "to" )) # 1. lr_sig(配体-受体信号网络):包含了每个配体-受体对的权重,这些权重通常反映了配体通过其受体对靶基因表达的影响程度。

# from: 配体(ligand) # to: 受体(receptor) # weight: 配体-受体相互作用的权重,通常是信号强度或潜在调控能力 head (weighted_networks $ lr_sig) # # A tibble: 6 × 3 # from to weight # <chr> <chr> <dbl> # 1 0610010F05Rik App 0.110 # 2 0610010F05Rik Cat 0.0673 # 3 0610010F05Rik H1f2 0.0660 # 4 0610010F05Rik Lrrc49 0.0829 # 5 0610010F05Rik Nicn1 0.0864 # 6 0610010F05Rik Srpk1 0.123 # 2. gr(受体-基因调控网络):描述的是受体与靶基因之间的调控关系,即受体激活后,如何通过转录因子或其他调控机制影响靶基因的表达。

它揭示了受体如何调控基因表达,是配体通过其受体影响靶基因的下游环节。通过 gr 网络,NicheNet 能够进一步推测从配体到基因的调控路径。

# from: 受体(receptor) # to: 靶基因(target gene) # weight: 受体对靶基因表达的影响程度或调控强度 head (weighted_networks $ gr) # # A tibble: 6 × 3 # from to weight # <chr> <chr> <dbl> # 1 0610010K14Rik 0610010K14Rik 0.121 # 2 0610010K14Rik 2510039O18Rik 0.121 # 3 0610010K14Rik 2610021A01Rik 0.0256 # 4 0610010K14Rik 9130401M01Rik 0.0263 # 5 0610010K14Rik Alg1 0.127 # 6 0610010K14Rik Alox12 0.128 2、细胞通讯分析 2.1 定义sender与receiver 这里作者的研究目的中,CD8 T 作为 receiver,CD4 T , Treg , Mono , NK , B 与 DC 作为 sender。

并且在表达矩阵中,只有至少在一种细胞内表达比例至少达到10%的基因才会参与下游分析。

# receiver = "CD8 T" # 筛选在 seuratObj 这个 Seurat对象中 CD8 T细胞 群中,至少10%的细胞表达的基因 expressed_genes_receiver = get_expressed_genes (receiver, seuratObj, pct = 0.10 ) # 从 expressed_genes_receiver 中,进一步筛选出在 ligand_target_matrix 中也存在的基因 background_expressed_genes = expressed_genes_receiver %>% .[. %in% rownames (ligand_target_matrix)] # sender_celltypes = c ( "CD4 T" , "Treg" , "Mono" , "NK" , "B" , "DC" ) # 由于这里的sender细胞种类大于1,因此通过lapply获取每种细胞类型中表达比例大于10%的基因 list_expressed_genes_sender = sender_celltypes %>% unique %>% lapply (get_expressed_genes, seuratObj, 0.10 ) expressed_genes_sender = list_expressed_genes_sender %>% unlist %>% unique 2.2 定义一组感兴趣的基因集 receiver 细胞类型中可能存在经细胞互作影响表达的基因,这些基因可帮助后续的数据分析更加的有目的性。

由于这里作者需要对比LCVM与steady-state(SS)间 CD8 T 的差别,因此这里的基因集便为T Cell在组间的差异基因。

# 取出受体细胞的Seurat对象 seurat_obj_receiver = subset (seuratObj, idents = receiver) # 将分组信息aggregate作为Idents (seurat_obj_receiver) <- "aggregate" # 实验组:condition_oi <- "LCMV" # 对照组 condition_reference <- "SS" # 计算组间差异基因 # rownames_to_column("gene") 将基因名从行名转换为新的一列 gene DE_table_receiver <- FindMarkers ( object = seurat_obj_receiver, ident.1 = condition_oi, ident.2 = condition_reference, min.pct = 0.10 ) %>% rownames_to_column ( "gene" ) head (DE_table_receiver) # gene p_val avg_log2FC pct.1 pct.2 p_val_adj # 1 Ifi27l2b 2.164082e-150 2.922767 0.973 0.387 2.930383e-146 # 2 Irf7 5.425060e-129 3.707013 0.870 0.165 7.346074e-125 # 3 Ly6a 8.023340e-124 3.358513 0.879 0.239 1.086440e-119 # 4 Ifi27l2a 2.882324e-115 3.116906 0.861 0.191 3.902955e-111 # 5 Stat1 2.742811e-92 2.210800 0.888 0.389 3.714040e-88 # 6 Ly6c2 3.421692e-89 1.907967 0.940 0.534 4.633313e-85 # 过滤并获得基因名称 # 过滤标准:多重校正后的 p 值 ≤ 0.05(显著性)和 log2FoldChange 的绝对值 ≥ 0.25 geneset_oi <- DE_table_receiver %>% filter (p_val_adj <= 0.05 & abs (avg_log2FC) >= 0.25 ) %>% pull (gene) # 基因名称需要包含在配受体库文件中:geneset_oi <- geneset_oi %>% .[. %in% rownames (ligand_target_matrix)] # 最后查看一下前五个基因 geneset_oi[ 1 : 5 ] # [1] "Ifi27l2b" "Irf7" "Ly6a" "Stat1" "Ly6c2" 2.3 获取配受体基因表达矩阵 结合前面获得的 sender 与 receiver 的表达矩阵与库文件,分别取出受体基因在 receiver 的表达矩阵与配体基因在 sender 中对应的表达矩阵:# 从库文件中取出配体基因 ligands = lr_network %>% pull (from) %>% unique ligands[ 1 : 5 ] # [1] "2300002M23Rik" "2610528A11Rik" "9530003J23Rik" "a" # [5] "A2m" # 从库文件中取出受体基因 receptors = lr_network %>% pull (to) %>% unique receptors[ 1 : 5 ] # [1] "Ddr1" "Gpr15" "Itgal" "Atrn" "F11r" # 在`sender`发送细胞中表达的配体基因 expressed_ligands = intersect (ligands,expressed_genes_sender) expressed_ligands[ 1 : 5 ] # [1] "Ace" "Adam10" "Adam17" "Adam23" "Aimp1" # 在`receiver`接收细胞表达的受体基因 expressed_receptors = intersect (receptors,expressed_genes_receiver) expressed_receptors[ 1 : 5 ] # [1] "Itgal" "Notch1" "Itga4" "Itgb7" "Treml2" # 并且这个配体-受体对本身在 lr_network 网络中是存在的 # 如果存在则取出真正的配体基因作为潜在配体 potential_ligands = lr_network %>% filter (from %in% expressed_ligands & to %in% expressed_receptors) %>% pull (from) %>% unique potential_ligands[ 1 : 5 ] # [1] "Adam10" "Adam17" "App" "B2m" "Btla" 2.4 配体活性分析 结合配体的靶向基因在作者感兴趣的基因集和背景基因的表达情况,对潜在配体进行评级。

# 核心逻辑是:对每一个 potential_ligand,看它根据 ligand_target_matrix 最可能调控 geneset_oi 中的基因。

计算每个配体的一个打分指标,衡量它能否解释目标基因集的表达变化 # geneset:是你在前面筛选出来的目标基因集合("CD8 T"组件差异表达且在ligand_target_matrix中存在的基因),即配体可能调控的基因 # background_expressed_genes:是在接收细胞中表达的背景基因,用于设定合理的比较基线 # ligand_target_matrix:是预先训练好的配体-靶基因打分矩阵(每个配体对每个基因的调控潜力打分) # potential_ligands:是之前筛选出的、在发送细胞表达且对应受体在接收细胞表达的潜在配体 ligand_activities = predict_ligand_activities ( geneset = geneset_oi, background_expressed_genes = background_expressed_genes, ligand_target_matrix = ligand_target_matrix, potential_ligands = potential_ligands ) # 按照 aupr_corrected(打分指标,adjusted Area Under the Precision-Recall curve,一种综合考虑准确率和召回率的打分)从高到低排序 # mutate为每个配体生成一个排名,并存储到ligand_activities的rank列 ligand_activities = ligand_activities %>% arrange ( - aupr_corrected) %>% mutate ( rank = rank ( desc (aupr_corrected))) head (ligand_activities) # # A tibble: 6 × 6 # test_ligand auroc aupr_corrected pearson rank # <chr> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 Ebi3 0.655 0.380 0.234 0.291 1 # 2 Ptprc 0.640 0.304 0.158 0.160 2 # 3 H2-M3 0.609 0.287 0.141 0.181 3 # 4 H2-M2 0.613 0.272 0.125 0.146 5 # 5 H2-T10 0.613 0.272 0.125 0.146 5 # 6 H2-T22 0.613 0.272 0.125 0.146 5 基于area under the precision-recall curve(AUPR)选择Top30的ligand用户下游分析 # 把筛选出来的30个配体再按 aupr_corrected 分数从高到低排一下顺序 # 从 ligand_activities 中提取出列名叫 test_ligand 的这一列(即配体的名字),不带其他列 best_upstream_ligands = ligand_activities %>% top_n ( 30 , aupr_corrected) %>% arrange ( - aupr_corrected) %>% pull (test_ligand) %>% unique best_upstream_ligands # [1] "Ebi3" "Ptprc" "H2-M3" "H2-M2" "H2-T10" "H2-T22" "H2-T23" "H2-K1" # [9] "H2-Q4" "H2-Q6" "H2-Q7" "H2-D1" "Sirpa" "Cd48" "App" "Tgfb1" # [17] "Ccl22" "Selplg" "Btla" "Cxcl10" "Adam17" "Cxcl11" "Icam1" "Tgm2" # [25] "Cxcl9" "B2m" "Cd72" "C3" "Hp" "Itgb2" # 查看配体在各细胞中的表达情况 DotPlot (seuratObj, features = best_upstream_ligands %>% rev , cols = "RdYlBu" ) + RotatedAxis 2.5 推断受体以及配体潜在调控靶基因与配体的调控活性 2.5.1 配体与Target Gene的调控关系 # get_weighted_ligand_target_links获取配体对应的靶基因打分矩阵 # 对每个配体,从 ligand_target_matrix 中选出打分最高的 200 个目标基因(如果这些目标基因在你的 geneset_oi 中) active_ligand_target_links_df = best_upstream_ligands %>% lapply (get_weighted_ligand_target_links, geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = 200 ) %>% bind_rows %>% drop_na # 得到的 active_ligand_target_links_df 是一个数据框,其中包含每个配体对多个靶基因的“调控潜力得分” head (active_ligand_target_links_df) # # A tibble: 6 × 3 # ligand target weight # <chr> <chr> <dbl> # 1 Ebi3 Bst2 0.0500 # 2 Ebi3 Cd274 0.0504 # 3 Ebi3 Cxcl10 0.0570 # 4 Ebi3 Cxcr4 0.0430 # 5 Ebi3 Ddit4 0.0485 # 6 Ebi3 Ddx58 0.0402 # 将上一步的 dataframe 转换成适合绘制热图的矩阵形式。

# cutoff = 0.33 表示过滤掉得分低于这个值的配体-靶基因对,降低噪音,突出高潜力的调控关系 active_ligand_target_links = prepare_ligand_target_visualization ( ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = 0.33 ) # 保留best_upstream_ligands和colnames(active_ligand_target_links)都存在的配体 # rev翻转配体基因排列顺序 # make.names 把名字里带 -、.、空格等特殊字符的基因名转换成 R 合法变量名(避免热图绘图时报错) order_ligands = intersect (best_upstream_ligands, colnames (active_ligand_target_links)) %>% rev %>% make.names # 保留active_ligand_target_links_df$target和rownames(active_ligand_target_links)都存在的受体 # 同样处理基因可能存在的特殊字符 order_targets = active_ligand_target_links_df $ target %>% unique %>% intersect ( rownames (active_ligand_target_links)) %>% make.names # 处理active_ligand_target_links行列配体和靶基因名称可能存在的特殊字符,如 rownames (active_ligand_target_links) = rownames (active_ligand_target_links) %>% make.names colnames (active_ligand_target_links) = colnames (active_ligand_target_links) %>% make.names # 得到最终的绘图矩阵 vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t # 可视化:p_ligand_target_network = vis_ligand_target %>% make_heatmap_ggplot ( "Prioritized ligands" , # y轴标题为配体 "Predicted target genes" , # x轴标题为配体的靶基因 color = "purple" , legend_position = "top" , # 图例放在图形顶部 x_axis_position = "top" , # x轴标签放在图形顶部 legend_title = "Regulatory potential" ) + # 为图例设置标题,表明图例代表的是配体对靶基因的调控潜力 theme ( axis.text.x = element_text ( face = "italic" )) + # 将x轴上的文本字体设置为斜体 scale_fill_gradient2 ( low = "whitesmoke" , high = "purple" ) p_ligand_target_network # 热图中展示的结果经过严格的过滤,不包含target gene或调控分值不足0.33者均被忽略 2.5.2 配体与受体间的活性分析 # 筛选配体在 best_upstream_ligands中(预测最活跃的前30个配体)和受体在expressed_receptors中的高可信配受体对 lr_network_top = lr_network %>% filter (from %in% best_upstream_ligands & to %in% expressed_receptors) %>% distinct (from,to) # 得到最相关的受体基因名 best_upstream_receptors = lr_network_top %>% pull (to) %>% unique # 从 weighted network 中获取配受体打分 lr_network_top_df_large = weighted_networks_lr %>% filter (from %in% best_upstream_ligands & to %in% best_upstream_receptors) # 转成矩阵用于绘图(热图前的准备) # spread函数把长数据变成宽数据,lr_network_top_df_large数据框中的 "from"列的值转换为新的列名,将 "weight" 列的值填充到相应的新列中,若某个组合不存在值,则用 0 进行填充 lr_network_top_df = lr_network_top_df_large %>% spread ( "from" , "weight" , fill = 0 ) # 选取除 "to" 列之外的所有列 # 数据框转换成矩阵 # 从原始数据框 lr_network_top_df 中提取 "to" 列的值。

这些值会被用作转换后矩阵的行名 lr_network_top_matrix = lr_network_top_df %>% select ( - to) %>% as.matrix %>% magrittr :: set_rownames (lr_network_top_df $ to) # dist函数计算 lr_network_top_matrix 中受体(矩阵的行)之间的二元距离 # hclust函数对计算得到的距离矩阵进行层次聚类,采用Ward方法(ward.D2),该方法试图最小化聚类内部的方差 dist_receptors = dist (lr_network_top_matrix, method = "binary" ) hclust_receptors = hclust (dist_receptors, method = "ward.D2" ) # 从聚类结果中提取排序后的受体名称,存储在 order_receptors 中 order_receptors = hclust_receptors $ labels[hclust_receptors $ order] # 对矩阵进行转置,使得配体变为矩阵的行,受体变为矩阵的列 # 同样计算配体之间的二元距离,进行层次聚类,并提取排序后的配体名称存储在 order_ligands 中 dist_ligands = dist (lr_network_top_matrix %>% t , method = "binary" ) hclust_ligands = hclust (dist_ligands, method = "ward.D2" ) order_ligands = hclust_ligands $ labels[hclust_ligands $ order] # 使用 intersect 函数确保 order_receptors 和 order_ligands 中的名称与原始矩阵的行名和列名一致,避免出现不匹配的情况 order_receptors = order_receptors %>% intersect ( rownames (lr_network_top_matrix)) order_ligands = order_ligands %>% intersect ( colnames (lr_network_top_matrix)) # 根据排序后的受体和配体名称对原始矩阵进行重排 # make.names 函数用于将名称转换为合法的 R 名称,避免出现非法字符 vis_ligand_receptor_network = lr_network_top_matrix[order_receptors, order_ligands] rownames (vis_ligand_receptor_network) = order_receptors %>% make.names colnames (vis_ligand_receptor_network) = order_ligands %>% make.names # 可视化 # 配体在 x 轴,受体在 y 轴 p_ligand_receptor_network = vis_ligand_receptor_network %>% t %>% make_heatmap_ggplot ( "Ligands" , # y轴标题为配体 "Receptors" , # x轴标题为受体 color = "mediumvioletred" , x_axis_position = "top" , legend_title = "Prior interaction potential" ) p_ligand_receptor_network 2.6 sender 中配体的组间差异log fold change值 # 对每个发送细胞类型调用 get_lfc_celltype,进行LCMV vs SS组间差异分析 DE_table_all = Idents (seuratObj) %>% levels %>% intersect (sender_celltypes) %>% lapply (get_lfc_celltype, seurat_obj = seuratObj, condition_colname = "aggregate" , condition_oi = condition_oi, condition_reference = condition_reference, # 注意修改自己的对照组与实验组名称 expression_pct = 0.10 , # 表达比例大于 10% 才考虑 celltype_col = NULL ) %>% reduce (full_join) # 将所有发送细胞类型的结果合并为一个大表(按基因名对齐) # 将非差异基因者数值转换为0 DE_table_all[ is.na (DE_table_all)] = 0 head (DE_table_all) # # A tibble: 6 × 7 # gene `CD4 T` Treg B NK Mono DC # <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 Ifi27l2b 2.55 1.64 1.70 3.36 2.54 6.43 # 2 Irf7 4.16 4.30 3.54 3.47 4.65 6.16 # 3 Ly6a 2.67 2.94 1.47 2.81 1.59 4.33 # 4 Ifi27l2a 2.68 2.24 1.70 3.73 2.35 5.35 # 5 Ifit3 4.69 4.47 3.41 4.55 5.81 3.74 # 6 Stat1 2.27 2.04 1.88 2.00 1.75 6.10 # 合并配体活动信息和差异表达信息 # 从 ligand_activities 中选出 test_ligand 和 pearson(预测的调控强度),重命名为 ligand # 将其和刚才的 DE_table_all 按 ligand 字段做左连接 ligand_activities_de = ligand_activities %>% select (test_ligand, pearson) %>% rename ( ligand = test_ligand) %>% left_join (DE_table_all %>% rename ( ligand = gene)) # 再次将 NA 转为 0(说明某些配体没出现在差异表达结果中) ligand_activities_de[ is.na (ligand_activities_de)] = 0 head (ligand_activities_de) # # A tibble: 6 × 8 # ligand pearson `CD4 T` Treg B NK Mono DC # <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 Ebi3 0.291 0 4.23 0.958 0.663 3.07 0 # 2 Ptprc 0.160 0.314 0.519 0.448 0.616 0.253 3.87 # 3 H2-M3 0.181 0.944 0.676 1.23 0.178 0.990 2.54 # 4 H2-M2 0.146 0 0 0 0 0 -0.705 # 5 H2-T10 0.146 1.52 1.76 1.30 0 1.60 1.62 # 6 H2-T22 0.146 1.84 2.79 2.07 1.95 3.71 5.27 # 绘制热图 # 构建用于绘图的矩阵(logFC 矩阵) # 提取差异表达数据(去掉 ligand 名和 pearson 列) # 设置行名为配体名 lfc_matrix = ligand_activities_de %>% select ( - ligand, - pearson) %>% as.matrix %>% magrittr :: set_rownames (ligand_activities_de $ ligand) # make.names 防止非法字符导致绘图报错 rownames (lfc_matrix) = rownames (lfc_matrix) %>% make.names # 确保配体顺序与之前热图一致 order_ligands = order_ligands[order_ligands %in% rownames (lfc_matrix)] vis_ligand_lfc = lfc_matrix[order_ligands,] # 修正列名即细胞名称 colnames (vis_ligand_lfc) = vis_ligand_lfc %>% colnames %>% make.names p_ligand_lfc = vis_ligand_lfc %>% make_threecolor_heatmap_ggplot ( "Prioritized ligands" , "LFC in Sender" , low_color = "midnightblue" , mid_color = "white" , mid = median (vis_ligand_lfc), high_color = "red" , legend_position = "top" , x_axis_position = "top" , legend_title = "LFC" ) + theme ( axis.text.y = element_text ( face = "italic" )) p_ligand_lfc 2.7 可视化拼盘 # 配体活性热图 # 筛选出ligand_activities的aupr_corrected,数值表示某配体调控其靶基因集合的能力(越高代表越有效) # 将ligand_activities$test_ligand设置为结果的行名 ligand_aupr_matrix = ligand_activities %>% select (aupr_corrected) %>% as.matrix %>% magrittr :: set_rownames (ligand_activities $ test_ligand) head (ligand_aupr_matrix) # aupr_corrected # Ebi3 0.2335898 # Ptprc 0.1579384 # H2-M3 0.1406565 # H2-M2 0.1254201 # H2-T10 0.1254201 # H2-T22 0.1254201 # 确保配体名合法 rownames (ligand_aupr_matrix) = rownames (ligand_aupr_matrix) %>% make.names colnames (ligand_aupr_matrix) = colnames (ligand_aupr_matrix) %>% make.names # 设置配体展示顺序和前面热图一直 # 设置列名称为AUPR vis_ligand_aupr = ligand_aupr_matrix[order_ligands, ] %>% as.matrix ( ncol = 1 ) %>% magrittr :: set_colnames ( "AUPR" ) # 绘制热图 p_ligand_aupr = vis_ligand_aupr %>% make_heatmap_ggplot ( "Prioritized ligands" , "Ligand activity" , color = "darkorange" , legend_position = "top" , x_axis_position = "top" , legend_title = "AUPR \n (target gene prediction ability)" ) + theme ( legend.text = element_text ( size = 9 )) # 配体表达量气泡图 order_ligands_adapted = order_ligands # 因为前面用 make.names 把基因名中的点号转换成了 .,这里为了让 DotPlot 正常识别表达矩阵中真实的基因名,将部分基因名改回来(如 "H2.M3" → "H2-M3" order_ligands_adapted[order_ligands_adapted == "H2.M3" ] = "H2-M3" order_ligands_adapted[order_ligands_adapted == "H2.T23" ] = "H2-T23" # 绘制气泡图 rotated_dotplot = DotPlot (seuratObj %>% subset (celltype %in% sender_celltypes), features = order_ligands_adapted, cols = "RdYlBu" ) + coord_flip + theme ( legend.text = element_text ( size = 10 ), legend.title = element_text ( size = 12 )) # 将四张关键的配体-受体分析图整合成一个完整面板 figures_without_legend = cowplot :: plot_grid ( # p_ligand_aupr:配体活性热图(AUPR 值) p_ligand_aupr + theme ( legend.position = "none" , axis.ticks = element_blank ) + theme ( axis.title.x = element_text ), # rotated_dotplot:配体表达量气泡图(在 Sender 细胞中) rotated_dotplot + theme ( legend.position = "none" , axis.ticks = element_blank , axis.title.x = element_text ( size = 12 ), axis.text.y = element_text ( face = "italic" , size = 9 ), axis.text.x = element_text ( size = 9 , angle = 90 , hjust = 0 )) + ylab ( "Expression in Sender" ) + xlab ( "" ) + scale_y_discrete ( position = "right" ), # p_ligand_lfc:Sender 细胞中配体的差异表达(LogFC) p_ligand_lfc + theme ( legend.position = "none" , axis.ticks = element_blank ) + theme ( axis.title.x = element_text ) + ylab ( "" ), # p_ligand_target_network:配体-靶基因调控网络热图 p_ligand_target_network + theme ( legend.position = "none" , axis.ticks = element_blank ) + ylab ( "" ), align = "hv" , # 水平方向和垂直方向都对齐 nrow = 1 , # 按列宽比例调节四张图的视觉宽度 rel_widths = c ( ncol (vis_ligand_aupr) + 6 , ncol (vis_ligand_lfc) + 7 , ncol (vis_ligand_lfc) + 8 , ncol (vis_ligand_target)) ) # legends:抽出各个图的图例 legends = cowplot :: plot_grid ( ggpubr :: as_ggplot (ggpubr :: get_legend (p_ligand_aupr)), ggpubr :: as_ggplot (ggpubr :: get_legend (rotated_dotplot)), ggpubr :: as_ggplot (ggpubr :: get_legend (p_ligand_lfc)), ggpubr :: as_ggplot (ggpubr :: get_legend (p_ligand_target_network)), nrow = 1 , = 1) align = "h" , # 水平对齐 rel_widths = c ( 1.5 , 1 , 1 , 1 )) # 使用 rel_widths 控制每个图例所占的空间比例 # 最终组合:主图 + 图例 combined_plot = cowplot :: plot_grid ( figures_without_legend, legends, rel_heights = c ( 10 , 5 ), nrow = 2 , align = "hv" ) # 水平方向和垂直方向都对齐 combined_plot 3、差异细胞通讯分析 本部分教程的示例数据来自于一个肝脏 scRNA-Seq 数据集( https://www.sciencedirect.com/science/article/pii/S0092867421014811)的 一部分。

NicheNet 的差异分析目标是找到配受体对均存在差异表达且激活的互作通路。

3.1 准备工作 3.1.1 加载R包 library (nichenetr) library (RColorBrewer) library (tidyverse) library (Seurat) 3.1.2 测试数据 # 读取表达矩阵,依旧是Seurat对象的rds文件 # 这个网址大家可能连接不上 if (F){ seurat_obj = readRDS ( url ( "https://zenodo.org/record/4675430/files/seurat_obj_hnscc.rds" )) DimPlot (seurat_obj, group.by = "celltype" , label = TRUE ) } # 依旧是下在本地操作比较稳:seurat_obj = readRDS ( "data/seurat_obj_hnscc.rds" ) # 查看数据中包含的细胞类型:DimPlot (seurat_obj, group.by = "celltype" , label = TRUE ) seurat_obj = SetIdent (seurat_obj, value = "celltype" ) # 查看数据中的分组信息 # p-EMT(partial epithelial-to-mesenchymal transition) 部分上皮细胞到间质的转变 DimPlot (seurat_obj, group.by = "pEMT" ) table (seurat_obj @ meta.data $ celltype, seurat_obj @ meta.data $ pEMT) # # High Low # CAF 396 104 # Endothelial 105 53 # Malignant 1093 549 # Myeloid 92 7 # myofibroblast 382 61 # T.cell 689 3 # 类似于Seurat中的差异计算,这里也需要构建一个包含celltype信息与分组信息的idents:seurat_obj @ meta.data $ celltype_aggregate <- paste (seurat_obj @ meta.data $ celltype, seurat_obj @ meta.data $ pEMT, sep = "_" ) # user adaptation required on own dataset DimPlot (seurat_obj, group.by = "celltype_aggregate" ) seurat_obj @ meta.data $ celltype_aggregate %>% table %>% sort ( decreasing = TRUE ) # . # Malignant_High T.cell_High Malignant_Low CAF_High # 1093 689 549 396 # myofibroblast_High Endothelial_High CAF_Low Myeloid_High # 382 105 104 92 # myofibroblast_Low Endothelial_Low Myeloid_Low T.cell_Low # 61 53 7 3 celltype_id <- "celltype_aggregate" Idents (seurat_obj) <- celltype_id 3.1.3 库文件 # 读取库文件 organism <- "human" # 测试数据是一个人类数据 if (T){ if (organism == "human" ){ lr_network = readRDS ( "data/lr_network_human_21122021.rds" ) ligand_target_matrix = readRDS ( "data/ligand_target_matrix_nsga2r_final.rds" ) weighted_networks = readRDS ( "data/weighted_networks_nsga2r_final.rds" ) } else if (organism == "mouse" ){ lr_network = readRDS ( "data/lr_network_mouse_21122021.rds" ) ligand_target_matrix = readRDS ( "data/ligand_target_matrix_nsga2r_final_mouse.rds" ) weighted_networks = readRDS ( "data/weighted_networks_nsga2r_final_mouse.rds" ) } } # 预处理 lr_network = lr_network %>% dplyr :: rename ( ligand = from, receptor = to) %>% distinct (ligand, receptor) head (lr_network) # # A tibble: 6 × 2 # ligand receptor # <chr> <chr> # 1 A2M MMP2 # 2 A2M MMP9 # 3 A2M LRP1 # 4 A2M KLK3 # 5 AANAT MTNR1A # 6 AANAT MTNR1B ligand_target_matrix[ 1 : 5 , 1 : 5 ] # A2M AANAT ABCA1 ACE2 # A-GAMMA3'E 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.000000000 # A1BG 0.0018503922 0.0011108718 0.0014225077 0.0028594037 0.001139013 # A1BG-AS1 0.0007400797 0.0004677614 0.0005193137 0.0007836698 0.000375007 # A1CF 0.0024799266 0.0013026348 0.0020420890 0.0047921048 0.003273375 # A2M 0.0084693452 0.0040689323 0.0064256379 0.0105191365 0.005719199 3.2 定义 niches 即 sender 与 receiver 的细胞群体集合,在R语言中的本质为 list。

需要注意的是一个 niche 中 receiver 只能有一个,而 sender 可以包含很多。

# 例如这个包含两个niche的niches,第一个为pEMT_High_niche,第二个为pEMT_Low_niches = list ( "pEMT_High_niche" = list ( "sender" = c ( "myofibroblast_High" , "Endothelial_High" , "CAF_High" , "T.cell_High" , "Myeloid_High" ), "receiver" = c ( "Malignant_High" )), "pEMT_Low_niche" = list ( "sender" = c ( "myofibroblast_Low" , "Endothelial_Low" , "CAF_Low" ), "receiver" = c ( "Malignant_Low" )) ) niches # $pEMT_High_niche # $pEMT_High_niche$sender # [1] "myofibroblast_High" "Endothelial_High" "CAF_High" # [4] "T.cell_High" "Myeloid_High" # # $pEMT_High_niche$receiver # [1] "Malignant_High" # # # $pEMT_Low_niche # $pEMT_Low_niche$sender # [1] "myofibroblast_Low" "Endothelial_Low" "CAF_Low" # # $pEMT_Low_niche$receiver # [1] "Malignant_Low" 3.3 计算 niches 的差异基因 # 目标:将配体和受体在发送者(sender)和接收者(receiver)细胞群中的差异表达信息与配对的配体-受体网络(lr_network)整合起来,计算每对LR对的特异性评分(specificity score),以找出在不同生态位(niche)中更具特异性的细胞通讯对 # 设置后续的分析操作(例如差异表达分析)基于 RNA 表达矩阵来开展 assay_oi = "RNA" # lr_network$ligand %>% unique:从 lr_network 数据结构里提取所有独特的配体名称 # subset(features = ...):从seurat_obj中选取仅包含这些配体基因的子集。

这样做的目的是把分析范围限定在配体相关的基因上,减少不必要的计算量 # niches:一个列表,定义了不同的细胞生态位(niche),其中每个生态位包含发送者细胞群和接收者细胞群的信息 # type = "sender":指定要进行差异表达分析的细胞类型为发送者细胞。这意味着函数会比较不同生态位中发送者细胞群里配体基因的表达差异 # 计算过程: # (1) 分组:依据 niches 里定义的发送者细胞群,把细胞划分成不同的组。

例如,在 "pEMT_High_niche" 和 "pEMT_Low_niche" 中分别确定对应的发送者细胞群 # (2) 统计检验:对每个配体基因,在不同组的发送者细胞中进行差异表达检验。常见的统计检验方法有 Wilcoxon 秩和检验、t 检验等。

这些检验会比较不同组中基因表达水平的均值或分布,以判断基因是否存在差异表达 # (3) 结果输出:函数会返回一个结果对象 DE_sender,其中包含每个配体基因的差异表达统计信息,例如 p 值、调整后的 p 值、平均表达差异等。

这些信息能够帮助你识别在不同生态位中发送者细胞群里显著差异表达的配体基因 DE_sender <- calculate_niche_de ( seurat_obj = seurat_obj %>% subset ( features = lr_network $ ligand %>% unique ), niches = niches, type = "sender" , assay_oi = assay_oi) # [1] "Calculate Sender DE between: myofibroblast_High and myofibroblast_Low" # [2] "Calculate Sender DE between: myofibroblast_High and Endothelial_Low" # [3] "Calculate Sender DE between: myofibroblast_High and CAF_Low" # [1] "Calculate Sender DE between: Endothelial_High and myofibroblast_Low" # [2] "Calculate Sender DE between: Endothelial_High and Endothelial_Low" # [3] "Calculate Sender DE between: Endothelial_High and CAF_Low" # [1] "Calculate Sender DE between: CAF_High and myofibroblast_Low" # [2] "Calculate Sender DE between: CAF_High and Endothelial_Low" # [3] "Calculate Sender DE between: CAF_High and CAF_Low" # [1] "Calculate Sender DE between: T.cell_High and myofibroblast_Low" # [2] "Calculate Sender DE between: T.cell_High and Endothelial_Low" # [3] "Calculate Sender DE between: T.cell_High and CAF_Low" # [1] "Calculate Sender DE between: Myeloid_High and myofibroblast_Low" # [2] "Calculate Sender DE between: Myeloid_High and Endothelial_Low" # [3] "Calculate Sender DE between: Myeloid_High and CAF_Low" # [1] "Calculate Sender DE between: myofibroblast_Low and myofibroblast_High" # [2] "Calculate Sender DE between: myofibroblast_Low and Endothelial_High" # [3] "Calculate Sender DE between: myofibroblast_Low and CAF_High" # [4] "Calculate Sender DE between: myofibroblast_Low and T.cell_High" # [5] "Calculate Sender DE between: myofibroblast_Low and Myeloid_High" # [1] "Calculate Sender DE between: Endothelial_Low and myofibroblast_High" # [2] "Calculate Sender DE between: Endothelial_Low and Endothelial_High" # [3] "Calculate Sender DE between: Endothelial_Low and CAF_High" # [4] "Calculate Sender DE between: Endothelial_Low and T.cell_High" # [5] "Calculate Sender DE between: Endothelial_Low and Myeloid_High" # [1] "Calculate Sender DE between: CAF_Low and myofibroblast_High" # [2] "Calculate Sender DE between: CAF_Low and Endothelial_High" # [3] "Calculate Sender DE between: CAF_Low and CAF_High" # [4] "Calculate Sender DE between: CAF_Low and T.cell_High" # [5] "Calculate Sender DE between: CAF_Low and Myeloid_High" head (DE_sender) # # A tibble: 6 × 8 # gene p_val avg_log2FC pct.1 pct.2 p_val_adj sender_other_niche # <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> # 1 THBS4 0.0612 9.63 0.055 0 1 myofibroblas… myofibroblast_Low # 2 APOO 0.104 9.29 0.042 0 1 myofibroblas… myofibroblast_Low # 3 LRIG2 0.111 9.03 0.068 0.016 1 myofibroblas… myofibroblast_Low # 4 MFAP2 0.256 8.92 0.021 0 1 myofibroblas… myofibroblast_Low # 5 LGI1 0.0937 8.83 0.045 0 1 myofibroblas… myofibroblast_Low # 6 ICAM2 0.256 8.73 0.021 0 1 myofibroblas… myofibroblast_Low # receiver数量较少,算起来也比较快:DE_receiver <- calculate_niche_de ( seurat_obj = seurat_obj %>% subset ( features = lr_network $ receptor %>% unique ), niches = niches, type = "receiver" , assay_oi = assay_oi) # # A tibble: 1 × 2 # receiver_other_niche # <chr> <chr> # 1 Malignant_High Malignant_Low # [1] "Calculate receiver DE between: Malignant_High and Malignant_Low" # [1] "Calculate receiver DE between: Malignant_Low and Malignant_High" head (DE_receiver) # # A tibble: 6 × 8 # gene p_val avg_log2FC pct.1 pct.2 p_val_adj receiver_other_niche # <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> # 1 CD3D 1.38e- 2 11.8 0.011 0 1 e+ 0 Maligna… Malignant_Low # 2 CTLA4 2.81e- 1 11.4 0.005 0.002 1 e+ 0 Maligna… Malignant_Low # 3 MMP1 2.06e-26 10.6 0.22 0.022 2.03e-23 Maligna… Malignant_Low # 4 ASGR2 6.21e- 1 10.1 0.001 0.002 1 e+ 0 Maligna… Malignant_Low # 5 MMP9 2.82e- 8 9.81 0.08 0.013 2.78e- 5 Maligna… Malignant_Low # 6 CLEC1… 2.20e- 1 9.63 0.003 0 1 e+ 0 Maligna… Malignant_Low # 将无穷大于无穷小的avg_log2FC转换为极值:DE_sender <- DE_sender %>% mutate ( avg_log2FC = ifelse (avg_log2FC == Inf , max (avg_log2FC[ is.finite (avg_log2FC)]), ifelse (avg_log2FC == - Inf , min (avg_log2FC[ is.finite (avg_log2FC)]), avg_log2FC))) DE_receiver <- DE_receiver %>% mutate ( avg_log2FC = ifelse (avg_log2FC == Inf , max (avg_log2FC[ is.finite (avg_log2FC)]), ifelse (avg_log2FC == - Inf , min (avg_log2FC[ is.finite (avg_log2FC)]), avg_log2FC))) 去除表达比例不足10%的基因,并计算最小 avg_log2FC 与平均 avg_log2FC : expression_pct <- 0.10 # process_niche_de:遍历DE_table中的每个基因,检查其在相应细胞群体中的表达比例。

如果某个基因的表达比例低于 expression_pct,则将其从结果中过滤掉。这有助于去除那些可能是噪声或低质量的基因表达信息 # 函数会根据 niches 中定义的细胞生态位信息,将差异表达结果进行分类。

例如,对于发送者细胞,会将结果按照不同的发送者细胞群和生态位进行分组,以便后续分析不同生态位中发送者细胞的基因表达差异 DE_sender_processed <- process_niche_de ( DE_table = DE_sender, niches = niches, expression_pct = expression_pct, type = "sender" ) head (DE_sender_processed) # # A tibble: 6 × 9 # niche sender gene mean_avg_log2FC min_avg_log2FC mean_significant # <chr> <chr> <chr> <dbl> <dbl> <dbl> # 1 pEMT_High_niche myofibr… COL9… 8.85 8.46 0 # 2 pEMT_High_niche myofibr… CALCA 7.45 7.06 0 # 3 pEMT_High_niche myofibr… S100B 7.86 6.97 0 # 4 pEMT_High_niche myofibr… COL2… 7.09 6.70 0 # 5 pEMT_High_niche myofibr… PTPR… 6.83 6.30 0 # 6 pEMT_High_niche myofibr… PRL 6.26 5.87 0 # # ℹ 3 more variables: mean_present <dbl>, mean_score <dbl>, min_score <dbl> DE_receiver_processed <- process_niche_de ( DE_table = DE_receiver, niches = niches, expression_pct = expression_pct, type = "receiver" ) head (DE_receiver_processed) # # A tibble: 6 × 9 # niche receiver gene mean_avg_log2FC min_avg_log2FC mean_significant # <chr> <chr> <chr> <dbl> <dbl> <dbl> # 1 pEMT_High_niche Maligna… CD3D 11.8 11.8 0 # 2 pEMT_High_niche Maligna… CTLA4 11.4 11.4 0 # 3 pEMT_High_niche Maligna… MMP1 10.6 10.6 1 # 4 pEMT_High_niche Maligna… ASGR2 10.1 10.1 0 # 5 pEMT_High_niche Maligna… MMP9 9.81 9.81 1 # 6 pEMT_High_niche Maligna… CLEC… 9.63 9.63 0 # # ℹ 3 more variables: mean_present <dbl>, mean_score <dbl>, min_score <dbl> # 将经过处理的发送者细胞差异表达结果(DE_sender_processed)和接收者细胞差异表达结果(DE_receiver_processed)进行整合,并结合配体 - 受体网络信息(lr_network),同时根据指定的特异性得分方法(specificity_score_LR_pairs)来计算相关得分,最终得到整合后的差异表达结果 DE_sender_receiver # 举例:配体在 sender 的 log2FC 是 1.2,受体在 receiver 是 0.3 → 这个对的得分就是 0.3,说明受体的表达是限制因素 specificity_score_LR_pairs <- "min_lfc" DE_sender_receiver <- combine_sender_receiver_de (DE_sender_processed, DE_receiver_processed, lr_network, specificity_score <- specificity_score_LR_pairs) 3.4 定义感兴趣的基因集 与2.2类似,在开展正式细胞通讯计算前需要预先为每个 niche 确定一个感兴趣的基因集,这里主要研究目的是组间差异,因此这个基因集自然就是Malignant_High与Malignant_Low的差异基因。

# 设定阈值 lfc_cutoff <- 0.15 # 利用min_lfc筛选target:specificity_score_targets <- "min_lfc" # 计算下游目标基因(target genes)在 receiver 中的差异表达 # 使用完整的 Seurat(不需提前 subset) # 每个目标基因的DE信息,结果用于target-based下游分析 DE_receiver_targets <- calculate_niche_de_targets ( seurat_obj = seurat_obj, niches = niches, lfc_cutoff = lfc_cutoff, expression_pct <- expression_pct, assay_oi = assay_oi) # [1] "Calculate receiver DE between: Malignant_High and Malignant_Low" # [1] "Calculate receiver DE between: Malignant_Low and Malignant_High" head (DE_receiver_targets) # # A tibble: 6 × 8 # receiver_other_niche gene p_val avg_log2FC pct.1 pct.2 # <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> # 1 Malignant_High Malignant_Low C9orf152 1 e+ 0 0 NA # 2 Malignant_High Malignant_Low RPS11 1.12e-15 -1.95 0.972 0.995 # 3 Malignant_High Malignant_Low ELMO2 5.23e- 2 -0.271 0.263 0.31 # 4 Malignant_High Malignant_Low CREB3L1 1 e+ 0 0 NA # 5 Malignant_High Malignant_Low PNMA1 1.01e- 9 -1.00 0.199 0.339 # 6 Malignant_High Malignant_Low MMP2 8.78e-30 2.87 0.369 0.106 # # ℹ 1 more variable: p_val_adj <dbl> # 过滤并计算target score # 依据在 receiver 群体中的表达比例(expression_pct)过滤低表达目标基因 # 根据选择的 specificity_score 方法(如 "min_lfc")对 target gene 在各receiver群体中差异表达的 特异性进行打分 DE_receiver_processed_targets <- process_receiver_target_de ( DE_receiver_targets = DE_receiver_targets, niches = niches, expression_pct <- expression_pct, specificity_score = specificity_score_targets) head (DE_receiver_processed_targets) # # A tibble: 6 × 6 # niche receiver target_score target_significant target_present # <chr> <chr> <chr> <dbl> <dbl> <dbl> # 1 pEMT_Low_niche Maligna… CT45A3 15.0 1 1 # 2 pEMT_Low_niche Maligna… CT45A1 14.9 1 1 # 3 pEMT_High_niche Maligna… MSLN 13.8 1 1 # 4 pEMT_Low_niche Maligna… NR0B1 13.3 1 1 # 5 pEMT_High_niche Maligna… SPINK6 13.1 1 1 # 6 pEMT_Low_niche Maligna… DSCR8 13.0 1 1 # 确定背景基因 # 合并所有 receiver 细胞群的 DE 结果 background <- DE_receiver_processed_targets %>% pull (target) %>% unique # niche1的target基因集 geneset_niche1 <- DE_receiver_processed_targets %>% filter (receiver == niches[[ 1 ]] $ receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1 ) %>% pull (target) %>% unique # niche2的target基因集 geneset_niche2 <- DE_receiver_processed_targets %>% filter (receiver == niches[[ 2 ]] $ receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1 ) %>% pull (target) %>% unique # 如果有很多基因在这个过程中被drop out,请检查自己的基因名版本 length (geneset_niche1 %>% setdiff ( rownames (ligand_target_matrix))) # [1] 97 length (geneset_niche2 %>% setdiff ( rownames (ligand_target_matrix))) # [1] 514 # 查看最终的target基因数目 length (geneset_niche1) # [1] 1366 length (geneset_niche2) # [1] 4992 # 查看最终的target数据类型 # 其实最终得到的就是字符串,所以你也可以结合前期研究基础填一些自己真正感兴趣的基因 class (geneset_niche1) # [1] "character" # 查看前五个target基因ID geneset_niche1[ 1 : 5 ] # [1] "MSLN" "SPINK6" "NEFM" "LINC00162" "GPR128" geneset_niche2[ 1 : 5 ] # [1] "CT45A3" "CT45A1" "NR0B1" "DSCR8" "CTAG2" geneSet 的大小通常建议在20~1000之间,背景基因推荐至少5000个。

如果你的DE数量过多,可以设置较高的阈值对基因集进行过滤。

如果你有两个以上的 receiver cells/niches,lfc_cutoff 推荐为 0.15,若只有2个 receiver cells/niches,可以考虑用更高一些的的阈值,例如 0.25,若你的数据为 Smart-seq2 这可高测序深度的数据,可以使用更高的阈值,例如这里,高阈值会让数据的可信度增加:# 这里重新调整阈值,执行和前面相同的步骤 lfc_cutoff <- 0.75 specificity_score_targets <- "min_lfc" DE_receiver_processed_targets <- process_receiver_target_de ( DE_receiver_targets = DE_receiver_targets, niches = niches, expression_pct = expression_pct, specificity_score = specificity_score_targets) # 背景基因 background <- DE_receiver_processed_targets %>% pull (target) %>% unique # niche1的target基因 geneset_niche1 <- DE_receiver_processed_targets %>% filter (receiver == niches[[ 1 ]] $ receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1 ) %>% pull (target) %>% unique # niche2的target基因 geneset_niche2 <- DE_receiver_processed_targets %>% filter (receiver == niches[[ 2 ]] $ receiver & target_score >= lfc_cutoff & target_significant == 1 & target_present == 1 ) %>% pull (target) %>% unique length (geneset_niche1 %>% setdiff ( rownames (ligand_target_matrix))) # [1] 67 length (geneset_niche2 %>% setdiff ( rownames (ligand_target_matrix))) # [1] 427 # 基于新的阈值,得到的每个niche的target的基因数目 length (geneset_niche1) # [1] 1047 length (geneset_niche2) # [1] 4018 3.5 计算配体活性并推断配受体对 # 定在后续分析中选取的前 n 个靶基因的数量 top_n_target <- 250 # 这部分代码创建了一个列表niche_geneset_list,其中包含两个子列表,分别对应两个细胞生态位:pEMT_High_niche 和 pEMT_Low_niche # 每个子列表包含三个元素:# "receiver":指定该生态位下的接收细胞类型,分别从 niches 列表中获取。

# "geneset":指定该生态位下的靶基因集,分别为 geneset_niche1 和 geneset_niche2。# "background":指定背景基因集,这里两个生态位使用相同的背景基因集 background。

niche_geneset_list <- list ( "pEMT_High_niche" <- list ( "receiver" = niches[[ 1 ]] $ receiver, "geneset" = geneset_niche1, "background" = background), "pEMT_Low_niche" <- list ( "receiver" = niches[[ 2 ]] $ receiver, "geneset" = geneset_niche2 , "background" = background) ) # get_ligand_activities_targets 函数来计算配体的活性 # 时间较长,大约十分钟 ligand_activities_targets <- get_ligand_activities_targets ( niche_geneset_list = niche_geneset_list, ligand_target_matrix = ligand_target_matrix, top_n_target = top_n_target) # [1] "Calculate Ligand activities for: Malignant_High" # [1] "Calculate Ligand activities for: Malignant_Low" head (ligand_activities_targets) # # A tibble: 6 × 8 # ligand activity target ligand_target_weight receiver activity_normalized # <chr> <dbl> <chr> <dbl> <chr> <dbl> # 1 A2M 0.103 ANGPTL4 0.00747 Malignant_Hi… 0.275 # 2 A2M 0.103 APP 0.0112 Malignant_Hi… 0.275 # 3 A2M 0.103 ASS1 0.00892 Malignant_Hi… 0.275 # 4 A2M 0.103 ATF3 0.0107 Malignant_Hi… 0.275 # 5 A2M 0.103 BIRC3 0.00988 Malignant_Hi… 0.275 # 6 A2M 0.103 C3 0.00717 Malignant_Hi… 0.275 # # ℹ 2 more variables: scaled_activity_normalized <dbl>, scaled_activity <dbl> 3.6 计算配体、受体、target的表达量 NA。

features_oi <- union (lr_network $ ligand, lr_network $ receptor) %>% union (ligand_activities_targets $ target) %>% setdiff ( NA ) # 对 niches 中所有涉及的细胞类型进行 DotPlot绘图 dotplot <- suppressWarnings ( Seurat :: DotPlot (seurat_obj %>% subset ( idents = niches %>% unlist %>% unique ), features = features_oi, assay = assay_oi)) # 获取每个基因在每种 celltype 的平均表达(avg.exp)、scaled 表达(avg.exp.scaled)和表达细胞百分比(pct.exp) exprs_tbl <- dotplot $ data %>% as_tibble # 对获取得到的列名重新命名、占比转换以及基因排序 exprs_tbl <- exprs_tbl %>% rename ( celltype = id, gene = features.plot, expression = avg.exp, expression_scaled = avg.exp.scaled, fraction = pct.exp) %>% mutate ( fraction = fraction / 100 ) %>% # 转换成百分位数计数 as_tibble %>% select (celltype, gene, expression, expression_scaled, fraction) %>% distinct %>% arrange (gene) %>% mutate ( gene = as.character (gene)) head (exprs_tbl) # # A tibble: 6 × 5 # celltype gene expression_scaled fraction # <fct> <chr> <dbl> <dbl> <dbl> # 1 myofibroblast_High A2M 6208. 1.04 0.966 # 2 myofibroblast_Low A2M 4346. 0.916 0.885 # 3 CAF_High A2M 882. 0.374 0.588 # 4 Malignant_Low A2M 9.10 -1.15 0.0419 # 5 Malignant_High A2M 21.4 -0.875 0.0677 # 6 CAF_Low A2M 349. 0.0600 0.510 # 将大表 exprs_tbl 拆分为三个小表(ligand、receptor 和 target) # exprs_tbl_ligand:每个 sender 细胞中的 ligand 表达 exprs_tbl_ligand <- exprs_tbl %>% filter (gene %in% lr_network $ ligand) %>% rename ( sender = celltype, ligand = gene, ligand_expression = expression, ligand_expression_scaled = expression_scaled, ligand_fraction = fraction) # exprs_tbl_receptor:每个 receiver 细胞中的 receptor 表达 exprs_tbl_receptor <- exprs_tbl %>% filter (gene %in% lr_network $ receptor) %>% rename ( receiver = celltype, receptor = gene, receptor_expression = expression, receptor_expression_scaled = expression_scaled, receptor_fraction = fraction) # exprs_tbl_target:每个 receiver 细胞中的 target 基因表达 exprs_tbl_target <- exprs_tbl %>% filter (gene %in% ligand_activities_targets $ target) %>% rename ( receiver = celltype, target = gene, target_expression = expression, target_expression_scaled = expression_scaled, target_fraction = fraction) # scale_quantile_adapted:自定义分位数标准化函数,用于让不同基因/细胞类型之间的表达值可以比较 # mutate_cond(...):如果表达比例 ≥ expression_pct(比如 0.10),就截断为 expression_pct,避免高比例值主导评分 # 最终保留标准化后的表达值和表达比例,作为后续网络过滤、配体选择等依据 exprs_tbl_ligand <- exprs_tbl_ligand %>% mutate ( scaled_ligand_expression_scaled = scale_quantile_adapted (ligand_expression_scaled)) %>% ligand_fraction_adapted,初始值等于 ligand_fraction mutate ( ligand_fraction_adapted = ligand_fraction) %>% ligand_fraction 大于等于 expression_pct,则将 ligand_fraction_adapted 的值设为 expression_pct mutate_cond (ligand_fraction >= expression_pct, ligand_fraction_adapted = expression_pct) %>% ligand_fraction_adapted 列进行分位数标准化,得到新的列 scaled_ligand_fraction_adapted mutate ( scaled_ligand_fraction_adapted = scale_quantile_adapted (ligand_fraction_adapted)) # receptor 的处理是同样逻辑 # scale_quantile_adapted:自定义分位数标准化函数,用于让不同基因/细胞类型之间的表达值可以比较 # mutate_cond(...):如果表达比例 ≥ expression_pct(比如 0.10),就截断为 expression_pct,避免高比例值主导评分 # 最终保留标准化后的表达值和表达比例,作为后续网络过滤、配体选择等依据 exprs_tbl_receptor <- exprs_tbl_receptor %>% mutate ( scaled_receptor_expression_scaled = scale_quantile_adapted (receptor_expression_scaled)) %>% mutate ( receptor_fraction_adapted = receptor_fraction) %>% mutate_cond (receptor_fraction >= expression_pct, receptor_fraction_adapted = expression_pct) %>% mutate ( scaled_receptor_fraction_adapted = scale_quantile_adapted (receptor_fraction_adapted)) 3.7 通过受体表达量评估配受体对互作强度 # 一是将不同的数据框进行连接操作,整合配体、受体、细胞间通讯的差异表达等信息 # 二是对整合后的数据进行分组计算和排序,得出配体 - 受体 - 接收细胞组合下的综合得分 # 使用 inner_join 函数将lr_network与exprs_tbl_ligand按ligand列进行内连接。

内连接意味着只保留两个数据框中ligand列值相同的行,这样就把配体相关的表达信息添加到 lr_network 中 exprs_sender_receiver <- lr_network %>% inner_join (exprs_tbl_ligand, by = c ( "ligand" )) %>% # 接着将上一步结果与exprs_tbl_receptor按receptor列进行内连接,把受体相关的表达信息也整合进来 inner_join (exprs_tbl_receptor, by = c ( "receptor" )) %>% # 最后将前面的结果与DE_sender_receiver按niche、sender和receiver列进行内连接,并且只保留 DE_sender_receiver 中这三列的唯一行 inner_join (DE_sender_receiver %>% distinct (niche, sender, receiver)) # 按照ligand和receiver列对exprs_sender_receiver数据框进行分组,后续的计算将在每个分组内独立进行 ligand_scaled_receptor_expression_fraction_df <- exprs_sender_receiver %>% group_by (ligand, receiver) %>% # 使用dense_rank函数分别对每个分组内的receptor_expression和receptor_fraction列进行排名,得到新的列rank_receptor_expression和rank_receptor_fraction。

dense_rank函数会为相同的值赋予相同的排名,且排名是连续的 mutate ( rank_receptor_expression = dense_rank (receptor_expression), rank_receptor_fraction = dense_rank (receptor_fraction)) %>% # 计算每个分组内的综合得分ligand_scaled_receptor_expression_fraction。

具体做法是先将rank_receptor_fraction和rank_receptor_expression分别除以各自分组内的最大值,将排名归一化到0到1之间,然后取两者的平均值乘以0.5 mutate ( ligand_scaled_receptor_expression_fraction = 0.5 * ( (rank_receptor_fraction / max (rank_receptor_fraction)) + ((rank_receptor_expression / max (rank_receptor_expression))) )) %>% # 去除 ligand、receptor、receiver 和 ligand_scaled_receptor_expression_fraction 列组合中的重复 distinct (ligand, receptor, receiver, ligand_scaled_receptor_expression_fraction) %>% distinct %>% # 取消分组操作,使数据框恢复到未分组的状态 ungroup 3.8 计算配受体对权重 参与权重计算的参数有:Ligand DE score 、 Scaled ligand expression 、 Ligand expression fraction 、 Ligand spatial DE score 、 Receptor DE score 、 Scaled receptor expression 、 Receptor expression fraction 、 Receptor expression strength 、 Receptor spatial DE score 、 Absolute ligand activity 、 Normalized ligand activity。

prioritizing_weights <- c ( "scaled_ligand_score" = 5 , "scaled_ligand_expression_scaled" = 1 , "ligand_fraction" = 1 , = 2, "scaled_receptor_score" = 0.5 , "scaled_receptor_expression_scaled" = 0.5 , "receptor_fraction" = 1 , "ligand_scaled_receptor_expression_fraction" = 1 , = 0, "scaled_activity" = 0 , "scaled_activity_normalized" = 1 ) # 将之前计算得到的多个数据对象(如差异表达结果、配体 - 受体表达分数数据框等)整合到一个名为 output 的列表中,方便后续在自定义函数中调用这些数据 # DE_sender_receiver 发送细胞和接收细胞的差异表达分析整合结果 # ligand_scaled_receptor_expression_fraction_df 配体 - 受体组合的综合表达情况 # ligand_activities_targets 配体对靶基因的活性信息 # DE_receiver_processed_targets 接收细胞靶基因的差异表达结果 # exprs_tbl_ligand 配体在不同发送细胞中的表达水平、标准化后的表达水平以及表达比例等数据 # exprs_tbl_receptor 不同接收细胞中的表达水平、标准化后的表达水平以及表达比例等 # exprs_tbl_target 靶基因在不同接收细胞中的表达水平、标准化后的表达水平以及表达比例等 output <- list ( DE_sender_receiver = DE_sender_receiver, ligand_scaled_receptor_expression_fraction_df = ligand_scaled_receptor_expression_fraction_df, ligand_activities_targets = ligand_activities_targets, DE_receiver_processed_targets = DE_receiver_processed_targets, exprs_tbl_ligand = exprs_tbl_ligand, exprs_tbl_receptor = exprs_tbl_receptor, exprs_tbl_target = exprs_tbl_target) # 因为我们的数据不涉及空间转录组数据,因此下面这个函数需要修改一下 # prioritization_tables = get_prioritization_tables(output, prioritizing_weights) # 修改为:my_get_prioritization_tables <- function (output_nichenet_analysis, prioritizing_weights) { # 数据连接 combined_information = output_nichenet_analysis $ DE_sender_receiver %>% inner_join (output_nichenet_analysis $ ligand_scaled_receptor_expression_fraction_df, by = c ( "receiver" , "ligand" , "receptor" )) %>% inner_join (output_nichenet_analysis $ ligand_activities_targets, by = c ( "receiver" , "ligand" )) %>% left_join (output_nichenet_analysis $ DE_receiver_processed_targets, by = c ( "niche" , "receiver" , "target" )) %>% inner_join (output_nichenet_analysis $ exprs_tbl_ligand, by = c ( "sender" , "ligand" )) %>% inner_join (output_nichenet_analysis $ exprs_tbl_receptor, by = c ( "receiver" , "receptor" )) %>% left_join (output_nichenet_analysis $ exprs_tbl_target, by = c ( "receiver" , "target" )) # 生成新列 combined_information = combined_information %>% mutate ( ligand_receptor = paste (ligand, receptor, sep = "--" )) # # 选择需要的列并去重 combined_information = combined_information %>% select (niche, receiver, sender, ligand_receptor, ligand, receptor, target, ligand_score, ligand_significant, ligand_present, ligand_expression, ligand_expression_scaled, ligand_fraction, receptor_score, receptor_significant, receptor_present, receptor_expression, receptor_expression_scaled, receptor_fraction, ligand_scaled_receptor_expression_fraction, avg_score_ligand_receptor, target_score, target_significant, target_present, target_expression, target_expression_scaled, target_fraction, ligand_target_weight, activity, activity_normalized, scaled_ligand_score, scaled_ligand_expression_scaled, scaled_receptor_score, scaled_receptor_expression_scaled, scaled_avg_score_ligand_receptor, scaled_ligand_fraction_adapted, scaled_receptor_fraction_adapted, scaled_activity, scaled_activity_normalized) %>% distinct # 计算优先级分数 combined_information_prioritized = combined_information %>% dplyr :: mutate ( prioritization_score = ((prioritizing_weights[ "scaled_ligand_score" ] * scaled_ligand_score) + (prioritizing_weights[ "scaled_ligand_expression_scaled" ] * scaled_ligand_expression_scaled) + (prioritizing_weights[ "scaled_receptor_score" ] * scaled_receptor_score) + (prioritizing_weights[ "scaled_receptor_expression_scaled" ] * scaled_receptor_expression_scaled) + (prioritizing_weights[ "ligand_scaled_receptor_expression_fraction" ] * ligand_scaled_receptor_expression_fraction) + (prioritizing_weights[ "scaled_activity" ] * scaled_activity) + (prioritizing_weights[ "scaled_activity_normalized" ] * scaled_activity_normalized) + (prioritizing_weights[ "ligand_fraction" ] * scaled_ligand_fraction_adapted) + (prioritizing_weights[ "receptor_fraction" ] * scaled_receptor_fraction_adapted) ) * ( 1 / length (prioritizing_weights))) %>% dplyr :: arrange ( - prioritization_score) # 生成配体 - 受体优先级表格 prioritization_tbl_ligand_receptor = combined_information_prioritized %>% select (niche, receiver, sender, ligand_receptor, ligand, receptor, ligand_score, ligand_significant, ligand_present, ligand_expression, ligand_expression_scaled, ligand_fraction, receptor_score, receptor_significant, receptor_present, receptor_expression, receptor_expression_scaled, receptor_fraction, ligand_scaled_receptor_expression_fraction, avg_score_ligand_receptor, activity, activity_normalized, scaled_ligand_score, scaled_ligand_expression_scaled, scaled_receptor_score, scaled_receptor_expression_scaled, scaled_avg_score_ligand_receptor, scaled_ligand_fraction_adapted, scaled_receptor_fraction_adapted, scaled_activity, scaled_activity_normalized, prioritization_score) %>% distinct # 生成配体 - 靶基因优先级表格 prioritization_tbl_ligand_target = combined_information_prioritized %>% select (niche, receiver, sender, ligand_receptor, ligand, receptor, target, target_score, target_significant, target_present, target_expression, target_expression_scaled, target_fraction, ligand_target_weight, activity, activity_normalized, scaled_activity, scaled_activity_normalized, prioritization_score) %>% distinct return ( list ( prioritization_tbl_ligand_receptor = prioritization_tbl_ligand_receptor, prioritization_tbl_ligand_target = prioritization_tbl_ligand_target)) } # 调用my_get_prioritization_tables函数,传入output和prioritizing_weights,得到包含两个优先级表格的 prioritization_tables列表 prioritization_tables <- my_get_prioritization_tables (output, prioritizing_weights) # 分别从 prioritization_tables 的两个表格中筛选出 receiver 为 niches 列表中第一个和第二个元素的 receiver 值的记录,并取前 10 行,用于查看特定接收细胞的高优先级配体 - 受体对和配体 - 靶基因对的信息 prioritization_tables $ prioritization_tbl_ligand_receptor %>% # 配体和受体优先级表格 filter (receiver == niches[[ 1 ]] $ receiver) %>% head ( 10 ) # # A tibble: 10 × 32 # niche receiver sender ligand_receptor ligand receptor ligand_score # <chr> <chr> <chr> <chr> <chr> <chr> <dbl> # 1 pEMT_High_niche Malignan… Myelo… C1QB--LRP1 C1QB LRP1 17.1 # 2 pEMT_High_niche Malignan… T.cel… GZMB--CHRM3 GZMB CHRM3 19.1 # 3 pEMT_High_niche Malignan… T.cel… TNF--TNFRSF21 14.7 # 4 pEMT_High_niche Malignan… T.cel… GZMB--MCL1 GZMB MCL1 19.1 # 5 pEMT_High_niche Malignan… Myelo… IL1RN--IL1R2 IL1RN IL1R2 16.1 # 6 pEMT_High_niche Malignan… Myelo… TNF--TNFRSF21 13.5 # 7 pEMT_High_niche Malignan… Myelo… S100B--ALCAM S100B ALCAM 14.1 # 8 pEMT_High_niche Malignan… Myelo… OSM--OSMR 14.5 # 9 pEMT_High_niche Malignan… T.cel… LTB--LTBR 16.1 # 10 pEMT_High_niche Malignan… Myelo… CCL22--DPP4 CCL22 DPP4 12.5 # # ℹ 25 more variables: ligand_significant <dbl>, ligand_present <dbl>, # # ligand_expression <dbl>, ligand_expression_scaled <dbl>, # # ligand_fraction <dbl>, receptor_score <dbl>, receptor_significant <dbl>, # # receptor_present <dbl>, receptor_expression <dbl>, # # receptor_expression_scaled <dbl>, receptor_fraction <dbl>, # # ligand_scaled_receptor_expression_fraction <dbl>, # # avg_score_ligand_receptor <dbl>, activity <dbl>, … prioritization_tables $ prioritization_tbl_ligand_target %>% # 配体和靶基因优先级表格 filter (receiver == niches[[ 1 ]] $ receiver) %>% head ( 10 ) # # A tibble: 10 × 19 # niche receiver sender ligand_receptor ligand receptor target_score # <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> # 1 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 APP 2.16 # 2 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 AREG 3.58 # 3 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 ASS1 1.10 # 4 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 ATF3 1.41 # 5 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 BIRC3 1.02 # 6 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 C3 1.44 # 7 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 CA2 9.94 # 8 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 CAV1 3.06 # 9 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 CCL20 2.54 # 10 pEMT_Hig… Maligna… Myelo… C1QB--LRP1 C1QB LRP1 CD14 3.36 # # ℹ 11 more variables: target_significant <dbl>, target_present <dbl>, # # target_expression <dbl>, target_expression_scaled <dbl>, # # target_fraction <dbl>, ligand_target_weight <dbl>, activity <dbl>, # # activity_normalized <dbl>, scaled_activity <dbl>, # # scaled_activity_normalized <dbl>, prioritization_score <dbl> prioritization_tables $ prioritization_tbl_ligand_receptor %>% # 配体和受体优先级表格 filter (receiver == niches[[ 2 ]] $ receiver) %>% head ( 10 ) # # A tibble: 10 × 32 # niche receiver sender ligand_receptor ligand receptor ligand_score # <chr> <chr> <chr> <chr> <chr> <chr> <dbl> # 1 pEMT_Low_niche Malignant… CAF_L… WIF1--RYK WIF1 RYK 13.2 # 2 pEMT_Low_niche Malignant… CAF_L… IGSF11--CLEC2L IGSF11 CLEC2L 8.09 # 3 pEMT_Low_niche Malignant… CAF_L… LRRN1--LRRC4 LRRN1 LRRC4 3.25 # 4 pEMT_Low_niche Malignant… myofi… IGFBPL1--DCC IGFBP… DCC 2.79 # 5 pEMT_Low_niche Malignant… CAF_L… LRFN5--PTPRS LRFN5 PTPRS 2.47 # 6 pEMT_Low_niche Malignant… CAF_L… IGSF10--IGSF11 IGSF10 IGSF11 3.63 # 7 pEMT_Low_niche Malignant… CAF_L… MMP13--F12 MMP13 F12 7.90 # 8 pEMT_Low_niche Malignant… CAF_L… SLIT2--SDC1 SLIT2 SDC1 2.35 # 9 pEMT_Low_niche Malignant… CAF_L… FGF14--SCN9A FGF14 SCN9A 1.51 # 10 pEMT_Low_niche Malignant… CAF_L… CHL1--TMEM132A CHL1 TMEM132A 2.71 # # ℹ 25 more variables: ligand_significant <dbl>, ligand_present <dbl>, # # ligand_expression <dbl>, ligand_expression_scaled <dbl>, # # ligand_fraction <dbl>, receptor_score <dbl>, receptor_significant <dbl>, # # receptor_present <dbl>, receptor_expression <dbl>, # # receptor_expression_scaled <dbl>, receptor_fraction <dbl>, # # ligand_scaled_receptor_expression_fraction <dbl>, # # avg_score_ligand_receptor <dbl>, activity <dbl>, … prioritization_tables $ prioritization_tbl_ligand_target %>% # 配体和靶基因优先级表格 filter (receiver == niches[[ 2 ]] $ receiver) %>% head ( 10 ) # # A tibble: 10 × 19 # niche receiver sender ligand_receptor ligand receptor target_score # <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> # 1 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK AKAP1 1.53 # 2 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK AXIN2 4.26 # 3 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BCL11A 2.04 # 4 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BCL6 1.20 # 5 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BDNF 4.98 # 6 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BIRC5 4.70 # 7 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BMP4 0.821 # 8 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BMP7 0.866 # 9 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK BRCA1 1.62 # 10 pEMT_Low… Maligna… CAF_L… WIF1--RYK WIF1 RYK CBX5 2.25 # # ℹ 11 more variables: target_significant <dbl>, target_present <dbl>, # # target_expression <dbl>, target_expression_scaled <dbl>, # # target_fraction <dbl>, ligand_target_weight <dbl>, activity <dbl>, # # activity_normalized <dbl>, scaled_activity <dbl>, # # scaled_activity_normalized <dbl>, prioritization_score <dbl> 3.9 差异可视化 3.9.1 获取top配体与top受体 获取每个 niche 的top50配体 # 从 prioritization_tables 中的 prioritization_tbl_ligand_receptor 数据框里筛选出高优先级的配体、受体以及对应的细胞生态位信息。

下面分别对两段代码的功能进行详细解释:# prioritization_tables$prioritization_tbl_ligand_receptor 数据框中选取 niche(细胞生态位)、sender(发送细胞)、receiver(接收细胞)、ligand(配体)、receptor(受体)和 prioritization_score(优先级得分)这些列,生成一个新的数据子集 top_ligand_niche_df <- prioritization_tables $ prioritization_tbl_ligand_receptor %>% select (niche, sender, receiver, ligand, receptor, prioritization_score) %>% # 按照 ligand 列对数据进行分组,后续的操作会在每个配体组内独立执行 group_by (ligand) %>% # 每个配体组内,根据 prioritization_score 列的值选取优先级得分最高的那一行数据 top_n ( 1 , prioritization_score) %>% # 取消之前的分组操作,使数据恢复到未分组状态 ungroup %>% # 从筛选后的数据中选取 ligand、receptor 和 niche 这三列 select (ligand, receptor, niche) %>% # 将 niche 列重命名为 top_niche,最终得到 top_ligand_niche_df 数据框,该数据框记录了每个配体优先级最高的那一行对应的受体和细胞生态位信息 rename ( top_niche = niche) # 和第一段代码一样,从 prioritization_tables$prioritization_tbl_ligand_receptor 数据框中选取指定的列,生成新的数据子集 top_ligand_receptor_niche_df = prioritization_tables $ prioritization_tbl_ligand_receptor %>% select (niche, sender, receiver, ligand, receptor, prioritization_score) %>% # 按照 ligand 和 receptor 两列对数据进行分组,后续操作会在每个配体 - 受体组合组内独立执行 group_by (ligand, receptor) %>% # 每个配体 - 受体组合组内,根据 prioritization_score 列的值选取优先级得分最高的那一行数据 top_n ( 1 , prioritization_score) %>% # 取消分组操作,让数据回到未分组状态 ungroup %>% # 从筛选后的数据中选取 ligand、receptor 和 niche 这三列 select (ligand, receptor, niche) %>% # 将 niche 列重命名为 top_niche,最终得到 top_ligand_receptor_niche_df 数据框,该数据框记录了每个配体 - 受体组合优先级最高的那一行对应的细胞生态位信息 rename ( top_niche = niche) ligand_prioritized_tbl_oi <- prioritization_tables $ prioritization_tbl_ligand_receptor %>% select (niche, sender, receiver, ligand, prioritization_score) %>% # 按照 ligand(配体)和 niche(细胞生态位)对数据进行分组,后续的操作将在每个配体 - 细胞生态位组合的组内进行 group_by (ligand, niche) %>% # 在每个组内,根据 prioritization_score(优先级得分)选取得分最高的那一行数据,即找出每个配体在每个细胞生态位中的最高优先级记 top_n ( 1 , prioritization_score) %>% ungroup %>% distinct %>% # inner_join 函数将前面筛选得到的数据与top_ligand_niche_df数据框进行内连接。

连接的依据是两个数据框中共同的列,这样可以将 top_ligand_niche_df 中的信息整合到当前数据集中 inner_join (top_ligand_niche_df) %>% # 根据 niche 列和 top_niche 列的值进行筛选,只保留 niche 列的值与 top_niche 列的值相等的行。

这一步进一步筛选出符合特定条件的数据 filter (niche == top_niche) %>% # 按照 niche(细胞生态位)对数据进行分组 group_by (niche) %>% # 在每个细胞生态位组内,根据 prioritization_score(优先级得分)选取得分最高的前 50 行数据 top_n ( 50 , prioritization_score) %>% ungroup # 这段代码通过一系列的数据处理和筛选操作,对配体 - 受体的优先级数据进行了整理和分析,帮助快速获取每个细胞生态位中最重要的配体信息 每个配体获取top2的受体:# 对配体 - 受体的优先级数据进行进一步筛选和处理,以获取特定接收细胞(Malignant_High)相关的高优先级配体 - 受体对信息 receiver_oi <- "Malignant_High" # 从 ligand_prioritized_tbl_oi 数据框中筛选出 receiver 列的值等于 receiver_oi(即 "Malignant_High")的行 # 从筛选后的数据中提取 ligand 列的数据,得到一个包含配体名称的向量 # 去除向量中的重复元素,得到一个包含唯一配体名称的向量 filtered_ligands filtered_ligands <- ligand_prioritized_tbl_oi %>% filter (receiver == receiver_oi) %>% pull (ligand) %>% unique prioritized_tbl_oi <- prioritization_tables $ prioritization_tbl_ligand_receptor %>% # 从 prioritization_tables 列表中的 prioritization_tbl_ligand_receptor 数据框中筛选出 ligand 列的值在 filtered_ligands 向量中的行 filter (ligand %in% filtered_ligands) %>% # 选取筛选后数据中的 niche(细胞生态位)、sender(发送细胞)、receiver(接收细胞)、ligand(配体)、receptor(受体)、ligand_receptor(配体 - 受体组合)和prioritization_score(优先级得分)这些列 select (niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% # 去除重复的行,确保数据的唯一 distinct %>% # 将前面处理的数据与 top_ligand_receptor_niche_df 数据框进行内连接 inner_join (top_ligand_receptor_niche_df) %>% # 按照 ligand(配体)对连接后的数据进行分组,并在每个组内筛选出 receiver 列的值等于 receiver_oi(即 "Malignant_High")的行 group_by (ligand) %>% filter (receiver == receiver_oi) %>% # 每个配体组内,根据 prioritization_score(优先级得分)选取得分最高的前 2 行数据 top_n ( 2 , prioritization_score) %>% ungroup # 上面这一步你也可以从"pEMT-low"的角度进行可视化:if (F){ receiver_oi = "Malignant_Low" filtered_ligands = ligand_prioritized_tbl_oi %>% filter (receiver == receiver_oi) %>% top_n ( 50 , prioritization_score) %>% pull (ligand) %>% unique prioritized_tbl_oi = prioritization_tables $ prioritization_tbl_ligand_receptor %>% filter (ligand %in% filtered_ligands) %>% select (niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct %>% inner_join (top_ligand_receptor_niche_df) %>% group_by (ligand) %>% filter (receiver == receiver_oi) %>% top_n ( 2 , prioritization_score) %>% ungroup lfc_plot = make_ligand_receptor_lfc_plot (receiver_oi, prioritized_tbl_oi, prioritization_tables $ prioritization_tbl_ligand_receptor, plot_legend = FALSE , heights = NULL , widths = NULL ) lfc_plot } 3.9.2 可视化 查看配体在 Sender 的组间差异,查看受体在 Receiver 中的组间差异:# 绘制配体-受体对的 log fold change(LFC)差异表达热图 # LFC 越高,表示该基因在该 niche 中差异表达越显著 # receiver_oi:receiver(接收细胞类型)"Malignant_High" # prioritized_tbl_oi根据优先级筛选出来的配体-受体表 # prioritization_tables$prioritization_tbl_ligand_receptor:完整的配体-受体优先级评分表 lfc_plot <- make_ligand_receptor_lfc_plot (receiver_oi, prioritized_tbl_oi, prioritization_tables $ prioritization_tbl_ligand_receptor, plot_legend = FALSE , # 不显示图例 heights = NULL , # 采用默认比例 widths = NULL ) lfc_plot 查看配体表达量、配体活性以及target genes与配体的关联度 # 可视化在指定接收细胞类型(receiver_oi)中,配体-靶点信号活性和表达信息的整体概况 lfc_cutoff = 3 # 1.配体表达图, # 展示选定的发送细胞中,配体的表达强,来源于 output$exprs_tbl_ligand # 横轴为发送细胞类型,纵轴为配体,颜色表示表达水平 # 2.信号活性图(Ligand Activity Plot):# 展示每个配体对于目标基因的调控活性(来源于 ligand_target_matrix 计算的活性分值,通常为 Pearson 分数或调控得分)。

# 通常表现为热图或者点图,纵轴为配体,横轴为 target 基因,颜色表示活性分值 exprs_activity_target_plot <- make_ligand_activity_target_exprs_plot ( receiver_oi, prioritized_tbl_oi, # 包含所有配体 - 受体对优先级信息的数据框 prioritization_tables $ prioritization_tbl_ligand_receptor, # 存储配体 - 靶基因对优先级信息的数据框 prioritization_tables $ prioritization_tbl_ligand_target, # 记录配体基因和靶基因表达信息的数据框 output $ exprs_tbl_ligand, output $ exprs_tbl_target, # 对数倍数变化(Log2 Fold Change)的阈值,用于筛选具有显著表达差异 lfc_cutoff, ligand_target_matrix, plot_legend = FALSE , heights = NULL , widths = NULL ) exprs_activity_target_plot $ combined_plot 展示top20 # 从 ligand_prioritized_tbl_oi 数据框中筛选出receiver列等于receiver_oi("Malignant_High")的行 # 对筛选后的行,按照 prioritization_score(优先级得分)从高到低排序,选取前20行 # 提取这些行中的ligand列,并去除重复的配体名称,得到一个包含20个高优先级配体的向量filtered_ligands filtered_ligands <- ligand_prioritized_tbl_oi %>% filter (receiver == receiver_oi) %>% top_n ( 20 , prioritization_score) %>% pull (ligand) %>% unique # 从prioritization_tables$prioritization_tbl_ligand_receptor数据框中筛选出ligand列的值在filtered_ligands 向量中的行 # 选取筛选后数据中的 niche(细胞生态位)、sender(发送细胞)、receiver(接收细胞)、ligand(配体)、receptor(受体)、ligand_receptor(配体 - 受体组合)和 prioritization_score(优先级得分)这些列,并去除重复行 # 将处理后的数据与top_ligand_receptor_niche_df 数据框进行内连接,整合更多相关信息 # 按照 ligand 对连接后的数据进行分组,并在每个组内筛选出receiver列等于 receiver_oi 的行 # 对每个配体组内的行,按照 prioritization_score 从高到低排序,选取前2行,最后取消分组操作,得到一个新的数据框 prioritized_tbl_oi,包含了特定接收细胞相关的高优先级配体 - 受体对信息 prioritized_tbl_oi <- prioritization_tables $ prioritization_tbl_ligand_receptor %>% filter (ligand %in% filtered_ligands) %>% select (niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct %>% inner_join (top_ligand_receptor_niche_df) %>% group_by (ligand) %>% filter (receiver == receiver_oi) %>% top_n ( 2 , prioritization_score) %>% ungroup # 生成绘图数据 exprs_activity_target_plot <- make_ligand_activity_target_exprs_plot ( receiver_oi, prioritized_tbl_oi, prioritization_tables $ prioritization_tbl_ligand_receptor, prioritization_tables $ prioritization_tbl_ligand_target, output $ exprs_tbl_ligand, output $ exprs_tbl_target, lfc_cutoff, ligand_target_matrix, plot_legend = FALSE , heights = NULL , widths = NULL ) # 整合和可视化细胞间通讯中与特定接收细胞相关的高优先级发送细胞中的配体表达、配体到下游基因的信号调控活性和接收细胞中靶点基因的表达 exprs_activity_target_plot $ combined_plot 配受体对的 circos 图片 # 选择优先级排名top15的配体 filtered_ligands <- ligand_prioritized_tbl_oi %>% filter (receiver == receiver_oi) %>% top_n ( 15 , prioritization_score) %>% pull (ligand) %>% unique # 保留每个配体在当前 receiver(receiver_oi)下得分最高的两个配体-受体对 prioritized_tbl_oi <- prioritization_tables $ prioritization_tbl_ligand_receptor %>% filter (ligand %in% filtered_ligands) %>% select (niche, sender, receiver, ligand, receptor, ligand_receptor, prioritization_score) %>% distinct %>% inner_join (top_ligand_receptor_niche_df) %>% group_by (ligand) %>% filter (receiver == receiver_oi) %>% top_n ( 2 , prioritization_score) %>% ungroup # 设置颜色 # brewer.pal(n, name = "Spectral"):从 "Spectral" 调色板中选取 n 种颜色 # magrittr::set_names:将这些颜色与sender列的不同值一一对应,得到一个命名向量colors_sender,用于在绘图时为不同的发送细胞类型分配颜色 colors_sender <- brewer.pal ( n = prioritized_tbl_oi $ sender %>% unique %>% sort %>% length , name = "Spectral" ) %>% magrittr :: set_names (prioritized_tbl_oi $ sender %>% unique %>% sort ) # receiver统一用lavender色(淡紫色),这里的receiver为Malignant_High colors_receiver <- c ( "lavender" ) %>% magrittr :: set_names (prioritized_tbl_oi $ receiver %>% unique %>% sort ) # 这个函数把上面筛选出的配体-受体对绘制成一个 circos plot,显示它们的连接关系,并使用 sender/receiver 的颜色编码 circos_output <- make_circos_lr (prioritized_tbl_oi, colors_sender, colors_receiver) Sys.time # [1] "2025-05-15 11:29:40 CST" save.image ( "result_nichenet/nichenet.rdata" )

← 上一章 下一章 →