Score gene signatures

  • Related to Figure 3 and Figure 6.
renin_seurat <- subset(lineage_named,
    idents = c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "fbJG", "JG"), invert = F)
renin_seurat$genotype <- factor(renin_seurat$genotype, levels=c("KO", "WT"))

IFRG_genes <- unique(c("Tnfrsf1a", "Tnfrsf1b", "Tnfrsf21", "Tnfrsf19",
    "Il15ra", "Il17f", "Cd55", "Cd300c", "Tnfsf15", "Csf1", "Havcr2",
    "Il1r1", "Il18r1", "Lifr", "Cxcl1", "Ros1", "Plaur", "Acvr2a",
    "Klf6", "Gabbr1", "Aplnr", "Sphk1", "Pdpn", "Adora2b", "Ripk2", "Ptges",
    "Cxcl9", "Wnt7a", "Adora2b", "Hrh1", "Tnfsf9", "Kl", "Agt",
    "Mc1r", "Il17d"))

hypoxia_genes <- unique(c("Plaur", "Pabpc1l", "Slc16a12", "Nfe2l3",
    "Kcnab1", "Gpc3", "Kif5a", "Plaur", "Ankzf1", "Ets1", "Selenbp1", "Aldoa",
    "Angptl4", "Anln", "Mrgbp", "Cdkn3", "Col4a6", "Dcbld1", "Eno1",
    "Fam83b", "Fosl1", "Gnai1", "Hilpda", "Kctd11", "Krt17", "Ldha", "P4ha1",
    "Pgam1", "Pgk1", "Sdc1", "Slc16a1", "Slc2a1", "Tpi1", "Vegfa", "Cygb",
    "Hif1a", "Arnt"))

sasp_genes <- unique(c("Il6", "Il1a", "Il1b", "Il1r1", "Il1rn", "Il7",
    "Il7r", "Il15",  "Cxcl1", "Cxcl2", 
    "Ccl8", "Ccl20", "Ccl11", "Ccl25",
    "Mif", "Fn1", "Areg", "Nrg1", "Fgf2",
    "Hgf", "Fgf7", "Vegfa", "Ang", "Pigf",
    "Mmp3", "Mmp12", "Mmp13", "Mmp14", "Timp2",
    "Serpine1", "Ctsb", "Icam1", "Tnfrsf11b",
    "Fas", "Plaur", "Il6st", "Egfr", "Ptger2", "Nos1",
    "Cdkn1a", "Cdkn2a", "Lmnb1", "Bcl2l1", "Bcl2l2",
    "Hsp90aa1", "Hsp90ab1", "Igf1", "Igf1r", "Igf2",
    "Igfbp1", "Igfbp2", "Igfbp3", "Igfbp4", "Igfbp5",
    "Igfbp6", "Igfbp7"))

matrisome <- fread('matrisome_gene_list_mouse.csv')
matrisome$`Category` <- stringr::str_replace_all(matrisome$`Category`, " ", "_")
matrisome$`Category` <- stringr::str_replace_all(matrisome$`Category`, "-", "_")
matrisome$`Division` <- stringr::str_replace_all(matrisome$`Division`, " ", "_")
matrisome$`Gene Symbol` <- stringr::str_replace(matrisome$`Gene Symbol`, "Ctgf", "Ccn2")
matrisome$`Gene Symbol` <- stringr::str_replace(matrisome$`Gene Symbol`, "Emilin3", "Mmrn2")
matrisome$`Gene Symbol` <- stringr::str_replace(matrisome$`Gene Symbol`, "Vwa9", "Ints14")
matrisome$`Gene Symbol` <- stringr::str_replace(matrisome$`Gene Symbol`, "Wisp2", "Ccn5")
matrisome$`Gene Symbol` <- stringr::str_replace(matrisome$`Gene Symbol`, "Lepre1", "P3h1")
matrisome$`Gene Symbol` <- stringr::str_replace(matrisome$`Gene Symbol`, "Leprel1", "P3h2")
ecm_genes <- list(ECM_Glycoproteins=matrisome[matrisome$Category == "ECM_Glycoproteins",]$`Gene Symbol`,
                  Collagens=matrisome[matrisome$Category == "Collagens",]$`Gene Symbol`,
                  ECM_Regulators=matrisome[matrisome$Category == "ECM_Regulators",]$`Gene Symbol`,
                  ECM_affiliated_Proteins=matrisome[matrisome$Category == "ECM_affiliated_Proteins",]$`Gene Symbol`,
                  Proteoglycans=matrisome[matrisome$Category == "Proteoglycans",]$`Gene Symbol`,
                  Secreted_Factors=matrisome[matrisome$Category == "Secreted_Factors",]$`Gene Symbol`)

Score gene sets to identify CAAH gene signature

cpm_obj  <- NormalizeData(renin_seurat, method = "RC", scale.factor = 1e6)
data_mat <- as.matrix(SeuratObject::LayerData(cpm_obj, assay = "RNA", layer = "counts"))
caah_scores <- ScoreSignatures_UCell(data_mat, features=list(CAAH=current_top_scoring_genes))

num_permutations <- 3500
for (i in 1:num_permutations) {
    message(paste0("round: ", i))
    sampled_gene_list <- base::sample(current_top_scoring_genes, 10, replace=FALSE)
    caah_scores <- ScoreSignatures_UCell(data_mat,
        features=list(CAAH=sampled_gene_list),
        ncores=parallel::detectCores()/6)
    melted <- reshape2::melt(caah_scores)
    colnames(melted) <- c("Cell", "Signature", "UCell_score")
    melted_dt <- as.data.table(melted)
    melted2 <- merge(melted_dt, renin_metadata, by.x="Cell", by.y="rn")
    sampled_trt_caah_score <- mean(melted2[genotype == "KO",]$UCell_score)
    sampled_ctrl_caah_score <- mean(melted2[genotype == "WT",]$UCell_score)
    sampled_score_diff <- sampled_trt_caah_score - sampled_ctrl_caah_score
    if (sampled_score_diff > caah_score_diff) {
        message(paste0("gene_list_", i, " is better"))
        trt_caah_score  <- sampled_trt_caah_score
        ctrl_caah_score <- sampled_ctrl_caah_score
        caah_score_diff <- sampled_score_diff
        final_gene_list <- sampled_gene_list 
        dt <- data.table(gene_list = sampled_gene_list,
                         trt_score = sampled_trt_caah_score,
                         ctrl_score = sampled_ctrl_caah_score,
                         diff_score = caah_score_diff)
        write.csv(dt, paste0("caah_scores/gene_list_", i, ".csv"))
    }
}

# final set between both models
caah_genes <- c("Lamp2", "Igf1", "Cdh13", "Col4a2", "Sod1", "Timp3",
                "Wfdc2", "Gpc4", "Hsp90ab1", "S100a9")

Custom function to make this easy to execute over and over.

scoreGeneList <- function(object, gene_list, name="CAAH_list", outdir="figures/CAAH/") {
    object <- AddModuleScore_UCell(object, features=list(Signature=gene_list))
    scores_dt <- as.data.table(object@meta.data, keep.rownames=T)[, c("rn", "genotype", "cell_type", "Signature_UCell", "Pseudotime")]
    trt_score  <- mean(scores_dt[genotype == "KO",]$Signature_UCell)
    ctrl_score <- mean(scores_dt[genotype == "WT",]$Signature_UCell)
    score_diff <- trt_score - ctrl_score

    idents_plt <- VlnPlot_scCustom(object, features = "Signature_UCell",
                    alpha=0.1, group.by = "cell_type", split.by="genotype",
                    pt.size = 0, color_seed = 99) +
          custom_theme() +
          ggforce::geom_sina(seed=99, binwidth=100, adjust=4) +
          theme(plot.title = element_text(size = 15/.pt),
                axis.text.x = element_text(size = 12/.pt, angle = 90,
                                           hjust = 1, vjust = 0.5),
                axis.text.y = element_text(size = 12/.pt),
                axis.title.y = element_text(size = 15/.pt)) +
          labs(x="", y=paste0(name, " Enrichment Score (normalized U Statistic)"))
    idents_plt <- idents_plt + labs(
              caption = paste0("KO ", name, " Score: ", round(trt_score, 3), "\n",
                               "WT ", name, " Score: ", round(ctrl_score, 3), "\n",
                               "Difference: ", round(score_diff, 3)))
    ggsave(paste0(outdir, "ren1c_trajectory-only_", name, "_", Sys.Date(), ".png"),
       plot = idents_plt, width = 90, height = 90, units = "mm", dpi = 300)
    ggsave(paste0(outdir, "ren1c_trajectory-only_", name, "_", Sys.Date(), ".svg"),
       plot = idents_plt, width = 90, height = 90, units = "mm", dpi = 300)

    genotype_plt <- VlnPlot_scCustom(object, features = "Signature_UCell", alpha=0.1,
                       group.by = "genotype",
                       pt.size = 0, color_seed = 99) +
          custom_theme() +
          ggforce::geom_sina(seed=99, binwidth=100, adjust=4) +
          theme(plot.title = element_text(size = 15/.pt),
                axis.text.x = element_text(size = 12/.pt, angle = 90,
                                           hjust = 1, vjust = 0.5),
                axis.text.y = element_text(size = 12/.pt),
                axis.title.y = element_text(size = 15/.pt)) +
          labs(x="", y=paste0(name, " Enrichment Score (normalized U Statistic)"))
    genotype_plt <- genotype_plt + labs(
              caption = paste0("KO ", name, " Score: ", round(trt_score, 3), "\n",
                               "WT ", name, " Score: ", round(ctrl_score, 3), "\n",
                               "Difference: ", round(score_diff, 3)))
    ggsave(paste0(outdir, "ren1c_trajectory-only_genotype-split_", name, "_", Sys.Date(), ".png"),
       plot = genotype_plt, width = 90, height = 90, units = "mm", dpi = 300)
    ggsave(paste0(outdir, "ren1c_trajectory-only_genotype-split_", name, "_", Sys.Date(), ".svg"),
       plot = genotype_plt, width = 90, height = 90, units = "mm", dpi = 300)

    cpm_obj  <- NormalizeData(object, method = "RC", scale.factor = 1e6)
    data_mat <- as.matrix(SeuratObject::LayerData(cpm_obj, assay = "RNA", layer = "counts"))
    scores <- ScoreSignatures_UCell(data_mat, features=list(Signature=gene_list))
    melted <- reshape2::melt(scores)
    colnames(melted) <- c("Cell", "Signature", "UCell_score")
    melted_dt <- as.data.table(melted)
    object_metadata <- as.data.table(object@meta.data, keep.rownames=T)[, c("cell_type", "genotype", "rn")]
    melted2 <- merge(melted_dt, object_metadata, by.x="Cell", by.y="rn")
    signature.lm <- lm(UCell_score ~ cell_type * genotype, data=melted2)
    signature.pw <- pairs(emmeans(signature.lm, ~ genotype | cell_type))
    signature_pvals <- as.data.table(test(signature.pw, by = NULL, adjust = "BH"))
    fwrite(signature_pvals,
        file.path(outdir, paste0(name, "_EMMs_BHadjusted_pvalues.csv")),
        sep=",")
}

Score CAAH related genes from the final set.

scoreGeneList(object = renin_seurat, gene_list = caah_genes,
              name = "CAAH", outdir="figures/CAAH/")

Score additional gene signatures

  1. Senescence-associated secretory phenotype genes (SASP)
  2. Inflammataion related genes (IFRG)
  3. Hypoxia related genes
  4. Extracellular matrix genes (ECM); the "matrisome"
scoreGeneList(object = renin_seurat, gene_list = sasp_genes,
              name = "SASP", outdir="figures/SASP/")
scoreGeneList(object = renin_seurat, gene_list = IFRG_genes,
              name = "IFRG", outdir="figures/IFRG/")
scoreGeneList(object = renin_seurat, gene_list = hypoxia_genes,
              name = "Hypoxia", outdir="figures/hypoxia/")
scoreGeneList(object = renin_seurat, gene_list = ecm_genes,
              name = "ECM", outdir="figures/ECM/")

Individual hypoxia and glycolytic gene expression

For Figure 3c (violin plots of key hypoxia-responsive and glycolytic genes in Ren1c KO vs WT renin-lineage cells).

key_genes       <- c("Hif1a", "Epas1", "Epo", "Epor", "Vegfa",
                     "Hk2", "Pfkm", "Pkm", "Ldha")
key_genes_avail <- key_genes[key_genes %in% rownames(renin_seurat)]

p_key <- VlnPlot(renin_seurat, features = key_genes_avail,
                 group.by = "genotype", pt.size = 0, ncol = 5) &
    scale_fill_manual(values = c("KO" = "#D73027", "WT" = "#4575B4")) &
    theme_classic(base_size = 9) &
    theme(axis.title.x = element_blank(), legend.position = "none")

ggsave(paste0(outdir, "ren1c_key-hypoxia-glycolytic-genes_violin_", Sys.Date(), ".svg"),
       p_key, width = 200, height = 80, units = "mm", dpi = 300)