3E. Construct the inferred gene regulatory network

motif1 <- Tranfac201803_Mm_MotifTFsF
candidate_tfs <- unlist(strsplit(motif1$TFs,';'))

lineage_named <- AddMetaData(
  object = lineage_named,
  metadata = cds_integrated_filtered@principal_graph_aux@listData$UMAP$pseudotime,
  col.name = "Pseudotime"
)
cds_integrated_filtered_subset <- cds_integrated_filtered[,rownames(lineage_named@meta.data[lineage_named@meta.data$cell_type %in% c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "JG", "fbJG"),])]

saveRDS(cds_integrated_filtered_subset, paste0("cds-integrated-filtered-subset_", Sys.Date(), ".rds"))

integrated_fit_models <- fit_models(cds_integrated_filtered_subset,
    model_formula_str = "~cell_type")
integrated_fit_coefs <- coefficient_table(integrated_fit_models)
integrated_sig_genes <- integrated_fit_coefs %>% 
         dplyr::filter(term != "(Intercept)") %>%
         dplyr::filter (q_value < 0.01) %>%
         dplyr::filter (num_cells_expressed > 0) %>%
         dplyr::filter (normalized_effect > 0) %>%    # [maybe dont need this one?](https://github.com/cole-trapnell-lab/monocle3/issues/307)
         dplyr::select(gene_short_name, term, q_value, estimate)

saveRDS(integrated_sig_genes, paste0("figures/IReNA/integrated/integrated_lineage-named_sig-genes_trajectory-pops-only_", Sys.Date(), ".rds"))

pops_of_interest <- c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "fbJG", "JG")
lineage_named_trj <- subset(lineage_named, idents = pops_of_interest, invert = F)
lineage_named_trj$cell_type <- factor(lineage_named_trj$cell_type, levels=
    c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "fbJG", "JG"))
saveRDS(lineage_named_trj, "figures/IReNA/integrated/lineage_named_trj.rds")

TFs_integrated <- candidate_tfs[candidate_tfs%in%rownames(lineage_named_trj)]
matrix_integrated_tf <- lineage_named_trj@assays$RNA@layers$counts
matrix_integrated_rownames <- rownames(lineage_named_trj@assays$RNA@features@.Data)
rownames(matrix_integrated_tf) <- matrix_integrated_rownames
final_integrated_matrix <- matrix_integrated_tf[TFs_integrated,]
cells_quantile <- 0.05
quantile_integrated_exp <- ncol(as.matrix(final_integrated_matrix))/(1/cells_quantile)
TfExp_integrated <- final_integrated_matrix[ncol(as.matrix(final_integrated_matrix))-rowSums(as.matrix(final_integrated_matrix==0))>quantile_integrated_exp,]
expressed_integrated_tf <- rownames(TfExp_integrated)
expressed_tf_integrated_filt <- expressed_integrated_tf[expressed_integrated_tf%in%integrated_sig_genes$gene_short_name]

refined_integrated_seurat <- subset(lineage_named_trj, features = c(expressed_tf_integrated_filt, integrated_sig_genes$gene_short_name))
integrated_trj_dimplot <- DimPlot_scCustom(refined_integrated_seurat, colors_use = pal_lineage,
    reduction="umap_cca", pt.size = 1, label=TRUE)
integrated_trj_split_dimplot <- DimPlot_scCustom(refined_integrated_seurat, 
    reduction="umap_cca", pt.size = 1, label=TRUE, split.by="genotype")
ggsave(paste0("figures/IReNA/integrated/", sample_name, "_UMAP-plt_", Sys.Date(), ".png"),
       plot = integrated_trj_dimplot | integrated_trj_split_dimplot, width=720, height=180, units = "mm", dpi = 300)
ggsave(paste0("figures/IReNA/integrated/", sample_name, "_UMAP-plt_", Sys.Date(), ".svg"),
       plot = integrated_trj_dimplot | integrated_trj_split_dimplot, width=720, height=180, units = "mm", dpi = 300)

FC <- TRUE; Bin <- 50; method <- 'Pseudotime'; FcType <- "Q95"; FcType1 <- FcType; ByBin1 <- c("Equal.Pseudotime")
true_max <- max(refined_integrated_seurat$Pseudotime[is.finite(refined_integrated_seurat$Pseudotime)])
refined_integrated_seurat$Pseudotime[is.infinite(refined_integrated_seurat$Pseudotime)] <- true_max
obj <- refined_integrated_seurat
TotalBin1      <- Bin
Pseudotime1    <- obj@meta.data
SmoothLength1  <- TotalBin1
PseudotimeBin1 <- seq(min(Pseudotime1$Pseudotime), max(Pseudotime1$Pseudotime),
                      length.out = SmoothLength1 + 1)
Exp1 <- as.matrix(obj@assays$RNA@layers$data)
colnames(Exp1) <- colnames(obj)
Exp2 <- array(0, dim = c(nrow(Exp1), SmoothLength1))
for (i in 1:(length(PseudotimeBin1) - 1)) {
    if (ByBin1[1] == "Equal.Pseudotime") {
      if (i == length(PseudotimeBin1) - 1) {
        Cells1 <- Pseudotime1[Pseudotime1$Pseudotime >= PseudotimeBin1[i] &
                                Pseudotime1$Pseudotime <= PseudotimeBin1[i + 1], ]
      } else {
        Cells1 <- Pseudotime1[Pseudotime1$Pseudotime >= PseudotimeBin1[i] &
                                Pseudotime1$Pseudotime < PseudotimeBin1[i + 1], ]
      }
    }
    if (nrow(Cells1) > 1) {
      Exp2[, i] <- rowMeans(Exp1[, match(rownames(Cells1), colnames(Exp1))])
    } else if (nrow(Cells1) == 1) {
      Exp2[, i] <- Exp1[, match(rownames(Cells1), colnames(Exp1))]
    }
}
rownames(Exp2) <- rownames(Exp1)
Exp1 <- Exp2
Prob1 <- c(0.05, 0.95)
Fc1 <- apply(Exp1, 1, function(x1) {
  x12 <- quantile(x1, probs = Prob1)
  x2 <- x12[2] - x12[1]
  return(x2)
})
Exp2 <- cbind(Fc1, Exp1)
colnames(Exp2)[1:ncol(Exp2)] <- c(paste0("FoldChange", FcType1),
                                  paste0("SmExp", 1:ncol(Exp1)))
var1 <- as.data.frame(Exp2)
integrated_expression_profile <- var1
rownames(integrated_expression_profile) <- rownames(obj)
saveRDS(integrated_expression_profile,
    paste0("integrated_lineage-named_trajectory-only_expression-profile_", Sys.Date(), ".rds"))

# Drop mito/ribo/hemo genes
features  <- rownames(integrated_expression_profile)
no_ribo   <- features[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rpsa", features)]
no_ribo_mito <- sort(no_ribo[!grepl("^mt-(.*)", no_ribo)])
no_ribo_mito_hemo <- sort(no_ribo_mito[!grepl("^H2-(.*)", no_ribo_mito)])
filtered_features <- sort(no_ribo_mito_hemo[!grepl("^Hb[ab]-(.*)", no_ribo_mito_hemo)])
integrated_expression_profile <- integrated_expression_profile[rownames(integrated_expression_profile) %in% filtered_features, ] 
# Filter noise and logFC in expression profile
integrated_expression_profile_filter <- filter_expression_profile(integrated_expression_profile, FC=0.25, filterfc = T)
saveRDS(integrated_expression_profile_filter,
    paste0("figures/IReNA/integrated/integrated_lineage-named_trajectory-only_expression-profile-filter_", Sys.Date(), ".rds"))

# Determine optimum k
library(factoextra)
library(NbClust)
RNA1 <- t(scale(t(integrated_expression_profile_filter)))
NbClust(RNA1[,-1], method = 'complete', index = 'all')$Best.nc

elbow_plt <- fviz_nbclust(RNA1, kmeans, method = "wss") +
  geom_vline(xintercept = 2, linetype = 2)+
  labs(subtitle = "Elbow method")
silhouette_plt <- fviz_nbclust(RNA1, kmeans, method = "silhouette")+
  labs(subtitle = "Silhouette method")
gap_plt <- fviz_nbclust(RNA1, kmeans, nstart = 25,  k.max = 10, 
    method = "gap_stat", nboot = 50) +
  labs(subtitle = "Gap statistic method")

ggsave(paste0("figures/IReNA/integrated/", sample_name,
              "_optimum-cluster-plts_", Sys.Date(), ".png"),
       plot = elbow_plt | silhouette_plt | gap_plt, units = "mm", dpi = 300)
ggsave(paste0("figures/IReNA/integrated/", sample_name,
              "_optimum-cluster-plts_", Sys.Date(), ".svg"),
       plot = elbow_plt | silhouette_plt | gap_plt, units = "mm", dpi = 300)

# choose 3
clustering_integrated <- clustering_Kmeans(integrated_expression_profile_filter, K1=3)
saveRDS(clustering_integrated ,"figures/IReNA/integrated/integrated_lineage-named-trajectory-only_clustering_integrated.rds")
col1 <- c('#67C1E3','#EF9951','#00BFC4','#AEC7E8','#C067A9','#E56145','#2F4F4F')
plt <- plot_kmeans_pheatmap(clustering_integrated, ModuleColor1 = col1,
                            Range1=c(-5,5), NumRowBlank1=1, ModuleScale1 = 20)
ggsave(paste0("figures/IReNA/integrated", sample_name, "_IReNA-heatmap_", Sys.Date(), ".png"),
       plot = plt, create.dir=TRUE, units = "mm", dpi = 300)
ggsave(paste0("figures/IReNA/integrated", sample_name, "_IReNA-heatmap_", Sys.Date(), ".svg"),
       plot = plt, create.dir=TRUE, units = "mm", dpi = 300)

ensembl_ids <- mapIds(org.Mm.eg.db, keys = rownames(clustering_integrated), keytype = "SYMBOL", column="ENSEMBL")
ensembl_ids <- na.omit(ensembl_ids)
ensembl_key <- as.data.table(ensembl_ids, keep.rownames=TRUE)
fwrite(ensembl_key, "figures/IReNA/integrated/ensembl_key_integrated-trajectory-only.csv", sep=",")
ensembl_df <- data.frame(`Symbol` = ensembl_key$rn,
                         NCBIID = character(length(ensembl_key$rn)),
                         OfficialSymbol = ensembl_key$rn,
                         GRCm39_locus = character(length(ensembl_key$rn)))
rownames(ensembl_df) <- ensembl_key$ensembl_ids

integrated_Kmeans_clustering_ENS <- add_ENSID(clustering_integrated, GeneInf1 = ensembl_df)
saveRDS(integrated_Kmeans_clustering_ENS, paste0("figures/IReNA/integrated/integrated_lineage-named_trajectory-only_Kmeans_clustering_ENS.rds"))

enrichment_GO_integrated <- enrich_module(integrated_Kmeans_clustering_ENS, org.Mm.eg.db,
                                  'GO', organism = 'mmu')
saveRDS(enrichment_GO_integrated, paste0("figures/IReNA/integrated/integrated_enrichment_GO_lineage-named_trajectory-only.rds"))

refined_integrated_matrix <- as.matrix(refined_integrated_seurat@assays$RNA@layers$data)
rownames(refined_integrated_matrix) <- rownames(refined_integrated_seurat)
saveRDS(refined_integrated_seurat, paste0("figures/IReNA/integrated/integrated_refined-seurat_lineage-named_trajectory-pops-only_", Sys.Date(), ".rds"))
saveRDS(refined_integrated_matrix, paste0("figures/IReNA/integrated/integrated_refined-matrix_lineage-named_trajectory-pops-only_", Sys.Date(), ".rds"))

# GENIE3
weightMat <- GENIE3(refined_integrated_matrix, nCores = 8)
weightMat <- getLinkList(weightMat)
regulation <- weightMat[weightMat[,3]>0.0002,]
### add regulation type for each gene pair
regulatory_relationships <- add_regulation_type(integrated_Kmeans_clustering_ENS, regulation)
### check whether source genes are transcription factors
motifTF <- c()
for (i in 1:nrow(motif1)) {
  TF <- strsplit(motif1[i,5],';')[[1]]
  motifTF <- c(motifTF,TF)
}
rr_integrated <- regulatory_relationships[regulatory_relationships[,1] %in% motifTF,]

saveRDS(rr_integrated, paste0("figures/IReNA/integrated/integrated_regulatory-relationships_lineage-named_trajectory-pops-only_", Sys.Date(), ".rds"))

Generate networks

  • Related to Figure 2.
gtf     <- read.delim("Mus_musculus.GRCm39.113.gtf",
                      header=FALSE, comment.char="#")
gtf[,1] <- paste0('chr',gtf[,1])
integrated_gene_tss <- get_tss_region(gtf, rownames(integrated_Kmeans_clustering_ENS))
integrated_gene_tss$chr <- gsub("chrMT", "chrM", integrated_gene_tss$chr)
integrated_gene_tss_GR  <- GRanges(paste0(integrated_gene_tss[,2],':',
    integrated_gene_tss[,3],'-', integrated_gene_tss[,4]))
integrated_gene_tss_GR  <- keepStandardChromosomes(integrated_gene_tss_GR, pruning.mode = "coarse")
integrated_fimo_regulation <- identify_region_tfs(integrated_gene_tss_GR,
    integrated_gene_tss[,1], motif1, BSdb=BSgenome.Mmusculus.UCSC.mm10)
integrated_filtered_rr <- filter_regulation_fimo(integrated_fimo_regulation, rr_integrated)
fwrite(integrated_filtered_rr,
       paste0("figures/IReNA/integrated/integrated_filtered-regulatory-relationships_", Sys.Date(), ".csv"),
       sep=",")

integrated_TFs_FDR2 <- network_analysis(integrated_filtered_rr, integrated_Kmeans_clustering_ENS,
                                TFFDR1 = 2, TFFDR2 = 2, ModuleFDR = 0.05)
saveRDS(integrated_TFs_FDR2, "figures/IReNA/integrated/integrated_TFs_FDR2_ModuleFDR-0.05_lineage-named_trajectory-pops-only.rds")
for (dataset in names(integrated_TFs_FDR2)) {
    dat <- as.data.table(integrated_TFs_FDR2[[dataset]])
    fwrite(dat, paste0("figures/IReNA/integrated/", sample_name, "_trajectory-pops-only_TFs-FDR2_", dataset, "_IReNA-network-analysis.csv"), sep=",")
}

ggsave(paste0("figures/IReNA/integrated/", sample_name,
              "_trajectory-pops-only_FDR2_IReNA_TF-Network-Plot.svg"),
        plot = plot_tf_network(TFs_list=integrated_TFs_FDR2), units="mm")

integrated_enrichment_KEGG <- enrich_module(integrated_Kmeans_clustering_ENS,
    org.Mm.eg.db, enrich.db = 'KEGG', organism = 'mmu')
saveRDS(integrated_enrichment_KEGG, "figures/IReNA/integrated/integrated_lineage-named_trajectory-pops-only_KEGG.rds")

integrated_enrichment_GO <- enrich_module(integrated_Kmeans_clustering_ENS,
    org.Mm.eg.db, 'GO', organism = 'mmu')
saveRDS(integrated_enrichment_GO, "figures/IReNA/integrated/integrated_lineage-named_trajectory-pops-only_GO.rds")

TFs_gene_names <- copy(integrated_TFs_FDR2)
ensembl_convert_res <- select(
  org.Mm.eg.db,
  keys = TFs_gene_names$TF_list,
  column = c('SYMBOL', 'ENTREZID', 'ENSEMBL'),
  keytype = 'ENSEMBL')
ids_list <- data.table(ENSEMBL = TFs_gene_names$TF_list,
                       order = seq(1:length(TFs_gene_names$TF_list)))
gene_names <- merge(ids_list, ensembl_convert_res)
TFs_gene_names$TF_list <- gene_names$SYMBOL
original_colnames <- colnames(TFs_gene_names$TF_network)
# This only applies to a GENIE3 generated list
colnames(TFs_gene_names$TF_network) <- c("TFSymbol", "TF", "TFGroup",
    "TFMinNlogfdr", "TFMinGroup", "SigActModules", "SigRepModules", "TargetSymbol",
    "Target", "TargetGroup", "Weight", "Correlation", "Regulation")
network <- TFs_gene_names$TF_network
network_tab <- as.data.table(as.data.frame.matrix(table(network$TF, network$TFGroup)), keep.rownames=T)
colnames(network_tab) <- c("TF", "module1", "module2", "module3")
fwrite(network_tab, paste0("figures/IReNA/integrated/",
                        "integrated_lineage-named_trajectory-pops-only",
                        "_TF-FDR2_IReNA-network-table.csv"),
       sep=",")

ggsave(paste0("figures/IReNA/integrated/", 
              "integrated_lineage-named_trajectory-pops-only_",
              "FDR2_IReNA_TF-intramodular-network_GO-terms.svg"), 
    plot = plot_intramodular_network(integrated_TFs_FDR2, 
            integrated_enrichment_GO, layout = 'circle'), units="mm")

Calculate the log2FC of the genes composing the GRN from the trajectory

  • Related to Figure 2.
module1_tfs <- c("Zfp467", "Klf2", "Hoxb2", "Twist1", "Jun", "Atf5", "Jund", "Tbx2", "Xbp1", "Sp100")
module2_tfs <- c("Smad3", "E2f3", "Mafb", "Pou2f1", "Meis1", "Ets1", "Sox4", "Foxo3", "Nr2f1", "Tbx3", "Tcf4", "Mecom", "Sox5", "Nr2f2", "Pbx1", "Elk3", "Ebf1", "Foxn3", "Peg3", "Esr1", "Foxp2", "Rora", "Crem", "Gtf2ird1", "Zbtb20")
module3_tfs <- c("Egr1", "Klf12", "Mef2c", "Zfp36l1", "Thrb", "Fos", "Smad7", "Junb", "Hes1", "Zfhx3", "Arid5b")

pops_of_interest <- c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "JG", "fbJG")
Idents(lineage_named) <- lineage_named$cell_type
lineage_subset <- subset(lineage_named, idents = pops_of_interest, invert = F)
lineage_subset$cell_type <- factor(lineage_subset$cell_type, levels=
    c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "fbJG", "JG"))
Idents(lineage_subset) <- lineage_subset$condition

module1_diff <- as.data.table(FindMarkers(lineage_subset, ident.1 = "KO", ident.2 = "WT", features = module1_tfs), keep.rownames=T)

module1_diff_MAST <- as.data.table(FindMarkers(lineage_subset, ident.1 = "KO", ident.2 = "WT", features = module1_tfs, test="MAST", min.pct = 0, logfc.threshold = 0), keep.rownames=T)
tmp <- FindMarkers(lineage_subset, ident.1 = "KO", ident.2 = "WT",  min.pct = 0, logfc.threshold = 0)
klf12_dt <- as.data.table(tmp[rownames(tmp) == "Klf12",], keep.rownames=T)
zfhx3_dt <- as.data.table(tmp[rownames(tmp) == "Zfhx3",], keep.rownames=T)
module1_diff_MAST <- rbind(module1_diff_MAST, klf12_dt, zfhx3_dt)

module2_diff_MAST <- as.data.table(FindMarkers(lineage_subset, ident.1 = "KO", ident.2 = "WT", features = module2_tfs, test="MAST"), keep.rownames=T)
ebf1_dt <- as.data.table(tmp[rownames(tmp) == "Ebf1",], keep.rownames=T)
tbx3_dt <- as.data.table(tmp[rownames(tmp) == "Tbx3",], keep.rownames=T)
module2_diff_MAST <- rbind(module2_diff_MAST, ebf1_dt, tbx3_dt)

module3_diff_MAST <- as.data.table(FindMarkers(lineage_subset, ident.1 = "KO", ident.2 = "WT", features = module3_tfs, test="MAST"), keep.rownames=T)

module1_diff_MAST$module <- "Module1"
module2_diff_MAST$module <- "Module2"
module3_diff_MAST$module <- "Module3"
modules_mat <- rbind(module1_diff_MAST, module2_diff_MAST, module3_diff_MAST)

heat_mat <- as.data.frame.matrix(xtabs(avg_log2FC~rn + module, modules_mat))
pheatmap_obj <- pheatmap::pheatmap(heat_mat,
                    cluster_rows=TRUE,
                    cluster_cols=FALSE, scale="column",
                    clustering_method="ward.D2",
                    cellwidth = 5, cellheight = 5,
                    breaks = seq(-2,2, 0.04),
                    color = colorRampPalette(c("navy", "white", "red"))(100),
                    fontsize=6)[[4]]
heatmap_plt <- grid.arrange(pheatmap_obj, nrow=1, ncol=1)
heatmap_plt

ggsave(paste0("figures/IReNA/integrated/IReNA-integrated_TF-heatmap_TFs-clustered_", Sys.Date(), ".png"),
       plot = heatmap_plt, units = "mm", dpi = 300)
ggsave(paste0("figures/IReNA/integrated/IReNA-integrated_TF-heatmap_TFs-clustered_", Sys.Date(), ".svg"),
       plot = heatmap_plt, units = "mm", dpi = 300)

Do heatmaps of KO vs WT of each trajectory population's top differentially expressed genes

  • Related to Figure 2.
lineage_named$cell_type_genotype <- paste0(lineage_named$cell_type, "_", lineage_named$genotype)
Idents(lineage_named) <- lineage_named$cell_type_genotype

features  <- rownames(lineage_named)
no_ribo   <- features[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rpsa", features)]
no_ribo_mito <- sort(no_ribo[!grepl("^mt-(.*)", no_ribo)])
no_ribo_mito_hemo <- sort(no_ribo_mito[!grepl("^H2-(.*)", no_ribo_mito)])
filtered_features <- sort(no_ribo_mito_hemo[!grepl("^Hb[ab]-(.*)", no_ribo_mito_hemo)])

for (pop in c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "JG", "fbJG")) {
    diff_MAST <- as.data.table(FindMarkers(lineage_named,
        ident.1 = paste0(pop, "_KO"),
        ident.2 = paste0(pop, "_WT"),
        features = filtered_features, test="MAST"), keep.rownames=T)
    diff_MAST_sig <- diff_MAST[p_val_adj < 0.01,]
    heat_mat <- as.matrix(xtabs(avg_log2FC~rn, diff_MAST_sig[1:20,]))
    pheatmap_obj <- pheatmap::pheatmap(heat_mat,
                        cluster_rows=TRUE,
                        cluster_cols=FALSE, scale="column",
                        clustering_method="ward.D2",
                        cellwidth = 5, cellheight = 5,
                        breaks = seq(-2,2, 0.04),
                        color = colorRampPalette(c("navy", "white", "red"))(100),
                        fontsize=6)[[4]]
    heatmap_plt <- grid.arrange(pheatmap_obj, nrow=1, ncol=1)
    ggsave(paste0("figures/IReNA/integrated/IReNA-integrated_", pop,
                  "-heatmap_Top20genes-clustered_", Sys.Date(), ".png"),
           plot = heatmap_plt, units = "mm", dpi = 300)
    ggsave(paste0("figures/IReNA/integrated/IReNA-integrated_", pop,
                  "-heatmap_Top20genes-clustered_", Sys.Date(), ".svg"),
           plot = heatmap_plt, units = "mm", dpi = 300)
}

Perform pathway enrichment

  • Related to Figure 2.
GO_list <- c(GO_hallmarks, GO_cell_signature, GO_regulatory_targets,
             GO_canonical_pathways, GO_curated_gene_sets, GO_biological_process,
             GO_cellular_component, GO_molecular_function)
names(GO_list) <- c("hallmarks", "cell-signature", "regulatory-targets",
                    "canonical-pathways", "curated-gene-sets", "BP", "CC", "MF")

traj_subset <- subset(lineage_named, idents = c("SMC", "tSMC_1", "tSMC_2",
    "tSMC_3", "fbJG", "JG"), invert = F)
traj_subset$genotype <- factor(traj_subset$genotype, levels=
    c("KO", "WT"))

Idents(traj_subset) <- traj_subset$genotype
features  <- Features(traj_subset)
no_ribo   <- features[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rpsa", features)]
no_ribo_mito <- sort(no_ribo[!grepl("^mt-(.*)", no_ribo)])
no_ribo_mito_hemo <- sort(no_ribo_mito[!grepl("^H2-(.*)", no_ribo_mito)])
filtered_features <- sort(no_ribo_mito_hemo[!grepl("^Hb[ab]-(.*)", no_ribo_mito_hemo)])

wt_ko_markers <- FindAllMarkers(traj_subset, only.pos = FALSE,
                             test.use="MAST", features=filtered_features,
                             min.pct = 0.01, logfc.threshold = 0.5)  %>%
    Add_Pct_Diff() 
wt_ko_markers %>% dplyr::filter(pct_diff > 0.3)

wt_gene_list <- wt_ko_markers[wt_ko_markers$cluster=="WT",]$avg_log2FC
names(wt_gene_list) <- wt_ko_markers[wt_ko_markers$cluster=="WT",]$gene
wt_gene_list <- sort(wt_gene_list, decreasing = TRUE)
wt_gene_list <- wt_gene_list[!duplicated(names(wt_gene_list))]

ko_gene_list <- wt_ko_markers[wt_ko_markers$cluster=="KO",]$avg_log2FC
names(ko_gene_list) <- wt_ko_markers[wt_ko_markers$cluster=="KO",]$gene
ko_gene_list <- sort(ko_gene_list, decreasing = TRUE)
ko_gene_list <- ko_gene_list[!duplicated(names(ko_gene_list))]

combined_list <- c(wt_gene_list, ko_gene_list)
combined_list <- sort(combined_list, decreasing = TRUE)
combined_list <- combined_list[!duplicated(names(combined_list))]

gene_list=combined_list
GO_file=GO_list[GO_file]
pval = 0.05
score_type="std"
myGO <- fgsea::gmtPathways(GO_file)
fgRes <- fgsea::fgsea(pathways = myGO,
                    stats = gene_list,
                    minSize=15,
                    maxSize=400,
                    nPermSimple = 10000,
                    nproc = 2,
                    scoreType = score_type) %>% 
              as.data.frame() %>% 
              dplyr::filter(padj < !!pval) %>% 
              dplyr::arrange(desc(NES))
concise_pathways = fgsea::collapsePathways(data.table::as.data.table(fgRes),
                                  pathways = myGO,
                                  stats = gene_list)
fgRes <- fgRes[fgRes$pathway %in% concise_pathways$mainPathways, ]
fgRes$Enrichment <- ifelse(fgRes$NES > 0, "Up-regulated", "Down-regulated")
filtRes <- rbind(head(fgRes, n = 10),
              tail(fgRes, n = 10 ))
total_up   <- sum(fgRes$Enrichment == "Up-regulated")
total_down <- sum(fgRes$Enrichment == "Down-regulated")
header     <- paste0("Top 10 (Total pathways: Up=", total_up,",
                    Down=", total_down, ")")
colos <- setNames(c("firebrick2", "dodgerblue2"),
                c("Up-regulated", "Down-regulated"))
heat_mat <- as.data.frame.matrix(xtabs(NES~pathway + padj, fgRes))
heat_mat <- as.matrix(xtabs(NES~pathway, fgRes[fgRes$padj < 0.05,]))
pheatmap_obj <- pheatmap::pheatmap(heat_mat,
                        cluster_rows=TRUE,
                        cluster_cols=FALSE, scale="column",
                        clustering_method="ward.D2",
                        cellwidth = 5, cellheight = 5,
                        breaks = seq(-2,2, 0.04),
                        color = colorRampPalette(c("navy", "white", "red"))(100),
                        fontsize=6)[[4]]
heatmap_plt <- grid.arrange(pheatmap_obj, nrow=1, ncol=1)
ggsave(paste0("figures/GSEA/", "ren1c_WT-KO_hallmark-pathways_heatmap_",
              Sys.Date(), ".png"),
       plot = heatmap_plt, width = 90, height = 90, units = "mm", dpi = 300)
ggsave(paste0("figures/GSEA/", "ren1c_WT-KO_hallmark-pathways_heatmap_",
              Sys.Date(), ".svg"),
       plot = heatmap_plt, width = 90, height = 90, units = "mm", dpi = 300)