3D. Construct single-cell trajectory

We want to know what's different between KO and WT along the shared trajectory. So, let's calculate the integrated trajectory. Then, for each population composing the trajectory, plot a heatmap showing the DEGs. Then, show the GRN network modules with each TF circle being colored to match the log2FC between KO and WT.
- Related to Figure 2.

integrated_dimplt <- DimPlot_scCustom(lineage_named, reduction = "umap_cca",
        colors_use=pal_lineage, group.by = "cell_type",
        label = TRUE, repel = TRUE)
split_dimplt <- DimPlot_scCustom(lineage_named, reduction = "umap_cca",
        colors_use=pal_lineage, group.by = "cell_type", split.by="genotype",
        label = TRUE, repel = TRUE)
ggsave(paste0("figures/", sample_name, "_lineage-UMAP_", Sys.Date(), ".svg"),
       plot = integrated_dimplt | split_dimplt, width = 180, height = 180,
       units = "mm", dpi = 300)

cds_integrated <- as.cell_data_set(lineage_named,
                                   default.reduction = "umap_cca",
                                   group.by = "cell_type")
reducedDimNames(cds_integrated) <- c("PCA", "INTEGRATED_CCA", "INTEGRATED_RPCA",
    "HARMONY", "UMAP", "UMAP_RPCA", "UMAP_HARMONY") # change UMAP_CCA to UMAP
cds_integrated <- cluster_cells(cds_integrated, num_iter=10, k=25, random_seed = 99)

integrated_dimplt | plot_cells(cds_integrated, color_cells_by = "partition", label_cell_groups=TRUE,
           graph_label_size = 5, group_label_size = 8)

cds_integrated <- preprocess_cds(cds_integrated)
rowData(cds_integrated)$gene_name <- rownames(cds_integrated)
rowData(cds_integrated)$gene_short_name <- rownames(rowData(cds_integrated))

colData(cds_integrated)[['Size_Factor']] <- NULL
cds_integrated <- estimate_size_factors(cds_integrated)
colData(cds_integrated)$Size_Factor

cds_integrated <- learn_graph(cds_integrated,
    learn_graph_control=list(ncenter=75, nn.k=NULL,
                             euclidean_distance_ratio = 1, 
                             minimal_branch_len=5))

trajectory_plt <- plot_cells(cds_integrated,
           color_cells_by = "cell_type",
           label_groups_by_cluster=F,
           label_leaves=T,
           label_branch_points=T,
           cell_size = 1,
           cell_stroke = 0,
           graph_label_size = 6, group_label_size = 4) +
           scale_color_manual(values=pal_lineage)
ggsave(paste0("figures/", sample_name, "_Integrated-Monocle3-Trajectory_", Sys.Date(), ".png"),
       plot = trajectory_plt, width = 180, height = 180, units = "mm", dpi = 600)
ggsave(paste0("figures/", sample_name, "_Integrated-Monocle3-Trajectory", Sys.Date(), ".svg"),
       plot = trajectory_plt, width = 180, height = 180, units = "mm", dpi = 300)

get_earliest_principal_node <- function(cds, starting_cell="SMC"){
  cell_ids <- which(colData(cds)[, "cell_type"] == starting_cell)

  closest_vertex <-
  cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
  closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
  root_pr_nodes <-
    igraph::V(principal_graph(cds)[["UMAP"]])$name[as.numeric(names
   (which.max(table(closest_vertex[cell_ids,]))))]

  root_pr_nodes
}

get_earliest_principal_node(cds_integrated, "SMC") # Y_28
cds_integrated <- order_cells(cds_integrated, root_pr_nodes=c("Y_28"))
pseudotime_plt <- plot_cells(cds_integrated,
           color_cells_by = "pseudotime",
           label_groups_by_cluster = TRUE,
           label_leaves=TRUE,
           label_branch_points=TRUE,
           cell_size = 1,
           cell_stroke = 0,
           graph_label_size=3,
           group_label_size = 4)

ggsave(paste0("figures/ren1c_SMC-root_Monocle3-Trajectory-Pseudotime_", Sys.Date(), ".png"),
       plot = pseudotime_plt | trajectory_plt, width = 360, height = 180, units = "mm", dpi = 600)
ggsave(paste0("figures/ren1c_SMC-root_Monocle3-Trajectory-Pseudotime_", Sys.Date(), ".svg"),
       plot = pseudotime_plt | trajectory_plt, width = 360, height = 180, units = "mm", dpi = 300) 

features  <- rownames(cds_integrated)
no_ribo   <- features[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rpsa", features)]
no_ribo_mito <- sort(no_ribo[!grepl("^mt-(.*)", no_ribo)])
no_ribo_mito_hemo <- sort(no_ribo_mito[!grepl("^H2-(.*)", no_ribo_mito)])
filtered_features <- sort(no_ribo_mito_hemo[!grepl("^Hb[ab]-(.*)", no_ribo_mito_hemo)])

cds_integrated_filtered <- cds_integrated[rownames(cds_integrated) %in% filtered_features, ] 
saveRDS(cds_integrated_filtered, paste0("cds-integrated-filtered_", Sys.Date(), ".rds"))

cds_integrated_filtered_subset <- cds_integrated_filtered[,rownames(lineage_named@meta.data[lineage_named@meta.data$cell_type %in% c("SMC", "tSMC_1", "tSMC_2", "tSMC_3", "JG", "fbJG"),])]

cds_integrated_filtered_subset_results <- graph_test(cds_integrated_filtered_subset,
    neighbor_graph="principal_graph", cores=1)
saveRDS(cds_integrated_filtered_subset_results,
    paste0("cds-integrated-filtered-subset-results_", Sys.Date(), ".rds"))

sig_genes <- row.names(subset(cds_integrated_filtered_subset_results, q_value < 0.05))

sig_dt <- as.data.table(subset(cds_integrated_filtered_subset_results,
    morans_I > 0.01 & q_value < 0.05), keep.rownames=T)
sig_genes_I1Q01 <- row.names(subset(cds_integrated_filtered_subset_results,
    morans_I > 0.1 & q_value < 0.01))
sig_genes_I15Q01 <- row.names(subset(cds_integrated_filtered_subset_results,
    morans_I > 0.15 & q_value < 0.01))
sig_genes_I25Q01 <- row.names(subset(cds_integrated_filtered_subset_results,
    morans_I > 0.25 & q_value < 0.01))

gene_modules_Q05 <- find_gene_modules(cds_integrated_filtered_subset[sig_genes,],
    resolution=c(10^seq(-6,-1)), k=20)
gene_modules_Q05_dt <- as.data.table(gene_modules_Q05)

gene_modules_I1Q01 <- find_gene_modules(cds_integrated_filtered_subset[sig_genes_I1Q01,],
    resolution=c(10^seq(-6,-1)), k=20)
gene_modules_I1Q01_dt <- as.data.table(gene_modules_I1Q01)

gene_modules_I15Q01 <- find_gene_modules(cds_integrated_filtered_subset[sig_genes_I15Q01,],
    resolution=c(10^seq(-6,-1)), k=20)
gene_modules_I15Q01_dt <- as.data.table(gene_modules_I15Q01)

gene_modules_I25Q01 <- find_gene_modules(cds_integrated_filtered_subset[sig_genes_I25Q01,],
    resolution=c(10^seq(-6,-1)), k=20)
gene_modules_I25Q01_dt <- as.data.table(gene_modules_I25Q01)

fwrite(gene_modules_Q05_dt, paste0("figures/ren1c-trajectory_GeneModules-Q05.csv"), sep=",")
fwrite(gene_modules_I1Q01_dt, paste0("figures/ren1c-trajectory_GeneModules-I01-Q01.csv"), sep=",")
fwrite(gene_modules_I15Q01_dt, paste0("figures/ren1c-trajectory_GeneModules-I015-Q01.csv"), sep=",")
fwrite(gene_modules_I25Q01_dt, paste0("figures/ren1c-trajectory_GeneModules-I025-Q01.csv"), sep=",")

Plot gene modules heatmap

cellname_celltypes <- colData(cds_integrated_filtered_subset)$cell_type
names(cellname_celltypes) <- row.names(colData(cds_integrated_filtered_subset))

df <- tibble::tibble(cell=row.names(colData(cds_integrated_filtered_subset)), 
                            cell_group=cellname_celltypes[colnames(cds_integrated_filtered_subset)])

agg_mat <- aggregate_gene_expression(cds_integrated_filtered_subset, gene_modules_Q05, df)
row.names(agg_mat) <- stringr::str_c("Module ", row.names(agg_mat))
colnames(agg_mat)  <- stringr::str_c(colnames(agg_mat))

pheatmap_obj <- pheatmap::pheatmap(agg_mat, cluster_rows=TRUE, cluster_cols=TRUE,
                   scale="column", clustering_method="ward.D2",
                   cellwidth = 5, cellheight = 5,
                   fontsize=6)[[4]]
heatmap_plt <- grid.arrange(pheatmap_obj, nrow=1, ncol=1)
heatmap_plt

ggsave(paste0("figures/ren1c-trajectory_GeneModules-Q05_Heatmap_", Sys.Date(), ".png"),
       plot = heatmap_plt, width = 180, height = 180, units = "mm", dpi = 600)
ggsave(paste0("figures/ren1c-trajectory_GeneModules-Q05_Heatmap_", Sys.Date(), ".svg"),
       plot = heatmap_plt, width = 180, height = 180, units = "mm", dpi = 300)