3B. Create the initial Seurat objects

Initialize and QC the control (phenotypically WT) Ren1c 1mo sample

sample_name <- "ren1c_named"
ren1c_named_RNA <- Read10X(data.dir = file.path(matrix_path_WT))
# Initialize the Seurat object with the raw (non-normalized data).
ren1c_named_seurat <- CreateSeuratObject(counts = ren1c_named_RNA, project = "ren1c",
                                  min.cells = 3, min.features = 200)
ren1c_named_seurat[["percent.mt"]] <- PercentageFeatureSet(
    ren1c_named_seurat, pattern = "^mt-")
mtDNA_cutoff <- quantile(ren1c_named_seurat[["percent.mt"]]$percent.mt, 0.925)

# get rid of red blood cells, which have hemoglobin RNAs 
ren1c_named_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = ren1c_named_seurat,
                                                      pattern = "^Hb[^(p)]")
Hb_cutoff <- quantile(ren1c_named_seurat[["percent.hemo"]]$percent.hemo, 0.975)

mt_plot <- FeatureScatter(ren1c_named_seurat,
                          feature1 = "nCount_RNA",
                          feature2 = "percent.mt")
svglite(paste0("QC_plots/", sample_name, "_nCount-MT_", Sys.Date(), ".svg"),
        bg = "transparent", fix_text_size = FALSE, pointsize = 6)
mt_plot +
    geom_hline(yintercept = mtDNA_cutoff, col='red') + 
    custom_theme() +
    labs(x="RNA Count (N)", y="mtDNA (%)")
dev.off()

hemo_plot <- FeatureScatter(ren1c_named_seurat,
                            feature1 = "nCount_RNA",
                            feature2 = "percent.hemo")
svglite(paste0("QC_plots/", sample_name, "_nCount-hemoglobin_", Sys.Date(), ".svg"),
        bg = "transparent", fix_text_size = FALSE, pointsize = 6)
hemo_plot + 
    geom_hline(yintercept = Hb_cutoff, col='red') + 
    custom_theme() +
    labs(x="RNA Count (N)", y="HbDNA (%)")
dev.off()

RNA.feature_upper_cutoff <- quantile(ren1c_named_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.975)
RNA.feature_lower_cutoff <- quantile(ren1c_named_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.025)
RNA.count_upper_cutoff <- quantile(ren1c_named_seurat[["nCount_RNA"]]$nCount_RNA, 0.975)
RNA.count_lower_cutoff <- quantile(ren1c_named_seurat[["nCount_RNA"]]$nCount_RNA, 0.025)

count_ft_plot <- FeatureScatter(ren1c_named_seurat,
                                feature1 = "nCount_RNA",
                                feature2 = "nFeature_RNA")
svglite(paste0("QC_plots/", sample_name, "_nCount-nFeature_", Sys.Date(), ".svg"),
        bg = "transparent", fix_text_size = FALSE, pointsize = 6)
count_ft_plot +
    geom_hline(yintercept = RNA.feature_upper_cutoff, col='red') + 
    geom_hline(yintercept = RNA.feature_lower_cutoff, col='red') + 
    geom_vline(xintercept = RNA.count_upper_cutoff, col='blue') + 
    geom_vline(xintercept = RNA.count_lower_cutoff, col='blue') + 
    custom_theme() +
    labs(x="RNA Count (N)", y="RNA Features (N)")
dev.off()

ren1c_named_seurat <- subset(ren1c_named_seurat,
                       subset = nCount_RNA < RNA.count_upper_cutoff &
                                nCount_RNA > RNA.count_lower_cutoff &
                                nFeature_RNA > RNA.feature_lower_cutoff &
                                nFeature_RNA < RNA.feature_upper_cutoff &
                                percent.mt   < mtDNA_cutoff &
                                percent.hemo < Hb_cutoff)

Initialize and QC Ren1cKO 1mo sample

sample_name <- "ren1cKO"
ren1cKO_RNA <- Read10X(data.dir = file.path(matrix_path_KO))
# Initialize the Seurat object with the raw (non-normalized data).
ren1cKO_seurat <- CreateSeuratObject(counts = ren1cKO_RNA, project = "ren1c",
                                  min.cells = 3, min.features = 200)
ren1cKO_seurat[["percent.mt"]] <- PercentageFeatureSet(
    ren1cKO_seurat, pattern = "^mt-")
mtDNA_cutoff <- quantile(ren1cKO_seurat[["percent.mt"]]$percent.mt, 0.925)

# get rid of red blood cells, which have hemoglobin RNAs 
ren1cKO_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = ren1cKO_seurat,
                                                      pattern = "^Hb[^(p)]")
Hb_cutoff <- quantile(ren1cKO_seurat[["percent.hemo"]]$percent.hemo, 0.975)

mt_plot <- FeatureScatter(ren1cKO_seurat,
                          feature1 = "nCount_RNA",
                          feature2 = "percent.mt")
svglite(paste0("QC_plots/", sample_name, "_nCount-MT_", Sys.Date(), ".svg"),
        bg = "transparent", fix_text_size = FALSE, pointsize = 6)
mt_plot +
    geom_hline(yintercept = mtDNA_cutoff, col='red') + 
    custom_theme() +
    labs(x="RNA Count (N)", y="mtDNA (%)")
dev.off()

hemo_plot <- FeatureScatter(ren1cKO_seurat,
                            feature1 = "nCount_RNA",
                            feature2 = "percent.hemo")
svglite(paste0("QC_plots/", sample_name, "_nCount-hemoglobin_", Sys.Date(), ".svg"),
        bg = "transparent", fix_text_size = FALSE, pointsize = 6)
hemo_plot + 
    geom_hline(yintercept = Hb_cutoff, col='red') + 
    custom_theme() +
    labs(x="RNA Count (N)", y="HbDNA (%)")
dev.off()

RNA.feature_upper_cutoff <- quantile(ren1cKO_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.975)
RNA.feature_lower_cutoff <- quantile(ren1cKO_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.025)
RNA.count_upper_cutoff <- quantile(ren1cKO_seurat[["nCount_RNA"]]$nCount_RNA, 0.975)
RNA.count_lower_cutoff <- quantile(ren1cKO_seurat[["nCount_RNA"]]$nCount_RNA, 0.025)

count_ft_plot <- FeatureScatter(ren1cKO_seurat,
                                feature1 = "nCount_RNA",
                                feature2 = "nFeature_RNA")
svglite(paste0("QC_plots/", sample_name, "_nCount-nFeature_", Sys.Date(), ".svg"),
        bg = "transparent", fix_text_size = FALSE, pointsize = 6)
count_ft_plot +
    geom_hline(yintercept = RNA.feature_upper_cutoff, col='red') + 
    geom_hline(yintercept = RNA.feature_lower_cutoff, col='red') + 
    geom_vline(xintercept = RNA.count_upper_cutoff, col='blue') + 
    geom_vline(xintercept = RNA.count_lower_cutoff, col='blue') + 
    custom_theme() +
    labs(x="RNA Count (N)", y="RNA Features (N)")
dev.off()

ren1cKO_seurat <- subset(ren1cKO_seurat,
                       subset = nCount_RNA < RNA.count_upper_cutoff &
                                nCount_RNA > RNA.count_lower_cutoff &
                                nFeature_RNA > RNA.feature_lower_cutoff &
                                nFeature_RNA < RNA.feature_upper_cutoff &
                                percent.mt   < mtDNA_cutoff &
                                percent.hemo < Hb_cutoff)

Integrate the samples

ren1c_named_seurat$genotype <- "WT"
ren1c_named_seurat$timepoint <- "1mo"
ren1c_named_seurat$condition <- paste0(ren1c_named_seurat$genotype, "_", ren1c_named_seurat$timepoint)

ren1cKO_seurat$genotype <- "KO"
ren1cKO_seurat$timepoint <- "1mo"
ren1cKO_seurat$condition <- paste0(ren1cKO_seurat$genotype, "_", ren1cKO_seurat$timepoint)

obj_list <- c(ren1c_named_seurat, ren1cKO_seurat)
names(obj_list) <- c("WT", "KO")
obj <- merge(x=obj_list$WT, y=obj_list$KO),
             add.cell.ids= c("WT", "KO"))

Cluster cells

obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj)

s.genes <- stringr::str_to_sentence(cc.genes$s.genes)
s.genes <- stringr::str_replace(s.genes, "Mlf1ip", "Cenpu")
g2m.genes <- stringr::str_to_sentence(cc.genes$g2m.genes)
g2m.genes <- stringr::str_replace(g2m.genes, "Fam64a", "Pimreg")
g2m.genes <- stringr::str_replace(g2m.genes, "Hn1", "Jpt1")

# Must join to run CellCycleScoring
obj[["RNA"]] <- JoinLayers(obj[["RNA"]])
obj <- CellCycleScoring(obj, search=TRUE,
    s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
obj$CC.Difference <- obj$S.Score - obj$G2M.Score
obj[["RNA"]] <- split(obj[["RNA"]], f = obj$condition)
obj

obj <- ScaleData(obj, vars.to.regress = c("percent.mt", "percent.hemo", "CC.Difference"))
obj <- RunPCA(obj, npcs = 30, verbose = FALSE)

obj <- IntegrateLayers(
  obj = obj, method = CCAIntegration,
  orig.reduction = "pca", new.reduction = "integrated_cca",
  verbose = FALSE
)

obj <- IntegrateLayers(
  obj = obj, method = RPCAIntegration,
  orig.reduction = "pca", new.reduction = "integrated_rpca",
  verbose = FALSE
)

obj <- IntegrateLayers(
  obj = obj, method = HarmonyIntegration,
  orig.reduction = "pca", new.reduction = "harmony",
  verbose = FALSE
)

sample_name <- "ren1c_smmhc"

# join layers after integration
obj[["RNA"]] <- JoinLayers(obj[["RNA"]])

obj <- FindNeighbors(obj, reduction = "integrated_cca", dims = 1:15)
obj <- FindClusters(obj, resolution = 1.5, cluster.name = "cca_clusters",
                    random.seed = 99, algorithm=1, group.singletons=TRUE)
obj <- RunUMAP(obj, reduction = "integrated_cca", dims = 1:15,
               reduction.name = "umap_cca", seed.use = 99, n.neighbors = 15L,
               n.epochs = 500, min.dist=0.3, spread=0.5, verbose = FALSE)

obj <- FindNeighbors(obj, reduction = "integrated_rpca", dims = 1:15)
obj <- FindClusters(obj, resolution = 1.5, cluster.name = "rpca_clusters",
                    random.seed = 99, algorithm=1, group.singletons=TRUE)
obj <- RunUMAP(obj, reduction = "integrated_rpca", dims = 1:15,
               reduction.name = "umap_rpca", seed.use = 99, n.neighbors = 15L,
               n.epochs = 500, min.dist=0.3, spread=0.5, verbose = FALSE)

obj <- FindNeighbors(obj, reduction = "harmony", dims = 1:15)
obj <- FindClusters(obj, resolution = 1.5, cluster.name = "harmony_clusters",
                    random.seed = 99, algorithm=1, group.singletons=TRUE)
obj <- RunUMAP(obj, reduction = "harmony", dims = 1:15,
               reduction.name = "umap_harmony", seed.use = 99,
               n.neighbors = 15L, n.epochs = 500, min.dist=0.3,
               spread=0.5, verbose = FALSE)

cca_umap <- DimPlot(
  obj,
  reduction = "umap_cca",
  group.by = c("condition", "cca_clusters"),
  combine = FALSE, label.size = 2
)

rpca_umap <- DimPlot(
  obj,
  reduction = "umap_rpca",
  group.by = c("condition", "rpca_clusters"),
  combine = FALSE, label.size = 2
)

harmony_umap <- DimPlot(
  obj,
  reduction = "umap_harmony",
  group.by = c("condition", "harmony_clusters"),
  combine = FALSE, label.size = 2
)

wrap_plots(c(cca_umap, rpca_umap, harmony_umap), ncol = 3, byrow = F)

Idents(obj) <- obj$harmony_clusters
pal         <- get_random_grid_colors(length(unique(obj$harmony_clusters)))
names(pal)  <- c(unique(obj$harmony_clusters))

pal[["WT"]]  <- uva_blue
pal[["KO"]]  <- uva_orange

p1 <- DimPlot(obj, reduction = "umap_harmony", cols=pal,
              group.by = "condition") & 
        custom_theme() &
        theme(legend.position = "right")
p2 <- DimPlot(obj, reduction = "umap_harmony", cols=pal,
              group.by = "harmony_clusters", label = TRUE, repel = TRUE) & 
        custom_theme() &
        theme(legend.position = "right")

ggsave(paste0(sample_name, "_unlabeled-UMAP_", Sys.Date(), ".svg"),
       plot = p1 | p2, create.dir=TRUE)