3B. Create the initial Seurat objects
Initialize and QC the control (phenotypically WT) Ren1c 1mo sample
sample_name <- "ren1c_named"
ren1c_named_RNA <- Read10X(data.dir = file.path(matrix_path_WT))
# Initialize the Seurat object with the raw (non-normalized data).
ren1c_named_seurat <- CreateSeuratObject(counts = ren1c_named_RNA, project = "ren1c",
min.cells = 3, min.features = 200)
ren1c_named_seurat[["percent.mt"]] <- PercentageFeatureSet(
ren1c_named_seurat, pattern = "^mt-")
mtDNA_cutoff <- quantile(ren1c_named_seurat[["percent.mt"]]$percent.mt, 0.925)
# get rid of red blood cells, which have hemoglobin RNAs
ren1c_named_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = ren1c_named_seurat,
pattern = "^Hb[^(p)]")
Hb_cutoff <- quantile(ren1c_named_seurat[["percent.hemo"]]$percent.hemo, 0.975)
mt_plot <- FeatureScatter(ren1c_named_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.mt")
svglite(paste0("QC_plots/", sample_name, "_nCount-MT_", Sys.Date(), ".svg"),
bg = "transparent", fix_text_size = FALSE, pointsize = 6)
mt_plot +
geom_hline(yintercept = mtDNA_cutoff, col='red') +
custom_theme() +
labs(x="RNA Count (N)", y="mtDNA (%)")
dev.off()
hemo_plot <- FeatureScatter(ren1c_named_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.hemo")
svglite(paste0("QC_plots/", sample_name, "_nCount-hemoglobin_", Sys.Date(), ".svg"),
bg = "transparent", fix_text_size = FALSE, pointsize = 6)
hemo_plot +
geom_hline(yintercept = Hb_cutoff, col='red') +
custom_theme() +
labs(x="RNA Count (N)", y="HbDNA (%)")
dev.off()
RNA.feature_upper_cutoff <- quantile(ren1c_named_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.975)
RNA.feature_lower_cutoff <- quantile(ren1c_named_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.025)
RNA.count_upper_cutoff <- quantile(ren1c_named_seurat[["nCount_RNA"]]$nCount_RNA, 0.975)
RNA.count_lower_cutoff <- quantile(ren1c_named_seurat[["nCount_RNA"]]$nCount_RNA, 0.025)
count_ft_plot <- FeatureScatter(ren1c_named_seurat,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")
svglite(paste0("QC_plots/", sample_name, "_nCount-nFeature_", Sys.Date(), ".svg"),
bg = "transparent", fix_text_size = FALSE, pointsize = 6)
count_ft_plot +
geom_hline(yintercept = RNA.feature_upper_cutoff, col='red') +
geom_hline(yintercept = RNA.feature_lower_cutoff, col='red') +
geom_vline(xintercept = RNA.count_upper_cutoff, col='blue') +
geom_vline(xintercept = RNA.count_lower_cutoff, col='blue') +
custom_theme() +
labs(x="RNA Count (N)", y="RNA Features (N)")
dev.off()
ren1c_named_seurat <- subset(ren1c_named_seurat,
subset = nCount_RNA < RNA.count_upper_cutoff &
nCount_RNA > RNA.count_lower_cutoff &
nFeature_RNA > RNA.feature_lower_cutoff &
nFeature_RNA < RNA.feature_upper_cutoff &
percent.mt < mtDNA_cutoff &
percent.hemo < Hb_cutoff)
Initialize and QC Ren1cKO 1mo sample
sample_name <- "ren1cKO"
ren1cKO_RNA <- Read10X(data.dir = file.path(matrix_path_KO))
# Initialize the Seurat object with the raw (non-normalized data).
ren1cKO_seurat <- CreateSeuratObject(counts = ren1cKO_RNA, project = "ren1c",
min.cells = 3, min.features = 200)
ren1cKO_seurat[["percent.mt"]] <- PercentageFeatureSet(
ren1cKO_seurat, pattern = "^mt-")
mtDNA_cutoff <- quantile(ren1cKO_seurat[["percent.mt"]]$percent.mt, 0.925)
# get rid of red blood cells, which have hemoglobin RNAs
ren1cKO_seurat[["percent.hemo"]] <- PercentageFeatureSet(object = ren1cKO_seurat,
pattern = "^Hb[^(p)]")
Hb_cutoff <- quantile(ren1cKO_seurat[["percent.hemo"]]$percent.hemo, 0.975)
mt_plot <- FeatureScatter(ren1cKO_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.mt")
svglite(paste0("QC_plots/", sample_name, "_nCount-MT_", Sys.Date(), ".svg"),
bg = "transparent", fix_text_size = FALSE, pointsize = 6)
mt_plot +
geom_hline(yintercept = mtDNA_cutoff, col='red') +
custom_theme() +
labs(x="RNA Count (N)", y="mtDNA (%)")
dev.off()
hemo_plot <- FeatureScatter(ren1cKO_seurat,
feature1 = "nCount_RNA",
feature2 = "percent.hemo")
svglite(paste0("QC_plots/", sample_name, "_nCount-hemoglobin_", Sys.Date(), ".svg"),
bg = "transparent", fix_text_size = FALSE, pointsize = 6)
hemo_plot +
geom_hline(yintercept = Hb_cutoff, col='red') +
custom_theme() +
labs(x="RNA Count (N)", y="HbDNA (%)")
dev.off()
RNA.feature_upper_cutoff <- quantile(ren1cKO_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.975)
RNA.feature_lower_cutoff <- quantile(ren1cKO_seurat[["nFeature_RNA"]]$nFeature_RNA, 0.025)
RNA.count_upper_cutoff <- quantile(ren1cKO_seurat[["nCount_RNA"]]$nCount_RNA, 0.975)
RNA.count_lower_cutoff <- quantile(ren1cKO_seurat[["nCount_RNA"]]$nCount_RNA, 0.025)
count_ft_plot <- FeatureScatter(ren1cKO_seurat,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA")
svglite(paste0("QC_plots/", sample_name, "_nCount-nFeature_", Sys.Date(), ".svg"),
bg = "transparent", fix_text_size = FALSE, pointsize = 6)
count_ft_plot +
geom_hline(yintercept = RNA.feature_upper_cutoff, col='red') +
geom_hline(yintercept = RNA.feature_lower_cutoff, col='red') +
geom_vline(xintercept = RNA.count_upper_cutoff, col='blue') +
geom_vline(xintercept = RNA.count_lower_cutoff, col='blue') +
custom_theme() +
labs(x="RNA Count (N)", y="RNA Features (N)")
dev.off()
ren1cKO_seurat <- subset(ren1cKO_seurat,
subset = nCount_RNA < RNA.count_upper_cutoff &
nCount_RNA > RNA.count_lower_cutoff &
nFeature_RNA > RNA.feature_lower_cutoff &
nFeature_RNA < RNA.feature_upper_cutoff &
percent.mt < mtDNA_cutoff &
percent.hemo < Hb_cutoff)
Integrate the samples
ren1c_named_seurat$genotype <- "WT"
ren1c_named_seurat$timepoint <- "1mo"
ren1c_named_seurat$condition <- paste0(ren1c_named_seurat$genotype, "_", ren1c_named_seurat$timepoint)
ren1cKO_seurat$genotype <- "KO"
ren1cKO_seurat$timepoint <- "1mo"
ren1cKO_seurat$condition <- paste0(ren1cKO_seurat$genotype, "_", ren1cKO_seurat$timepoint)
obj_list <- c(ren1c_named_seurat, ren1cKO_seurat)
names(obj_list) <- c("WT", "KO")
obj <- merge(x=obj_list$WT, y=obj_list$KO),
add.cell.ids= c("WT", "KO"))
Cluster cells
obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj)
s.genes <- stringr::str_to_sentence(cc.genes$s.genes)
s.genes <- stringr::str_replace(s.genes, "Mlf1ip", "Cenpu")
g2m.genes <- stringr::str_to_sentence(cc.genes$g2m.genes)
g2m.genes <- stringr::str_replace(g2m.genes, "Fam64a", "Pimreg")
g2m.genes <- stringr::str_replace(g2m.genes, "Hn1", "Jpt1")
# Must join to run CellCycleScoring
obj[["RNA"]] <- JoinLayers(obj[["RNA"]])
obj <- CellCycleScoring(obj, search=TRUE,
s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
obj$CC.Difference <- obj$S.Score - obj$G2M.Score
obj[["RNA"]] <- split(obj[["RNA"]], f = obj$condition)
obj
obj <- ScaleData(obj, vars.to.regress = c("percent.mt", "percent.hemo", "CC.Difference"))
obj <- RunPCA(obj, npcs = 30, verbose = FALSE)
obj <- IntegrateLayers(
obj = obj, method = CCAIntegration,
orig.reduction = "pca", new.reduction = "integrated_cca",
verbose = FALSE
)
obj <- IntegrateLayers(
obj = obj, method = RPCAIntegration,
orig.reduction = "pca", new.reduction = "integrated_rpca",
verbose = FALSE
)
obj <- IntegrateLayers(
obj = obj, method = HarmonyIntegration,
orig.reduction = "pca", new.reduction = "harmony",
verbose = FALSE
)
sample_name <- "ren1c_smmhc"
# join layers after integration
obj[["RNA"]] <- JoinLayers(obj[["RNA"]])
obj <- FindNeighbors(obj, reduction = "integrated_cca", dims = 1:15)
obj <- FindClusters(obj, resolution = 1.5, cluster.name = "cca_clusters",
random.seed = 99, algorithm=1, group.singletons=TRUE)
obj <- RunUMAP(obj, reduction = "integrated_cca", dims = 1:15,
reduction.name = "umap_cca", seed.use = 99, n.neighbors = 15L,
n.epochs = 500, min.dist=0.3, spread=0.5, verbose = FALSE)
obj <- FindNeighbors(obj, reduction = "integrated_rpca", dims = 1:15)
obj <- FindClusters(obj, resolution = 1.5, cluster.name = "rpca_clusters",
random.seed = 99, algorithm=1, group.singletons=TRUE)
obj <- RunUMAP(obj, reduction = "integrated_rpca", dims = 1:15,
reduction.name = "umap_rpca", seed.use = 99, n.neighbors = 15L,
n.epochs = 500, min.dist=0.3, spread=0.5, verbose = FALSE)
obj <- FindNeighbors(obj, reduction = "harmony", dims = 1:15)
obj <- FindClusters(obj, resolution = 1.5, cluster.name = "harmony_clusters",
random.seed = 99, algorithm=1, group.singletons=TRUE)
obj <- RunUMAP(obj, reduction = "harmony", dims = 1:15,
reduction.name = "umap_harmony", seed.use = 99,
n.neighbors = 15L, n.epochs = 500, min.dist=0.3,
spread=0.5, verbose = FALSE)
cca_umap <- DimPlot(
obj,
reduction = "umap_cca",
group.by = c("condition", "cca_clusters"),
combine = FALSE, label.size = 2
)
rpca_umap <- DimPlot(
obj,
reduction = "umap_rpca",
group.by = c("condition", "rpca_clusters"),
combine = FALSE, label.size = 2
)
harmony_umap <- DimPlot(
obj,
reduction = "umap_harmony",
group.by = c("condition", "harmony_clusters"),
combine = FALSE, label.size = 2
)
wrap_plots(c(cca_umap, rpca_umap, harmony_umap), ncol = 3, byrow = F)
Idents(obj) <- obj$harmony_clusters
pal <- get_random_grid_colors(length(unique(obj$harmony_clusters)))
names(pal) <- c(unique(obj$harmony_clusters))
pal[["WT"]] <- uva_blue
pal[["KO"]] <- uva_orange
p1 <- DimPlot(obj, reduction = "umap_harmony", cols=pal,
group.by = "condition") &
custom_theme() &
theme(legend.position = "right")
p2 <- DimPlot(obj, reduction = "umap_harmony", cols=pal,
group.by = "harmony_clusters", label = TRUE, repel = TRUE) &
custom_theme() &
theme(legend.position = "right")
ggsave(paste0(sample_name, "_unlabeled-UMAP_", Sys.Date(), ".svg"),
plot = p1 | p2, create.dir=TRUE)