4B. Create the initial Seurat objects

obj_list <- vector(mode = "list", length = length(file_paths$sample))
names(obj_list) <- file_paths$sample
for(i in seq_along(file_paths$sample)) {
    counts <- Read10X_h5(file_paths$counts_path[i])
    obj <- CreateSeuratObject(
        counts = counts$`Gene Expression`,
        assay = "RNA",
        project = file_paths[i, sample]
    )
    obj <- Add_Cell_QC_Metrics_Mouse(seurat_object = obj, species = "mouse")
    # create ATAC assay and add it to the object
    atac_counts <- counts$`Peaks`
    # we'll only use peaks in standard chromosomes
    grange_counts <- StringToGRanges(rownames(atac_counts), sep = c(":", "-"))
    grange_use <- seqnames(grange_counts) %in% standardChromosomes(grange_counts)
    atac_counts <- atac_counts[as.vector(grange_use), ]

    obj[["ATAC"]] <- CreateChromatinAssay(
        counts = atac_counts,
        sep = c(":", "-"),
        fragments = file_paths$frag_path[i],
        annotation = annotations
    )
    DefaultAssay(obj) <- "ATAC"
    obj <- NucleosomeSignal(obj)
    obj <- TSSEnrichment(obj)
    DefaultAssay(obj) <- "RNA"
    obj_list[[i]] <- obj
}

Create quality control plots

for (object in names(obj_list)) {
    plot_QC_Metrics(obj_list[[object]], object, "RNA")
    plot_QC_Metrics(obj_list[[object]], object, "ATAC")
}

Quality control filtering

unfiltered_obj_list <- copy(obj_list)

for (object in names(obj_list)) {
    # filter out low quality cells
    complexity_cutoff <- quantile(obj_list[[object]]@meta.data$log10GenesPerUMI[!is.nan(obj_list[[object]]@meta.data$log10GenesPerUMI)], 0.025)
    mtDNA_cutoff   <- quantile(obj_list[[object]]@meta.data$percent_mito[!is.nan(obj_list[[object]]@meta.data$percent_mito)], 0.925)
    RNA_feature_upper_cutoff <- quantile(obj_list[[object]][["nFeature_RNA"]]$nFeature_RNA, 0.975)
    RNA_feature_lower_cutoff <- quantile(obj_list[[object]][["nFeature_RNA"]]$nFeature_RNA, 0.025)
    RNA_count_upper_cutoff   <- quantile(obj_list[[object]][["nCount_RNA"]]$nCount_RNA, 0.975)
    RNA_count_lower_cutoff   <- quantile(obj_list[[object]][["nCount_RNA"]]$nCount_RNA, 0.025)
    nCount_ATAC_upper <- quantile(obj_list[[object]]$nCount_ATAC, 0.975)
    nCount_ATAC_lower <- quantile(obj_list[[object]]$nCount_ATAC, 0.025)
    TSS_threshold <- quantile(obj_list[[object]]$TSS.enrichment, 0.025)

    obj_list[[object]] <- subset(
      x = obj_list[[object]],
      subset = nCount_ATAC < nCount_ATAC_upper &
        nCount_RNA < RNA_count_upper_cutoff &
        nCount_ATAC > nCount_ATAC_lower &
        nCount_RNA > RNA_count_lower_cutoff &
        nucleosome_signal < 2 &
        percent_mito < mtDNA_cutoff &
        TSS.enrichment > TSS_threshold
    )
}

saveRDS(obj_list, paste0("obj_list_", Sys.Date(), ".rds"))

Add CellCycleScores

for (object in names(obj_list)) {
    obj_list[[object]] <- addCellCycleScores(obj_list[[object]])
}

Initial data processing

for (object in names(obj_list)) {
    obj_list[[object]] <- preProcess(obj_list[[object]])
}

saveRDS(obj_list, paste0("obj_list_preprocess_", Sys.Date(), ".rds"))

Integrate the samples

## uniqify the individual seurat object cell names
for (object in names(obj_list)) {
    obj_list[[object]] <- RenameCells(object = obj_list[[object]],
        add.cell.id = gsub("captopril-", "", object))
}

obj_merged <- merge(obj_list[[1]], y = c(obj_list[[2]], obj_list[[3]],
                    obj_list[[4]], obj_list[[5]], obj_list[[6]]), 
    project = "multiome_captopril",
    merge.data = TRUE)
table(obj_merged$orig.ident)

obj_merged <- NormalizeData(obj_merged)
obj_merged <- FindVariableFeatures(obj_merged)
obj_merged <- ScaleData(obj_merged)
obj_merged <- RunPCA(obj_merged)
obj_merged <- FindNeighbors(obj_merged, dims = 1:30, reduction = "pca")
obj_merged <- FindClusters(obj_merged, resolution = 2,
                           cluster.name = "unintegrated_clusters")

obj_merged <- RunUMAP(obj_merged, dims = 1:30, reduction = "pca",
                      reduction.name = "umap.unintegrated")
DimPlot(obj_merged, reduction = "umap.unintegrated", label=T, pt.size=0.5,
        group.by = c("predicted.kidney_ref.id", "orig.ident")) & NoLegend()

options(future.globals.maxSize= 5033164800)

obj_merged <- IntegrateLayers(object = obj_merged, method = CCAIntegration,
    orig.reduction = "pca", new.reduction = "integrated.cca", verbose = FALSE)

saveRDS(obj_merged, paste0("obj_merged_", Sys.Date(), ".rds"))

obj_merged <- IntegrateLayers(
  object = obj_merged, method = RPCAIntegration,
  orig.reduction = "pca", new.reduction = "integrated.rpca",
  verbose = FALSE
)
obj_merged <- IntegrateLayers(
  object = obj_merged, method = HarmonyIntegration,
  orig.reduction = "pca", new.reduction = "harmony",
  verbose = FALSE
)
obj_merged <- IntegrateLayers(
  object = obj_merged, method = FastMNNIntegration,
  new.reduction = "integrated.mnn",
  verbose = FALSE
)

# re-join layers after integration
obj_merged[["RNA"]] <- JoinLayers(obj_merged[["RNA"]])

# calculate clusters for each integration approach
obj_merged <- FindNeighbors(obj_merged, reduction = "integrated.cca", dims = 1:30)
obj_merged <- FindClusters(obj_merged, resolution = 1, cluster.name = "clusters.cca")
obj_merged <- RunUMAP(obj_merged, dims = 1:30, reduction = "integrated.cca",
                      reduction.name = "umap.cca")
# Visualization
DimPlot(obj_merged, reduction = "umap.cca", label=T, pt.size=0.5,
        group.by = c("orig.ident", "clusters.cca")) & NoLegend()

obj_merged <- FindNeighbors(obj_merged, reduction = "integrated.rpca", dims = 1:30)
obj_merged <- FindClusters(obj_merged, resolution = 1, cluster.name = "clusters.rpca")
obj_merged <- RunUMAP(obj_merged, dims = 1:30, reduction = "integrated.rpca",
                      reduction.name = "umap.rpca")
# Visualization
DimPlot(obj_merged, reduction = "umap.rpca", label=T, pt.size=0.5,
        group.by = c("orig.ident", "clusters.rpca")) & NoLegend()

obj_merged <- FindNeighbors(obj_merged, reduction = "harmony", dims = 1:30)
obj_merged <- FindClusters(obj_merged, resolution = 1, cluster.name = "clusters.harmony")
obj_merged <- RunUMAP(obj_merged, dims = 1:30, reduction = "harmony",
                      reduction.name = "umap.harmony")
# Visualization
DimPlot(obj_merged, reduction = "umap.harmony", label=T, pt.size=0.5,
        group.by = c("orig.ident", "clusters.harmony")) & NoLegend()

obj_merged <- FindNeighbors(obj_merged, reduction = "integrated.mnn", dims = 1:30)
obj_merged <- FindClusters(obj_merged, resolution = 1, cluster.name = "clusters.mnn")
obj_merged <- RunUMAP(obj_merged, dims = 1:30, reduction = "integrated.mnn",
                      reduction.name = "umap.mnn")
# Visualization
DimPlot(obj_merged, reduction = "umap.mnn", label=T, pt.size=0.5,
        group.by = c("orig.ident", "clusters.mnn")) & NoLegend()

saveRDS(obj_merged, paste0("obj_merged_integrated_", Sys.Date(), ".rds"))