4D. Analyze chromatin-derived features
Add Peaks to Gene links
DefaultAssay(obj_named) <- "SCT"
features <- Features(obj_named)
filtered_features <- features[!grepl("^Rp[sl][[:digit:]]|^Rplp[[:digit:]]|^Rprl[[:digit:]]|^Rpsa", features)]
filtered_features <- sort(filtered_features[!grepl("^mt-(.*)", filtered_features)])
filtered_features <- sort(filtered_features[!grepl("^H2-(.*)", filtered_features)])
filtered_features <- sort(filtered_features[!grepl("^Hb[a-b](.*)", filtered_features)])
filtered_features <- filtered_features[!grepl("^[0-9]", filtered_features)]
DefaultAssay(obj_named) <- "ATAC"
obj_named@assays$ATAC@ranges # NCBI chromosome style
# first compute the GC content for each peak
seqlevelsStyle(BSgenome.Mmusculus.UCSC.mm39) <- "NCBI"
obj_named <- RegionStats(obj_named, assay="ATAC",
genome = BSgenome.Mmusculus.UCSC.mm39)
# link peaks to genes
obj_named <- LinkPeaks(
object = obj_named,
peak.assay = "ATAC",
expression.assay = "SCT",
genes.use = filtered_features
)
saveRDS(obj_named, paste0("obj_named_linkPeaks_", Sys.Date(), ".rds"))
Examine the accessible regions of each cell to determine enriched motifs
cd /processed/R_analysis/captopril_multiome/
mkdir motif_files/ $$ cd motif_files/
wget https://hocomoco12.autosome.org/final_bundle/hocomoco12/H12CORE/formatted_motifs/H12CORE_jaspar_format.txt
dos2unix H12CORE_jaspar_format.txt
dos2unix H12CORE_pwms.txt
Scan the DNA sequence of each peak for the presence of each motif, and create a Motif object
DefaultAssay(obj_named) <- "ATAC"
jaspar24 <- JASPAR2024()
sq24 <- RSQLite::dbConnect(RSQLite::SQLite(), db(jaspar24))
pwm_set <- TFBSTools::getMatrixSet(sq24,
list(species = "Mus musculus", collection = "CORE"))
motif_names <- data.table(id=names(pwm_set),
gene_name=character(length(names(pwm_set))))
for (name in names(pwm_set)) {
motif_names[id==name,]$gene_name <- name(pwm_set[[name]])
}
motif_matrix_j24 <- CreateMotifMatrix(features = granges(obj_named),
pwm = pwm_set, genome = BSgenome.Mmusculus.UCSC.mm39, use.counts = FALSE)
motif_object_j24 <- CreateMotifObject(data = motif_matrix_j24, pwm = pwm_set)
mouse_pwm <- query(MotifDb, andStrings="mmusculus",
notStrings=c("Hsapiens", "Rnorvegicus", "Xlaevis", "Ggallus", "Btaurus",
"Ocuniculus", "Rrattus", "jaspar2016", "jaspar2018",
"JASPAR_2014", "JASPAR_CORE"))
# See: https://rdrr.io/bioc/MotifDb/src/R/MotifList-class.R
motifs.pfmatrix <- lapply(mouse_pwm, function(mouse_pwm) convert_motifs(mouse_pwm, "TFBSTools-PFMatrix"))
motifs.pfmList <- do.call(PFMatrixList, motifs.pfmatrix)
motif_names <- data.table(id=names(motifs.pfmList),
gene_name=character(length(names(motifs.pfmList))))
for (name in names(motifs.pfmList)) {
motif_names[id==name,]$gene_name <- name(motifs.pfmList[[name]])
}
motif_matrix <- CreateMotifMatrix(features = granges(obj_named),
pwm = motifs.pfmList, genome = BSgenome.Mmusculus.UCSC.mm39, use.counts = FALSE)
motif_object <- CreateMotifObject(data = motif_matrix, pwm = motifs.pfmList)
motif_names <- names(motif_object@motif.names)
motif_names <- gsub("Mmusculus-", "", motif_names)
motif_names <- gsub("jolma2013-", "", motif_names)
motif_names <- gsub("UniPROBE-", "", motif_names)
motif_names <- gsub("jaspar2022-", "", motif_names)
motif_names <- gsub("HOCOMOCOv10-", "", motif_names)
motif_names <- gsub("cisbp_1.02-", "", motif_names)
motif_names <- gsub("_1.02", "", motif_names)
motif_names <- gsub("\\.UP*.*", "", motif_names)
motif_names <- gsub("_MOUSE\\.H10MO.*", "", motif_names)
motif_names <- gsub("-MA.*", "", motif_names)
motif_list <- as.list(motif_names)
names(motif_list) <- names(motif_object@motif.names)
motif_object@motif.names <- motif_list
obj_named <- SetAssayData(obj_named, assay = 'ATAC',
layer = 'motifs', new.data = motif_object)
## Reduce core use to save memory reqs
register(SerialParam()) # turn off parallelization
obj_named <- RunChromVAR(
object = obj_named,
genome = BSgenome.Mmusculus.UCSC.mm39
)
Use just HOCOMOCOv12
# Use just HOCOMOCO v12 (from mouse)
hoco_v12 <- universalmotif::read_matrix(file.path(getwd(),
"motif_files/H12CORE_pwms.txt"),
headers = ">", positions = "rows", sep=NULL)
for (i in 1:length(hoco_v12)) {
full_name <- hoco_v12[[i]]["name"]
#names(hoco_v12)[[i]] <- gsub(".H12CORE.*", "", full_name)
names(hoco_v12)[[i]] <- full_name
hoco_v12[[i]] <- convert_motifs(hoco_v12[[i]], "TFBSTools-PFMatrix")
}
hoco_v12_pfmList <- do.call(PFMatrixList, hoco_v12)
names(hoco_v12_pfmList)[grep("mef2c", names(hoco_v12_pfmList), ignore.case=TRUE)]
hoco_0rankedsubtypes <- copy(hoco_v12_pfmList)
for (motif_name in names(hoco_0rankedsubtypes)) {
rank <- stringr::str_split(motif_name, "\\.")[[1]][3]
if (rank != "0") {
hoco_0rankedsubtypes[[motif_name]] <- NULL
}
}
hoco_0rankedsubtypes <- do.call(PFMatrixList, hoco_0rankedsubtypes)
# Use HOCOMOCO non-redundant ("https://hocomoco12.autosome.org/final_bundle/hocomoco12/H12CORE-CLUSTERED/H12CORE-CLUSTERED_pwm.tar.gz")
hoco_mm <- CreateMotifMatrix(features = granges(obj_named),
pwm = hoco_0rankedsubtypes,
genome = BSgenome.Mmusculus.UCSC.mm39,
use.counts = FALSE)
hoco_motif_object <- CreateMotifObject(data = hoco_mm, pwm = hoco_0rankedsubtypes)
saveRDS(hoco_motif_object, "hoco_motif_object.rds")
obj_named <- SetAssayData(obj_named, assay = 'ATAC',
layer = 'motifs', new.data = hoco_motif_object)
## Reduce core use to save memory reqs
register(SerialParam()) # turn off parallelization
obj_named <- RunChromVAR(
object = obj_named,
genome = BSgenome.Mmusculus.UCSC.mm39
)
Add some helpful columns showing treatment status and timepoint
obj_named$condition <- gsub("captopril-", "", obj_named$orig.ident)
obj_named$condition <- gsub("_.*", "", obj_named$condition)
obj_named$timepoint <- gsub("captopril-ctrl_", "", obj_named$orig.ident)
obj_named$timepoint <- gsub("captopril-trt_", "", obj_named$timepoint)
obj_named$cell_type_x_condition <- paste(obj_named$condition, obj_named$cell_type, sep="_")
table(obj_named$cell_type_x_condition)
obj_named$cell_type_x_timepoint <- paste(obj_named$timepoint, obj_named$cell_type, sep="_")
table(obj_named$cell_type_x_timepoint)
obj_named$cell_type_x_condition_x_source <- paste(obj_named$condition, obj_named$timepoint, obj_named$cell_type, sep="_")
table(obj_named$cell_type_x_condition_x_source)
Quantify motif enrichment and expression values
markers_rna <- presto:::wilcoxauc.Seurat(X = obj_named,
group_by = 'cell_type', assay = 'data', seurat_assay = 'SCT')
markers_motifs <- presto:::wilcoxauc.Seurat(X = obj_named,
group_by = 'cell_type', assay = 'data', seurat_assay = 'chromvar')
motif_names <- markers_motifs$feature
colnames(markers_rna) <- paste0("RNA.", colnames(markers_rna))
colnames(markers_motifs) <- paste0("motif.", colnames(markers_motifs))
markers_rna$gene <- markers_rna$RNA.feature
markers_motifs$gene <- ConvertMotifID(obj_named, id = motif_names)
markers_motifs$gene <- stringr::str_to_sentence(gsub(".H12CORE.*", "", markers_motifs$gene))
topTFs <- function(celltype, padj_cutoff = 1e-2) {
ctmarkers_rna <- dplyr::filter(
markers_rna, RNA.group == celltype, RNA.padj < padj_cutoff, abs(RNA.logFC) > 0) %>%
arrange(-RNA.auc)
ctmarkers_motif <- dplyr::filter(
markers_motifs, motif.group == celltype, motif.padj < padj_cutoff, abs(motif.logFC) > 0) %>%
arrange(-motif.auc)
top_tfs <- inner_join(
x = ctmarkers_rna[, c(2, 11, 4, 6, 8)],
y = ctmarkers_motif[, c(2, 1, 11, 4, 6, 8)], by = "gene"
)
top_tfs$avg_auc <- (top_tfs$RNA.auc + top_tfs$motif.auc) / 2
top_tfs <- arrange(top_tfs, -avg_auc)
return(top_tfs)
}
for (celltype in unique(obj_named$cell_type)) {
dat <- as.data.table(head(topTFs(celltype), 10))
fwrite(dat, paste0(sample_name, "_", celltype, "_Top10-Putative-Regulators.csv"), sep=",")
}
Identify regulators of cells split on condition (control vs treated)
Idents(obj_named) <- obj_named$cell_type_x_condition
markers_rna_default <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
features=filtered_features,
min.pct = 0.01, logfc.threshold = 1.00)
saveRDS(markers_rna_default, "markers_rna_default.rds")
markers_rna_default <- as.data.table(markers_rna_default, keep.rownames=TRUE)
markers_rna_roc <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
features=filtered_features, test.use="roc",
min.pct = 0.01, logfc.threshold = 1.00)
saveRDS(markers_rna_roc, "markers_rna_roc.rds")
markers_rna_roc <- as.data.table(markers_rna_roc, keep.rownames=TRUE)
markers_rna_MAST <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
test.use="MAST", features=filtered_features,
min.pct = 0.01, logfc.threshold = 1.00)
saveRDS(markers_rna_MAST, "markers_rna_MAST.rds")
markers_rna <- merge(markers_rna_default, markers_rna_roc, by=c("cluster", "gene"))
markers_rna <- markers_rna[,c(1, 2, 4:8, 10:12)]
colnames(markers_rna) <- c("group", "gene", "p_val", "logFC", "pct.1",
"pct.2", "padj", "auc", "avg_diff", "power")
saveRDS(markers_rna, "markers_rna.rds")
DefaultAssay(obj_named) <- "chromvar"
features <- Features(obj_named)
markers_motif_MAST <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
test.use="MAST", features=features,
min.pct = 0.01, logfc.threshold = 1.00)
markers_motif_default <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
features=features, min.pct = 0.01, logfc.threshold = 1.00)
markers_motif_default$gene <- stringr::str_to_sentence(gsub("\\.H12CORE\\..*", "", markers_motif_default$gene))
saveRDS(markers_motif_default, "markers_motif_default.rds")
markers_motif_default <- as.data.table(markers_motif_default, keep.rownames=TRUE)
markers_motif_roc_0.5 <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
features=features, test.use="roc",
min.pct = 0.01, logfc.threshold = 0.5)
markers_motif_roc_0 <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
features=features, test.use="roc",
min.pct = 0.01, logfc.threshold = 0)
markers_motif_roc_0$gene <- stringr::str_to_sentence(gsub("\\.H12CORE\\..*", "", markers_motif_roc_0$gene))
markers_motif_roc_0 <- as.data.table(markers_motif_roc_0, keep.rownames=TRUE)
saveRDS(markers_motif_roc_0, "markers_motif_roc_0.rds")
markers_motif_roc <- FindAllMarkers(obj_named, only.pos = F, random.seed=99,
features=features, test.use="roc",
min.pct = 0.01, logfc.threshold = 1.00)
saveRDS(markers_motif_roc, "markers_motif_roc.rds")
markers_motif_roc <- as.data.table(markers_motif_roc, keep.rownames=TRUE)
markers_motif_roc$gene <- stringr::str_to_sentence(gsub("\\.H12CORE\\..*", "", markers_motif_roc$gene))
markers_motif <- merge(markers_motif_default, markers_motif_roc, by=c("cluster", "gene"))
markers_motif <- markers_motif[,c(1, 2, 4:9, 10:12)]
colnames(markers_motif) <- c("group", "gene", "p_val", "logFC", "pct.1",
"pct.2", "padj", "motif.name", "auc", "avg_diff",
"power")
saveRDS(markers_motif, "markers_motif.rds")
motif_names <- markers_motif$motif.name
colnames(markers_rna) <- paste0("RNA.", colnames(markers_rna))
colnames(markers_rna) <- c("RNA.group", "gene", "RNA.p_val", "RNA.logFC",
"RNA.pct.1", "RNA.pct.2", "RNA.padj", "RNA.auc",
"RNA.avg_diff", "RNA.power")
colnames(markers_motif) <- paste0("motif.", colnames(markers_motif))
colnames(markers_motif) <- c("motif.group", "gene", "motif.p_val",
"motif.logFC", "motif.pct.1", "motif.pct.2", "motif.padj", "motif.name",
"motif.auc", "motif.avg_diff", "motif.power")
topTFs <- function(celltype,
rna_markers = markers_rna,
motif_markers = markers_motif,
padj_cutoff = 1e-2,
log2FC_cutoff = 0) {
ctmarkers_rna <- dplyr::filter(
rna_markers, RNA.group == celltype, RNA.padj < padj_cutoff,
abs(RNA.logFC) > log2FC_cutoff) %>%
arrange(-RNA.logFC)
ctmarkers_motif <- dplyr::filter(
motif_markers, motif.group == celltype, motif.padj < padj_cutoff,
abs(motif.logFC) > log2FC_cutoff) %>%
arrange(motif.padj)
top_tfs <- inner_join(
x = ctmarkers_rna,
y = ctmarkers_motif, by = "gene"
)
top_tfs$avg_auc <- (top_tfs$RNA.auc + top_tfs$motif.auc) / 2
top_tfs <- arrange(top_tfs, -avg_auc)
return(top_tfs)
}
Plot TF regulators
obj_named$cell_type <- factor(obj_named$cell_type,
levels = c("POD", "POD_Plasma", "POD_EnC", "EnC", "glomEnC", "fEnC",
"arterialEnC", "PEC", "IC", "aIC", "bIC", "CDPC", "mCDPC",
"CNT_CDPC", "CNT", "DCT", "aLoH", "dLoH", "PTs1", "URO",
"JG", "tSMC", "SMC_RPC", "SMC_MC", "RPC_MC", "MD", "FB",
"cDC", "KRM", "Tcell"))
DefaultAssay(obj_named) <- "RNA"
dir.create(file.path(getwd(), "TF_regulators/roc"))
for (celltype in unique(obj_named$cell_type_x_condition)) {
dat <- as.data.table(head(topTFs(celltype,
rna_markers = markers_rna,
motif_markers = markers_motif,
padj_cutoff = 1e-1, log2FC_cutoff = 0), 10))
if (nrow(dat) > 0) {
fwrite(dat, paste0("TF_regulators/roc/", sample_name, "_", celltype,
"_Top10-Putative-Regulators.csv"), sep=",")
vln_plt <- Stacked_VlnPlot(obj_named, assay = "RNA",
features = dat$gene[!is.na(dat$gene[1:4])],
alpha=0.25, group.by = "cell_type",
split.by = "condition", pt.size = 0.1,
color_seed = 99, split.plot=TRUE,
x_lab_rotate = 90) +
custom_theme() +
theme(plot.title = element_text(size = 15/.pt),
axis.text.x = element_text(size = 12/.pt, angle = 90,
hjust = 1, vjust = 0.5),
axis.text.y = element_text(size = 12/.pt),
axis.title.y = element_text(size = 15/.pt)) +
labs(x="", y="Module Enrichment Score (normalized U Statistic)")
if (!is.null(vln_plt)) {
ggsave(paste0("TF_regulators/roc/", sample_name, "_", celltype,
"-TFregulators-VlnPlt_", Sys.Date(), ".png"),
plot = vln_plt, width = 180, height = 90, units = "mm", dpi = 300)
ggsave(paste0("TF_regulators/roc/", sample_name, "_", celltype,
"-TFregulators-VlnPlt_", Sys.Date(), ".svg"),
plot = vln_plt, width = 180, height = 90, units = "mm", dpi = 300)
}
ctrl_cells <- rownames(obj_named@meta.data[obj_named@meta.data$condition == "ctrl",])
ft_plt_ctrl <- FeaturePlot(obj_named, features = dat$gene[!is.na(dat$gene[1:4])],
cells=ctrl_cells, reduction = "umap.mnn",
label.size=1, ncol = 2, order = T,
label = TRUE, repel = TRUE) &
custom_theme() &
theme(text = element_text(size = 12/.pt),
plot.title = element_text(size = 15/.pt),
axis.text.x = element_text(size = 12/.pt),
axis.text.y = element_text(size = 12/.pt),
axis.title.x = element_text(size = 15/.pt),
axis.title.y = element_text(size = 15/.pt))
trt_cells <- rownames(obj_named@meta.data[obj_named@meta.data$condition == "trt",])
ft_plt_trt <- FeaturePlot(obj_named, features = dat$gene[!is.na(dat$gene[1:4])],
cells=trt_cells, reduction = "umap.mnn",
label.size=1, ncol = 2, order = T,
label = TRUE, repel = TRUE) &
custom_theme() &
theme(text = element_text(size = 12/.pt),
plot.title = element_text(size = 15/.pt),
axis.text.x = element_text(size = 12/.pt),
axis.text.y = element_text(size = 12/.pt),
axis.title.x = element_text(size = 15/.pt),
axis.title.y = element_text(size = 15/.pt))
ggsave(paste0("TF_regulators/roc/", sample_name, "_", celltype,
"-TFregulators-FeatPlt_", Sys.Date(), ".png"),
plot = ft_plt_ctrl | ft_plt_trt, width = 360, height = 180,
units = "mm", dpi = 300)
ggsave(paste0("TF_regulators/roc/", sample_name, "_", celltype,
"-TFregulators-FeatPlt_", Sys.Date(), ".svg"),
plot = ft_plt_ctrl | ft_plt_trt, width = 360, height = 180,
units = "mm", dpi = 300)
}
}
DefaultAssay(obj_named) <- "ATAC"
markers_rna_presto <- presto:::wilcoxauc.Seurat(X = obj_named,
group_by = 'cell_type_x_condition', assay = 'data', seurat_assay = 'RNA') # 'SCT'
markers_motifs_presto <- presto:::wilcoxauc.Seurat(X = obj_named,
group_by = 'cell_type_x_condition', assay = 'data', seurat_assay = 'chromvar')
motif_names <- markers_motifs_presto$feature
colnames(markers_rna_presto) <- paste0("RNA.", colnames(markers_rna_presto))
colnames(markers_motifs_presto) <- paste0("motif.", colnames(markers_motifs_presto))
markers_rna_presto$gene <- markers_rna_presto$RNA.feature
markers_motifs_presto$gene <- ConvertMotifID(obj_named, id = motif_names)
markers_motifs_presto$gene <- stringr::str_to_sentence(gsub(".H12CORE.*", "", markers_motifs_presto$gene))
dir.create(file.path(getwd(), "TF_regulators/presto"))
for (celltype in unique(obj_named$cell_type_x_condition)) {
dat <- as.data.table(head(topTFs(celltype,
rna_markers = markers_rna_presto,
motif_markers = markers_motifs_presto,
padj_cutoff = 1e-2, log2FC_cutoff = 0.5), 10))
if (nrow(dat) > 0) {
fwrite(dat, paste0("TF_regulators/presto/", sample_name, "_", celltype,
"_Top10-Putative-Regulators.csv"), sep=",")
vln_plt <- Stacked_VlnPlot(obj_named, assay = "RNA",
features = dat$gene[!is.na(dat$gene[1:4])],
alpha=0.25, group.by = "cell_type",
split.by = "condition", pt.size = 0.1,
color_seed = 99, split.plot=TRUE,
x_lab_rotate = 90) +
custom_theme() +
theme(plot.title = element_text(size = 15/.pt),
axis.text.x = element_text(size = 12/.pt, angle = 90,
hjust = 1, vjust = 0.5),
axis.text.y = element_text(size = 12/.pt),
axis.title.y = element_text(size = 15/.pt)) +
labs(x="", y="Module Enrichment Score (normalized U Statistic)")
if (!is.null(vln_plt)) {
ggsave(paste0("TF_regulators/presto/", sample_name, "_", celltype,
"-TFregulators-VlnPlt_", Sys.Date(), ".png"),
plot = vln_plt, width = 180, height = 90, units = "mm", dpi = 300)
ggsave(paste0("TF_regulators/presto/", sample_name, "_", celltype,
"-TFregulators-VlnPlt_", Sys.Date(), ".svg"),
plot = vln_plt, width = 180, height = 90, units = "mm", dpi = 300)
}
ctrl_cells <- rownames(obj_named@meta.data[obj_named@meta.data$condition == "ctrl",])
ft_plt_ctrl <- FeaturePlot(obj_named, features = dat$gene[!is.na(dat$gene[1:4])],
cells=ctrl_cells, reduction = "umap.mnn",
label.size=1, ncol = 2, order = T,
label = TRUE, repel = TRUE) &
custom_theme() &
theme(text = element_text(size = 12/.pt),
plot.title = element_text(size = 15/.pt),
axis.text.x = element_text(size = 12/.pt),
axis.text.y = element_text(size = 12/.pt),
axis.title.x = element_text(size = 15/.pt),
axis.title.y = element_text(size = 15/.pt))
trt_cells <- rownames(obj_named@meta.data[obj_named@meta.data$condition == "trt",])
ft_plt_trt <- FeaturePlot(obj_named, features = dat$gene[!is.na(dat$gene[1:4])],
cells=trt_cells, reduction = "umap.mnn",
label.size=1, ncol = 2, order = T,
label = TRUE, repel = TRUE) &
custom_theme() &
theme(text = element_text(size = 12/.pt),
plot.title = element_text(size = 15/.pt),
axis.text.x = element_text(size = 12/.pt),
axis.text.y = element_text(size = 12/.pt),
axis.title.x = element_text(size = 15/.pt),
axis.title.y = element_text(size = 15/.pt))
ggsave(paste0("TF_regulators/presto/", sample_name, "_", celltype,
"-TFregulators-FeatPlt_", Sys.Date(), ".png"),
plot = ft_plt_ctrl | ft_plt_trt, width = 360, height = 180,
units = "mm", dpi = 300)
ggsave(paste0("TF_regulators/presto/", sample_name, "_", celltype,
"-TFregulators-FeatPlt_", Sys.Date(), ".svg"),
plot = ft_plt_ctrl | ft_plt_trt, width = 360, height = 180,
units = "mm", dpi = 300)
}
}