library(data.table)
library(SingleCellExperiment)
library(zellkonverter)
library(stringr)
library(GSEABase)
library(dreamlet)
library(scater)
library(zenith)
library(knitr)
library(kableExtra)
library(scattermore)
library(cowplot)
library(ggplot2)
library(qvalue)
library(tidyverse)
library(RColorBrewer)
library(BiocParallel)
library(DelayedArray)
# read H5AD file
file = "/sc/arion/projects/CommonMind/hoffman/scRNAseq_data/Kfoury_CancerCell_2021/Kfoury_CancerCell_2021.h5ad"
sce = readH5AD(file, use_hdf5=TRUE)
sce$cells = factor(sce$cells, sort(levels(sce$cells)))
# create pseudobulk
pb <- aggregateToPseudoBulk(sce,
assay = "counts",
cluster_id = "cells",
sample_id = "ID")
colData(sce) %>%
data.frame() %>%
ggplot(aes(Coord1, Coord2, color=cells)) +
geom_scattermore() +
theme_classic() +
theme(aspect.ratio=1, axis.text=element_blank()) +
scale_color_discrete(name="Cell type")
df = data.frame(table(pb$Status))
colnames(df) = c("Disease status", "Count")
df %>%
kbl(row.names=FALSE) %>%
kable_classic(full_width = FALSE)
Disease status | Count |
---|---|
Benign | 7 |
Distal | 8 |
Involved | 8 |
Tumor | 9 |
df = list()
df$`# Samples` = ncol(pb)
df$`# Subjects` = nlevels(sce$subject.id)
df$`# Cells` = ncol(sce)
df = unlist(df)
df = data.frame(Propert=names(df), count = df)
df %>%
kbl(row.names=FALSE) %>%
kable_classic(full_width = FALSE)
Propert | count |
---|---|
# Samples | 32 |
# Subjects | 16 |
# Cells | 76709 |
types = c("Benign", "Distal", "Involved", "Tumor")
pb$Status = factor(pb$Status, types)
# Normalize and apply voom/voomWithDreamWeights
form = ~ (1|subject.id) + (1|Status) + (1|subject.status)
res.proc = processAssays( pb,
form,
min.cells = 10,
min.counts = 10,
BPPARAM = SnowParam(6))
details(res.proc)
## assay n_retain
## 1 CD4+ Naive 31
## 2 CD8+ Naive 14
## 3 CTL-1 28
## 4 CTL-2 31
## 5 Endothelial 6
## 6 Erythroid 18
## 7 Immature B cells 14
## 8 Mature B 31
## 9 mDC 27
## 10 memBcell 21
## 11 Mono1 29
## 12 Mono2 26
## 13 Mono3 25
## 14 Monocyte prog 24
## 15 NK 30
## 16 NKT 29
## 17 Osteoblasts 7
## 18 pDC 20
## 19 Pericytes 6
## 20 Pro-B 12
## 21 Progenitors 32
## 22 TAM 9
## 23 Th1/17 29
## 24 TIM 14
## 25 Treg Active 13
## 26 Treg Resting 17
## 27 Tumor 11
## formula formDropsTerms
## 1 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 2 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 3 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 4 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 5 ~1 TRUE
## 6 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 7 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 8 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 9 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 10 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 11 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 12 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 13 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 14 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 15 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 16 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 17 ~1 TRUE
## 18 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 19 ~1 TRUE
## 20 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 21 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 22 ~(1 | subject.id) + (1 | Status) TRUE
## 23 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 24 ~(1 | subject.id) + (1 | Status) TRUE
## 25 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 26 ~(1 | subject.id) + (1 | Status) + (1 | subject.status) FALSE
## 27 ~(1 | subject.id) + (1 | Status) TRUE
## n_genes n_errors error_initial
## 1 7965 0 FALSE
## 2 1759 0 FALSE
## 3 3424 0 FALSE
## 4 5371 0 FALSE
## 5 1787 0 FALSE
## 6 7406 1 FALSE
## 7 1314 0 FALSE
## 8 4302 0 FALSE
## 9 3839 0 FALSE
## 10 2937 0 FALSE
## 11 4740 0 FALSE
## 12 3707 0 FALSE
## 13 1899 0 FALSE
## 14 3524 0 FALSE
## 15 4730 0 FALSE
## 16 3899 0 FALSE
## 17 2232 0 FALSE
## 18 2517 0 FALSE
## 19 905 0 FALSE
## 20 2102 0 FALSE
## 21 7825 0 FALSE
## 22 1553 0 FALSE
## 23 4299 0 FALSE
## 24 2402 0 FALSE
## 25 579 3 FALSE
## 26 869 0 FALSE
## 27 2655 0 FALSE
# show voom plot for each cell clusters
plotVoom( res.proc, ncol=4)
# run variance partitioning analysis
form = ~ (1|subject.id) + (1|Status) + (1|subject.status)
vp.lst = fitVarPart( res.proc, form, BPPARAM = SnowParam(6, progressbar=TRUE))
# Summarize variance fractions genome-wide for each cell type
plotVarPart(sortCols(vp.lst), label.angle=60, ncol=4)
# Differential expression analysis for Status within each cell type,
# evaluated on the voom normalized data
form = ~ 0 + Status + (1|subject.id)
# Specify contrasts to compare regression coefficients
# For example,
# Tumor_Involved = 'StatusTumor - StatusInvolved' tests if
# expression in Tumor is different that in Involved samples
contrasts = c(Tumor_Involved = 'StatusTumor - StatusInvolved',
Tumor_Distal = 'StatusTumor - StatusDistal',
Involved_Distal = 'StatusInvolved - StatusDistal')
# dreamlet analysis
res.dl = dreamlet(res.proc,
form,
contrasts=contrasts,
BPPARAM = SnowParam(6, progressbar=TRUE))
plotVolcano( res.dl, coef = 'Tumor_Involved', ncol=4)
plotVolcano( res.dl, coef = 'Tumor_Distal', ncol=4)
plotVolcano( res.dl, coef = 'Involved_Distal', ncol=4)
file = "./topTable_Kfoury_2021.tsv"
names(contrasts) %>%
map_df(function(x)
topTable(res.dl, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = x)) %>%
write.table(file, quote=FALSE, sep='\t', row.names=FALSE)
# Summarize differential expression for each coef and assay
df = names(contrasts) %>%
map_df(function(x)
topTable(res.dl, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = x)) %>%
group_by(coef,assay) %>%
summarize( nDE = sum(adj.P.Val < 0.05),
pi1 = 1 - pi0est(P.Value)$pi0,
nGenes = length(adj.P.Val)) %>%
mutate(assay = factor(assay, assayNames(pb)))
ymax = 1.05*max(df$nGenes)
fig1 = ggplot(df, aes(nGenes, assay, fill=assay)) +
geom_bar(stat="identity") +
theme_classic() +
theme(aspect.ratio=1, legend.position="none") +
scale_x_continuous(limits=c(0,ymax), expand=c(0,0)) +
xlab("# genes expressed") +
ylab("Cell type") +
facet_wrap(~ coef)
ymax = 1.05*max(df$nDE)
fig2 = ggplot(df, aes(nDE, assay, fill=assay)) +
geom_bar(stat="identity") +
theme_classic() +
theme(aspect.ratio=1, legend.position="none") +
scale_x_continuous(limits=c(0,ymax), expand=c(0,0)) +
xlab("# genes with FDR < 5%") +
ylab("Cell type") +
facet_wrap(~ coef)
fig3 = ggplot(df, aes(pi1, assay, fill=assay)) +
geom_bar(stat="identity") +
theme_classic() +
theme(aspect.ratio=1, legend.position="none") +
scale_x_continuous(limits=c(0,1), expand=c(0,0)) +
xlab(bquote(pi[1])) +
ylab("Cell type") +
facet_wrap(~ coef)
plot_grid(fig1, fig2, fig3, labels=LETTERS[1:3], nrow=3, axis="tblr", align="hv")
# Load Gene Ontology database
go.gs = get_GeneOntology(to="SYMBOL")
# Run zenith gene set analysis on result of dreamlet
res_zenith.Tumor_Involved = zenith_gsa(res.dl, coef = 'Tumor_Involved', go.gs)
res_zenith.Tumor_Distal = zenith_gsa(res.dl, coef = 'Tumor_Distal', go.gs)
res_zenith.Involved_Distal = zenith_gsa(res.dl, coef = 'Involved_Distal', go.gs)
plotZenithResults(res_zenith.Tumor_Involved, 5, 3)
plotZenithResults(res_zenith.Tumor_Distal, 5, 3)
plotZenithResults(res_zenith.Involved_Distal, 5, 3)
Tumor vs Involved Monocytes show increase MHC class Involved_Distal and interferon signalling Increased translation in tumor inflammatory monocytes (TIMs) Erythroid show increased antigen presentation B-cell progenentors show icnrease positive regulation of cell growth
CTL exhaustion, loss of mitochrondrial translation
CT = "Mono3"
fig.embed = colData(sce) %>%
data.frame() %>%
ggplot(aes(Coord1, Coord2, color=ifelse(cells==CT, CT, ''))) +
geom_scattermore() +
theme_classic() +
scale_color_manual(name='', values=c("grey", "orange")) +
theme(aspect.ratio=1,
axis.text=element_blank(),
axis.ticks=element_blank(),
legend.position="bottom") +
xlab("Coord 1") +
ylab("Coord 2")
thm = theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), aspect.ratio=1)
col = c(brewer.pal(n=3, "Set1"), "grey85")
fig.voom = plotVoom( res.proc[[CT]]) + theme_classic() + thm
fig.vp = plotVarPart( sortCols(vp.lst), label.angle=20, assays=CT, col=col) + thm
genes = c('CD52', 'IDH3G', "CCNL1", 'NAMPT', 'CHMP1B', 'IFNAR1', 'VIM', 'UFC1', 'NOTCH2')
fig.percent = plotPercentBars( sortCols(vp.lst), assays = CT, genes = genes, col=col) + theme(aspect.ratio=1, legend.position="bottom")
fig.vol = plotVolcano( res.dl[[CT]], coef = 'Tumor_Involved') + ggtitle("Tumor vs Involved")
data = extractData(res.proc, CT)
fig.strat0 = plotStratify( CCNL1 ~ Status, data, legend=FALSE, x.labels=TRUE, sort=FALSE) +
theme(aspect.ratio=2,
plot.title = element_text(hjust = 0.5),
axis.text.x=element_text(hjust=1, vjust=1, angle=30)) +
ggtitle('CCNL1') +
ylab(bquote(log[2]~CPM)) +
xlab('')
fig.strat1 = plotStratify( VIM ~ Status, data, legend=FALSE, x.labels=TRUE, sort=FALSE) +
theme(aspect.ratio=2,
plot.title = element_text(hjust = 0.5),
axis.text.x=element_text(hjust=1, vjust=1, angle=30)) +
ggtitle('VIM') +
ylab(bquote(log[2]~CPM)) +
xlab('')
fig.strat2 = plotStratify( CD52 ~ subject.id, data, legend=FALSE, x.labels=TRUE) +
theme(aspect.ratio=1,
plot.title = element_text(hjust = 0.5),
axis.text.x=element_text(hjust=1, vjust=1, angle=70)) +
ggtitle('CD52') +
ylab(bquote(log[2]~CPM)) +
xlab('')
plot_grid(fig.strat0, fig.strat1, fig.strat2,
nrow=1, ncol=3, labels=LETTERS[1:10], align="hv", axis="t")
gs = c('GO0022624', 'GO1905369', 'GO0007186', 'GO0050920', 'GO0042613', 'GO0044183', 'GO1903561', 'GO0023026', 'GO0042026')
res_zenith = res_zenith.Tumor_Involved
idx = grep(paste0(gs, collapse="|"), res_zenith$Geneset)
res_zenith$assay = factor(res_zenith$assay)
fig.zenith = plotZenithResults(res_zenith[idx,], 5, 3, transpose=FALSE) + theme(axis.text.x=element_text(size=7), legend.position="right",legend.key.size = unit(.4, 'cm'))
lst = list(fig.embed, fig.voom, fig.vp, fig.percent, fig.strat0, fig.strat1, fig.vol)
plot_grid(plotlist=lst, nrow=2, ncol=4, labels=LETTERS[1:10], align="hv", axis="t")
# variance partition results
vp.lst %>%
as_tibble %>%
filter(assay == "Mono3") %>%
summarize(count = sum(Status > 0.1), ngenes = length(Status))
## # A tibble: 1 × 2
## count ngenes
## <int> <int>
## 1 525 1899
# count DE genes
names(contrasts) %>%
map_df(function(x)
topTable(res.dl, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = x)) %>%
filter(assay == "Mono3") %>%
group_by(coef) %>%
summarize(count = sum(adj.P.Val < 0.05), ngenes = length(adj.P.Val))
## # A tibble: 3 × 3
## coef count ngenes
## <chr> <int> <int>
## 1 Involved_Distal 0 1899
## 2 Tumor_Distal 105 1899
## 3 Tumor_Involved 157 1899
res_zenith = res_zenith.Tumor_Involved
idx = grep(paste0(gs, collapse="|"), res_zenith$Geneset)
res_zenith$assay = factor(res_zenith$assay)
fig.zenith1 = plotZenithResults(res_zenith[idx,], 5, 3, sortByGeneset=FALSE) +
theme(axis.text.x=element_text(size=7, angle=90),
legend.position="right",
plot.title = element_text(hjust = 0.5),
legend.key.size = unit(.4, 'cm'))+
ggtitle('Tumor_Involved')
res_zenith = res_zenith.Tumor_Distal
idx = grep(paste0(gs, collapse="|"), res_zenith$Geneset)
res_zenith$assay = factor(res_zenith$assay)
fig.zenith2 = plotZenithResults(res_zenith[idx,], 5, 3, sortByGeneset=FALSE) +
theme(axis.text.x=element_text(size=7, angle=90),
legend.position="right",
plot.title = element_text(hjust = 0.5),
legend.key.size = unit(.4, 'cm')) +
ggtitle('Tumor_Distal')
plot_grid(fig.zenith1, fig.zenith2, ncol=1)
gs1 = "GO0044183: protein folding chaperone"
plotGeneHeatmap( res.dl, coef="Tumor_Involved", genes=sort(geneIds(go.gs[[gs1]])), transpose=TRUE) +
ggtitle(gs1) +
theme(legend.position = "bottom",
axis.text.x=element_text(size=10, angle=60),
axis.text.y=element_text(size=10)) +
xlab('') + ylab('')
gs1 = "GO0044183: protein folding chaperone"
plotGeneHeatmap( res.dl, coef="Tumor_Involved", genes=sort(geneIds(go.gs[[gs1]])), assay=CT, transpose=TRUE) +
ggtitle(gs1) +
theme(legend.position = "bottom",
axis.text.x=element_text(size=10, angle=45),
axis.text.y=element_text(size=10)) +
xlab('') + ylab('')
sessionInfo()
## R version 4.3.3 (2024-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-centos7/oneAPI/p_2024.1.0.560/toolkits/mkl/2024.1/lib/libmkl_gf_lp64.so.2; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] DelayedArray_0.28.0 SparseArray_1.2.4
## [3] S4Arrays_1.2.1 abind_1.4-5
## [5] Matrix_1.6-5 RColorBrewer_1.1-3
## [7] lubridate_1.9.3 forcats_1.0.0
## [9] dplyr_1.1.4 purrr_1.0.2
## [11] readr_2.1.5 tidyr_1.3.1
## [13] tibble_3.2.1 tidyverse_2.0.0
## [15] qvalue_2.34.0 cowplot_1.1.3
## [17] scattermore_1.2 kableExtra_1.4.0
## [19] knitr_1.45 zenith_1.4.2
## [21] scater_1.30.1 scuttle_1.12.0
## [23] dreamlet_1.1.23 variancePartition_1.33.15
## [25] BiocParallel_1.36.0 limma_3.58.1
## [27] ggplot2_3.5.1 GSEABase_1.64.0
## [29] graph_1.80.0 annotate_1.80.0
## [31] XML_3.99-0.16.1 AnnotationDbi_1.64.1
## [33] stringr_1.5.1 zellkonverter_1.13.3
## [35] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
## [37] Biobase_2.62.0 GenomicRanges_1.54.1
## [39] GenomeInfoDb_1.38.8 IRanges_2.36.0
## [41] S4Vectors_0.40.2 BiocGenerics_0.48.1
## [43] MatrixGenerics_1.14.0 matrixStats_1.3.0
## [45] data.table_1.15.4
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 httr_1.4.7
## [3] Rgraphviz_2.46.0 numDeriv_2016.8-1.1
## [5] tools_4.3.3 backports_1.4.1
## [7] utf8_1.2.4 R6_2.5.1
## [9] metafor_4.6-0 HDF5Array_1.30.1
## [11] rhdf5filters_1.14.1 withr_3.0.0
## [13] prettyunits_1.2.0 gridExtra_2.3
## [15] cli_3.6.2 labeling_0.4.3
## [17] sass_0.4.9 KEGGgraph_1.62.0
## [19] SQUAREM_2021.1 mvtnorm_1.2-4
## [21] mixsqp_0.3-54 systemfonts_1.1.0
## [23] svglite_2.1.3 invgamma_1.1
## [25] rstudioapi_0.16.0 RSQLite_2.3.6
## [27] generics_0.1.3 gtools_3.9.5
## [29] metadat_1.2-0 ggbeeswarm_0.7.2
## [31] fansi_1.0.6 lifecycle_1.0.4
## [33] yaml_2.3.8 edgeR_4.0.16
## [35] mathjaxr_1.6-0 BiocFileCache_2.10.2
## [37] gplots_3.1.3.1 rhdf5_2.46.1
## [39] grid_4.3.3 blob_1.2.4
## [41] crayon_1.5.2 dir.expiry_1.10.0
## [43] lattice_0.22-5 beachmat_2.18.1
## [45] msigdbr_7.5.1 KEGGREST_1.42.0
## [47] pillar_1.9.0 boot_1.3-29
## [49] corpcor_1.6.10 codetools_0.2-19
## [51] glue_1.7.0 vctrs_0.6.5
## [53] png_0.1-8 Rdpack_2.6
## [55] gtable_0.3.5 assertthat_0.2.1
## [57] cachem_1.1.0 xfun_0.43
## [59] rbibutils_2.2.16 Rfast_2.1.0
## [61] iterators_1.0.14 statmod_1.5.0
## [63] nlme_3.1-164 pbkrtest_0.5.2
## [65] bit64_4.0.5 progress_1.2.3
## [67] EnvStats_2.8.1 filelock_1.0.3
## [69] bslib_0.7.0 irlba_2.3.5.1
## [71] vipor_0.4.7 KernSmooth_2.23-22
## [73] colorspace_2.1-0 rmeta_3.0
## [75] DBI_1.2.2 tidyselect_1.2.1
## [77] curl_5.2.1 bit_4.0.5
## [79] compiler_4.3.3 BiocNeighbors_1.20.2
## [81] basilisk.utils_1.14.1 xml2_1.3.6
## [83] scales_1.3.0 caTools_1.18.2
## [85] remaCor_0.0.18 digest_0.6.35
## [87] minqa_1.2.6 rmarkdown_2.26
## [89] basilisk_1.14.3 aod_1.3.3
## [91] XVector_0.42.0 RhpcBLASctl_0.23-42
## [93] htmltools_0.5.8.1 pkgconfig_2.0.3
## [95] lme4_1.1-35.2 sparseMatrixStats_1.14.0
## [97] dbplyr_2.5.0 highr_0.10
## [99] mashr_0.2.79 fastmap_1.2.0
## [101] rlang_1.1.3 DelayedMatrixStats_1.24.0
## [103] farver_2.1.2 jquerylib_0.1.4
## [105] jsonlite_1.8.8 BiocSingular_1.18.0
## [107] RCurl_1.98-1.14 magrittr_2.0.3
## [109] GenomeInfoDbData_1.2.11 Rhdf5lib_1.24.2
## [111] munsell_0.5.1 Rcpp_1.0.12
## [113] babelgene_22.9 viridis_0.6.5
## [115] reticulate_1.36.1 EnrichmentBrowser_2.32.0
## [117] RcppZiggurat_0.1.6 stringi_1.8.4
## [119] zlibbioc_1.48.2 MASS_7.3-60.0.1
## [121] plyr_1.8.9 parallel_4.3.3
## [123] ggrepel_0.9.5 Biostrings_2.70.3
## [125] splines_4.3.3 hms_1.1.3
## [127] locfit_1.5-9.9 reshape2_1.4.4
## [129] ScaledMatrix_1.10.0 evaluate_0.23
## [131] RcppParallel_5.1.7 nloptr_2.0.3
## [133] tzdb_0.4.0 ashr_2.2-63
## [135] rsvd_1.0.5 broom_1.0.6
## [137] xtable_1.8-4 fANCOVA_0.6-1
## [139] viridisLite_0.4.2 snow_0.4-4
## [141] truncnorm_1.0-9 lmerTest_3.1-3
## [143] memoise_2.0.1 beeswarm_0.4.0
## [145] timechange_0.3.0