Load packages
library(crumblr)
library(SingleCellExperiment)
library(zellkonverter)
library(dreamlet)
library(kableExtra)
library(scattermore)
library(cowplot)
library(ggplot2)
library(ggtree)
library(aplot)
library(tidyverse)
library(ggcorrplot)
library(DelayedArray)
library(RColorBrewer)
Load data
# read H5AD file
file = "/sc/arion/projects/CommonMind/hoffman/scRNAseq_data/Kfoury_CancerCell_2021/Kfoury_CancerCell_2021.h5ad"
sce = readH5AD(file, use_hdf5=TRUE, verbose=TRUE)
counts(sce) = assay(sce, "X")
sce$cells = factor(sce$cells, sort(levels(sce$cells)))
# create pseudobulk
pb <- aggregateToPseudoBulk(sce,
assay = "counts",
cluster_id = "cells",
sample_id = "ID")
UMAP embedding
colData(sce) %>%
data.frame() %>%
ggplot(aes(Coord1, Coord2, color=cells)) +
geom_scattermore() +
theme_classic() +
theme(aspect.ratio=1, axis.text=element_blank()) +
scale_color_discrete(name="Cell type")

Properties of dataset
df = data.frame(table(pb$Status))
colnames(df) = c("Disease status", "Count")
df %>%
kbl(row.names=FALSE) %>%
kable_classic(full_width = FALSE)
Disease status
|
Count
|
Benign
|
7
|
Distal
|
8
|
Involved
|
8
|
Tumor
|
9
|
df = list()
df$`# Samples` = ncol(pb)
df$`# Subjects` = nlevels(sce$subject.id)
df$`# Cells` = ncol(sce)
df = unlist(df)
df = data.frame(Propert=names(df), count = df)
df %>%
kbl(row.names=FALSE) %>%
kable_classic(full_width = FALSE)
Propert
|
count
|
# Samples
|
32
|
# Subjects
|
16
|
# Cells
|
76709
|
form = ~ subject.id + Status + subject.status
C = canCorPairs(form, colData(pb) )
ggcorrplot(C, hc.order = TRUE)

Plots of cell fractions
fracs = cellCounts(pb) / rowSums(cellCounts(pb))
figList = lapply(levels(pb$Status), function(lvl){
i = pb$Status == lvl
plotPercentBars(fracs[i,], col=ggColorHue(ncol(fracs))) +
ylab("Cell fractions") +
theme(legend.position = "none", axis.text.y=element_blank()) +
ggtitle(lvl)
})
plot_grid(plotlist = figList)

df = data.frame(fracs, diagnosis = pb$Status, check.names=FALSE)
df = reshape2::melt(df, id.vars="diagnosis")
ggplot(df, aes(diagnosis, value, fill=variable)) +
geom_violin() +
geom_boxplot(fill="grey50", width=.1) +
facet_wrap(~ variable) +
theme_classic() +
theme(aspect.ratio=1, legend.position="none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab(bquote(Fraction~(log[10]))) +
scale_y_log10()

df %>%
group_by(variable, diagnosis) %>%
mutate(diagnosis = recode(diagnosis, '0' = "Control", '1' = "Alzheimer's")) %>%
summarize(mean = mean(value)) %>%
pivot_wider(names_from=c('diagnosis'), values_from=c("mean")) %>%
kbl %>%
kable_styling(full_width = F)
variable
|
Benign
|
Distal
|
Involved
|
Tumor
|
CD4+ Naive
|
0.2177292
|
0.2145399
|
0.1585681
|
0.1005954
|
CD8+ Naive
|
0.0319700
|
0.0116995
|
0.0029211
|
0.0033353
|
CTL-1
|
0.0828786
|
0.0279128
|
0.0259314
|
0.0578042
|
CTL-2
|
0.0991289
|
0.0629982
|
0.0539045
|
0.0819523
|
Endothelial
|
0.0000942
|
0.0000449
|
0.0003057
|
0.0629532
|
Erythroid
|
0.0031734
|
0.0385307
|
0.0501333
|
0.0122791
|
Immature B cells
|
0.1087081
|
0.0041559
|
0.0044353
|
0.0022227
|
Mature B
|
0.1052535
|
0.0602596
|
0.0437274
|
0.0322325
|
mDC
|
0.0115375
|
0.0129284
|
0.0219190
|
0.0220848
|
memBcell
|
0.0064152
|
0.0222006
|
0.0139807
|
0.0319918
|
Mono1
|
0.0370914
|
0.0637082
|
0.0901730
|
0.0488815
|
Mono2
|
0.0163508
|
0.0957013
|
0.1029294
|
0.0079260
|
Mono3
|
0.0029651
|
0.0121092
|
0.0341568
|
0.0223334
|
Monocyte prog
|
0.0133041
|
0.0288313
|
0.0404555
|
0.0380992
|
NK
|
0.0327551
|
0.1059334
|
0.0934318
|
0.0333365
|
NKT
|
0.0327416
|
0.0872520
|
0.0642379
|
0.0314052
|
Osteoblasts
|
0.0007109
|
0.0003002
|
0.0009555
|
0.0528835
|
Osteoclasts
|
0.0000000
|
0.0001841
|
0.0002069
|
0.0108651
|
pDC
|
0.0130209
|
0.0064411
|
0.0125584
|
0.0099439
|
Pericytes
|
0.0000000
|
0.0000000
|
0.0000000
|
0.0449904
|
Pro-B
|
0.0289733
|
0.0051036
|
0.0035428
|
0.0022767
|
Progenitors
|
0.0575814
|
0.0658152
|
0.1075834
|
0.0507230
|
TAM
|
0.0003468
|
0.0005166
|
0.0032153
|
0.0456149
|
Th1/17
|
0.0839697
|
0.0508120
|
0.0366079
|
0.0656371
|
TIM
|
0.0002932
|
0.0104515
|
0.0150103
|
0.0821360
|
Treg Active
|
0.0058054
|
0.0034376
|
0.0028552
|
0.0072373
|
Treg Resting
|
0.0072015
|
0.0061415
|
0.0042586
|
0.0041857
|
Tumor
|
0.0000000
|
0.0019907
|
0.0119948
|
0.0340733
|
crumblr
Variance partitioning analysis
cobj = crumblr(cellCounts(pb))
form = ~ (1|subject.id) + (1|Status) + (1|subject.status)
vp.c = fitExtractVarPartModel(cobj, form, colData(pb))
plotVarPart(sortCols(vp.c), label.angle=60, ncol=4) + theme(aspect.ratio=1)

fig.vp = plotPercentBars( sortCols(vp.c) )
# analysis with dream()
form = ~ 0 + Status + (1|subject.id) #+ (1|subject.status)
keep = pb$subject.status == "prostate cancer patient"
# Specify contrasts to compare regression coefficients
# For example,
# Tumor_Involved = 'StatusTumor - StatusInvolved' tests if
# expression in Tumor is different that in Involved samples
contrasts = c(Tumor_Involved = 'StatusTumor - StatusInvolved',
Tumor_Distal = 'StatusTumor - StatusDistal',
Involved_Distal = 'StatusInvolved - StatusDistal')
L = makeContrastsDream(form, colData(pb)[keep,], contrasts=contrasts)
fit = dream( cobj[,keep], form, colData(pb)[keep,], L=L)
fit = eBayes(fit)
Multivariate test along hierarchy
coef = "Tumor_Involved"
hc = buildClusterTreeFromPB(pb)
res = treeTest( fit, cobj, hc, coef=coef)
cols = c(brewer.pal(3, "Set1"), "grey85")
fig1 = plotTreeTestBeta(res) + xlim(0, 15) +
theme(legend.position="bottom", legend.box = "vertical")
fig2 = crumblr::plotForest(res, hide=TRUE)
fig3 = plotPercentBars( sortCols(vp.c), col=cols)
# combine plots
fig2 %>% insert_left(fig1) %>% insert_right(fig3)

Plot of pericytes
library(ggbeeswarm)
library(ggrepel)
CT = "Pericytes"
df = data.frame(CLR = cobj$E[CT,],
DiseaseStatus = pb$Status,
se = 1/sqrt(cobj$weights[CT,]),
counts = cellCounts(pb)[,CT],
totalCells = rowSums(cellCounts(pb)))
# points to highlight
df_count1 = df %>%
filter(counts == 0) %>%
arrange(-se) %>%
head(2)
df_count2 = df %>%
arrange(se, -counts) %>%
head(2)
df_count = rbind(df_count1, df_count2)
df %>%
ggplot(aes(DiseaseStatus, CLR, color=se)) +
geom_boxplot(width=.3) +
geom_beeswarm(cex = 2, size=3) +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1, axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
ggtitle(CT) +
scale_color_gradient2(low="grey", high="red", name = "Standard error") +
geom_text_repel(data=df_count, aes(DiseaseStatus, CLR, label=paste(counts, totalCells, sep=' / ')), color="black", box.padding=2)
