Load packages

library(crumblr)
library(SingleCellExperiment)
library(zellkonverter)
library(dreamlet)
library(kableExtra)
library(scattermore)
library(cowplot)
library(ggplot2)
library(ggtree)
library(aplot)
library(tidyverse)
library(ggcorrplot)
library(DelayedArray)
library(RColorBrewer)

Load data

# read H5AD file
file = "/sc/arion/projects/CommonMind/hoffman/scRNAseq_data/Kfoury_CancerCell_2021/Kfoury_CancerCell_2021.h5ad"
sce = readH5AD(file, use_hdf5=TRUE, verbose=TRUE) 
counts(sce) = assay(sce, "X")  

sce$cells = factor(sce$cells, sort(levels(sce$cells)))

# create pseudobulk 
pb <- aggregateToPseudoBulk(sce,
    assay = "counts",     
    cluster_id = "cells",  
    sample_id = "ID")

UMAP embedding

colData(sce) %>%
  data.frame() %>%
  ggplot(aes(Coord1, Coord2, color=cells)) + 
  geom_scattermore() +
  theme_classic() + 
  theme(aspect.ratio=1, axis.text=element_blank()) +
  scale_color_discrete(name="Cell type")

Properties of dataset

df = data.frame(table(pb$Status))
colnames(df) = c("Disease status", "Count")

df %>%
  kbl(row.names=FALSE) %>% 
  kable_classic(full_width = FALSE) 
Disease status Count
Benign 7
Distal 8
Involved 8
Tumor 9
df = list()
df$`# Samples` = ncol(pb)
df$`# Subjects` = nlevels(sce$subject.id)
df$`# Cells` = ncol(sce)
df = unlist(df)
df = data.frame(Propert=names(df), count = df)

df %>%  
  kbl(row.names=FALSE) %>% 
  kable_classic(full_width = FALSE) 
Propert count
# Samples 32
# Subjects 16
# Cells 76709
form = ~ subject.id + Status + subject.status 
C = canCorPairs(form, colData(pb) )
ggcorrplot(C, hc.order = TRUE)

Plots of cell fractions

fracs = cellCounts(pb) / rowSums(cellCounts(pb))

figList = lapply(levels(pb$Status), function(lvl){
  i = pb$Status == lvl
  plotPercentBars(fracs[i,], col=ggColorHue(ncol(fracs))) + 
    ylab("Cell fractions") + 
    theme(legend.position = "none", axis.text.y=element_blank()) +
    ggtitle(lvl)
})

plot_grid(plotlist = figList)

df = data.frame(fracs, diagnosis = pb$Status, check.names=FALSE)
df = reshape2::melt(df, id.vars="diagnosis")
 
ggplot(df, aes(diagnosis, value, fill=variable)) + 
  geom_violin() +
  geom_boxplot(fill="grey50", width=.1) + 
  facet_wrap(~ variable) +
  theme_classic() +
  theme(aspect.ratio=1, legend.position="none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ylab(bquote(Fraction~(log[10]))) +
  scale_y_log10() 

df %>%
  group_by(variable, diagnosis) %>%
  mutate(diagnosis = recode(diagnosis, '0' = "Control", '1' = "Alzheimer's"))  %>% 
  summarize(mean = mean(value)) %>%
  pivot_wider(names_from=c('diagnosis'), values_from=c("mean")) %>%
  kbl %>%
  kable_styling(full_width = F)
variable Benign Distal Involved Tumor
CD4+ Naive 0.2177292 0.2145399 0.1585681 0.1005954
CD8+ Naive 0.0319700 0.0116995 0.0029211 0.0033353
CTL-1 0.0828786 0.0279128 0.0259314 0.0578042
CTL-2 0.0991289 0.0629982 0.0539045 0.0819523
Endothelial 0.0000942 0.0000449 0.0003057 0.0629532
Erythroid 0.0031734 0.0385307 0.0501333 0.0122791
Immature B cells 0.1087081 0.0041559 0.0044353 0.0022227
Mature B 0.1052535 0.0602596 0.0437274 0.0322325
mDC 0.0115375 0.0129284 0.0219190 0.0220848
memBcell 0.0064152 0.0222006 0.0139807 0.0319918
Mono1 0.0370914 0.0637082 0.0901730 0.0488815
Mono2 0.0163508 0.0957013 0.1029294 0.0079260
Mono3 0.0029651 0.0121092 0.0341568 0.0223334
Monocyte prog 0.0133041 0.0288313 0.0404555 0.0380992
NK 0.0327551 0.1059334 0.0934318 0.0333365
NKT 0.0327416 0.0872520 0.0642379 0.0314052
Osteoblasts 0.0007109 0.0003002 0.0009555 0.0528835
Osteoclasts 0.0000000 0.0001841 0.0002069 0.0108651
pDC 0.0130209 0.0064411 0.0125584 0.0099439
Pericytes 0.0000000 0.0000000 0.0000000 0.0449904
Pro-B 0.0289733 0.0051036 0.0035428 0.0022767
Progenitors 0.0575814 0.0658152 0.1075834 0.0507230
TAM 0.0003468 0.0005166 0.0032153 0.0456149
Th1/17 0.0839697 0.0508120 0.0366079 0.0656371
TIM 0.0002932 0.0104515 0.0150103 0.0821360
Treg Active 0.0058054 0.0034376 0.0028552 0.0072373
Treg Resting 0.0072015 0.0061415 0.0042586 0.0041857
Tumor 0.0000000 0.0019907 0.0119948 0.0340733

crumblr

Variance partitioning analysis

cobj = crumblr(cellCounts(pb))

form = ~ (1|subject.id) + (1|Status) + (1|subject.status) 

vp.c = fitExtractVarPartModel(cobj, form, colData(pb))

plotVarPart(sortCols(vp.c), label.angle=60, ncol=4) + theme(aspect.ratio=1)

fig.vp = plotPercentBars( sortCols(vp.c) )

# analysis with dream()
form = ~ 0 + Status + (1|subject.id) #+ (1|subject.status)

keep = pb$subject.status == "prostate cancer patient"

# Specify contrasts to compare regression coefficients
# For example, 
# Tumor_Involved = 'StatusTumor - StatusInvolved' tests if 
# expression in Tumor is different that in Involved samples
contrasts = c(Tumor_Involved  = 'StatusTumor - StatusInvolved',
              Tumor_Distal    = 'StatusTumor - StatusDistal',
              Involved_Distal = 'StatusInvolved - StatusDistal')

L = makeContrastsDream(form, colData(pb)[keep,], contrasts=contrasts)

fit = dream( cobj[,keep], form, colData(pb)[keep,], L=L)
fit = eBayes(fit)

Multivariate test along hierarchy

coef = "Tumor_Involved"
hc = buildClusterTreeFromPB(pb)
 
res = treeTest( fit, cobj, hc, coef=coef)
cols = c(brewer.pal(3, "Set1"), "grey85")

fig1 = plotTreeTestBeta(res) + xlim(0, 15) +
   theme(legend.position="bottom", legend.box = "vertical") 
fig2 = crumblr::plotForest(res, hide=TRUE) 
fig3 = plotPercentBars( sortCols(vp.c), col=cols) 

# combine plots
fig2 %>% insert_left(fig1) %>% insert_right(fig3)

Plot of pericytes

library(ggbeeswarm)
library(ggrepel)

CT = "Pericytes"
df = data.frame(CLR = cobj$E[CT,],
                DiseaseStatus = pb$Status,
                se = 1/sqrt(cobj$weights[CT,]),
                counts = cellCounts(pb)[,CT],
                totalCells = rowSums(cellCounts(pb)))

# points to highlight
df_count1 = df %>%
  filter(counts == 0) %>%
  arrange(-se) %>% 
  head(2) 

df_count2 = df %>%
  arrange(se, -counts) %>% 
  head(2) 

df_count = rbind(df_count1, df_count2)

df %>%
  ggplot(aes(DiseaseStatus, CLR, color=se)) +
    geom_boxplot(width=.3) +
    geom_beeswarm(cex = 2, size=3) + 
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5), aspect.ratio=1, axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
    ggtitle(CT) +
    scale_color_gradient2(low="grey", high="red", name = "Standard error") +
    geom_text_repel(data=df_count, aes(DiseaseStatus, CLR, label=paste(counts, totalCells, sep=' / ')), color="black", box.padding=2)