Variance partitioning results
plotVarPart(sortCols(res.vp), label.angle=60, ncol=4)

plotVolcano(fit, coef="SourceCOVID_MILD", ncol=4)

coef_array = c("SourceCOVID_HCW_MILD", "SourceCOVID_MILD", "SourceCOVID_SEV", "SourceCOVID_CRIT", "SourceSepsis")
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
mutate(se = logFC / t) %>%
group_by(coef,assay)
genes = c("IFI27", "PPARG", "ZNF217", "PIM3")
df %>%
filter(ID %in% genes) %>%
filter(assay %in% c("cMono", 'ncMono')) %>%
ggplot(aes(logFC, coef, color=coef)) +
geom_point() +
geom_errorbar(aes(xmin = logFC - 1.96*se, xmax = logFC + 1.96*se), width=0) +
theme_classic() +
theme(aspect.ratio=1) +
facet_grid(ID ~assay)
genes = df %>%
filter(assay %in% c("cMono", 'ncMono')) %>%
arrange(-abs(logFC)) %>%
ungroup %>%
select(ID) %>%
distinct %>%
head(100) %>%
c
tab = res.gsa %>%
as_tibble %>%
mutate(coef = factor(coef, coef_array)) %>%
filter(assay %in% c("cMono", 'ncMono')) %>%
arrange(PValue)
gs = "interferon|interlukin|activiation|inflamm|extracell|chemokin"
tab2 = tab %>%
filter(grepl(gs, Geneset)) %>%
filter(assay == "cMono") %>%
mutate(tstat = delta/ se)
nrow = length(unique(tab2$coef))
ncol = length(unique(tab2$Geneset))
zmax = max(abs(tab2$tstat), na.rm=TRUE)
fig = tab2 %>%
ggplot(aes(coef, Geneset, fill=tstat, label=ifelse(FDR < 0.05, '*', ''))) +
geom_tile() +
theme_classic() +
scale_fill_gradient2("t-statistic", low="blue", mid="white", high="red", limits=c(-zmax, zmax)) +
theme(aspect.ratio=ncol/nrow, axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), plot.title = element_text(hjust = 0.5)) +
geom_text(vjust=1, hjust=0.5)
fig
# this showed enrichment for type I and II interferon pathways in the less severe hospitalized COVID-19 patients across cell types.
gs = c("GO0002544: chronic inflammatory response",
"GO0031012: extracellular matrix",
"GO0035456: response to interferon−beta",
"GO0071346: cellular response to type II interferon")
genes = geneIds(go.gs[[gs[4]]])
tab3 = df %>%
filter(assay == c( "ncMono"), ID %in% genes)
nrow = length(unique(tab3$coef))
ncol = length(unique(tab3$ID))
zmax = max(abs(tab3$logFC), na.rm=TRUE)
fig = tab3 %>%
ggplot(aes(coef, ID, fill=logFC, label=ifelse(adj.P.Val < 0.05, '*', ''))) +
geom_tile() +
theme_classic() +
scale_fill_gradient2("logFC", low="blue", mid="white", high="red", limits=c(-zmax, zmax)) +
theme(aspect.ratio=ncol/nrow, axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), plot.title = element_text(hjust = 0.5)) +
geom_text(vjust=1, hjust=0.5) +
facet_wrap(~ assay)
ggsave(fig, file="~/www/test.pdf", width=15, height=18)
Summarize differential expression
file = "./topTable_COVID_2022.tsv"
coef_array = c("SourceCOVID_HCW_MILD", "SourceCOVID_MILD", "SourceCOVID_SEV", "SourceCOVID_CRIT", "SourceSepsis")
coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = x)) %>%
write.table(file, quote=FALSE, sep='\t', row.names=FALSE)
# Summarize differential expression for each coef and assay
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
group_by(coef,assay) %>%
summarize( nDE = sum(adj.P.Val < 0.05),
pi1 = 1 - pi0est(P.Value)$pi0,
nGenes = length(adj.P.Val)) %>%
mutate(assay = factor(assay, assayNames(pb)))
ymax = 1.05*max(df$nGenes)
fig1 = ggplot(df, aes(nGenes, assay, fill=assay)) +
geom_bar(stat="identity") +
theme_classic() +
theme(aspect.ratio=3, legend.position="none") +
scale_x_continuous(limits=c(0,ymax), expand=c(0,0)) +
xlab("# genes expressed") +
ylab("Cell type") +
facet_wrap(~ coef, nrow=1)
ymax = 1.05*max(df$nDE)
fig2 = ggplot(df, aes(nDE, assay, fill=assay)) +
geom_bar(stat="identity") +
theme_classic() +
theme(aspect.ratio=3, legend.position="none") +
scale_x_continuous(limits=c(0,ymax), expand=c(0,0)) +
xlab("# genes with FDR < 5%") +
ylab("Cell type") +
facet_wrap(~ coef, nrow=1)
fig3 = ggplot(df, aes(pi1, assay, fill=assay)) +
geom_bar(stat="identity") +
theme_classic() +
theme(aspect.ratio=3, legend.position="none") +
scale_x_continuous(limits=c(0,1), expand=c(0,0)) +
xlab(bquote(pi[1])) +
ylab("Cell type") +
facet_wrap(~ coef, nrow=1)
plot_grid(fig1, fig2, fig3, labels=LETTERS[1:3], nrow=3, axis="tblr", align="hv")

Gene set analysis
# Load Gene Ontology database
go.gs = get_GeneOntology(to="SYMBOL")
# remove set with > 1000 genes
go.gs = go.gs[sapply(go.gs, function(x) length(geneIds(x))) < 1000]
# zenith analysis
res.gsa = zenith_gsa(fit, go.gs, coefs=coef_array, progressbar=FALSE)
plotZenithResults(res.gsa, 5, 3)

i = grep("Mono|cDC", res.gsa$assay)
plotZenithResults(res.gsa[i,], 5, 3) + facet_wrap(~coef, nrow=1)

MSigDB
# Load Gene Ontology database
msigdb.gs = get_MSigDB("H", to="SYMBOL")
# zenith analysis
res.msigdb = zenith_gsa(fit, msigdb.gs, coefs=coef_array, progressbar=FALSE)
plotZenithResults(res.msigdb, 5, 3)

i = grep("Mono|cDC", res.msigdb$assay)
fig = plotZenithResults(res.msigdb[i,], 5, 3) + facet_wrap(~coef, nrow=1)
fig

gs.array = c("M5936_HALLMARK_OXIDATIVE_PHOSPHORYLATION",
'M5892_HALLMARK_CHOLESTEROL_HOMEOSTASIS',
'M5936_HALLMARK_OXIDATIVE_PHOSPHORYLATION',
'M5890_HALLMARK_TNFA_SIGNALING_VIA_NFKB',
'M5921_HALLMARK_COMPLEMENT',
'M5913_HALLMARK_INTERFERON_GAMMA_RESPONSE')
gs1 = 'M5890_HALLMARK_TNFA_SIGNALING_VIA_NFKB'
genes = sort(geneIds(msigdb.gs[[gs1]]))
# Summarize differential expression for each coef and assay
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
mutate(assay = factor(assay, assayNames(pb))) %>%
filter(grepl("Mono", assay), ID %in% genes) %>%
droplevels
# heatmap of significant genes
genes = df %>%
filter(assay == 'cMono') %>%
group_by(ID) %>%
summarize(FDR = min(adj.P.Val)) %>%
filter(FDR < 0.001) %>%
pull(ID) %>%
sort
df2 = df %>%
filter(assay == 'cMono', ID %in% genes)
hcl = df2 %>%
select(ID, t, coef) %>%
pivot_wider(names_from = ID, values_from=t) %>%
column_to_rownames(var='coef') %>%
t %>%
dist(.) %>%
hclust
df2 = df2 %>%
mutate(ID = factor(ID, hcl$labels[hcl$order]))
df2 %>%
ggplot(aes(ID, coef, fill=t)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red") +
theme_classic() +
coord_equal() +
facet_wrap(~assay, ncol=4) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle(gs1) +
geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)

# Heatmap of Gene set results
df_gsa = res.msigdb %>%
as_tibble %>%
filter(Geneset %in% gs.array) %>%
filter(grepl("Mono|cDC", assay)) %>%
mutate(coef = factor(coef, coef_array)) %>%
mutate(tstat = delta/se)
zmax = max(abs(df_gsa$tstat))
fig = df_gsa %>%
ggplot(aes(assay , coef, fill=tstat)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red", limits=c(-zmax, zmax)) +
theme_classic() +
coord_equal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = df_gsa %>% filter(FDR < 0.05), label='*', hjust=0.5, vjust=0.8) +
facet_wrap(~Geneset, nrow=1)
fig

Figures
gs.array = c("GO0006119: oxidative phosphorylation",
"GO0070469: respirasome",
"GO0031012: extracellular matrix",
"GO0000502: proteasome complex",
"GO0038024: cargo receptor activity",
"GO0035456: response to interferon-beta",
"GO0034340: response to type I interferon")
gs1 = "GO0034340: response to type I interferon"
genes = sort(geneIds(go.gs[[gs1]]))
# Summarize differential expression for each coef and assay
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
mutate(assay = factor(assay, assayNames(pb))) %>%
filter(grepl("Mono", assay), ID %in% genes) %>%
droplevels
# heatmap of significant genes
genes = df %>%
filter(assay == 'cMono') %>%
group_by(ID) %>%
summarize(FDR = min(adj.P.Val)) %>%
filter(FDR < 0.01) %>%
pull(ID) %>%
sort
df2 = df %>%
filter(assay == 'cMono', ID %in% genes)
hcl = df2 %>%
select(ID, t, coef) %>%
pivot_wider(names_from = ID, values_from=t) %>%
column_to_rownames(var='coef') %>%
t %>%
dist(.) %>%
hclust
df2 = df2 %>%
mutate(ID = factor(ID, hcl$labels[hcl$order]))
df2 %>%
ggplot(aes(ID, coef, fill=t)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red") +
theme_classic() +
coord_equal() +
facet_wrap(~assay, ncol=4) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle(gs1) +
geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)

# Heatmap of Gene set results
df_gsa = res.gsa %>%
as_tibble %>%
filter(Geneset %in% gs.array) %>%
filter(grepl("Mono|cDC", assay)) %>%
mutate(coef = factor(coef, coef_array)) %>%
mutate(tstat = delta/se)
zmax = max(abs(df_gsa$tstat))
fig = df_gsa %>%
ggplot(aes(coef, Geneset, fill=tstat)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red", limits=c(-zmax, zmax)) +
theme_classic() +
coord_equal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = df_gsa %>% filter(FDR < 0.05), label='*', hjust=0.5, vjust=0.8) +
facet_wrap(~assay)
fig

library(paletteer)
# cluster, GEX_region, minor_subset, major_subset
incl = grep("cDC|Mono", sce$GEX_region)
figA = plotProjection(sce[,incl], "X_umap", "minor_subset", pointsize=2) + scale_color_manual(values = adjustcolor(paletteer_d("ggsci::default_nejm"), offset=c(.3, .3, .3, 0)))
figB = plotVoom(res.proc[["cMono"]])
incl = res.vp$assay == "cMono"
col = c('#3A488AFF', '#DABD61FF', '#BE3428FF', "grey85")
figC = plotVarPart( sortCols(res.vp[incl,]), col=col )
# statistics
sortCols(res.vp[incl,]) %>%
as_tibble %>%
summarize(median = median(Source),
N10 = sum(Source> .25),
length = length(Source))
## # A tibble: 1 × 3
## median N10 length
## <dbl> <int> <int>
## 1 0.114 3250 12472
figD = plotVolcano(fit[["cMono"]], coef="SourceCOVID_MILD")
df.vp = res.vp %>%
as_tibble %>%
filter(assay == "cMono") %>%
arrange(Source, Age, sex, Residuals) %>%
column_to_rownames(var='gene') %>%
select(-assay)
genes = c("NFKBIA", "PIM3", "CLU", "PRDX2", "XIST", "CETN2")
genes = c(genes, 'NLRC5', 'GIGYF2', 'FADD', 'PTPN2', 'IFI27')
genes = genes[rev(order(match(genes, rownames(df.vp))))]
figE = plotPercentBars(sortCols(df.vp), genes = genes, assays="cMono", col=col) + theme(aspect.ratio=1, legend.position="none")
plot_grid(figA, figB, figC, figD, figE, ncol=4, labels=LETTERS)

gs.array = c("GO0006119: oxidative phosphorylation",
"GO0070469: respirasome",
"GO0031012: extracellular matrix",
"GO0000502: proteasome complex",
"GO0038024: cargo receptor activity",
"GO0035456: response to interferon-beta",
"GO0034340: response to type I interferon")
gs1 = gs.array[6]
genes = sort(geneIds(go.gs[[gs1]]))
# Summarize differential expression for each coef and assay
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
mutate(assay = factor(assay, assayNames(pb))) %>%
filter(grepl("Mono", assay), ID %in% genes) %>%
droplevels
fig = df %>%
ggplot(aes(coef, ID, fill=t)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red") +
theme_classic() +
coord_equal() +
facet_wrap(~assay, ncol=4) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle(gs1) +
geom_text(data = df %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)
# heatmap of significant genes
genes = df %>%
filter(assay == 'cMono') %>%
group_by(ID) %>%
summarize(FDR = min(adj.P.Val)) %>%
filter(FDR < 0.05) %>%
pull(ID) %>%
sort
df2 = df %>%
filter(assay == 'cMono', ID %in% genes)
hcl = df2 %>%
select(ID, t, coef) %>%
pivot_wider(names_from = ID, values_from=t) %>%
column_to_rownames(var='coef') %>%
t %>%
dist(.) %>%
hclust
df2 = df2 %>%
mutate(ID = factor(ID, hcl$labels[hcl$order]))
fig = df2 %>%
ggplot(aes(ID, coef, fill=t)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red") +
theme_classic() +
coord_equal() +
facet_wrap(~assay, ncol=4) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle(gs1) +
geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)
df_gsa = res.gsa %>%
as_tibble %>%
filter(grepl("nterferon", Geneset)) %>%
filter(grepl("Mono|cDC", assay))
fig = df_gsa %>%
ggplot(aes(coef, Geneset, fill=delta/se)) +
geom_tile() +
scale_fill_gradient2(low="blue", mid="white", high="red") +
theme_classic() +
coord_equal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
geom_text(data = df_gsa %>% filter(FDR < 0.05), label='*', hjust=0.5, vjust=0.8) +
facet_wrap(~assay, nrow=1)
fig

# Summarize differential expression for each coef and assay
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
mutate(assay = factor(assay, assayNames(pb))) %>%
filter(grepl("Mono|cDC", assay), ID %in% genes) %>%
droplevels
figList = lapply(genes, function(gene){
df %>%
filter(ID == gene) %>%
mutate(se = logFC / t) %>%
ggplot(aes(coef, logFC, color = -log10(pmax(1e-4, adj.P.Val)))) +
geom_point() +
theme_classic() +
geom_errorbar(aes(ymin = logFC - 1.96 * se, ymax = logFC + 1.96 * se), width = 0) +
facet_wrap(~assay) +
theme(aspect.ratio=1) +
coord_flip() +
scale_color_gradient(name = bquote(-log[10] ~ FDR), low = "grey", high = "red", limits = c(0, 4)) +
ggtitle(gene) +
ylab(bquote(log[2] ~ fold ~ change)) +
geom_hline(yintercept = 0, linetype = "dashed")
})
figList
## [[1]]

##
## [[2]]

##
## [[3]]

##
## [[4]]

##
## [[5]]

##
## [[6]]

##
## [[7]]

##
## [[8]]

##
## [[9]]

##
## [[10]]

##
## [[11]]

##
## [[12]]

##
## [[13]]

##
## [[14]]

##
## [[15]]

# heatmap
###########
genes = c("NLRC5","PTPN2", "FADD")
# Summarize differential expression for each coef and assay
df = coef_array %>%
map_df(function(x)
topTable(fit, coef = x, number=Inf) %>%
as_tibble %>%
mutate(coef = factor(x, coef_array))) %>%
mutate(assay = factor(assay, assayNames(pb))) %>%
filter(grepl("Mono|cDC", assay)) %>%
droplevels
df2 = df %>%
filter(ID %in% genes, assay == "cMono") %>%
mutate(se = logFC / t)
zmax = 1.05*max(abs(df2$logFC))
fig1 = df2 %>%
ggplot(aes(coef, ID, fill=logFC)) +
geom_tile() +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
scale_fill_gradient2("logFC", low = "blue", mid = "white", high = "red", limits = c(-zmax, zmax), na.value = "grey70") +
geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8) +
coord_equal()
genes = c("NFKBIA", "CLU", "IFI27", "PIM3")
df2 = df %>%
filter(ID %in% genes, assay == "cMono") %>%
mutate(se = logFC / t)
zmax = 1.05*max(abs(df2$logFC))
fig2 = df2 %>%
ggplot(aes(coef, ID, fill=logFC)) +
geom_tile() +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
coord_equal() +
scale_fill_gradient2("logFC", low = "blue", mid = "white", high = "red", limits = c(-zmax, zmax), na.value = "grey70") +
geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8) +
coord_equal()
plot_grid(fig1 + theme(axis.text.x = element_blank()), fig2, ncol=1, align="h", axis="lr")
