UMAP provided with data
- Properties of dataset
  - Show details of processing
  - Show voom-style mean-variance trends
Variance partitioning results
- Summarize differential expression
- Gene set analysis
  - MSigDB
Figures

library(SingleCellExperiment)
library(zellkonverter)
library(dreamlet)
library(zenith)
library(GSEABase)
library(qvalue)
library(RColorBrewer) 
library(cowplot)
library(kableExtra)
library(tidyverse)

# read single cell RNA-seq
file = "/sc/arion/projects/CommonMind/hoffman/scRNAseq_data/covid19_combat/covid_counts.h5ad"
sce = readH5AD(file, use_hdf5=TRUE)
counts(sce) = assay(sce, 'X')

# use HGNC names
rownames(sce) = rowData(sce)$feature_name

# remove doublets
sce = sce[,sce$GEX_region != "E: Doublets"]

# remove nan cells
sce = sce[,sce$minor_subset != "nan"]

UMAP provided with data

plotProjection(sce, "X_umap", "minor_subset")

# read metadata
file = "/sc/arion/projects/CommonMind/hoffman/scRNAseq_data/covid19_combat/CBD-KEY-CLINVAR/COMBAT_CLINVAR_for_processed.txt"
df = read.table(file, header=TRUE)

# filter and merge metadata
df = df[df$scRNASeq_sample_ID %in% sce$scRNASeq_sample_ID,]
idx = match(sce$scRNASeq_sample_ID, df$scRNASeq_sample_ID)
colData(sce) = cbind(colData(sce), df[idx,])

# For each donor, select the most severe sample
res = colData(sce) %>%
    as_tibble %>%
    group_by(donor_id) %>%
    select(scRNASeq_sample_ID, Source) %>%
    distinct %>%
    summarize(scRNASeq_sample_ID, Source, i = which.max(Source),
        use_sample_id = scRNASeq_sample_ID[which.max(Source)])

# subset to one sample per donor
sceSub = sce[,sce$scRNASeq_sample_ID %in% droplevels(res$use_sample_id)]

# Keep only Oxford samples since Source and Institute are confounded:
# Flu was the only one collected at St_Georges
sceSub = sceSub[,sceSub$Institute == "Oxford"]

# create pseudobulk
pb <- aggregateToPseudoBulk(sceSub,
    assay = "counts",     
    cluster_id = "minor_subset",  
    sample_id = "scRNASeq_sample_ID",
    verbose = FALSE)

Properties of dataset

df = data.frame(table(pb$Source))
colnames(df) = c("Disease status", "Count")

df %>%
  kbl(row.names=FALSE) %>% 
  kable_classic(full_width = FALSE)

Disease status	Count
HV	10
COVID_MILD	17
COVID_SEV	28
COVID_CRIT	19
COVID_HCW_MILD	13
Sepsis	23

df = list()
df$`# Samples` = ncol(pb)
df$`# Cells` = ncol(sce)
df = unlist(df)
df = data.frame(Property=names(df), count = df)

df %>%  
  kbl(row.names=FALSE) %>% 
  kable_classic(full_width = FALSE)

Property	count
# Samples	110
# Cells	787807

form = ~ Age + (1|sex) + (1|Source) 
res.proc = processAssays(pb, form,  
                          min.count = 3,
                          min.cells = 3,
                          min.samples = 8,
                          BPPARAM = SnowParam(6))

form = ~ Age + (1|sex) + (1|Source)
res.vp = fitVarPart(res.proc, form, BPPARAM=SnowParam(6) )

form = ~ Source + Age + sex 
fit = dreamlet(res.proc, form, BPPARAM=SnowParam(6))

Show details of processing

details(res.proc)

##                       assay n_retain                         formula
## 1                     B.INT      107 ~Age + (1 | sex) + (1 | Source)
## 2                     B.MEM      106 ~Age + (1 | sex) + (1 | Source)
## 3                   B.NAIVE      107 ~Age + (1 | sex) + (1 | Source)
## 4                 CD4.NAIVE      108 ~Age + (1 | sex) + (1 | Source)
## 5                   CD4.TCM      108 ~Age + (1 | sex) + (1 | Source)
## 6                  CD4.TEFF      108 ~Age + (1 | sex) + (1 | Source)
## 7           CD4.TEFF.prolif      108 ~Age + (1 | sex) + (1 | Source)
## 8             CD4.TEM/TEMRA      108 ~Age + (1 | sex) + (1 | Source)
## 9                  CD4.TREG      108 ~Age + (1 | sex) + (1 | Source)
## 10                CD8.NAIVE      107 ~Age + (1 | sex) + (1 | Source)
## 11                  CD8.TCM      106 ~Age + (1 | sex) + (1 | Source)
## 12             CD8.TCM.CCL5      105 ~Age + (1 | sex) + (1 | Source)
## 13                 CD8.TEFF       97 ~Age + (1 | sex) + (1 | Source)
## 14          CD8.TEFF.prolif      105 ~Age + (1 | sex) + (1 | Source)
## 15                  CD8.TEM      108 ~Age + (1 | sex) + (1 | Source)
## 16                CD8.TEMRA      107 ~Age + (1 | sex) + (1 | Source)
## 17                 CD8.TREG       25 ~Age + (1 | sex) + (1 | Source)
## 18               CD8.mitohi      107 ~Age + (1 | sex) + (1 | Source)
## 19                       DN      106 ~Age + (1 | sex) + (1 | Source)
## 20                       DP      108 ~Age + (1 | sex) + (1 | Source)
## 21                  GDT.VD2      101 ~Age + (1 | sex) + (1 | Source)
## 22               GDT.VD2neg       97 ~Age + (1 | sex) + (1 | Source)
## 23                      HSC       95 ~Age + (1 | sex) + (1 | Source)
## 24                     MAIT       86 ~Age + (1 | sex) + (1 | Source)
## 25                NK.CD16hi      108 ~Age + (1 | sex) + (1 | Source)
## 26               NK.CD16int       91 ~Age + (1 | sex) + (1 | Source)
## 27 NK.CD56hi.CD16int.XCL1/2      108 ~Age + (1 | sex) + (1 | Source)
## 28                   NK.cyc      107 ~Age + (1 | sex) + (1 | Source)
## 29                NK.mitohi      108 ~Age + (1 | sex) + (1 | Source)
## 30                       PB      109 ~Age + (1 | sex) + (1 | Source)
## 31                      PLT       78 ~Age + (1 | sex) + (1 | Source)
## 32                      RET       12 ~Age + (1 | sex) + (1 | Source)
## 33                      cDC      108 ~Age + (1 | sex) + (1 | Source)
## 34                    cMono      108 ~Age + (1 | sex) + (1 | Source)
## 35                cMono.PLT      106 ~Age + (1 | sex) + (1 | Source)
## 36                cMono.cyc      106 ~Age + (1 | sex) + (1 | Source)
## 37                     iNKT       19 ~Age + (1 | sex) + (1 | Source)
## 38                   ncMono      108 ~Age + (1 | sex) + (1 | Source)
## 39                      pDC       92 ~Age + (1 | sex) + (1 | Source)
##    formDropsTerms n_genes n_errors error_initial
## 1           FALSE    5772        0         FALSE
## 2           FALSE    6208        0         FALSE
## 3           FALSE    6716        0         FALSE
## 4           FALSE   11200        0         FALSE
## 5           FALSE   11472        0         FALSE
## 6           FALSE    7261        0         FALSE
## 7           FALSE    7307        0         FALSE
## 8           FALSE    4184        0         FALSE
## 9           FALSE    7328        0         FALSE
## 10          FALSE    7389        0         FALSE
## 11          FALSE    3989        0         FALSE
## 12          FALSE    4840        0         FALSE
## 13          FALSE    2521        0         FALSE
## 14          FALSE    5839        0         FALSE
## 15          FALSE    6469        0         FALSE
## 16          FALSE    8038        0         FALSE
## 17          FALSE     425        0         FALSE
## 18          FALSE    4241        0         FALSE
## 19          FALSE    3256        0         FALSE
## 20          FALSE    6383        0         FALSE
## 21          FALSE    3115        0         FALSE
## 22          FALSE    1801        0         FALSE
## 23          FALSE    3378        0         FALSE
## 24          FALSE    3067        0         FALSE
## 25          FALSE    9469        0         FALSE
## 26          FALSE    1793        0         FALSE
## 27          FALSE    4965        0         FALSE
## 28          FALSE    4847        0         FALSE
## 29          FALSE    4238        0         FALSE
## 30          FALSE    7940        0         FALSE
## 31          FALSE     683        0         FALSE
## 32          FALSE     221        0         FALSE
## 33          FALSE    6785        0         FALSE
## 34          FALSE   12472        0         FALSE
## 35          FALSE    3523        0         FALSE
## 36          FALSE    2806        0         FALSE
## 37          FALSE     636        0         FALSE
## 38          FALSE    9576        0         FALSE
## 39          FALSE    3374        0         FALSE

Show voom-style mean-variance trends

# show voom plot for each cell clusters  
plotVoom( res.proc, ncol=4)

details(fit)

##                       assay n_retain             formula formDropsTerms n_genes
## 1                     B.INT      107 ~Source + Age + sex          FALSE    5772
## 2                     B.MEM      106 ~Source + Age + sex          FALSE    6208
## 3                   B.NAIVE      107 ~Source + Age + sex          FALSE    6716
## 4                 CD4.NAIVE      108 ~Source + Age + sex          FALSE   11200
## 5                   CD4.TCM      108 ~Source + Age + sex          FALSE   11472
## 6                  CD4.TEFF      108 ~Source + Age + sex          FALSE    7261
## 7           CD4.TEFF.prolif      108 ~Source + Age + sex          FALSE    7307
## 8             CD4.TEM/TEMRA      108 ~Source + Age + sex          FALSE    4184
## 9                  CD4.TREG      108 ~Source + Age + sex          FALSE    7328
## 10                CD8.NAIVE      107 ~Source + Age + sex          FALSE    7389
## 11                  CD8.TCM      106 ~Source + Age + sex          FALSE    3989
## 12             CD8.TCM.CCL5      105 ~Source + Age + sex          FALSE    4840
## 13                 CD8.TEFF       97 ~Source + Age + sex          FALSE    2521
## 14          CD8.TEFF.prolif      105 ~Source + Age + sex          FALSE    5839
## 15                  CD8.TEM      108 ~Source + Age + sex          FALSE    6469
## 16                CD8.TEMRA      107 ~Source + Age + sex          FALSE    8038
## 17                 CD8.TREG       25 ~Source + Age + sex          FALSE     425
## 18               CD8.mitohi      107 ~Source + Age + sex          FALSE    4241
## 19                       DN      106 ~Source + Age + sex          FALSE    3256
## 20                       DP      108 ~Source + Age + sex          FALSE    6383
## 21                  GDT.VD2      101 ~Source + Age + sex          FALSE    3115
## 22               GDT.VD2neg       97 ~Source + Age + sex          FALSE    1801
## 23                      HSC       95 ~Source + Age + sex          FALSE    3378
## 24                     MAIT       86 ~Source + Age + sex          FALSE    3067
## 25                NK.CD16hi      108 ~Source + Age + sex          FALSE    9469
## 26               NK.CD16int       91 ~Source + Age + sex          FALSE    1793
## 27 NK.CD56hi.CD16int.XCL1/2      108 ~Source + Age + sex          FALSE    4965
## 28                   NK.cyc      107 ~Source + Age + sex          FALSE    4847
## 29                NK.mitohi      108 ~Source + Age + sex          FALSE    4238
## 30                       PB      109 ~Source + Age + sex          FALSE    7940
## 31                      PLT       78 ~Source + Age + sex          FALSE     683
## 32                      RET       12 ~Source + Age + sex          FALSE     221
## 33                      cDC      108 ~Source + Age + sex          FALSE    6785
## 34                    cMono      108 ~Source + Age + sex          FALSE   12472
## 35                cMono.PLT      106 ~Source + Age + sex          FALSE    3523
## 36                cMono.cyc      106 ~Source + Age + sex          FALSE    2806
## 37                     iNKT       19 ~Source + Age + sex          FALSE     636
## 38                   ncMono      108 ~Source + Age + sex          FALSE    9576
## 39                      pDC       92 ~Source + Age + sex          FALSE    3374
##    n_errors error_initial
## 1         0         FALSE
## 2         0         FALSE
## 3         0         FALSE
## 4         0         FALSE
## 5         0         FALSE
## 6         0         FALSE
## 7         0         FALSE
## 8         0         FALSE
## 9         0         FALSE
## 10        0         FALSE
## 11        0         FALSE
## 12        0         FALSE
## 13        0         FALSE
## 14        0         FALSE
## 15        0         FALSE
## 16        0         FALSE
## 17        0         FALSE
## 18        0         FALSE
## 19        0         FALSE
## 20        0         FALSE
## 21        0         FALSE
## 22        0         FALSE
## 23        0         FALSE
## 24        0         FALSE
## 25        0         FALSE
## 26        0         FALSE
## 27        0         FALSE
## 28        0         FALSE
## 29        0         FALSE
## 30        0         FALSE
## 31        0         FALSE
## 32        0         FALSE
## 33        0         FALSE
## 34        0         FALSE
## 35        0         FALSE
## 36        0         FALSE
## 37        0         FALSE
## 38        0         FALSE
## 39        0         FALSE

Variance partitioning results

plotVarPart(sortCols(res.vp), label.angle=60, ncol=4)

plotVolcano(fit, coef="SourceCOVID_MILD", ncol=4)

coef_array = c("SourceCOVID_HCW_MILD", "SourceCOVID_MILD", "SourceCOVID_SEV", "SourceCOVID_CRIT", "SourceSepsis")

df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
    mutate(se = logFC / t) %>%
    group_by(coef,assay)

genes = c("IFI27", "PPARG", "ZNF217", "PIM3")

df %>% 
  filter(ID %in% genes) %>%
  filter(assay %in% c("cMono", 'ncMono')) %>%
  ggplot(aes(logFC, coef, color=coef)) +
    geom_point() +
    geom_errorbar(aes(xmin = logFC - 1.96*se, xmax = logFC + 1.96*se), width=0) +
    theme_classic() +
    theme(aspect.ratio=1) +
    facet_grid(ID ~assay)

genes = df %>%
  filter(assay %in% c("cMono", 'ncMono')) %>%
  arrange(-abs(logFC)) %>%
  ungroup %>%
  select(ID) %>%
  distinct %>%
  head(100) %>%
  c

tab = res.gsa %>%
  as_tibble %>%
  mutate(coef = factor(coef, coef_array)) %>%
  filter(assay %in% c("cMono", 'ncMono')) %>%
  arrange(PValue) 



gs = "interferon|interlukin|activiation|inflamm|extracell|chemokin"

tab2 = tab %>%
  filter(grepl(gs, Geneset)) %>%
  filter(assay == "cMono") %>%
  mutate(tstat = delta/ se)


nrow = length(unique(tab2$coef))
ncol = length(unique(tab2$Geneset))
zmax = max(abs(tab2$tstat), na.rm=TRUE)

fig = tab2 %>%
  ggplot(aes(coef, Geneset, fill=tstat, label=ifelse(FDR < 0.05, '*', ''))) +
    geom_tile() +    
    theme_classic() + 
    scale_fill_gradient2("t-statistic", low="blue", mid="white", high="red", limits=c(-zmax, zmax)) + 
    theme(aspect.ratio=ncol/nrow, axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), plot.title = element_text(hjust = 0.5)) + 
    geom_text(vjust=1, hjust=0.5) 
fig

# this showed enrichment for type I and II interferon pathways in the less severe hospitalized COVID-19 patients across cell types.


gs = c("GO0002544: chronic inflammatory response", 
"GO0031012: extracellular matrix",
"GO0035456: response to interferon−beta",
"GO0071346: cellular response to type II interferon")

genes = geneIds(go.gs[[gs[4]]])

tab3 = df %>%
  filter(assay == c( "ncMono"), ID %in% genes) 

nrow = length(unique(tab3$coef))
ncol = length(unique(tab3$ID))
zmax = max(abs(tab3$logFC), na.rm=TRUE)

fig = tab3  %>%
  ggplot(aes(coef, ID, fill=logFC, label=ifelse(adj.P.Val < 0.05, '*', ''))) +
    geom_tile() +    
    theme_classic() + 
    scale_fill_gradient2("logFC", low="blue", mid="white", high="red", limits=c(-zmax, zmax)) + 
    theme(aspect.ratio=ncol/nrow, axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), plot.title = element_text(hjust = 0.5)) + 
    geom_text(vjust=1, hjust=0.5) +
    facet_wrap(~ assay)
    
ggsave(fig, file="~/www/test.pdf", width=15, height=18)

Summarize differential expression

file = "./topTable_COVID_2022.tsv"

coef_array = c("SourceCOVID_HCW_MILD", "SourceCOVID_MILD", "SourceCOVID_SEV", "SourceCOVID_CRIT", "SourceSepsis")

coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = x)) %>%
    write.table(file, quote=FALSE, sep='\t', row.names=FALSE)

# Summarize differential expression for each coef and assay
df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
  group_by(coef,assay) %>%
  summarize( nDE = sum(adj.P.Val < 0.05), 
            pi1 = 1 - pi0est(P.Value)$pi0,
            nGenes = length(adj.P.Val)) %>%
  mutate(assay = factor(assay, assayNames(pb))) 


ymax = 1.05*max(df$nGenes)
fig1 = ggplot(df, aes(nGenes, assay, fill=assay)) + 
    geom_bar(stat="identity") + 
    theme_classic() +
    theme(aspect.ratio=3, legend.position="none") +
    scale_x_continuous(limits=c(0,ymax), expand=c(0,0)) +
    xlab("# genes expressed") +
    ylab("Cell type") +
    facet_wrap(~ coef, nrow=1)

ymax = 1.05*max(df$nDE)
fig2 = ggplot(df, aes(nDE, assay, fill=assay)) + 
    geom_bar(stat="identity") + 
    theme_classic() +
    theme(aspect.ratio=3, legend.position="none") +
    scale_x_continuous(limits=c(0,ymax), expand=c(0,0)) +
    xlab("# genes with FDR < 5%") +
    ylab("Cell type") +
    facet_wrap(~ coef, nrow=1)

fig3 = ggplot(df, aes(pi1, assay, fill=assay)) + 
    geom_bar(stat="identity") + 
    theme_classic() +
    theme(aspect.ratio=3, legend.position="none") +
    scale_x_continuous(limits=c(0,1), expand=c(0,0)) +
    xlab(bquote(pi[1])) +
    ylab("Cell type") +
    facet_wrap(~ coef, nrow=1)

plot_grid(fig1, fig2, fig3, labels=LETTERS[1:3], nrow=3, axis="tblr", align="hv")

Gene set analysis

# Load Gene Ontology database 
go.gs = get_GeneOntology(to="SYMBOL")

# remove set with > 1000 genes
go.gs = go.gs[sapply(go.gs, function(x) length(geneIds(x))) < 1000]

# zenith analysis
res.gsa = zenith_gsa(fit, go.gs, coefs=coef_array, progressbar=FALSE)

plotZenithResults(res.gsa, 5, 3)

i = grep("Mono|cDC", res.gsa$assay)
plotZenithResults(res.gsa[i,], 5, 3) + facet_wrap(~coef, nrow=1)

MSigDB

# Load Gene Ontology database 
msigdb.gs = get_MSigDB("H", to="SYMBOL")

# zenith analysis
res.msigdb = zenith_gsa(fit, msigdb.gs, coefs=coef_array, progressbar=FALSE)

plotZenithResults(res.msigdb, 5, 3)

i = grep("Mono|cDC", res.msigdb$assay)
fig = plotZenithResults(res.msigdb[i,], 5, 3) + facet_wrap(~coef, nrow=1)
fig

gs.array = c("M5936_HALLMARK_OXIDATIVE_PHOSPHORYLATION",
'M5892_HALLMARK_CHOLESTEROL_HOMEOSTASIS',
'M5936_HALLMARK_OXIDATIVE_PHOSPHORYLATION',
'M5890_HALLMARK_TNFA_SIGNALING_VIA_NFKB',
'M5921_HALLMARK_COMPLEMENT',
'M5913_HALLMARK_INTERFERON_GAMMA_RESPONSE')

gs1 = 'M5890_HALLMARK_TNFA_SIGNALING_VIA_NFKB'
genes = sort(geneIds(msigdb.gs[[gs1]]))
# Summarize differential expression for each coef and assay
df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
  mutate(assay = factor(assay, assayNames(pb))) %>%
  filter(grepl("Mono", assay), ID %in% genes) %>%
  droplevels 

# heatmap of significant genes
genes = df %>% 
  filter(assay == 'cMono') %>%
  group_by(ID) %>%
  summarize(FDR = min(adj.P.Val)) %>%
  filter(FDR < 0.001) %>%
  pull(ID) %>%
  sort

df2 = df %>%
    filter(assay == 'cMono', ID %in% genes) 

hcl = df2 %>%
  select(ID, t, coef) %>%
  pivot_wider(names_from = ID, values_from=t) %>%
  column_to_rownames(var='coef') %>%
  t %>%
  dist(.) %>%
  hclust

df2 = df2 %>%
      mutate(ID = factor(ID, hcl$labels[hcl$order]))

df2 %>%
    ggplot(aes(ID, coef, fill=t)) +
    geom_tile() +
    scale_fill_gradient2(low="blue", mid="white", high="red") +
    theme_classic() +
    coord_equal() +
    facet_wrap(~assay, ncol=4) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle(gs1) +
    geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)

# Heatmap of Gene set results
df_gsa = res.msigdb %>%
          as_tibble %>%
          filter(Geneset %in% gs.array) %>%
          filter(grepl("Mono|cDC", assay)) %>%
          mutate(coef = factor(coef, coef_array)) %>%
          mutate(tstat = delta/se)         

zmax = max(abs(df_gsa$tstat))             

fig = df_gsa %>%
      ggplot(aes(assay , coef, fill=tstat)) +
        geom_tile() +
        scale_fill_gradient2(low="blue", mid="white", high="red", limits=c(-zmax, zmax)) +
        theme_classic() +
        coord_equal()  +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
        geom_text(data = df_gsa %>% filter(FDR < 0.05), label='*', hjust=0.5, vjust=0.8) +
        facet_wrap(~Geneset, nrow=1)
fig

Figures

gs.array = c("GO0006119: oxidative phosphorylation",
"GO0070469: respirasome",
"GO0031012: extracellular matrix",
"GO0000502: proteasome complex",
"GO0038024: cargo receptor activity",
"GO0035456: response to interferon-beta",
"GO0034340: response to type I interferon")

gs1 = "GO0034340: response to type I interferon"

genes = sort(geneIds(go.gs[[gs1]]))
# Summarize differential expression for each coef and assay
df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
  mutate(assay = factor(assay, assayNames(pb))) %>%
  filter(grepl("Mono", assay), ID %in% genes) %>%
  droplevels 

# heatmap of significant genes
genes = df %>% 
  filter(assay == 'cMono') %>%
  group_by(ID) %>%
  summarize(FDR = min(adj.P.Val)) %>%
  filter(FDR < 0.01) %>%
  pull(ID) %>%
  sort

df2 = df %>%
    filter(assay == 'cMono', ID %in% genes) 

hcl = df2 %>%
  select(ID, t, coef) %>%
  pivot_wider(names_from = ID, values_from=t) %>%
  column_to_rownames(var='coef') %>%
  t %>%
  dist(.) %>%
  hclust

df2 = df2 %>%
      mutate(ID = factor(ID, hcl$labels[hcl$order]))

df2 %>%
    ggplot(aes(ID, coef, fill=t)) +
    geom_tile() +
    scale_fill_gradient2(low="blue", mid="white", high="red") +
    theme_classic() +
    coord_equal() +
    facet_wrap(~assay, ncol=4) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle(gs1) +
    geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)

# Heatmap of Gene set results
df_gsa = res.gsa %>%
          as_tibble %>%
          filter(Geneset %in% gs.array) %>%
          filter(grepl("Mono|cDC", assay)) %>%
          mutate(coef = factor(coef, coef_array)) %>%
          mutate(tstat = delta/se)         

zmax = max(abs(df_gsa$tstat))          

fig = df_gsa %>%
      ggplot(aes(coef, Geneset, fill=tstat)) +
        geom_tile() +
        scale_fill_gradient2(low="blue", mid="white", high="red", limits=c(-zmax, zmax)) +
        theme_classic() +
        coord_equal()  +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
        geom_text(data = df_gsa %>% filter(FDR < 0.05), label='*', hjust=0.5, vjust=0.8) +
        facet_wrap(~assay)
fig

library(paletteer)

# cluster, GEX_region, minor_subset, major_subset
incl = grep("cDC|Mono", sce$GEX_region)
figA = plotProjection(sce[,incl], "X_umap", "minor_subset", pointsize=2) + scale_color_manual(values = adjustcolor(paletteer_d("ggsci::default_nejm"), offset=c(.3, .3, .3, 0)))

figB = plotVoom(res.proc[["cMono"]])

incl = res.vp$assay == "cMono"
col = c('#3A488AFF', '#DABD61FF', '#BE3428FF', "grey85")
figC = plotVarPart( sortCols(res.vp[incl,]), col=col ) 

# statistics
sortCols(res.vp[incl,]) %>%
  as_tibble %>%
  summarize(median = median(Source), 
    N10 = sum(Source> .25), 
    length = length(Source))

## # A tibble: 1 × 3
##   median   N10 length
##    <dbl> <int>  <int>
## 1  0.114  3250  12472

figD = plotVolcano(fit[["cMono"]], coef="SourceCOVID_MILD")

df.vp = res.vp %>%
  as_tibble %>%
  filter(assay == "cMono") %>%
  arrange(Source, Age, sex, Residuals) %>%
  column_to_rownames(var='gene') %>%
  select(-assay)

genes = c("NFKBIA", "PIM3", "CLU", "PRDX2", "XIST", "CETN2")
genes = c(genes, 'NLRC5', 'GIGYF2', 'FADD', 'PTPN2', 'IFI27')
genes = genes[rev(order(match(genes, rownames(df.vp))))]
figE = plotPercentBars(sortCols(df.vp), genes = genes, assays="cMono", col=col) + theme(aspect.ratio=1, legend.position="none")

plot_grid(figA, figB, figC, figD, figE, ncol=4, labels=LETTERS)

gs.array = c("GO0006119: oxidative phosphorylation",
"GO0070469: respirasome",
"GO0031012: extracellular matrix",
"GO0000502: proteasome complex",
"GO0038024: cargo receptor activity",
"GO0035456: response to interferon-beta",
"GO0034340: response to type I interferon")

gs1 = gs.array[6]
genes = sort(geneIds(go.gs[[gs1]]))

# Summarize differential expression for each coef and assay
df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
  mutate(assay = factor(assay, assayNames(pb))) %>%
  filter(grepl("Mono", assay), ID %in% genes) %>%
  droplevels 
  
fig = df %>% 
  ggplot(aes(coef, ID, fill=t)) +
    geom_tile() +
    scale_fill_gradient2(low="blue", mid="white", high="red") +
    theme_classic() +
    coord_equal() +
    facet_wrap(~assay, ncol=4) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle(gs1) +
    geom_text(data = df %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8)



# heatmap of significant genes
genes = df %>% 
  filter(assay == 'cMono') %>%
  group_by(ID) %>%
  summarize(FDR = min(adj.P.Val)) %>%
  filter(FDR < 0.05) %>%
  pull(ID) %>%
  sort

df2 = df %>%
    filter(assay == 'cMono', ID %in% genes) 

hcl = df2 %>%
  select(ID, t, coef) %>%
  pivot_wider(names_from = ID, values_from=t) %>%
  column_to_rownames(var='coef') %>%
  t %>%
  dist(.) %>%
  hclust

df2 = df2 %>%
      mutate(ID = factor(ID, hcl$labels[hcl$order]))

fig = df2 %>%
    ggplot(aes(ID, coef, fill=t)) +
    geom_tile() +
    scale_fill_gradient2(low="blue", mid="white", high="red") +
    theme_classic() +
    coord_equal() +
    facet_wrap(~assay, ncol=4) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle(gs1) +
    geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8) 


df_gsa = res.gsa %>%
          as_tibble %>%
          filter(grepl("nterferon", Geneset)) %>%
          filter(grepl("Mono|cDC", assay))

fig = df_gsa %>%
      ggplot(aes(coef, Geneset, fill=delta/se)) +
        geom_tile() +
        scale_fill_gradient2(low="blue", mid="white", high="red") +
        theme_classic() +
        coord_equal()  +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
        geom_text(data = df_gsa %>% filter(FDR < 0.05), label='*', hjust=0.5, vjust=0.8) +
        facet_wrap(~assay, nrow=1)
fig

# Summarize differential expression for each coef and assay
df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
  mutate(assay = factor(assay, assayNames(pb))) %>%
  filter(grepl("Mono|cDC", assay), ID %in% genes) %>%
  droplevels 

figList = lapply(genes, function(gene){
  df %>%
  filter(ID == gene) %>%
  mutate(se = logFC / t) %>%
  ggplot(aes(coef, logFC,  color = -log10(pmax(1e-4, adj.P.Val)))) +
    geom_point() +
    theme_classic() +
    geom_errorbar(aes(ymin = logFC - 1.96 * se, ymax = logFC + 1.96 * se), width = 0) +
    facet_wrap(~assay) +
    theme(aspect.ratio=1) +
    coord_flip() +
    scale_color_gradient(name = bquote(-log[10] ~ FDR), low = "grey", high = "red", limits = c(0, 4)) +
    ggtitle(gene) +
    ylab(bquote(log[2] ~ fold ~ change)) +
    geom_hline(yintercept = 0, linetype = "dashed") 
})

figList

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

# heatmap
###########
genes = c("NLRC5","PTPN2", "FADD")

# Summarize differential expression for each coef and assay
df = coef_array %>%
  map_df(function(x) 
    topTable(fit, coef = x, number=Inf) %>%
    as_tibble %>%
    mutate(coef = factor(x, coef_array))) %>%
  mutate(assay = factor(assay, assayNames(pb))) %>%
  filter(grepl("Mono|cDC", assay)) %>%
  droplevels 


df2 = df %>%
  filter(ID %in% genes, assay == "cMono") %>%
  mutate(se = logFC / t)

zmax = 1.05*max(abs(df2$logFC))
fig1 = df2 %>%  
  ggplot(aes(coef, ID, fill=logFC)) +
    geom_tile() +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))  +
    scale_fill_gradient2("logFC", low = "blue", mid = "white", high = "red", limits = c(-zmax, zmax), na.value = "grey70") +
    geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8) +
    coord_equal()

genes = c("NFKBIA", "CLU", "IFI27", "PIM3")

df2 = df %>%
  filter(ID %in% genes, assay == "cMono") %>%
  mutate(se = logFC / t)

zmax = 1.05*max(abs(df2$logFC))
fig2 = df2 %>%  
  ggplot(aes(coef, ID, fill=logFC)) +
    geom_tile() +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    coord_equal() +
    scale_fill_gradient2("logFC", low = "blue", mid = "white", high = "red", limits = c(-zmax, zmax), na.value = "grey70") +
    geom_text(data = df2 %>% filter(adj.P.Val < 0.05), label='*', hjust=0.5, vjust=0.8) +
    coord_equal()

plot_grid(fig1 + theme(axis.text.x =  element_blank()), fig2, ncol=1, align="h", axis="lr")

COVID COMBAT study

COvid-19 Multi-omics Blood ATlas (COMBAT) Consortium, Cell, 2022

Developed by Gabriel Hoffman

Run on 2024-05-31 15:58:59.283146

UMAP provided with data

Properties of dataset

Show details of processing

Show voom-style mean-variance trends

Variance partitioning results

Summarize differential expression

Gene set analysis

MSigDB

Figures