Data is available from Synapse
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(edgeR))
suppressPackageStartupMessages(library(foreach))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(variancePartition))
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(genomation))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(decorate))
suppressPackageStartupMessages(library(BiocParallel))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(qvalue))
options(xtable.type="html")
knitr::opts_chunk$set(
echo=TRUE,
warning=FALSE,
message=TRUE,
error = FALSE,
tidy = FALSE,
cache = TRUE,
cache.lazy = FALSE,
dev = c("png", "pdf"),
fig.width=7, fig.height=7)
options(markdown.HTML.stylesheet = 'css/custom.css')
suppressPackageStartupMessages(library(BiocParallel))
register(SnowParam(8, "SOCK", progressbar=TRUE))
suppressPackageStartupMessages(library(synapser))
# login once and then save info
# synLogin("user.name", "password", rememberMe=TRUE)
synLogin()
## Welcome, Gabriel Hoffman!
## NULL
# load gene locations
# this is ENSEMBL v75 from the hg19 assembly
# but other versions and assemblies are available
library(EnsDb.Hsapiens.v75)
## Loading required package: ensembldb
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: AnnotationFilter
##
## Attaching package: 'ensembldb'
## The following object is masked from 'package:stats':
##
## filter
# metadata
metadata = fread( synGet('syn5691351')$path )
metadata$CellType = factor( metadata$CellType, c('NeuN-', 'NeuN+'))
# metadata = metadata[HistoneMark=='H3K27ac',]
metadata$name = gsub("^HBCC_", '', metadata$Sample_ID)
metadata = data.frame(metadata)
rownames(metadata) = metadata$name
metadata_clinical = fread( synGet('syn5691350')$path )
metadata = merge(metadata, metadata_clinical, by="Individual_ID")
rownames(metadata) = gsub("HBCC_", "", metadata$Sample_ID)
# filter to retain only DLPFC samples
idx = metadata$BrainRegion == "DLPFC"
metadata = metadata[idx,]
replaceName = function( name){
name = gsub("B$", "NeuN-", name)
name = gsub("D$", "NeuN-", name)
name = gsub("A$", "NeuN+", name)
name = gsub("C$", "NeuN+", name)
name
}
processData = function(synid_counts, synid_peaks, prefix){
# chip-Seq counts
chipCounts = read.table( synGet(synid_counts)$path, header=TRUE, stringsAsFactors=FALSE, sep=',', row.names=1)
rownames(chipCounts) = paste0( rownames(chipCounts), '_', prefix)
# peak locations
peakLocs = readBed( synGet(synid_peaks)$path )
names(peakLocs) = paste0("peak_", 1:length(peakLocs), "_", prefix)
# get overlapping peaks
isect = intersect( rownames(chipCounts), names(peakLocs))
chipCounts = chipCounts[rownames(chipCounts) %in% isect,]
peakLocs = peakLocs[names(peakLocs) %in% isect ]
identical(rownames(chipCounts), names(peakLocs))
# get overlapping samples
isect = intersect(colnames(chipCounts), metadata$name)
chipCounts = chipCounts[,colnames(chipCounts) %in% isect]
# metadata = metadata[metadata$name %in% isect,]
colnames(chipCounts) = replaceName( colnames(chipCounts) )
# match order
# idx = match( metadata$name, colnames(chipCounts))
# # metadata = metadata[idx,]
# chipCounts = chipCounts[,idx]
# identical(colnames(chipCounts), metadata$name)
return(list(chipCounts = chipCounts, peakLocs = peakLocs))
}
# H3K27ac
data_H3K27ac = processData('syn8078978', 'syn8080422', "H3K27AC")
# H3K4ME3
data_H3K4ME3 = processData('syn8078923', 'syn8078929', 'H3K4ME3')
count = table(c(colnames(data_H3K27ac$chipCounts), colnames(data_H3K4ME3$chipCounts)))
ids = names(count[count==2])
# Combine
#--------
chipCounts = rbind(data_H3K27ac$chipCounts[,ids], data_H3K4ME3$chipCounts[,ids])
peakLocs = c(data_H3K27ac$peakLocs, data_H3K4ME3$peakLocs)
ord = order(peakLocs)
chipCounts = chipCounts[ord,]
peakLocs = peakLocs[ord]
# change row names of metadata to use NeuN+/-
metadata$name = replaceName( metadata$name )
idx = match(colnames(chipCounts), metadata$name)
metadata = metadata[idx,]
rownames(metadata) = replaceName( rownames(metadata) )
# only keep autosomes
idx = which(seqnames(peakLocs) %in% paste0("chr", 1:22))
chipCounts = chipCounts[idx,]
peakLocs = keepSeqlevels(peakLocs[idx], paste0("chr", 1:22))
isexpr = rowSums(cpm(chipCounts)>1) >= 0.1*ncol(chipCounts)
peakLocs2 = peakLocs[which(isexpr)]
# Standard usage of limma/voom
countObj = DGEList( chipCounts[isexpr,] )
countObj = calcNormFactors( countObj )
design = model.matrix( ~ CellType, metadata)
vobj = voom( countObj, design, plot=TRUE)
# identical(names(peakLocs2), rownames(vobj))
# PCA on original, mean centered data
fit = lmFit(vobj, model.matrix( ~ 1, metadata))
dcmp = svd(residuals(fit, vobj), nv=5, nu=0)
frac = dcmp$d^2 / sum(dcmp$d^2) * 100
xlab = paste0('PC1: ', round(frac[1], 1), '%')
ylab = paste0('PC2: ', round(frac[2], 1), '%')
df_pca = data.frame(PC1 = dcmp$v[,1], PC2=dcmp$v[,2], cellType=metadata$CellType)
ggplot(df_pca, aes(PC1, PC2, color=cellType)) + geom_point() + theme_bw(17) + theme(aspect.ratio=1, plot.title = element_text(hjust = 0.5)) + scale_color_manual(values=c("gold", "blue")) + ggtitle("Original data") + xlab(xlab) + ylab(ylab)
# compute PC's after residualizing for CellType
fit = lmFit(vobj, model.matrix( ~ CellType, metadata))
dcmp = svd(residuals(fit, vobj), nv=5, nu=0)
# dsgn = model.matrix( ~ dcmp$v[,1:2], metadata)
dsgn = model.matrix( ~ CellType + Sex + dcmp$v[,1:5] , metadata)
fitPC = lmFit(vobj, dsgn)
fitPC = eBayes(fitPC)
# table(topTable(fitPC, coef='CellTypeNeuN+', number=Inf)$adj.P.Val < 0.05)
quantResid = residuals( fitPC, vobj )
dcmp = svd(quantResid, nv=5, nu=0)
frac = dcmp$d^2 / sum(dcmp$d^2) * 100
xlab = paste0('PC1: ', round(frac[1], 1), '%')
ylab = paste0('PC2: ', round(frac[2], 1), '%')
df_pca2 = data.frame(PC1 = dcmp$v[,1], PC2=dcmp$v[,2], cellType=metadata$CellType)
ggplot(df_pca2, aes(PC1, PC2, color=cellType)) + geom_point() + theme_bw(17) + theme(aspect.ratio=1, plot.title = element_text(hjust = 0.5)) + scale_color_manual(values=c("gold", "blue")) + ggtitle("Residuals") + xlab(xlab) + ylab(ylab)
# Compute correlation and hierarchical clustering on controls
treeList = runOrderedClusteringGenome( quantResid, peakLocs2, method.corr="spearman", quiet=TRUE )
##
Evaluating: chr1
Evaluating: chr2
Evaluating: chr3
Evaluating: chr4
Evaluating: chr5
Evaluating: chr6
Evaluating: chr7
Evaluating: chr8
Evaluating: chr9
Evaluating: chr10
Evaluating: chr11
Evaluating: chr12
Evaluating: chr13
Evaluating: chr14
Evaluating: chr15
Evaluating: chr16
Evaluating: chr17
Evaluating: chr18
Evaluating: chr19
Evaluating: chr20
Evaluating: chr21
Evaluating: chr22
# decay in original data
treeListOriginal = runOrderedClusteringGenome( vobj$E, peakLocs2, method.corr="spearman", quiet=TRUE )
##
Evaluating: chr1
Evaluating: chr2
Evaluating: chr3
Evaluating: chr4
Evaluating: chr5
Evaluating: chr6
Evaluating: chr7
Evaluating: chr8
Evaluating: chr9
Evaluating: chr10
Evaluating: chr11
Evaluating: chr12
Evaluating: chr13
Evaluating: chr14
Evaluating: chr15
Evaluating: chr16
Evaluating: chr17
Evaluating: chr18
Evaluating: chr19
Evaluating: chr20
Evaluating: chr21
Evaluating: chr22
dfDistOriginal = evaluateCorrDecay( treeListOriginal, peakLocs2, "chr1", verbose=FALSE )
plotCorrDecay( dfDistOriginal, method="Rsq", xlim=c(500, 1e6) )
Notice much lower correlation between distant features after residualizing
# decay in residuals
dfDist = evaluateCorrDecay( treeList, peakLocs2, "chr1", verbose=FALSE )
plotCorrDecay( dfDist, method="Rsq", xlim=c(500, 1e6) )
treeListClusters = createClusters( treeList, method='meanClusterSize', meanClusterSize=c( 10, 20, 30, 40, 50, 100, 200) )
## Method: meanClusterSize
# get total number of clusters
n_clusters = countClusters( treeListClusters )
# score each cluster to only retain
# clusters with strong correlation structure
clstScore = scoreClusters(treeList, treeListClusters )
## Evaluating strength of each cluster...
##
## Dividing work into 154 chunks...
df_LEF = lapply(clstScore, function(x){
with(x, data.frame(id, N, chrom, cluster, LEF))
})
df_LEF = do.call('rbind', df_LEF )
df_LEF$id = factor(df_LEF$id, sort(unique(df_LEF$id)))
ggplot(df_LEF, aes(LEF, color=id)) + geom_density() + theme_bw(17) + theme(aspect.ratio=1) + xlim(0,1)
# Evaluate LEF when features are permuted
clustScoreNull = runPermutedData(vobj$E, peakLocs2, method.corr="spearman", meanClusterSize=c( 10, 20, 30, 40, 50, 100, 200) )
##
Evaluating: chr1
Evaluating: chr2
Evaluating: chr3
Evaluating: chr4
Evaluating: chr5
Evaluating: chr6
Evaluating: chr7
Evaluating: chr8
Evaluating: chr9
Evaluating: chr10
Evaluating: chr11
Evaluating: chr12
Evaluating: chr13
Evaluating: chr14
Evaluating: chr15
Evaluating: chr16
Evaluating: chr17
Evaluating: chr18
Evaluating: chr19
Evaluating: chr20
Evaluating: chr21
Evaluating: chr22
## Method: meanClusterSize
## Evaluating strength of each cluster...
##
## Dividing work into 154 chunks...
df_LEF = do.call("rbind", clustScoreNull$clusterScores)
df_LEF$id = factor(df_LEF$id, sort(unique(df_LEF$id)))
ggplot(df_LEF, aes(LEF, color=id)) + geom_density() + theme_bw(17) + theme(aspect.ratio=1) + ggtitle("LEF for permuted data")
# retain clusters based on filterign criteria
# If lead eigen value fraction (LEF) > 40% then ke/ep clusters
# LEF is the fraction of variance explained by the first eigen-value
clustInclude = retainClusters( clstScore, "LEF", 0.05 )
## Using cutoffs:
## Cluster set cutoff
## 10 0.05
## 20 0.05
## 30 0.05
## 40 0.05
## 50 0.05
## 100 0.05
## 200 0.05
# get retained clusters
treeListClusters_filter = filterClusters( treeListClusters, clustInclude )
# Collapse similar clusters
treeListClusters_collapse = collapseClusters( treeListClusters_filter, peakLocs2 )
## Identifying redundant clusters...
# get total number of clusters
n_clusters = countClusters( treeListClusters_collapse )
# Evaluate Differential Correlation between two subsets of data
ecdBox = evalDiffCorr( quantResid, metadata$CellType, peakLocs2, treeListClusters_collapse, method="Box.permute", method.corr="spearman" )
## Note that clusters of 2 or fewer features are omitted from analysis
##
## # Clusters: 17720
## Dividing work into 100 chunks...
## Combining results...
# Apply deltaSLE test
# ecdSLE = evalDiffCorr( quantResid, metadata$CellType, peakLocs2, treeListClusters_collapse, method="deltaSLE", method.corr="spearman" )
# get summary of results
df = summary( ecdBox )
# print results
head(df)
## id chrom cluster pValue stat n.perm p.adjust
## 1 10 chr20 277 5.990694e-09 5.527021 0 6.039219e-05
## 2 10 chr10 42 6.638198e-08 6.368878 0 3.345984e-04
## 3 10 chr2 518 2.523406e-07 -3.500187 0 8.479484e-04
## 4 10 chr11 49 8.732245e-07 3.945022 0 2.038629e-03
## 5 20 chr6 387 1.011124e-06 -14.442468 0 2.038629e-03
## 6 10 chr13 60 1.254119e-06 5.486114 0 2.107129e-03
## id chrom cluster pValue stat n.perm p.adjust
## 1 10 chr20 277 5.990694e-09 5.527021 0 6.039219e-05
## 2 10 chr10 42 6.638198e-08 6.368878 0 3.345984e-04
## 3 10 chr2 518 2.523406e-07 -3.500187 0 8.479484e-04
## 4 10 chr11 49 8.732245e-07 3.945022 0 2.038629e-03
## 5 20 chr6 387 1.011124e-06 -14.442468 0 2.038629e-03
## 6 10 chr13 60 1.254119e-06 5.486114 0 2.107129e-03
Combine results to merge properties of each cluster into a single data.frame
df_results = combineResults( ecdBox, clstScore, treeListClusters_collapse, peakLocs2)
## Summarizing analysis...
## Summarizing cluster properties...
## Collecting cluster locations...
## Summarizing each cluster...
## Merging results...
ids = unique(df_results$id)
df_results$id = factor(df_results$id, ids[order(as.numeric(ids))])
head(df_results)
## id chrom cluster pValue stat n.perm p.adjust N
## 1 10 chr20 277 5.990694e-09 5.527021 0 6.039219e-05 4
## 2 10 chr10 42 6.638198e-08 6.368878 0 3.345984e-04 5
## 3 10 chr2 518 2.523406e-07 -3.500187 0 8.479484e-04 11
## 4 10 chr11 49 8.732245e-07 3.945022 0 2.038629e-03 3
## 5 20 chr6 387 1.011124e-06 -14.442468 0 2.038629e-03 13
## 6 10 chr13 60 1.254119e-06 5.486114 0 2.107129e-03 9
## mean_abs_corr quantile75 quantile90 quantile95 LEF start
## 1 0.5381720 0.8033266 0.8477823 0.8527218 0.4666648 60316988
## 2 0.4990726 0.5700605 0.6927419 0.7018145 0.4030064 6793470
## 3 0.4100513 0.5532258 0.5841935 0.6194355 0.2823671 115481968
## 4 0.6866935 0.7368952 0.7532258 0.7586694 0.5626655 8278159
## 5 0.1754136 0.2375000 0.3781855 0.4601613 0.1626700 169661154
## 6 0.2822581 0.4420363 0.6066532 0.6816532 0.2566316 27559336
## end width
## 1 60364793 47806
## 2 6829879 36410
## 3 115685472 203505
## 4 8285754 7596
## 5 170020581 359428
## 6 27619224 59889
Of the 10081 clusters tested, 100 have a adjusted p-value < 0.05. Also, pi1=0.
# Histogram of LEF
ggplot(df_results, aes(LEF, fill=id)) + geom_histogram(alpha=0.7) + theme_bw(17) + xlim(0,1) + theme(aspect.ratio=1, legend.position="bottom", plot.title = element_text(hjust = 0.5)) + scale_fill_discrete(name = "Requested mean cluster size") + xlab("Lead eigenvalue fraction (LEF)") + ggtitle("Summarize LEF")
# Histogram of mean absolute correlation
ggplot(df_results, aes(mean_abs_corr, fill=id)) + geom_histogram(alpha=0.7) + theme_bw(17) + xlim(0,1) + theme(aspect.ratio=1, legend.position="bottom", plot.title = element_text(hjust = 0.5)) + scale_fill_discrete(name = "Requested mean cluster size") + xlab("Mean absolute correlation") + ggtitle("Summarize absolute correlation")
# Boxplot of number of features per cluster
ggplot(df_results, aes(id, N, fill=id)) + geom_boxplot() + theme_bw(17) + theme(aspect.ratio=1, legend.position="bottom", plot.title = element_text(hjust = 0.5)) + scale_fill_discrete(name = "Feature per cluster") + xlab("Requested mean cluster size") + ylab("Number of features") + ggtitle("Summarize feature per cluster") + coord_flip()
figlist = lapply(1:50, function(i){
# extract peak ID's from most significant cluster
peakIDs = getFeaturesInCluster( treeListClusters_collapse, df$chrom[i], df$cluster[i], df$id[i])
# get location of peaks in this cluster
query = range(peakLocs2[names(peakLocs2) %in% peakIDs])
# expand window to include adjacent clusters
window = 1e5
start(query) = start(query) - window
end(query) = end(query) + window
fig1 = plotDecorate( ensdb, treeList, treeListClusters_collapse, peakLocs2, query, data=quantResid[,metadata$CellType=='NeuN+'])
fig2 = plotDecorate( ensdb, treeList, treeListClusters_collapse, peakLocs2, query, data=quantResid[,metadata$CellType=='NeuN-'])
plot_grid( fig1, fig2, ncol=2, labels=c('A: CellType==NeuN+', 'B: CellType==NeuN-') )
})
figlist
i = 30
peakIDs = getFeaturesInCluster( treeListClusters_collapse, df$chrom[i], df$cluster[i], df$id[i])
main = paste0(df$chrom[i], ': cluster ', df$cluster[i])
plotScatterPairs( vobj$E, peakIDs, metadata$CellType) + ggtitle(main)
loc = '/sc/orga/projects/psychencode/gabriel/decorate_analysis/bed/'
# all features
rtracklayer::export.bed( peakLocs2, paste0(loc, "EpiMap_all.bed"))
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
# background - only features consider in tests
featureNames_background = getFeaturesInClusterList( treeListClusters_collapse, chrom=df_results$chrom, clustID=df_results$cluster, id=df_results$id)
featureNames_background = unique(unlist(featureNames_background))
rtracklayer::export.bed( peakLocs2[featureNames_background], paste0(loc, "EpiMap_background.bed"))
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
# get significant peaks
idx = which(df_results$p.adjust < 0.05)
featureNames_signif = getFeaturesInClusterList( treeListClusters_collapse, chrom=df_results$chrom[idx], clustID=df_results$cluster[idx], id=df_results$id[idx])
featureNames_signif = unique(unlist(featureNames_signif))
rtracklayer::export.bed( peakLocs2[featureNames_signif], paste0(loc, "EpiMap_signif.bed"))
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
# get significant peaks - UP
idx = which(df_results$p.adjust < 0.05 & df_results$stat > 0)
featureNames_signif = getFeaturesInClusterList( treeListClusters_collapse, chrom=df_results$chrom[idx], clustID=df_results$cluster[idx], id=df_results$id[idx])
featureNames_signif = unique(unlist(featureNames_signif))
rtracklayer::export.bed( peakLocs2[featureNames_signif], paste0(loc, "EpiMap_signif_up.bed"))
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
# get significant peaks - DOWN
idx = which(df_results$p.adjust < 0.05 & df_results$stat < 0)
featureNames_signif = getFeaturesInClusterList( treeListClusters_collapse, chrom=df_results$chrom[idx], clustID=df_results$cluster[idx], id=df_results$id[idx])
featureNames_signif = unique(unlist(featureNames_signif))
rtracklayer::export.bed( peakLocs2[featureNames_signif], paste0(loc, "EpiMap_signif_down.bed"))
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'
## Found more than one class "DataFrame" in cache; using the first, from namespace 'S4Vectors'
## Also defined by 'PythonEmbedInR'