Comparing a combined (i.e., processing samples together) and an independent (i.e., processing samples separately) construction of metacells with SuperCell (related to @daskelly question https://github.com/GfellerLab/SuperCell/issues/11#issuecomment-1090916447).
library(SuperCell)
library(Matrix)
data(cell_lines)
<- cell_lines$GE
GE <- cell_lines$meta cell.meta
<- 20 # graining level
gamma <- 10 # number of PCs n.pc
To compare results, we use 2 samples that correspond to two different cancer cell lines (data from Tian et al., 2019)
<- which(cell.meta == "HCC827")
cell.idx.HCC827 <- which(cell.meta == "H838") cell.idx.H838
<- SCimplify(
SC.HCC827.H838 c(cell.idx.HCC827, cell.idx.H838)], # log-normalized gene expression matrix
GE[,gamma = gamma, # graining level
cell.split.condition = cell.meta[c(cell.idx.HCC827, cell.idx.H838)], # metacell do not mix cells from different cell lines
n.pc = n.pc) # number of proncipal components to use
<- SC.HCC827.H838$genes.use
genes.use
$cell.line <- supercell_assign(cell.meta[c(cell.idx.HCC827, cell.idx.H838)],
SC.HCC827.H838supercell_membership = SC.HCC827.H838$membership)
<- supercell_GE(GE[,c(cell.idx.HCC827, cell.idx.H838)], groups = SC.HCC827.H838$membership)
SC.GE.HCC827.H838
$SC_PCA <- supercell_prcomp(
SC.HCC827.H838::t(SC.GE.HCC827.H838),
Matrixsupercell_size = SC.HCC827.H838$supercell_size,
genes.use = genes.use)
$SC_UMAP <- supercell_UMAP(
SC.HCC827.H838
SC.HCC827.H838, n_neighbors = 10)
supercell_plot_UMAP(
SC.HCC827.H838,group = "cell.line",
title = paste0("Combined construction of HCC827 and H838 metacells")
)
gamma
and the
same set of features genes.use
)<- SCimplify(GE[,cell.idx.HCC827], # log-normalized gene expression matrix
SC.HCC827 gamma = gamma, # graining level
n.pc = n.pc, # number of proncipal components to use
genes.use = genes.use) # using the same set of genes as for the combined analysis
$cell.line <- supercell_assign(cell.meta[cell.idx.HCC827], supercell_membership = SC.HCC827$membership)
SC.HCC827
<- SCimplify(GE[,cell.idx.H838], # log-normalized gene expression matrix
SC.H838 gamma = gamma, # graining level
n.pc = n.pc, # number of proncipal components to use
genes.use = genes.use) # using the same set of genes as for the combined analysis
$cell.line <- supercell_assign(cell.meta[cell.idx.H838], supercell_membership = SC.H838$membership)
SC.H838
<- supercell_merge(list(SC.HCC827, SC.H838), fields = c("cell.line"))
SC.merged
# compute metacell gene expression for SC.HCC827
<- supercell_GE(GE[, cell.idx.HCC827], groups = SC.HCC827$membership)
SC.GE.HCC827 # compute metacell gene expression for SC.H838
<- supercell_GE(GE[, cell.idx.H838], groups = SC.H838$membership)
SC.GE.H838 # merge GE matricies
<- supercell_mergeGE(list(SC.GE.HCC827, SC.GE.H838))
SC.GE.merged
$SC_PCA <- supercell_prcomp(
SC.merged::t(SC.GE.merged),
Matrixsupercell_size = SC.merged$supercell_size,
genes.use = genes.use)
$SC_UMAP <- supercell_UMAP(
SC.merged
SC.merged, n_neighbors = 10)
<- supercell_plot_UMAP(
g
SC.merged,group = "cell.line",
title = paste0("Independent construction of HCC827 and H838 metacells")
)
As the dimensionality reductions (even on the same set of features) are different for the combined (HCC827+H838) dataset and for the independent (HCC827 and H838 separately) datasets. The first PCA basen on global variability between two cell lines and the PCAs from the second approach represent local variability within each cell line. (sample).
heatmap(as.matrix(table(SC.merged$membership, SC.HCC827.H838$membership)), scale = "none")
Metacell size distribution
summary(SC.merged$supercell_size)
summary(SC.HCC827.H838$supercell_size)
Also, in the combined analysis, the graining level does not mean that
each cell line (or sample) will have this particular graining level. For
instance, in the combined analysis, the graining level for
HCC827 is 18.6
and for H838 is
21.8
, but this difference might be even more prominent if
the heterogeneity and complexity of two samples are more different.
## Combined analysis
# actual graining level for H838 cell line
length(cell.idx.H838)/sum(SC.HCC827.H838$cell.line == "H838")
# actual graining level for H838 cell line
length(cell.idx.HCC827)/sum(SC.HCC827.H838$cell.line == "HCC827")
## Independent analysis
# actual graining level for H838 cell line
length(cell.idx.H838)/sum(SC.merged$cell.line == "H838")
# actual graining level for HCC827 cell line
length(cell.idx.HCC827)/sum(SC.merged$cell.line == "HCC827")
# actual overall graining level in the combined analysis
length(SC.merged$membership)/max(SC.merged$membership)