# Integration with scanpy's AnnData object.
def integrate_scanpy(adatas, **kwargs):
    """Integrate a list of `anndata.AnnData` objects in place.

    The expression matrix (`adata.X`) and gene names (`adata.var_names`)
    of every data set are handed to `integrate()`, and the resulting
    low-dimensional embedding of each data set is written back to
    `adata.obsm['X_scanorama']`.

    Parameters
    ----------
    adatas : `list` of `anndata.AnnData`
        Data sets to integrate. Each object is modified in place.
    kwargs : `dict`
        See documentation for the `integrate()` method for a full list of
        parameters to use for integration.

    Returns
    -------
    None
    """
    # `integrate()` also returns the gene list, which is not needed here.
    datasets_dimred, _ = integrate(
        [adata.X for adata in adatas],
        [adata.var_names.values for adata in adatas],
        **kwargs
    )

    # Embeddings come back in the same order as the input data sets.
    for adata, X_dimred in zip(adatas, datasets_dimred):
        adata.obsm['X_scanorama'] = X_dimred
与就地修改 adatas、返回 None 的 integrate_scanpy 不同,correct_scanpy 有返回值:它返回新构建的 AnnData 对象列表 new_adatas。
# Batch correction with scanpy's AnnData object.
def correct_scanpy(adatas, **kwargs):
    """Batch correct a list of `anndata.AnnData` objects.

    Parameters
    ----------
    adatas : `list` of `anndata.AnnData`
        Data sets to integrate and/or correct.
        `adata.var_names` must be set to the list of genes.
    return_dimred : `bool`, optional (default=`False`)
        When `True`, the returned `adatas` are each modified to also have
        the integrated low-dimensional embeddings in
        `adata.obsm['X_scanorama']`.
    kwargs : `dict`
        See documentation for the `correct()` method for a full list of
        parameters to use for batch correction.

    Returns
    -------
    corrected : `list` of `anndata.AnnData`
        By default (`return_dimred=False`), a list of new `AnnData`
        objects holding the corrected matrices. When `return_dimred=True`,
        each returned object also carries the integrated low-dimensional
        embedding in `adata.obsm['X_scanorama']`.
    """
    from anndata import AnnData

    return_dimred = bool(kwargs.get('return_dimred', False))

    # `correct()` returns an extra leading element (the embeddings) when
    # `return_dimred` is set, so the result is unpacked accordingly.
    results = correct(
        [adata.X for adata in adatas],
        [adata.var_names.values for adata in adatas],
        **kwargs
    )
    if return_dimred:
        datasets_dimred, datasets, genes = results
    else:
        datasets, genes = results
        datasets_dimred = None

    new_adatas = []
    for original, corrected_X in zip(adatas, datasets):
        adata = AnnData(corrected_X)
        adata.obs = original.obs
        adata.obsm = original.obsm

        # Ensure that variables are in the right order,
        # as Scanorama rearranges genes to be in alphabetical
        # order and as the intersection (or union) of the
        # original gene sets.
        adata.var_names = genes
        gene2idx = dict(zip(original.var_names.values, original.var.index))
        var_idx = [gene2idx[gene] for gene in genes]
        adata.var = original.var.loc[var_idx]

        new_adatas.append(adata)

    if return_dimred:
        for adata, X_dimred in zip(new_adatas, datasets_dimred):
            adata.obsm['X_scanorama'] = X_dimred

    return new_adatas
# List of datasets (matrices of cells-by-genes):
datasets = [ list of scipy.sparse.csr_matrix or numpy.ndarray ]
# List of gene lists:
genes_list = [ list of list of string ]