Landscape Clustergram Chromium¶
In [2]:
Copied!
import scanpy as sc
import celldega as dega
import scanpy as sc
import celldega as dega
/Users/feni/Documents/celldega/dega/lib/python3.12/site-packages/h5py/__init__.py:36: UserWarning: h5py is running against HDF5 1.14.5 when it was built against 1.14.6, this may cause problems _warn(("h5py is running against HDF5 {0} when it was built against {1}, "
In [3]:
Copied!
adata = sc.read_h5ad('data/chromium_data/chromium_pbmc_healthy-donor/adata.h5ad')
adata
adata = sc.read_h5ad('data/chromium_data/chromium_pbmc_healthy-donor/adata.h5ad')
adata
Out[3]:
AnnData object with n_obs × n_vars = 6565 × 18314 obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'n_genes', 'leiden' var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_pca', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances'
In [6]:
Copied!
# Example assuming your DataFrame is called marker_df and has a 'gene' column
# Define unwanted prefixes (uppercased for case-insensitive matching)
bad_prefixes = ('MT-', 'RPS', 'RPL', 'MRPS', 'MRPL')
# Create a boolean mask for genes to keep
keep_mask = ~adata.var_names.str.upper().str.startswith(bad_prefixes)
# Apply the mask
adata = adata[:, keep_mask]
# Example assuming your DataFrame is called marker_df and has a 'gene' column
# Define unwanted prefixes (uppercased for case-insensitive matching)
bad_prefixes = ('MT-', 'RPS', 'RPL', 'MRPS', 'MRPL')
# Create a boolean mask for genes to keep
keep_mask = ~adata.var_names.str.upper().str.startswith(bad_prefixes)
# Apply the mask
adata = adata[:, keep_mask]
In [7]:
Copied!
adata
adata
Out[7]:
View of AnnData object with n_obs × n_vars = 6565 × 18124 obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'n_genes', 'leiden' var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'umap' obsm: 'X_pca', 'X_umap' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances'
In [8]:
Copied!
base_url = 'https://raw.githubusercontent.com/broadinstitute/chromium_pbmc_healthy-donor/main/'
base_url = 'https://raw.githubusercontent.com/broadinstitute/chromium_pbmc_healthy-donor/main/'
In [9]:
Copied!
landscape = dega.viz.Landscape(
technology='Chromium',
base_url = base_url,
height=600,
adata=adata,
cell_attr=['leiden'],
ini_zoom=-4.5,
ini_x=4000,
ini_y=5000
)
# landscape
landscape = dega.viz.Landscape(
technology='Chromium',
base_url = base_url,
height=600,
adata=adata,
cell_attr=['leiden'],
ini_zoom=-4.5,
ini_x=4000,
ini_y=5000
)
# landscape
/var/folders/8d/jxpy9rd10j7fp2rcj_s5sz3c0000gq/T/ipykernel_71464/4210038785.py:1: UserWarning: Transformation matrix not found at https://raw.githubusercontent.com/broadinstitute/chromium_pbmc_healthy-donor/main//micron_to_image_transform.csv. Using identity. landscape = dega.viz.Landscape(
In [10]:
Copied!
mat = dega.clust.Matrix(adata)
mat.filter(axis='row', by='mean', num=10000)
mat.filter(axis='row', by='var', num=5000)
mat.downsample_to(category='leiden')
mat.norm(axis='row', by='zscore')
mat.cluster()
cgm = dega.Clustergram(matrix=mat, width=500, height=500)
mat = dega.clust.Matrix(adata)
mat.filter(axis='row', by='mean', num=10000)
mat.filter(axis='row', by='var', num=5000)
mat.downsample_to(category='leiden')
mat.norm(axis='row', by='zscore')
mat.cluster()
cgm = dega.Clustergram(matrix=mat, width=500, height=500)
/Users/feni/Documents/celldega/src/celldega/clust/matrix.py:194: UserWarning: Large matrix (6565 x 18124). Consider filtering. self.load_adata(data, col_attr=col_attr, row_attr=row_attr)
Landscape-Clustergram¶
In [11]:
Copied!
dega.viz.landscape_clustergram(landscape, cgm)
dega.viz.landscape_clustergram(landscape, cgm)
Out[11]:
Single-Cell Clustergram-Enrichr¶
In [12]:
Copied!
mat = dega.clust.Matrix(adata, name='sc', col_attr=['leiden'])
mat.filter(axis='row', by='mean', num=10000)
mat.filter(axis='row', by='var', num=200)
mat.norm(axis='row', by='zscore')
mat.cluster()
cgm_2 = dega.Clustergram(matrix=mat, width=500, height=500)
mat = dega.clust.Matrix(adata, name='sc', col_attr=['leiden'])
mat.filter(axis='row', by='mean', num=10000)
mat.filter(axis='row', by='var', num=200)
mat.norm(axis='row', by='zscore')
mat.cluster()
cgm_2 = dega.Clustergram(matrix=mat, width=500, height=500)
/Users/feni/Documents/celldega/src/celldega/clust/matrix.py:194: UserWarning: Large matrix (6565 x 18124). Consider filtering. self.load_adata(data, col_attr=col_attr, row_attr=row_attr)
In [13]:
Copied!
dega.viz.clustergram_enrich(cgm_2)
dega.viz.clustergram_enrich(cgm_2)
Out[13]:
In [ ]:
Copied!
In [ ]:
Copied!