Landscape Clustergram Chromium

In [2]:

Copied!

import scanpy as sc
import celldega as dega
import scanpy as sc
import celldega as dega

/Users/feni/Documents/celldega/dega/lib/python3.12/site-packages/h5py/__init__.py:36: UserWarning: h5py is running against HDF5 1.14.5 when it was built against 1.14.6, this may cause problems
  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "

In [3]:

Copied!

# Load the processed Chromium PBMC AnnData object. The path is relative to the
# notebook's working directory.
adata = sc.read_h5ad('data/chromium_data/chromium_pbmc_healthy-donor/adata.h5ad')
adata

Out[3]:

AnnData object with n_obs × n_vars = 6565 × 18314
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'n_genes', 'leiden'
    var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [6]:

Copied!

# Drop mitochondrial and ribosomal genes from `adata` by matching gene-name
# prefixes against `adata.var_names`.

# Unwanted prefixes, written in upper case because the gene names are
# upper-cased before matching (i.e. the match is case-insensitive).
# NOTE(review): plain prefix matching also removes any other gene whose symbol
# happens to start with one of these prefixes (e.g. RPS6KA1, if present) --
# confirm that this is acceptable for the analysis.
bad_prefixes = ('MT-', 'RPS', 'RPL', 'MRPS', 'MRPL')

# Boolean mask over genes: True for genes to keep
keep_mask = ~adata.var_names.str.upper().str.startswith(bad_prefixes)

# Subset the genes (columns); this rebinds `adata` to a view of the loaded object
adata = adata[:, keep_mask]

In [7]:

Copied!

# Inspect the filtered object; n_vars should be lower than in the loaded data
adata

Out[7]:

View of AnnData object with n_obs × n_vars = 6565 × 18124
    obs: 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'n_genes', 'leiden'
    var: 'gene_ids', 'feature_types', 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'counts'
    obsp: 'connectivities', 'distances'

In [8]:

Copied!

# Root URL of the hosted landscape files. Keep it free of a trailing slash:
# file names are appended with their own '/' separator, so a trailing slash
# here produces a '//' in the requested URLs.
base_url = 'https://raw.githubusercontent.com/broadinstitute/chromium_pbmc_healthy-donor/main'

In [9]:

Copied!





# Build the Landscape widget for the Chromium data, passing the 'leiden'
# cell attribute from `adata`. The widget is not displayed on its own here;
# it is rendered together with the clustergram in a later cell.
landscape = dega.viz.Landscape(
    technology='Chromium',
    base_url=base_url,
    height=600,
    adata=adata,
    cell_attr=['leiden'],
    # Initial view settings -- presumably the starting zoom level and x/y
    # position; confirm against the celldega Landscape documentation.
    ini_zoom=-4.5,
    ini_x=4000,
    ini_y=5000,
)

/var/folders/8d/jxpy9rd10j7fp2rcj_s5sz3c0000gq/T/ipykernel_71464/4210038785.py:1: UserWarning: Transformation matrix not found at https://raw.githubusercontent.com/broadinstitute/chromium_pbmc_healthy-donor/main//micron_to_image_transform.csv. Using identity.
  landscape = dega.viz.Landscape(

In [10]:

Copied!





# Build the matrix that backs the clustergram shown next to the landscape.
mat = dega.clust.Matrix(adata)

# Two-stage row filter: first by mean (num=10000), then by variance (num=5000).
# Presumably `num` is the number of top-ranked rows that are kept -- confirm
# against the celldega Matrix documentation.
mat.filter(axis='row', by='mean', num=10000)
mat.filter(axis='row', by='var', num=5000)

# Downsample using the 'leiden' category, z-score each row, then cluster.
mat.downsample_to(category='leiden')
mat.norm(axis='row', by='zscore')
mat.cluster()

cgm = dega.Clustergram(matrix=mat, width=500, height=500)

/Users/feni/Documents/celldega/src/celldega/clust/matrix.py:194: UserWarning: Large matrix (6565 x 18124). Consider filtering.
  self.load_adata(data, col_attr=col_attr, row_attr=row_attr)

Landscape-Clustergram

In [11]:

Copied!

# Display the Landscape widget and the Clustergram together
dega.viz.landscape_clustergram(landscape, cgm)

Out[11]:

Single-Cell Clustergram-Enrichr

In [12]:

Copied!





# Build a second matrix named 'sc' with the 'leiden' attribute attached to
# its columns. Note that this rebinds `mat`; the earlier clustergram `cgm`
# was already constructed from the previous matrix.
mat = dega.clust.Matrix(adata, name='sc', col_attr=['leiden'])

# Two-stage row filter: by mean (num=10000), then by variance (num=200).
# Presumably `num` is the number of top-ranked rows that are kept -- confirm
# against the celldega Matrix documentation.
mat.filter(axis='row', by='mean', num=10000)
mat.filter(axis='row', by='var', num=200)

# Z-score each row, then cluster.
mat.norm(axis='row', by='zscore')
mat.cluster()

cgm_2 = dega.Clustergram(matrix=mat, width=500, height=500)

/Users/feni/Documents/celldega/src/celldega/clust/matrix.py:194: UserWarning: Large matrix (6565 x 18124). Consider filtering.
  self.load_adata(data, col_attr=col_attr, row_attr=row_attr)

In [13]:

Copied!

# Display the single-cell clustergram with the enrichment view
dega.viz.clustergram_enrich(cgm_2)

Out[13]:

In [ ]: