Pre-process_Xenium_V1_human_Pancreas_FFPE_outs¶
In [1]:
Copied!
%load_ext autoreload
%autoreload 2
%env ANYWIDGET_HMR=1
%load_ext autoreload
%autoreload 2
%env ANYWIDGET_HMR=1
env: ANYWIDGET_HMR=1
In [2]:
Copied!
import numpy as np
import pandas as pd
# macOS requirement
import os
os.environ['DYLD_LIBRARY_PATH'] = '/opt/homebrew/lib:' + os.environ.get('DYLD_LIBRARY_PATH', '')
import celldega as dega
import tifffile
import zarr
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
import geopandas as gpd
import shapely
import tarfile
dega.__version__
import numpy as np
import pandas as pd
# macOS requirement
import os
os.environ['DYLD_LIBRARY_PATH'] = '/opt/homebrew/lib:' + os.environ.get('DYLD_LIBRARY_PATH', '')
import celldega as dega
import tifffile
import zarr
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
import geopandas as gpd
import shapely
import tarfile
dega.__version__
Note: this notebook was re-run after merging in the latest changes.
Out[2]:
'0.5.4'
In [3]:
Copied!
ls ../data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/
ls ../data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/
ls: ../data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/: No such file or directory
In [4]:
Copied!
ls ../data/xenium_landscapes/
ls ../data/xenium_landscapes/
Landscape_Xenium_V1_human_Pancreas_FFPE_outs_backup/ Landscape_Xenium_V1_human_Pancreas_FFPE_outs_png/ Landscape_Xenium_V1_human_Pancreas_FFPE_outs_webp/ Xenium_Prime_Human_Lymph_Node_Reactive_FFPE_outs/ Xenium_Prime_Human_Lymph_Node_Reactive_FFPE_outs_landscape_files/ Xenium_Prime_Human_Prostate_FFPE_outs/ Xenium_Prime_Human_Skin_FFPE_outs_original/ Xenium_V1_hBoneMarrow_nondiseased_section_outs_landscape_files/ Xenium_V1_hBoneMarrow_nondiseased_section_outs_unscaled/
In [5]:
Copied!
dataset_name = 'Xenium_V1_human_Pancreas_FFPE_outs'
dataset_name = 'Xenium_V1_human_Pancreas_FFPE_outs'
In [6]:
Copied!
base_path = 'data/xenium_data/' + dataset_name + '/'
base_path = 'data/xenium_data/' + dataset_name + '/'
In [7]:
Copied!
path_landscape_files = 'data/xenium_landscapes/' + dataset_name + '_sparse/'
path_landscape_files = 'data/xenium_landscapes/' + dataset_name + '_sparse/'
In [8]:
Copied!
base_path
base_path
Out[8]:
'data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/'
In [9]:
Copied!
path_landscape_files
path_landscape_files
Out[9]:
'data/xenium_landscapes/Xenium_V1_human_Pancreas_FFPE_outs_sparse/'
In [10]:
Copied!
# Create the landscape output directory if needed.
# os.makedirs(..., exist_ok=True) is race-free (no gap between an exists()
# check and mkdir()) and also creates any missing parent directories.
os.makedirs(path_landscape_files, exist_ok=True)
Unzip Xenium Data¶
Decompress Cell Feature Matrix MTX Files¶
In [11]:
Copied!
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'cell_feature_matrix.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'cell_feature_matrix.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
Decompress Xenium Analysis Files¶
In [12]:
Copied!
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'analysis.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'analysis.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
CBG¶
In [13]:
Copied!
cbg = dega.pre.read_cbg_mtx(base_path + 'cell_feature_matrix/')
cbg
cbg = dega.pre.read_cbg_mtx(base_path + 'cell_feature_matrix/')
cbg
Reading mtx file from data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/cell_feature_matrix/
Out[13]:
__index_level_0__ | ABCC11 | ACE2 | ACKR1 | ACTA2 | ACTG2 | ADAM28 | ADAMTS1 | ADGRE1 | ADGRL4 | ADH1C | ... | UnassignedCodeword_0490 | UnassignedCodeword_0491 | UnassignedCodeword_0492 | UnassignedCodeword_0493 | UnassignedCodeword_0494 | UnassignedCodeword_0495 | UnassignedCodeword_0496 | UnassignedCodeword_0497 | UnassignedCodeword_0498 | UnassignedCodeword_0499 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | |||||||||||||||||||||
aaaadnje-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaacalai-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaacjgil-1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaacpcil-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaadhocp-1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
oiloppgp-1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oilpccne-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oimacfoj-1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oimaiaae-1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oimajkkk-1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
140702 rows × 541 columns
In [14]:
Copied!
meta_gene_exp = dega.pre.calc_meta_gene_data(cbg)
meta_gene_exp = dega.pre.calc_meta_gene_data(cbg)
calculating mean expression from sparse float data calculating variance by looping over rows
Gene Metadata¶
In [15]:
Copied!
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'meta_gene.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'meta_gene.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
Reading mtx file from data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/cell_feature_matrix/ calculating mean expression from sparse float data calculating variance by looping over rows
Cell-by-gene Files¶
In [16]:
Copied!
dega.pre.save_cbg_gene_parquets(path_landscape_files, cbg, verbose=True)
dega.pre.save_cbg_gene_parquets(path_landscape_files, cbg, verbose=True)
Processing gene 0: ABCC11 Processing gene 100: CLECL1 Processing gene 200: IL1RL1 Processing gene 300: RGS16 Processing gene 400: NegControlCodeword_0503 Processing gene 500: UnassignedCodeword_0459
Image Tiles¶
In [17]:
Copied!
import tifffile
# Path to your OME-TIFF file
file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# Open the OME-TIFF file and read the image data
with tifffile.TiffFile(file_path) as tif:
series = tif.series[0] # Assuming you are interested in the first series
image_data = series.asarray()
import tifffile
# Path to your OME-TIFF file
file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# Open the OME-TIFF file and read the image data
with tifffile.TiffFile(file_path) as tif:
series = tif.series[0] # Assuming you are interested in the first series
image_data = series.asarray()
<tifffile.TiffFile 'morphology_focus_0000.ome.tif'> OME series cannot read multi-file pyramids
In [18]:
Copied!
image_data.shape
image_data.shape
Out[18]:
(4, 13770, 34155)
DAPI¶
In [19]:
Copied!
# from skimage.io import imread
# from skimage.io import imread
In [20]:
Copied!
# image_scale = 1
# # file_path = f"{data_dir}/morphology_focus_0000.ome.tif"
# file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# image_scale = 1
# # file_path = f"{data_dir}/morphology_focus_0000.ome.tif"
# file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
TODO: determine how this approach can be adapted to additional image channels.
In [21]:
Copied!
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'dapi',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'dapi',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [22]:
Copied!
# img = imread(file_path)[...,1]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'bound',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,1]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'bound',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [23]:
Copied!
# img = imread(file_path)[...,2]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'rna',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,2]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'rna',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [24]:
Copied!
# img = imread(file_path)[...,3]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'prot',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,3]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'prot',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [25]:
Copied!
image_scale = 1.0
image_scale = 1.0
In [26]:
Copied!
suffix = '.webp[Q=100]'
suffix = '.webp[Q=100]'
In [27]:
Copied!
# Channel 0 -> 'dapi' DeepZoom layer.
# NOTE(review): the `* 2` doubles pixel intensity for brightness; if
# image_data is an unsigned integer type (e.g. uint16), bright pixels can
# overflow/wrap — confirm dtype and whether clipping is needed.
# Pipeline: raw TIFF -> downsampled copy -> PNG -> DeepZoom tile pyramid.
image_data_scaled = image_data[0,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'dapi', suffix=suffix)
image_data_scaled = image_data[0,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'dapi', suffix=suffix)
In [28]:
Copied!
# Channel 1 -> 'bound' DeepZoom layer: same TIFF -> downsample -> PNG ->
# pyramid pipeline as the DAPI cell, repeated per channel (a shared helper
# function taking (channel_index, layer_name) would avoid this copy-paste).
image_data_scaled = image_data[1,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'bound', suffix=suffix)
image_data_scaled = image_data[1,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'bound', suffix=suffix)
In [29]:
Copied!
# Channel 2 -> 'rna' DeepZoom layer (same per-channel pipeline as above).
image_data_scaled = image_data[2,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'rna', suffix=suffix)
image_data_scaled = image_data[2,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'rna', suffix=suffix)
In [30]:
Copied!
# Channel 3 -> 'prot' DeepZoom layer (same per-channel pipeline as above).
image_data_scaled = image_data[3,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'prot', suffix=suffix)
image_data_scaled = image_data[3,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'prot', suffix=suffix)
Cell Metadata¶
In [31]:
Copied!
# Function to open a Zarr file
def open_zarr(path: str) -> zarr.Group:
store = (zarr.ZipStore(path, mode="r")
if path.endswith(".zip")
else zarr.DirectoryStore(path)
)
return zarr.group(store=store)
# For example, use the above function to open the cells Zarr file, which contains segmentation mask Zarr arrays
root = open_zarr(base_path + "cells.zarr.zip")
# # Look at group array info and structure
# root.info
# root.tree() # shows structure, array dimensions, data types
# Function to open a Zarr file
def open_zarr(path: str) -> zarr.Group:
store = (zarr.ZipStore(path, mode="r")
if path.endswith(".zip")
else zarr.DirectoryStore(path)
)
return zarr.group(store=store)
# For example, use the above function to open the cells Zarr file, which contains segmentation mask Zarr arrays
root = open_zarr(base_path + "cells.zarr.zip")
# # Look at group array info and structure
# root.info
# root.tree() # shows structure, array dimensions, data types
In [32]:
Copied!
transformation_matrix = root['masks']['homogeneous_transform'][:]
transformation_matrix
transformation_matrix = root['masks']['homogeneous_transform'][:]
transformation_matrix
Out[32]:
array([[4.705882, 0. , 0. , 0. ], [0. , 4.705882, 0. , 0. ], [0. , 0. , 1. , 0. ], [0. , 0. , 0. , 1. ]], dtype=float32)
In [33]:
Copied!
# Persist the top-left 3x3 of the 4x4 homogeneous transform (the x/y scale
# terms shown in Out[32]; the homogeneous row/column is dropped) as a
# space-separated, header-less CSV consumed by the dega.pre helpers below.
pd.DataFrame(transformation_matrix[:3,:3]).to_csv(
path_landscape_files + 'xenium_transform.csv',
sep=' ',
header=False,
index=False
)
pd.DataFrame(transformation_matrix[:3,:3]).to_csv(
path_landscape_files + 'xenium_transform.csv',
sep=' ',
header=False,
index=False
)
In [34]:
Copied!
path_transformation_matrix = path_landscape_files + 'xenium_transform.csv'
path_meta_cell_micron = base_path + 'cells.csv.gz'
path_meta_cell_image = path_landscape_files + 'cell_metadata.parquet'
path_transformation_matrix = path_landscape_files + 'xenium_transform.csv'
path_meta_cell_micron = base_path + 'cells.csv.gz'
path_meta_cell_image = path_landscape_files + 'cell_metadata.parquet'
In [35]:
Copied!
default_clustering = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
default_clustering
default_clustering = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
default_clustering
Out[35]:
Cluster | |
---|---|
Barcode | |
aaaadnje-1 | 15 |
aaacalai-1 | 9 |
aaacjgil-1 | 15 |
aaacpcil-1 | 13 |
aaadhocp-1 | 18 |
... | ... |
oiloppgp-1 | 10 |
oilpccne-1 | 6 |
oimacfoj-1 | 10 |
oimaiaae-1 | 11 |
oimajkkk-1 | 23 |
140194 rows × 1 columns
Save cell metadata¶
In [36]:
Copied!
# Do not include clustering information in the default cell metadata.
# Uses the saved transformation matrix to map cell coordinates from micron
# space into image space (presumably — confirm against dega.pre docs) and
# writes the result to cell_metadata.parquet.
dega.pre.make_meta_cell_image_coord(
'Xenium',
path_transformation_matrix,
path_meta_cell_micron,
path_meta_cell_image,
image_scale=image_scale
)
# Do not include clustering information in the default cell metadata.
dega.pre.make_meta_cell_image_coord(
'Xenium',
path_transformation_matrix,
path_meta_cell_micron,
path_meta_cell_image,
image_scale=image_scale
)
Save default clustering results¶
In [37]:
Copied!
# Create the cell_clusters output directory if needed. makedirs with
# exist_ok=True is race-free (no gap between an exists() check and mkdir())
# and also creates missing parent directories.
os.makedirs(path_landscape_files + 'cell_clusters/', exist_ok=True)
In [38]:
Copied!
default_clustering = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering.head()
default_clustering = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering.head()
Out[38]:
cluster | |
---|---|
aaaadnje-1 | 15 |
aaacalai-1 | 9 |
aaacjgil-1 | 15 |
aaacpcil-1 | 13 |
aaadhocp-1 | 18 |
In [39]:
Copied!
default_clustering_ini = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering_ini.head()
default_clustering_ini = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering_ini.head()
Out[39]:
cluster | |
---|---|
aaaadnje-1 | 15 |
aaacalai-1 | 9 |
aaacjgil-1 | 15 |
aaacpcil-1 | 13 |
aaadhocp-1 | 18 |
In [40]:
Copied!
meta_cell = pd.read_parquet(path_landscape_files + 'cell_metadata.parquet')
meta_cell.shape
meta_cell = pd.read_parquet(path_landscape_files + 'cell_metadata.parquet')
meta_cell.shape
Out[40]:
(140702, 2)
In [41]:
Copied!
default_clustering_ini['cluster'] = default_clustering_ini['cluster'].astype('string')
default_clustering_ini['cluster'] = default_clustering_ini['cluster'].astype('string')
In [42]:
Copied!
# Align the clustering results to the full cell list from cell_metadata
# (140,702 cells vs 140,194 clustered): cells with no cluster assignment
# are left as missing values in the 'cluster' column.
default_clustering = pd.DataFrame(index=meta_cell.index.tolist())
default_clustering.loc[default_clustering_ini.index.tolist(), 'cluster'] = default_clustering_ini['cluster']
default_clustering = pd.DataFrame(index=meta_cell.index.tolist())
default_clustering.loc[default_clustering_ini.index.tolist(), 'cluster'] = default_clustering_ini['cluster']
In [43]:
Copied!
default_clustering.to_parquet(path_landscape_files + 'cell_clusters/cluster.parquet')
default_clustering.to_parquet(path_landscape_files + 'cell_clusters/cluster.parquet')
In [44]:
Copied!
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
In [45]:
Copied!
# dega.pre.make_meta_cell_image_coord(
# 'Xenium',
# path_transformation_matrix,
# path_meta_cell_micron,
# path_meta_cell_image,
# df_meta=df_meta
# )
# dega.pre.make_meta_cell_image_coord(
# 'Xenium',
# path_transformation_matrix,
# path_meta_cell_micron,
# path_meta_cell_image,
# df_meta=df_meta
# )
Cluster Colors¶
In [46]:
Copied!
ser_counts = default_clustering['cluster'].value_counts()
clusters = ser_counts.index.tolist()
ser_counts = default_clustering['cluster'].value_counts()
clusters = ser_counts.index.tolist()
In [47]:
Copied!
# Get all categorical color palettes from Matplotlib and flatten them into a single list of colors
palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if "tab" in name]
flat_colors = [color for palette in palettes for color in palette]
# Convert RGB tuples to hex codes
flat_colors_hex = [to_hex(color) for color in flat_colors]
# Use modular arithmetic to assign a color to each cluster; white for labels containing "Blank".
# NOTE(review): cluster labels here are numeric strings from graphclust, so the
# "Blank" check likely never fires — it looks copied from gene-coloring code; verify.
colors = [
flat_colors_hex[i % len(flat_colors_hex)] if "Blank" not in cluster else "#FFFFFF"
for i, cluster in enumerate(clusters)
]
# Create a DataFrame with clusters, their assigned colors, and cell counts
ser_color = pd.Series(colors, index=clusters, name='color')
meta_cluster = pd.DataFrame(ser_color)
meta_cluster['count'] = ser_counts
meta_cluster.to_parquet(path_landscape_files + 'cell_clusters/meta_cluster.parquet')
# Get all categorical color palettes from Matplotlib and flatten them into a single list of colors
palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if "tab" in name]
flat_colors = [color for palette in palettes for color in palette]
# Convert RGB tuples to hex codes
flat_colors_hex = [to_hex(color) for color in flat_colors]
# Use modular arithmetic to assign a color to each cluster; white for labels containing "Blank".
colors = [
flat_colors_hex[i % len(flat_colors_hex)] if "Blank" not in cluster else "#FFFFFF"
for i, cluster in enumerate(clusters)
]
# Create a DataFrame with clusters, their assigned colors, and cell counts
ser_color = pd.Series(colors, index=clusters, name='color')
meta_cluster = pd.DataFrame(ser_color)
meta_cluster['count'] = ser_counts
meta_cluster.to_parquet(path_landscape_files + 'cell_clusters/meta_cluster.parquet')
Transcripts¶
In [48]:
Copied!
tile_size = 200
tile_size = 200
In [49]:
Copied!
%%time
technology = 'Xenium'
path_trx = base_path + 'transcripts.parquet'
path_trx_tiles = path_landscape_files + 'transcript_tiles'
tile_bounds = dega.pre.make_trx_tiles(
'Xenium',
path_trx,
path_transformation_matrix,
path_trx_tiles,
# tile_size=tile_size,
# coarse_tile_size=tile_size * 10,
tile_size=tile_size,
image_scale=image_scale
# verbose=True
)
%%time
technology = 'Xenium'
path_trx = base_path + 'transcripts.parquet'
path_trx_tiles = path_landscape_files + 'transcript_tiles'
tile_bounds = dega.pre.make_trx_tiles(
'Xenium',
path_trx,
path_transformation_matrix,
path_trx_tiles,
# tile_size=tile_size,
# coarse_tile_size=tile_size * 10,
tile_size=tile_size,
image_scale=image_scale
# verbose=True
)
Processing chunks: 100%|████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 14.69it/s] Processing coarse tiles: 126tile [00:17, 7.15tile/s]
CPU times: user 30.2 s, sys: 27 s, total: 57.1 s Wall time: 18.8 s
Cell Boundaries¶
In [50]:
Copied!
%%time
path_cell_boundaries = base_path + 'cell_boundaries.parquet'
path_output = path_landscape_files + 'cell_segmentation'
dega.pre.make_cell_boundary_tiles(
'Xenium',
path_cell_boundaries,
path_meta_cell_micron,
path_transformation_matrix,
path_output,
#coarse_tile_size=tile_size * 10,
tile_size=tile_size,
tile_bounds=tile_bounds,
image_scale=image_scale
)
%%time
path_cell_boundaries = base_path + 'cell_boundaries.parquet'
path_output = path_landscape_files + 'cell_segmentation'
dega.pre.make_cell_boundary_tiles(
'Xenium',
path_cell_boundaries,
path_meta_cell_micron,
path_transformation_matrix,
path_output,
#coarse_tile_size=tile_size * 10,
tile_size=tile_size,
tile_bounds=tile_bounds,
image_scale=image_scale
)
Processing coarse tiles: 100%|██████████████████████████████████████████████████████████| 9/9 [00:13<00:00, 1.52s/it]
CPU times: user 23.1 s, sys: 7.1 s, total: 30.3 s Wall time: 24.4 s
Gene Metadata¶
In [51]:
Copied!
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'gene_metadata.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'gene_metadata.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
Reading mtx file from data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/cell_feature_matrix/ calculating mean expression from sparse float data calculating variance by looping over rows
Max Zoom¶
In [52]:
Copied!
# Example usage:
path_image_pyramid = path_landscape_files + 'pyramid_images/dapi_files/' # Change this to your actual directory path
max_pyramid_zoom = dega.pre.get_max_zoom_level(path_image_pyramid)
print(max_pyramid_zoom)
# Example usage:
path_image_pyramid = path_landscape_files + 'pyramid_images/dapi_files/' # Change this to your actual directory path
max_pyramid_zoom = dega.pre.get_max_zoom_level(path_image_pyramid)
print(max_pyramid_zoom)
16
Cluster Gene Expression¶
In [53]:
Copied!
usecols = ['cell_id', 'x_centroid', 'y_centroid']
meta_cell = pd.read_csv(base_path + 'cells.csv.gz', index_col=0, usecols=usecols)
meta_cell.columns = ['center_x', 'center_y']
meta_cell
usecols = ['cell_id', 'x_centroid', 'y_centroid']
meta_cell = pd.read_csv(base_path + 'cells.csv.gz', index_col=0, usecols=usecols)
meta_cell.columns = ['center_x', 'center_y']
meta_cell
Out[53]:
center_x | center_y | |
---|---|---|
cell_id | ||
aaaadnje-1 | 446.326691 | 1701.357300 |
aaacalai-1 | 441.307831 | 1735.877930 |
aaacjgil-1 | 466.053192 | 1712.259766 |
aaacpcil-1 | 430.858093 | 1707.464600 |
aaadhocp-1 | 476.111145 | 1711.089355 |
... | ... | ... |
oiloppgp-1 | 6082.675781 | 555.142883 |
oilpccne-1 | 6106.899414 | 494.951843 |
oimacfoj-1 | 6080.991211 | 626.742126 |
oimaiaae-1 | 6030.594727 | 536.503418 |
oimajkkk-1 | 6022.637207 | 573.784302 |
140702 rows × 2 columns
In [54]:
Copied!
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
In [55]:
Copied!
meta_cell['cluster'] = df_meta['cluster']
meta_cell['cluster'] = df_meta['cluster']
In [56]:
Copied!
# Compute a per-cluster mean-expression "signature": one column per cluster,
# one row per gene (averaged over the cells assigned to that cluster).
list_ser = []
for inst_cat in meta_cell['cluster'].unique().tolist():
    # Use pd.notna(): cells without a cluster assignment carry a missing
    # value (NaN/pd.NA), which `is not None` does NOT filter out — that
    # would yield an empty cell list and a bogus all-inf/NaN column.
    if pd.notna(inst_cat):
        inst_cells = meta_cell[meta_cell['cluster'] == inst_cat].index.tolist()
        # Mean expression across the cells in this cluster.
        inst_ser = cbg.loc[inst_cells].sum() / len(inst_cells)
        inst_ser.name = inst_cat
        list_ser.append(inst_ser)
df_sig = pd.concat(list_ser, axis=1)
In [57]:
Copied!
df_sig = pd.concat(list_ser, axis=1)
# handling weird behavior where there is a multiindex it appears
df_sig.columns = df_sig.columns.tolist()
df_sig.index = df_sig.index.tolist()
df_sig = pd.concat(list_ser, axis=1)
# handling weird behavior where there is a multiindex it appears
df_sig.columns = df_sig.columns.tolist()
df_sig.index = df_sig.index.tolist()
In [58]:
Copied!
# Drop Xenium control codewords (Unassigned/NegControl/DeprecatedCodeword)
# so only real genes remain, then order the columns by cluster.
# NOTE: the bare `len(keep_genes)` is not the last expression in the cell,
# so it displays nothing — informational only.
keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]
len(keep_genes)
df_sig = df_sig.loc[keep_genes, clusters]
df_sig.shape
keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]
len(keep_genes)
df_sig = df_sig.loc[keep_genes, clusters]
df_sig.shape
Out[58]:
(377, 28)
In [59]:
Copied!
df_sig.sparse.to_dense().to_parquet(path_landscape_files + 'df_sig.parquet')
df_sig.sparse.to_dense().to_parquet(path_landscape_files + 'df_sig.parquet')
Save Landscape Parameters JSON¶
In [60]:
Copied!
image_info = [
{
"name": "dapi",
"button_name": "DAPI",
"color": [
0,
0,
255
]
},
{
"name": "bound",
"button_name": "BOUND",
"color": [
0,
255,
0
]
},
{
"name": "rna",
"button_name": "RNA",
"color": [
255,
0,
0
]
},
{
"name": "prot",
"button_name": "PROT",
"color": [
255,
255,
255
]
}
]
image_info = [
{
"name": "dapi",
"button_name": "DAPI",
"color": [
0,
0,
255
]
},
{
"name": "bound",
"button_name": "BOUND",
"color": [
0,
255,
0
]
},
{
"name": "rna",
"button_name": "RNA",
"color": [
255,
0,
0
]
},
{
"name": "prot",
"button_name": "PROT",
"color": [
255,
255,
255
]
}
]
In [61]:
Copied!
dega.pre.save_landscape_parameters(
'Xenium',
path_landscape_files,
'dapi_files',
tile_size=tile_size,
image_info=image_info,
image_format='.webp'
)
dega.pre.save_landscape_parameters(
'Xenium',
path_landscape_files,
'dapi_files',
tile_size=tile_size,
image_info=image_info,
image_format='.webp'
)
data/xenium_landscapes/Xenium_V1_human_Pancreas_FFPE_outs_sparse//pyramid_images/dapi_files
In [ ]:
Copied!
In [ ]:
Copied!