Pre-process_Xenium_V1_human_Pancreas_FFPE_outs¶
In [1]:
Copied!
%load_ext autoreload
%autoreload 2
%env ANYWIDGET_HMR=1
%load_ext autoreload
%autoreload 2
%env ANYWIDGET_HMR=1
env: ANYWIDGET_HMR=1
In [2]:
Copied!
import numpy as np
import pandas as pd
# macOS requirement
import os
os.environ['DYLD_LIBRARY_PATH'] = '/opt/homebrew/lib:' + os.environ.get('DYLD_LIBRARY_PATH', '')
import celldega as dega
import tifffile
import zarr
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
import geopandas as gpd
import shapely
import tarfile
dega.__version__
import numpy as np
import pandas as pd
# macOS requirement
import os
os.environ['DYLD_LIBRARY_PATH'] = '/opt/homebrew/lib:' + os.environ.get('DYLD_LIBRARY_PATH', '')
import celldega as dega
import tifffile
import zarr
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
import geopandas as gpd
import shapely
import tarfile
dega.__version__
Note: this notebook was re-run after merging in the latest changes.
Out[2]:
'0.5.4'
In [3]:
Copied!
ls ../data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/
ls ../data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/
ls: ../data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/: No such file or directory
In [4]:
Copied!
ls ../data/xenium_landscapes/
ls ../data/xenium_landscapes/
Landscape_Xenium_V1_human_Pancreas_FFPE_outs_backup/ Landscape_Xenium_V1_human_Pancreas_FFPE_outs_png/ Landscape_Xenium_V1_human_Pancreas_FFPE_outs_webp/ Xenium_Prime_Human_Lymph_Node_Reactive_FFPE_outs/ Xenium_Prime_Human_Lymph_Node_Reactive_FFPE_outs_landscape_files/ Xenium_Prime_Human_Prostate_FFPE_outs/ Xenium_Prime_Human_Skin_FFPE_outs_original/ Xenium_V1_hBoneMarrow_nondiseased_section_outs_landscape_files/ Xenium_V1_hBoneMarrow_nondiseased_section_outs_unscaled/
In [5]:
Copied!
dataset_name = 'Xenium_V1_human_Pancreas_FFPE_outs'
dataset_name = 'Xenium_V1_human_Pancreas_FFPE_outs'
In [6]:
Copied!
base_path = 'data/xenium_data/' + dataset_name + '/'
base_path = 'data/xenium_data/' + dataset_name + '/'
In [7]:
Copied!
path_landscape_files = 'data/xenium_landscapes/' + dataset_name + '_sparse/'
path_landscape_files = 'data/xenium_landscapes/' + dataset_name + '_sparse/'
In [8]:
Copied!
base_path
base_path
Out[8]:
'data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/'
In [9]:
Copied!
path_landscape_files
path_landscape_files
Out[9]:
'data/xenium_landscapes/Xenium_V1_human_Pancreas_FFPE_outs_sparse/'
In [10]:
Copied!
# Create the landscape output directory if needed.
# os.makedirs(..., exist_ok=True) is race-free (no gap between an exists()
# check and mkdir()) and also creates any missing parent directories.
os.makedirs(path_landscape_files, exist_ok=True)
Unzip Xenium Data¶
Decompress Cell Feature Matrix MTX Files¶
In [11]:
Copied!
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'cell_feature_matrix.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'cell_feature_matrix.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
Decompress Xenium Analysis Files¶
In [12]:
Copied!
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'analysis.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
# # Path to the tar.gz file you want to decompress
# tar_file_path = base_path + 'analysis.tar.gz'
# # Path to the directory where you want to extract the contents
# output_directory = path_landscape_files
# # Open the tar.gz file
# with tarfile.open(tar_file_path, "r:gz") as tar:
# # Extract all contents to the specified directory
# tar.extractall(path=output_directory)
# print(f"File {tar_file_path} has been decompressed to {output_directory}")
CBG¶
In [13]:
Copied!
cbg = dega.pre.read_cbg_mtx(base_path + 'cell_feature_matrix/')
cbg
cbg = dega.pre.read_cbg_mtx(base_path + 'cell_feature_matrix/')
cbg
Reading mtx file from data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/cell_feature_matrix/
Out[13]:
__index_level_0__ | ABCC11 | ACE2 | ACKR1 | ACTA2 | ACTG2 | ADAM28 | ADAMTS1 | ADGRE1 | ADGRL4 | ADH1C | ... | UnassignedCodeword_0490 | UnassignedCodeword_0491 | UnassignedCodeword_0492 | UnassignedCodeword_0493 | UnassignedCodeword_0494 | UnassignedCodeword_0495 | UnassignedCodeword_0496 | UnassignedCodeword_0497 | UnassignedCodeword_0498 | UnassignedCodeword_0499 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | |||||||||||||||||||||
aaaadnje-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaacalai-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaacjgil-1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaacpcil-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
aaadhocp-1 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
oiloppgp-1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oilpccne-1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oimacfoj-1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oimaiaae-1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
oimajkkk-1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
140702 rows × 541 columns
In [14]:
Copied!
meta_gene_exp = dega.pre.calc_meta_gene_data(cbg)
meta_gene_exp = dega.pre.calc_meta_gene_data(cbg)
calculating mean expression from sparse float data calculating variance by looping over rows
Gene Metadata¶
In [15]:
Copied!
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'meta_gene.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'meta_gene.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
Reading mtx file from data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/cell_feature_matrix/ calculating mean expression from sparse float data calculating variance by looping over rows
Cell-by-gene Files¶
In [16]:
Copied!
dega.pre.save_cbg_gene_parquets(path_landscape_files, cbg, verbose=True)
dega.pre.save_cbg_gene_parquets(path_landscape_files, cbg, verbose=True)
Processing gene 0: ABCC11 Processing gene 100: CLECL1 Processing gene 200: IL1RL1 Processing gene 300: RGS16 Processing gene 400: NegControlCodeword_0503 Processing gene 500: UnassignedCodeword_0459
Image Tiles¶
In [17]:
Copied!
import tifffile
# Path to your OME-TIFF file
file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# Open the OME-TIFF file and read the image data
with tifffile.TiffFile(file_path) as tif:
series = tif.series[0] # Assuming you are interested in the first series
image_data = series.asarray()
import tifffile
# Path to your OME-TIFF file
file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# Open the OME-TIFF file and read the image data
with tifffile.TiffFile(file_path) as tif:
series = tif.series[0] # Assuming you are interested in the first series
image_data = series.asarray()
<tifffile.TiffFile 'morphology_focus_0000.ome.tif'> OME series cannot read multi-file pyramids
In [18]:
Copied!
image_data.shape
image_data.shape
Out[18]:
(4, 13770, 34155)
DAPI¶
In [19]:
Copied!
# from skimage.io import imread
# from skimage.io import imread
In [20]:
Copied!
# image_scale = 1
# # file_path = f"{data_dir}/morphology_focus_0000.ome.tif"
# file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# image_scale = 1
# # file_path = f"{data_dir}/morphology_focus_0000.ome.tif"
# file_path = base_path + 'morphology_focus/morphology_focus_0000.ome.tif'
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
TODO: determine how this approach can be adapted to additional image channels.
In [21]:
Copied!
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'dapi',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,0]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'dapi',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [22]:
Copied!
# img = imread(file_path)[...,1]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'bound',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,1]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'bound',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [23]:
Copied!
# img = imread(file_path)[...,2]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'rna',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,2]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'rna',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [24]:
Copied!
# img = imread(file_path)[...,3]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'prot',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
# img = imread(file_path)[...,3]
# img_8bit = dega.pre.check_and_convert_16_to_8_bit(img)
# dega.pre.make_deepzoom_pyramid(
# img_8bit,
# f"{path_landscape_files}/pyramid_images",
# 'prot',
# clahe_tile_size=32,
# clahe_contrast_limit=60,
# suffix=".webp[Q=100]"
# )
In [25]:
Copied!
image_scale = 1.0
image_scale = 1.0
In [26]:
Copied!
suffix = '.webp[Q=100]'
suffix = '.webp[Q=100]'
In [27]:
Copied!
# Channel 0 -> 'dapi' DeepZoom layer.
# NOTE(review): the `* 2` doubles pixel intensity for brightness; if
# image_data is an unsigned integer type (e.g. uint16), bright pixels can
# overflow/wrap — confirm dtype and whether clipping is needed.
# Pipeline: raw TIFF -> downsampled copy -> PNG -> DeepZoom tile pyramid.
image_data_scaled = image_data[0,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'dapi', suffix=suffix)
image_data_scaled = image_data[0,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'dapi', suffix=suffix)
In [28]:
Copied!
# Channel 1 -> 'bound' DeepZoom layer: same TIFF -> downsample -> PNG ->
# pyramid pipeline as the DAPI cell, repeated per channel (a shared helper
# function taking (channel_index, layer_name) would avoid this copy-paste).
image_data_scaled = image_data[1,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'bound', suffix=suffix)
image_data_scaled = image_data[1,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'bound', suffix=suffix)
In [29]:
Copied!
# Channel 2 -> 'rna' DeepZoom layer (same per-channel pipeline as above).
image_data_scaled = image_data[2,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'rna', suffix=suffix)
image_data_scaled = image_data[2,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'rna', suffix=suffix)
In [30]:
Copied!
# Channel 3 -> 'prot' DeepZoom layer (same per-channel pipeline as above).
image_data_scaled = image_data[3,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'prot', suffix=suffix)
image_data_scaled = image_data[3,:,:] * 2
# Save the image data to a regular TIFF file without compression
tifffile.imwrite(path_landscape_files + 'output_regular.tif', image_data_scaled, compression=None)
image_ds = dega.pre.reduce_image_size(path_landscape_files + 'output_regular.tif', image_scale, path_landscape_files)
image_png = dega.pre.convert_to_png(image_ds)
dega.pre.make_deepzoom_pyramid(image_png, path_landscape_files + 'pyramid_images/', 'prot', suffix=suffix)
Cell Metadata¶
In [31]:
Copied!
# Function to open a Zarr file
def open_zarr(path: str) -> zarr.Group:
store = (zarr.ZipStore(path, mode="r")
if path.endswith(".zip")
else zarr.DirectoryStore(path)
)
return zarr.group(store=store)
# For example, use the above function to open the cells Zarr file, which contains segmentation mask Zarr arrays
root = open_zarr(base_path + "cells.zarr.zip")
# # Look at group array info and structure
# root.info
# root.tree() # shows structure, array dimensions, data types
# Function to open a Zarr file
def open_zarr(path: str) -> zarr.Group:
store = (zarr.ZipStore(path, mode="r")
if path.endswith(".zip")
else zarr.DirectoryStore(path)
)
return zarr.group(store=store)
# For example, use the above function to open the cells Zarr file, which contains segmentation mask Zarr arrays
root = open_zarr(base_path + "cells.zarr.zip")
# # Look at group array info and structure
# root.info
# root.tree() # shows structure, array dimensions, data types
In [32]:
Copied!
transformation_matrix = root['masks']['homogeneous_transform'][:]
transformation_matrix
transformation_matrix = root['masks']['homogeneous_transform'][:]
transformation_matrix
Out[32]:
array([[4.705882, 0. , 0. , 0. ], [0. , 4.705882, 0. , 0. ], [0. , 0. , 1. , 0. ], [0. , 0. , 0. , 1. ]], dtype=float32)
In [33]:
Copied!
# Persist the top-left 3x3 of the 4x4 homogeneous transform (the x/y scale
# terms shown in Out[32]; the homogeneous row/column is dropped) as a
# space-separated, header-less CSV consumed by the dega.pre helpers below.
pd.DataFrame(transformation_matrix[:3,:3]).to_csv(
path_landscape_files + 'xenium_transform.csv',
sep=' ',
header=False,
index=False
)
pd.DataFrame(transformation_matrix[:3,:3]).to_csv(
path_landscape_files + 'xenium_transform.csv',
sep=' ',
header=False,
index=False
)
In [34]:
Copied!
path_transformation_matrix = path_landscape_files + 'xenium_transform.csv'
path_meta_cell_micron = base_path + 'cells.csv.gz'
path_meta_cell_image = path_landscape_files + 'cell_metadata.parquet'
path_transformation_matrix = path_landscape_files + 'xenium_transform.csv'
path_meta_cell_micron = base_path + 'cells.csv.gz'
path_meta_cell_image = path_landscape_files + 'cell_metadata.parquet'
In [35]:
Copied!
default_clustering = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
default_clustering
default_clustering = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
default_clustering
Out[35]:
Cluster | |
---|---|
Barcode | |
aaaadnje-1 | 15 |
aaacalai-1 | 9 |
aaacjgil-1 | 15 |
aaacpcil-1 | 13 |
aaadhocp-1 | 18 |
... | ... |
oiloppgp-1 | 10 |
oilpccne-1 | 6 |
oimacfoj-1 | 10 |
oimaiaae-1 | 11 |
oimajkkk-1 | 23 |
140194 rows × 1 columns
Save cell metadata¶
In [36]:
Copied!
# Do not include clustering information in the default cell metadata.
# Uses the saved transformation matrix to map cell coordinates from micron
# space into image space (presumably — confirm against dega.pre docs) and
# writes the result to cell_metadata.parquet.
dega.pre.make_meta_cell_image_coord(
'Xenium',
path_transformation_matrix,
path_meta_cell_micron,
path_meta_cell_image,
image_scale=image_scale
)
# Do not include clustering information in the default cell metadata.
dega.pre.make_meta_cell_image_coord(
'Xenium',
path_transformation_matrix,
path_meta_cell_micron,
path_meta_cell_image,
image_scale=image_scale
)
Save default clustering results¶
In [37]:
Copied!
# Create the cell_clusters output directory if needed. makedirs with
# exist_ok=True is race-free (no gap between an exists() check and mkdir())
# and also creates missing parent directories.
os.makedirs(path_landscape_files + 'cell_clusters/', exist_ok=True)
In [38]:
Copied!
default_clustering = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering.head()
default_clustering = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering.head()
Out[38]:
cluster | |
---|---|
aaaadnje-1 | 15 |
aaacalai-1 | 9 |
aaacjgil-1 | 15 |
aaacpcil-1 | 13 |
aaadhocp-1 | 18 |
In [39]:
Copied!
default_clustering_ini = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering_ini.head()
default_clustering_ini = pd.DataFrame(default_clustering.values, index=default_clustering.index.tolist(), columns=['cluster'])
default_clustering_ini.head()
Out[39]:
cluster | |
---|---|
aaaadnje-1 | 15 |
aaacalai-1 | 9 |
aaacjgil-1 | 15 |
aaacpcil-1 | 13 |
aaadhocp-1 | 18 |
In [40]:
Copied!
meta_cell = pd.read_parquet(path_landscape_files + 'cell_metadata.parquet')
meta_cell.shape
meta_cell = pd.read_parquet(path_landscape_files + 'cell_metadata.parquet')
meta_cell.shape
Out[40]:
(140702, 2)
In [41]:
Copied!
default_clustering_ini['cluster'] = default_clustering_ini['cluster'].astype('string')
default_clustering_ini['cluster'] = default_clustering_ini['cluster'].astype('string')
In [42]:
Copied!
# Align the clustering results to the full cell list from cell_metadata
# (140,702 cells vs 140,194 clustered): cells with no cluster assignment
# are left as missing values in the 'cluster' column.
default_clustering = pd.DataFrame(index=meta_cell.index.tolist())
default_clustering.loc[default_clustering_ini.index.tolist(), 'cluster'] = default_clustering_ini['cluster']
default_clustering = pd.DataFrame(index=meta_cell.index.tolist())
default_clustering.loc[default_clustering_ini.index.tolist(), 'cluster'] = default_clustering_ini['cluster']
In [43]:
Copied!
default_clustering.to_parquet(path_landscape_files + 'cell_clusters/cluster.parquet')
default_clustering.to_parquet(path_landscape_files + 'cell_clusters/cluster.parquet')
In [44]:
Copied!
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
In [45]:
Copied!
# dega.pre.make_meta_cell_image_coord(
# 'Xenium',
# path_transformation_matrix,
# path_meta_cell_micron,
# path_meta_cell_image,
# df_meta=df_meta
# )
# dega.pre.make_meta_cell_image_coord(
# 'Xenium',
# path_transformation_matrix,
# path_meta_cell_micron,
# path_meta_cell_image,
# df_meta=df_meta
# )
Cluster Colors¶
In [46]:
Copied!
ser_counts = default_clustering['cluster'].value_counts()
clusters = ser_counts.index.tolist()
ser_counts = default_clustering['cluster'].value_counts()
clusters = ser_counts.index.tolist()
In [47]:
Copied!
# Get all categorical color palettes from Matplotlib and flatten them into a single list of colors
palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if "tab" in name]
flat_colors = [color for palette in palettes for color in palette]
# Convert RGB tuples to hex codes
flat_colors_hex = [to_hex(color) for color in flat_colors]
# Use modular arithmetic to assign a color to each cluster; white for labels containing "Blank".
# NOTE(review): cluster labels here are numeric strings from graphclust, so the
# "Blank" check likely never fires — it looks copied from gene-coloring code; verify.
colors = [
flat_colors_hex[i % len(flat_colors_hex)] if "Blank" not in cluster else "#FFFFFF"
for i, cluster in enumerate(clusters)
]
# Create a DataFrame with clusters, their assigned colors, and cell counts
ser_color = pd.Series(colors, index=clusters, name='color')
meta_cluster = pd.DataFrame(ser_color)
meta_cluster['count'] = ser_counts
meta_cluster.to_parquet(path_landscape_files + 'cell_clusters/meta_cluster.parquet')
# Get all categorical color palettes from Matplotlib and flatten them into a single list of colors
palettes = [plt.get_cmap(name).colors for name in plt.colormaps() if "tab" in name]
flat_colors = [color for palette in palettes for color in palette]
# Convert RGB tuples to hex codes
flat_colors_hex = [to_hex(color) for color in flat_colors]
# Use modular arithmetic to assign a color to each cluster; white for labels containing "Blank".
colors = [
flat_colors_hex[i % len(flat_colors_hex)] if "Blank" not in cluster else "#FFFFFF"
for i, cluster in enumerate(clusters)
]
# Create a DataFrame with clusters, their assigned colors, and cell counts
ser_color = pd.Series(colors, index=clusters, name='color')
meta_cluster = pd.DataFrame(ser_color)
meta_cluster['count'] = ser_counts
meta_cluster.to_parquet(path_landscape_files + 'cell_clusters/meta_cluster.parquet')
Transcripts¶
In [48]:
Copied!
tile_size = 200
tile_size = 200
In [49]:
Copied!
%%time
technology = 'Xenium'
path_trx = base_path + 'transcripts.parquet'
path_trx_tiles = path_landscape_files + 'transcript_tiles'
tile_bounds = dega.pre.make_trx_tiles(
'Xenium',
path_trx,
path_transformation_matrix,
path_trx_tiles,
# tile_size=tile_size,
# coarse_tile_size=tile_size * 10,
tile_size=tile_size,
image_scale=image_scale
# verbose=True
)
%%time
technology = 'Xenium'
path_trx = base_path + 'transcripts.parquet'
path_trx_tiles = path_landscape_files + 'transcript_tiles'
tile_bounds = dega.pre.make_trx_tiles(
'Xenium',
path_trx,
path_transformation_matrix,
path_trx_tiles,
# tile_size=tile_size,
# coarse_tile_size=tile_size * 10,
tile_size=tile_size,
image_scale=image_scale
# verbose=True
)
Processing chunks: 100%|████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 14.69it/s] Processing coarse tiles: 126tile [00:17, 7.15tile/s]
CPU times: user 30.2 s, sys: 27 s, total: 57.1 s Wall time: 18.8 s
Cell Boundaries¶
In [50]:
Copied!
%%time
path_cell_boundaries = base_path + 'cell_boundaries.parquet'
path_output = path_landscape_files + 'cell_segmentation'
dega.pre.make_cell_boundary_tiles(
'Xenium',
path_cell_boundaries,
path_meta_cell_micron,
path_transformation_matrix,
path_output,
#coarse_tile_size=tile_size * 10,
tile_size=tile_size,
tile_bounds=tile_bounds,
image_scale=image_scale
)
%%time
path_cell_boundaries = base_path + 'cell_boundaries.parquet'
path_output = path_landscape_files + 'cell_segmentation'
dega.pre.make_cell_boundary_tiles(
'Xenium',
path_cell_boundaries,
path_meta_cell_micron,
path_transformation_matrix,
path_output,
#coarse_tile_size=tile_size * 10,
tile_size=tile_size,
tile_bounds=tile_bounds,
image_scale=image_scale
)
Processing coarse tiles: 100%|██████████████████████████████████████████████████████████| 9/9 [00:13<00:00, 1.52s/it]
CPU times: user 23.1 s, sys: 7.1 s, total: 30.3 s Wall time: 24.4 s
Gene Metadata¶
In [51]:
Copied!
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'gene_metadata.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
path_cbg = base_path + 'cell_feature_matrix/'
path_output = path_landscape_files + 'gene_metadata.parquet'
dega.pre.make_meta_gene('Xenium', path_cbg, path_output)
Reading mtx file from data/xenium_data/Xenium_V1_human_Pancreas_FFPE_outs/cell_feature_matrix/ calculating mean expression from sparse float data calculating variance by looping over rows
Max Zoom¶
In [52]:
Copied!
# Example usage:
path_image_pyramid = path_landscape_files + 'pyramid_images/dapi_files/' # Change this to your actual directory path
max_pyramid_zoom = dega.pre.get_max_zoom_level(path_image_pyramid)
print(max_pyramid_zoom)
# Example usage:
path_image_pyramid = path_landscape_files + 'pyramid_images/dapi_files/' # Change this to your actual directory path
max_pyramid_zoom = dega.pre.get_max_zoom_level(path_image_pyramid)
print(max_pyramid_zoom)
16
Cluster Gene Expression¶
In [53]:
Copied!
usecols = ['cell_id', 'x_centroid', 'y_centroid']
meta_cell = pd.read_csv(base_path + 'cells.csv.gz', index_col=0, usecols=usecols)
meta_cell.columns = ['center_x', 'center_y']
meta_cell
usecols = ['cell_id', 'x_centroid', 'y_centroid']
meta_cell = pd.read_csv(base_path + 'cells.csv.gz', index_col=0, usecols=usecols)
meta_cell.columns = ['center_x', 'center_y']
meta_cell
Out[53]:
center_x | center_y | |
---|---|---|
cell_id | ||
aaaadnje-1 | 446.326691 | 1701.357300 |
aaacalai-1 | 441.307831 | 1735.877930 |
aaacjgil-1 | 466.053192 | 1712.259766 |
aaacpcil-1 | 430.858093 | 1707.464600 |
aaadhocp-1 | 476.111145 | 1711.089355 |
... | ... | ... |
oiloppgp-1 | 6082.675781 | 555.142883 |
oilpccne-1 | 6106.899414 | 494.951843 |
oimacfoj-1 | 6080.991211 | 626.742126 |
oimaiaae-1 | 6030.594727 | 536.503418 |
oimajkkk-1 | 6022.637207 | 573.784302 |
140702 rows × 2 columns
In [54]:
Copied!
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
df_meta = pd.read_csv(base_path + 'analysis/clustering/gene_expression_graphclust/clusters.csv', index_col=0)
df_meta['Cluster'] = df_meta['Cluster'].astype('string')
df_meta.columns = ['cluster']
In [55]:
Copied!
meta_cell['cluster'] = df_meta['cluster']
meta_cell['cluster'] = df_meta['cluster']
In [56]:
Copied!
# Compute a per-cluster mean-expression "signature": one column per cluster,
# one row per gene (averaged over the cells assigned to that cluster).
list_ser = []
for inst_cat in meta_cell['cluster'].unique().tolist():
    # Use pd.notna(): cells without a cluster assignment carry a missing
    # value (NaN/pd.NA), which `is not None` does NOT filter out — that
    # would yield an empty cell list and a bogus all-inf/NaN column.
    if pd.notna(inst_cat):
        inst_cells = meta_cell[meta_cell['cluster'] == inst_cat].index.tolist()
        # Mean expression across the cells in this cluster.
        inst_ser = cbg.loc[inst_cells].sum() / len(inst_cells)
        inst_ser.name = inst_cat
        list_ser.append(inst_ser)
df_sig = pd.concat(list_ser, axis=1)
In [57]:
Copied!
df_sig = pd.concat(list_ser, axis=1)
# handling weird behavior where there is a multiindex it appears
df_sig.columns = df_sig.columns.tolist()
df_sig.index = df_sig.index.tolist()
df_sig = pd.concat(list_ser, axis=1)
# handling weird behavior where there is a multiindex it appears
df_sig.columns = df_sig.columns.tolist()
df_sig.index = df_sig.index.tolist()
In [58]:
Copied!
# Drop Xenium control codewords (Unassigned/NegControl/DeprecatedCodeword)
# so only real genes remain, then order the columns by cluster.
# NOTE: the bare `len(keep_genes)` is not the last expression in the cell,
# so it displays nothing — informational only.
keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]
len(keep_genes)
df_sig = df_sig.loc[keep_genes, clusters]
df_sig.shape
keep_genes = df_sig.index.tolist()
keep_genes = [x for x in keep_genes if 'Unassigned' not in x]
keep_genes = [x for x in keep_genes if 'NegControl' not in x]
keep_genes = [x for x in keep_genes if 'DeprecatedCodeword' not in x]
len(keep_genes)
df_sig = df_sig.loc[keep_genes, clusters]
df_sig.shape
Out[58]:
(377, 28)
In [59]:
Copied!
df_sig.sparse.to_dense().to_parquet(path_landscape_files + 'df_sig.parquet')
df_sig.sparse.to_dense().to_parquet(path_landscape_files + 'df_sig.parquet')
Save Landscape Parameters JSON¶
In [60]:
Copied!
image_info = [
{
"name": "dapi",
"button_name": "DAPI",
"color": [
0,
0,
255
]
},
{
"name": "bound",
"button_name": "BOUND",
"color": [
0,
255,
0
]
},
{
"name": "rna",
"button_name": "RNA",
"color": [
255,
0,
0
]
},
{
"name": "prot",
"button_name": "PROT",
"color": [
255,
255,
255
]
}
]
image_info = [
{
"name": "dapi",
"button_name": "DAPI",
"color": [
0,
0,
255
]
},
{
"name": "bound",
"button_name": "BOUND",
"color": [
0,
255,
0
]
},
{
"name": "rna",
"button_name": "RNA",
"color": [
255,
0,
0
]
},
{
"name": "prot",
"button_name": "PROT",
"color": [
255,
255,
255
]
}
]
In [61]:
Copied!
dega.pre.save_landscape_parameters(
'Xenium',
path_landscape_files,
'dapi_files',
tile_size=tile_size,
image_info=image_info,
image_format='.webp'
)
dega.pre.save_landscape_parameters(
'Xenium',
path_landscape_files,
'dapi_files',
tile_size=tile_size,
image_info=image_info,
image_format='.webp'
)
data/xenium_landscapes/Xenium_V1_human_Pancreas_FFPE_outs_sparse//pyramid_images/dapi_files
In [ ]:
Copied!
In [ ]:
Copied!