{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import loompy for working with the Loom files and pandas to make a new dataframe\n", "import loompy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/ekiernan/Desktop\r\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Assign the project metadata manifest file to its own variable\n", "metadata = pandas.read_csv(\"/Users/ekiernan/Desktop/Matrix_doc_improvements/HumanTissueTcellActivation 2021-07-03 03.29 (1).txt\", sep=\"\\t\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
source_idsource_specbundle_uuidbundle_versionfile_document_idfile_typefile_namefile_formatread_indexfile_size...organoid.provenance.document_idorganoid.biomaterial_core.biomaterial_idorganoid.model_organorganoid.model_organ_part_entity_typesample.provenance.document_idsample.biomaterial_core.biomaterial_idsequencing_input.provenance.document_idsequencing_input.biomaterial_core.biomaterial_idsequencing_input_type
06f1987da-5cbf-492c-b930-fd24891099a8tdr:broad-datarepo-terra-prod-hca2:snapshot/hc...5395ccdf-03f7-4955-90b1-192b1b297e5e2019-09-13T18:02:37.651387Z0bcb3617-9a3e-4c16-88da-d20a95457c79sequence_filePP019_R1.fastq.gzfastq.gzread17332137699...NaNNaNNaNNaNspecimensedd28ba8-0bb3-476c-9079-d01b939617b0PP01971785992-f5eb-41e2-9c0c-9435d2fa33aaPP019_suspensioncell_suspension
16f1987da-5cbf-492c-b930-fd24891099a8tdr:broad-datarepo-terra-prod-hca2:snapshot/hc...67295473-f856-403f-b893-b849f0b781fa2019-09-13T18:02:37.639820Z0fa77cad-2133-4e93-b9d6-d06a6f6771desequence_filePP013_R1.fastq.gzfastq.gzread16874754868...NaNNaNNaNNaNspecimens85a0036b-fb11-40b5-b805-0af94bceee23PP01378b08aea-c194-4aa1-9e2e-56df9649bef1PP013_suspensioncell_suspension
26f1987da-5cbf-492c-b930-fd24891099a8tdr:broad-datarepo-terra-prod-hca2:snapshot/hc...f3b7ca6b-84a3-43ba-afad-a78beae7c9272019-09-13T18:02:37.638486Z0fe2dfb1-a7ca-4faf-bcb4-1cb3c89756desequence_filePP006_R2.fastq.gzfastq.gzread227601166881...NaNNaNNaNNaNspecimensc2fed28d-cdd4-4fb4-bb94-5681dc99f52dPP006cf7767c4-1daa-4d43-8155-65f30325e936PP006_suspensioncell_suspension
36f1987da-5cbf-492c-b930-fd24891099a8tdr:broad-datarepo-terra-prod-hca2:snapshot/hc...e3ecdfc2-4454-5be3-8ea9-4475a04d1b702021-02-02T23:55:00.000000Z11413ad3-0fa4-5e32-b518-965c28b86e2canalysis_filec763f679-e13d-4f81-844f-c2c80fc90f46.bambamNaN23812202516...NaNNaNNaNNaNspecimens55c59b93-6ce2-4d53-a7b8-5573d4f962fbPP003849419b1-77af-43aa-8ce9-6438ddee7420PP003_suspensioncell_suspension
46f1987da-5cbf-492c-b930-fd24891099a8tdr:broad-datarepo-terra-prod-hca2:snapshot/hc...8e850d2d-0b76-501d-913e-8b92eb761d292021-02-02T23:50:00.000000Z11abfd14-f28a-533f-a06a-a0dbf278de80analysis_file3ddf143f-36bd-49c5-9bbf-f5b71e384063.bambamNaN30307807764...NaNNaNNaNNaNspecimens03a73511-bdeb-47e0-8c1d-588fdbe8bb66PP0201cf9b45c-268c-4934-9203-f9e16a90d46dPP020_suspensioncell_suspension
\n", "

5 rows × 55 columns

\n", "
" ], "text/plain": [ " source_id \\\n", "0 6f1987da-5cbf-492c-b930-fd24891099a8 \n", "1 6f1987da-5cbf-492c-b930-fd24891099a8 \n", "2 6f1987da-5cbf-492c-b930-fd24891099a8 \n", "3 6f1987da-5cbf-492c-b930-fd24891099a8 \n", "4 6f1987da-5cbf-492c-b930-fd24891099a8 \n", "\n", " source_spec \\\n", "0 tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... \n", "1 tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... \n", "2 tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... \n", "3 tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... \n", "4 tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... \n", "\n", " bundle_uuid bundle_version \\\n", "0 5395ccdf-03f7-4955-90b1-192b1b297e5e 2019-09-13T18:02:37.651387Z \n", "1 67295473-f856-403f-b893-b849f0b781fa 2019-09-13T18:02:37.639820Z \n", "2 f3b7ca6b-84a3-43ba-afad-a78beae7c927 2019-09-13T18:02:37.638486Z \n", "3 e3ecdfc2-4454-5be3-8ea9-4475a04d1b70 2021-02-02T23:55:00.000000Z \n", "4 8e850d2d-0b76-501d-913e-8b92eb761d29 2021-02-02T23:50:00.000000Z \n", "\n", " file_document_id file_type \\\n", "0 0bcb3617-9a3e-4c16-88da-d20a95457c79 sequence_file \n", "1 0fa77cad-2133-4e93-b9d6-d06a6f6771de sequence_file \n", "2 0fe2dfb1-a7ca-4faf-bcb4-1cb3c89756de sequence_file \n", "3 11413ad3-0fa4-5e32-b518-965c28b86e2c analysis_file \n", "4 11abfd14-f28a-533f-a06a-a0dbf278de80 analysis_file \n", "\n", " file_name file_format read_index \\\n", "0 PP019_R1.fastq.gz fastq.gz read1 \n", "1 PP013_R1.fastq.gz fastq.gz read1 \n", "2 PP006_R2.fastq.gz fastq.gz read2 \n", "3 c763f679-e13d-4f81-844f-c2c80fc90f46.bam bam NaN \n", "4 3ddf143f-36bd-49c5-9bbf-f5b71e384063.bam bam NaN \n", "\n", " file_size ... organoid.provenance.document_id \\\n", "0 7332137699 ... NaN \n", "1 6874754868 ... NaN \n", "2 27601166881 ... NaN \n", "3 23812202516 ... NaN \n", "4 30307807764 ... 
NaN \n", "\n", " organoid.biomaterial_core.biomaterial_id organoid.model_organ \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " organoid.model_organ_part _entity_type \\\n", "0 NaN specimens \n", "1 NaN specimens \n", "2 NaN specimens \n", "3 NaN specimens \n", "4 NaN specimens \n", "\n", " sample.provenance.document_id \\\n", "0 edd28ba8-0bb3-476c-9079-d01b939617b0 \n", "1 85a0036b-fb11-40b5-b805-0af94bceee23 \n", "2 c2fed28d-cdd4-4fb4-bb94-5681dc99f52d \n", "3 55c59b93-6ce2-4d53-a7b8-5573d4f962fb \n", "4 03a73511-bdeb-47e0-8c1d-588fdbe8bb66 \n", "\n", " sample.biomaterial_core.biomaterial_id \\\n", "0 PP019 \n", "1 PP013 \n", "2 PP006 \n", "3 PP003 \n", "4 PP020 \n", "\n", " sequencing_input.provenance.document_id \\\n", "0 71785992-f5eb-41e2-9c0c-9435d2fa33aa \n", "1 78b08aea-c194-4aa1-9e2e-56df9649bef1 \n", "2 cf7767c4-1daa-4d43-8155-65f30325e936 \n", "3 849419b1-77af-43aa-8ce9-6438ddee7420 \n", "4 1cf9b45c-268c-4934-9203-f9e16a90d46d \n", "\n", " sequencing_input.biomaterial_core.biomaterial_id sequencing_input_type \n", "0 PP019_suspension cell_suspension \n", "1 PP013_suspension cell_suspension \n", "2 PP006_suspension cell_suspension \n", "3 PP003_suspension cell_suspension \n", "4 PP020_suspension cell_suspension \n", "\n", "[5 rows x 55 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Look at the columns of the metadata manifest\n", "metadata.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(73, 55)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata.shape" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Read the contributor-generated matrix into a pandas dataframe\n", "contributor=pandas.read_csv(\"/Users/ekiernan/Desktop/Matrix_doc_improvements/TCellActivation-Blood-10x_cell_type_2020-03-11
(1).csv\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
specimen_from_organism.provenance.document_idspecimen_from_organism.biomaterial_core.biomaterial_idannotated_cell_identity.textannotated_cell_identity.ontologyannotated_cell_identity.ontology_labelbarcode
046cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT
146cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanACGCCAGAGAATTCCC
246cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD8+ T cellCL:0001049activated CD8-positive, alpha-beta T cell, humanCAGCTGGGTCATTAGC
346cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGGGTCTGTCAGCCTAA
446cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD8+ T cellCL:0001049activated CD8-positive, alpha-beta T cell, humanCATATTCTCGCTTAGA
\n", "
" ], "text/plain": [ " specimen_from_organism.provenance.document_id \\\n", "0 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "1 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "2 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "3 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "4 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "\n", " specimen_from_organism.biomaterial_core.biomaterial_id \\\n", "0 PP017 \n", "1 PP017 \n", "2 PP017 \n", "3 PP017 \n", "4 PP017 \n", "\n", " annotated_cell_identity.text annotated_cell_identity.ontology \\\n", "0 activated CD4+ T cell CL:0001043 \n", "1 activated CD4+ T cell CL:0001043 \n", "2 activated CD8+ T cell CL:0001049 \n", "3 activated CD4+ T cell CL:0001043 \n", "4 activated CD8+ T cell CL:0001049 \n", "\n", " annotated_cell_identity.ontology_label barcode \n", "0 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "1 activated CD4-positive, alpha-beta T cell, human ACGCCAGAGAATTCCC \n", "2 activated CD8-positive, alpha-beta T cell, human CAGCTGGGTCATTAGC \n", "3 activated CD4-positive, alpha-beta T cell, human GGGTCTGTCAGCCTAA \n", "4 activated CD8-positive, alpha-beta T cell, human CATATTCTCGCTTAGA " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Look at the columns\n", "# Determine which column matches the metadata manifest. 
This will vary across different contributor matrices.\n", "# In this example contributor matrix, the column `specimen_from_organism.provenance.document_id` matches the metadata manifest\n", "contributor.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(17625, 6)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of rows (annotated cells) and columns in the contributor matrix; note that barcodes are not guaranteed to be unique\n", "contributor.shape" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "source_id 1\n", "source_spec 1\n", "bundle_uuid 37\n", "bundle_version 20\n", "file_document_id 73\n", "file_type 3\n", "file_name 73\n", "file_format 5\n", "read_index 2\n", "file_size 73\n", "file_uuid 73\n", "file_version 68\n", "file_crc32c 73\n", "file_sha256 73\n", "file_content_type 5\n", "file_drs_uri 73\n", "file_url 73\n", "cell_suspension.provenance.document_id 20\n", "cell_suspension.biomaterial_core.biomaterial_id 20\n", "cell_suspension.estimated_cell_count 0\n", "cell_suspension.selected_cell_type 1\n", "sequencing_process.provenance.document_id 20\n", "sequencing_protocol.instrument_manufacturer_model 1\n", "sequencing_protocol.paired_end 1\n", "library_preparation_protocol.library_construction_approach 1\n", "library_preparation_protocol.nucleic_acid_source 1\n", "project.provenance.document_id 1\n", "project.contributors.institution 1\n", "project.contributors.laboratory 1\n", "project.project_core.project_short_name 1\n", "project.project_core.project_title 1\n", "specimen_from_organism.provenance.document_id 20\n", "specimen_from_organism.diseases 0\n", "specimen_from_organism.organ 5\n", "specimen_from_organism.organ_part 4\n", "specimen_from_organism.preservation_storage.preservation_method 1\n", "donor_organism.sex 1\n", "donor_organism.biomaterial_core.biomaterial_id 11\n", "donor_organism.provenance.document_id 6\n",
"donor_organism.genus_species 1\n", "donor_organism.development_stage 2\n", "donor_organism.diseases 2\n", "donor_organism.organism_age 4\n", "cell_line.provenance.document_id 0\n", "cell_line.biomaterial_core.biomaterial_id 0\n", "organoid.provenance.document_id 0\n", "organoid.biomaterial_core.biomaterial_id 0\n", "organoid.model_organ 0\n", "organoid.model_organ_part 0\n", "_entity_type 1\n", "sample.provenance.document_id 20\n", "sample.biomaterial_core.biomaterial_id 20\n", "sequencing_input.provenance.document_id 20\n", "sequencing_input.biomaterial_core.biomaterial_id 20\n", "sequencing_input_type 1\n", "dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Look at the number of unique elements in each column of the metadata dataframe\n", "# Will see 73 unique files for this matrix; each file can contain multiple cells\n", "metadata.nunique()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "specimen_from_organism.provenance.document_id 4\n", "specimen_from_organism.biomaterial_core.biomaterial_id 4\n", "annotated_cell_identity.text 4\n", "annotated_cell_identity.ontology 4\n", "annotated_cell_identity.ontology_label 4\n", "barcode 17483\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 17,483 unique barcodes, fewer than the 17,625 rows reported by contributor.shape, so some barcodes appear more than once\n", "contributor.nunique()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Open the project Loom file\n", "ds=loompy.connect(\"/Users/ekiernan/Desktop/Matrix_doc_improvements/t-cell-activation-human-blood-10XV2.loom\")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['CACATTTAGTGGAGAA', 'GGGTCTGCAGATAATG', 'TGACGGCTCCGTTGCT', ...,\n", " 'TGCCCATAGCGAGAAA', 'CTTACCGTCCAGAAGG', 'TGTGGTACAGTATGCT'],\n", " dtype=object)" ] },
"execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Look at the non-unique barcodes in the CellID column\n", "ds.ca[\"CellID\"]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['219e1b92-9749-490c-b08a-f375ad4c9884',\n", " '219e1b92-9749-490c-b08a-f375ad4c9884',\n", " '219e1b92-9749-490c-b08a-f375ad4c9884', ...,\n", " 'bfbf2ca6-13e5-4fe0-bb03-a592e1e72648',\n", " 'bfbf2ca6-13e5-4fe0-bb03-a592e1e72648',\n", " 'bfbf2ca6-13e5-4fe0-bb03-a592e1e72648'], dtype=object)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Look at the Loom input_id which represents each individual donor\n", "ds.ca[\"input_id\"]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(58347, 23127)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# #cells = 23,127 minimally filtered cells and 58,347 genes\n", "# Because this contains data from multiple library preparations, cell barcodes in the CellID column can be repeated\n", "ds.shape" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['219e1b92-9749-490c-b08a-f375ad4c9884',\n", " '24ae6c0b-d147-4051-a85b-c6204531b9d7',\n", " '3ddf143f-36bd-49c5-9bbf-f5b71e384063',\n", " 'bfbf2ca6-13e5-4fe0-bb03-a592e1e72648'], dtype=object)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check for unique input ids; the number should match donor number in the contributor matrix\n", "# In this case there are 4 unique donors for blood\n", "np.unique(ds.ca[\"input_id\"])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# make pandas 
dataframe for mapping loom to metadata TSV\n", "# the input_id in the loom matches the sequencing_process.provenance.document_id in the manifest\n", "# CellID is the non-unique barcode\n", "loom_to_metadata= pandas.DataFrame({\"sequencing_process.provenance.document_id\":ds.ca[\"input_id\"], \"barcode\":ds.ca[\"CellID\"]})" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sequencing_process.provenance.document_idbarcode
0219e1b92-9749-490c-b08a-f375ad4c9884CACATTTAGTGGAGAA
1219e1b92-9749-490c-b08a-f375ad4c9884GGGTCTGCAGATAATG
2219e1b92-9749-490c-b08a-f375ad4c9884TGACGGCTCCGTTGCT
3219e1b92-9749-490c-b08a-f375ad4c9884GTCTTCGCATAGACTC
4219e1b92-9749-490c-b08a-f375ad4c9884CTGTGCTGTTCAGCGC
\n", "
" ], "text/plain": [ " sequencing_process.provenance.document_id barcode\n", "0 219e1b92-9749-490c-b08a-f375ad4c9884 CACATTTAGTGGAGAA\n", "1 219e1b92-9749-490c-b08a-f375ad4c9884 GGGTCTGCAGATAATG\n", "2 219e1b92-9749-490c-b08a-f375ad4c9884 TGACGGCTCCGTTGCT\n", "3 219e1b92-9749-490c-b08a-f375ad4c9884 GTCTTCGCATAGACTC\n", "4 219e1b92-9749-490c-b08a-f375ad4c9884 CTGTGCTGTTCAGCGC" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loom_to_metadata.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(23127, 2)" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# number of cell barcodes processed by our pipeline (i.e. number of cells)\n", "loom_to_metadata.shape" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sequencing_process.provenance.document_id 4\n", "barcode 22717\n", "dtype: int64" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loom_to_metadata.nunique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "# map the Loom + metadata dataframe to the manifest metadata that's included in the contributor data frame\n", "# in this case, the manifest metadata matching the contributor matrix is the specimen_from_organism.provenance.document_id\n", "# The manifest has one row per file, so the same (sequencing process, specimen) pair is repeated many times.\n", "# De-duplicate the mapping first; otherwise the merge copies every Loom cell once per manifest file.\n", "process_to_specimen = metadata[[\"sequencing_process.provenance.document_id\", \"specimen_from_organism.provenance.document_id\"]].drop_duplicates()\n", "# A left merge keeps only the cells that are in the Loom; an outer merge would also add barcode-less rows\n", "# for sequencing processes that are in the manifest but not in this Loom.\n", "metadata_to_contributor = loom_to_metadata.merge(process_to_specimen, on=\"sequencing_process.provenance.document_id\", how=\"left\")\n" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sequencing_process.provenance.document_idbarcodespecimen_from_organism.provenance.document_id
0219e1b92-9749-490c-b08a-f375ad4c9884CACATTTAGTGGAGAA46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c
1219e1b92-9749-490c-b08a-f375ad4c9884CACATTTAGTGGAGAA46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c
2219e1b92-9749-490c-b08a-f375ad4c9884CACATTTAGTGGAGAA46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c
3219e1b92-9749-490c-b08a-f375ad4c9884CACATTTAGTGGAGAA46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c
4219e1b92-9749-490c-b08a-f375ad4c9884GGGTCTGCAGATAATG46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c
\n", "
" ], "text/plain": [ " sequencing_process.provenance.document_id barcode \\\n", "0 219e1b92-9749-490c-b08a-f375ad4c9884 CACATTTAGTGGAGAA \n", "1 219e1b92-9749-490c-b08a-f375ad4c9884 CACATTTAGTGGAGAA \n", "2 219e1b92-9749-490c-b08a-f375ad4c9884 CACATTTAGTGGAGAA \n", "3 219e1b92-9749-490c-b08a-f375ad4c9884 CACATTTAGTGGAGAA \n", "4 219e1b92-9749-490c-b08a-f375ad4c9884 GGGTCTGCAGATAATG \n", "\n", " specimen_from_organism.provenance.document_id \n", "0 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "1 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "2 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "3 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "4 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c " ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata_to_contributor.head()" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(92565, 3)" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "metadata_to_contributor.shape" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sequencing_process.provenance.document_id 4\n", "barcode 22717\n", "specimen_from_organism.provenance.document_id 4\n", "dtype: int64" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metadata_to_contributor.nunique()" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "# Final dataframe- map the shared metadata back to the contributor matrix\n", "merge_df=contributor.merge(metadata_to_contributor, on=[\"specimen_from_organism.provenance.document_id\",\"barcode\"], how=\"outer\")" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(92565, 7)" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merge_df.shape" ] }, { "cell_type": "code", 
"execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "specimen_from_organism.provenance.document_id 20\n", "specimen_from_organism.biomaterial_core.biomaterial_id 4\n", "annotated_cell_identity.text 4\n", "annotated_cell_identity.ontology 4\n", "annotated_cell_identity.ontology_label 4\n", "barcode 22717\n", "sequencing_process.provenance.document_id 20\n", "dtype: int64" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merge_df.nunique()" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
specimen_from_organism.provenance.document_idspecimen_from_organism.biomaterial_core.biomaterial_idannotated_cell_identity.textannotated_cell_identity.ontologyannotated_cell_identity.ontology_labelbarcodesequencing_process.provenance.document_id
046cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
146cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
246cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
346cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
446cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanACGCCAGAGAATTCCC219e1b92-9749-490c-b08a-f375ad4c9884
\n", "
" ], "text/plain": [ " specimen_from_organism.provenance.document_id \\\n", "0 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "1 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "2 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "3 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "4 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "\n", " specimen_from_organism.biomaterial_core.biomaterial_id \\\n", "0 PP017 \n", "1 PP017 \n", "2 PP017 \n", "3 PP017 \n", "4 PP017 \n", "\n", " annotated_cell_identity.text annotated_cell_identity.ontology \\\n", "0 activated CD4+ T cell CL:0001043 \n", "1 activated CD4+ T cell CL:0001043 \n", "2 activated CD4+ T cell CL:0001043 \n", "3 activated CD4+ T cell CL:0001043 \n", "4 activated CD4+ T cell CL:0001043 \n", "\n", " annotated_cell_identity.ontology_label barcode \\\n", "0 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "1 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "2 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "3 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "4 activated CD4-positive, alpha-beta T cell, human ACGCCAGAGAATTCCC \n", "\n", " sequencing_process.provenance.document_id \n", "0 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "1 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "2 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "3 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "4 219e1b92-9749-490c-b08a-f375ad4c9884 " ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merge_df.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['specimen_from_organism.provenance.document_id',\n", " 'specimen_from_organism.biomaterial_core.biomaterial_id',\n", " 'annotated_cell_identity.text', 'annotated_cell_identity.ontology',\n", " 'annotated_cell_identity.ontology_label', 
'barcode',\n", " 'sequencing_process.provenance.document_id'],\n", " dtype='object')" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Let's change sequencing_process.provenance.document_id to the name \"input_id\" and barcode to CellID. This will make it easier to go back to the Loom\n", "merge_df.columns" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
specimen_from_organism.provenance.document_idspecimen_from_organism.biomaterial_core.biomaterial_idannotated_cell_identity.textannotated_cell_identity.ontologyannotated_cell_identity.ontology_labelCellIDinput_id
046cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
146cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
246cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
346cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanGTCATTTAGTGTGAAT219e1b92-9749-490c-b08a-f375ad4c9884
446cbd6a3-1ba4-4f57-b27d-4e2b918b0d4cPP017activated CD4+ T cellCL:0001043activated CD4-positive, alpha-beta T cell, humanACGCCAGAGAATTCCC219e1b92-9749-490c-b08a-f375ad4c9884
\n", "
" ], "text/plain": [ " specimen_from_organism.provenance.document_id \\\n", "0 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "1 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "2 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "3 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "4 46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c \n", "\n", " specimen_from_organism.biomaterial_core.biomaterial_id \\\n", "0 PP017 \n", "1 PP017 \n", "2 PP017 \n", "3 PP017 \n", "4 PP017 \n", "\n", " annotated_cell_identity.text annotated_cell_identity.ontology \\\n", "0 activated CD4+ T cell CL:0001043 \n", "1 activated CD4+ T cell CL:0001043 \n", "2 activated CD4+ T cell CL:0001043 \n", "3 activated CD4+ T cell CL:0001043 \n", "4 activated CD4+ T cell CL:0001043 \n", "\n", " annotated_cell_identity.ontology_label CellID \\\n", "0 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "1 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "2 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "3 activated CD4-positive, alpha-beta T cell, human GTCATTTAGTGTGAAT \n", "4 activated CD4-positive, alpha-beta T cell, human ACGCCAGAGAATTCCC \n", "\n", " input_id \n", "0 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "1 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "2 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "3 219e1b92-9749-490c-b08a-f375ad4c9884 \n", "4 219e1b92-9749-490c-b08a-f375ad4c9884 " ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merge_df.columns=['specimen_from_organism.provenance.document_id',\n", " 'specimen_from_organism.biomaterial_core.biomaterial_id',\n", " 'annotated_cell_identity.text', 'annotated_cell_identity.ontology',\n", " 'annotated_cell_identity.ontology_label', 'CellID',\n", " 'input_id']\n", "merge_df.head()" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "# Drop duplicates\n", "# merge_df.drop_duplicates(inplace=True)\n", "# Remove NAs\n", "# 
merge_df.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(92565, 7)" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merge_df.shape" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CACATTTAGTGGAGAA 219e1b92-9749-490c-b08a-f375ad4c9884\n" ] } ], "source": [ "# Add the annotations (annotated_cell_identity.text) as a new column attribute to the Loom file\n", "# First copy CellID to a new column attribute called Contributor_Annotation; its values are overwritten below\n", "ds.ca[\"Contributor_Annotation\"]=ds.ca[\"CellID\"]\n", "# For one example cell, show the CellID and input_id used to look up the value in \"annotated_cell_identity.text\"\n", "i=0\n", "print(ds.ca[\"CellID\"][i],ds.ca[\"input_id\"][i])\n" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['activated CD8+ T cell'], dtype=object)" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Set up variables for the for loop used to add annotations to the Loom\n", "my_cell=ds.ca[\"CellID\"][i]\n", "my_input_id=ds.ca[\"input_id\"][i]\n", "merge_df[merge_df.CellID.isin([my_cell])&merge_df.input_id.isin([my_input_id])][\"annotated_cell_identity.text\"].unique()" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [ "# Add each cell's annotation to the Loom column attribute.\n", "# Build the (CellID, input_id) to annotation lookup once, instead of re-filtering all of merge_df for every cell.\n", "# setdefault keeps the first annotation seen per key, which matches taking .unique()[0] of the filtered rows.\n", "annotation_lookup = {}\n", "for cell_id, input_id, cell_type in zip(merge_df[\"CellID\"], merge_df[\"input_id\"], merge_df[\"annotated_cell_identity.text\"]):\n", "    annotation_lookup.setdefault((cell_id, input_id), cell_type)\n", "# NOTE(review): the loop assigns into the array returned by ds.ca[...]; confirm these element-wise edits\n", "# are saved to the Loom file rather than only to an in-memory copy.\n", "for i in range(len(ds.ca[\"CellID\"])):\n", "    my_cell=ds.ca[\"CellID\"][i]\n", "    my_input_id=ds.ca[\"input_id\"][i]\n", "    # Cells with no matching row are set to NaN, the same value unannotated cells receive\n", "    my_cell_type=annotation_lookup.get((my_cell, my_input_id), float(\"nan\"))\n", "    ds.ca[\"Contributor_Annotation\"][i]=my_cell_type" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "name": "stdout",
"output_type": "stream", "text": [ "23126\n", "['activated CD8+ T cell' 'activated CD4+ T cell' 'activated CD4+ T cell'\n", " ... nan nan nan]\n" ] } ], "source": [ "print(i)\n", "print(ds.ca[\"Contributor_Annotation\"])" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "# Prepare the data to look at expression of just one cell that matches a specific barcode and belongs to a single library preparation of choice (input_id)\n", "cell=((ds.ca[\"CellID\"]==\"GTCATTTAGTGTGAAT\") & (ds.ca[\"input_id\"]==\"219e1b92-9749-490c-b08a-f375ad4c9884\"))" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0],\n", " [0],\n", " [0],\n", " ...,\n", " [0],\n", " [0],\n", " [0]], dtype=uint32)" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# We expect the boolean mask `cell` to be True for exactly one cell, so this returns a single column: that cell's counts for every gene.\n", "ds[:,cell]" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dtype('bool')" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cell.dtype" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, ..., 0, 0, 0], dtype=uint32)" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Flatten the data\n", "np.ndarray.flatten(ds[:,cell])" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "# Plot a histogram of the UMIs per gene (UMI count is on the x-axis, number of genes is on the y-axis)\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([5.8243e+04, 3.9000e+01, 3.2000e+01, 1.4000e+01, 7.0000e+00,\n", " 6.0000e+00, 4.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00]),\n", " array([ 0. , 56.5, 113. , 169.5, 226. 
, 282.5, 339. , 395.5, 452. ,\n", " 508.5, 565. ]),\n", " )" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEwJJREFUeJzt3W2sXdV95/Hvr3ZIaNrEJtxayCZjqliN3GhCyBU4SjRKQTWGVjUvUgSqag9jxS9CRlSq1DGd0aDmQUrelAYpRYOCB1NlSmjaCItx6noMVdUXPFwCAQxluKEgbAF2YwPTiZoM6X9enGV66nXte3xtfO69/n6krbP2f6+9z1rKwb+7H85JqgpJkob9zLgHIEmafwwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdZaOewBzdf7559fq1avHPQxJWjAee+yxf6iqiVH6jhQOSZYB3wA+AhTwH4DngG8Bq4EXgWur6kiSAF8DrgZ+BPz7qvpeO85m4L+0w36pqna0+seBu4BzgV3ATTXL73qsXr2aqampUYYvSQKSvDRq31EvK30N+Muq+jDwUeBZYBuwt6rWAHvbOsBVwJq2bAVub4M6D7gFuAy4FLglyfK2z+3AZ4f22zDqBCRJp9+s4ZDk/cC/A+4EqKqfVNXrwEZgR+u2A7imtTcCd9fAQ8CyJBcAVwJ7qupwVR0B9gAb2rb3VdVD7Wzh7qFjSZLGYJQzh4uAQ8B/T/J4km8keS+woqpeaX1eBVa09krg5aH997faier7Z6h3kmxNMpVk6tChQyMMXZI0F6OEw1LgEuD2qvoY8H/5l0tIALS/+N/x3/6uqjuqarKqJicmRrqnIkmag1HCYT+wv6oebuvfZhAWr7VLQrTXg237AeDCof1XtdqJ6qtmqEuSxmTWcKiqV4GXk/xSK10BPAPsBDa32mbgvtbeCWzKwDrgjXb5aTewPsnydiN6PbC7bXszybr2pNOmoWNJksZg1O85/Efgm0nOAV4AbmAQLPcm2QK8BFzb+u5i8BjrNINHWW8AqKrDSb4IPNr6faGqDrf25/iXR1m/2xZJ0phkof7fhE5OTpbfc5Ck0SV5rKomR+nrz2dIkjoL9uczTsXqbf9zLO/74ld+bSzvK0knyzMHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdUYKhyQvJnkqyRNJplrtvCR7kjzfXpe3epLclmQ6yZNJLhk6zubW//kkm4fqH2/Hn2775nRPVJI0upM5c/iVqrq4qibb+jZgb1WtAfa2dYCrgDVt2QrcDoMwAW4BLgMuBW45Giitz2eH9tsw5xlJkk7ZqVxW2gjsaO0dwDVD9btr4CFgWZILgCuBPVV1uKqOAHuADW3b+6rqoaoq4O6hY0mSxmDUcCjgr5I8lmRrq62oqlda+1VgRWuvBF4e2nd/q52ovn+GuiRpTJaO2O9TVXUgyS8Ae5L83fDGqqokdfqH96+1YNoK8MEPfvCdfjtJOmuNdOZQVQfa60HgOwzuGbzWLgnRXg+27geAC4d2X9VqJ6qvmqE+0zjuqKrJqpqcmJgYZeiSpDmYNRySvDfJzx9tA+uBp4GdwNEnjjYD97X2TmBTe2ppHfBGu/y0G1ifZHm7Eb0e2N22vZlkXXtKadPQsSRJYzDKZaUVwHfa06VLgf9RVX+Z5FHg3iRbgJeAa1v/Xc
DVwDTwI+AGgKo6nOSLwKOt3xeq6nBrfw64CzgX+G5bJEljMms4VNULwEdnqP8QuGKGegE3HudY24HtM9SngI+MMF5J0hngN6QlSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSZ2RwyHJkiSPJ7m/rV+U5OEk00m+leScVn93W59u21cPHePmVn8uyZVD9Q2tNp1k2+mbniRpLk7mzOEm4Nmh9a8Ct1bVh4AjwJZW3wIcafVbWz+SrAWuA34Z2AD8cQucJcDXgauAtcD1ra8kaUxGCockq4BfA77R1gNcDny7ddkBXNPaG9s6bfsVrf9G4J6q+nFV/T0wDVzalumqeqGqfgLc0/pKksZk1DOHPwJ+D/jntv4B4PWqequt7wdWtvZK4GWAtv2N1v/t+jH7HK8uSRqTWcMhya8DB6vqsTMwntnGsjXJVJKpQ4cOjXs4krRojXLm8EngN5K8yOCSz+XA14BlSZa2PquAA619ALgQoG1/P/DD4fox+xyv3qmqO6pqsqomJyYmRhi6JGkuZg2Hqrq5qlZV1WoGN5QfqKrfAh4EPtO6bQbua+2dbZ22/YGqqla/rj3NdBGwBngEeBRY055+Oqe9x87TMjtJ0pwsnb3Lcf0n4J4kXwIeB+5s9TuBP0kyDRxm8I89VbUvyb3AM8BbwI1V9VOAJJ8HdgNLgO1Vte8UxiVJOkUnFQ5V9dfAX7f2CwyeNDq2zz8Bv3mc/b8MfHmG+i5g18mMRZL0zvEb0pKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSeoYDpKkjuEgSerMGg5J3pPkkSTfT7IvyR+0+kVJHk4yneRbSc5p9Xe39em2ffXQsW5u9eeSXDlU39Bq00m2nf5pSpJOxihnDj8GLq+qjwIXAxuSrAO+CtxaVR8CjgBbWv8twJFWv7X1I8la4Drgl4ENwB8nWZJkCfB14CpgLXB96ytJGpNZw6EG/rGtvqstBVwOfLvVdwDXtPbGtk7bfkWStPo9VfXjqvp7YBq4tC3TVfVCVf0EuKf1lSSNyUj3HNpf+E8AB4E9wA+A16vqrdZlP7CytVcCLwO07W8AHxiuH7PP8eqSpDEZKRyq6qdVdTGwisFf+h9+R0d1HEm2JplKMnXo0KFxDEGSzgon9bRSVb0OPAh8AliWZGnbtAo40NoHgAsB2vb3Az8crh+zz/HqM73/HVU1WVWTExMTJzN0SdJJGOVppYkky1r7XOBXgWcZhMRnWrfNwH2tvbOt07Y/UFXV6te1p5kuAtYAjwCPAmva00/nMLhpvfN0TE6SNDdLZ+/CBcCO9lTRzwD3VtX9SZ4B7knyJeBx4M7W/07gT5JMA4cZ/GNPVe1Lci/wDPAWcGNV/RQgyeeB3cASYHtV7TttM5QknbRZw6GqngQ+NkP9BQb3H46t/xPwm8c51peBL89Q3wXsGmG8kqQzwG9IS5I6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4SJI6s4ZDkguTPJjkmST7ktzU6ucl2ZPk+fa6vNWT5LYk00meTHLJ0LE2t/7PJ9k8VP94kqfaPrclyTsxWUnSaEY5c3gL+N2qWgusA25MshbYBuytqjXA3rYOcBWwpi1bgdthECbALcBlwKXALUcDpfX57NB+G059apKkuZ
o1HKrqlar6Xmv/H+BZYCWwEdjRuu0ArmntjcDdNfAQsCzJBcCVwJ6qOlxVR4A9wIa27X1V9VBVFXD30LEkSWNwUvcckqwGPgY8DKyoqlfapleBFa29Enh5aLf9rXai+v4Z6pKkMRk5HJL8HPDnwO9U1ZvD29pf/HWaxzbTGLYmmUoydejQoXf67STprDVSOCR5F4Ng+GZV/UUrv9YuCdFeD7b6AeDCod1XtdqJ6qtmqHeq6o6qmqyqyYmJiVGGLkmag1GeVgpwJ/BsVf3h0KadwNEnjjYD9w3VN7WnltYBb7TLT7uB9UmWtxvR64HdbdubSda199o0dCxJ0hgsHaHPJ4HfBp5K8kSr/T7wFeDeJFuAl4Br27ZdwNXANPAj4AaAqjqc5IvAo63fF6rqcGt/DrgLOBf4blskSWMyazhU1d8Cx/vewRUz9C/gxuMcazuwfYb6FPCR2cYiSToz/Ia0JKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKljOEiSOoaDJKkzazgk2Z7kYJKnh2rnJdmT5Pn2urzVk+S2JNNJnkxyydA+m1v/55NsHqp/PMlTbZ/bkuR0T1KSdHJGOXO4C9hwTG0bsLeq1gB72zrAVcCatmwFbodBmAC3AJcBlwK3HA2U1uezQ/sd+16SpDNs1nCoqr8BDh9T3gjsaO0dwDVD9btr4CFgWZILgCuBPVV1uKqOAHuADW3b+6rqoaoq4O6hY0mSxmSu9xxWVNUrrf0qsKK1VwIvD/Xb32onqu+foT6jJFuTTCWZOnTo0ByHLkmazSnfkG5/8ddpGMso73VHVU1W1eTExMSZeEtJOivNNRxea5eEaK8HW/0AcOFQv1WtdqL6qhnqkqQxmms47ASOPnG0GbhvqL6pPbW0DnijXX7aDaxPsrzdiF4P7G7b3kyyrj2ltGnoWJKkMVk6W4ckfwp8Gjg/yX4GTx19Bbg3yRbgJeDa1n0XcDUwDfwIuAGgqg4n+SLwaOv3hao6epP7cwyeiDoX+G5bJEljNGs4VNX1x9l0xQx9C7jxOMfZDmyfoT4FfGS2cUiSzhy/IS1J6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqTOvAmHJBuSPJdkOsm2cY9Hks5m8yIckiwBvg5cBawFrk+ydryjkqSz17wIB+BSYLqqXqiqnwD3ABvHPCZJOmvNl3BYCbw8tL6/1SRJY7B03AM4GUm2Alvb6j8meW6Ohzof+IfTM6rR5avv+FuMZV5nwGKc12KcEziv+e7fjNpxvoTDAeDCofVVrfavVNUdwB2n+mZJpqpq8lSPM984r4VjMc4JnNdiMl8uKz0KrElyUZJzgOuAnWMekySdtebFmUNVvZXk88BuYAmwvar2jXlYknTWmhfhAFBVu4BdZ+jtTvnS1DzlvBaOxTgncF6LRqpq3GOQJM0z8+WegyRpHjmrwmEh/0RHku1JDiZ5eqh2XpI9SZ5vr8tbPUlua/N8Mskl4xv5iSW5MMmDSZ5Jsi/JTa2+oOeW5D1JHkny/TavP2j1i5I83Mb/rfYABkne3dan2/bV4xz/iSRZkuTxJPe39QU/J4AkLyZ5KskTSaZabUF/Dk/FWRMOi+AnOu4CNhxT2wbsrao1wN62DoM5rmnLVuD2MzTGuXgL+N2qWgusA25s/7ss9Ln9GLi8qj4KXAxsSLIO+Cpwa1V9CDgCbGn9twBHWv3W1m++ugl4dmh9MczpqF+pqouHHltd6J/Duauqs2IBPgHsHlq/Gbh53OM6yTmsBp
4eWn8OuKC1LwCea+3/Blw/U7/5vgD3Ab+6mOYG/CzwPeAyBl+kWtrqb38mGTyp94nWXtr6Zdxjn2Euqxj8I3k5cD+QhT6nobm9CJx/TG3RfA5PdjlrzhxYnD/RsaKqXmntV4EVrb0g59ouO3wMeJhFMLd2+eUJ4CCwB/gB8HpVvdW6DI/97Xm17W8AHzizIx7JHwG/B/xzW/8AC39ORxXwV0kea7/GAIvgczhX8+ZRVp2aqqokC/bRsyQ/B/w58DtV9WaSt7ct1LlV1U+Bi5MsA74DfHjMQzolSX4dOFhVjyX59LjH8w74VFUdSPILwJ4kfze8caF+DufqbDpzGOknOhaY15JcANBeD7b6gpprkncxCIZvVtVftPKimBtAVb0OPMjgksuyJEf/KBse+9vzatvfD/zwDA91Np8EfiPJiwx+Ofly4Gss7Dm9raoOtNeDDML8UhbR5/BknU3hsBh/omMnsLm1NzO4Xn+0vqk9UbEOeGPo1HheyeAU4U7g2ar6w6FNC3puSSbaGQNJzmVwH+VZBiHxmdbt2Hkdne9ngAeqXcyeL6rq5qpaVVWrGfz380BV/RYLeE5HJXlvkp8/2gbWA0+zwD+Hp2TcNz3O5AJcDfxvBtd+//O4x3OSY/9T4BXg/zG4vrmFwfXbvcDzwP8Czmt9w+DJrB8ATwGT4x7/Ceb1KQbXep8EnmjL1Qt9bsC/BR5v83oa+K+t/ovAI8A08GfAu1v9PW19um3/xXHPYZb5fRq4f7HMqc3h+23Zd/Tfh4X+OTyVxW9IS5I6Z9NlJUnSiAwHSVLHcJAkdQwHSVLHcJAkdQwHSVLHcJAkdQwHSVLn/wPSoncN/GJe4wAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.hist(np.ndarray.flatten(ds[:,cell]))" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([26., 22., 10., 6., 4., 3., 2., 0., 0., 1.]),\n", " array([101. , 147.4, 193.8, 240.2, 286.6, 333. , 379.4, 425.8, 472.2,\n", " 518.6, 565. ]),\n", " )" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADM5JREFUeJzt3V+MXOV5x/Hvr5gmbUANxBvLAtxNUtSKi8ZUK0oEF4Q0EYWqJFJUFVWpL5CciyCBhFS5qdSmd47UQFupQnUEggtK/wgQCFCJ6yChSBWpTRwwOAgSOSqWwab5A71pa3h6scdog7zZ2Z1Zj/eZ70c6mnPec3bOM688P7/7zpmzqSokSRvfL0y7AEnSZBjoktSEgS5JTRjoktSEgS5JTRjoktSEgS5JTRjoktSEgS5JTWw6kyfbvHlzzc/Pn8lTStKGd+DAgTeqam6l485ooM/Pz7N///4zeUpJ2vCS/HCU45xykaQmDHRJasJAl6QmDHRJasJAl6QmDHRJasJAl6QmDHRJasJAl6Qmzug3Rccxv+vxqZ37yO4bpnZuSRqVI3RJasJAl6QmDHRJamLFQE9ySZKnkryY5IUktw7tX0lyNMnBYbl+/cuVJC1nlA9FTwK3V9WzSc4HDiTZO+y7s6r+av3KkySNasVAr6pjwLFh/a0kh4GL1rswSdLqrGoOPck8cDnwzNB0S5LnktyT5IIJ1yZJWoWRAz3JecCDwG1V9SZwF/AxYDuLI/ivLfNzO5PsT7L/xIkTEyhZknQ6IwV6knNZDPP7q+ohgKp6varerqp3gK8DV5zuZ6tqT1UtVNXC3NyKfxJPkrRGo1zlEuBu4HBV3bGkfeuSwz4HHJp8eZKkUY1ylctVwBeA55McHNq+DNyUZDtQwBHgi+tSoSRpJKNc5fItIKfZ9cTky5EkrZXfFJWkJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJkb5I9Ezb37X41M575HdN0zlvJI2JkfoktSEgS5JTRjoktSEgS5JTRjoktSEgS5JTRjoktSEgS5JTRjoktSEgS5JTRjoktTEioGe5JIkTyV5MckLSW4d2i9MsjfJy8PjBetfriRpOaOM0E8Ct1fVZcCVwJeSXAbsAvZV1aXAvmFbkjQlKwZ6VR2rqmeH9beAw8BFwI3AfcNh9wGfXa8iJUkrW9UcepJ54HLgGWBLVR0bdr0GbJloZZKkVRk50JOcBzwI3FZVby7dV1UF1DI/tzPJ/iT7T5w4MVaxkqTljRToSc5lMczvr6qHhubXk2wd9m8Fjp/uZ6tqT1UtVNXC3NzcJGqWJJ3GKFe5BLgbOFxVdyzZ9SiwY1jfATwy+fIkSaMa5U/QXQV8AXg+ycGh7cvAbuCfk9wM/BD4g/
UpUZI0ihUDvaq+BWSZ3Z+abDmSpLXym6KS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1ISBLklNGOiS1MSKgZ7kniTHkxxa0vaVJEeTHByW69e3TEnSSkYZod8LXHea9juravuwPDHZsiRJq7VioFfV08CPzkAtkqQxjDOHfkuS54YpmQuWOyjJziT7k+w/ceLEGKeTJP08aw30u4CPAduBY8DXljuwqvZU1UJVLczNza3xdJKklawp0Kvq9ap6u6reAb4OXDHZsiRJq7WmQE+ydcnm54BDyx0rSTozNq10QJIHgGuAzUleBf4CuCbJdqCAI8AX17FGSdIIVgz0qrrpNM13r0MtkqQx+E1RSWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJgx0SWrCQJekJlYM9CT3JDme5NCStguT7E3y8vB4wfqWKUlaySgj9HuB697TtgvYV1WXAvuGbUnSFK0Y6FX1NPCj9zTfCNw3rN8HfHbCdUmSVmmtc+hbqurYsP4asGVC9UiS1mjTuE9QVZWkltufZCewE2Dbtm3jnm6mzO96fCrnPbL7hqmcV9J41jpCfz3JVoDh8fhyB1bVnqpaqKqFubm5NZ5OkrSStQb6o8COYX0H8MhkypEkrdUoly0+APw78OtJXk1yM7Ab+HSSl4HfGbYlSVO04hx6Vd20zK5PTbgWSdIY/KaoJDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDWxadoF6Owzv+vxqZ37yO4bpnZuaaNzhC5JTRjoktSEgS5JTYw1h57kCPAW8DZwsqoWJlGUJGn1JvGh6Cer6o0JPI8kaQxOuUhSE+MGegHfSHIgyc5JFCRJWptxp1yurqqjST4M7E3yvap6eukBQ9DvBNi2bduYp5MkLWesEXpVHR0ejwMPA1ec5pg9VbVQVQtzc3PjnE6S9HOsOdCTfCDJ+afWgc8AhyZVmCRpdcaZctkCPJzk1PP8Q1X960SqkiSt2poDvap+AHx8grVIksbgZYuS1ISBLklNGOiS1IT3Q9dZZVr3Yvc+7OrAEbokNWGgS1ITBrokNWGgS1ITBrokNWGgS1ITBrokNeF16BLTu/4dvAZek+MIXZKaMNAlqQkDXZKaMNAlqQkDXZKaMNAlqQkDXZKa8Dp0acq8B7wmxRG6JDVhoEtSEwa6JDVhoEtSEwa6JDVhoEtSEwa6JDXhdeiSZkb3+947QpekJgx0SWrCQJekJsYK9CTXJXkpyStJdk2qKEnS6q050JOcA/wd8LvAZcBNSS6bVGGSpNUZZ4R+BfBKVf2gqv4X+EfgxsmUJUlarXEC/SLgP5dsvzq0SZKmYN2vQ0+yE9g5bP53kpfW+FSbgTcmU9WGZR8ssh8m0Af56oQqma4N829hzP7+1VEOGifQjwKXLNm+eGj7GVW1B9gzxnkASLK/qhbGfZ6NzD5YZD/YB6fYDz9rnCmX/wAuTfKRJL8I/CHw6GTKkiSt1ppH6FV1MsktwJPAOcA9VfXCxCqTJK3KWHPoVfUE8MSEalnJ2NM2DdgHi+wH++AU+2GJVNW0a5AkTYBf/ZekJs6KQE9yT5LjSQ4tabswyd4kLw+PFwztSfK3w+0Gnk
vyW9OrfHKSXJLkqSQvJnkhya1D+6z1w/uTfDvJd4d++Muh/SNJnhle7z8NH8ST5H3D9ivD/vlp1j9JSc5J8p0kjw3bs9gHR5I8n+Rgkv1D20y9J1bjrAh04F7guve07QL2VdWlwL5hGxZvNXDpsOwE7jpDNa63k8DtVXUZcCXwpeFWCrPWD/8DXFtVHwe2A9cluRL4KnBnVf0a8GPg5uH4m4EfD+13Dsd1cStweMn2LPYBwCeravuSyxNn7T0xuqo6KxZgHji0ZPslYOuwvhV4aVj/e+Cm0x3XaQEeAT49y/0A/DLwLPDbLH55ZNPQ/gngyWH9SeATw/qm4bhMu/YJvPaLWQyra4HHgMxaHwyv5wiw+T1tM/ueWGk5W0bop7Olqo4N668BW4b19rccGH5lvhx4hhnsh2Gq4SBwHNgLfB/4SVWdHA5Z+lrf7Ydh/0+BD53ZitfFXwN/ArwzbH+I2esDgAK+keTA8K1zmMH3xKg2xJ+gq6pKMhOX4yQ5D3gQuK2q3kzy7r5Z6YeqehvYnuSDwMPAb0y5pDMqye8Bx6vqQJJrpl3PlF1dVUeTfBjYm+R7S3fOyntiVGfzCP31JFsBhsfjQ/tItxzYiJKcy2KY319VDw3NM9cPp1TVT4CnWJxe+GCSUwOQpa/13X4Y9v8K8F9nuNRJuwr4/SRHWLyL6bXA3zBbfQBAVR0dHo+z+J/7Fczwe2IlZ3OgPwrsGNZ3sDinfKr9j4dPtK8Efrrk168NK4tD8buBw1V1x5Jds9YPc8PInCS/xOLnCIdZDPbPD4e9tx9O9c/ngW/WMIG6UVXVn1bVxVU1z+ItNb5ZVX/EDPUBQJIPJDn/1DrwGeAQM/aeWJVpT+IP/+4eAI4B/8fivNfNLM4B7gNeBv4NuHA4Niz+YY3vA88DC9Ouf0J9cDWL84XPAQeH5foZ7IffBL4z9MMh4M+H9o8C3wZeAf4FeN/Q/v5h+5Vh/0en/Rom3B/XAI/NYh8Mr/e7w/IC8GdD+0y9J1az+E1RSWribJ5ykSStgoEuSU0Y6JLUhIEuSU0Y6JLUhIEuSU0Y6JLUhIEuSU38P5wCYWoG/KaaAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Keep only the genes with more than 100 UMIs (counts) in this cell and plot their distribution\n", "barcode_expression = np.ndarray.flatten(ds[:,cell]) \n", "plt.hist(barcode_expression[barcode_expression>100])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }