{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import loompy for working with the Loom files and pandas to make a new dataframe\n", "import loompy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/ekiernan/Desktop\r\n" ] } ], "source": [ "!pwd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Assign the project metadata manifest file to its own variable\n", "metadata = pandas.read_csv(\"/Users/ekiernan/Desktop/Matrix_doc_improvements/HumanTissueTcellActivation 2021-07-03 03.29 (1).txt\", sep=\"\\t\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | source_id | \n", "source_spec | \n", "bundle_uuid | \n", "bundle_version | \n", "file_document_id | \n", "file_type | \n", "file_name | \n", "file_format | \n", "read_index | \n", "file_size | \n", "... | \n", "organoid.provenance.document_id | \n", "organoid.biomaterial_core.biomaterial_id | \n", "organoid.model_organ | \n", "organoid.model_organ_part | \n", "_entity_type | \n", "sample.provenance.document_id | \n", "sample.biomaterial_core.biomaterial_id | \n", "sequencing_input.provenance.document_id | \n", "sequencing_input.biomaterial_core.biomaterial_id | \n", "sequencing_input_type | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "6f1987da-5cbf-492c-b930-fd24891099a8 | \n", "tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... | \n", "5395ccdf-03f7-4955-90b1-192b1b297e5e | \n", "2019-09-13T18:02:37.651387Z | \n", "0bcb3617-9a3e-4c16-88da-d20a95457c79 | \n", "sequence_file | \n", "PP019_R1.fastq.gz | \n", "fastq.gz | \n", "read1 | \n", "7332137699 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "specimens | \n", "edd28ba8-0bb3-476c-9079-d01b939617b0 | \n", "PP019 | \n", "71785992-f5eb-41e2-9c0c-9435d2fa33aa | \n", "PP019_suspension | \n", "cell_suspension | \n", "
1 | \n", "6f1987da-5cbf-492c-b930-fd24891099a8 | \n", "tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... | \n", "67295473-f856-403f-b893-b849f0b781fa | \n", "2019-09-13T18:02:37.639820Z | \n", "0fa77cad-2133-4e93-b9d6-d06a6f6771de | \n", "sequence_file | \n", "PP013_R1.fastq.gz | \n", "fastq.gz | \n", "read1 | \n", "6874754868 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "specimens | \n", "85a0036b-fb11-40b5-b805-0af94bceee23 | \n", "PP013 | \n", "78b08aea-c194-4aa1-9e2e-56df9649bef1 | \n", "PP013_suspension | \n", "cell_suspension | \n", "
2 | \n", "6f1987da-5cbf-492c-b930-fd24891099a8 | \n", "tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... | \n", "f3b7ca6b-84a3-43ba-afad-a78beae7c927 | \n", "2019-09-13T18:02:37.638486Z | \n", "0fe2dfb1-a7ca-4faf-bcb4-1cb3c89756de | \n", "sequence_file | \n", "PP006_R2.fastq.gz | \n", "fastq.gz | \n", "read2 | \n", "27601166881 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "specimens | \n", "c2fed28d-cdd4-4fb4-bb94-5681dc99f52d | \n", "PP006 | \n", "cf7767c4-1daa-4d43-8155-65f30325e936 | \n", "PP006_suspension | \n", "cell_suspension | \n", "
3 | \n", "6f1987da-5cbf-492c-b930-fd24891099a8 | \n", "tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... | \n", "e3ecdfc2-4454-5be3-8ea9-4475a04d1b70 | \n", "2021-02-02T23:55:00.000000Z | \n", "11413ad3-0fa4-5e32-b518-965c28b86e2c | \n", "analysis_file | \n", "c763f679-e13d-4f81-844f-c2c80fc90f46.bam | \n", "bam | \n", "NaN | \n", "23812202516 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "specimens | \n", "55c59b93-6ce2-4d53-a7b8-5573d4f962fb | \n", "PP003 | \n", "849419b1-77af-43aa-8ce9-6438ddee7420 | \n", "PP003_suspension | \n", "cell_suspension | \n", "
4 | \n", "6f1987da-5cbf-492c-b930-fd24891099a8 | \n", "tdr:broad-datarepo-terra-prod-hca2:snapshot/hc... | \n", "8e850d2d-0b76-501d-913e-8b92eb761d29 | \n", "2021-02-02T23:50:00.000000Z | \n", "11abfd14-f28a-533f-a06a-a0dbf278de80 | \n", "analysis_file | \n", "3ddf143f-36bd-49c5-9bbf-f5b71e384063.bam | \n", "bam | \n", "NaN | \n", "30307807764 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "specimens | \n", "03a73511-bdeb-47e0-8c1d-588fdbe8bb66 | \n", "PP020 | \n", "1cf9b45c-268c-4934-9203-f9e16a90d46d | \n", "PP020_suspension | \n", "cell_suspension | \n", "
5 rows × 55 columns
\n", "\n", " | specimen_from_organism.provenance.document_id | \n", "specimen_from_organism.biomaterial_core.biomaterial_id | \n", "annotated_cell_identity.text | \n", "annotated_cell_identity.ontology | \n", "annotated_cell_identity.ontology_label | \n", "barcode | \n", "
---|---|---|---|---|---|---|
0 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "
1 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "ACGCCAGAGAATTCCC | \n", "
2 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD8+ T cell | \n", "CL:0001049 | \n", "activated CD8-positive, alpha-beta T cell, human | \n", "CAGCTGGGTCATTAGC | \n", "
3 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GGGTCTGTCAGCCTAA | \n", "
4 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD8+ T cell | \n", "CL:0001049 | \n", "activated CD8-positive, alpha-beta T cell, human | \n", "CATATTCTCGCTTAGA | \n", "
\n", " | sequencing_process.provenance.document_id | \n", "barcode | \n", "
---|---|---|
0 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "CACATTTAGTGGAGAA | \n", "
1 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "GGGTCTGCAGATAATG | \n", "
2 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "TGACGGCTCCGTTGCT | \n", "
3 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "GTCTTCGCATAGACTC | \n", "
4 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "CTGTGCTGTTCAGCGC | \n", "
\n", " | sequencing_process.provenance.document_id | \n", "barcode | \n", "specimen_from_organism.provenance.document_id | \n", "
---|---|---|---|
0 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "CACATTTAGTGGAGAA | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "
1 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "CACATTTAGTGGAGAA | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "
2 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "CACATTTAGTGGAGAA | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "
3 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "CACATTTAGTGGAGAA | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "
4 | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "GGGTCTGCAGATAATG | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "
\n", " | specimen_from_organism.provenance.document_id | \n", "specimen_from_organism.biomaterial_core.biomaterial_id | \n", "annotated_cell_identity.text | \n", "annotated_cell_identity.ontology | \n", "annotated_cell_identity.ontology_label | \n", "barcode | \n", "sequencing_process.provenance.document_id | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
1 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
2 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
3 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
4 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "ACGCCAGAGAATTCCC | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
\n", " | specimen_from_organism.provenance.document_id | \n", "specimen_from_organism.biomaterial_core.biomaterial_id | \n", "annotated_cell_identity.text | \n", "annotated_cell_identity.ontology | \n", "annotated_cell_identity.ontology_label | \n", "CellID | \n", "input_id | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
1 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
2 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
3 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "GTCATTTAGTGTGAAT | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "
4 | \n", "46cbd6a3-1ba4-4f57-b27d-4e2b918b0d4c | \n", "PP017 | \n", "activated CD4+ T cell | \n", "CL:0001043 | \n", "activated CD4-positive, alpha-beta T cell, human | \n", "ACGCCAGAGAATTCCC | \n", "219e1b92-9749-490c-b08a-f375ad4c9884 | \n", "