Skip to content

Commit

Permalink
prototype for numba / per chain version of mutational load function
Browse files Browse the repository at this point in the history
  • Loading branch information
grst committed Nov 24, 2024
1 parent 42f79da commit 43ae89a
Show file tree
Hide file tree
Showing 3 changed files with 365 additions and 145 deletions.
212 changes: 210 additions & 2 deletions docs/tutorials/tutorial_5k_bcr.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"# not part of the tutorial, but temporarily surpresses distracting warnings\n",
"import warnings\n",
"from numba import NumbaDeprecationWarning\n",
Expand Down Expand Up @@ -216,7 +218,13 @@
"output_type": "stream",
"text": [
"Filtering chains...\n",
"Indexing VJ chains...\n",
"Indexing VJ chains...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Indexing VDJ chains...\n",
"build result array\n",
"Stored result in `mdata.obs[\"airr:receptor_type\"]`.\n",
Expand Down Expand Up @@ -382,7 +390,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Initializing lookup tables. \n",
"Initializing lookup tables. \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computing clonotype x clonotype distances.\n",
"Stored result in `mdata.obs[\"airr:clone_id_85_similarity\"]`.\n",
"Stored result in `mdata.obs[\"airr:clone_id_85_similarity_size\"]`.\n"
Expand Down Expand Up @@ -1057,6 +1071,200 @@
"Setting `region = \"IMGT_V_segment\"` counts only differences within the V-Region according to the IMGT-unique numbering scheme. Alternatively, `region = \"IMGT_V(D)J\"` includes the whole sequence alignment and `region = \"subregion\"` separately calculates mutations in each distinct region (FWR1, CDR1, FWR2, CDR2, FWR3, CDR3, FWR4). The boolean parameter `frequency` can be used to specify if absolute or relative counts are desired."
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "1c41f849",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>[[{&#x27;&#x27;: &#x27;12038&#x27;, c_call: &#x27;IGHM&#x27;, c_call_10x: &#x27;IGHM&#x27;, ...}, ..., {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;12125&#x27;, c_call: &#x27;IGKC&#x27;, c_call_10x: &#x27;IGKC&#x27;, ...}, ..., {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;12128&#x27;, c_call: &#x27;IGHG1&#x27;, c_call_10x: &#x27;IGHG1&#x27;, ...}, {...}],\n",
" [{&#x27;&#x27;: &#x27;12148&#x27;, c_call: &#x27;IGHG1&#x27;, c_call_10x: &#x27;IGHG1&#x27;, ...}, {...}],\n",
" [{&#x27;&#x27;: &#x27;12181&#x27;, c_call: &#x27;IGKC&#x27;, c_call_10x: &#x27;IGKC&#x27;, ...}, ..., {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;12217&#x27;, c_call: &#x27;IGHG1&#x27;, c_call_10x: &#x27;IGHG1&#x27;, ...}, {...}],\n",
" [{&#x27;&#x27;: &#x27;12227&#x27;, c_call: &#x27;IGLC3&#x27;, c_call_10x: &#x27;IGLC3&#x27;, ...}, {...}],\n",
" [{&#x27;&#x27;: &#x27;12264&#x27;, c_call: &#x27;IGKC&#x27;, c_call_10x: &#x27;IGKC&#x27;, ...}, {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;12266&#x27;, c_call: &#x27;IGKC&#x27;, c_call_10x: &#x27;IGKC&#x27;, ...}, ..., {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;12281&#x27;, c_call: &#x27;IGHA1&#x27;, c_call_10x: &#x27;IGHA1&#x27;, ...}, {...}],\n",
" ...,\n",
" [{&#x27;&#x27;: &#x27;327585&#x27;, c_call: None, c_call_10x: None, ...}, ..., {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;327630&#x27;, c_call: &#x27;IGHM&#x27;, c_call_10x: &#x27;IGHM&#x27;, ...}, {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;327635&#x27;, c_call: &#x27;IGHM&#x27;, c_call_10x: &#x27;IGHM&#x27;, ...}, {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;327759&#x27;, c_call: &#x27;IGHM&#x27;, c_call_10x: &#x27;IGHM&#x27;, ...}, {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;327786&#x27;, c_call: &#x27;IGLC3&#x27;, c_call_10x: &#x27;IGLC3&#x27;, ...}, {...}, {...}],\n",
" [{&#x27;&#x27;: &#x27;327848&#x27;, c_call: &#x27;IGLC1&#x27;, c_call_10x: &#x27;IGLC1&#x27;, ...}, {...}],\n",
" [{&#x27;&#x27;: &#x27;327850&#x27;, c_call: &#x27;IGHM&#x27;, c_call_10x: &#x27;IGHM&#x27;, ...}, {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;327855&#x27;, c_call: &#x27;IGHM&#x27;, c_call_10x: &#x27;IGHM&#x27;, ...}, {&#x27;&#x27;: ..., ...}],\n",
" [{&#x27;&#x27;: &#x27;327870&#x27;, c_call: &#x27;IGHG1&#x27;, c_call_10x: &#x27;IGHG1&#x27;, ...}, {...}, {...}]]\n",
"-------------------------------------------------------------------------------\n",
"type: 5000 * var * {\n",
" &quot;&quot;: string,\n",
" c_call: ?string,\n",
" c_call_10x: ?string,\n",
" c_germline_alignment: ?string,\n",
" c_identity: ?float64,\n",
" c_score: ?float64,\n",
" c_sequence_alignment: ?string,\n",
" c_sequence_end: ?unknown,\n",
" c_sequence_start: ?unknown,\n",
" c_support: ?float64,\n",
" cdr1: string,\n",
" cdr1_aa: ?string,\n",
" cdr2: string,\n",
" cdr2_aa: ?string,\n",
" cdr3: ?string,\n",
" cdr3_aa: ?string,\n",
" consensus_count: int64,\n",
" d_call: ?string,\n",
" d_call_10x: ?string,\n",
" d_cigar: ?string,\n",
" d_germline_end: ?unknown,\n",
" d_germline_start: ?unknown,\n",
" d_identity: ?float64,\n",
" d_score: ?float64,\n",
" d_sequence_alignment_aa: ?string,\n",
" d_sequence_end: ?unknown,\n",
" d_sequence_start: ?unknown,\n",
" d_support: ?float64,\n",
" duplicate_count: int64,\n",
" fwr1: string,\n",
" fwr1_aa: ?string,\n",
" fwr2: string,\n",
" fwr2_aa: ?string,\n",
" fwr3: string,\n",
" fwr3_aa: ?string,\n",
" fwr4: ?string,\n",
" fwr4_aa: ?string,\n",
" germline_alignment: string,\n",
" germline_alignment_d_mask: string,\n",
" j_call: string,\n",
" j_call_10x: ?string,\n",
" j_call_IMGT: string,\n",
" j_cigar: string,\n",
" j_germline_end: int64,\n",
" j_germline_start: int64,\n",
" j_identity: float64,\n",
" j_score: float64,\n",
" j_sequence_alignment_aa: string,\n",
" j_sequence_end: int64,\n",
" j_sequence_start: int64,\n",
" j_support: float64,\n",
" junction: string,\n",
" junction_10x: ?string,\n",
" junction_10x_aa: ?string,\n",
" junction_aa: ?string,\n",
" junction_aa_length: ?unknown,\n",
" junction_length: int64,\n",
" locus: string,\n",
" mu_freq: string,\n",
" np1_length: int64,\n",
" np2_length: ?unknown,\n",
" productive: bool,\n",
" rev_comp: bool,\n",
" sample_id: string,\n",
" sequence: string,\n",
" sequence_alignment: string,\n",
" sequence_alignment_aa: string,\n",
" sequence_id: string,\n",
" stop_codon: bool,\n",
" v_call: string,\n",
" v_call_10x: ?string,\n",
" v_call_IMGT: string,\n",
" v_call_genotyped: string,\n",
" v_cigar: string,\n",
" v_germline_end: int64,\n",
" v_germline_start: int64,\n",
" v_identity: float64,\n",
" v_score: float64,\n",
" v_sequence_alignment_aa: string,\n",
" v_sequence_end: int64,\n",
" v_sequence_start: int64,\n",
" v_support: float64,\n",
" vj_in_frame: bool,\n",
" mutation_count: int64,\n",
" v_mutation_count: ?int64,\n",
" fwr1_mutation_count: ?int64,\n",
" cdr1_mutation_count: ?int64,\n",
" fwr2_mutation_count: ?int64,\n",
" cdr2_mutation_count: ?int64,\n",
" fwr3_mutation_count: ?int64\n",
"}</pre>"
],
"text/plain": [
"<Array [[{'': '12038', ...}, ..., {...}], ...] type='5000 * var * {\"\": stri...'>"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mdata[\"airr\"].obsm[\"airr\"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "7eae4b3a",
"metadata": {},
"outputs": [],
"source": [
"ir.tl.mutational_load(mdata)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "8ee3f8e6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>MuData object with n_obs × n_vars = 5000 × 24929\n",
" uns:\t&#x27;VDJ_1_v_call_colors&#x27;\n",
" 2 modalities\n",
" gex:\t5000 x 24929\n",
" obs:\t&#x27;sample_id&#x27;, &#x27;n_genes&#x27;, &#x27;n_genes_by_counts&#x27;, &#x27;total_counts&#x27;, &#x27;total_counts_mt&#x27;, &#x27;pct_counts_mt&#x27;, &#x27;full_clustering&#x27;, &#x27;initial_clustering&#x27;, &#x27;Resample&#x27;, &#x27;Collection_Day&#x27;, &#x27;Sex&#x27;, &#x27;Age_interval&#x27;, &#x27;Swab_result&#x27;, &#x27;Status&#x27;, &#x27;Smoker&#x27;, &#x27;Status_on_day_collection&#x27;, &#x27;Status_on_day_collection_summary&#x27;, &#x27;Days_from_onset&#x27;, &#x27;Site&#x27;, &#x27;time_after_LPS&#x27;, &#x27;Worst_Clinical_Status&#x27;, &#x27;Outcome&#x27;, &#x27;patient_id&#x27;\n",
" var:\t&#x27;feature_types&#x27;\n",
" uns:\t&#x27;Status_on_day_collection_summary_colors&#x27;\n",
" obsm:\t&#x27;X_pca&#x27;, &#x27;X_pca_harmony&#x27;, &#x27;X_umap&#x27;\n",
" layers:\t&#x27;raw&#x27;\n",
" airr:\t5000 x 0\n",
" obs:\t&#x27;receptor_type&#x27;, &#x27;receptor_subtype&#x27;, &#x27;chain_pairing&#x27;, &#x27;clone_id_85_similarity&#x27;, &#x27;clone_id_85_similarity_size&#x27;, &#x27;clonal_expansion&#x27;\n",
" uns:\t&#x27;chain_indices&#x27;, &#x27;scirpy_version&#x27;, &#x27;ir_dist_nt_normalized_hamming&#x27;, &#x27;clone_id_85_similarity&#x27;, &#x27;clonotype_network&#x27;\n",
" obsm:\t&#x27;airr&#x27;, &#x27;chain_indices&#x27;, &#x27;X_clonotype_network&#x27;</pre>"
],
"text/plain": [
"MuData object with n_obs × n_vars = 5000 × 24929\n",
" uns:\t'VDJ_1_v_call_colors'\n",
" 2 modalities\n",
" gex:\t5000 x 24929\n",
" obs:\t'sample_id', 'n_genes', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'full_clustering', 'initial_clustering', 'Resample', 'Collection_Day', 'Sex', 'Age_interval', 'Swab_result', 'Status', 'Smoker', 'Status_on_day_collection', 'Status_on_day_collection_summary', 'Days_from_onset', 'Site', 'time_after_LPS', 'Worst_Clinical_Status', 'Outcome', 'patient_id'\n",
" var:\t'feature_types'\n",
" uns:\t'Status_on_day_collection_summary_colors'\n",
" obsm:\t'X_pca', 'X_pca_harmony', 'X_umap'\n",
" layers:\t'raw'\n",
" airr:\t5000 x 0\n",
" obs:\t'receptor_type', 'receptor_subtype', 'chain_pairing', 'clone_id_85_similarity', 'clone_id_85_similarity_size', 'clonal_expansion'\n",
" uns:\t'chain_indices', 'scirpy_version', 'ir_dist_nt_normalized_hamming', 'clone_id_85_similarity', 'clonotype_network'\n",
" obsm:\t'airr', 'chain_indices', 'X_clonotype_network'"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mdata"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
8 changes: 4 additions & 4 deletions src/scirpy/tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,23 +760,23 @@ def test_clonotype_imbalance(adata_tra):
def test_mutational_load(adata_mutation, adata_not_aligned):
mutation_VDJ = ir.tl.mutational_load(
adata_mutation,
germline_alignment="germline_alignment",
germline_key="germline_alignment",
chains=["VDJ_1", "VJ_1"],
frequency=False,
inplace=False,
region="IMGT_V(D)J",
)
mutation_V_segment = ir.tl.mutational_load(
adata_mutation,
germline_alignment="germline_alignment",
germline_key="germline_alignment",
chains=["VDJ_1", "VJ_1"],
frequency=False,
inplace=False,
region="IMGT_V_segment",
)
mutation_subregion = ir.tl.mutational_load(
adata_mutation,
germline_alignment="germline_alignment",
germline_key="germline_alignment",
chains=["VDJ_1", "VJ_1"],
frequency=False,
inplace=False,
Expand Down Expand Up @@ -1075,7 +1075,7 @@ def test_mutational_load(adata_mutation, adata_not_aligned):
with npt.assert_raises(ValueError):
ir.tl.mutational_load(
adata_not_aligned,
germline_alignment="germline_alignment",
germline_key="germline_alignment",
chains=["VDJ_1", "VJ_1"],
frequency=False,
inplace=False,
Expand Down
Loading

0 comments on commit 43ae89a

Please sign in to comment.