Skip to content

Commit

Permalink
clear up notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
Tristan Dennis authored and Tristan Dennis committed Aug 9, 2024
1 parent 31f6469 commit 4ef5e9c
Showing 1 changed file with 4 additions and 161 deletions.
165 changes: 4 additions & 161 deletions notebooks/plink_convert.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,172 +6,15 @@
"metadata": {},
"outputs": [],
"source": [
"import malariagen_data\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import malariagen_data\n",
"\n",
"ag3 = malariagen_data.Ag3(pre=True)\n",
"\n",
"ag3.biallelic_snps_to_plink(results_dir='/Users/dennistpw/Projects/malariagen-data-python/',\n",
" region='2L:100000-2000000',\n",
" n_snps=2000,\n",
" sample_sets='AG1000G-AO',\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"region='2L:1001000-2009000',\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds = ag3.snp_calls(\n",
" region=region,\n",
" sample_sets=sample_sets\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Perform an allele count.\n",
"ac = ag3.snp_allele_counts(\n",
" region=region,\n",
" sample_sets=sample_sets\n",
")\n",
"\n",
"# Locate biallelic SNPs.\n",
"loc_bi = allel.AlleleCountsArray(ac).is_biallelic()\n",
"\n",
"# Remap alleles to squeeze out unobserved alleles.\n",
"ac_bi = ac[loc_bi]\n",
"allele_mapping = trim_alleles(ac_bi)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds_bi = ds.isel(variants=loc_bi)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds_bi['variant_allele'].compute()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Any, Dict, List, Optional, Tuple, Union\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start building a new dataset.\n",
"coords: Dict[str, Any] = dict()\n",
"data_vars: Dict[str, Any] = dict()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store sample IDs.\n",
"coords[\"sample_id\"] = (\"samples\",), ds_bi[\"sample_id\"].data\n",
"\n",
"# Store contig.\n",
"coords[\"variant_contig\"] = (\"variants\",), ds_bi[\"variant_contig\"].data\n",
"\n",
"# Store position.\n",
"coords[\"variant_position\"] = (\"variants\",), ds_bi[\"variant_position\"].data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import dask.array as da\n",
"variant_allele = ds_bi[\"variant_allele\"].data\n",
"variant_allele = variant_allele.rechunk((variant_allele.chunks[0], -1))\n",
"variant_allele_out = da.map_blocks(\n",
" lambda block: apply_allele_mapping(block, allele_mapping, max_allele=1),\n",
" variant_allele,\n",
" dtype=variant_allele.dtype,\n",
" chunks=(variant_allele.chunks[0], [2]),\n",
")\n",
"variant_allele_out.compute()\n",
"# Store allele counts, transformed, so we don't have to recompute.\n",
"#ac_out = apply_allele_mapping(ac_bi, allele_mapping, max_allele=1)\n",
"#data_vars[\"variant_allele_count\"] = (\"variants\", \"alleles\"), ac_out"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When we select only biallelic SNPs, we:\n",
"- count the alleles observed at each variant\n",
"- select only the variants that are biallelic"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"allele_mapping"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"variant_allele.shape[0]"
" )\n"
]
}
],
Expand All @@ -191,7 +34,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.19"
"version": "3.10.10"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 4ef5e9c

Please sign in to comment.