diff --git a/notebooks/plink_convert.ipynb b/notebooks/plink_convert.ipynb index 473da2c85..f6768f7b5 100644 --- a/notebooks/plink_convert.ipynb +++ b/notebooks/plink_convert.ipynb @@ -6,172 +6,15 @@ "metadata": {}, "outputs": [], "source": [ - "import malariagen_data\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ + "import malariagen_data\n", + "\n", "ag3 = malariagen_data.Ag3(pre=True)\n", "\n", "ag3.biallelic_snps_to_plink(results_dir='/Users/dennistpw/Projects/malariagen-data-python/',\n", " region='2L:100000-2000000',\n", " n_snps=2000,\n", " sample_sets='AG1000G-AO',\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "region='2L:1001000-2009000',\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds = ag3.snp_calls(\n", - " region=region,\n", - " sample_sets=sample_sets\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Perform an allele count.\n", - "ac = ag3.snp_allele_counts(\n", - " region=region,\n", - " sample_sets=sample_sets\n", - ")\n", - "\n", - "# Locate biallelic SNPs.\n", - "loc_bi = allel.AlleleCountsArray(ac).is_biallelic()\n", - "\n", - "# Remap alleles to squeeze out unobserved alleles.\n", - "ac_bi = ac[loc_bi]\n", - "allele_mapping = trim_alleles(ac_bi)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds_bi = ds.isel(variants=loc_bi)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ds_bi['variant_allele'].compute()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Any, Dict, List, Optional, Tuple, Union\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Start building a new dataset.\n", - "coords: Dict[str, Any] = dict()\n", - "data_vars: Dict[str, Any] = dict()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Store sample IDs.\n", - "coords[\"sample_id\"] = (\"samples\",), ds_bi[\"sample_id\"].data\n", - "\n", - "# Store contig.\n", - "coords[\"variant_contig\"] = (\"variants\",), ds_bi[\"variant_contig\"].data\n", - "\n", - "# Store position.\n", - "coords[\"variant_position\"] = (\"variants\",), ds_bi[\"variant_position\"].data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import dask.array as da\n", - "variant_allele = ds_bi[\"variant_allele\"].data\n", - "variant_allele = variant_allele.rechunk((variant_allele.chunks[0], -1))\n", - "variant_allele_out = da.map_blocks(\n", - " lambda block: apply_allele_mapping(block, allele_mapping, max_allele=1),\n", - " variant_allele,\n", - " dtype=variant_allele.dtype,\n", - " chunks=(variant_allele.chunks[0], [2]),\n", - ")\n", - "variant_allele_out.compute()\n", - "# Store allele counts, transformed, so we don't have to recompute.\n", - "#ac_out = apply_allele_mapping(ac_bi, allele_mapping, max_allele=1)\n", - "#data_vars[\"variant_allele_count\"] = (\"variants\", \"alleles\"), ac_out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "When we try to select only biallelic snps, we\n", - "- count the number of alleles\n", - "- select only" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "allele_mapping" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "variant_allele.shape[0]" + " )\n" ] } ], @@ -191,7 +34,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.19" + "version": "3.10.10" } }, "nbformat": 4,