From 8732fffd08eefbf69488c0ca4019bdd1f5b7cbb3 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Sat, 19 Dec 2020 15:07:52 -0800
Subject: [PATCH] update reporting notebook to include % of bases covered, % of k-mers covered by genome

Adds two figures to the sample report notebook:

* fig 9 plots, per genome, covered_bp / "genome bp" * 100 for the leftover
  (left_df) and all (all_df) mapping tables, plus their difference.
* fig 10 plots, per genome, intersect_bp and unique_intersect_bp from
  gather_df as a percentage of "genome bp", plus their difference, and
  saves the figure as a PDF.

Also switches the default sample_id and rewords the fig 8 heading.
---
NOTE(review): legend labels in fig 9/fig 10 and the fig 10 x-axis label were
corrected so each plotted series is distinguishable and the axis matches the
"covered by hashes" heading; line counts per hunk are unchanged.
NOTE(review): fig 10 still saves to a hard-coded /tmp path, and the patch
appends an empty trailing code cell - confirm both are intended.
NOTE(review): leading whitespace assumes the notebook's one-space-per-level
JSON indent - confirm the patch applies cleanly before merging.

 genome_grist/notebooks/report-sample.ipynb | 79 +++++++++++++++++++++-
 1 file changed, 77 insertions(+), 2 deletions(-)

diff --git a/genome_grist/notebooks/report-sample.ipynb b/genome_grist/notebooks/report-sample.ipynb
index 3aac6e88..5ca2cbb9 100644
--- a/genome_grist/notebooks/report-sample.ipynb
+++ b/genome_grist/notebooks/report-sample.ipynb
@@ -21,7 +21,7 @@
    },
    "outputs": [],
    "source": [
-    "sample_id='SRR606249'\n",
+    "sample_id='SRR1976948'\n",
     "#sample_id = 'p8808mo9'\n",
     "#sample_id = 'p8808mo11'\n",
     "outdir = 'outputs'\n"
@@ -321,7 +321,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## fig 8: correlation between hashes and mapping rates"
+    "## fig 8: correlation between hash abundance and mapping rates"
    ]
   },
   {
@@ -340,6 +340,81 @@
     "pylab.title(f'{sample_id}: gather f_match vs leftover mapping bp covered')\n",
     "pylab.legend(loc='lower right')"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fig 9: fraction of genome covered by mapping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pylab.figure(num=None, figsize=(10, 10))\n",
+    "\n",
+    "pylab.plot(left_df.covered_bp / left_df[\"genome bp\"] * 100, left_df.iloc[::-1].index, 'bo', label='leftover mapped bp to this genome')\n",
+    "pylab.plot(all_df.covered_bp / all_df[\"genome bp\"] * 100, left_df.iloc[::-1].index, 'r.', label='all mapped bp to this genome')\n",
+    "\n",
+    "pylab.plot(((all_df.covered_bp - left_df.covered_bp) / all_df[\"genome bp\"]) * 100, left_df.iloc[::-1].index, 'b-', label='diff')\n",
+    "\n",
+    "#pylab.plot(gather_df.intersect_bp / left_df[\"genome bp\"] * 100, gather_df.iloc[::-1].index, 'gx', label='hashes classified to this species')\n",
+    "#pylab.plot(gather_df.unique_intersect_bp / left_df[\"genome bp\"] * 100, gather_df.iloc[::-1].index, 'ro', label='hashes classified for this genome')\n",
+    "\n",
+    "positions = list(gather_df.index)\n",
+    "labels = list(reversed(names_df.ncbi_tax_name))\n",
+    "pylab.yticks(positions, labels, fontsize='small')\n",
+    "\n",
+    "pylab.xlabel('percent genome covered by reads')\n",
+    "pylab.legend(loc='lower right')\n",
+    "pylab.title(f'{sample_id}: mapped bp, all & leftover')\n",
+    "pylab.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fig 10: fraction of genome covered by hashes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pylab.figure(num=None, figsize=(10, 10))\n",
+    "\n",
+    "#pylab.plot(left_df.covered_bp / left_df[\"genome bp\"] * 100, left_df.iloc[::-1].index, 'bo') #, label='mapped bp to this genome')\n",
+    "#pylab.plot(all_df.covered_bp / all_df[\"genome bp\"] * 100, left_df.iloc[::-1].index, 'r.') #, label='mapped bp to this genome')\n",
+    "\n",
+    "pylab.plot(gather_df.intersect_bp / left_df[\"genome bp\"] * 100, gather_df.iloc[::-1].index, 'bo', label='hashes classified to this species')\n",
+    "pylab.plot(gather_df.unique_intersect_bp / left_df[\"genome bp\"] * 100, gather_df.iloc[::-1].index, 'r.', label='hashes classified for this genome')\n",
+    "\n",
+    "pylab.plot((gather_df.intersect_bp - gather_df.unique_intersect_bp) / left_df[\"genome bp\"] * 100, gather_df.iloc[::-1].index, 'b-', label='diff')\n",
+    "\n",
+    "positions = list(gather_df.index)\n",
+    "labels = list(reversed(names_df.ncbi_tax_name))\n",
+    "pylab.yticks(positions, labels, fontsize='small')\n",
+    "\n",
+    "pylab.xlabel('percent genome covered by hashes')\n",
+    "pylab.legend(loc='lower right')\n",
+    "pylab.title(f'{sample_id}: unique vs all hashes')\n",
+    "pylab.tight_layout()\n",
+    "\n",
+    "pylab.savefig(f'/tmp/gather-{sample_id}.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {