dib-lab · ctb · Dec 19, 2020
diff --git a/genome_grist/notebooks/report-sample.ipynb b/genome_grist/notebooks/report-sample.ipynb
@@ -21,7 +21,7 @@
    },
    "outputs": [],
    "source": [
-    "sample_id='SRR606249'\n",
+    "sample_id='SRR1976948'\n",
     "#sample_id = 'p8808mo9'\n",
     "#sample_id = 'p8808mo11'\n",
     "outdir = 'outputs'\n"
@@ -321,7 +321,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## fig 8: correlation between hashes and mapping rates"
+    "## fig 8: correlation between hash abundance and mapping rates"
    ]
   },
   {
@@ -340,6 +340,81 @@
     "pylab.title(f'{sample_id}: gather f_match vs leftover mapping bp covered')\n",
     "pylab.legend(loc='lower right')"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fig 9: fraction of genome covered by mapping"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fig 9: percent of each genome covered by mapped reads, comparing the\n",
+    "# all_df mapping results against the left_df (leftover) mapping results.\n",
+    "pylab.figure(num=None, figsize=(10, 10))\n",
+    "\n",
+    "# Reversed index used as y coordinates; presumably a default 0..n-1 index,\n",
+    "# so the first row lands at the top of the plot -- TODO confirm.\n",
+    "y_positions = left_df.iloc[::-1].index\n",
+    "\n",
+    "leftover_pct = left_df.covered_bp / left_df[\"genome bp\"] * 100\n",
+    "all_pct = all_df.covered_bp / all_df[\"genome bp\"] * 100\n",
+    "diff_pct = ((all_df.covered_bp - left_df.covered_bp) / all_df[\"genome bp\"]) * 100\n",
+    "\n",
+    "# Each series gets its own legend label so the two mappings can be told apart.\n",
+    "pylab.plot(leftover_pct, y_positions, 'bo', label='leftover mapped bp to this genome')\n",
+    "pylab.plot(all_pct, y_positions, 'r.', label='all mapped bp to this genome')\n",
+    "pylab.plot(diff_pct, y_positions, 'b-', label='diff')\n",
+    "\n",
+    "# Tick labels are reversed to match the reversed y coordinates above.\n",
+    "positions = list(gather_df.index)\n",
+    "labels = list(reversed(names_df.ncbi_tax_name))\n",
+    "pylab.yticks(positions, labels, fontsize='small')\n",
+    "\n",
+    "pylab.xlabel('percent genome covered by reads')\n",
+    "pylab.legend(loc='lower right')\n",
+    "pylab.title(f'{sample_id}: mapped bp, all & leftover')\n",
+    "pylab.tight_layout()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## fig 10: fraction of genome covered by hashes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fig 10: percent of each genome covered by gather hash matches, plotting\n",
+    "# intersect_bp, unique_intersect_bp, and the difference between the two.\n",
+    "pylab.figure(num=None, figsize=(10, 10))\n",
+    "\n",
+    "# Reversed index used as y coordinates; presumably a default 0..n-1 index,\n",
+    "# so the first row lands at the top of the plot -- TODO confirm.\n",
+    "y_positions = gather_df.iloc[::-1].index\n",
+    "\n",
+    "# Genome sizes come from left_df; assumes gather_df and left_df rows\n",
+    "# share the same index/order -- TODO confirm.\n",
+    "genome_bp = left_df[\"genome bp\"]\n",
+    "\n",
+    "all_hash_pct = gather_df.intersect_bp / genome_bp * 100\n",
+    "unique_hash_pct = gather_df.unique_intersect_bp / genome_bp * 100\n",
+    "diff_pct = (gather_df.intersect_bp - gather_df.unique_intersect_bp) / genome_bp * 100\n",
+    "\n",
+    "pylab.plot(all_hash_pct, y_positions, 'bo', label='hashes classified to this species')\n",
+    "pylab.plot(unique_hash_pct, y_positions, 'r.', label='hashes classified for this genome')\n",
+    "# The difference line needs its own label; it is not the unique-hash series.\n",
+    "pylab.plot(diff_pct, y_positions, 'b-', label='diff')\n",
+    "\n",
+    "# Tick labels are reversed to match the reversed y coordinates above.\n",
+    "positions = list(gather_df.index)\n",
+    "labels = list(reversed(names_df.ncbi_tax_name))\n",
+    "pylab.yticks(positions, labels, fontsize='small')\n",
+    "\n",
+    "# This figure plots hash coverage, not read coverage.\n",
+    "pylab.xlabel('percent genome covered by hashes')\n",
+    "pylab.legend(loc='lower right')\n",
+    "pylab.title(f'{sample_id}: unique vs all hashes')\n",
+    "pylab.tight_layout()\n",
+    "\n",
+    "# NOTE(review): hard-coded /tmp path is not portable and is shared between\n",
+    "# users and runs; consider writing under `outdir` instead.\n",
+    "pylab.savefig(f'/tmp/gather-{sample_id}.pdf')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {