diff --git a/notebooks/statements.ipynb b/notebooks/statements.ipynb index 4a8b2882..1ab14321 100644 --- a/notebooks/statements.ipynb +++ b/notebooks/statements.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -75,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -155,7 +155,7 @@ "4 predictsSensitivityTo " ] }, - "execution_count": 11, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -185,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -249,14 +249,18 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/5t/sfw5tjx56m10xb861_pd3wfm0000gq/T/ipykernel_38992/171895612.py:8: DeprecationWarning: Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.\n", + "/var/folders/5t/sfw5tjx56m10xb861_pd3wfm0000gq/T/ipykernel_62613/171895612.py:8: DeprecationWarning: Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.\n", + " with driver.session() as session:\n", + "/var/folders/5t/sfw5tjx56m10xb861_pd3wfm0000gq/T/ipykernel_62613/171895612.py:8: DeprecationWarning: Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.\n", + " with driver.session() as session:\n", + "/var/folders/5t/sfw5tjx56m10xb861_pd3wfm0000gq/T/ipykernel_62613/171895612.py:8: DeprecationWarning: Using a driver after it has been closed is deprecated. Future versions of the driver will raise an error.\n", " with driver.session() as session:\n" ] } @@ -274,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -454,7 +458,7 @@ "[1042 rows x 6 columns]" ] }, - "execution_count": 58, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -465,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -670,7 +674,7 @@ "[1042 rows x 7 columns]" ] }, - "execution_count": 61, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -678,7 +682,7 @@ "source": [ "df['statement_full'] = None\n", "for idx, row in df.iterrows():\n", - " sentence = f\"{row['subject']} with treatment by {row['obj']} {row['predicate']} the disease {row['qualifier']}\"\n", + " sentence = f\"{row['subject']} {row['predicate']} treatment by {row['obj']} for the disease {row['qualifier']}\"\n", " df.at[idx, 'statement_full'] = sentence\n", " \n", "df" @@ -686,7 +690,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -703,7 +707,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -809,7 +813,7 @@ "[903 rows x 2 columns]" ] }, - "execution_count": 68, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -821,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -830,7 +834,7 @@ "'ga4gh:VA.sMA9h8fzDi0RvweMlxtD0_Oi8B-JZ1V- with treatment by rxcui:1721560 predictsSensitivityTo the disease ncit:C2926'" ] }, - "execution_count": 70, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1052,7 +1056,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1158,18 +1162,18 @@ "[86 rows x 2 columns]" ] }, - "execution_count": 84, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "data[0:86]" + "data[0:86]\n" ] }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1178,7 +1182,7 @@ "225" ] }, - "execution_count": 102, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1189,24 +1193,16 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['ga4gh:VA.sMA9h8fzDi0RvweMlxtD0_Oi8B-JZ1V-',\n", - " 'with',\n", - " 'treatment',\n", - " 'by',\n", - " 'rxcui:1721560',\n", - " 'predictsSensitivityTo',\n", - " 'the',\n", - " 'disease',\n", - " 'ncit:C2926']" + "'ncit:C2926'" ] }, - "execution_count": 86, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1221,7 +1217,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1248,8 +1244,8 @@ " Statement\n", " counts\n", " variant\n", - " therapeutic\n", " predicate\n", + " therapeutic\n", " disease\n", " \n", " \n", @@ -1259,8 +1255,8 @@ " ga4gh:VA.sMA9h8fzDi0RvweMlxtD0_Oi8B-JZ1V- with...\n", " 8\n", " ga4gh:VA.sMA9h8fzDi0RvweMlxtD0_Oi8B-JZ1V-\n", - " rxcui:1721560\n", " predictsSensitivityTo\n", + " rxcui:1721560\n", " ncit:C2926\n", " \n", " \n", @@ -1268,8 +1264,8 @@ " ga4gh:VA.S41CcMJT2bcd8R4-qXZWH1PoHWNtG2PZ with...\n", " 7\n", " ga4gh:VA.S41CcMJT2bcd8R4-qXZWH1PoHWNtG2PZ\n", - " rxcui:337525\n", " predictsSensitivityTo\n", + " rxcui:337525\n", " ncit:C2926\n", " \n", " \n", @@ -1277,8 +1273,8 @@ " ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L with...\n", " 7\n", " ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L\n", - " rxcui:1147220\n", " predictsSensitivityTo\n", + " rxcui:1147220\n", " ncit:C3224\n", " \n", " \n", @@ -1286,8 +1282,8 @@ " ga4gh:VA.ORvaSNcFK71WOVr_gi2vv6oPCcXgljab with...\n", " 6\n", " ga4gh:VA.ORvaSNcFK71WOVr_gi2vv6oPCcXgljab\n", - " rxcui:318341\n", " predictsResistanceTo\n", + " rxcui:318341\n", " ncit:C4978\n", " \n", " \n", @@ -1295,8 +1291,8 @@ " ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9 with...\n", " 5\n", " ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9\n", - " rxcui:282388\n", " predictsResistanceTo\n", + " rxcui:282388\n", " ncit:C3868\n", " \n", " \n", @@ -1313,8 +1309,8 @@ " ga4gh:VA.mecsBfk2t315ZcdZCTD7TTRVezaXskCy with...\n", " 1\n", " ga4gh:VA.mecsBfk2t315ZcdZCTD7TTRVezaXskCy\n", - " rxcui:337525\n", " predictsSensitivityTo\n", + " rxcui:337525\n", " ncit:C2926\n", " \n", " \n", @@ -1322,8 +1318,8 @@ " ga4gh:VA.e0t-Kq4iX8IsDH1F0zj6xClkCKCJpwwk with...\n", " 1\n", " ga4gh:VA.e0t-Kq4iX8IsDH1F0zj6xClkCKCJpwwk\n", - " rxcui:337525\n", " predictsSensitivityTo\n", + " rxcui:337525\n", " ncit:C2926\n", " \n", " \n", @@ -1331,8 +1327,8 @@ " ga4gh:VA.RWiEzkpxrOKfQHfoE6T5CEpJPgqgA_YB with...\n", " 1\n", " ga4gh:VA.RWiEzkpxrOKfQHfoE6T5CEpJPgqgA_YB\n", - " rxcui:337525\n", " predictsSensitivityTo\n", + " rxcui:337525\n", " ncit:C2926\n", " \n", " \n", @@ -1340,8 +1336,8 @@ " ga4gh:VA.0X2BApoy4FSxtnvy5az1bXu8YMs8E__x with...\n", " 1\n", " ga4gh:VA.0X2BApoy4FSxtnvy5az1bXu8YMs8E__x\n", - " rxcui:337525\n", " predictsResistanceTo\n", + " rxcui:337525\n", " ncit:C2926\n", " \n", " \n", @@ -1349,8 +1345,8 @@ " ga4gh:VA.udBCHwlrf8xNiRy_19bLi-h5LhnZLgCt with...\n", " 1\n", " ga4gh:VA.udBCHwlrf8xNiRy_19bLi-h5LhnZLgCt\n", - " moa.ctid:5e89NyyfMGR167xjfPQC_QJBxBI9OYZx\n", " predictsSensitivityTo\n", + " moa.ctid:5e89NyyfMGR167xjfPQC_QJBxBI9OYZx\n", " ncit:C5105\n", " \n", " \n", @@ -1372,49 +1368,36 @@ "901 ga4gh:VA.0X2BApoy4FSxtnvy5az1bXu8YMs8E__x with... 1 \n", "902 ga4gh:VA.udBCHwlrf8xNiRy_19bLi-h5LhnZLgCt with... 1 \n", "\n", - " variant \\\n", - "0 ga4gh:VA.sMA9h8fzDi0RvweMlxtD0_Oi8B-JZ1V- \n", - "1 ga4gh:VA.S41CcMJT2bcd8R4-qXZWH1PoHWNtG2PZ \n", - "2 ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L \n", - "3 ga4gh:VA.ORvaSNcFK71WOVr_gi2vv6oPCcXgljab \n", - "4 ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9 \n", - ".. ... \n", - "898 ga4gh:VA.mecsBfk2t315ZcdZCTD7TTRVezaXskCy \n", - "899 ga4gh:VA.e0t-Kq4iX8IsDH1F0zj6xClkCKCJpwwk \n", - "900 ga4gh:VA.RWiEzkpxrOKfQHfoE6T5CEpJPgqgA_YB \n", - "901 ga4gh:VA.0X2BApoy4FSxtnvy5az1bXu8YMs8E__x \n", - "902 ga4gh:VA.udBCHwlrf8xNiRy_19bLi-h5LhnZLgCt \n", - "\n", - " therapeutic predicate \\\n", - "0 rxcui:1721560 predictsSensitivityTo \n", - "1 rxcui:337525 predictsSensitivityTo \n", - "2 rxcui:1147220 predictsSensitivityTo \n", - "3 rxcui:318341 predictsResistanceTo \n", - "4 rxcui:282388 predictsResistanceTo \n", + " variant predicate \\\n", + "0 ga4gh:VA.sMA9h8fzDi0RvweMlxtD0_Oi8B-JZ1V- predictsSensitivityTo \n", + "1 ga4gh:VA.S41CcMJT2bcd8R4-qXZWH1PoHWNtG2PZ predictsSensitivityTo \n", + "2 ga4gh:VA.j4XnsLZcdzDIYa5pvvXM7t1wn9OITr0L predictsSensitivityTo \n", + "3 ga4gh:VA.ORvaSNcFK71WOVr_gi2vv6oPCcXgljab predictsResistanceTo \n", + "4 ga4gh:VA.Dy7soaZQU1vH9Eb93xG_pJyhu7xTDDC9 predictsResistanceTo \n", ".. ... ... \n", - "898 rxcui:337525 predictsSensitivityTo \n", - "899 rxcui:337525 predictsSensitivityTo \n", - "900 rxcui:337525 predictsSensitivityTo \n", - "901 rxcui:337525 predictsResistanceTo \n", - "902 moa.ctid:5e89NyyfMGR167xjfPQC_QJBxBI9OYZx predictsSensitivityTo \n", + "898 ga4gh:VA.mecsBfk2t315ZcdZCTD7TTRVezaXskCy predictsSensitivityTo \n", + "899 ga4gh:VA.e0t-Kq4iX8IsDH1F0zj6xClkCKCJpwwk predictsSensitivityTo \n", + "900 ga4gh:VA.RWiEzkpxrOKfQHfoE6T5CEpJPgqgA_YB predictsSensitivityTo \n", + "901 ga4gh:VA.0X2BApoy4FSxtnvy5az1bXu8YMs8E__x predictsResistanceTo \n", + "902 ga4gh:VA.udBCHwlrf8xNiRy_19bLi-h5LhnZLgCt predictsSensitivityTo \n", "\n", - " disease \n", - "0 ncit:C2926 \n", - "1 ncit:C2926 \n", - "2 ncit:C3224 \n", - "3 ncit:C4978 \n", - "4 ncit:C3868 \n", - ".. ... \n", - "898 ncit:C2926 \n", - "899 ncit:C2926 \n", - "900 ncit:C2926 \n", - "901 ncit:C2926 \n", - "902 ncit:C5105 \n", + " therapeutic disease \n", + "0 rxcui:1721560 ncit:C2926 \n", + "1 rxcui:337525 ncit:C2926 \n", + "2 rxcui:1147220 ncit:C3224 \n", + "3 rxcui:318341 ncit:C4978 \n", + "4 rxcui:282388 ncit:C3868 \n", + ".. ... ... \n", + "898 rxcui:337525 ncit:C2926 \n", + "899 rxcui:337525 ncit:C2926 \n", + "900 rxcui:337525 ncit:C2926 \n", + "901 rxcui:337525 ncit:C2926 \n", + "902 moa.ctid:5e89NyyfMGR167xjfPQC_QJBxBI9OYZx ncit:C5105 \n", "\n", "[903 rows x 6 columns]" ] }, - "execution_count": 89, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1438,7 +1421,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -1454,7 +1437,7 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -2543,7 +2526,8 @@ "\n", "fig = px.bar(data[0:86], x='Statement', y='counts', color='predicate', title='Overlap of Full Evidence Statements in MetaKB v2')\n", "fig.update_layout(yaxis_title='# Overlapping Statements')\n", - "fig.show()" + "fig.show()\n", + "pio.write_image(fig, \"Var_mp_score_dist_BRAF_vs_Other.png\", format='png', width=1200, height=400, scale=5)" ] }, { @@ -2560,7 +2544,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -5234,6 +5218,16 @@ "fig.update_traces(textinfo='percent+label')\n", "fig.show()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Loop in evidence level, coding table (HAS_STRENGTH) ---> https://docs.google.com/spreadsheets/d/1FpUmoXmDLVXsNgqog6A9q6o0jIOi_760j02rxIQGFMo/edit?gid=0#gid=0\n", + "# Check to see if regulatory approval exists in the extensions, does it have specific indications? If the disease occurs outside the indication, it gets lowered to a different level. " + ] } ], "metadata": {