Skip to content

Commit

Permalink
Merge pull request #83 from Genentech/new-motifs
Browse files Browse the repository at this point in the history
  • Loading branch information
avantikalal authored Nov 20, 2024
2 parents e2ae40d + 671a108 commit efd3081
Show file tree
Hide file tree
Showing 12 changed files with 30,554 additions and 43,069 deletions.
343 changes: 158 additions & 185 deletions docs/tutorials/2_finetune.ipynb

Large diffs are not rendered by default.

73 changes: 40 additions & 33 deletions docs/tutorials/3_train.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "1e30a103-e5b9-4044-921a-7536a606d356",
"metadata": {},
"outputs": [
Expand All @@ -98,7 +98,7 @@
"\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33manony-mouse-180959755991866352\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: Downloading large artifact fragment_file:latest, 2203.42MB. 1 files... \n",
"\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n",
"Done. 0:0:4.4\n",
"Done. 0:0:4.0\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: 1 of 1 files downloaded. \n"
]
}
Expand All @@ -115,7 +115,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "a7e75053-b64c-4b6a-a52a-2c15102638da",
"metadata": {},
"outputs": [],
Expand All @@ -135,7 +135,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"id": "2b471ef2-e124-4a36-bcbb-78bd51edb661",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -165,7 +165,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"id": "048626a4-9e37-44e6-a3c0-75246456e8f9",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -258,7 +258,7 @@
"83318 2.86533 6.19767 4.20704 17 "
]
},
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -288,7 +288,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "6620e13f-9d66-4b59-8f05-ea6a5106ca5c",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -348,7 +348,7 @@
"83318 chrY 56873035 56875149"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -382,7 +382,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "eeac5047-f85b-4ff8-b4b5-f528e0a4bcc1",
"metadata": {},
"outputs": [
Expand All @@ -408,7 +408,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"id": "79bccf2a-2e25-471f-951e-1f50eb523d8a",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -446,7 +446,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 12,
"id": "0ad3c925-162b-4bca-a8e6-6a9b99fe8a15",
"metadata": {
"scrolled": true
Expand Down Expand Up @@ -517,7 +517,7 @@
"20216 chr1 858284 860398"
]
},
"execution_count": 11,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -545,7 +545,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"id": "b551d1e0-bac0-4d96-953c-d2cee89eefea",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -587,7 +587,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"id": "8f174ef8-4f59-47c5-9b9b-2df0c7b15c05",
"metadata": {},
"outputs": [
Expand All @@ -597,7 +597,7 @@
"157176"
]
},
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -726,12 +726,20 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "de620fd0-deb1-4d33-8543-9993cb1cd304",
"metadata": {},
"outputs": [],
"source": [
"import grelu.data.dataset"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "44ed7a4d-aebd-4160-bd38-bb7dfe73aab5",
"metadata": {},
"outputs": [],
"source": [
"import grelu.data.dataset\n",
"\n",
"train_ds = grelu.data.dataset.BigWigSeqDataset(\n",
" intervals = train,\n",
" bw_files=[bw_file],\n",
Expand All @@ -756,7 +764,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 19,
"id": "4f432723-08c6-4504-9911-1a11911e0272",
"metadata": {},
"outputs": [
Expand All @@ -766,7 +774,7 @@
"(126394, 699, 791)"
]
},
"execution_count": 17,
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -1019,7 +1027,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 21,
"id": "3810704c-eec4-4e11-a163-7a0d69154d31",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -1158,14 +1166,14 @@
"GPU available: True (cuda), used: True\n",
"TPU available: False, using: 0 TPU cores\n",
"HPU available: False, using: 0 HPUs\n",
"LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]\n"
"LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicting DataLoader 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 13.02it/s]\n"
"Predicting DataLoader 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 6.93it/s]\n"
]
},
{
Expand Down Expand Up @@ -1252,30 +1260,29 @@
"id": "41e1ee1b-ee93-41d5-9e19-264bc100f64b",
"metadata": {},
"source": [
"To understand the effect of the `AC0622:ELF_SPIB:Ets` motif, we perform a marginalization experiment. In this, we take the `AC0622:ELF_SPIB:Ets` motif and insert it into shuffled background sequences, and compare the predictions of the model before and after inserting this motif.\n",
"To understand the effect of the `SPI1.H12CORE.0.P.B` motif in HOCOMOCO v12, we perform a marginalization experiment. In this, we take the `SPI1.H12CORE.0.P.B` motif and insert it into shuffled background sequences, and compare the predictions of the model before and after inserting this motif.\n",
"\n",
"First, we read this motif from the MEME file and extract the consensus sequence."
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 22,
"id": "d6ed3e54-d9df-4a6c-8ecb-9a1723f71adf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['AAGAGGAAGT']\n"
"['AAAAGAGGAAGTGA']\n"
]
}
],
"source": [
"import grelu.io.motifs\n",
"import grelu.interpret.motifs\n",
"\n",
"motifs = grelu.io.motifs.read_meme_file(\"consensus\", names=[\"AC0622:ELF_SPIB:Ets\"])\n",
"motifs = grelu.io.motifs.read_meme_file(\"hocomoco_v12\", names=[\"SPI1.H12CORE.0.P.B\"])\n",
"patterns = grelu.interpret.motifs.motifs_to_strings(motifs)\n",
"\n",
"print(patterns)"
Expand All @@ -1291,7 +1298,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 23,
"id": "1efa04a3-05ae-472f-8f9a-a5dc2bfafed8",
"metadata": {},
"outputs": [
Expand All @@ -1302,14 +1309,14 @@
"GPU available: True (cuda), used: True\n",
"TPU available: False, using: 0 TPU cores\n",
"HPU available: False, using: 0 HPUs\n",
"LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [2]\n"
"LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicting DataLoader 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 12.08it/s]\n"
"Predicting DataLoader 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 2.85it/s]\n"
]
}
],
Expand All @@ -1330,17 +1337,17 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 24,
"id": "4d44c080-906e-4ccb-afb9-dae964be5f77",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.9763343"
"7.7413588"
]
},
"execution_count": 33,
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
Loading

0 comments on commit efd3081

Please sign in to comment.