From 5281a481c3466f2e173879a8c94cff8647decc41 Mon Sep 17 00:00:00 2001
From: Alex Wolf <f.alexander.wolf@gmail.com>
Date: Fri, 1 Sep 2023 11:09:04 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20Refactor=20&=20debug=20id=20capt?=
 =?UTF-8?q?ure=20(#14)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 📝 Refactor & debug id capture

* 💄 Polish

* 💚 Fix

* 💚 Fix

* 💄 Polish
---
 docs/guide/bulk_rna_seq.ipynb | 81 ++++++++++++++++++++++++-----------
 1 file changed, 56 insertions(+), 25 deletions(-)

diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb
index 3734af0..dd2d3e2 100644
--- a/docs/guide/bulk_rna_seq.ipynb
+++ b/docs/guide/bulk_rna_seq.ipynb
@@ -20,6 +20,14 @@
     "![](https://raw.githubusercontent.com/nf-core/rnaseq/3.12.0//docs/images/nf-core-rnaseq_metro_map_grey.png)\n"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "fe9ab200",
+   "metadata": {},
+   "source": [
+    "## Setup"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "b7c8e52d",
@@ -49,7 +57,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import lamindb as ln"
+    "import lamindb as ln\n",
+    "from subprocess import getoutput"
    ]
   },
   {
@@ -99,6 +108,7 @@
    "source": [
     "download = ln.Transform(name=\"Download\")\n",
     "download_url = \"https://github.com/nf-core/test-datasets\"\n",
+    "# create global run containing the download_url\n",
     "ln.track(download, reference=download_url, reference_type=\"url\")"
    ]
   },
@@ -166,7 +176,7 @@
    "id": "3e1224fd",
    "metadata": {},
    "source": [
-    "Track the Nextflow pipeline & run:"
+    "Track the Nextflow workflow & run:"
    ]
   },
   {
@@ -176,13 +186,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "nextflow_bulkrna = ln.Transform(\n",
+    "transform = ln.Transform(\n",
     "    name=\"nf-core rnaseq\",\n",
     "    version=\"3.11.2\",\n",
     "    type=\"pipeline\",\n",
     "    reference=\"https://github.com/laminlabs/nextflow-lamin-usecases\",\n",
     ")\n",
-    "ln.track(nextflow_bulkrna)"
+    "transform.save()\n",
+    "run = ln.Run(transform=transform)"
    ]
   },
   {
@@ -192,7 +203,7 @@
    "source": [
     "If we now stage input files, they'll be tracked as run inputs.\n",
     "\n",
-    "(As data is already locally available in this test case, staging won't download anything.)"
+    "(In this test case, the data is already available locally, so staging won't download anything.)"
    ]
   },
   {
@@ -206,8 +217,8 @@
    },
    "outputs": [],
    "source": [
-    "sample_sheet.stage()\n",
-    "[input_fastq.stage() for input_fastq in input_fastqs]"
+    "input_sample_sheet_path = sample_sheet.stage()\n",
+    "input_paths = [input_fastq.stage() for input_fastq in input_fastqs]"
    ]
   },
   {
@@ -215,7 +226,7 @@
    "id": "17f9905e-0a34-4335-b0c4-eb9b598c8eaf",
    "metadata": {},
    "source": [
-    "All data is now in place and we can run the nextflow pipeline:"
+    "All data is now locally available, and we can run the Nextflow pipeline:"
    ]
   },
   {
@@ -229,7 +240,7 @@
    },
    "outputs": [],
    "source": [
-    "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name {ln.dev.run_context.run.id}"
+    "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name {run.id}"
    ]
   },
   {
@@ -303,28 +314,38 @@
   },
   {
    "cell_type": "markdown",
-   "id": "dd98074b",
+   "id": "429bf863",
    "metadata": {},
    "source": [
-    "## Link biological entities"
+    "## Track Nextflow ID"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0",
+   "id": "7afa797c",
    "metadata": {},
    "source": [
-    "To make the count matrix queryable by biological entities (genes, experimental metadata, etc.), we can now proceed with: {doc}`docs:bulkrna`"
+    "Let us look at the Nextflow logs:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11538f9b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!nextflow log"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "429bf863",
+   "id": "36e4afc3",
    "metadata": {},
    "source": [
-    "## Register the Nextflow execution id\n",
-    "\n",
-    "If we want to be able to query LaminDB for Nextflow execution ID, this here is a way to get it:"
+    "Let us add the Nextflow session ID to our `run` record:"
    ]
   },
   {
@@ -334,18 +355,28 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import subprocess\n",
-    "\n",
-    "session_id = subprocess.getoutput(\n",
-    "    f\"nextflow log | awk '/{ln.dev.run_context.run.id}/{{print $8}}'\"\n",
-    ")\n",
-    "\n",
-    "run = ln.Run.filter(transform__name=\"nf-core rnaseq\").order_by(\"-run_at\").first()\n",
-    "run.reference = session_id\n",
+    "nextflow_id = getoutput(f\"nextflow log | awk '/{run.id}/{{print $8}}'\")\n",
+    "run.reference = nextflow_id\n",
     "run.reference_type = \"nextflow_id\"\n",
     "run.save()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "dd98074b",
+   "metadata": {},
+   "source": [
+    "## Link biological entities"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0",
+   "metadata": {},
+   "source": [
+    "To make the count matrix queryable by biological entities (genes, experimental metadata, etc.), we can now proceed with: {doc}`docs:bulkrna`"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "9f607150",