From 5281a481c3466f2e173879a8c94cff8647decc41 Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Fri, 1 Sep 2023 11:09:04 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20Refactor=20&=20debug=20id=20capt?= =?UTF-8?q?ure=20(#14)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 📝 Refactor & debug id capture * 💄 Polish * 💚 Fix * 💚 Fix * 💄 Polish --- docs/guide/bulk_rna_seq.ipynb | 81 ++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index 3734af0..dd2d3e2 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -20,6 +20,14 @@ "![](https://raw.githubusercontent.com/nf-core/rnaseq/3.12.0//docs/images/nf-core-rnaseq_metro_map_grey.png)\n" ] }, + { + "cell_type": "markdown", + "id": "fe9ab200", + "metadata": {}, + "source": [ + "## Setup" + ] + }, { "cell_type": "markdown", "id": "b7c8e52d", @@ -49,7 +57,8 @@ "metadata": {}, "outputs": [], "source": [ - "import lamindb as ln" + "import lamindb as ln\n", + "from subprocess import getoutput" ] }, { @@ -99,6 +108,7 @@ "source": [ "download = ln.Transform(name=\"Download\")\n", "download_url = \"https://github.com/nf-core/test-datasets\"\n", + "# create global run containing the download_url\n", "ln.track(download, reference=download_url, reference_type=\"url\")" ] }, @@ -166,7 +176,7 @@ "id": "3e1224fd", "metadata": {}, "source": [ - "Track the Nextflow pipeline & run:" + "Track the Nextflow workflow & run:" ] }, { @@ -176,13 +186,14 @@ "metadata": {}, "outputs": [], "source": [ - "nextflow_bulkrna = ln.Transform(\n", + "transform = ln.Transform(\n", " name=\"nf-core rnaseq\",\n", " version=\"3.11.2\",\n", " type=\"pipeline\",\n", " reference=\"https://github.com/laminlabs/nextflow-lamin-usecases\",\n", ")\n", - "ln.track(nextflow_bulkrna)" + "transform.save()\n", + "run = ln.Run(transform=transform)" ] }, { @@ -192,7 +203,7 @@ "source": [ "If we now stage input files, they'll be tracked as run inputs.\n", "\n", - "(As data is already locally available in this test case, staging won't download anything.)" + "(In this test case, data is already locally available and staging won't download anything.)" ] }, { @@ -206,8 +217,8 @@ }, "outputs": [], "source": [ - "sample_sheet.stage()\n", - "[input_fastq.stage() for input_fastq in input_fastqs]" + "input_sample_sheet_path = sample_sheet.stage()\n", + "input_paths = [input_fastq.stage() for input_fastq in input_fastqs]" ] }, { @@ -215,7 +226,7 @@ "id": "17f9905e-0a34-4335-b0c4-eb9b598c8eaf", "metadata": {}, "source": [ - "All data is now in place and we can run the nextflow pipeline:" + "All data is now locally available and we can run the nextflow pipeline:" ] }, { @@ -229,7 +240,7 @@ }, "outputs": [], "source": [ - "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name {ln.dev.run_context.run.id}" + "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name {run.id}" ] }, { @@ -303,28 +314,38 @@ }, { "cell_type": "markdown", - "id": "dd98074b", + "id": "429bf863", "metadata": {}, "source": [ - "## Link biological entities" + "## Track Nextflow ID" ] }, { "cell_type": "markdown", - "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0", + "id": "7afa797c", "metadata": {}, "source": [ - "To make the count matrix queryable by biological entities (genes, experimental metadata, etc.), we can now proceed with: {doc}`docs:bulkrna`" + "Let us look at the nextflow logs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11538f9b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!nextflow log" ] }, { "cell_type": "markdown", - "id": "429bf863", + "id": "36e4afc3", "metadata": {}, "source": [ - "## Register the Nextflow execution id\n", - "\n", - "If we want to be able to query LaminDB for Nextflow execution ID, this here is a way to get it:" + "Let us add the information about the session ID to our `run` record:" ] }, { @@ -334,18 +355,28 @@ "metadata": {}, "outputs": [], "source": [ - "import subprocess\n", - "\n", - "session_id = subprocess.getoutput(\n", - " f\"nextflow log | awk '/{ln.dev.run_context.run.id}/{{print $8}}'\"\n", - ")\n", - "\n", - "run = ln.Run.filter(transform__name=\"nf-core rnaseq\").order_by(\"-run_at\").first()\n", - "run.reference = session_id\n", + "nextflow_id = getoutput(f\"nextflow log | awk '/{run.id}/{{print $8}}'\")\n", + "run.reference = nextflow_id\n", "run.reference_type = \"nextflow_id\"\n", "run.save()" ] }, + { + "cell_type": "markdown", + "id": "dd98074b", + "metadata": {}, + "source": [ + "## Link biological entities" + ] + }, + { + "cell_type": "markdown", + "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0", + "metadata": {}, + "source": [ + "To make the count matrix queryable by biological entities (genes, experimental metadata, etc.), we can now proceed with: {doc}`docs:bulkrna`" + ] + }, { "cell_type": "markdown", "id": "9f607150",