From 26fa8e2c5b28d81c2f666a332f1d024156eb04f2 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Sun, 13 Aug 2023 15:25:17 +0200
Subject: [PATCH 1/5] :sparkles: Use new cleanup API

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/guide/bulk_rna_seq.ipynb | 39 ++++++++++++++++++-----------------
 docs/guide/index.md           |  2 +-
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb
index 79d249d..c897d06 100644
--- a/docs/guide/bulk_rna_seq.ipynb
+++ b/docs/guide/bulk_rna_seq.ipynb
@@ -67,6 +67,7 @@
     "import lnschema_bionty as lb\n",
     "import pandas as pd\n",
     "import os\n",
+    "import anndata as ad\n",
     "from pathlib import Path\n",
     "\n",
     "ln.settings.verbosity = 3  # show hints"
@@ -216,11 +217,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "2219c55e",
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": true
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name $LAMINDB_RUN_ID -resume"
@@ -300,35 +297,39 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c1a58047-0c25-4632-b355-69610c6176f3",
+   "cell_type": "markdown",
+   "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "salmon_gene_counts_table = ln.File.from_df(salmon_gene_counts_table_df, run=run)\n",
-    "ln.save(salmon_gene_counts_table)"
+    "We curate the count table analogously to {doc}`/bulkrna`."
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "813ae546-3b76-4aaa-ace0-4621eeadd839",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b0ca2da-8bff-4750-972d-3f1c0cdb28e8",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "We further track all genes that are associated with the count table."
+    "salmon_gene_counts_table_df = salmon_gene_counts_table_df.T\n",
+    "var = pd.DataFrame(\n",
+    "    {\"gene_name\": salmon_gene_counts_table_df.loc[\"gene_name\"].values},\n",
+    "    index=salmon_gene_counts_table_df.loc[\"gene_id\"],\n",
+    ")\n",
+    "adata = ad.AnnData(salmon_gene_counts_table_df.iloc[2:].astype(\"float32\"), var=var)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e655b46d-2bee-404e-9ecc-0d219e97d976",
+   "id": "c1a58047-0c25-4632-b355-69610c6176f3",
    "metadata": {},
    "outputs": [],
    "source": [
-    "genes = ln.FeatureSet.from_values(\n",
-    "    salmon_gene_counts_table_df[\"gene_name\"], lb.Gene.symbol\n",
+    "curated_salmon_gene_counts_file = ln.File.from_anndata(\n",
+    "    adata, description=\"Curated bulk RNA counts\", var_ref=lb.Gene.stable_id, run=run\n",
     ")\n",
-    "salmon_gene_counts_table.features.add_feature_set(genes, slot=\"rna\")"
+    "ln.save(curated_salmon_gene_counts_file)"
    ]
   },
   {
@@ -338,7 +339,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "salmon_gene_counts_table.describe()"
+    "curated_salmon_gene_counts_file.describe()"
    ]
   },
   {
diff --git a/docs/guide/index.md b/docs/guide/index.md
index 9ed405b..4f1be38 100644
--- a/docs/guide/index.md
+++ b/docs/guide/index.md
@@ -9,5 +9,5 @@ This makes it both easy for the user to understand the documentation, and for th
 ```{toctree}
 :maxdepth: 1
 
-quickstart
+bulk_rna_seq
 ```

From 5b7254f988b98474025907437cb52f4e2af652c5 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Sun, 13 Aug 2023 16:15:00 +0200
Subject: [PATCH 2/5] :sparkles: Hide some output

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/guide/bulk_rna_seq.ipynb | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb
index c897d06..aa6b23c 100644
--- a/docs/guide/bulk_rna_seq.ipynb
+++ b/docs/guide/bulk_rna_seq.ipynb
@@ -96,7 +96,7 @@
    "id": "3e1224fd",
    "metadata": {},
    "source": [
-    "[nf-core rnaseq](https://nf-co.re/rnaseq/3.12.0) is arguably one of the most popular pipelines for bulk RNA sequencing using STAR, RSEM, HISAT2 or Salmon with gene/isoform counts and extensive quality control.\n",
+    "The Nextflow pipeline [nf-core rnaseq](https://nf-co.re/rnaseq/3.11.2) is arguably one of the most popular pipelines for bulk RNA sequencing using STAR, RSEM, HISAT2 or Salmon with gene/isoform counts and extensive quality control.\n",
     "\n",
     "First, we create a new Transform object for our pipeline run."
    ]
@@ -131,7 +131,7 @@
    "id": "b20dbc7d-0e75-4b06-8f7a-d540bffbdb44",
    "metadata": {},
    "source": [
-    "We download the [test data](https://github.com/nf-core/test-datasets/tree/rnaseq3) for the pipeline to track it with Lamin."
+    "We download the [test data](https://github.com/nf-core/test-datasets/tree/rnaseq3) for the pipeline and track it with Lamin."
    ]
   },
   {
@@ -141,6 +141,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "%%capture command\n",
     "!git clone https://github.com/nf-core/test-datasets --single-branch --branch rnaseq3"
    ]
   },
@@ -184,6 +185,7 @@
    "outputs": [],
    "source": [
     "run.input_files.set(input_fastqs_file)\n",
+    "run.reference = \"lamin_rnaseq\"\n",
     "run.reference_type = \"nextflow_name\""
    ]
   },
@@ -202,7 +204,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "os.environ[\"LAMINDB_RUN_ID\"] = \"lamin_rnaseq\""
+    "os.environ[\"LAMINDB_RUN_ID\"] = run.reference"
    ]
   },
   {
@@ -220,6 +222,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "%%capture command\n",
     "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name $LAMINDB_RUN_ID -resume"
    ]
   },
@@ -355,7 +358,7 @@
    "id": "8bba6911-70b6-4a99-a95e-6c9659435af6",
    "metadata": {},
    "source": [
-    "Lamin makes it easy to track pipeline executions and to ingest and output files that can subsequently be used for custom downstream analyses. This is complementary to nf-tower."
+    "Lamin makes it easy to track pipeline executions and to ingest input and output files that can subsequently be used for advanced downstream analyses. This is complementary to nf-tower."
    ]
   }
  ],

From 72fbbf74024fc4394aa8b579e8e37794edbad6c9 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Tue, 15 Aug 2023 11:59:35 +0200
Subject: [PATCH 3/5] :art: Use proper doc reference syntax

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/guide/bulk_rna_seq.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb
index aa6b23c..edb2537 100644
--- a/docs/guide/bulk_rna_seq.ipynb
+++ b/docs/guide/bulk_rna_seq.ipynb
@@ -304,7 +304,7 @@
    "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0",
    "metadata": {},
    "source": [
-    "We curate the count table analogously to {doc}`/bulkrna`."
+    "We curate the count table analogously to {doc}`docs:/bulkrna`."
    ]
   },
   {

From 630a005c70c3db0299d2fac539e2fa9d924ce018 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Tue, 15 Aug 2023 12:37:27 +0200
Subject: [PATCH 4/5] :art: Without html

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/guide/bulk_rna_seq.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb
index edb2537..39abbcf 100644
--- a/docs/guide/bulk_rna_seq.ipynb
+++ b/docs/guide/bulk_rna_seq.ipynb
@@ -275,8 +275,8 @@
     "from IPython.display import IFrame\n",
     "\n",
     "# Copying file to a directory accessible by the IPython Tornado web server\n",
-    "shutil.copy(multiqc_file.stage(), \"./multiqc_report.html\")\n",
-    "IFrame(src=\"multiqc_report.html\", width=1000, height=600)"
+    "shutil.copy(multiqc_file.stage(), \"./multiqc_report\")\n",
+    "IFrame(src=\"multiqc_report\", width=1000, height=600)"
    ]
   },
   {

From 64e9041648feb04feb43348c9968fa6db2654aa8 Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Tue, 15 Aug 2023 13:50:45 +0200
Subject: [PATCH 5/5] :art: Remove IFrame

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 docs/guide/bulk_rna_seq.ipynb | 25 +------------------------
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb
index 39abbcf..8030831 100644
--- a/docs/guide/bulk_rna_seq.ipynb
+++ b/docs/guide/bulk_rna_seq.ipynb
@@ -231,7 +231,7 @@
    "id": "a56e8a22-94dd-413b-989d-f13f59addbe6",
    "metadata": {},
    "source": [
-    "As a first step, we ingest all results from the pipeline run."
+    "As a first step, we ingest all MultiQC plots from the pipeline run."
    ]
   },
   {
@@ -256,29 +256,6 @@
     "multiqc_file"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "8e3813b0-d2c8-4126-bc96-a0fd68cc8b98",
-   "metadata": {},
-   "source": [
-    "Let's examine the multiqc report:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "48361e66-f50b-45d6-ae6a-1d6c24426d81",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import shutil\n",
-    "from IPython.display import IFrame\n",
-    "\n",
-    "# Copying file to a directory accessible by the IPython Tornado web server\n",
-    "shutil.copy(multiqc_file.stage(), \"./multiqc_report\")\n",
-    "IFrame(src=\"multiqc_report\", width=1000, height=600)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "29bae36c-dac6-4314-b85b-f3afd7e47fbd",