From 26fa8e2c5b28d81c2f666a332f1d024156eb04f2 Mon Sep 17 00:00:00 2001 From: zethson Date: Sun, 13 Aug 2023 15:25:17 +0200 Subject: [PATCH 1/5] :sparkles: Use new cleanup API Signed-off-by: zethson --- docs/guide/bulk_rna_seq.ipynb | 39 ++++++++++++++++++----------------- docs/guide/index.md | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index 79d249d..c897d06 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -67,6 +67,7 @@ "import lnschema_bionty as lb\n", "import pandas as pd\n", "import os\n", + "import anndata as ad\n", "from pathlib import Path\n", "\n", "ln.settings.verbosity = 3 # show hints" @@ -216,11 +217,7 @@ "cell_type": "code", "execution_count": null, "id": "2219c55e", - "metadata": { - "jupyter": { - "outputs_hidden": true - } - }, + "metadata": {}, "outputs": [], "source": [ "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name $LAMINDB_RUN_ID -resume" @@ -300,35 +297,39 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "c1a58047-0c25-4632-b355-69610c6176f3", + "cell_type": "markdown", + "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0", "metadata": {}, - "outputs": [], "source": [ - "salmon_gene_counts_table = ln.File.from_df(salmon_gene_counts_table_df, run=run)\n", - "ln.save(salmon_gene_counts_table)" + "We curate the count table analogously to {doc}`/bulkrna`." ] }, { - "cell_type": "markdown", - "id": "813ae546-3b76-4aaa-ace0-4621eeadd839", + "cell_type": "code", + "execution_count": null, + "id": "5b0ca2da-8bff-4750-972d-3f1c0cdb28e8", "metadata": {}, + "outputs": [], "source": [ - "We further track all genes that are associated with the count table." + "salmon_gene_counts_table_df = salmon_gene_counts_table_df.T\n", + "var = pd.DataFrame(\n", + " {\"gene_name\": salmon_gene_counts_table_df.loc[\"gene_name\"].values},\n", + " index=salmon_gene_counts_table_df.loc[\"gene_id\"],\n", + ")\n", + "adata = ad.AnnData(salmon_gene_counts_table_df.iloc[2:].astype(\"float32\"), var=var)" ] }, { "cell_type": "code", "execution_count": null, - "id": "e655b46d-2bee-404e-9ecc-0d219e97d976", + "id": "c1a58047-0c25-4632-b355-69610c6176f3", "metadata": {}, "outputs": [], "source": [ - "genes = ln.FeatureSet.from_values(\n", - " salmon_gene_counts_table_df[\"gene_name\"], lb.Gene.symbol\n", + "curated_salmon_gene_counts_file = ln.File.from_anndata(\n", + " adata, description=\"Curated bulk RNA counts\", var_ref=lb.Gene.stable_id, run=run\n", ")\n", - "salmon_gene_counts_table.features.add_feature_set(genes, slot=\"rna\")" + "ln.save(curated_salmon_gene_counts_file)" ] }, { @@ -338,7 +339,7 @@ "metadata": {}, "outputs": [], "source": [ - "salmon_gene_counts_table.describe()" + "curated_salmon_gene_counts_file.describe()" ] }, { diff --git a/docs/guide/index.md b/docs/guide/index.md index 9ed405b..4f1be38 100644 --- a/docs/guide/index.md +++ b/docs/guide/index.md @@ -9,5 +9,5 @@ This makes it both easy for the user to understand the documentation, and for th ```{toctree} :maxdepth: 1 -quickstart +bulk_rna_seq ``` From 5b7254f988b98474025907437cb52f4e2af652c5 Mon Sep 17 00:00:00 2001 From: zethson Date: Sun, 13 Aug 2023 16:15:00 +0200 Subject: [PATCH 2/5] :sparkles: Hide some output Signed-off-by: zethson --- docs/guide/bulk_rna_seq.ipynb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index c897d06..aa6b23c 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -96,7 +96,7 @@ "id": "3e1224fd", "metadata": {}, "source": [ - "[nf-core rnaseq](https://nf-co.re/rnaseq/3.12.0) is arguably one of the most popular pipelines for bulk RNA sequencing using STAR, RSEM, HISAT2 or Salmon with gene/isoform counts and extensive quality control.\n", + "The Nextflow pipeline [nf-core rnaseq](https://nf-co.re/rnaseq/3.12.0) is arguably one of the most popular pipelines for bulk RNA sequencing using STAR, RSEM, HISAT2 or Salmon with gene/isoform counts and extensive quality control.\n", "\n", "First, we create a new Transform object for our pipeline run." ] @@ -131,7 +131,7 @@ "id": "b20dbc7d-0e75-4b06-8f7a-d540bffbdb44", "metadata": {}, "source": [ - "We download the [test data](https://github.com/nf-core/test-datasets/tree/rnaseq3) for the pipeline to track it with Lamin." + "We download the [test data](https://github.com/nf-core/test-datasets/tree/rnaseq3) for the pipeline which we track with Lamin." ] }, { @@ -141,6 +141,7 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture command\n", "!git clone https://github.com/nf-core/test-datasets --single-branch --branch rnaseq3" ] }, @@ -184,6 +185,7 @@ "outputs": [], "source": [ "run.input_files.set(input_fastqs_file)\n", + "run.reference = \"lamin_rnaseq\"\n", "run.reference_type = \"nextflow_name\"" ] }, @@ -202,7 +204,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ[\"LAMINDB_RUN_ID\"] = \"lamin_rnaseq\"" + "os.environ[\"LAMINDB_RUN_ID\"] = run.reference" ] }, { @@ -220,6 +222,7 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture command\n", "!nextflow run nf-core/rnaseq -r 3.11.2 -profile test,docker --outdir rna-seq-results -name $LAMINDB_RUN_ID -resume" ] }, @@ -355,7 +358,7 @@ "id": "8bba6911-70b6-4a99-a95e-6c9659435af6", "metadata": {}, "source": [ - "Lamin makes it easy to track pipeline executions and to ingest and output files that can subsequently be used for custom downstream analyses. This is complementary to nf-tower." + "Lamin makes it easy to track pipeline executions and to ingest input and output files that can subsequently be used for advanced downstream analyses. This is complementary to nf-tower." ] } ], From 72fbbf74024fc4394aa8b579e8e37794edbad6c9 Mon Sep 17 00:00:00 2001 From: zethson Date: Tue, 15 Aug 2023 11:59:35 +0200 Subject: [PATCH 3/5] :art: Use proper doc reference syntax Signed-off-by: zethson --- docs/guide/bulk_rna_seq.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index aa6b23c..edb2537 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -304,7 +304,7 @@ "id": "22c88eed-61e0-4d12-96bb-ea4e10f476c0", "metadata": {}, "source": [ - "We curate the count table analogously to {doc}`/bulkrna`." + "We curate the count table analogously to {doc}`docs:/bulkrna`." ] }, { From 630a005c70c3db0299d2fac539e2fa9d924ce018 Mon Sep 17 00:00:00 2001 From: zethson Date: Tue, 15 Aug 2023 12:37:27 +0200 Subject: [PATCH 4/5] :art: Without html Signed-off-by: zethson --- docs/guide/bulk_rna_seq.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index edb2537..39abbcf 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -275,8 +275,8 @@ "from IPython.display import IFrame\n", "\n", "# Copying file to a directory accessible by the IPython Tornado web server\n", - "shutil.copy(multiqc_file.stage(), \"./multiqc_report.html\")\n", - "IFrame(src=\"multiqc_report.html\", width=1000, height=600)" + "shutil.copy(multiqc_file.stage(), \"./multiqc_report\")\n", + "IFrame(src=\"multiqc_report\", width=1000, height=600)" ] }, { From 64e9041648feb04feb43348c9968fa6db2654aa8 Mon Sep 17 00:00:00 2001 From: zethson Date: Tue, 15 Aug 2023 13:50:45 +0200 Subject: [PATCH 5/5] :art: Remove IFrame Signed-off-by: zethson --- docs/guide/bulk_rna_seq.ipynb | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/docs/guide/bulk_rna_seq.ipynb b/docs/guide/bulk_rna_seq.ipynb index 39abbcf..8030831 100644 --- a/docs/guide/bulk_rna_seq.ipynb +++ b/docs/guide/bulk_rna_seq.ipynb @@ -231,7 +231,7 @@ "id": "a56e8a22-94dd-413b-989d-f13f59addbe6", "metadata": {}, "source": [ - "As a first step, we ingest all results from the pipeline run." + "As a first step, we ingest all multiqc plots from the pipeline run." ] }, { @@ -256,29 +256,6 @@ "multiqc_file" ] }, - { - "cell_type": "markdown", - "id": "8e3813b0-d2c8-4126-bc96-a0fd68cc8b98", - "metadata": {}, - "source": [ - "Let's examine the multiqc report:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "48361e66-f50b-45d6-ae6a-1d6c24426d81", - "metadata": {}, - "outputs": [], - "source": [ - "import shutil\n", - "from IPython.display import IFrame\n", - "\n", - "# Copying file to a directory accessible by the IPython Tornado web server\n", - "shutil.copy(multiqc_file.stage(), \"./multiqc_report\")\n", - "IFrame(src=\"multiqc_report\", width=1000, height=600)" - ] - }, { "cell_type": "markdown", "id": "29bae36c-dac6-4314-b85b-f3afd7e47fbd",