From 3fdd161252be5407677127e2937fcd0721f855cc Mon Sep 17 00:00:00 2001 From: <> Date: Tue, 15 Oct 2024 06:45:36 +0000 Subject: [PATCH] Deployed 2336109 with MkDocs version: 1.6.1 --- 0_setup/index.html | 2 +- 1_rdm-guidelines/index.html | 2 +- 2_starting-assay-project/index.html | 2 +- 3_pipelines/index.html | 2 +- 404.html | 2 +- 4_conda/index.html | 2 +- 5_vscode/index.html | 11 ++++++----- 6_handy-scripts/index.html | 2 +- index.html | 2 +- miscellaneous/dropbox/index.html | 2 +- miscellaneous/ku-computer/index.html | 2 +- miscellaneous/podman/index.html | 2 +- search/search_index.json | 2 +- sitemap.xml.gz | Bin 127 -> 127 bytes tools_and_packages/alphafold2/index.html | 2 +- tools_and_packages/dReg/index.html | 2 +- tools_and_packages/packages/index.html | 2 +- tools_and_packages/ucsc_liftover/index.html | 2 +- 18 files changed, 22 insertions(+), 21 deletions(-) diff --git a/0_setup/index.html b/0_setup/index.html index be964fa..f7799a9 100644 --- a/0_setup/index.html +++ b/0_setup/index.html @@ -16,7 +16,7 @@ - + diff --git a/1_rdm-guidelines/index.html b/1_rdm-guidelines/index.html index 3c3531d..16aa036 100644 --- a/1_rdm-guidelines/index.html +++ b/1_rdm-guidelines/index.html @@ -16,7 +16,7 @@ - + diff --git a/2_starting-assay-project/index.html b/2_starting-assay-project/index.html index dd2a6f3..cfad6ba 100644 --- a/2_starting-assay-project/index.html +++ b/2_starting-assay-project/index.html @@ -16,7 +16,7 @@ - + diff --git a/3_pipelines/index.html b/3_pipelines/index.html index 90bb5f5..afa07e2 100644 --- a/3_pipelines/index.html +++ b/3_pipelines/index.html @@ -16,7 +16,7 @@ - + diff --git a/404.html b/404.html index 5c123d1..35d79f5 100644 --- a/404.html +++ b/404.html @@ -12,7 +12,7 @@ - + diff --git a/4_conda/index.html b/4_conda/index.html index 4abfc49..c87f059 100644 --- a/4_conda/index.html +++ b/4_conda/index.html @@ -16,7 +16,7 @@ - + diff --git a/5_vscode/index.html b/5_vscode/index.html index e041049..eea68a1 100644 --- a/5_vscode/index.html +++ b/5_vscode/index.html @@ -16,7 +16,7 @@ - + @@ -883,7 +883,7 @@

Setup R with Visual Studio Code

Setting up Remote Tunnels

Warning

-

In this example we use version R/4.3.1. If you want to use a different one +

In this example we use version R/4.2.1. If you want to use a different one, change the R version!

Requirements

@@ -896,7 +896,7 @@

Setting up first time

  • Login to head node: ssh $USER@danhead01fl.unicph.domain
  • Start a new tmux session: tmux new -s rstudio
  • Start a new job: srun -c 2 --mem=30gb --time=0-4:00:00 --pty bash
  • -
  • Load modules: module load vscode_cli gcc/11.2.0 R/4.3.1 miniconda/latest
  • +
  • Load modules: module load vscode_cli gcc/11.2.0 R/4.2.1 quarto
  • Run command: code tunnel
1. Choose Microsoft account when asked how you would like to log in to VSCode
    2. Open the link from terminal and paste the CODE
    3. @@ -912,6 +912,7 @@

      Setting up first time

  • Move cursor on it and click the ->
  • +
  • Install extension Remote Tunnels
  • To use R, install additional packages by clicking Extensions in the left panel. Search for packages:

    @@ -944,7 +945,7 @@

I already did the setup, I want my R again
  • Login to head node: ssh $USER@danhead01fl.unicph.domain
  • Start a new tmux session: tmux new -s rstudio
  • Start a new job: srun -c 2 --mem=30gb --time=0-4:00:00 --pty bash
  • -
  • Load modules: module load vscode_cli gcc/11.2.0 R/4.3.1 miniconda/latest
  • +
  • Load modules: module load vscode_cli gcc/11.2.0 R/4.2.1 quarto
  • Run command: code tunnel
  • Go to your VSCode and on the left panel search for Remote Explorer
  • Click dancmpn01flunicphdom or dancmpn02flunicphdom
  • @@ -996,7 +997,7 @@

    Known issues

    - July 5, 2024 + October 15, 2024 diff --git a/6_handy-scripts/index.html b/6_handy-scripts/index.html index 469d9b2..a3d7688 100644 --- a/6_handy-scripts/index.html +++ b/6_handy-scripts/index.html @@ -16,7 +16,7 @@ - + diff --git a/index.html b/index.html index 89464a7..86a2463 100644 --- a/index.html +++ b/index.html @@ -14,7 +14,7 @@ - + diff --git a/miscellaneous/dropbox/index.html b/miscellaneous/dropbox/index.html index 1762e46..f8a1440 100644 --- a/miscellaneous/dropbox/index.html +++ b/miscellaneous/dropbox/index.html @@ -16,7 +16,7 @@ - + diff --git a/miscellaneous/ku-computer/index.html b/miscellaneous/ku-computer/index.html index 73078bc..0c9bc75 100644 --- a/miscellaneous/ku-computer/index.html +++ b/miscellaneous/ku-computer/index.html @@ -16,7 +16,7 @@ - + diff --git a/miscellaneous/podman/index.html b/miscellaneous/podman/index.html index 4f864c5..edd0cb1 100644 --- a/miscellaneous/podman/index.html +++ b/miscellaneous/podman/index.html @@ -16,7 +16,7 @@ - + diff --git a/search/search_index.json b/search/search_index.json index e562eb4..837f0d3 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":"

    Welcome to the Brickman Lab wiki!

    Here you can find documentation for our analysis workflows. For more information about our research, visit the Brickman Group website.

    "},{"location":"#transcriptional-basis-for-cell-fate-choice","title":"Transcriptional basis for cell fate choice","text":"

    The Brickman Group aims to understand the transcriptional basis for early embryonic lineage specification.

We are interested in the dynamic mechanisms by which cells can both reversibly prime towards a particular fate or undergo a transition into commitment.

    "},{"location":"#publications","title":"Publications","text":"Selected publications

    Wong, Y. F., Kumar, Y., Proks, M., Herrera, J. A. R., Rothov\u00e1,M. M., Monteiro, R. S., Pozzi, S., Jennings, R. E., Hanley, N. A., Bickmore, W. A., and Brickman, J. M. (2023). Expansion of ventral foregut is linked to changes in the enhancer landscape for organ-specific differentiation. Nature Cell Biology, doi: 10.1038/s41556-022-01075-8.

    Perera, M., Nissen, S. B., Proks, M., Pozzi, S., Monteiro, R. S., Trusina, A., and Brickman, J. M. (2022). Transcriptional heterogeneity and cell cycle regulation as central determinants of Primitive Endoderm priming. eLife, doi: 10.7554/eLife.78967.

    Rothov\u00e1, M. M., Nielsen, A. V., Proks, M., Wong, Y. F., Riveiro, A. R., Linneberg-Agerholm, M., David, E., Amit, I., Trusina, A., and Brickman, J. M. (2022). Identification of the central intermediate in the extra-embryonic to embryonic endoderm transition through single-cell transcriptomics. Nature Cell Biology, doi: 10.1038/s41556-022-00923-x.

    Riveiro, A. R., and Brickman, J. M. (2020). From pluripotency to totipotency: an experimentalist's guide to cellular potency. Development, doi: 10.1242/dev.189845.

    Hamilton, W.B., Mosesson, Y., Monteiro, R.S., Emdal, K.B., Knudsen, T.E., Francavilla, C., Barkai, N., Olsen, J.V. and Brickman, J.M. (2019). Dynamic lineage priming is driven via direct enhancer regulation by ERK. Nature, doi: 10.1038/s41586-019-1732-z.

    Weinert, B.T., Narita, T., Satpathy, S., Srinivasan, B., Hansen, B.K., Scholz, C., Hamilton, W.B., Zucconi, B.E., Wang, W.W., Liu, W.R., Brickman, J.M., Kesicki, E.A., Lai, A., Bromberg, K.D., Cole, P.A., and Choudhary, C. (2018). Time-Resolved Analysis Reveals Rapid Dynamics and Broad Scope of the CBP/p300 Acetylome. Cell 174, 231-244.e212, doi:10.1016/j.cell.2018.04.033.

    Anderson, K.G.V., Hamilton, W.B., Roske, F.V., Azad, A., Knudsen, T.E., Canham, M.A., Forrester, L.M., and Brickman, J.M. (2017). Insulin fine-tunes self-renewal pathways governing naive pluripotency and extra-embryonic endoderm. Nature Cell Biology 19, 1164-1177, doi:10.1038/ncb3617.

    Nissen, S.B., Perera, M., Gonzalez, J.M., Morgani, S.M., Jensen, M.H., Sneppen, K., Brickman, J.M., and Trusina, A. (2017). Four simple rules that are sufficient to generate the mammalian blastocyst. PLoS Biol 15, e2000737, doi:10.1371/journal.pbio.2000737. *joint senior author

    Migueles, R.P., Shaw, L., Rodrigues, N.P., May, G., Henseleit, K., Anderson, K.G., Goker, H., Jones, C.M., de Bruijn, M.F., Brickman, J.M., and Enver, T. (2017). Transcriptional regulation of Hhex in hematopoiesis and hematopoietic stem cell ontogeny. Developmental Biology 424, 236-245, doi:10.1016/j.ydbio.2016.12.021.

    Illingworth, R.S., H\u00f6lzenspies, J.J., Roske, F.V., Bickmore, W.A., and Brickman, J.M. (2016). Polycomb enables primitive endoderm lineage priming in embryonic stem cells. Elife 5, doi:10.7554/eLife.14926.

    Martin Gonzalez, J., Morgani, S.M., Bone, R.A., Bonderup, K., Abelchian, S., Brakebusch, C., and Brickman, J.M. (2016). Embryonic Stem Cell Culture Conditions Support Distinct States Associated with Different Developmental Stages and Potency. Stem Cell Reports 7, 177-191, doi:10.1016/j.stemcr.2016.07.009.

    "},{"location":"#datasets","title":"Datasets","text":"

    Rothova et al., (2022). Nature Cell Biology. Single-cell RNA-seq datasets from FOXA2Venus reporter mouse embryos and embryonic stem cell differentiation towards endoderm.

    "},{"location":"0_setup/","title":"First time on danserver","text":"

    For starting on the server make sure to read:

    "},{"location":"0_setup/#first-time-on-server-checklist","title":"First time on server checklist","text":"
    1. Login to danhead: ssh $USER@danhead01fl.unicph.domain
    2. Run nano ~/.bash_profile
    if [ -f ~/.bashrc ]; then\n . ~/.bashrc\nfi\n
    1. Run nano ~/.bashrc
    # .bashrc\n\n# Source global definitions\nif [ -f /etc/bashrc ]; then\n . /etc/bashrc\nfi\n\n# User specific environment\nif ! [[ \"$PATH\" =~ \"$HOME/.local/bin:$HOME/bin:\" ]]\nthen\n    PATH=\"$HOME/.local/bin:$HOME/bin:$PATH\"\nfi\nexport PATH\n\n# Uncomment the following line if you don't like systemctl's auto-paging feature:\n# export SYSTEMD_PAGER=\n\n# User specific aliases and functions\n### Source DanGPU definitions\nif [ -f /maps/projects/dan1/apps/etc/bashrc ]; then\n . /maps/projects/dan1/apps/etc/bashrc\nfi\n\n### Source Brickman definitions\nif [ -f /maps/projects/dan1/data/Brickman/config/brickman.bashrc ]; then\n . /maps/projects/dan1/data/Brickman/config/brickman.bashrc\nfi\n
1. Log out and log in again; you should now see the Brickman folder
    "},{"location":"1_rdm-guidelines/","title":"Research Data Management Guidelines for NGS","text":"

    This section provides guidelines for effective research data management within our lab. By adopting these guidelines, we aim to improve data organization and naming conventions, leading to enhanced data governance and research efficiency. The guidelines include the following steps:

1. Adhere to folder structure and naming conventions for Assays and Projects folders.
2. Add relevant metadata to a metadata.yml file in each folder.
3. Create a database from metadata files in Assays and Projects folders and browse it with a Panel python app.
4. Projects folders will be version controlled with GitHub and the Brickman organization.
5. Projects reports will be displayed under the Brickman organization GitHub Pages.
6. Projects will be synchronized and archived in Zenodo, which will give a DOI that can be used in a publication.
7. NGS Assays folders will be uploaded to GEO, with the information provided in the metadata file.
8. Create a Data Management Plan template that is prefilled with repetitive information using DMPonline.
    "},{"location":"1_rdm-guidelines/#1-folder-structure-and-organization","title":"1. Folder structure and organization","text":"

    To ensure efficient data management, it is important to establish a consistent approach to organizing research data. We consider the following practices:

    "},{"location":"1_rdm-guidelines/#11-template-engine","title":"1.1 Template engine","text":"

    We are currently using a cookiecutter template to generate a folder structure. Use cruft when generating assay and project folders to allow us to validate and sync old templates with the latest version.

    See this section to get started with a new project/assay.
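
As a rough sketch of what this looks like under the hood (the template URL below is a placeholder, not necessarily the lab's actual repository):

module load miniconda/latest\nsource activate brickman\n\n# generate a new folder from the cookiecutter template via cruft\ncruft create https://github.com/brickmanlab/ngs-template\n\n# later, check whether the folder has drifted from the template\ncruft check\n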

    "},{"location":"1_rdm-guidelines/#12-assay-folder","title":"1.2 Assay folder","text":"

    For each NGS experiment there should be an Assay folder that will contain all experimental datasets (raw files and pipeline processed files). Inside Assay there will be subfolders named after a unique NGS ID and the date it was created:

    <Assay-ID>_YYYYMMDD\n
    "},{"location":"1_rdm-guidelines/#assay-id-code-names","title":"Assay ID code names","text":"

    For example CHIP_20230101 is a ChIPseq assay made on 1st January 2023.

    "},{"location":"1_rdm-guidelines/#folder-structure","title":"Folder Structure","text":"
    CHIP_20230424\n\u251c\u2500\u2500 description.yaml\n\u251c\u2500\u2500 metadata.yaml\n\u251c\u2500\u2500 pipeline.md\n\u251c\u2500\u2500 processed\n\u2514\u2500\u2500 raw\n   \u251c\u2500\u2500 .fastq.gz\n   \u2514\u2500\u2500 samplesheet.csv\n
    "},{"location":"1_rdm-guidelines/#13-project-folder","title":"1.3 Project folder","text":"

    There should be another folder called Projects that will contain project information and data analysis.

    A project may use one or more assays to answer a scientific question. This should be, for example, all the data analysis related to a publication.

    The project folder should be named after a unique identifier, such as:

    <Project-ID>_YYYYMMDD\n

    <Project-ID> should be the initials of the owner of the project folder and the publication year, e.g. JARH_et_al_20230101.

    "},{"location":"1_rdm-guidelines/#folder-structure_1","title":"Folder structure","text":"
    <Project-ID>_20230424\n\u251c\u2500\u2500 data\n\u2502  \u251c\u2500\u2500 assays\n\u2502  \u251c\u2500\u2500 external\n\u2502  \u2514\u2500\u2500 processed\n\u251c\u2500\u2500 documents\n\u2502  \u2514\u2500\u2500 Non-sensitive_NGS_research_project_template.docx\n\u251c\u2500\u2500 notebooks\n\u2502  \u2514\u2500\u2500 01_data_analysis.rmd\n\u251c\u2500\u2500 README.md\n\u251c\u2500\u2500 reports\n\u2502  \u251c\u2500\u2500 figures\n\u2502  \u2502  \u2514\u2500\u2500 01_data_analysis\n\u2502  \u2514\u2500\u2500 01_data_analysis.html\n\u251c\u2500\u2500 requirements.txt\n\u251c\u2500\u2500 results\n\u2502  \u2514\u2500\u2500 01_data_analysis/\n\u251c\u2500\u2500 scripts\n\u251c\u2500\u2500 description.yml\n\u2514\u2500\u2500 metadata.yml\n
    "},{"location":"1_rdm-guidelines/#14-synchronization-with-dangpu-server","title":"1.4 Synchronization with DanGPU server","text":"

We will have to set up a cron job to perform a one-way sync between the /projects folder and the NGS_data folder. All the analysis will be done on the danGPU server, with no exceptions!

After a project is done and published, it will be moved to NGS_data.
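
A minimal sketch of such a cron entry, assuming rsync and illustrative paths (the real source and destination may differ):

# crontab -e: one-way sync every night at 02:00\n0 2 * * * rsync -a --delete /projects/ /maps/projects/dan1/data/NGS_data/ >> ~/cron_sync.log 2>&1\n

Note that --delete makes the destination mirror the source exactly; drop it if deletions should not propagate.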

    "},{"location":"1_rdm-guidelines/#15-general-naming-conventions-and-more-info","title":"1.5 General naming conventions and more info","text":"

    Transcriptomics metadata standards and fields

    More info on naming conventions for different types of files and analysis is in development.

| name | description | naming_convention | file format | example |
| --- | --- | --- | --- | --- |
| .fastq | raw sequencing reads | nan | nan | sampleID_run_read1.fastq |
| .fastqc | quality control from fastqc | nan | nan | sampleID_run_read1.fastqc |
| .bam | aligned reads | nan | nan | sampleID_run_read1.bam |
| GTF | sequence annotation | nan | nan | one of https://www.gencodegenes.org/ |
| GFF | sequence annotation | nan | nan | one of https://www.gencodegenes.org/ |
| .bed | genome locations | nan | nan | nan |
| .bigwig | genome coverage | nan | nan | nan |
| .fasta | sequence data (nucleotide/amino acid) | nan | nan | one of https://www.gencodegenes.org/ |
| Multiqc report | QC aggregated report | <assayID>_YYYYMMDD.multiqc | multiqc | RNA_20200101.multiqc |
| Count matrix | final count matrix | <assayID>_cm_aligner_YYYYMMDD.tsv | tsv | RNA_cm_salmon_20200101.tsv |
| DEA | differential expression analysis results | DEA_<condition1-condition2>_LFC<absolute_threshold>_p<pvalue decimals>_YYYYMMDD.tsv | tsv | DEA_treat-untreat_LFC1_p01_20200101.tsv |
| DBA | differential binding analysis results | DBA_<condition1-condition2>_LFC<absolute_threshold>_p<pvalue decimals>_YYYYMMDD.tsv | tsv | DBA_treat-untreat_LFC1_p01_20200101.tsv |
| MAplot | MA plot | MAplot_<condition1-condition2>_YYYYMMDD.jpeg | jpeg | MAplot_treat-untreat_20200101.jpeg |
| Heatmap plot | Heatmap plot of anything | heatmap_<type>_YYYYMMDD.jpeg | jpeg | Heatmap_sampleCor_20200101.jpeg |
| Volcano plot | Volcano plot | volcano_<condition1-condition2>_YYYYMMDD.jpeg | jpeg | volcano_treat-untreat_20200101.jpeg |
| Venn diagram | Venn diagram | venn_<type>_YYYYMMDD.jpeg | jpeg | venn_consensus_20200101.jpeg |
| Enrichment table | Enrichment results | nan | tsv | nan |
"},{"location":"1_rdm-guidelines/#2-metadata-and-documentation","title":"2. Metadata and documentation","text":"

    Accurate documentation and metadata play a crucial role in facilitating data discovery and interpretation. Consider the following guidelines:

    "},{"location":"1_rdm-guidelines/#21-assay-metadata-fields","title":"2.1 Assay metadata fields","text":"Metadata field Definition Format Example assay_id Identifier for the assay <assay>_<codename>_YYYYMMDD CHIP_Oct4_20200101 assay What kind of NGS was used in your experiment? [\"CHIP\", \"RNA\", \"ATAC\", \"SCR\", \"PROT\", \"CAT\", \"CAR\", \"RIME\", \"TAP\"] ChIPseq owner Who performed the experiment? <First Name> <Last Name> Jose Romero date Date of sequencing, should be the same as defined by Genomics Platform in YYYYMMDD format! YYYYMMDD 20200101 codename Your name initials [Example: JB for Josh Brickman] <Initials OR keyword> JR eln_id Optional: Electronic lab notebook ID Free text 12345 technology What technology was used? [Example: 10X Genomics if you used SCR] Free text 10X genomics sequencer What sequencing machine was used? [Example: NovaSeq 2000/NextSeq 2000/NextSeq 500] Free text NextSeq 2000 seq_kit What sequencing kit did you use? Please provide product number if available Free text nan n_samples How many samples have been sequenced? <integer> 9 is_paired Paired fastq files or not <single-end OR paired-end> single-end pipeline Pipeline name [Example: nf-core/rnaseq 3.12.0 or custom] Free text nf-core/chipseq -r 1.0 processed_by Person responsible for pre-processing (pipeline execution) <First Name> <Last Name> Sarah Lundregan organism What organism is this? <mouse OR human OR other> mouse organism_version Which version of genome was used [Example: mm10, hg38] Free text mm10 organism_subgroup In vitro or in vivo? <in vivo OR in vitro> in vitro origin Is this internal experiment of external (collaborator/publication)? <internal OR external> internal note Optional: Was there something worth knowing? Free text Low quality experiment/Indexes are swapped ... genomics_path Path to where the data is </path/to/file> smb:/path/to/file"},{"location":"1_rdm-guidelines/#22-project-metadata-fields","title":"2.2 Project metadata fields","text":"

    In development.

| Metadata field | Definition | Format | Example |
| --- | --- | --- | --- |
| project | Project name | <name>_<keyword>_YYYY | lundregan_oct4_2023 |
| author | Owner of the project | <First name> <Surname> | Sarah Lundregan |
| date | Date of creation | YYYYMMDD | 20230101 |
| description | Short description of the project | Plain text | This is a project describing the effect of Oct4 perturbation after pERK activation |
"},{"location":"1_rdm-guidelines/#3-data-catalogue-and-browser","title":"3. Data catalogue and browser","text":"

@SLundregan is in the process of building a prototype for Assay, using the metadata contained in all description.yml and metadata.yml files in the assay folder. This will be in the form of an SQLite database that is easily updatable by running a helper script.

    @SLundregan is also working on a browsable database using Panel python app. The app will display the latest version of the SQLite database. Clicking on an item from the database will open a tab containing all available metadata for the assay.

It would also be nice to be able to create an Assay folder directly from there, making it easy to fill in the info for the metadata and GEO submission (see below).

In the future, you could ideally visualize an analysed single cell RNAseq dataset by opening a Cirrocumulus session.

    "},{"location":"1_rdm-guidelines/#4-projects-version-control","title":"4. Projects version control","text":"

All projects should be version controlled using GitHub under the Brickman organization. After creating the folder from the cookiecutter template, initialize a git repository in it. The repository can stay private until it is ready for publication.
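
A minimal sketch of that workflow, assuming the GitHub CLI (gh) is available on the server; the repository name is illustrative:

cd /maps/projects/dan1/data/Brickman/projects/<PROJECT_ID>\ngit init\ngit add .\ngit commit -m \"Initial commit from template\"\n\n# create a private repository under the Brickman organization and push\ngh repo create brickmanlab/<PROJECT_ID> --private --source=. --remote=origin --push\n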

    "},{"location":"1_rdm-guidelines/#5-projects-github-pages","title":"5. Projects GitHub pages","text":"

Using GitHub Pages, it is possible to display your data analyses (or anything related to the project) inside the Projects folder so that they are publicly available in HTML format. This is great for transparency and reproducibility. This can be done once the paper has been made public (it is not possible with a private repository without paying).

    Info on how this is done should be put here
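
In the meantime, a rough sketch of one way to do it, assuming the report pages are built with MkDocs (as this wiki is); this is not yet an agreed lab procedure:

# inside the public project repository\npip install mkdocs\nmkdocs gh-deploy  # builds the site and pushes it to the gh-pages branch\n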

    "},{"location":"1_rdm-guidelines/#6-project-archiving-in-zenodo","title":"6. Project archiving in Zenodo","text":"

Before submitting, link the repository to Zenodo and then create a Git release. This release will be caught by Zenodo and will give you a DOI that you can submit along with the manuscript.
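
Once the repository is linked on the Zenodo website, the release can be created as below (the tag name is illustrative); note that Zenodo reacts to GitHub releases, not bare tags:

git tag -a v1.0.0 -m \"Manuscript submission\"\ngit push origin v1.0.0\n\n# create the GitHub release from the tag, which triggers Zenodo\ngh release create v1.0.0 --title \"Manuscript submission\" --notes \"Code as submitted\"\n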

    "},{"location":"1_rdm-guidelines/#7-data-upload-to-geo","title":"7. Data upload to GEO","text":"

The raw data from NGS experiments will be uploaded to the Gene Expression Omnibus (GEO). Whenever a new Assay folder is created, the data owner must fill in the required documentation and information needed to make the GEO submission as smooth as possible.

    "},{"location":"1_rdm-guidelines/#8-create-a-data-management-plan","title":"8. Create a Data Management Plan","text":"

    From the University of Copenhagen RDM team

A Data Management Plan (DMP) is a planning tool that helps researchers to establish good practices for working with physical material and data in a research project. A DMP covers all relevant aspects of research data management throughout the project. Writing a DMP early on in a project helps:

We have written a DMP template that is prefilled with repetitive information, using DMPonline and the Horizon Europe guidelines. This template contains all the necessary information regarding common practices we will use, the repositories we use for NGS, etc. The template is part of the project folder template, under documents. You can check the file here.

The Horizon Europe template is mostly focused on digital data, so it may not be the best fit for the needs of the Brickman Lab, which is mostly a wet lab with some bioinformatics. We will start working on another DMP based on the KU template, which is designed for both physical and digital data.

    "},{"location":"2_starting-assay-project/","title":"Starting a new assay or project","text":"

Whenever you obtain sequencing data from the Genomics Platform, you have to create an Assay. By running the commands below, you will have the option to fill in all required information about the experiment. This workflow will help us track all sequencing done in our lab.

    "},{"location":"2_starting-assay-project/#assay","title":"Assay","text":"

    When you sequence an experiment, we create an Assay out of it, so we can use it in a project afterwards.

    Login to danhead and run command:

    create_assay\n
    "},{"location":"2_starting-assay-project/#project","title":"Project","text":"

Every time you want to start an analysis, you should create a project. Our folder structure allows you to easily link various experiments to your project and makes your analysis easier.

    Please use the following naming convention: surname-<YOUR_CODENAME>

    create_project\n

    Link required assays to your project.

    ln -s /maps/projects/dan1/data/Brickman/assays/<ASSAY_ID> /maps/projects/dan1/data/Brickman/projects/<PROJECT_ID>/data/assays/\n

    Link external data if needed

    ln -s /maps/projects/dan1/data/Brickman/shared /maps/projects/dan1/data/Brickman/projects/<PROJECT_ID>/data/external/\n
    "},{"location":"3_pipelines/","title":"Running pipelines","text":"

By default, we run nf-core pipelines. To run a pipeline, read the official documentation, which includes an example.
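
For reference, a typical invocation looks roughly like this (pipeline, revision and options are illustrative; always check the pipeline's own documentation):

nextflow run nf-core/rnaseq -r 3.8.1 -profile test --outdir nfcore_test\n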

    "},{"location":"3_pipelines/#monitoring-runs-with-nextflow-tower","title":"Monitoring runs with Nextflow Tower","text":"

    This is a guide on how to use Nextflow Tower to monitor nf-core pipeline runs.

    We have created an API token for our GitHub account (brickmanlab) and restricted it to run only pipelines, nothing else. The TOWER_WORKSPACE_ID and TOWER_ACCESS_TOKEN are stored in Brickman/config/brickman.bashrc.

To do more advanced things, you have to create your own personal access token.
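
With TOWER_WORKSPACE_ID and TOWER_ACCESS_TOKEN already exported via brickman.bashrc, monitoring a run is then just a matter of adding the -with-tower flag (a sketch; the pipeline and options are illustrative):

nextflow run nf-core/rnaseq -r 3.8.1 -profile test --outdir nfcore_test -with-tower\n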

    "},{"location":"3_pipelines/#tower-cli-installation","title":"Tower CLI installation","text":"

The Tower CLI1 needs to be installed only once, to connect the server as a computing resource. Afterwards, it is not required any more2.

# Download the latest version of Tower CLI:\nwget https://github.com/seqeralabs/tower-cli/releases/download/v0.7.3/tw-0.7.3-linux-x86_64\n\n# Move the file to a directory on your $PATH and make it executable:\nmkdir -p ~/.local/bin && mv tw-* ~/.local/bin/tw && chmod +x ~/.local/bin/tw\n
    1. Tower CLI configuration \u21a9

    2. Tower Agent \u21a9

    "},{"location":"4_conda/","title":"Conda & modules","text":""},{"location":"4_conda/#conda","title":"Conda","text":"

If you work with conda, you can use mamba instead, which is a faster tool for installing packages.

    We created shared conda environments to simplify your life.
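
To use one of them, activate it by its full prefix, e.g. for the brickman environment created below:

module load miniconda/latest\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/brickman\n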

    "},{"location":"4_conda/#creating-own-shared-environment","title":"Creating own shared environment","text":"

Here is an example of how we created the shared environment called brickman.

    module load miniconda/latest\n\nconda create --prefix /maps/projects/dan1/data/Brickman/conda/envs/brickman python=3.10\nsource activate brickman\npip install cruft cookiecutter\n\nchmod -R 755 /maps/projects/dan1/data/Brickman/conda/envs/brickman\n

To install a shared conda environment for the lab, follow the steps below.

1. Create a configuration file that contains the list of packages you want to use. Name it following the pattern brickman-<NGS>.yml
    2. Populate the configuration file
    3. Create the conda environment
mamba env create -p /projects/dan1/data/Brickman/conda/envs/brickman-<NGS> -f brickman-<NGS>.yml\n
    "},{"location":"4_conda/#example-conda-environment","title":"Example conda environment","text":"

    Configuration for brickman-chipseq environment.

    name: brickman-chipseq\nchannels:\n  - conda-forge\n  - bioconda\n  - anaconda\n  - defaults\ndependencies:\n  - bioconda::bedtools==2.31.0\n  - bioconda::deeptools==2.31.0\n  - bioconda::homer==4.11\n  - bioconda::intervene==0.6.4\n  - bioconda::macs2==2.2.9.1\n  - bioconda::pygenometracks==3.8\n  - bioconda::seacr==1.3\n  - bioconda::samtools==1.17\nprefix: /projects/dan1/data/Brickman/conda/envs/brickman-chipseq\n

    To install the environment, run

    mamba env create -p /projects/dan1/data/Brickman/conda/envs/brickman-chipseq -f brickman-chipseq.yml\n
    "},{"location":"4_conda/#modules","title":"Modules","text":"
    module avail\n\nmodule load miniconda/latest\n
    "},{"location":"5_vscode/","title":"Setup R with Visual Studio Code","text":"

This guide walks you through setting up R in VSCode so you can use it on the dancmpn01fl and dancmpn02fl computing nodes.

    Info

The original RStudio server uses R version 4.0.5. If you want to stick with this version, make sure to specify it when loading modules; see the example below.
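
For example, the module load step would then become (assuming an R/4.0.5 module is available):

module load vscode_cli gcc/11.2.0 R/4.0.5 miniconda/latest\n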

    Why do you need this?

Because RStudio Server sucks when you don't have a license (and our place doesn't have one), so an alternative it is. Also, VSCode has a bunch of plugins.

    "},{"location":"5_vscode/#setting-up-remote-tunnels","title":"Setting up Remote Tunnels","text":"

    Warning

In this example we use version R/4.3.1. If you want to use a different one, change the R version!

    "},{"location":"5_vscode/#requirements","title":"Requirements","text":""},{"location":"5_vscode/#setting-up-first-time","title":"Setting up first time","text":"
    1. Login to head node: ssh $USER@danhead01fl.unicph.domain
    2. Start a new tmux session: tmux new -s rstudio
    3. Start a new job: srun -c 2 --mem=30gb --time=0-4:00:00 --pty bash
    4. Load modules: module load vscode_cli gcc/11.2.0 R/4.3.1 miniconda/latest
    5. Run command: code tunnel
  1. Choose Microsoft account when asked how you would like to log in to VSCode
  2. Open the link from the terminal and paste the CODE
  3. Login with your KU credentials
  4. Go back to the terminal and wait
  5. Hit ENTER when asked about the tunnel name
  6. If everything went well, you should see some messages about port forwarding
    6. Go to your VSCode and on the left panel search for Remote Explorer
    7. Click Sign in to the tunnels registered with Microsoft
      1. You should see dancmpn01flunicphdom or dancmpn02flunicphdom
8. Move the cursor over it and click the ->

    To use R, install additional packages by clicking Extensions in the left panel. Search for packages:

    If everything went well, you should be able to do this. If not, you know what to do.

    "},{"location":"5_vscode/#i-already-did-the-setup-i-want-my-r-again","title":"I already did the setup, I want my R again","text":"
    1. Login to head node: ssh $USER@danhead01fl.unicph.domain
    2. Start a new tmux session: tmux new -s rstudio
    3. Start a new job: srun -c 2 --mem=30gb --time=0-4:00:00 --pty bash
    4. Load modules: module load vscode_cli gcc/11.2.0 R/4.3.1 miniconda/latest
    5. Run command: code tunnel
    6. Go to your VSCode and on the left panel search for Remote Explorer
    7. Click dancmpn01flunicphdom or dancmpn02flunicphdom
    "},{"location":"5_vscode/#admins-how-to-install","title":"Admins: How to install","text":""},{"location":"5_vscode/#creating-module","title":"Creating module","text":"
    curl -Lk 'https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-x64' --output vscode_cli.tar.gz\ntar -xf vscode_cli.tar.gz\n
    "},{"location":"5_vscode/#known-issues","title":"Known issues","text":"

VSCode can be installed as a server (code-server); however, it is not possible to listen on the port when on a computing node. This works only on dangpu01fl.

    Error when trying to do reverse ssh:

    error listen EADDRINUSE: address already in use 127.0.0.1:8080\n
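
If the error is simply that port 8080 is already taken, one workaround is to bind code-server to a different local port and forward that one instead (a sketch; 8081 is arbitrary):

code-server --bind-addr 127.0.0.1:8081\n# On local machine\nssh -fNL localhost:8081:localhost:8081 $USER@dangpu01fl.unicph.domain\n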

VSCode code-server is an alternative to code tunnel that consists of running code-server on a compute node and accessing it via a web browser using a reverse ssh tunnel.

    curl -fL https://github.com/coder/code-server/releases/download/v4.90.2/code-server-4.90.2-linux-amd64.tar.gz | tar -C /maps/projects/dan1/data/Brickman/shared/modules/software/code-server/4.90.2 -xz\n
    ssh user@danhead01fl.unicph.domain\ntmux new\nsrun -c 2 --mem=30gb --time=0-4:00:00 -p gpuqueue --pty bash\nmodule load code-server\ncode-server\n# On local machine\nssh -fNL localhost:8080:localhost:8080 $USER@dangpu01fl.unicph.domain\n
    "},{"location":"6_handy-scripts/","title":"Handy scripts","text":""},{"location":"6_handy-scripts/#geo-submission","title":"GEO submission","text":"
    1. Login to GEO using Google
2. Login to danGPU and change directory to where the files are located
3. It's either ~/Brickman/projects/ or ~/ucph/ndir/SUN-RENEW-Brickman/
4. Download and update the GEO template.
5. NOTE: always make sure you use the latest version
6. Click Transfer files and copy the login information for the FTP
7. Adjust the variables from the step above

NOTE: before running the command below, make sure you are already in the folder and can see all the folders/files you want to upload. It will make the steps below simpler.

# we run a tmux session in case we lose connection\ntmux new -s geo\n\n# this logs you in to the FTP\nsftp geoftp@sftp-private.ncbi.nlm.nih.gov\npassword: <PASSWORD>\n\ncd uploads/<FOLDER>\nmkdir <RNAseq>\ncd <RNAseq>\nmput *\n
    "},{"location":"miscellaneous/dropbox/","title":"Moving Dropbox to SUND","text":"

This is a step-by-step guide to how I moved our Dropbox into SUND, organized by KU IT. In my first attempt I tried moving the files into OneDrive, but because of issues with long filenames I eventually ran into more and more problems.

The simpler solution is just to move things to the SAMBA drives.

    First, ssh into the server

    ssh danhead01fl\ntmux new -s dropbox-transfer\nmodule load rclone/1.65.1\n
    "},{"location":"miscellaneous/dropbox/#linking-remotes","title":"Linking remotes","text":""},{"location":"miscellaneous/dropbox/#dropbox","title":"Dropbox","text":"
    > n\n> dropbox\n> client_id <ENTER>\n> client_secret <ENTER>\n> y\nforward port `ssh -fNL localhost:53682:localhost:53682 danhead01fl` and access the website locally\n
    "},{"location":"miscellaneous/dropbox/#onedrive","title":"Onedrive","text":"
    > n\n> onedrive\n> client_id <ENTER>\n> client_secret <ENTER>\n> region <ENTER>\n> y\nforward port `ssh -fNL localhost:53682:localhost:53682 danhead01fl` and access the website locally\n> config_type 3\n> https://alumni.sharepoint.com/sites/UCPH_BrickmanLab\n> y\n
    "},{"location":"miscellaneous/dropbox/#test-connections","title":"Test connections","text":"
    rclone lsd Dropbox:\nrclone lsd dropbox_jb:\nrclone lsd Onedrive:\n
    "},{"location":"miscellaneous/dropbox/#copy-files","title":"Copy files","text":"

I started with individual folders first, because we had too many folders and sometimes there are timeout issues.

    rclone copy --progress --checksum Dropbox:Computerome ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Computerome\nrclone copy --progress --checksum Dropbox:Courses ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Courses\nrclone copy --progress --checksum Dropbox:Grants ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Grants\nrclone copy --progress --checksum Dropbox:Other ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Other\nrclone copy --progress --checksum Dropbox:Papers ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Papers\nrclone copy --progress --checksum Dropbox:Pictures ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Pictures\nrclone copy --progress --checksum Dropbox:People ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/People\nrclone copy --progress --checksum Dropbox:sc_seq_analysis ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/sc_seq_analysis\n

After the initial copy, I ran the copy again, this time over all the folders; most of them should already be present. This is to make sure all files were moved.

    rclone copy \\\n    --progress --checksum \\\n    --exclude=\"People/Fung/Home/IRCMS_interview_2024**\" \\\n    --exclude=\"People/Fung/Home/MB1016613_backup**\" \\\n    --exclude=\"GEO_data/**\" \\\n    Dropbox: ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/\n\nrclone copy --progress --checksum Dropbox:GEO_data ~/ucph/ndir/SUN-RENEW-Brickman/GEO_data/\nrclone copy --progress --checksum dropbox_jb: ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/JoshBrickman\n
    "},{"location":"miscellaneous/ku-computer/","title":"KU computer setup","text":""},{"location":"miscellaneous/ku-computer/#conda","title":"Conda","text":"

Go here and download the Miniconda PKG, not the BASH installer. If you're running an M1/M2 Mac, please follow this guideline.

    "},{"location":"miscellaneous/ku-computer/#example-for-chip-seq-setup","title":"Example for CHIP-seq setup","text":"
    conda create --name chipseq python=3.6\nconda activate chipseq\nconda install -c bioconda deeptools bedtools\npip install intervene\n
    "},{"location":"miscellaneous/podman/","title":"Podman","text":""},{"location":"miscellaneous/podman/#setup","title":"Setup","text":"

Storage for Podman needs to be configured to fix UID errors when running on the UTF filesystem:

    mkdir -p ~/.config/containers\ncp /maps/projects/dan1/apps/podman/4.0.2/storage.conf $HOME/.config/containers/\n

Rootless Podman also requires the username and an allowed UID range to be listed in /etc/subuid and /etc/subgid; the expected format is shown below.
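
The entries have the form username:start_uid:range_size, for example (username and range illustrative):

$ grep \"$USER\" /etc/subuid /etc/subgid\n/etc/subuid:abc123:100000:65536\n/etc/subgid:abc123:100000:65536\n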

List running containers and run a publicly available container image to confirm Podman is working:

    podman ps\npodman run -it docker.io/library/busybox\n
    "},{"location":"miscellaneous/podman/#running-the-ku-sund-dangpu-nf-core-config-with-podman","title":"Running the KU SUND DANGPU nf-core config with Podman","text":"

    Currently this is not practical because file permissions cause the following error:

    error during container init: error setting cgroup config for procHooks process: cannot set memory limit: container could not join or create cgroup\n

    The nf-core config file, podman.config, can be found at /scratch/Brickman/pipelines/

    Specify podman.config in nextflow run options to run a pipeline with Podman, e.g. for the rnaseq test profile:

    nextflow run nf-core/rnaseq -r 3.8.1 -c podman.config -profile test --outdir nfcore_test\n
    "},{"location":"tools_and_packages/alphafold2/","title":"Alphafold 2","text":""},{"location":"tools_and_packages/alphafold2/#1-running","title":"1. Running","text":""},{"location":"tools_and_packages/alphafold2/#11-create-a-target-file","title":"1.1 Create a target file","text":"
    # cat target.fasta\n>query\nMAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH\n
    "},{"location":"tools_and_packages/alphafold2/#12-setup-environments","title":"1.2. Setup environments","text":"
srun -N 1 --ntasks-per-node=10 --gres=gpu:2 --pty bash\nmodule load miniconda/latest cuda/11.4 cudnn/8.2.2\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/af2\n\ncd /maps/projects/dan1/data/Brickman/alphafold\n# note: use $HOME rather than ~ here, since tilde is not expanded inside double quotes\nexport AF2_DATA_DIR=\"$HOME/projects/data/Alphafold2/24022023\"\n
    "},{"location":"tools_and_packages/alphafold2/#13-run-monomer-cli","title":"1.3. Run monomer (cli)","text":"
    python run_alphafold.py \\\n    --fasta_paths=~/projects/data/Brickman/target_01.fasta \\\n    --output_dir=/scratch/tmp/alphatest \\\n    --model_preset=monomer \\\n    --db_preset=full_dbs \\\n    --data_dir=$AF2_DATA_DIR \\\n    --uniref30_database_path=$AF2_DATA_DIR/uniref30/UniRef30_2021_03 \\\n    --uniref90_database_path=$AF2_DATA_DIR/uniref90/uniref90.fasta \\\n    --mgnify_database_path=$AF2_DATA_DIR/mgnify/mgy_clusters_2022_05.fa \\\n    --pdb70_database_path=$AF2_DATA_DIR/pdb70/pdb70 \\\n    --template_mmcif_dir=$AF2_DATA_DIR/pdb_mmcif/mmcif_files/ \\\n    --obsolete_pdbs_path=$AF2_DATA_DIR/pdb_mmcif/obsolete.dat \\\n    --bfd_database_path=$AF2_DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \\\n    --max_template_date=2022-01-01 \\\n    --use_gpu_relax\n
    "},{"location":"tools_and_packages/alphafold2/#14-run-multimer-cli","title":"1.4. Run multimer (cli)","text":"

    The example below generates 10 models.

    python run_alphafold.py \\\n    --fasta_paths=/home/fdb589/projects/data/Brickman/WTPU_1_WTC_EBPa.fasta \\\n    --output_dir=/scratch/tmp/alphatest \\\n    --model_preset=multimer \\\n    --db_preset=full_dbs \\\n    --data_dir=$AF2_DATA_DIR \\\n    --uniref30_database_path=$AF2_DATA_DIR/uniref30/UniRef30_2021_03 \\\n    --uniref90_database_path=$AF2_DATA_DIR/uniref90/uniref90.fasta \\\n    --mgnify_database_path=$AF2_DATA_DIR/mgnify/mgy_clusters_2022_05.fa \\\n    --template_mmcif_dir=$AF2_DATA_DIR/pdb_mmcif/mmcif_files/ \\\n    --obsolete_pdbs_path=$AF2_DATA_DIR/pdb_mmcif/obsolete.dat \\\n    --pdb_seqres_database_path=$AF2_DATA_DIR/pdb_seqres/pdb_seqres.txt \\\n    --uniprot_database_path=$AF2_DATA_DIR/uniprot/uniprot.fasta \\\n    --bfd_database_path=$AF2_DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \\\n    --max_template_date=2022-01-01 \\\n    --num_multimer_predictions_per_model=10 \\\n    --use_gpu_relax\n
    "},{"location":"tools_and_packages/alphafold2/#15-example-sbatch-script","title":"1.5. Example SBATCH script","text":"
#!/bin/bash\n#SBATCH --job-name=AF2\n#SBATCH --gres=gpu:2\n#SBATCH --cpus-per-task=10\n#SBATCH --mail-type=BEGIN,END\n#SBATCH --mail-user=YOUR-EMAIL\n\nmodule load miniconda/latest cuda/11.4 cudnn/8.2.2\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/af2\ncd ~/projects/data/Brickman/alphafold\nmkdir -p /scratch/tmp/alphatest\nexport AF2_DATA_DIR=\"$HOME/projects/data/Alphafold2/24022023\"\n\nsrun python run_alphafold.py \\\n--fasta_paths=~/projects/data/Brickman/target_01.fasta \\\n--output_dir=/scratch/tmp/alphatest \\\n--model_preset=monomer \\\n--db_preset=full_dbs \\\n--data_dir=$AF2_DATA_DIR \\\n--uniref30_database_path=$AF2_DATA_DIR/uniref30/UniRef30_2021_03 \\\n--uniref90_database_path=$AF2_DATA_DIR/uniref90/uniref90.fasta \\\n--mgnify_database_path=$AF2_DATA_DIR/mgnify/mgy_clusters_2022_05.fa \\\n--pdb70_database_path=$AF2_DATA_DIR/pdb70/pdb70 \\\n--template_mmcif_dir=$AF2_DATA_DIR/pdb_mmcif/mmcif_files/ \\\n--obsolete_pdbs_path=$AF2_DATA_DIR/pdb_mmcif/obsolete.dat \\\n--bfd_database_path=$AF2_DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \\\n--max_template_date=2022-01-01 \\\n--use_gpu_relax\n
    "},{"location":"tools_and_packages/alphafold2/#2-installation","title":"2. Installation","text":"
conda create --prefix /maps/projects/dan1/data/Brickman/conda/envs/af2 python=3.8\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/af2\n\nmamba install hmmer\npip install py3dmol\nmamba install pdbfixer==1.7\nmamba install -c conda-forge openmm=7.5.1\n\ncd /maps/projects/dan1/data/Brickman/\ngit clone --branch main https://github.com/deepmind/alphafold alphafold\npip install -r ./alphafold/requirements.txt\npip install --no-dependencies ./alphafold\n\n# stereo chemical props needs to be in common folder\nwget -q -P /maps/projects/dan1/data/Brickman/alphafold/alphafold/common/ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt\n\n# skipping content part\nmkdir -p ./alphafold/data/params && cd ./alphafold/data/params\nwget https://storage.googleapis.com/alphafold/alphafold_params_colab_2022-12-06.tar\ntar --extract --verbose --preserve-permissions --file alphafold_params_colab_2022-12-06.tar\npip install ipykernel ipywidgets tqdm\npip install --upgrade scprep phate\n\n# Install jax\nmodule load miniconda/latest\nmodule load cuda/11.4 cudnn/8.2.2\nexport CUDA_VISIBLE_DEVICES='3'\npip install \"jax[cuda11_cudnn82]\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n\n# fix last issues\nmamba install -c conda-forge -c bioconda hhsuite\nmamba install -c bioconda kalign3\npip install numpy==1.21.6\n
    "},{"location":"tools_and_packages/alphafold2/#21-download-references","title":"2.1. Download references","text":"

    Note

Downloading references will not work in one try; I had to do a lot of manual re-running of scripts.

# create folder\nmkdir -p ~/projects/data/Alphafold2/24022023\ncd ~/projects/data/Alphafold2/24022023\n\n# Download all databases\nsh download_all_data.sh ~/projects/data/Alphafold2/24022023/ > download.log 2> download_all.log\n\n# Some fix-ups\n# mmCIF will not work because the firewall blocks the port, so I found this workaround online\n# ref: https://github.com/deepmind/alphafold/issues/196\nwget -e robots=off -r --no-parent -nH --cut-dirs=7 -q ftp://ftp.ebi.ac.uk/pub/databases/pdb/data/structures/divided/mmCIF/ -P \"${RAW_DIR}\"\n\n# Last step is to fix all the permissions\nchmod -R 755 24022023/\n
    "},{"location":"tools_and_packages/alphafold2/#references","title":"References","text":""},{"location":"tools_and_packages/dReg/","title":"dReg","text":""},{"location":"tools_and_packages/dReg/#1-running","title":"1. Running","text":""},{"location":"tools_and_packages/dReg/#make-dreg-bigwigs","title":"Make dReg bigwigs","text":"

If your nascent RNA-seq data is already aligned, bigwigs suitable for use with dReg can be prepared using the Danko-Lab RunOnBamToBigWig script.

If you have fastq files from PRO-seq, GRO-seq, or ChRO-seq, run the Danko-Lab's mapping pipeline using the shared dReg_dataprep conda environment.

    Example SBATCH script for mapping pipeline

    #!/bin/bash\n\n#SBATCH --job-name=pro_align\n#SBATCH -c 20\n#SBATCH --mem=30gb\n#SBATCH --time=00-24:00:00\n#SBATCH --output=01_proseq_alignment.out\n#SBATCH --mail-type=BEGIN,END\n#SBATCH --mail-user=YOUR-EMAIL\n\nmodule load miniconda/latest\nsource activate dReg_dataprep\n\nPROSEQ=(\"/maps/projects/dan1/data/Brickman/proseq2.0/proseq2.0.bsh\")\nGENO=(\"/scratch/Brickman/references/mus_musculus/ensembl/GRCm38_102/\")\nRESL=(\"/maps/projects/dan1/data/Brickman/projects/NAME_DATE/data/external/proseq/\")\nSAMPLES=(\"SRX14164616_SRR18010280 SRX14164617_SRR18010278\")\n\nfor sample in ${SAMPLES}; do\n    bash ${PROSEQ} -i ${GENO}bwa \\\n    -c ${GENO}GRCm38.102.genome \\\n    -PE --RNA5=R2_5prime --UMI1=6 \\\n    -O ${RESL} \\\n    -I ${sample} \\\n    --thread=20\ndone\n
    "},{"location":"tools_and_packages/dReg/#gpu-check","title":"GPU check","text":"

Check available GPUs and running processes before using dReg. GPU 0 is reserved for the Brickman group.

    nvidia-smi\n
    "},{"location":"tools_and_packages/dReg/#example-dreg-script","title":"Example dReg script","text":"
    #!/bin/bash\n\n#SBATCH --job-name=dREG\n#SBATCH -c 30\n#SBATCH --mem=30gb\n#SBATCH --time=00-24:00:00\n#SBATCH --output=01-1_dREG.out\n#SBATCH --mail-type=BEGIN,END\n#SBATCH --mail-user=YOUR-EMAIL\n\nmodule load miniconda/latest cuda/11.8-dangpu cudnn/8.6.0-dangpu\nsource activate dReg\n\nBW=(\"../data/assays/RNA_INITIAL_DATE/processed/bw/\")\nRESL=(\"../results/01/dREG/\")\ndREG=(\"/projects/dan1/data/Brickman/dREG/run_dREG.bsh\")\nMODEL=(\"/projects/dan1/data/Brickman/dREG/resources/asvm.gdm.6.6M.20170828.rdata\")\n\n\nSAMPLES=(\"0h_A 0h_B 2h_A 2h_B\")\n\nfor sample in ${SAMPLES}; do\n    bash ${dREG} ${BW}${sample}_sorted_filt_dedup_plus.bw ${BW}${sample}_sorted_filt_dedup_minus.bw \\\n    ${RESL}${sample}_test ${MODEL} \\\n    30 0\ndone\n
    "},{"location":"tools_and_packages/dReg/#2-installation","title":"2. Installation","text":""},{"location":"tools_and_packages/dReg/#installing-dreg","title":"Installing dReg","text":"

    Note: Python version in conda env must be 3.8, and R version < 4.0

    cd /maps/projects/dan1/data/Brickman/conda/\nmodule load miniconda/latest\nmamba env create -p /projects/dan1/data/Brickman/conda/envs/dReg -f dREG.yml\nsource activate dReg\n\ncd /maps/projects/dan1/data/Brickman/\ngit clone https://github.com/Danko-Lab/dREG\ncd dREG\nmake R_dependencies\n\nR\ndevtools::install_github(\"CshlSiepelLab/RPHAST\")\ndevtools::install_version(\"MASS\", version=\"7.3-51.5\", repos=\"https://mirrors.dotsrc.org/cran/\")\ninstall.packages(\"e1071\", repos=\"https://mirrors.dotsrc.org/cran/\")\ndevtools::install_version(\"randomForest\", version=\"4.6-14\", repos=\"https://mirrors.dotsrc.org/cran/\")\nquit()\n\nmake dreg\nmkdir resources\ncd resources\nwget ftp://cbsuftp.tc.cornell.edu/danko/hub/dreg.models/asvm.gdm.6.6M.20170828.rdata\n
    "},{"location":"tools_and_packages/dReg/#installing-rgtsvm","title":"Installing Rgtsvm","text":"

    Rgtsvm is required for dReg to use GPU resources

    # make sure in dREG repo and that dReg environment is activated\ncd /maps/projects/dan1/data/Brickman/dREG\nsource activate dReg\n\nR\ninstall.packages(c(\"bit64\", \"snow\", \"SparseM\"), repos=\"https://mirrors.dotsrc.org/cran/\")\ndevtools::install_version(\"lattice\", version=\"0.20-41\", repos=\"https://mirrors.dotsrc.org/cran/\")\ninstall.packages(\"Matrix\", repos=\"https://mirrors.dotsrc.org/cran/\")\nquit()\nmamba install -c conda-forge boost=1.70.0\n\nmkdir third-party\ncd third-party\ngit clone https://github.com/Danko-Lab/Rgtsvm.git\ncd Rgtsvm\n\nmodule load cuda/11.8-dangpu\nmodule load cudnn/8.6.0-dangpu\n\nR CMD INSTALL --configure-args=\"--with-boost-home=$CONDA_PREFIX\" Rgtsvm\n
    "},{"location":"tools_and_packages/packages/","title":"Bioinformatics tools","text":"Tool Description NGS Language Link Functional enrichment on genomic regions CHIP-seq ATAC-seq R https://github.com/jokergoo/rGREAT Pseudotime inference scRNA-seq Python https://github.com/LouisFaure/scFates nan Single-cell analysis package scRNA-seq Python https://github.com/scverse/scanpy nan AI probabilistic package for transfer learning DR and more scRNA-seq Python https://github.com/scverse/scvi-tools Gene set enrichment analysis on steroids scRNA-seq Python https://github.com/zqfang/GSEApy nan UpsetR on stereoids (complicated Venn Diagrams) Plotting R https://github.com/krassowski/complex-upset nan Complex heatmap Plotting Python https://github.com/DingWB/PyComplexHeatmap nan"},{"location":"tools_and_packages/ucsc_liftover/","title":"UCSC liftover tool","text":"

    Documentation for UCSC liftover.

    "},{"location":"tools_and_packages/ucsc_liftover/#issue-separate-peaks-map-to-same-coordinates-after-liftover","title":"Issue: separate peaks map to same coordinates after liftover","text":""},{"location":"tools_and_packages/ucsc_liftover/#solution","title":"Solution","text":"

Remove any peaks with overlapping coordinates after liftover before using the lifted-over peak file:

    #!/bin/bash\n\nmodule load bedtools\n\nEXTL=(\"../data/external/\")\n\n# Sort the lifted over peakfile for use with bedtools\nsort -k1,1 -k2,2n ${EXTL}wong_fig3c_peaks_GRCh38.bed > ${EXTL}peaks.tmp && mv ${EXTL}peaks.tmp ${EXTL}wong_fig3c_peaks_GRCh38.bed\n\n# Bedtools merge count rows contributing to merged peaks (overlapping peaks will have count > 1)\nbedtools merge -i ${EXTL}wong_fig3c_peaks_GRCh38.bed -c 1 -o count > ${EXTL}counted.bed\n\n# Get non-overlapping peaks\nawk '/\\t1$/{print}' ${EXTL}counted.bed > ${EXTL}filtered.bed\n\n# Intersect original file with non-overlapping peaks and output overlapping peaks\nbedtools intersect -wa -a ${EXTL}wong_fig3c_peaks_GRCh38.bed -b ${EXTL}filtered.bed > ${EXTL}wong_fig3c_peaks_GRCh38_correct_liftover.bed\nbedtools intersect -v -a ${EXTL}wong_fig3c_peaks_GRCh38.bed -b ${EXTL}filtered.bed > ${EXTL}wong_fig3c_peaks_GRCh38_overlapping.bed\n
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":"

    Welcome to the Brickman Lab wiki!

    Here you can find documentation for our analysis workflows. For more information about our research, visit the Brickman Group website.

    "},{"location":"#transcriptional-basis-for-cell-fate-choice","title":"Transcriptional basis for cell fate choice","text":"

    The Brickman Group aims to understand the transcriptional basis for early embryonic lineage specification.

    We are interested in the dynamic mechanisms by which cells can both reversible prime towards a particular fate or undergo a transition into commitment.

    "},{"location":"#publications","title":"Publications","text":"Selected publications

    Wong, Y. F., Kumar, Y., Proks, M., Herrera, J. A. R., Rothov\u00e1,M. M., Monteiro, R. S., Pozzi, S., Jennings, R. E., Hanley, N. A., Bickmore, W. A., and Brickman, J. M. (2023). Expansion of ventral foregut is linked to changes in the enhancer landscape for organ-specific differentiation. Nature Cell Biology, doi: 10.1038/s41556-022-01075-8.

    Perera, M., Nissen, S. B., Proks, M., Pozzi, S., Monteiro, R. S., Trusina, A., and Brickman, J. M. (2022). Transcriptional heterogeneity and cell cycle regulation as central determinants of Primitive Endoderm priming. eLife, doi: 10.7554/eLife.78967.

    Rothov\u00e1, M. M., Nielsen, A. V., Proks, M., Wong, Y. F., Riveiro, A. R., Linneberg-Agerholm, M., David, E., Amit, I., Trusina, A., and Brickman, J. M. (2022). Identification of the central intermediate in the extra-embryonic to embryonic endoderm transition through single-cell transcriptomics. Nature Cell Biology, doi: 10.1038/s41556-022-00923-x.

    Riveiro, A. R., and Brickman, J. M. (2020). From pluripotency to totipotency: an experimentalist's guide to cellular potency. Development, doi: 10.1242/dev.189845.

    Hamilton, W.B., Mosesson, Y., Monteiro, R.S., Emdal, K.B., Knudsen, T.E., Francavilla, C., Barkai, N., Olsen, J.V. and Brickman, J.M. (2019). Dynamic lineage priming is driven via direct enhancer regulation by ERK. Nature, doi: 10.1038/s41586-019-1732-z.

    Weinert, B.T., Narita, T., Satpathy, S., Srinivasan, B., Hansen, B.K., Scholz, C., Hamilton, W.B., Zucconi, B.E., Wang, W.W., Liu, W.R., Brickman, J.M., Kesicki, E.A., Lai, A., Bromberg, K.D., Cole, P.A., and Choudhary, C. (2018). Time-Resolved Analysis Reveals Rapid Dynamics and Broad Scope of the CBP/p300 Acetylome. Cell 174, 231-244.e212, doi:10.1016/j.cell.2018.04.033.

    Anderson, K.G.V., Hamilton, W.B., Roske, F.V., Azad, A., Knudsen, T.E., Canham, M.A., Forrester, L.M., and Brickman, J.M. (2017). Insulin fine-tunes self-renewal pathways governing naive pluripotency and extra-embryonic endoderm. Nature Cell Biology 19, 1164-1177, doi:10.1038/ncb3617.

    Nissen, S.B., Perera, M., Gonzalez, J.M., Morgani, S.M., Jensen, M.H., Sneppen, K., Brickman, J.M., and Trusina, A. (2017). Four simple rules that are sufficient to generate the mammalian blastocyst. PLoS Biol 15, e2000737, doi:10.1371/journal.pbio.2000737. *joint senior author

    Migueles, R.P., Shaw, L., Rodrigues, N.P., May, G., Henseleit, K., Anderson, K.G., Goker, H., Jones, C.M., de Bruijn, M.F., Brickman, J.M., and Enver, T. (2017). Transcriptional regulation of Hhex in hematopoiesis and hematopoietic stem cell ontogeny. Developmental Biology 424, 236-245, doi:10.1016/j.ydbio.2016.12.021.

    Illingworth, R.S., H\u00f6lzenspies, J.J., Roske, F.V., Bickmore, W.A., and Brickman, J.M. (2016). Polycomb enables primitive endoderm lineage priming in embryonic stem cells. Elife 5, doi:10.7554/eLife.14926.

    Martin Gonzalez, J., Morgani, S.M., Bone, R.A., Bonderup, K., Abelchian, S., Brakebusch, C., and Brickman, J.M. (2016). Embryonic Stem Cell Culture Conditions Support Distinct States Associated with Different Developmental Stages and Potency. Stem Cell Reports 7, 177-191, doi:10.1016/j.stemcr.2016.07.009.

    "},{"location":"#datasets","title":"Datasets","text":"

    Rothova et al., (2022). Nature Cell Biology. Single-cell RNA-seq datasets from FOXA2Venus reporter mouse embryos and embryonic stem cell differentiation towards endoderm.

    "},{"location":"0_setup/","title":"First time on danserver","text":"

    For starting on the server make sure to read:

    "},{"location":"0_setup/#first-time-on-server-checklist","title":"First time on server checklist","text":"
    1. Login to danhead: ssh $USER@danhead01fl.unicph.domain
    2. Run nano ~/.bash_profile
    if [ -f ~/.bashrc ]; then\n . ~/.bashrc\nfi\n
    1. Run nano ~/.bashrc
    # .bashrc\n\n# Source global definitions\nif [ -f /etc/bashrc ]; then\n . /etc/bashrc\nfi\n\n# User specific environment\nif ! [[ \"$PATH\" =~ \"$HOME/.local/bin:$HOME/bin:\" ]]\nthen\n    PATH=\"$HOME/.local/bin:$HOME/bin:$PATH\"\nfi\nexport PATH\n\n# Uncomment the following line if you don't like systemctl's auto-paging feature:\n# export SYSTEMD_PAGER=\n\n# User specific aliases and functions\n### Source DanGPU definitions\nif [ -f /maps/projects/dan1/apps/etc/bashrc ]; then\n . /maps/projects/dan1/apps/etc/bashrc\nfi\n\n### Source Brickman definitions\nif [ -f /maps/projects/dan1/data/Brickman/config/brickman.bashrc ]; then\n . /maps/projects/dan1/data/Brickman/config/brickman.bashrc\nfi\n
    1. Logout and login again, you should have see now Brickman folder
    "},{"location":"1_rdm-guidelines/","title":"Research Data Management Guidelines for NGS","text":"

    This section provides guidelines for effective research data management within our lab. By adopting these guidelines, we aim to improve data organization and naming conventions, leading to enhanced data governance and research efficiency. The guidelines include the following steps:

1. Adhere to folder structure and naming conventions for Assays and Projects folders.
2. Add relevant metadata to a metadata.yml file in each folder.
3. Create a database from metadata files in Assays and Projects folders and browse it with a Panel python app.
4. Projects folders will be version controlled with GitHub and the Brickman organization.
5. Projects reports will be displayed under the Brickman organization GitHub Pages.
6. Projects will be synchronized and archived in Zenodo, which will give a DOI that can be used in a publication.
7. NGS Assays folder will be uploaded to GEO, with the information provided in the metadata file.
8. Create a Data Management Plan template that is prefilled with repetitive information using DMPonline.
    "},{"location":"1_rdm-guidelines/#1-folder-structure-and-organization","title":"1. Folder structure and organization","text":"

    To ensure efficient data management, it is important to establish a consistent approach to organizing research data. We consider the following practices:

    "},{"location":"1_rdm-guidelines/#11-template-engine","title":"1.1 Template engine","text":"

    We are currently using a cookiecutter template to generate a folder structure. Use cruft when generating assay and project folders to allow us to validate and sync old templates with the latest version.

    See this section to get started with a new project/assay.
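
As a quick illustration of the day-to-day cruft commands (the template URL here is a placeholder, not the actual repository):

module load miniconda/latest\nsource activate brickman\n\n# generate a new folder from the cookiecutter template via cruft\ncruft create https://github.com/brickmanlab/<TEMPLATE_REPO>\n\n# later, from inside an existing folder: check whether it has drifted from the template, and sync it\ncruft check\ncruft update\n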

    "},{"location":"1_rdm-guidelines/#12-assay-folder","title":"1.2 Assay folder","text":"

    For each NGS experiment there should be an Assay folder that will contain all experimental datasets (raw files and pipeline processed files). Inside Assay there will be subfolders named after a unique NGS ID and the date it was created:

    <Assay-ID>_YYYYMMDD\n
    "},{"location":"1_rdm-guidelines/#assay-id-code-names","title":"Assay ID code names","text":"

For example, CHIP_20230101 is a ChIPseq assay made on 1 January 2023.

    "},{"location":"1_rdm-guidelines/#folder-structure","title":"Folder Structure","text":"
    CHIP_20230424\n\u251c\u2500\u2500 description.yaml\n\u251c\u2500\u2500 metadata.yaml\n\u251c\u2500\u2500 pipeline.md\n\u251c\u2500\u2500 processed\n\u2514\u2500\u2500 raw\n   \u251c\u2500\u2500 .fastq.gz\n   \u2514\u2500\u2500 samplesheet.csv\n
    "},{"location":"1_rdm-guidelines/#13-project-folder","title":"1.3 Project folder","text":"

    There should be another folder called Projects that will contain project information and data analysis.

    A project may use one or more assays to answer a scientific question. This should be, for example, all the data analysis related to a publication.

    The project folder should be named after a unique identifier, such as:

    <Project-ID>_YYYYMMDD\n

    <Project-ID> should be the initials of the owner of the project folder and the publication year, e.g. JARH_et_al_20230101.

    "},{"location":"1_rdm-guidelines/#folder-structure_1","title":"Folder structure","text":"
    <Project-ID>_20230424\n\u251c\u2500\u2500 data\n\u2502  \u251c\u2500\u2500 assays\n\u2502  \u251c\u2500\u2500 external\n\u2502  \u2514\u2500\u2500 processed\n\u251c\u2500\u2500 documents\n\u2502  \u2514\u2500\u2500 Non-sensitive_NGS_research_project_template.docx\n\u251c\u2500\u2500 notebooks\n\u2502  \u2514\u2500\u2500 01_data_analysis.rmd\n\u251c\u2500\u2500 README.md\n\u251c\u2500\u2500 reports\n\u2502  \u251c\u2500\u2500 figures\n\u2502  \u2502  \u2514\u2500\u2500 01_data_analysis\n\u2502  \u2514\u2500\u2500 01_data_analysis.html\n\u251c\u2500\u2500 requirements.txt\n\u251c\u2500\u2500 results\n\u2502  \u2514\u2500\u2500 01_data_analysis/\n\u251c\u2500\u2500 scripts\n\u251c\u2500\u2500 description.yml\n\u2514\u2500\u2500 metadata.yml\n
    "},{"location":"1_rdm-guidelines/#14-synchronization-with-dangpu-server","title":"1.4 Synchronization with DanGPU server","text":"

We will have to set up a cron job to perform a one-way sync between the /projects folder and the NGS_data folder. All the analysis will be done on the danGPU server, with no exceptions!
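
A one-way sync could look something like the crontab entry below; the schedule and paths are placeholders, not the final setup.

# run every night at 02:00; -a preserves attributes, --delete mirrors removals\n0 2 * * * rsync -a --delete /maps/projects/dan1/data/Brickman/projects/ /maps/projects/dan1/data/Brickman/NGS_data/\n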

After a project is done and published, it will be moved to NGS_data.

    "},{"location":"1_rdm-guidelines/#15-general-naming-conventions-and-more-info","title":"1.5 General naming conventions and more info","text":"

    Transcriptomics metadata standards and fields

    More info on naming conventions for different types of files and analysis is in development.

name | description | naming_convention | file format | example\n--- | --- | --- | --- | ---\n.fastq | raw sequencing reads | nan | nan | sampleID_run_read1.fastq\n.fastqc | quality control from fastqc | nan | nan | sampleID_run_read1.fastqc\n.bam | aligned reads | nan | nan | sampleID_run_read1.bam\nGTF | sequence annotation | nan | nan | one of https://www.gencodegenes.org/\nGFF | sequence annotation | nan | nan | one of https://www.gencodegenes.org/\n.bed | genome locations | nan | nan | nan\n.bigwig | genome coverage | nan | nan | nan\n.fasta | sequence data (nucleotide/aminoacid) | nan | nan | one of https://www.gencodegenes.org/\nMultiqc report | QC aggregated report | <assayID>_YYYYMMDD.multiqc | multiqc | RNA_20200101.multiqc\nCount matrix | final count matrix | <assayID>_cm_aligner_YYYYMMDD.tsv | tsv | RNA_cm_salmon_20200101.tsv\nDEA | differential expression analysis results | DEA_<condition1-condition2>_LFC<absolute_threshold>_p<pvalue decimals>_YYYYMMDD.tsv | tsv | DEA_treat-untreat_LFC1_p01_20200101.tsv\nDBA | differential binding analysis results | DBA_<condition1-condition2>_LFC<absolute_threshold>_p<pvalue decimals>_YYYYMMDD.tsv | tsv | DBA_treat-untreat_LFC1_p01_20200101.tsv\nMAplot | MA plot | MAplot_<condition1-condition2>_YYYYMMDD.jpeg | jpeg | MAplot_treat-untreat_20200101.jpeg\nHeatmap plot | Heatmap plot of anything | heatmap_<type>_YYYYMMDD.jpeg | jpeg | Heatmap_sampleCor_20200101.jpeg\nVolcano plot | Volcano plot | volcano_<condition1-condition2>_YYYYMMDD.jpeg | jpeg | volcano_treat-untreat_20200101.jpeg\nVenn diagram | Venn diagram | venn_<type>_YYYYMMDD.jpeg | jpeg | venn_consensus_20200101.jpeg\nEnrichment table | Enrichment results | nan | tsv | nan\n"},{"location":"1_rdm-guidelines/#2-metadata-and-documentation","title":"2. Metadata and documentation","text":"

    Accurate documentation and metadata play a crucial role in facilitating data discovery and interpretation. Consider the following guidelines:

    "},{"location":"1_rdm-guidelines/#21-assay-metadata-fields","title":"2.1 Assay metadata fields","text":"Metadata field Definition Format Example assay_id Identifier for the assay <assay>_<codename>_YYYYMMDD CHIP_Oct4_20200101 assay What kind of NGS was used in your experiment? [\"CHIP\", \"RNA\", \"ATAC\", \"SCR\", \"PROT\", \"CAT\", \"CAR\", \"RIME\", \"TAP\"] ChIPseq owner Who performed the experiment? <First Name> <Last Name> Jose Romero date Date of sequencing, should be the same as defined by Genomics Platform in YYYYMMDD format! YYYYMMDD 20200101 codename Your name initials [Example: JB for Josh Brickman] <Initials OR keyword> JR eln_id Optional: Electronic lab notebook ID Free text 12345 technology What technology was used? [Example: 10X Genomics if you used SCR] Free text 10X genomics sequencer What sequencing machine was used? [Example: NovaSeq 2000/NextSeq 2000/NextSeq 500] Free text NextSeq 2000 seq_kit What sequencing kit did you use? Please provide product number if available Free text nan n_samples How many samples have been sequenced? <integer> 9 is_paired Paired fastq files or not <single-end OR paired-end> single-end pipeline Pipeline name [Example: nf-core/rnaseq 3.12.0 or custom] Free text nf-core/chipseq -r 1.0 processed_by Person responsible for pre-processing (pipeline execution) <First Name> <Last Name> Sarah Lundregan organism What organism is this? <mouse OR human OR other> mouse organism_version Which version of genome was used [Example: mm10, hg38] Free text mm10 organism_subgroup In vitro or in vivo? <in vivo OR in vitro> in vitro origin Is this internal experiment of external (collaborator/publication)? <internal OR external> internal note Optional: Was there something worth knowing? Free text Low quality experiment/Indexes are swapped ... genomics_path Path to where the data is </path/to/file> smb:/path/to/file"},{"location":"1_rdm-guidelines/#22-project-metadata-fields","title":"2.2 Project metadata fields","text":"

    In development.

Metadata field | Definition | Format | Example\n--- | --- | --- | ---\nproject | Project name | <name>_<keyword>_YYYY | lundregan_oct4_2023\nauthor | Owner of the project | <First name> <Surname> | Sarah Lundregan\ndate | Date of creation | YYYYMMDD | 20230101\ndescription | Short description of the project | Plain text | This is a project describing the effect of Oct4 perturbation after pERK activation\n"},{"location":"1_rdm-guidelines/#3-data-catalogue-and-browser","title":"3. Data catalogue and browser","text":"

@SLundregan is in the process of building a prototype for Assay, using the metadata contained in all description.yml and metadata.yml files in the assay folder. This will be in the form of an SQLite database that is easily updatable by running a helper script.

    @SLundregan is also working on a browsable database using Panel python app. The app will display the latest version of the SQLite database. Clicking on an item from the database will open a tab containing all available metadata for the assay.

Also, it would be nice if you could create an Assay folder directly from there, making it easy to fill in the info for the metadata and GEO submission (see below).

In the future, you could ideally visualize an analysed single cell RNAseq dataset by opening a Cirrocumulus session.

    "},{"location":"1_rdm-guidelines/#4-projects-version-control","title":"4. Projects version control","text":"

All projects should be version controlled using GitHub under the Brickman organization. After creating a project from the cookiecutter template, initialize a Git repository in the folder. The Git repository can stay private until it is ready for publication.
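
A minimal sketch of that flow (the remote URL pattern is an assumption):

cd /maps/projects/dan1/data/Brickman/projects/<PROJECT_ID>_YYYYMMDD\ngit init\ngit add .\ngit commit -m \"Initial project structure from template\"\n\n# create a private repository under the Brickman organization first, then:\ngit remote add origin git@github.com:brickmanlab/<PROJECT_ID>.git\ngit push -u origin main\n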

    "},{"location":"1_rdm-guidelines/#5-projects-github-pages","title":"5. Projects GitHub pages","text":"

Using GitHub Pages, it is possible to display your data analyses (or anything related to the project) inside the Projects folder so that they are open to the public in HTML format. This is great for transparency and reproducibility. This can be done after the paper has been made public (it is not possible with a private repository without paying).

Info on how this is done should be put here; a possible approach is sketched below.
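
Until that is written up, one plausible route (an assumption, not an agreed procedure) is to publish the rendered reports with MkDocs' gh-deploy, which builds a site and pushes it to the gh-pages branch:

source activate brickman\npip install mkdocs\n\n# run from the project repository root, assuming a minimal mkdocs.yml that points at the reports\nmkdocs gh-deploy --force\n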

    "},{"location":"1_rdm-guidelines/#6-project-archiving-in-zenodo","title":"6. Project archiving in Zenodo","text":"

Before submitting, link the repository to Zenodo and then create a Git release. This release will be caught by Zenodo, which will give you a DOI that you can submit along with the manuscript.
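
For example, once the repository is linked on zenodo.org, tagging and releasing is enough to mint the DOI (the tag name is illustrative):

git tag -a v1.0.0 -m \"Manuscript submission\"\ngit push origin v1.0.0\n# then create a GitHub release from this tag in the web UI; Zenodo archives the release\n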

    "},{"location":"1_rdm-guidelines/#7-data-upload-to-geo","title":"7. Data upload to GEO","text":"

The raw data from NGS experiments will be uploaded to the Gene Expression Omnibus (GEO). Whenever a new Assay folder is created, the data owner must fill in the required documentation and information needed to make the GEO submission as smooth as possible.

    "},{"location":"1_rdm-guidelines/#8-create-a-data-management-plan","title":"8. Create a Data Management Plan","text":"

    From the University of Copenhagen RDM team

A Data Management Plan (DMP) is a planning tool that helps researchers to establish good practices for working with physical material and data in a research project. A DMP covers all relevant aspects of research data management throughout the project. Writing a DMP early on in a project helps:

We have written a DMP template that is prefilled with repetitive information using DMPonline and the Horizon Europe guidelines. This template contains all the necessary information regarding common practices that we will use, the repositories we use for NGS, etc. The template is part of the project folder template, under documents. You can check the file here.

The Horizon Europe template is mostly focused on digital data, so it may not be the best fit for the Brickman Lab, which is mostly a wet lab with some bioinformatics. We will start working on another DMP based on the KU template, which is designed for both physical and digital data.

    "},{"location":"2_starting-assay-project/","title":"Starting a new assay or project","text":"

Whenever you obtain sequencing data from the Genomics Platform, you have to create an Assay. By running the commands below, you will have the option to fill in all required information about the experiment. This workflow helps us track all sequencing done in our lab.

    "},{"location":"2_starting-assay-project/#assay","title":"Assay","text":"

    When you sequence an experiment, we create an Assay out of it, so we can use it in a project afterwards.

    Login to danhead and run command:

    create_assay\n
    "},{"location":"2_starting-assay-project/#project","title":"Project","text":"

Every time you want to run an analysis, you should create a project. Our folder structure will allow you to easily link various experiments to your project and make your analysis easier.

    Please use the following naming convention: surname-<YOUR_CODENAME>

    create_project\n

    Link required assays to your project.

    ln -s /maps/projects/dan1/data/Brickman/assays/<ASSAY_ID> /maps/projects/dan1/data/Brickman/projects/<PROJECT_ID>/data/assays/\n

    Link external data if needed

    ln -s /maps/projects/dan1/data/Brickman/shared /maps/projects/dan1/data/Brickman/projects/<PROJECT_ID>/data/external/\n
    "},{"location":"3_pipelines/","title":"Running pipelines","text":"

    By default, we run nf-core pipelines. To run a pipeline, read the official documentation with an example.
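
For orientation, a typical launch looks like the sketch below; the pipeline, version, and file names are placeholders.

nextflow run nf-core/rnaseq -r 3.14.0 \\\n    --input samplesheet.csv \\\n    --outdir results \\\n    -profile test\n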

    "},{"location":"3_pipelines/#monitoring-runs-with-nextflow-tower","title":"Monitoring runs with Nextflow Tower","text":"

    This is a guide on how to use Nextflow Tower to monitor nf-core pipeline runs.

    We have created an API token for our GitHub account (brickmanlab) and restricted it to run only pipelines, nothing else. The TOWER_WORKSPACE_ID and TOWER_ACCESS_TOKEN are stored in Brickman/config/brickman.bashrc.

To do more advanced things, you have to create your own personal access token.
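
With a personal token, the pieces fit together roughly like this (the values and the pipeline invocation are placeholders):

export TOWER_ACCESS_TOKEN=<your-personal-token>\nexport TOWER_WORKSPACE_ID=<workspace-id>\n\n# -with-tower sends run telemetry to the monitoring workspace\nnextflow run nf-core/rnaseq -r 3.14.0 -profile test --outdir results -with-tower\n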

    "},{"location":"3_pipelines/#tower-cli-installation","title":"Tower CLI installation","text":"

The Tower CLI1 needs to be installed only once, to connect the server as a computing resource. Afterward, it is not required any more2.

# Download the latest version of Tower CLI:\nwget https://github.com/seqeralabs/tower-cli/releases/download/v0.7.3/tw-0.7.3-linux-x86_64\n\n# Make the file executable and move it to a directory on your $PATH:\nmkdir -p ~/.local/bin && mv tw-0.7.3-linux-x86_64 ~/.local/bin/tw && chmod +x ~/.local/bin/tw\n
    1. Tower CLI configuration \u21a9

    2. Tower Agent \u21a9

    "},{"location":"4_conda/","title":"Conda & modules","text":""},{"location":"4_conda/#conda","title":"Conda","text":"

If you work with conda, you can use mamba instead, which is a faster tool for installing packages.

    We created shared conda environments to simplify your life.

    "},{"location":"4_conda/#creating-own-shared-environment","title":"Creating own shared environment","text":"

Here is an example of how we created the shared environment called brickman.

module load miniconda/latest\n\nconda create --prefix /maps/projects/dan1/data/Brickman/conda/envs/brickman python=3.10\n# activate by full path, since the environment was created with --prefix\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/brickman\npip install cruft cookiecutter\n\nchmod -R 755 /maps/projects/dan1/data/Brickman/conda/envs/brickman\n

To install a shared conda environment for the lab, follow the steps below.

1. Create a configuration file that contains the list of packages you want to use. Name it brickman-<NGS>.yml
    2. Populate the configuration file
    3. Create the conda environment
mamba env create -p /projects/dan1/data/Brickman/conda/envs/brickman-<NGS> -f brickman-<NGS>.yml\n
    "},{"location":"4_conda/#example-conda-environment","title":"Example conda environment","text":"

    Configuration for brickman-chipseq environment.

    name: brickman-chipseq\nchannels:\n  - conda-forge\n  - bioconda\n  - anaconda\n  - defaults\ndependencies:\n  - bioconda::bedtools==2.31.0\n  - bioconda::deeptools==2.31.0\n  - bioconda::homer==4.11\n  - bioconda::intervene==0.6.4\n  - bioconda::macs2==2.2.9.1\n  - bioconda::pygenometracks==3.8\n  - bioconda::seacr==1.3\n  - bioconda::samtools==1.17\nprefix: /projects/dan1/data/Brickman/conda/envs/brickman-chipseq\n

    To install the environment, run

    mamba env create -p /projects/dan1/data/Brickman/conda/envs/brickman-chipseq -f brickman-chipseq.yml\n
    "},{"location":"4_conda/#modules","title":"Modules","text":"
    module avail\n\nmodule load miniconda/latest\n
    "},{"location":"5_vscode/","title":"Setup R with Visual Studio Code","text":"

This guide walks you through setting up R in VSCode so you can use it on the dancmpn01fl and dancmpn02fl computing nodes.

    Info

The original RStudio server uses R version 4.0.5. If you want to stick with this version, make sure to specify it when loading modules.

    Why do you need this?

Because RStudio Server sucks when you don't have a license, which is the case at our place, so an alternative it is. Also, VSCode has a bunch of plugins.

    "},{"location":"5_vscode/#setting-up-remote-tunnels","title":"Setting up Remote Tunnels","text":"

    Warning

    In this example we use version R/4.2.1. If you want to use a different one change the R version!

    "},{"location":"5_vscode/#requirements","title":"Requirements","text":""},{"location":"5_vscode/#setting-up-first-time","title":"Setting up first time","text":"
    1. Login to head node: ssh $USER@danhead01fl.unicph.domain
    2. Start a new tmux session: tmux new -s rstudio
    3. Start a new job: srun -c 2 --mem=30gb --time=0-4:00:00 --pty bash
    4. Load modules: module load vscode_cli gcc/11.2.0 R/4.2.1 quarto
    5. Run command: code tunnel
  1. Choose Microsoft account when asked how you would like to log in to VSCode
      2. Open the link from terminal and paste the CODE
      3. Login with your KU credentials
      4. Go back to terminal and wait
      5. Hit ENTER on questions about tunnel name
      6. If everything went well you should see some random messages about port forwarding
    6. Go to your VSCode and on the left panel search for Remote Explorer
    7. Click Sign in to the tunnels registered with Microsoft
      1. You should see dancmpn01flunicphdom or dancmpn02flunicphdom
    8. Move cursor on it and click the ->
    9. Install extension Remote Tunnels

    To use R, install additional packages by clicking Extensions in the left panel. Search for packages:
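
Besides the editor extensions, the VSCode R extension needs the languageserver package installed in the R you loaded; a minimal way to do that from the shell (the CRAN mirror is just a suggestion):

Rscript -e 'install.packages(\"languageserver\", repos=\"https://mirrors.dotsrc.org/cran/\")'\n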

    If everything went well, you should be able to do this. If not, you know what to do.

    "},{"location":"5_vscode/#i-already-did-the-setup-i-want-my-r-again","title":"I already did the setup, I want my R again","text":"
    1. Login to head node: ssh $USER@danhead01fl.unicph.domain
    2. Start a new tmux session: tmux new -s rstudio
    3. Start a new job: srun -c 2 --mem=30gb --time=0-4:00:00 --pty bash
    4. Load modules: module load vscode_cli gcc/11.2.0 R/4.2.1 quarto
    5. Run command: code tunnel
    6. Go to your VSCode and on the left panel search for Remote Explorer
    7. Click dancmpn01flunicphdom or dancmpn02flunicphdom
    "},{"location":"5_vscode/#admins-how-to-install","title":"Admins: How to install","text":""},{"location":"5_vscode/#creating-module","title":"Creating module","text":"
    curl -Lk 'https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-x64' --output vscode_cli.tar.gz\ntar -xf vscode_cli.tar.gz\n
    "},{"location":"5_vscode/#known-issues","title":"Known issues","text":"

VSCode can be installed as a server (code-server); however, it is not possible to listen on the port when on a computing node. This works only on dangpu01fl.

    Error when trying to do reverse ssh:

    error listen EADDRINUSE: address already in use 127.0.0.1:8080\n

VSCode code-server is an alternative to code tunnel that consists of running code-server on a compute node and accessing it via a web browser using a reverse ssh tunnel.

# make sure the target directory exists before extracting into it\nmkdir -p /maps/projects/dan1/data/Brickman/shared/modules/software/code-server/4.90.2\ncurl -fL https://github.com/coder/code-server/releases/download/v4.90.2/code-server-4.90.2-linux-amd64.tar.gz | tar -C /maps/projects/dan1/data/Brickman/shared/modules/software/code-server/4.90.2 -xz\n
    ssh user@danhead01fl.unicph.domain\ntmux new\nsrun -c 2 --mem=30gb --time=0-4:00:00 -p gpuqueue --pty bash\nmodule load code-server\ncode-server\n# On local machine\nssh -fNL localhost:8080:localhost:8080 $USER@dangpu01fl.unicph.domain\n
    "},{"location":"6_handy-scripts/","title":"Handy scripts","text":""},{"location":"6_handy-scripts/#geo-submission","title":"GEO submission","text":"
    1. Login to GEO using Google
    2. Login to danGPU and change path to where the files are located
    3. It's either ~/Brickman/projects/ or ~/ucph/ndir/SUN-RENEW-Brickman/
    4. Download and update GEO template.
    5. NOTE: always make sure you use the latest version
    6. Click Transfer files and copy the login information for the ftp
7. Adjust the variables from the step above

NOTE: before running the command below, make sure you are already in the folder where you can see all the folders/files you want to upload. It will make the steps below simpler.

# we run a tmux session in case we lose connection\ntmux new -s geo\n\n# this logs you in to the FTP server\nsftp geoftp@sftp-private.ncbi.nlm.nih.gov\npassword: <PASSWORD>\n\ncd uploads/<FOLDER>\nmkdir <RNAseq>\ncd <RNAseq>\nmput *\n
    "},{"location":"miscellaneous/dropbox/","title":"Moving Dropbox to SUND","text":"

This is a step-by-step guide to how I moved our Dropbox into SUND, organized by KU IT. In my first attempt I tried moving the files into OneDrive, but because of issues with long filenames I ran into more and more problems.

A simpler solution is to just move things to the SAMBA drives.

    First, ssh into the server

    ssh danhead01fl\ntmux new -s dropbox-transfer\nmodule load rclone/1.65.1\n
    "},{"location":"miscellaneous/dropbox/#linking-remotes","title":"Linking remotes","text":""},{"location":"miscellaneous/dropbox/#dropbox","title":"Dropbox","text":"
    > n\n> dropbox\n> client_id <ENTER>\n> client_secret <ENTER>\n> y\nforward port `ssh -fNL localhost:53682:localhost:53682 danhead01fl` and access the website locally\n
    "},{"location":"miscellaneous/dropbox/#onedrive","title":"Onedrive","text":"
    > n\n> onedrive\n> client_id <ENTER>\n> client_secret <ENTER>\n> region <ENTER>\n> y\nforward port `ssh -fNL localhost:53682:localhost:53682 danhead01fl` and access the website locally\n> config_type 3\n> https://alumni.sharepoint.com/sites/UCPH_BrickmanLab\n> y\n
    "},{"location":"miscellaneous/dropbox/#test-connections","title":"Test connections","text":"
    rclone lsd Dropbox:\nrclone lsd dropbox_jb:\nrclone lsd Onedrive:\n
    "},{"location":"miscellaneous/dropbox/#copy-files","title":"Copy files","text":"

I started with manually selected folders first, because we had too many folders and there were sometimes timeout issues.

    rclone copy --progress --checksum Dropbox:Computerome ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Computerome\nrclone copy --progress --checksum Dropbox:Courses ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Courses\nrclone copy --progress --checksum Dropbox:Grants ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Grants\nrclone copy --progress --checksum Dropbox:Other ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Other\nrclone copy --progress --checksum Dropbox:Papers ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Papers\nrclone copy --progress --checksum Dropbox:Pictures ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/Pictures\nrclone copy --progress --checksum Dropbox:People ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/People\nrclone copy --progress --checksum Dropbox:sc_seq_analysis ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/sc_seq_analysis\n

After the initial copy, I ran the copy again, this time over all the folders; most of them should already be present. This is to make sure all files were moved.

    rclone copy \\\n    --progress --checksum \\\n    --exclude=\"People/Fung/Home/IRCMS_interview_2024**\" \\\n    --exclude=\"People/Fung/Home/MB1016613_backup**\" \\\n    --exclude=\"GEO_data/**\" \\\n    Dropbox: ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/\n\nrclone copy --progress --checksum Dropbox:GEO_data ~/ucph/ndir/SUN-RENEW-Brickman/GEO_data/\nrclone copy --progress --checksum dropbox_jb: ~/ucph/ndir/SUN-RENEW-Brickman/Dropbox/JoshBrickman\n
    "},{"location":"miscellaneous/ku-computer/","title":"KU computer setup","text":""},{"location":"miscellaneous/ku-computer/#conda","title":"Conda","text":"

Go here and download the Miniconda PKG, not the BASH installer. If you're running an M1/M2 Mac, please follow this guideline.

    "},{"location":"miscellaneous/ku-computer/#example-for-chip-seq-setup","title":"Example for CHIP-seq setup","text":"
    conda create --name chipseq python=3.6\nconda activate chipseq\nconda install -c bioconda deeptools bedtools\npip install intervene\n
    "},{"location":"miscellaneous/podman/","title":"Podman","text":""},{"location":"miscellaneous/podman/#setup","title":"Setup","text":"

    Storage for Podman needs to be configured to fix UID errors when running on UTF filesystem:

    mkdir -p ~/.config/containers\ncp /maps/projects/dan1/apps/podman/4.0.2/storage.conf $HOME/.config/containers/\n

Rootless Podman also requires your username and an allowed UID range to be listed in /etc/subuid and /etc/subgid.
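
The entries follow the usual username:start:count format; the values below are placeholders:

# /etc/subuid and /etc/subgid (one line per user)\n<username>:100000:65536\n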

List running containers and run a publicly available container image to confirm Podman is working:

    podman ps\npodman run -it docker.io/library/busybox\n
    "},{"location":"miscellaneous/podman/#running-the-ku-sund-dangpu-nf-core-config-with-podman","title":"Running the KU SUND DANGPU nf-core config with Podman","text":"

    Currently this is not practical because file permissions cause the following error:

    error during container init: error setting cgroup config for procHooks process: cannot set memory limit: container could not join or create cgroup\n

    The nf-core config file, podman.config, can be found at /scratch/Brickman/pipelines/

    Specify podman.config in nextflow run options to run a pipeline with Podman, e.g. for the rnaseq test profile:

    nextflow run nf-core/rnaseq -r 3.8.1 -c podman.config -profile test --outdir nfcore_test\n
    "},{"location":"tools_and_packages/alphafold2/","title":"Alphafold 2","text":""},{"location":"tools_and_packages/alphafold2/#1-running","title":"1. Running","text":""},{"location":"tools_and_packages/alphafold2/#11-create-a-target-file","title":"1.1 Create a target file","text":"
    # cat target.fasta\n>query\nMAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH\n
    "},{"location":"tools_and_packages/alphafold2/#12-setup-environments","title":"1.2. Setup environments","text":"
srun -N 1 --ntasks-per-node=10 --gres=gpu:2 --pty bash\nmodule load miniconda/latest cuda/11.4 cudnn/8.2.2\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/af2\n\ncd /maps/projects/dan1/data/Brickman/alphafold\n# use $HOME instead of ~: tilde is not expanded inside double quotes\nexport AF2_DATA_DIR=\"$HOME/projects/data/Alphafold2/24022023\"\n
    "},{"location":"tools_and_packages/alphafold2/#13-run-monomer-cli","title":"1.3. Run monomer (cli)","text":"
    python run_alphafold.py \\\n    --fasta_paths=~/projects/data/Brickman/target_01.fasta \\\n    --output_dir=/scratch/tmp/alphatest \\\n    --model_preset=monomer \\\n    --db_preset=full_dbs \\\n    --data_dir=$AF2_DATA_DIR \\\n    --uniref30_database_path=$AF2_DATA_DIR/uniref30/UniRef30_2021_03 \\\n    --uniref90_database_path=$AF2_DATA_DIR/uniref90/uniref90.fasta \\\n    --mgnify_database_path=$AF2_DATA_DIR/mgnify/mgy_clusters_2022_05.fa \\\n    --pdb70_database_path=$AF2_DATA_DIR/pdb70/pdb70 \\\n    --template_mmcif_dir=$AF2_DATA_DIR/pdb_mmcif/mmcif_files/ \\\n    --obsolete_pdbs_path=$AF2_DATA_DIR/pdb_mmcif/obsolete.dat \\\n    --bfd_database_path=$AF2_DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \\\n    --max_template_date=2022-01-01 \\\n    --use_gpu_relax\n
    "},{"location":"tools_and_packages/alphafold2/#14-run-multimer-cli","title":"1.4. Run multimer (cli)","text":"

    The example below generates 10 models.

    python run_alphafold.py \\\n    --fasta_paths=/home/fdb589/projects/data/Brickman/WTPU_1_WTC_EBPa.fasta \\\n    --output_dir=/scratch/tmp/alphatest \\\n    --model_preset=multimer \\\n    --db_preset=full_dbs \\\n    --data_dir=$AF2_DATA_DIR \\\n    --uniref30_database_path=$AF2_DATA_DIR/uniref30/UniRef30_2021_03 \\\n    --uniref90_database_path=$AF2_DATA_DIR/uniref90/uniref90.fasta \\\n    --mgnify_database_path=$AF2_DATA_DIR/mgnify/mgy_clusters_2022_05.fa \\\n    --template_mmcif_dir=$AF2_DATA_DIR/pdb_mmcif/mmcif_files/ \\\n    --obsolete_pdbs_path=$AF2_DATA_DIR/pdb_mmcif/obsolete.dat \\\n    --pdb_seqres_database_path=$AF2_DATA_DIR/pdb_seqres/pdb_seqres.txt \\\n    --uniprot_database_path=$AF2_DATA_DIR/uniprot/uniprot.fasta \\\n    --bfd_database_path=$AF2_DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \\\n    --max_template_date=2022-01-01 \\\n    --num_multimer_predictions_per_model=10 \\\n    --use_gpu_relax\n
    "},{"location":"tools_and_packages/alphafold2/#15-example-sbatch-script","title":"1.5. Example SBATCH script","text":"
#!/bin/bash\n#SBATCH --job-name=AF2\n#SBATCH --gres=gpu:2\n#SBATCH --cpus-per-task=10\n#SBATCH --mail-type=BEGIN,END\n#SBATCH --mail-user=YOUR-EMAIL\n\nmodule load miniconda/latest cuda/11.4 cudnn/8.2.2\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/af2\ncd ~/projects/data/Brickman/alphafold\nmkdir -p /scratch/tmp/alphatest\n# use $HOME instead of ~: tilde is not expanded inside double quotes\nexport AF2_DATA_DIR=\"$HOME/projects/data/Alphafold2/24022023\"\n\nsrun python run_alphafold.py \\\n--fasta_paths=~/projects/data/Brickman/target_01.fasta \\\n--output_dir=/scratch/tmp/alphatest \\\n--model_preset=monomer \\\n--db_preset=full_dbs \\\n--data_dir=$AF2_DATA_DIR \\\n--uniref30_database_path=$AF2_DATA_DIR/uniref30/UniRef30_2021_03 \\\n--uniref90_database_path=$AF2_DATA_DIR/uniref90/uniref90.fasta \\\n--mgnify_database_path=$AF2_DATA_DIR/mgnify/mgy_clusters_2022_05.fa \\\n--pdb70_database_path=$AF2_DATA_DIR/pdb70/pdb70 \\\n--template_mmcif_dir=$AF2_DATA_DIR/pdb_mmcif/mmcif_files/ \\\n--obsolete_pdbs_path=$AF2_DATA_DIR/pdb_mmcif/obsolete.dat \\\n--bfd_database_path=$AF2_DATA_DIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt \\\n--max_template_date=2022-01-01 \\\n--use_gpu_relax\n
    "},{"location":"tools_and_packages/alphafold2/#2-installation","title":"2. Installation","text":"
conda create --prefix /maps/projects/dan1/data/Brickman/conda/envs/af2 python=3.8\nsource activate /maps/projects/dan1/data/Brickman/conda/envs/af2\n\nmamba install hmmer\npip install py3dmol\nmamba install pdbfixer==1.7\nmamba install -c conda-forge openmm=7.5.1\n\ncd /maps/projects/dan1/data/Brickman/\ngit clone --branch main https://github.com/deepmind/alphafold alphafold\npip install -r ./alphafold/requirements.txt\npip install --no-dependencies ./alphafold\n\n# stereo_chemical_props.txt needs to be in the common folder\nwget -q -P /maps/projects/dan1/data/Brickman/alphafold/alphafold/common/ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt\n\n# skipping content part\nmkdir -p ./alphafold/data/params && cd ./alphafold/data/params\nwget https://storage.googleapis.com/alphafold/alphafold_params_colab_2022-12-06.tar\ntar --extract --verbose --preserve-permissions --file alphafold_params_colab_2022-12-06.tar\npip install ipykernel ipywidgets tqdm\npip install --upgrade scprep phate\n\n# Install jax\nmodule load miniconda/latest\nmodule load cuda/11.4 cudnn/8.2.2\nexport CUDA_VISIBLE_DEVICES='3'\npip install \"jax[cuda11_cudnn82]\" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html\n\n# fix last issues\nmamba install -c conda-forge -c bioconda hhsuite\nmamba install -c bioconda kalign3\npip install numpy==1.21.6\n
    "},{"location":"tools_and_packages/alphafold2/#21-download-references","title":"2.1. Download references","text":"

    Note

Downloading the references will not work in one try; I had to do a lot of manual re-running of scripts.

# create folder\nmkdir -p ~/projects/data/Alphafold2/24022023\ncd ~/projects/data/Alphafold2/24022023\n\n# Download all databases\nsh download_all_data.sh ~/projects/data/Alphafold2/24022023/ > download.log 2> download_all.log\n\n# Some fix-ups\n# mmCIF will not work because the firewall blocks the port, so I found this workaround online\n# ref: https://github.com/deepmind/alphafold/issues/196\n# ${RAW_DIR} is set by the download scripts (the pdb_mmcif raw folder)\nwget -e robots=off -r --no-parent -nH --cut-dirs=7 -q ftp://ftp.ebi.ac.uk/pub/databases/pdb/data/structures/divided/mmCIF/ -P \"${RAW_DIR}\"\n\n# Last step is to fix all the permissions\nchmod -R 755 24022023/\n
    "},{"location":"tools_and_packages/alphafold2/#references","title":"References","text":""},{"location":"tools_and_packages/dReg/","title":"dReg","text":""},{"location":"tools_and_packages/dReg/#1-running","title":"1. Running","text":""},{"location":"tools_and_packages/dReg/#make-dreg-bigwigs","title":"Make dReg bigwigs","text":"

If your nascent RNA-seq data is already aligned, bigwigs suitable for use with dReg can be prepared using the Danko Lab's RunOnBamToBigWig.

If you have fastq files from PRO-seq, GRO-seq, or ChRO-seq, run the Danko Lab's mapping pipeline using the shared dReg_dataprep conda environment.

    Example SBATCH script for mapping pipeline

    #!/bin/bash\n\n#SBATCH --job-name=pro_align\n#SBATCH -c 20\n#SBATCH --mem=30gb\n#SBATCH --time=00-24:00:00\n#SBATCH --output=01_proseq_alignment.out\n#SBATCH --mail-type=BEGIN,END\n#SBATCH --mail-user=YOUR-EMAIL\n\nmodule load miniconda/latest\nsource activate dReg_dataprep\n\nPROSEQ=(\"/maps/projects/dan1/data/Brickman/proseq2.0/proseq2.0.bsh\")\nGENO=(\"/scratch/Brickman/references/mus_musculus/ensembl/GRCm38_102/\")\nRESL=(\"/maps/projects/dan1/data/Brickman/projects/NAME_DATE/data/external/proseq/\")\nSAMPLES=(\"SRX14164616_SRR18010280 SRX14164617_SRR18010278\")\n\nfor sample in ${SAMPLES}; do\n    bash ${PROSEQ} -i ${GENO}bwa \\\n    -c ${GENO}GRCm38.102.genome \\\n    -PE --RNA5=R2_5prime --UMI1=6 \\\n    -O ${RESL} \\\n    -I ${sample} \\\n    --thread=20\ndone\n
    "},{"location":"tools_and_packages/dReg/#gpu-check","title":"GPU check","text":"

Check available GPUs and running processes before using dReg. GPU 0 is reserved for the Brickman group.

    nvidia-smi\n
    "},{"location":"tools_and_packages/dReg/#example-dreg-script","title":"Example dReg script","text":"
    #!/bin/bash\n\n#SBATCH --job-name=dREG\n#SBATCH -c 30\n#SBATCH --mem=30gb\n#SBATCH --time=00-24:00:00\n#SBATCH --output=01-1_dREG.out\n#SBATCH --mail-type=BEGIN,END\n#SBATCH --mail-user=YOUR-EMAIL\n\nmodule load miniconda/latest cuda/11.8-dangpu cudnn/8.6.0-dangpu\nsource activate dReg\n\nBW=(\"../data/assays/RNA_INITIAL_DATE/processed/bw/\")\nRESL=(\"../results/01/dREG/\")\ndREG=(\"/projects/dan1/data/Brickman/dREG/run_dREG.bsh\")\nMODEL=(\"/projects/dan1/data/Brickman/dREG/resources/asvm.gdm.6.6M.20170828.rdata\")\n\n\nSAMPLES=(\"0h_A 0h_B 2h_A 2h_B\")\n\nfor sample in ${SAMPLES}; do\n    bash ${dREG} ${BW}${sample}_sorted_filt_dedup_plus.bw ${BW}${sample}_sorted_filt_dedup_minus.bw \\\n    ${RESL}${sample}_test ${MODEL} \\\n    30 0\ndone\n
    "},{"location":"tools_and_packages/dReg/#2-installation","title":"2. Installation","text":""},{"location":"tools_and_packages/dReg/#installing-dreg","title":"Installing dReg","text":"

Note: the Python version in the conda env must be 3.8, and the R version must be < 4.0.

    cd /maps/projects/dan1/data/Brickman/conda/\nmodule load miniconda/latest\nmamba env create -p /projects/dan1/data/Brickman/conda/envs/dReg -f dREG.yml\nsource activate dReg\n\ncd /maps/projects/dan1/data/Brickman/\ngit clone https://github.com/Danko-Lab/dREG\ncd dREG\nmake R_dependencies\n\nR\ndevtools::install_github(\"CshlSiepelLab/RPHAST\")\ndevtools::install_version(\"MASS\", version=\"7.3-51.5\", repos=\"https://mirrors.dotsrc.org/cran/\")\ninstall.packages(\"e1071\", repos=\"https://mirrors.dotsrc.org/cran/\")\ndevtools::install_version(\"randomForest\", version=\"4.6-14\", repos=\"https://mirrors.dotsrc.org/cran/\")\nquit()\n\nmake dreg\nmkdir resources\ncd resources\nwget ftp://cbsuftp.tc.cornell.edu/danko/hub/dreg.models/asvm.gdm.6.6M.20170828.rdata\n
    "},{"location":"tools_and_packages/dReg/#installing-rgtsvm","title":"Installing Rgtsvm","text":"

Rgtsvm is required for dReg to use GPU resources.

    # make sure in dREG repo and that dReg environment is activated\ncd /maps/projects/dan1/data/Brickman/dREG\nsource activate dReg\n\nR\ninstall.packages(c(\"bit64\", \"snow\", \"SparseM\"), repos=\"https://mirrors.dotsrc.org/cran/\")\ndevtools::install_version(\"lattice\", version=\"0.20-41\", repos=\"https://mirrors.dotsrc.org/cran/\")\ninstall.packages(\"Matrix\", repos=\"https://mirrors.dotsrc.org/cran/\")\nquit()\nmamba install -c conda-forge boost=1.70.0\n\nmkdir third-party\ncd third-party\ngit clone https://github.com/Danko-Lab/Rgtsvm.git\ncd Rgtsvm\n\nmodule load cuda/11.8-dangpu\nmodule load cudnn/8.6.0-dangpu\n\nR CMD INSTALL --configure-args=\"--with-boost-home=$CONDA_PREFIX\" Rgtsvm\n
    "},{"location":"tools_and_packages/packages/","title":"Bioinformatics tools","text":"Tool Description NGS Language Link Functional enrichment on genomic regions CHIP-seq ATAC-seq R https://github.com/jokergoo/rGREAT Pseudotime inference scRNA-seq Python https://github.com/LouisFaure/scFates nan Single-cell analysis package scRNA-seq Python https://github.com/scverse/scanpy nan AI probabilistic package for transfer learning DR and more scRNA-seq Python https://github.com/scverse/scvi-tools Gene set enrichment analysis on steroids scRNA-seq Python https://github.com/zqfang/GSEApy nan UpsetR on stereoids (complicated Venn Diagrams) Plotting R https://github.com/krassowski/complex-upset nan Complex heatmap Plotting Python https://github.com/DingWB/PyComplexHeatmap nan"},{"location":"tools_and_packages/ucsc_liftover/","title":"UCSC liftover tool","text":"

    Documentation for UCSC liftover.
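
For context, the basic invocation has the fixed argument order oldFile map.chain newFile unMapped; the file names below are placeholders:

liftOver peaks_hg19.bed hg19ToHg38.over.chain.gz peaks_GRCh38.bed unmapped.bed\n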

    "},{"location":"tools_and_packages/ucsc_liftover/#issue-separate-peaks-map-to-same-coordinates-after-liftover","title":"Issue: separate peaks map to same coordinates after liftover","text":""},{"location":"tools_and_packages/ucsc_liftover/#solution","title":"Solution","text":"

Remove any peaks with overlapping coordinates after liftover before using the lifted-over peak file:

    #!/bin/bash\n\nmodule load bedtools\n\nEXTL=(\"../data/external/\")\n\n# Sort the lifted over peakfile for use with bedtools\nsort -k1,1 -k2,2n ${EXTL}wong_fig3c_peaks_GRCh38.bed > ${EXTL}peaks.tmp && mv ${EXTL}peaks.tmp ${EXTL}wong_fig3c_peaks_GRCh38.bed\n\n# Bedtools merge count rows contributing to merged peaks (overlapping peaks will have count > 1)\nbedtools merge -i ${EXTL}wong_fig3c_peaks_GRCh38.bed -c 1 -o count > ${EXTL}counted.bed\n\n# Get non-overlapping peaks\nawk '/\\t1$/{print}' ${EXTL}counted.bed > ${EXTL}filtered.bed\n\n# Intersect original file with non-overlapping peaks and output overlapping peaks\nbedtools intersect -wa -a ${EXTL}wong_fig3c_peaks_GRCh38.bed -b ${EXTL}filtered.bed > ${EXTL}wong_fig3c_peaks_GRCh38_correct_liftover.bed\nbedtools intersect -v -a ${EXTL}wong_fig3c_peaks_GRCh38.bed -b ${EXTL}filtered.bed > ${EXTL}wong_fig3c_peaks_GRCh38_overlapping.bed\n
    "}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 86adead17c36d78fce94b5d04f3e51a0be8345ab..d2fe56b82e27a38a1e278a6ef8498ac390b72be6 100644 GIT binary patch delta 14 Vcmb=g=aBE_;Aq&un?8}F8~`BI1a$xa delta 14 Vcmb=g=aBE_;Apt_H*F$EIRGY=1(E;& diff --git a/tools_and_packages/alphafold2/index.html b/tools_and_packages/alphafold2/index.html index 4104135..3728e46 100644 --- a/tools_and_packages/alphafold2/index.html +++ b/tools_and_packages/alphafold2/index.html @@ -16,7 +16,7 @@ - + diff --git a/tools_and_packages/dReg/index.html b/tools_and_packages/dReg/index.html index 0a73bb5..cf39f86 100644 --- a/tools_and_packages/dReg/index.html +++ b/tools_and_packages/dReg/index.html @@ -16,7 +16,7 @@ - + diff --git a/tools_and_packages/packages/index.html b/tools_and_packages/packages/index.html index bb4cadf..e280ee4 100644 --- a/tools_and_packages/packages/index.html +++ b/tools_and_packages/packages/index.html @@ -16,7 +16,7 @@ - + diff --git a/tools_and_packages/ucsc_liftover/index.html b/tools_and_packages/ucsc_liftover/index.html index fa0a8f9..eb91633 100644 --- a/tools_and_packages/ucsc_liftover/index.html +++ b/tools_and_packages/ucsc_liftover/index.html @@ -14,7 +14,7 @@ - +