Update homepage

DaehwanKimLab · Feb 6, 2020 · 0b2fe54 · 0b2fe54
1 parent f5323ba
commit 0b2fe54
Show file tree

Hide file tree

Showing 16 changed files with 320 additions and 187 deletions.
diff --git a/.gitignore b/.gitignore
@@ -27,4 +27,7 @@ hisat2_test/kim_example*.malignment.gcsa
 hisat2_test/genome*
 hisat2_test/2*
 hisat2_test/snp142*
-hisat2_test/testset*
+hisat2_test/testset*
+docs/_site
+docs/*.lock
+docs/.*-cache
diff --git a/docs/_config.yml b/docs/_config.yml
@@ -36,6 +36,18 @@ icons:
   wordpress:
   youtube:
 
+#
+# default for front matter
+#
+defaults:
+  - 
+    scope:
+      path: ""
+    values:
+      category: "main"
+
+
+
 #
 # Prettify url.
 #

diff --git a/docs/_data/contributor.yml b/docs/_data/contributor.yml
@@ -0,0 +1,8 @@
+- name: Chanhee Park
+  url: /chanhee.park/
+- name: Ben Langmead
+  url: http://www.langmead-lab.org/
+- name: Steven Salzberg
+  url: https://salzberg-lab.org/in-the-news/about-me/
+- name: Daehwan Kim
+  url: https://kim-lab.org/daehwan-kim-principal-investigator/
diff --git a/docs/_data/download-binary.yml b/docs/_data/download-binary.yml
@@ -1,5 +1,12 @@
-latest_version: 2.1.0
+latest_version: 2.2.0,2.1.0
 release:
+  - version: 2.2.0
+    date: 2/6/2020
+    name: HISAT2
+    artifacts:
+      Source: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-220-source/download
+      OSX_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-220-OSX_x86_64/download
+      Linux_x86_64: https://cloud.biohpc.swmed.edu/index.php/s/hisat2-220-Linux_x86_64/download
   - version: 2.1.0
     date: 6/8/2017
     name: HISAT2

diff --git a/docs/_data/download-index.yml b/docs/_data/download-index.yml
@@ -9,6 +9,10 @@
         url: https://cloud.biohpc.swmed.edu/index.php/s/grch38_tran/download
       genome_snp_tran: 
         url: https://cloud.biohpc.swmed.edu/index.php/s/grch38_snp_tran/download
+      genome_rep(above 2.2.0): 
+        url: https://cloud.biohpc.swmed.edu/index.php/s/grch38_rep/download
+      genome_snp_rep(above 2.2.0): 
+        url: https://cloud.biohpc.swmed.edu/index.php/s/grch38_snp_rep/download
     UCSC hg38:
       genome: 
         url: https://cloud.biohpc.swmed.edu/index.php/s/hg38/download
@@ -74,4 +78,4 @@
         url: ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/r64_tran.tar.gz
     UCSC sacCer3:
       genome: 
-        url: ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/sc3.tar.gz
+        url: ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/sc3.tar.gz
diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html
@@ -70,27 +70,62 @@ <h1 class="clearfix">
 
     <aside class="site-aside">
         <div class="inner">
-                <div class="block">
+			<div class="block">
                         <form action="{{ site.baseurl }}/search">
                             <input type="search" id="search" name="q" placeholder="{{ site.str_search }}" />
                         </form>
-                    </div>
+           </div>
 
+		   <div class="block">
             <ul>
-                {% assign pages = site.pages | sort: 'order' %}
+                {% assign pages = site.pages | where: "category", "main" | sort: 'order' %}
                 {% for page in pages %}
                 {% if page.title and page.hide != true %}
                 <li><a class="page-link" href="{{ page.url | prepend: site.baseurl }}">{{ page.title }}</a></li>
                 {% endif %}
                 {% endfor %}
             </ul>
+			</div>
             <!--
             <ul class="icons">
                 {% include icons.html icons=site.icons %}
             </ul>
             <hr class="with-no-margin margin-bottom"/>
             -->
 
+
+			<div class="block">
+			<h2>Getting Help</h2>
+			<br>
+			Please use <a href="mailto:[email protected]">[email protected]</a> for private communications only. Please do not email technical questions to HISAT2 contributors directly.
+			</div>
+
+			<div class="block">
+			<h2>Publications</h2>
+			<div style="font-size: 0.8em">
+			<ul>
+			<li>Kim, D., Paggi, J.M., Park, C. <i>et al.</i> <a class="publication" href="https://doi.org/10.1038/s41587-019-0201-4">Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype.</a> <a class="publication" href="https://www.nature.com/nbt/"><i>Nat Biotechnol</i></a> <b>37</b>, 907–915 (2019).</li>
+			<li>Kim D, Langmead B and Salzberg SL. <a class="publication" href="https://doi.org/10.1038/nmeth.3317">HISAT: a fast spliced aligner with low memory requirements.</a> <a class="publication" href="https://www.nature.com/nmeth/"><i>Nature Methods</i></a> 2015</li>
+			<li>Pertea M, Kim D, Pertea G, Leek JT and Salzberg SL. <a class="publication" href="https://doi.org/10.1038/nprot.2016.095">Transcript-level expression analysis of RNA-seq experiments with HISAT, StringTie and Ballgown.</a> <a class="publication" href="https://www.nature.com/nprot/"><i>Nature Protocols</i></a> 2016</li>
+			</ul>
+			</div>
+			</div>
+
+			<div class="block">
+			<h2>Contributors</h2>
+            <ul>
+            {% for item in site.data.contributor %}
+			    <li>
+				{% if item.url contains "http://" or item.url contains "https://" %}
+				<a class="page-link" href="{{ item.url }}">{{ item.name }}</a>
+				{% else %}
+				<a class="page-link" href="{{ item.url | prepend: site.baseurl }}">{{ item.name }}</a>
+				{% endif %}
+				</li>
+            {% endfor %}
+            </ul>
+			</div>
+
             {% if site.data.collaborate %}
             <div class="block">
             {% for item in site.data.collaborate %}
@@ -103,6 +138,7 @@ <h1 class="clearfix">
             </div>
             {% endif %}
 
+			<!--
             <div class="block sticky">
                 <h2>{{ site.str_recent_posts }}</h2>
                 <ul>
@@ -118,6 +154,7 @@ <h2>{{ site.str_recent_posts }}</h2>
                     {% endfor %}
                 </ul>
             </div>
+			-->
 
         </div>
     </aside>

diff --git a/docs/_pages/contributors/chanheepark.md b/docs/_pages/contributors/chanheepark.md
@@ -0,0 +1,12 @@
+---
+layout: page
+title: Chanhee Park 
+permalink: /chanhee.park/
+order: 1
+share: false
+category: contributor 
+---
+
+Chanhee Park is a Scientific Software Engineer in the Kim Lab at UTSW responsible for maintaining and improving HISAT2.
+
+[LinkedIn](https://www.linkedin.com/in/chanhee-park-97677297/)
diff --git a/docs/_pages/download.md b/docs/_pages/download.md
@@ -6,6 +6,9 @@ order: 4
 share: false
 ---
 
+Please cite:  
+>Kim, D., Paggi, J.M., Park, C. _et al._ Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype. _Nat Biotechnol_ **37**, 907–915 (2019). <https://doi.org/10.1038/s41587-019-0201-4>
+
 - TOC
 {:toc}
 
@@ -29,12 +32,14 @@ share: false
     genome_tran: HISAT2 Graph index for reference plus transcripts
     genome_snp_tran: HISAT2 Graph index for reference plus SNPs and transcripts
 
+
 ## Binaries
+{: binaries }
 
-{% assign target = site.data.download-binary.latest_version %}
+{% assign targets = site.data.download-binary.latest_version | split: "," %}
 {% for release in site.data.download-binary.release %}
 {% assign version = release['version'] %}
-{% if version == target or target == null %}
+{% if targets contains version or targets == null %}
 {% assign name = release['name'] %}
 ### Version: {{name}} {{version}}
 <table style="border-collapse: collapse; border: none;">

diff --git a/docs/_pages/hisat2.md b/docs/_pages/hisat2.md
@@ -9,7 +9,7 @@ share: false
 **HISAT2** is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human genomes as well as to a single reference genome. Based on an extension of BWT for graphs ([Sir&eacute;n et al. 2014](http://dl.acm.org/citation.cfm?id=2674828)), we designed and implemented a graph FM index (GFM), an original approach and its first implementation. In addition to using one global GFM index that represents a population of human genomes, **HISAT2** uses a large set of small GFM indexes that collectively cover the whole genome. These small indexes (called local indexes), combined with several alignment strategies, enable rapid and accurate alignment of sequencing reads. This new indexing scheme is called a Hierarchical Graph FM index (HGFM).
 
 
-### HISAT 2.2.0 release 1/2x/2020
+### HISAT 2.2.0 release 2/6/2020
 
 This major version update includes a new feature to handle “repeat” reads. Based on sets of 100-bp simulated and 101-bp real reads that we tested, we found that 2.6-3.4% and 1.4-1.8% of the reads were mapped to >5 locations and >100 locations, respectively. Attempting to report all alignments would likely consume a prohibitive amount of disk space. In order to address this issue, our repeat indexing and alignment approach directly aligns reads to repeat sequences, resulting in one repeat alignment per read. HISAT2 provides application programming interfaces (API) for C++, Python, and JAVA that rapidly retrieve genomic locations from repeat alignments for use in downstream analyses.  
 Other minor bug fixes are also included as follows:  
@@ -18,23 +18,37 @@ Other minor bug fixes are also included as follows:
 * Fixed duplicate read alignments in SAM file
 * Skip a splice site if exon's last base or first base is ambiguous (N) 
 
+
+### Index files are moved to a different location. 8/30/2019
+
+Due to a high volume of index downloads, we have moved HISAT2 index files to a different location in order to provide faster download speed. If you use wget or curl to download index files, then you may need to use the following commands to get the correct file name.
+* `wget --content-disposition` *download_link*
+* `curl -OJ` *download_link*
+
+
+### [The HISAT2 paper](https://www.nature.com/articles/s41587-019-0201-4) is out in *Nature Biotechnology*. 8/2/2019
+
+
 ### HISAT 2.1.0 release 6/8/2017
 
-* This major version includes the first release of HISAT-genotype, which currently performs HLA typing, DNA fingerprinting analysis, and CYP typing on whole genome sequencing (WGS) reads.  We plan to extend the system so that it can analyze not just a few genes, but a whole human genome.  Please refer to [the HISAT-genotype website](http://ccb.jhu.edu/hisat-genotype) for more details.
+* This major version includes the first release of HISAT-genotype, which currently performs HLA typing,
+  DNA fingerprinting analysis, and CYP typing on whole genome sequencing (WGS) reads. 
+  We plan to extend the system so that it can analyze not just a few genes, but a whole human genome. 
+  Please refer to [the HISAT-genotype website](https://daehwankimlab.github.io/hisat-genotype) for more details.
 * HISAT2 can be directly compiled and executed on Windows system using Visual Studio, thanks to [Nigel Dyer](http://www2.warwick.ac.uk/fac/sci/systemsbiology/staff/dyer/).
-* Implemented --new-summary option to output a new style of alignment summary, which is easier to parse for programming purposes.
-* Implemented --summary-file option to output alignment summary to a file in addition to the terminal (e.g. stderr).
+* Implemented `--new-summary` option to output a new style of alignment summary, which is easier to parse for programming purposes.
+* Implemented `--summary-file` option to output alignment summary to a file in addition to the terminal (e.g. stderr).
 * Fixed discrepancy in HISAT2’s alignment summary.
-* Implemented --no-templatelen-adjustment option to disable automatic template length adjustment for RNA-seq reads.
+* Implemented `--no-templatelen-adjustment` option to disable automatic template length adjustment for RNA-seq reads.
 
 
 ### HISAT2 2.0.5 release 11/4/2016
 Version 2.0.5 is a minor release with the following changes.
 * Due to a policy change (HTTP to HTTPS) in using SRA data (`--sra-option`), users are strongly encouraged to use this version. As of 11/9/2016, NCBI will begin a permanent redirect to HTTPS, which means that previous versions of HISAT2 will soon no longer work with the `--sra-acc` option.
-* Implemented -I and -X options for specifying minimum and maximum fragment lengths.  The options are valid only when used with --no-spliced-alignment, which is used for the alignment of DNA-seq reads.
+* Implemented `-I` and `-X` options for specifying minimum and maximum fragment lengths.  The options are valid only when used with `--no-spliced-alignment`, which is used for the alignment of DNA-seq reads.
 * Fixed some cases where reads with SNPs on their 5' ends were not properly aligned.
-* Implemented --no-softclip option to disable soft-clipping.
-* Implemented --max-seeds to specify the maximum number of seeds that HISAT2 will try to extend to full-length alignments (see the manual for details).
+* Implemented `--no-softclip` option to disable soft-clipping.
+* Implemented `--max-seeds` to specify the maximum number of seeds that HISAT2 will try to extend to full-length alignments (see [the manual] for details).
 
 
 ### [HISAT, StringTie and Ballgown protocol](http://www.nature.com/nprot/journal/v11/n9/full/nprot.2016.095.html) published at Nature Protocols 8/11/2016
@@ -46,7 +60,7 @@ Version 2.0.5 is a minor release with the following changes.
 ### HISAT2 2.0.4 release 5/18/2016
 Version 2.0.4 is a minor release with the following changes.
 * Improved template length estimation (the 9th column of the SAM format) of RNA-seq reads by taking introns into account.
-* Introduced two options, --remove-chrname and --add-chrname, to remove "chr" from reference names or add "chr" to reference names in the alignment output, respectively (the 3rd column of the SAM format).
+* Introduced two options, `--remove-chrname` and `--add-chrname`, to remove "chr" from reference names or add "chr" to reference names in the alignment output, respectively (the 3rd column of the SAM format).
 * Changed the maximum of mapping quality (the 5th column of the SAM format) from 255 to 60. Note that 255 is an undefined value according to the SAM manual and some programs would not work with this value (255) properly.
 * Fixed NH (number of hits) in the alignment output.
 * HISAT2 allows indels of any length pertaining to minimum alignment score (previously, the maximum length of indels was 3 bp).
@@ -57,23 +71,23 @@ Version 2.0.4 is a minor release with the following changes.
 ### HISAT2 2.0.3-beta release 3/28/2016
 Version 2.0.3-beta is a minor release with the following changes.
 * Fixed graph index building when using both SNPs and transcripts. As a result, genome_snp_tran indexes here on the HISAT2 website have been rebuilt.
-* Included some missing files needed to follow the small test example (see the manual for details).
+* Included some missing files needed to follow the small test example (see [the manual] for details).
 
 
 ### HISAT2 2.0.2-beta release 3/17/2016
 **Note (3/19/2016):** this version is slightly updated to handle reporting splice sites with the correct chromosome names.
 Version 2.0.2-beta is a major release with the following changes.
-* Memory mappaped IO (--mm option) works now.
+* Memory-mapped IO (`--mm` option) works now.
 * Building linear index can be now done using multi-threads.
-* Changed the minimum score for alignment in keeping with read lengths, so it's now --score-min L,0.0,-0.2, meaning a minimum score of -20 for 100-bp reads and -30 for 150-bp reads.
-* Fixed a bug that the same read was written into a file multiple times when --un-conc was used.
+* Changed the minimum score for alignment in keeping with read lengths, so it's now `--score-min L,0.0,-0.2`, meaning a minimum score of -20 for 100-bp reads and -30 for 150-bp reads.
+* Fixed a bug that the same read was written into a file multiple times when `--un-conc` was used.
 * Fixed another bug that caused reads to map beyond reference sequences.
-* Introduced --haplotype option in the hisat2-build (index building), which is used with --snp option together to incorporate those SNP combinations present in the human population.  This option also prevents graph construction from exploding due to exponential combinations of SNPs in small genomic regions.
+* Introduced `--haplotype` option in the hisat2-build (index building), which is used with `--snp` option together to incorporate those SNP combinations present in the human population.  This option also prevents graph construction from exploding due to exponential combinations of SNPs in small genomic regions.
 * Provided a new python script to extract SNPs and haplotypes from VCF files, <i>hisat2_extract_snps_haplotypes_VCF.py</i>
 * Changed several python script names as follows:
-  * <i>extract_splice_sites.py</i> to <i>hisat2_extract_splice_sites.py</i>
-  * <i>extract_exons.py</i> to <i>hisat2_extract_exons.py</i>
-  * <i>extract_snps.py</i> to <i>hisat2_extract_snps_haplotypes_UCSC.py</i>
+  * *extract_splice_sites.py* to *hisat2_extract_splice_sites.py*
+  * *extract_exons.py* to *hisat2_extract_exons.py*
+  * *extract_snps.py* to *hisat2_extract_snps_haplotypes_UCSC.py*
 
 
 ### HISAT2 2.0.1-beta release 11/19/2015
@@ -96,11 +110,12 @@ We extended the BWT/FM index to incorporate genomic differences among individual
   * HISAT2 can be considered an enhanced version of HISAT with many improvements and bug fixes. The alignment speed and memory requirements of HISAT2 are virtually the same as those of HISAT when using the HFM index (<i>genome</i>).
   * When using graph-based indexes (HGFM), the runtime of HISAT2 is slightly slower than HISAT (30~80% additional CPU time).
   * HISAT2 allows for mapping reads directly against transcripts, similar to that of TopHat2 (use <i>genome_tran</i> or <i>genome_snp_tran</i>).
-* When reads contain SNPs, the SNP information is provided as an optional field in the SAM output of HISAT2 (e.g., <strong>Zs:Z:1|S|rs3747203,97|S|rs16990981</strong> - see <a href="manual.shtml">the manual</a> for details).  This feature enables fast and sensitive genotyping in downstream analyses. Note that there is no alignment penalty for mismatches, insertions, and deletions if they correspond to known SNPs.
-* HISAT2 provides options for transcript assemblers (e.g., StringTie and Cufflinks) to work better with the alignment from HISAT2 (see options such as --dta and --dta-cufflinks).
-* Some slides about HISAT2 are found <a href="data/HISAT2-first_release-Sept_8_2015.pdf">here</a> and we are preparing detailed documention.
+* When reads contain SNPs, the SNP information is provided as an optional field in the SAM output of HISAT2 (e.g., **<code>Zs:Z:1|S|rs3747203,97|S|rs16990981</code>** - see [the manual] for details).  This feature enables fast and sensitive genotyping in downstream analyses. Note that there is no alignment penalty for mismatches, insertions, and deletions if they correspond to known SNPs.
+* HISAT2 provides options for transcript assemblers (e.g., StringTie and Cufflinks) to work better with the alignment from HISAT2 (see options such as `--dta` and `--dta-cufflinks`).
+* Some slides about HISAT2 are found [here]({{ '/assets/data/HISAT2-first_release-Sept_8_2015.pdf' | prepend: site.baseurl }}) and we are preparing detailed documentation.
 * We plan to incorporate a larger set of SNPs and structural variations (SV) into this index (e.g., long insertions/deletions, inversions, and translocations).
 
+[the manual]: {{ site.baseurl }}{% link _pages/manual.md %}
 
 ### The HISAT2 source code is available in a [public GitHub repository](https://github.com/DaehwanKimLab/hisat2) (5/30/2015).
 

diff --git a/docs/_pages/howto.md b/docs/_pages/howto.md
@@ -14,7 +14,7 @@ share: false
 
 ### Building indexes
 Depending on your purpose, you have to download reference sequence, gene annotation and SNP files.  
-We also provides scripts to build indexes. [Download Link]
+We also provide scripts to build indexes. [Download]({{ site.baseurl }}{% link _pages/download.md %})
 
 #### Prepare data
 1. Download reference

diff --git a/docs/_pages/links.md b/docs/_pages/links.md
@@ -0,0 +1,17 @@
+---
+layout: page
+title: Links 
+permalink: /links/
+order: 6
+share: false
+---
+
+* KimLab - <https://kim-lab.org>
+  * github - <https://github.com/DaehwanKimLab>
+* hisat-genotype - <https://daehwankimlab.github.io/hisat-genotype>
+  * github for hisat-genotype - <https://github.com/DaehwanKimLab/hisat-genotype>
+
+* Lyda Hill Department of Bioinformatics at UT Southwestern Medical Center - <https://www.utsouthwestern.edu/departments/bioinformatics>
+
+* Center for Computational Biology at Johns Hopkins University - <http://www.ccb.jhu.edu> 
+