From 07c0ba412dc43c03f5474d80657f612abe276487 Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Mon, 6 Dec 2021 17:48:27 +0900
Subject: [PATCH 01/26] Bump up version number

---
 CHANGELOG.rst  | 3 +++
 fuc/version.py | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 6201458..a16fdb9 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,9 @@
 Changelog
 *********
 
+0.29.0 (in development)
+-----------------------
+
 0.28.0 (2021-12-05)
 -------------------
 
diff --git a/fuc/version.py b/fuc/version.py
index 1bf3675..9093e4e 100644
--- a/fuc/version.py
+++ b/fuc/version.py
@@ -1 +1 @@
-__version__ = '0.28.0'
+__version__ = '0.29.0'

From 770a159bc97e2811c116529c8d7cac25eaeb4bee Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Mon, 6 Dec 2021 20:36:48 +0900
Subject: [PATCH 02/26] Add `pyvcf.VcfFrame.phased`

---
 CHANGELOG.rst    |  2 ++
 fuc/api/pyvcf.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a16fdb9..fa90e17 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,6 +4,8 @@ Changelog
 0.29.0 (in development)
 -----------------------
 
+* Add new property ``pyvcf.VcfFrame.phased``.
+
 0.28.0 (2021-12-05)
 -------------------
 
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 650f5ff..f50b2e0 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -1127,11 +1127,80 @@ def sites_only(self):
         """bool : Whether the VCF is sites-only."""
         return not self.samples or 'FORMAT' not in self.df.columns
 
+    @property
+    def phased(self):
+        """
+        Return True if every genotype in VcfFrame is haplotype phased.
+
+        Returns
+        -------
+        bool
+            If VcfFrame is fully phased, return True, if not return False.
+
+        Examples
+        --------
+
+        >>> from fuc import pyvcf
+        >>> data1 = {
+        ...     'CHROM': ['chr1', 'chr1', 'chr1'],
+        ...     'POS': [100, 101, 102],
+        ...     'ID': ['.', '.', '.'],
+        ...     'REF': ['G', 'T', 'A'],
+        ...     'ALT': ['A', 'C', 'T'],
+        ...     'QUAL': ['.', '.', '.'],
+        ...     'FILTER': ['.', '.', '.'],
+        ...     'INFO': ['.', '.', '.'],
+        ...     'FORMAT': ['GT', 'GT', 'GT'],
+        ...     'A': ['1|1', '0|0', '1|0'],
+        ...     'B': ['1|0', '0|1', '1|0'],
+        ... }
+        >>> vf1 = pyvcf.VcfFrame.from_dict([], data1)
+        >>> vf1.df
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  100  .   G   A    .      .    .     GT  1|1  1|0
+        1  chr1  101  .   T   C    .      .    .     GT  0|0  0|1
+        2  chr1  102  .   A   T    .      .    .     GT  1|0  1|0
+        >>> vf1.phased
+        True
+        >>> data2 = {
+        ...     'CHROM': ['chr1', 'chr1', 'chr1'],
+        ...     'POS': [100, 101, 102],
+        ...     'ID': ['.', '.', '.'],
+        ...     'REF': ['G', 'T', 'A'],
+        ...     'ALT': ['A', 'C', 'T'],
+        ...     'QUAL': ['.', '.', '.'],
+        ...     'FILTER': ['.', '.', '.'],
+        ...     'INFO': ['.', '.', '.'],
+        ...     'FORMAT': ['GT', 'GT', 'GT'],
+        ...     'C': ['1|1', '0/0', '1|0'],
+        ...     'D': ['1|0', '0/1', '1|0'],
+        ... }
+        >>> vf2 = pyvcf.VcfFrame.from_dict([], data2)
+        >>> vf2.df
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    C    D
+        0  chr1  100  .   G   A    .      .    .     GT  1|1  1|0
+        1  chr1  101  .   T   C    .      .    .     GT  0/0  0/1
+        2  chr1  102  .   A   T    .      .    .     GT  1|0  1|0
+        >>> vf2.phased
+        False
+        """
+        def one_row(r):
+            def one_gt(g):
+                return '|' in g.split(':')[0]
+            return r[9:].apply(one_gt).all()
+        s = self.df.apply(one_row, axis=1)
+        return s.all()
+
     @property
     def empty(self):
         """
         Indicator whether VcfFrame is empty.
 
+        Returns
+        -------
+        bool
+            If VcfFrame is empty, return True, if not return False.
+
         Examples
         --------
 

From 22c965fac48877d26a58815bb394da74456ba2fc Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 8 Dec 2021 15:23:29 +0900
Subject: [PATCH 03/26] Update `pyvcf.VcfFrame.phased`

---
 fuc/api/pyvcf.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index f50b2e0..13e45e3 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -1132,6 +1132,8 @@ def phased(self):
         """
         Return True if every genotype in VcfFrame is haplotype phased.
 
+        The method will return False if VcfFrame is empty.
+
         Returns
         -------
         bool
@@ -1184,6 +1186,8 @@ def phased(self):
         >>> vf2.phased
         False
         """
+        if self.empty:
+            return False
         def one_row(r):
             def one_gt(g):
                 return '|' in g.split(':')[0]

From f8e29bf8240a46c821218733efe8ffbd34c62dca Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 8 Dec 2021 15:40:23 +0900
Subject: [PATCH 04/26] Update docs

---
 fuc/api/pyvcf.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 13e45e3..cd8e9c6 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -1132,12 +1132,11 @@ def phased(self):
         """
         Return True if every genotype in VcfFrame is haplotype phased.
 
-        The method will return False if VcfFrame is empty.
-
         Returns
         -------
         bool
             If VcfFrame is fully phased, return True, if not return False.
+            Also return False if VcfFrame is empty.
 
         Examples
         --------

From 432639d5749d0be1d6db858e4d9c36e517881ff2 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 8 Dec 2021 15:59:53 +0900
Subject: [PATCH 05/26] Update `pyvcf.VcfFrame.slice`

---
 CHANGELOG.rst     |  1 +
 fuc/api/common.py | 19 +++++++++++++++----
 fuc/api/pyvcf.py  |  4 ++++
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index fa90e17..c1723a9 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,6 +5,7 @@ Changelog
 -----------------------
 
 * Add new property ``pyvcf.VcfFrame.phased``.
+* Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/api/common.py b/fuc/api/common.py
index 33b6ea4..164b314 100644
--- a/fuc/api/common.py
+++ b/fuc/api/common.py
@@ -1333,6 +1333,9 @@ def update_chr_prefix(regions, mode='remove'):
     """
     Add or remove the (annoying) 'chr' string from specified regions.
 
+    The method will automatically detect regions that don't need to be
+    updated and will return them unchanged.
+
     Parameters
     ----------
     regions : str or list
@@ -1349,10 +1352,18 @@ def update_chr_prefix(regions, mode='remove'):
     -------
 
     >>> from fuc import common
-    >>> common.update_chr_prefix(['chr1:100-200', '1:300-400'], mode='remove')
-    ['1:100-200', '1:300-400']
-    >>> common.update_chr_prefix(['chr1:100-200', '1:300-400'], mode='add')
-    ['chr1:100-200', 'chr1:300-400']
+    >>> common.update_chr_prefix(['chr1:100-200', '2:300-400'], mode='remove')
+    ['1:100-200', '2:300-400']
+    >>> common.update_chr_prefix(['chr1:100-200', '2:300-400'], mode='add')
+    ['chr1:100-200', 'chr2:300-400']
+    >>> common.update_chr_prefix('chr1:100-200', mode='remove')
+    '1:100-200'
+    >>> common.update_chr_prefix('chr1:100-200', mode='add')
+    'chr1:100-200'
+    >>> common.update_chr_prefix('2:300-400', mode='add')
+    'chr2:300-400'
+    >>> common.update_chr_prefix('2:300-400', mode='remove')
+    '2:300-400'
     """
     def remove(x):
         return x.replace('chr', '')
diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index cd8e9c6..be634a7 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -4703,6 +4703,10 @@ def slice(self, region):
         0  chr1  205  .   T   C    .      .    .     GT    1/1
         1  chr1  297  .   A   T    .      .    .     GT    0/1
         """
+        if self.has_chr_prefix:
+            region = common.update_chr_prefix(region, mode='add')
+        else:
+            region = common.update_chr_prefix(region, mode='remove')
         chrom, start, end = common.parse_region(region)
         df = self.df[self.df.CHROM == chrom]
         if not pd.isna(start):

From 0bbbcabc413794a8bcf11999367ea76d3195c40d Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 8 Dec 2021 16:27:16 +0900
Subject: [PATCH 06/26] Update docs

---
 fuc/api/pyvcf.py | 75 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 24 deletions(-)

diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index be634a7..294d8bd 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -4646,12 +4646,14 @@ def func(r):
 
     def slice(self, region):
         """
-        Slice the VcfFrame for the region.
+        Slice VcfFrame for specified region.
 
         Parameters
         ----------
         region : str
-            Region ('chrom:start-end').
+            Region to slice ('chrom:start-end'). The 'chr' string will be
+            handled automatically. For example, 'chr1' and '1' will have the
+            same effect in slicing.
 
         Returns
         -------
@@ -4661,6 +4663,8 @@ def slice(self, region):
         Examples
         --------
 
+        Assume we have following data:
+
         >>> from fuc import pyvcf
         >>> data = {
         ...     'CHROM': ['chr1', 'chr1', 'chr1', 'chr2'],
@@ -4672,36 +4676,59 @@ def slice(self, region):
         ...     'FILTER': ['.', '.', '.', '.'],
         ...     'INFO': ['.', '.', '.', '.'],
         ...     'FORMAT': ['GT', 'GT', 'GT', 'GT'],
-        ...     'Steven': ['0/1', '1/1', '0/1', '0/1']
+        ...     'A': ['0/1', '1/1', '0/0', '0/0'],
+        ...     'B': ['0/0', '0/0', '0/1', '0/1'],
         ... }
         >>> vf = pyvcf.VcfFrame.from_dict([], data)
         >>> vf.df
-          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT Steven
-        0  chr1  100  .   G   A    .      .    .     GT    0/1
-        1  chr1  205  .   T   C    .      .    .     GT    1/1
-        2  chr1  297  .   A   T    .      .    .     GT    0/1
-        3  chr2  101  .   C   A    .      .    .     GT    0/1
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  100  .   G   A    .      .    .     GT  0/1  0/0
+        1  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+        2  chr1  297  .   A   T    .      .    .     GT  0/0  0/1
+        3  chr2  101  .   C   A    .      .    .     GT  0/0  0/1
+
+        We can specify a full region:
+
         >>> vf.slice('chr1:101-300').df
-          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT Steven
-        0  chr1  205  .   T   C    .      .    .     GT    1/1
-        1  chr1  297  .   A   T    .      .    .     GT    0/1
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+        1  chr1  297  .   A   T    .      .    .     GT  0/0  0/1
+
+        We can specify a region without the 'chr' string:
+
+        >>> vf.slice('1:101-300').df
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+        1  chr1  297  .   A   T    .      .    .     GT  0/0  0/1
+
+        We can specify the contig only:
+
         >>> vf.slice('chr1').df
-          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT Steven
-        0  chr1  100  .   G   A    .      .    .     GT    0/1
-        1  chr1  205  .   T   C    .      .    .     GT    1/1
-        2  chr1  297  .   A   T    .      .    .     GT    0/1
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  100  .   G   A    .      .    .     GT  0/1  0/0
+        1  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+        2  chr1  297  .   A   T    .      .    .     GT  0/0  0/1
+
+        We can omit the start position:
+
         >>> vf.slice('chr1:-296').df
-          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT Steven
-        0  chr1  100  .   G   A    .      .    .     GT    0/1
-        1  chr1  205  .   T   C    .      .    .     GT    1/1
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  100  .   G   A    .      .    .     GT  0/1  0/0
+        1  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+
+        We can omit the end position as well:
+
         >>> vf.slice('chr1:101').df
-          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT Steven
-        0  chr1  205  .   T   C    .      .    .     GT    1/1
-        1  chr1  297  .   A   T    .      .    .     GT    0/1
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+        1  chr1  297  .   A   T    .      .    .     GT  0/0  0/1
+
+        You can also omit the end position this way:
+
         >>> vf.slice('chr1:101-').df
-          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT Steven
-        0  chr1  205  .   T   C    .      .    .     GT    1/1
-        1  chr1  297  .   A   T    .      .    .     GT    0/1
+          CHROM  POS ID REF ALT QUAL FILTER INFO FORMAT    A    B
+        0  chr1  205  .   T   C    .      .    .     GT  1/1  0/0
+        1  chr1  297  .   A   T    .      .    .     GT  0/0  0/1
         """
         if self.has_chr_prefix:
             region = common.update_chr_prefix(region, mode='add')

From aefc0cda23bd753ebb0a1774b8ed1f1dede5f8e7 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Thu, 9 Dec 2021 09:31:02 +0900
Subject: [PATCH 07/26] Fix typo

---
 fuc/api/pyvcf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py
index 294d8bd..41511a6 100644
--- a/fuc/api/pyvcf.py
+++ b/fuc/api/pyvcf.py
@@ -1806,7 +1806,7 @@ def from_file(
             except AttributeError:
                 pass
         else:
-            TypeError('Incorrect input type')
+            raise TypeError(f'Incorrect input type: {type(fn).__name__}')
         vf = cls.from_string(s)
         return vf
 

From ce34f36ee69b4ea40c43e89a98fa0a81f2e44598 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 10 Dec 2021 12:45:07 +0900
Subject: [PATCH 08/26] Update `ngs-hc`:

* Update :command:`ngs-hc` to set ``--native-pair-hmm-threads 1`` for
HaplotypeCaller.
---
 CHANGELOG.rst     | 1 +
 fuc/cli/ngs_hc.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c1723a9..896a9c1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,7 @@ Changelog
 
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
+* Update :command:`ngs-hc` to set ``--native-pair-hmm-threads 1`` for HaplotypeCaller.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 0e931d0..33930d2 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -144,6 +144,7 @@ def main(args):
             command += f' --QUIET'
             command += f' --java-options "{args.java1}"'
             command += f' -R {args.fasta}'
+            command += f' --native-pair-hmm-threads 1'
             command += f' --emit-ref-confidence GVCF'
             command += f' -I {r.BAM}'
             command += f' -O {args.output}/temp/{basename}.g.vcf'

From f626dde4f3d790d294af0d5f1c56d4402eedadef Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 10 Dec 2021 12:45:49 +0900
Subject: [PATCH 09/26] Update CHANGELOG.rst

---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 896a9c1..f68d635 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,7 +6,7 @@ Changelog
 
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
-* Update :command:`ngs-hc` to set ``--native-pair-hmm-threads 1`` for HaplotypeCaller.
+* Update :command:`ngs-hc` command to set ``--native-pair-hmm-threads 1`` for HaplotypeCaller.
 
 0.28.0 (2021-12-05)
 -------------------

From 5b4f81a57f3508198128f5b18b37c397fd071d22 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 10 Dec 2021 15:20:28 +0900
Subject: [PATCH 10/26] Update `ngs-hc`:

* Update :command:`ngs-hc` command to set ``-XX:ParallelGCThreads=1``
and ``-XX:ConcGCThreads=1`` for Java.
---
 CHANGELOG.rst     |  1 +
 fuc/cli/ngs_hc.py | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index f68d635..5662d11 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Changelog
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
 * Update :command:`ngs-hc` command to set ``--native-pair-hmm-threads 1`` for HaplotypeCaller.
+* Update :command:`ngs-hc` command to set ``-XX:ParallelGCThreads=1`` and ``-XX:ConcGCThreads=1`` for Java.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 33930d2..3a155e9 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -128,6 +128,8 @@ def main(args):
     else:
         remove = 'rm'
 
+    java_shared = '-XX:ParallelGCThreads=1 -XX:ConcGCThreads=1'
+
     basenames = []
 
     for i, r in df.iterrows():
@@ -142,7 +144,7 @@ def main(args):
 
             command = 'gatk HaplotypeCaller'
             command += f' --QUIET'
-            command += f' --java-options "{args.java1}"'
+            command += f' --java-options "{java_shared} {args.java1}"'
             command += f' -R {args.fasta}'
             command += f' --native-pair-hmm-threads 1'
             command += f' --emit-ref-confidence GVCF'
@@ -188,7 +190,7 @@ def main(args):
 
             command1 = 'gatk GenomicsDBImport'
             command1 += f' --QUIET'
-            command1 += f' --java-options "{args.java2}"'
+            command1 += f' --java-options "{java_shared} {args.java2}"'
             command1 += f' -L {chrom}'
             command1 += f' --genomicsdb-workspace-path {args.output}/temp/db-{chrom}'
             command1 += ' ' + ' '.join([f'-V {args.output}/temp/{x}.g.vcf' for x in basenames])
@@ -199,7 +201,7 @@ def main(args):
 
             command2 = 'gatk GenotypeGVCFs'
             command2 += f' --QUIET'
-            command2 += f' --java-options "{args.java2}"'
+            command2 += f' --java-options "{java_shared} {args.java2}"'
             command2 += f' -R {args.fasta}'
             command2 += f' -V gendb://{args.output}/temp/db-{chrom}'
             command2 += f' -O {args.output}/temp/{chrom}.joint.vcf'
@@ -213,7 +215,7 @@ def main(args):
 
             command3 = 'gatk VariantFiltration'
             command3 += f' --QUIET'
-            command3 += f' --java-options "{args.java2}"'
+            command3 += f' --java-options "{java_shared} {args.java2}"'
             command3 += f' -R {args.fasta}'
             command3 += f' -O {args.output}/temp/{chrom}.joint.filtered.vcf'
             command3 += f' --variant {args.output}/temp/{chrom}.joint.vcf'
@@ -247,7 +249,7 @@ def main(args):
 
         command = 'gatk GatherVcfs'
         command += f' --QUIET'
-        command += f' --java-options "{args.java2}"'
+        command += f' --java-options "{java_shared} {args.java2}"'
         command += f' -O {args.output}/merged.joint.filtered.vcf'
         command += ' ' + ' '.join([f'-I {args.output}/temp/{x}.joint.filtered.vcf' for x in chroms])
 

From 3d094e2e33b274ca76e522716f40fd47bde16c62 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 13 Dec 2021 10:38:18 +0900
Subject: [PATCH 11/26] Update `ngs-hc`:

* Add new argument ``--thread`` to :command:`ngs-hc` command. This
argument will be used to set ``--native-pair-hmm-threads`` for
HaplotypeCaller and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads``
for Java.
---
 CHANGELOG.rst     |  3 +--
 docs/cli.rst      |  5 +++--
 fuc/cli/ngs_hc.py | 11 +++++++++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5662d11..c60b161 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,8 +6,7 @@ Changelog
 
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
-* Update :command:`ngs-hc` command to set ``--native-pair-hmm-threads 1`` for HaplotypeCaller.
-* Update :command:`ngs-hc` command to set ``-XX:ParallelGCThreads=1`` and ``-XX:ConcGCThreads=1`` for Java.
+* Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for HaplotypeCaller and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/docs/cli.rst b/docs/cli.rst
index 848b2bf..f252853 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -844,8 +844,8 @@ ngs-hc
 .. code-block:: text
 
    $ fuc ngs-hc -h
-   usage: fuc ngs-hc [-h] [--bed PATH] [--dbsnp PATH] [--job TEXT] [--force]
-                     [--keep] [--posix]
+   usage: fuc ngs-hc [-h] [--bed PATH] [--dbsnp PATH] [--thread INT] [--job TEXT]
+                     [--force] [--keep] [--posix]
                      manifest fasta output qsub java1 java2
    
    Pipeline for germline short variant discovery.
@@ -869,6 +869,7 @@ ngs-hc
      -h, --help    Show this help message and exit.
      --bed PATH    BED file.
      --dbsnp PATH  VCF file from dbSNP.
+     --thread INT  Number of threads to use (default: 1).
      --job TEXT    Job submission ID for SGE.
      --force       Overwrite the output directory if it already exists.
      --keep        Keep temporary files.
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 3a155e9..668810b 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -87,6 +87,13 @@ def create_parser(subparsers):
         type=str,
         help='VCF file from dbSNP.'
     )
+    parser.add_argument(
+        '--thread',
+        metavar='INT',
+        type=int,
+        default=1,
+        help='Number of threads to use (default: 1).'
+    )
     parser.add_argument(
         '--job',
         metavar='TEXT',
@@ -128,7 +135,7 @@ def main(args):
     else:
         remove = 'rm'
 
-    java_shared = '-XX:ParallelGCThreads=1 -XX:ConcGCThreads=1'
+    java_shared = f'-XX:ParallelGCThreads={args.thread} -XX:ConcGCThreads={args.thread}'
 
     basenames = []
 
@@ -146,7 +153,7 @@ def main(args):
             command += f' --QUIET'
             command += f' --java-options "{java_shared} {args.java1}"'
             command += f' -R {args.fasta}'
-            command += f' --native-pair-hmm-threads 1'
+            command += f' --native-pair-hmm-threads {args.thread}'
             command += f' --emit-ref-confidence GVCF'
             command += f' -I {r.BAM}'
             command += f' -O {args.output}/temp/{basename}.g.vcf'

From 560eb1e2d6185fb44ad1fe228c6d2905fe9de93d Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 13 Dec 2021 17:08:07 +0900
Subject: [PATCH 12/26] Update `ngs-hc`

---
 CHANGELOG.rst     | 2 +-
 fuc/cli/ngs_hc.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index c60b161..2fa926c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,7 +6,7 @@ Changelog
 
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
-* Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for HaplotypeCaller and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
+* Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for GATK's :command:`HaplotypeCaller` command, ``--reader-threads`` for GATK's :command:`GenomicsDBImport` command, and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 668810b..9c581b9 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -199,6 +199,7 @@ def main(args):
             command1 += f' --QUIET'
             command1 += f' --java-options "{java_shared} {args.java2}"'
             command1 += f' -L {chrom}'
+            command1 += f' --reader-threads {args.thread}'
             command1 += f' --genomicsdb-workspace-path {args.output}/temp/db-{chrom}'
             command1 += ' ' + ' '.join([f'-V {args.output}/temp/{x}.g.vcf' for x in basenames])
 

From 596c6223e0515151689d520a582d7eab6162ac3a Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 10:04:30 +0900
Subject: [PATCH 13/26] Fix typo in docs

---
 docs/cli.rst         | 2 +-
 fuc/cli/bam_slice.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/cli.rst b/docs/cli.rst
index f252853..34cb41f 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -210,7 +210,7 @@ bam-slice
                     provide a BED file (compressed or uncompressed) to specify 
                     regions. Note that the 'chr' prefix in contig names (e.g. 
                     'chr1' vs. '1') will be automatically added or removed as 
-                    necessary to match the input VCF's contig names.
+                    necessary to match the input BED's contig names.
    
    Optional arguments:
      -h, --help     Show this help message and exit.
diff --git a/fuc/cli/bam_slice.py b/fuc/cli/bam_slice.py
index cbbabff..91324df 100644
--- a/fuc/cli/bam_slice.py
+++ b/fuc/cli/bam_slice.py
@@ -43,7 +43,7 @@ def create_parser(subparsers):
              "provide a BED file (compressed or uncompressed) to specify \n"
              "regions. Note that the 'chr' prefix in contig names (e.g. \n"
              "'chr1' vs. '1') will be automatically added or removed as \n"
-             "necessary to match the input VCF's contig names."
+             "necessary to match the input BED's contig names."
     )
     parser.add_argument(
         '--format',

From eab410b0630e1f2e1f86d4a72109d20d7336a357 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 11:29:43 +0900
Subject: [PATCH 14/26] Update `ngs-hc`:

* Add new argument ``--batch`` to :command:`ngs-hc` command. This
argument sets the batch size used for :command:`GenomicsDBImport`
command.
---
 CHANGELOG.rst     |  1 +
 docs/cli.rst      | 10 ++++++++--
 fuc/cli/ngs_hc.py | 13 +++++++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2fa926c..5ab3497 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Changelog
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
 * Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for GATK's :command:`HaplotypeCaller` command, ``--reader-threads`` for GATK's :command:`GenomicsDBImport` command, and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
+* Add new argument ``--batch`` to :command:`ngs-hc` command. This argument sets the batch size used for :command:`GenomicsDBImport` command.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/docs/cli.rst b/docs/cli.rst
index 34cb41f..d3c021d 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -844,8 +844,8 @@ ngs-hc
 .. code-block:: text
 
    $ fuc ngs-hc -h
-   usage: fuc ngs-hc [-h] [--bed PATH] [--dbsnp PATH] [--thread INT] [--job TEXT]
-                     [--force] [--keep] [--posix]
+   usage: fuc ngs-hc [-h] [--bed PATH] [--dbsnp PATH] [--thread INT]
+                     [--batch INT] [--job TEXT] [--force] [--keep] [--posix]
                      manifest fasta output qsub java1 java2
    
    Pipeline for germline short variant discovery.
@@ -870,6 +870,12 @@ ngs-hc
      --bed PATH    BED file.
      --dbsnp PATH  VCF file from dbSNP.
      --thread INT  Number of threads to use (default: 1).
+     --batch INT   Batch size used for the GenomicsDBImport command. This 
+                   argument controls the number of samples for which readers 
+                   are open at once and therefore provides a way to minimize 
+                   memory consumption. The size of 0 (default) means no 
+                   batching (i.e. readers for all samples will be opened at 
+                   once).
      --job TEXT    Job submission ID for SGE.
      --force       Overwrite the output directory if it already exists.
      --keep        Keep temporary files.
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 9c581b9..2081d3e 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -94,6 +94,18 @@ def create_parser(subparsers):
         default=1,
         help='Number of threads to use (default: 1).'
     )
+    parser.add_argument(
+        '--batch',
+        metavar='INT',
+        type=int,
+        default=0,
+        help='Batch size used for the GenomicsDBImport command. This \n'
+             'argument controls the number of samples for which readers \n'
+             'are open at once and therefore provides a way to minimize \n'
+             'memory consumption. The size of 0 (default) means no \n'
+             'batching (i.e. readers for all samples will be opened at \n'
+             'once).'
+    )
     parser.add_argument(
         '--job',
         metavar='TEXT',
@@ -200,6 +212,7 @@ def main(args):
             command1 += f' --java-options "{java_shared} {args.java2}"'
             command1 += f' -L {chrom}'
             command1 += f' --reader-threads {args.thread}'
+            command1 += f' --batch-size {args.batch}'
             command1 += f' --genomicsdb-workspace-path {args.output}/temp/db-{chrom}'
             command1 += ' ' + ' '.join([f'-V {args.output}/temp/{x}.g.vcf' for x in basenames])
 

From e5e0315d4e4ca96e4b55ecfa57e289a51e2cb697 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 12:45:59 +0900
Subject: [PATCH 15/26] Update docs

---
 docs/cli.rst      | 11 +++++------
 fuc/cli/ngs_hc.py | 11 +++++------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/docs/cli.rst b/docs/cli.rst
index d3c021d..b75499c 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -870,12 +870,11 @@ ngs-hc
      --bed PATH    BED file.
      --dbsnp PATH  VCF file from dbSNP.
      --thread INT  Number of threads to use (default: 1).
-     --batch INT   Batch size used for the GenomicsDBImport command. This 
-                   argument controls the number of samples for which readers 
-                   are open at once and therefore provides a way to minimize 
-                   memory consumption. The size of 0 (default) means no 
-                   batching (i.e. readers for all samples will be opened at 
-                   once).
+     --batch INT   Batch size used for GenomicsDBImport (default: 0). This 
+                   controls the number of samples for which readers are 
+                   open at once and therefore provides a way to minimize 
+                   memory consumption. The size of 0 means no batching (i.e. 
+                   readers for all samples will be opened at once).
      --job TEXT    Job submission ID for SGE.
      --force       Overwrite the output directory if it already exists.
      --keep        Keep temporary files.
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 2081d3e..d0e10e4 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -99,12 +99,11 @@ def create_parser(subparsers):
         metavar='INT',
         type=int,
         default=0,
-        help='Batch size used for the GenomicsDBImport command. This \n'
-             'argument controls the number of samples for which readers \n'
-             'are open at once and therefore provides a way to minimize \n'
-             'memory consumption. The size of 0 (default) means no \n'
-             'batching (i.e. readers for all samples will be opened at \n'
-             'once).'
+        help='Batch size used for GenomicsDBImport (default: 0). This \n'
+             'controls the number of samples for which readers are \n'
+             'open at once and therefore provides a way to minimize \n'
+             'memory consumption. The size of 0 means no batching (i.e. \n'
+             'readers for all samples will be opened at once).'
     )
     parser.add_argument(
         '--job',

From 9e2b2430d8be2c7e5dfe03bd259c43a2f9ea0eb8 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 12:51:39 +0900
Subject: [PATCH 16/26] Update `ngs-bam2fq`:

* Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs
an error like ``Unable to run job: denied: "XXXXX" is not a valid object
name (cannot start with a digit)``.
---
 CHANGELOG.rst         | 1 +
 fuc/cli/ngs_bam2fq.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 5ab3497..74265f0 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -8,6 +8,7 @@ Changelog
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
 * Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for GATK's :command:`HaplotypeCaller` command, ``--reader-threads`` for GATK's :command:`GenomicsDBImport` command, and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
 * Add new argument ``--batch`` to :command:`ngs-hc` command. This argument sets the batch size used for :command:`GenomicsDBImport` command.
+* Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs an error like ``Unable to run job: denied: "XXXXX" is not a valid object name (cannot start with a digit)``.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/cli/ngs_bam2fq.py b/fuc/cli/ngs_bam2fq.py
index da74e58..7561c20 100644
--- a/fuc/cli/ngs_bam2fq.py
+++ b/fuc/cli/ngs_bam2fq.py
@@ -115,6 +115,6 @@ def main(args):
 
 for name in ${{names[@]}}
 do
-  qsub {args.qsub} -S /bin/bash -e $p/log -o $p/log -N $name $p/shell/$name.sh
+  qsub {args.qsub} -S /bin/bash -e $p/log -o $p/log -N S1-$name $p/shell/$name.sh
 done
 """)

From 40d952e72196a668e04aa3c0484db4ee00a05ae7 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 16:00:15 +0900
Subject: [PATCH 17/26] Update `ngs-hc`:

* Update :command:`ngs-hc` command so that when ``--posix`` is set it
will use ``--genomicsdb-shared-posixfs-optimizations`` argument from
:command:`GenomicsDBImport` command instead of doing ``export
TILEDB_DISABLE_FILE_LOCKING=1``.
---
 CHANGELOG.rst     |  1 +
 docs/cli.rst      |  8 +++++++-
 fuc/cli/ngs_hc.py | 22 +++++++++-------------
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 74265f0..d4dc878 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,6 +9,7 @@ Changelog
 * Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for GATK's :command:`HaplotypeCaller` command, ``--reader-threads`` for GATK's :command:`GenomicsDBImport` command, and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
 * Add new argument ``--batch`` to :command:`ngs-hc` command. This argument sets the batch size used for :command:`GenomicsDBImport` command.
 * Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs an error like ``Unable to run job: denied: "XXXXX" is not a valid object name (cannot start with a digit)``.
+* Update :command:`ngs-hc` command so that when ``--posix`` is set it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from :command:`GenomicsDBImport` command instead of doing ``export TILEDB_DISABLE_FILE_LOCKING=1``.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/docs/cli.rst b/docs/cli.rst
index b75499c..f702704 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -878,7 +878,13 @@ ngs-hc
      --job TEXT    Job submission ID for SGE.
      --force       Overwrite the output directory if it already exists.
      --keep        Keep temporary files.
-     --posix       Optimize for a POSIX filesystem.
+     --posix       Set GenomicsDBImport to allow for optimizations to improve 
+                   the usability and performance for shared Posix Filesystems 
+                   (e.g. NFS, Lustre). If set, file level locking is disabled 
+                   and file system writes are minimized by keeping a higher 
+                   number of file descriptors open for longer periods of time. 
+                   Use with --batch if keeping a large number of file 
+                   descriptors open is an issue.
    
    [Example] Specify queue:
      $ fuc ngs-hc \
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index d0e10e4..504a49e 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -124,7 +124,13 @@ def create_parser(subparsers):
     parser.add_argument(
         '--posix',
         action='store_true',
-        help='Optimize for a POSIX filesystem.'
+        help='Set GenomicsDBImport to allow for optimizations to improve \n'
+             'the usability and performance for shared Posix Filesystems \n'
+             '(e.g. NFS, Lustre). If set, file level locking is disabled \n'
+             'and file system writes are minimized by keeping a higher \n'
+             'number of file descriptors open for longer periods of time. \n'
+             'Use with --batch if keeping a large number of file \n'
+             'descriptors open is an issue.'
     )
 
 def main(args):
@@ -193,15 +199,6 @@ def main(args):
     for chrom in chroms:
         with open(f'{args.output}/shell/S2-{chrom}.sh', 'w') as f:
 
-            ####################
-            # POSIX filesystem #
-            ####################
-
-            if args.posix:
-                export = 'export TILEDB_DISABLE_FILE_LOCKING=1'
-            else:
-                export = '# export TILEDB_DISABLE_FILE_LOCKING=1'
-
             ####################
             # GenomicsDBImport #
             ####################
@@ -211,6 +208,8 @@ def main(args):
             command1 += f' --java-options "{java_shared} {args.java2}"'
             command1 += f' -L {chrom}'
             command1 += f' --reader-threads {args.thread}'
+            if args.posix:
+                command1 += ' --genomicsdb-shared-posixfs-optimizations'
             command1 += f' --batch-size {args.batch}'
             command1 += f' --genomicsdb-workspace-path {args.output}/temp/db-{chrom}'
             command1 += ' ' + ' '.join([f'-V {args.output}/temp/{x}.g.vcf' for x in basenames])
@@ -245,9 +244,6 @@ def main(args):
             f.write(
 f"""#!/bin/bash
 
-# Optimize for POSIX filesystem.
-{export}
-
 # Activate conda environment.
 source activate {api.common.conda_env()}
 

From 963015dca95c9c96ba391e8e9d4ca0bcdd4a64d4 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 16:30:22 +0900
Subject: [PATCH 18/26] Update `ngs-fq2bam`:

* Add new argument ``--job`` argument to :command:`ngs-fq2bam` command.
* Update :command:`ngs-fq2bam` command so that BAM creation step and BAM
processing step are now in one step.
---
 CHANGELOG.rst         |  2 +
 docs/cli.rst          |  3 +-
 fuc/cli/ngs_fq2bam.py | 95 +++++++++++++++++++++++--------------------
 3 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d4dc878..4e9789b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -10,6 +10,8 @@ Changelog
 * Add new argument ``--batch`` to :command:`ngs-hc` command. This argument sets the batch size used for :command:`GenomicsDBImport` command.
 * Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs an error like ``Unable to run job: denied: "XXXXX" is not a valid object name (cannot start with a digit)``.
 * Update :command:`ngs-hc` command so that when ``--posix`` is set it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from :command:`GenomicsDBImport` command instead of doing ``export TILEDB_DISABLE_FILE_LOCKING=1``.
+* Add new argument ``--job`` argument to :command:`ngs-fq2bam` command.
+* Update :command:`ngs-fq2bam` command so that BAM creation step and BAM processing step are now in one step.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/docs/cli.rst b/docs/cli.rst
index f702704..7c8eb4b 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -773,7 +773,7 @@ ngs-fq2bam
 
    $ fuc ngs-fq2bam -h
    usage: fuc ngs-fq2bam [-h] [--bed PATH] [--thread INT] [--platform TEXT]
-                         [--force] [--keep]
+                         [--job TEXT] [--force] [--keep]
                          manifest fasta output qsub1 qsub2 java vcf [vcf ...]
    
    Pipeline for converting FASTQ files to analysis-ready BAM files.
@@ -813,6 +813,7 @@ ngs-fq2bam
      --bed PATH       BED file.
      --thread INT     Number of threads to use (default: 1).
      --platform TEXT  Sequencing platform (default: 'Illumina').
+     --job TEXT       Job submission ID for SGE.
      --force          Overwrite the output directory if it already exists.
      --keep           Keep temporary files.
    
diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py
index c39217b..299e9d6 100644
--- a/fuc/cli/ngs_fq2bam.py
+++ b/fuc/cli/ngs_fq2bam.py
@@ -116,6 +116,12 @@ def create_parser(subparsers):
         default='Illumina',
         help="Sequencing platform (default: 'Illumina')."
     )
+    parser.add_argument(
+        '--job',
+        metavar='TEXT',
+        type=str,
+        help='Job submission ID for SGE.'
+    )
     parser.add_argument(
         '--force',
         action='store_true',
@@ -153,63 +159,49 @@ def main(args):
             # BWA-MEM #
             ###########
 
-            f.write(
-f"""#!/bin/bash
-
-# Activate conda environment.
-source activate {api.common.conda_env()}
-
-# Get read group information.
-first=`zcat {r.Read1} | head -1`
-flowcell=`echo "$first" | awk -F " " '{{print $1}}' | awk -F ":" '{{print $3}}'`
-barcode=`echo "$first" | awk -F " " '{{print $2}}' | awk -F ":" '{{print $4}}'`
-group="@RG\\tID:$flowcell\\tPU:$flowcell.$barcode\\tSM:{r.Name}\\tPL:{args.platform}\\tLB:{r.Name}"
-
-# Align and sort seuqnece reads. Assign read group as well.
-bwa mem -M -R $group -t {args.thread} {args.fasta} {r.Read1} {r.Read2} | samtools sort -@ {args.thread} -o {args.output}/temp/{r.Name}.sorted.bam -
-""")
-
-        with open(f'{args.output}/shell/S2-{r.Name}.sh', 'w') as f:
+            command1 = 'bwa mem'
+            command1 += f' -M -R $group -t {args.thread} '
+            command1 += f' {args.fasta} {r.Read1} {r.Read2} |'
+            command1 += f' samtools sort -@ {args.thread}'
+            command1 += f' -o {args.output}/temp/{r.Name}.sorted.bam -'
 
             ##################
             # MarkDuplicates #
             ##################
 
-            command1 = 'gatk MarkDuplicates'
-            command1 += f' --QUIET'
-            command1 += f' --java-options "{args.java}"'
-            command1 += f' -I {args.output}/temp/{r.Name}.sorted.bam'
-            command1 += f' -O {args.output}/temp/{r.Name}.sorted.markdup.bam'
-            command1 += f' -M {args.output}/temp/{r.Name}.metrics'
+            command2 = 'gatk MarkDuplicates'
+            command2 += f' --QUIET'
+            command2 += f' --java-options "{args.java}"'
+            command2 += f' -I {args.output}/temp/{r.Name}.sorted.bam'
+            command2 += f' -O {args.output}/temp/{r.Name}.sorted.markdup.bam'
+            command2 += f' -M {args.output}/temp/{r.Name}.metrics'
 
             ####################
             # BaseRecalibrator #
             ####################
 
-            command2 = 'gatk BaseRecalibrator'
-            command2 += f' --QUIET'
-            command2 += f' --java-options "{args.java}"'
-            command2 += f' -R {args.fasta}'
-            command2 += f' -I {args.output}/temp/{r.Name}.sorted.markdup.bam'
-            command2 += f' -O {args.output}/temp/{r.Name}.table'
-            command2 += ' ' + ' '.join([f'--known-sites {x}' for x in args.vcf])
-
+            command3 = 'gatk BaseRecalibrator'
+            command3 += f' --QUIET'
+            command3 += f' --java-options "{args.java}"'
+            command3 += f' -R {args.fasta}'
+            command3 += f' -I {args.output}/temp/{r.Name}.sorted.markdup.bam'
+            command3 += f' -O {args.output}/temp/{r.Name}.table'
+            command3 += ' ' + ' '.join([f'--known-sites {x}' for x in args.vcf])
             if args.bed is not None:
-                command2 += f' -L {args.bed}'
+                command3 += f' -L {args.bed}'
 
             #############
             # ApplyBQSR #
             #############
 
-            command3 = 'gatk ApplyBQSR'
-            command3 += f' --QUIET'
-            command3 += f' --java-options "{args.java}"'
-            command3 += f' -bqsr {args.output}/temp/{r.Name}.table'
-            command3 += f' -I {args.output}/temp/{r.Name}.sorted.markdup.bam'
-            command3 += f' -O {args.output}/{r.Name}.sorted.markdup.recal.bam'
-
+            command4 = 'gatk ApplyBQSR'
+            command4 += f' --QUIET'
+            command4 += f' --java-options "{args.java}"'
+            command4 += f' -bqsr {args.output}/temp/{r.Name}.table'
+            command4 += f' -I {args.output}/temp/{r.Name}.sorted.markdup.bam'
+            command4 += f' -O {args.output}/{r.Name}.sorted.markdup.recal.bam'
             if args.bed is not None:
-                command3 += f' -L {args.bed}'
+                command4 += f' -L {args.bed}'
 
             f.write(
 f"""#!/bin/bash
@@ -217,17 +209,26 @@ def main(args):
 # Activate conda environment.
 source activate {api.common.conda_env()}
 
-# Mark duplicate reads.
+# Get read group information.
+first=`zcat {r.Read1} | head -1`
+flowcell=`echo "$first" | awk -F " " '{{print $1}}' | awk -F ":" '{{print $3}}'`
+barcode=`echo "$first" | awk -F " " '{{print $2}}' | awk -F ":" '{{print $4}}'`
+group="@RG\\tID:$flowcell\\tPU:$flowcell.$barcode\\tSM:{r.Name}\\tPL:{args.platform}\\tLB:{r.Name}"
+
+# Align and sort seuqnece reads. Assign read group as well.
 {command1}
 
+# Mark duplicate reads.
+{command2}
+
 # Index BAM file.
 samtools index {args.output}/temp/{r.Name}.sorted.markdup.bam
 
 # Build BQSR model.
-{command2}
+{command3}
 
 # Apply BQSR model.
-{command3}
+{command4}
 
 # Remove temporary files.
 {remove} {args.output}/temp/{r.Name}.metrics
@@ -237,6 +238,11 @@ def main(args):
 {remove} {args.output}/temp/{r.Name}.sorted.markdup.bam.bai
 """)
 
+    if args.job is None:
+        jid = ''
+    else:
+        jid = '-' + args.job
+
     with open(f'{args.output}/shell/qsubme.sh', 'w') as f:
         f.write(
 f"""#!/bin/bash
@@ -247,7 +253,6 @@ def main(args):
 
 for sample in ${{samples[@]}}
 do
-  qsub {args.qsub1} -S /bin/bash -e $p/log -o $p/log -N S1-$sample $p/shell/S1-$sample.sh
-  qsub {args.qsub2} -S /bin/bash -e $p/log -o $p/log -N S2-$sample -hold_jid S1-$sample $p/shell/S2-$sample.sh
+  qsub {args.qsub1} -S /bin/bash -e $p/log -o $p/log -N S1-$sample{jid} $p/shell/S1-$sample.sh
 done
 """)

From 492de555c4863576f20a3a35097863dff570b47e Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 16:42:55 +0900
Subject: [PATCH 19/26] Update `ngs-fq2bam`

---
 docs/cli.rst          | 11 ++---------
 fuc/cli/ngs_fq2bam.py | 17 +++--------------
 2 files changed, 5 insertions(+), 23 deletions(-)

diff --git a/docs/cli.rst b/docs/cli.rst
index 7c8eb4b..dc747a9 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -774,7 +774,7 @@ ngs-fq2bam
    $ fuc ngs-fq2bam -h
    usage: fuc ngs-fq2bam [-h] [--bed PATH] [--thread INT] [--platform TEXT]
                          [--job TEXT] [--force] [--keep]
-                         manifest fasta output qsub1 qsub2 java vcf [vcf ...]
+                         manifest fasta output qsub java vcf [vcf ...]
    
    Pipeline for converting FASTQ files to analysis-ready BAM files.
    
@@ -798,12 +798,7 @@ ngs-fq2bam
      manifest         Sample manifest CSV file.
      fasta            Reference FASTA file.
      output           Output directory.
-     qsub1            SGE resoruce to request with qsub for read alignment 
-                      and sorting. Since both tasks support multithreading, 
-                      it is recommended to speicfy a parallel environment (PE) 
-                      to speed up the process (also see --thread).
-     qsub2            SGE resoruce to request with qsub for the rest of the 
-                      tasks, which do not support multithreading.
+     qsub             SGE resoruce to request for qsub.
      java             Java resoruce to request for GATK.
      vcf              One or more reference VCF files containing known variant 
                       sites (e.g. 1000 Genomes Project).
@@ -823,7 +818,6 @@ ngs-fq2bam
      ref.fa \
      output_dir \
      "-q queue_name -pe pe_name 10" \
-     "-q queue_name" \
      "-Xmx15g -Xms15g" \
      1.vcf 2.vcf 3.vcf \
      --thread 10
@@ -834,7 +828,6 @@ ngs-fq2bam
      ref.fa \
      output_dir \
      "-l h='node_A|node_B' -pe pe_name 10" \
-     "-l h='node_A|node_B'" \
      "-Xmx15g -Xms15g" \
      1.vcf 2.vcf 3.vcf \
      --thread 10
diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py
index 299e9d6..979da5f 100644
--- a/fuc/cli/ngs_fq2bam.py
+++ b/fuc/cli/ngs_fq2bam.py
@@ -33,7 +33,6 @@
   ref.fa \\
   output_dir \\
   "-q queue_name -pe pe_name 10" \\
-  "-q queue_name" \\
   "-Xmx15g -Xms15g" \\
   1.vcf 2.vcf 3.vcf \\
   --thread 10
@@ -44,7 +43,6 @@
   ref.fa \\
   output_dir \\
   "-l h='node_A|node_B' -pe pe_name 10" \\
-  "-l h='node_A|node_B'" \\
   "-Xmx15g -Xms15g" \\
   1.vcf 2.vcf 3.vcf \\
   --thread 10
@@ -72,18 +70,9 @@ def create_parser(subparsers):
         help='Output directory.'
     )
     parser.add_argument(
-        'qsub1',
+        'qsub',
         type=str,
-        help="SGE resoruce to request with qsub for read alignment \n"
-             "and sorting. Since both tasks support multithreading, \n"
-             "it is recommended to speicfy a parallel environment (PE) \n"
-             "to speed up the process (also see --thread)."
-    )
-    parser.add_argument(
-        'qsub2',
-        type=str,
-        help="SGE resoruce to request with qsub for the rest of the \n"
-             "tasks, which do not support multithreading."
+        help='SGE resoruce to request for qsub.'
     )
     parser.add_argument(
         'java',
@@ -253,6 +242,6 @@ def main(args):
 
 for sample in ${{samples[@]}}
 do
-  qsub {args.qsub1} -S /bin/bash -e $p/log -o $p/log -N S1-$sample{jid} $p/shell/S1-$sample.sh
+  qsub {args.qsub} -S /bin/bash -e $p/log -o $p/log -N S1-$sample{jid} $p/shell/S1-$sample.sh
 done
 """)

From e71b613c4bfd59dd481d96c133a6dc15f1c9ffaa Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 16:44:08 +0900
Subject: [PATCH 20/26] Update docs

---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 4e9789b..0b4deb1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -10,7 +10,7 @@ Changelog
 * Add new argument ``--batch`` to :command:`ngs-hc` command. This argument sets the batch size used for :command:`GenomicsDBImport` command.
 * Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs an error like ``Unable to run job: denied: "XXXXX" is not a valid object name (cannot start with a digit)``.
 * Update :command:`ngs-hc` command so that when ``--posix`` is set it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from :command:`GenomicsDBImport` command instead of doing ``export TILEDB_DISABLE_FILE_LOCKING=1``.
-* Add new argument ``--job`` argument to :command:`ngs-fq2bam` command.
+* Add new argument ``--job`` to :command:`ngs-fq2bam` command.
 * Update :command:`ngs-fq2bam` command so that BAM creation step and BAM processing step are now in one step.
 
 0.28.0 (2021-12-05)

From 300706e9e6e39a9ab335e6fe45e89e0c2867958c Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 17:11:31 +0900
Subject: [PATCH 21/26] Update `ngs-fq2bam`:

* Update :command:`ngs-fq2bam` command so that ``--thread`` is now also
used to set ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for
Java.
---
 CHANGELOG.rst         | 5 +++--
 fuc/cli/ngs_fq2bam.py | 8 +++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 0b4deb1..0ff768e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -7,11 +7,12 @@ Changelog
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.
 * Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for GATK's :command:`HaplotypeCaller` command, ``--reader-threads`` for GATK's :command:`GenomicsDBImport` command, and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
-* Add new argument ``--batch`` to :command:`ngs-hc` command. This argument sets the batch size used for :command:`GenomicsDBImport` command.
+* Add new argument ``--batch`` to :command:`ngs-hc` command. This argument will be used to set ``--batch-size`` for GATK's :command:`GenomicsDBImport` command.
 * Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs an error like ``Unable to run job: denied: "XXXXX" is not a valid object name (cannot start with a digit)``.
-* Update :command:`ngs-hc` command so that when ``--posix`` is set it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from :command:`GenomicsDBImport` command instead of doing ``export TILEDB_DISABLE_FILE_LOCKING=1``.
+* Update :command:`ngs-hc` command so that when ``--posix`` is set, it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from GATK's :command:`GenomicsDBImport` command instead of exporting shell variable (i.e. ``export TILEDB_DISABLE_FILE_LOCKING=1``).
 * Add new argument ``--job`` to :command:`ngs-fq2bam` command.
 * Update :command:`ngs-fq2bam` command so that BAM creation step and BAM processing step are now in one step.
+* Update :command:`ngs-fq2bam` command so that ``--thread`` is now also used to set ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py
index 979da5f..0aacd75 100644
--- a/fuc/cli/ngs_fq2bam.py
+++ b/fuc/cli/ngs_fq2bam.py
@@ -141,6 +141,8 @@ def main(args):
     else:
         remove = 'rm'
 
+    java_shared = f'-XX:ParallelGCThreads={args.thread} -XX:ConcGCThreads={args.thread}'
+
     for i, r in df.iterrows():
         with open(f'{args.output}/shell/S1-{r.Name}.sh', 'w') as f:
 
@@ -160,7 +162,7 @@ def main(args):
 
             command2 = 'gatk MarkDuplicates'
             command2 += f' --QUIET'
-            command2 += f' --java-options "{args.java}"'
+            command2 += f' --java-options "{java_shared} {args.java}"'
             command2 += f' -I {args.output}/temp/{r.Name}.sorted.bam'
             command2 += f' -O {args.output}/temp/{r.Name}.sorted.markdup.bam'
             command2 += f' -M {args.output}/temp/{r.Name}.metrics'
@@ -171,7 +173,7 @@ def main(args):
 
             command3 = 'gatk BaseRecalibrator'
             command3 += f' --QUIET'
-            command3 += f' --java-options "{args.java}"'
+            command3 += f' --java-options "{java_shared} {args.java}"'
             command3 += f' -R {args.fasta}'
             command3 += f' -I {args.output}/temp/{r.Name}.sorted.markdup.bam'
             command3 += f' -O {args.output}/temp/{r.Name}.table'
@@ -185,7 +187,7 @@ def main(args):
 
             command4 = 'gatk ApplyBQSR'
             command4 += f' --QUIET'
-            command4 += f' --java-options "{args.java}"'
+            command4 += f' --java-options "{java_shared} {args.java}"'
             command4 += f' -bqsr {args.output}/temp/{r.Name}.table'
             command4 += f' -I {args.output}/temp/{r.Name}.sorted.markdup.bam'
             command4 += f' -O {args.output}/{r.Name}.sorted.markdup.recal.bam'

From 5efb28ba503ac8b5d041573982f3b84c7c122b32 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 15 Dec 2021 17:23:04 +0900
Subject: [PATCH 22/26] Update `ngs-hc`

---
 CHANGELOG.rst     |  2 +-
 fuc/cli/ngs_hc.py | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 0ff768e..a38016b 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,7 +9,7 @@ Changelog
 * Add new argument ``--thread`` to :command:`ngs-hc` command. This argument will be used to set ``--native-pair-hmm-threads`` for GATK's :command:`HaplotypeCaller` command, ``--reader-threads`` for GATK's :command:`GenomicsDBImport` command, and ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
 * Add new argument ``--batch`` to :command:`ngs-hc` command. This argument will be used to set ``--batch-size`` for GATK's :command:`GenomicsDBImport` command.
 * Update :command:`ngs-bam2fq` command to fix the SGE issue that outputs an error like ``Unable to run job: denied: "XXXXX" is not a valid object name (cannot start with a digit)``.
-* Update :command:`ngs-hc` command so that when ``--posix`` is set, it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from GATK's :command:`GenomicsDBImport` command instead of exporting shell variable (i.e. ``export TILEDB_DISABLE_FILE_LOCKING=1``).
+* Update :command:`ngs-hc` command so that when ``--posix`` is set, it will use ``--genomicsdb-shared-posixfs-optimizations`` argument from GATK's :command:`GenomicsDBImport` command in addition to exporting relevant shell variable (i.e. ``export TILEDB_DISABLE_FILE_LOCKING=1``).
 * Add new argument ``--job`` to :command:`ngs-fq2bam` command.
 * Update :command:`ngs-fq2bam` command so that BAM creation step and BAM processing step are now in one step.
 * Update :command:`ngs-fq2bam` command so that ``--thread`` is now also used to set ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py
index 504a49e..b39e29d 100644
--- a/fuc/cli/ngs_hc.py
+++ b/fuc/cli/ngs_hc.py
@@ -199,6 +199,15 @@ def main(args):
     for chrom in chroms:
         with open(f'{args.output}/shell/S2-{chrom}.sh', 'w') as f:
 
+            ####################
+            # POSIX filesystem #
+            ####################
+
+            if args.posix:
+                export = 'export TILEDB_DISABLE_FILE_LOCKING=1'
+            else:
+                export = '# export TILEDB_DISABLE_FILE_LOCKING=1'
+
             ####################
             # GenomicsDBImport #
             ####################
@@ -244,6 +253,9 @@ def main(args):
             f.write(
 f"""#!/bin/bash
 
+# Optimize for POSIX filesystem.
+{export}
+
 # Activate conda environment.
 source activate {api.common.conda_env()}
 

From a01fe8f76baaf9f5251324d100b47a412f586ef6 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Thu, 16 Dec 2021 10:22:57 +0900
Subject: [PATCH 23/26] Update `ngs-fq2bam`

---
 fuc/cli/ngs_fq2bam.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py
index 0aacd75..239385e 100644
--- a/fuc/cli/ngs_fq2bam.py
+++ b/fuc/cli/ngs_fq2bam.py
@@ -151,7 +151,7 @@ def main(args):
             ###########
 
             command1 = 'bwa mem'
-            command1 += f' -M -R $group -t {args.thread} '
+            command1 += f' -M -R $group -t {args.thread}'
             command1 += f' {args.fasta} {r.Read1} {r.Read2} |'
             command1 += f' samtools sort -@ {args.thread}'
             command1 += f' -o {args.output}/temp/{r.Name}.sorted.bam -'
@@ -209,6 +209,13 @@ def main(args):
 # Align and sort seuqnece reads. Assign read group as well.
 {command1}
 
+if [ $? -eq 0 ]; then
+    echo "Successfully finished aligning and sorting seuqnece reads"
+else
+    echo "Failed to align and sort seuqnece reads, aborting"
+    exit 1
+fi
+
 # Mark duplicate reads.
 {command2}
 

From 36aee4c672259f614790651036388df3674947ae Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Thu, 16 Dec 2021 11:25:34 +0900
Subject: [PATCH 24/26] Update `ngs-fq2bam`

---
 fuc/cli/ngs_fq2bam.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py
index 239385e..c207b0a 100644
--- a/fuc/cli/ngs_fq2bam.py
+++ b/fuc/cli/ngs_fq2bam.py
@@ -239,7 +239,7 @@ def main(args):
     if args.job is None:
         jid = ''
     else:
-        jid = '-' + args.job
+        jid = args.job + '-'
 
     with open(f'{args.output}/shell/qsubme.sh', 'w') as f:
         f.write(
@@ -251,6 +251,6 @@ def main(args):
 
 for sample in ${{samples[@]}}
 do
-  qsub {args.qsub} -S /bin/bash -e $p/log -o $p/log -N S1-$sample{jid} $p/shell/S1-$sample.sh
+  qsub {args.qsub} -S /bin/bash -e $p/log -o $p/log -N {jid}S1-$sample $p/shell/S1-$sample.sh
 done
 """)

From d5250d17d124aacd103edc799001dda1db4b545f Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Fri, 17 Dec 2021 16:45:25 +0900
Subject: [PATCH 25/26] Add `common.parse_list_or_file`

---
 CHANGELOG.rst     |  1 +
 fuc/api/common.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a38016b..9977e01 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -13,6 +13,7 @@ Changelog
 * Add new argument ``--job`` to :command:`ngs-fq2bam` command.
 * Update :command:`ngs-fq2bam` command so that BAM creation step and BAM processing step are now in one step.
 * Update :command:`ngs-fq2bam` command so that ``--thread`` is now also used to set ``-XX:ParallelGCThreads`` and ``-XX:ConcGCThreads`` for Java.
+* Add new method :meth:`common.parse_list_or_file`.
 
 0.28.0 (2021-12-05)
 -------------------
diff --git a/fuc/api/common.py b/fuc/api/common.py
index 164b314..b352265 100644
--- a/fuc/api/common.py
+++ b/fuc/api/common.py
@@ -1379,3 +1379,54 @@ def add(x):
         return modes[mode](regions)
 
     return [modes[mode](x) for x in regions]
+
+def parse_list_or_file(obj, extensions=['txt', 'tsv', 'csv', 'list']):
+    """
+    Parse the input variable and then return a list of items.
+
+    This method is useful when parsing a command line argument that accepts
+    either a list of items or a text file containing one item per line.
+
+    Parameters
+    ----------
+    obj : str or list
+        Object to be tested. Must be non-empty.
+    extensions : list, default: ['txt', 'tsv', 'csv', 'list']
+        Recognized file extensions.
+
+    Returns
+    -------
+    list
+        List of items.
+
+    Examples
+    --------
+
+    >>> from fuc import common
+    >>> common.parse_list_or_file(['A', 'B', 'C'])
+    ['A', 'B', 'C']
+    >>> common.parse_list_or_file('A')
+    ['A']
+    >>> common.parse_list_or_file('example.txt')
+    ['A', 'B', 'C']
+    >>> common.parse_list_or_file(['example.txt'])
+    ['A', 'B', 'C']
+    """
+    if not isinstance(obj, str) and not isinstance(obj, list):
+        raise TypeError(
+            f'Input must be str or list, not {type(obj).__name__}')
+
+    if not obj:
+        raise ValueError('Input is empty')
+
+    if isinstance(obj, str):
+        obj = [obj]
+
+    if len(obj) > 1:
+        return obj
+
+    for extension in extensions:
+        if obj[0].endswith(f'.{extension}'):
+            return convert_file2list(obj[0])
+
+    return obj

From ab764b0e092df36594422901118b60deb00e9409 Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Sun, 19 Dec 2021 16:07:37 +0900
Subject: [PATCH 26/26] Update CHANGELOG.rst

---
 CHANGELOG.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 9977e01..e8fc786 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,8 +1,8 @@
 Changelog
 *********
 
-0.29.0 (in development)
------------------------
+0.29.0 (2021-12-19)
+-------------------
 
 * Add new property ``pyvcf.VcfFrame.phased``.
 * Update :meth:`pyvcf.VcfFrame.slice` method to automatically handle the 'chr' string.