From 29e668a58711c489cbf8cd5c6a2ddad1887c4bfd Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 5 Feb 2022 18:40:57 +0900 Subject: [PATCH 01/22] Bump up version number --- CHANGELOG.rst | 3 +++ fuc/version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a1187a3..dae6157 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,9 @@ Changelog ********* +0.31.0 (in development) +----------------------- + 0.30.0 (2022-02-05) ------------------- diff --git a/fuc/version.py b/fuc/version.py index e187e0a..c3d10d7 100644 --- a/fuc/version.py +++ b/fuc/version.py @@ -1 +1 @@ -__version__ = '0.30.0' +__version__ = '0.31.0' From fc0ec14294b7c388da47b4e4a79b068c4e05ce89 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Wed, 9 Feb 2022 15:16:05 +0900 Subject: [PATCH 02/22] Fix bug in `pykallisto.KallistoFrame.compute_fold_change` --- CHANGELOG.rst | 2 ++ fuc/api/pykallisto.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dae6157..915a55e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Changelog 0.31.0 (in development) ----------------------- +* Fix bug in :meth:`pykallisto.KallistoFrame.compute_fold_change` method. 
+ 0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/pykallisto.py b/fuc/api/pykallisto.py index fac4f17..1e8ed63 100644 --- a/fuc/api/pykallisto.py +++ b/fuc/api/pykallisto.py @@ -163,7 +163,7 @@ def compute_fold_change(self, group, genes, unit='tpm', flip=False): df = getattr(self, f'df_gene_{unit}') df = df[df.index.isin(genes)].T df = df.merge(self.metadata[group], left_index=True, right_index=True) - df = df.groupby('lauren').mean().T + df = df.groupby(group).mean().T a, b = df.columns if flip: a, b = b, a From 9a04852bf920fcff4de2d5378e3503bcce363621 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Fri, 11 Feb 2022 17:19:32 +0900 Subject: [PATCH 03/22] Add `pyvcf.call` and `vcf-call` --- CHANGELOG.rst | 1 + README.rst | 7 +++- docs/cli.rst | 43 ++++++++++++++++++++- fuc/api/pyvcf.py | 89 ++++++++++++++++++++++++++++++++++++++++++- fuc/cli/fuc_undetm.py | 4 +- fuc/cli/ngs_fq2bam.py | 3 +- fuc/cli/vcf_call.py | 66 ++++++++++++++++++++++++++++++++ 7 files changed, 205 insertions(+), 8 deletions(-) create mode 100644 fuc/cli/vcf_call.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 915a55e..463d6ec 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Changelog ----------------------- * Fix bug in :meth:`pykallisto.KallistoFrame.compute_fold_change` method. +* Add new method :meth:`pyvcf.call` and new command :command:`vcf-call`. 0.30.0 (2022-02-05) ------------------- diff --git a/README.rst b/README.rst index 4c7621e..b2246a7 100644 --- a/README.rst +++ b/README.rst @@ -131,13 +131,15 @@ For getting help on the fuc CLI: fuc-exist Check whether certain files exist. fuc-find Retrieve absolute paths of files whose name matches a specified pattern, optionally recursively. - fuc-undetm Compute top unknown barcodes using undertermined FASTQ from bcl2fastq. + fuc-undetm Compute top unknown barcodes using undertermined FASTQ from + bcl2fastq. maf-maf2vcf Convert a MAF file to a VCF file. 
maf-oncoplt Create an oncoplot with a MAF file. maf-sumplt Create a summary plot with a MAF file. maf-vcf2maf Convert a VCF file to a MAF file. ngs-bam2fq Pipeline for converting BAM files to FASTQ files. - ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM files. + ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM + files. ngs-hc Pipeline for germline short variant discovery. ngs-m2 Pipeline for somatic short variant discovery. ngs-pon Pipeline for constructing a panel of normals (PoN). @@ -148,6 +150,7 @@ For getting help on the fuc CLI: tabix-slice Slice a GFF/BED/SAM/VCF file with Tabix. tbl-merge Merge two table files. tbl-sum Summarize a table file. + vcf-call Perform variant calling and filtering for BAM files. vcf-filter Filter a VCF file. vcf-index Index a VCF file. vcf-merge Merge two or more VCF files. diff --git a/docs/cli.rst b/docs/cli.rst index ae904c5..65f2bb8 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -37,13 +37,15 @@ For getting help on the fuc CLI: fuc-exist Check whether certain files exist. fuc-find Retrieve absolute paths of files whose name matches a specified pattern, optionally recursively. - fuc-undetm Compute top unknown barcodes using undertermined FASTQ from bcl2fastq. + fuc-undetm Compute top unknown barcodes using undertermined FASTQ from + bcl2fastq. maf-maf2vcf Convert a MAF file to a VCF file. maf-oncoplt Create an oncoplot with a MAF file. maf-sumplt Create a summary plot with a MAF file. maf-vcf2maf Convert a VCF file to a MAF file. ngs-bam2fq Pipeline for converting BAM files to FASTQ files. - ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM files. + ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM + files. ngs-hc Pipeline for germline short variant discovery. ngs-m2 Pipeline for somatic short variant discovery. ngs-pon Pipeline for constructing a panel of normals (PoN). 
@@ -54,6 +56,7 @@ For getting help on the fuc CLI: tabix-slice Slice a GFF/BED/SAM/VCF file with Tabix. tbl-merge Merge two table files. tbl-sum Summarize a table file. + vcf-call Perform variant calling and filtering for BAM files. vcf-filter Filter a VCF file. vcf-index Index a VCF file. vcf-merge Merge two or more VCF files. @@ -1210,6 +1213,42 @@ tbl-sum [Example] Summarize a table: $ fuc tbl-sum table.tsv +vcf-call +======== + +.. code-block:: text + + $ fuc vcf-call -h + usage: fuc vcf-call [-h] [--regions TEXT [TEXT ...]] [--min-mq INT] + [--max-depth INT] + fasta bam [bam ...] + + Perform variant calling and filtering for BAM files. + + Positional arguments: + fasta FASTA file. + bam One or more BAM files. + + Optional arguments: + -h, --help Show this help message and exit. + --regions TEXT [TEXT ...] + Only call variants in given regions. Each region must + have the format chrom:start-end and be a half-open + interval with (start, end]. This means, for example, + chr1:100-103 will extract positions 101, 102, and + 103. Alternatively, you can provide a BED file + (compressed or uncompressed) to specify regions. Note + that the 'chr' prefix in contig names (e.g. 'chr1' + vs. '1') will be automatically added or removed as + necessary to match the input VCF's contig names. + --min-mq INT Minimum mapping quality for an alignment to be used + (default: 1). + --max-depth INT At a position, read maximally this number of reads + per input file (default: 250). + + [Example] Specify regions manually: + $ fuc vcf-call ref.fa in1.bam in2.bam -r chr1:100-200 chr2:300-400 > out.vcf + vcf-filter ========== diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 418e658..ad1bcd6 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -58,9 +58,11 @@ import os import re +import sys import gzip from copy import deepcopy import warnings +import tempfile from . 
import pybed, common, pymaf @@ -72,7 +74,7 @@ import matplotlib.pyplot as plt from matplotlib_venn import venn2, venn3 import seaborn as sns -from pysam import VariantFile +from pysam import VariantFile, bcftools from io import StringIO HEADERS = { @@ -135,6 +137,91 @@ '#AD_FRAC_ALT': ['AD', lambda x: np.nan if sum([int(y) for y in x.split(',')]) == 0 else sum([int(y) for y in x.split(',')[1:]]) / sum([int(y) for y in x.split(',')]), True], } +def call( + fasta, bams, path=None, regions=None, min_mq=1, max_depth=250 +): + """ + Perform variant calling and filtering for BAM files. + + This method will run a fully customizable, bcftools-based pipeline for + calling and filtering variants. + + Parameters + ---------- + fasta : str + FASTA file. + bams : str or list + One or more BAM files. + path : str, optional + Output VCF file. Writes to stdout when ``path='-'``. If None is + provided the result is returned as a string. + regions : str, list, or pybed.BedFrame + Only call variants in given regions. Each region must have the format + chrom:start-end and be a half-open interval with (start, end]. This + means, for example, chr1:100-103 will extract positions 101, 102, and + 103. Alternatively, you can provide a BED file (compressed or + uncompressed) to specify regions. Note that the 'chr' prefix in + contig names (e.g. 'chr1' vs. '1') will be automatically added or + removed as necessary to match the input VCF's contig names. + min_mq : int, default: 1 + Minimum mapping quality for an alignment to be used. + max_depth : int, default: 250 + At a position, read maximally this number of reads per input file. + + Returns + ------- + str + VcfFrame object. + """ + if isinstance(bams, str): + bams = [bams] + + # Parse target regions, if provided. 
+ if regions is not None: + if isinstance(regions, str): + regions = [regions] + if '.bed' in regions[0]: + parsed_regions = ['-R', regions[0]] + else: + parsed_regions = ['-r'] + regions + + with tempfile.TemporaryDirectory() as t: + # Step 1: Get genotype likelihoods. + args = ['-Ou', '-a', 'AD'] + args += ['-q', str(min_mq)] + args += ['--max-depth', str(max_depth)] + args += ['-f', fasta] + if parsed_regions is not None: + args += parsed_regions + args += bams + results = bcftools.mpileup(*args) + with open(f'{t}/likelihoods.bcf', 'wb') as f: + f.write(results) + + # Step 2: Call variants. + args = [f'{t}/likelihoods.bcf', '-Oz', '-mv'] + results = bcftools.call(*args) + with open('calls.bcf', 'wb') as f: + f.write(results) + + # Step 3: Normalize indels. + args = ['calls.bcf', '-Ob', '-f', fasta] + results = bcftools.norm(*args) + with open('calls.normalized.bcf', 'wb') as f: + f.write(results) + + # Step 4: Filter variant. + args = ['calls.normalized.bcf', '-Ov', '--IndelGap', '5'] + results = bcftools.filter(*args) + + if path is None: + return results + elif path == '-': + sys.stdout.write(results) + else: + with open(path, 'w') as f: + f.write(results) + def rescue_filtered_variants(vfs, format='GT'): """ Rescue filtered variants if they are PASS in at least one of the input diff --git a/fuc/cli/fuc_undetm.py b/fuc/cli/fuc_undetm.py index 3bc82d9..a257419 100644 --- a/fuc/cli/fuc_undetm.py +++ b/fuc/cli/fuc_undetm.py @@ -20,8 +20,8 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Compute top unknown barcodes using undertermined FASTQ ' - 'from bcl2fastq.', + help='Compute top unknown barcodes using undertermined FASTQ from \n' + 'bcl2fastq.' 
) parser.add_argument( 'fastq', diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py index c207b0a..8ae3588 100644 --- a/fuc/cli/ngs_fq2bam.py +++ b/fuc/cli/ngs_fq2bam.py @@ -54,7 +54,8 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for converting FASTQ files to analysis-ready BAM files.', + help='Pipeline for converting FASTQ files to analysis-ready BAM \n' + 'files.' ) parser.add_argument( 'manifest', diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py new file mode 100644 index 0000000..2e9e9d2 --- /dev/null +++ b/fuc/cli/vcf_call.py @@ -0,0 +1,66 @@ +import sys + +from .. import api + +description = f""" +Perform variant calling and filtering for BAM files. +""" + +epilog = f""" +[Example] Specify regions manually: + $ fuc {api.common._script_name()} ref.fa in1.bam in2.bam -r chr1:100-200 chr2:300-400 > out.vcf +""" + +def create_parser(subparsers): + parser = api.common._add_parser( + subparsers, + api.common._script_name(), + description=description, + epilog=epilog, + help='Perform variant calling and filtering for BAM files.', + ) + parser.add_argument( + 'fasta', + help="FASTA file." + ) + parser.add_argument( + 'bam', + nargs='+', + help="One or more BAM files." + ) + parser.add_argument( + '--regions', + nargs='+', + metavar='TEXT', + help="Only call variants in given regions. Each region must \n" + "have the format chrom:start-end and be a half-open \n" + "interval with (start, end]. This means, for example, \n" + "chr1:100-103 will extract positions 101, 102, and \n" + "103. Alternatively, you can provide a BED file \n" + "(compressed or uncompressed) to specify regions. Note \n" + "that the 'chr' prefix in contig names (e.g. 'chr1' \n" + "vs. '1') will be automatically added or removed as \n" + "necessary to match the input VCF's contig names." 
+ ) + parser.add_argument( + '--min-mq', + metavar='INT', + type=int, + default=1, + help="Minimum mapping quality for an alignment to be used \n" + "(default: 1)." + ) + parser.add_argument( + '--max-depth', + metavar='INT', + type=int, + default=250, + help="At a position, read maximally this number of reads \n" + "per input file (default: 250)." + ) + +def main(args): + api.pyvcf.call( + args.fasta, args.bam, path='-', regions=args.regions, + min_mq=args.min_mq, max_depth=args.max_depth + ) From 4bd6f383538799ac16e7cffd6d67e0c1f4f77bb0 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Fri, 11 Feb 2022 20:25:39 +0900 Subject: [PATCH 04/22] Update docs --- docs/cli.rst | 11 ++++++++--- fuc/api/pyvcf.py | 16 ++++++++-------- fuc/cli/vcf_call.py | 13 +++++++++---- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/docs/cli.rst b/docs/cli.rst index 65f2bb8..5abf537 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1221,13 +1221,18 @@ vcf-call $ fuc vcf-call -h usage: fuc vcf-call [-h] [--regions TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT] - fasta bam [bam ...] + fasta bams [bams ...] Perform variant calling and filtering for BAM files. + This command will run a fully customizable, bcftools-based pipeline for + calling and filtering variants. + Positional arguments: - fasta FASTA file. - bam One or more BAM files. + fasta Reference FASTA file. + bams One or more input BAM files. Alternatively, you can + provide a text file (.txt, .tsv, .csv, or .list) + containing one BAM file per line. Optional arguments: -h, --help Show this help message and exit. diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index ad1bcd6..959fa5e 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -149,9 +149,10 @@ def call( Parameters ---------- fasta : str - FASTA file. + Reference FASTA file. bams : str or list - One or more BAM files. + One or more input BAM files. 
Alternatively, you can provide a text + file (.txt, .tsv, .csv, or .list) containing one BAM file per line. path : str, optional Output VCF file. Writes to stdout when ``path='-'``. If None is provided the result is returned as a string. @@ -173,8 +174,7 @@ def call( str VcfFrame object. """ - if isinstance(bams, str): - bams = [bams] + bams = common.parse_list_or_file(bams) # Parse target regions, if provided. if regions is not None: @@ -201,17 +201,17 @@ def call( # Step 2: Call variants. args = [f'{t}/likelihoods.bcf', '-Oz', '-mv'] results = bcftools.call(*args) - with open('calls.bcf', 'wb') as f: + with open(f'{t}/calls.bcf', 'wb') as f: f.write(results) # Step 3: Normalize indels. - args = ['calls.bcf', '-Ob', '-f', fasta] + args = [f'{t}/calls.bcf', '-Ob', '-f', fasta] results = bcftools.norm(*args) - with open('calls.normalized.bcf', 'wb') as f: + with open(f'{t}/calls.normalized.bcf', 'wb') as f: f.write(results) # Step 4: Filter variant. - args = ['calls.normalized.bcf', '-Ov', '--IndelGap', '5'] + args = [f'{t}/calls.normalized.bcf', '-Ov', '--IndelGap', '5'] results = bcftools.filter(*args) if path is None: diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index 2e9e9d2..68d7f9f 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -4,6 +4,9 @@ description = f""" Perform variant calling and filtering for BAM files. + +This command will run a fully customizable, bcftools-based pipeline for +calling and filtering variants. """ epilog = f""" @@ -21,12 +24,14 @@ def create_parser(subparsers): ) parser.add_argument( 'fasta', - help="FASTA file." + help="Reference FASTA file." ) parser.add_argument( - 'bam', + 'bams', nargs='+', - help="One or more BAM files." + help="One or more input BAM files. Alternatively, you can \n" + "provide a text file (.txt, .tsv, .csv, or .list) \n" + "containing one BAM file per line." 
) parser.add_argument( '--regions', @@ -61,6 +66,6 @@ def create_parser(subparsers): def main(args): api.pyvcf.call( - args.fasta, args.bam, path='-', regions=args.regions, + args.fasta, args.bams, path='-', regions=args.regions, min_mq=args.min_mq, max_depth=args.max_depth ) From f544745e51d0f0a1b12e0bf82d377838a728453e Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Fri, 11 Feb 2022 20:49:22 +0900 Subject: [PATCH 05/22] Update `pycov.CovFrame.from_bam` and `bam-depth`: * Combine optional arguments ``bam`` and ``fn`` into single positional argument ``bams`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bam`` and ``--fn`` into ``bams``). --- CHANGELOG.rst | 1 + docs/cli.rst | 23 ++++++++++++----------- fuc/api/pycov.py | 43 +++++++++++++------------------------------ fuc/cli/bam_depth.py | 16 +++++----------- 4 files changed, 31 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 463d6ec..3ecd7bf 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ Changelog * Fix bug in :meth:`pykallisto.KallistoFrame.compute_fold_change` method. * Add new method :meth:`pyvcf.call` and new command :command:`vcf-call`. +* Combine optional arguments ``bam`` and ``fn`` into single positional argument ``bams`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bam`` and ``--fn`` into ``bams``). 0.30.0 (2022-02-05) ------------------- diff --git a/docs/cli.rst b/docs/cli.rst index 5abf537..d004d73 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -107,8 +107,8 @@ bam-depth .. code-block:: text $ fuc bam-depth -h - usage: fuc bam-depth [-h] [--bam PATH [PATH ...]] [--fn PATH] [--bed PATH] - [--region TEXT] [--zero] + usage: fuc bam-depth [-h] [--bed PATH] [--region TEXT] [--zero] + bams [bams ...] Compute read depth from SAM/BAM/CRAM files. @@ -117,16 +117,17 @@ bam-depth close attention to the 'chr' string in contig names (e.g. 
'chr1' vs. '1'). Note also that --region requires the input files be indexed. + Positional arguments: + bams One or more input BAM files. Alternatively, you can provide + a text file (.txt, .tsv, .csv, or .list) containing one BAM + file per line. + Optional arguments: - -h, --help Show this help message and exit. - --bam PATH [PATH ...] - One or more alignment files. Cannot be used with --fn. - --fn PATH File containing one alignment file per line. Cannot - be used with --bam. - --bed PATH BED file. Cannot be used with --region. - --region TEXT Target region ('chrom:start-end'). Cannot be used - with --bed. - --zero Output all positions including those with zero depth. + -h, --help Show this help message and exit. + --bed PATH BED file. Cannot be used with --region. + --region TEXT Target region ('chrom:start-end'). Cannot be used + with --bed. + --zero Output all positions including those with zero depth. [Example] To specify regions with a BED file: $ fuc bam-depth \ diff --git a/fuc/api/pycov.py b/fuc/api/pycov.py index 7cc2b67..ba76940 100644 --- a/fuc/api/pycov.py +++ b/fuc/api/pycov.py @@ -165,15 +165,12 @@ def shape(self): @classmethod def from_bam( - cls, bam=None, fn=None, bed=None, region=None, zero=False, - map_qual=None, names=None + cls, bams, bed=None, region=None, zero=False, map_qual=None, + names=None ): """ Construct CovFrame from one or more SAM/BAM/CRAM files. - Alignment files must be specified with either ``bam`` or ``fn``, but - it's an error to use both. - By default, the method will count all reads within the alignment files. You can specify target regions with either ``bed`` or ``region``, but not both. When you do this, pay close attention to @@ -188,10 +185,10 @@ def from_bam( Parameters ---------- - bam : str or list, optional - One or more alignment files. - fn : str, optional - File containing one alignment file per line. + bams : str or list + One or more input BAM files. 
Alternatively, you can provide a + text file (.txt, .tsv, .csv, or .list) containing one BAM file + per line. bed : str, optional BED file. region : str, optional @@ -226,21 +223,7 @@ def from_bam( >>> cf = pycov.CovFrame.from_bam([bam1, bam2]) >>> cf = pycov.CovFrame.from_bam(bam, region='19:41497204-41524301') """ - bam_files = [] - - if bam is None and fn is None: - raise ValueError( - "Either the 'bam' or 'fn' parameter must be provided.") - elif bam is not None and fn is not None: - raise ValueError( - "The 'bam' and 'fn' parameters cannot be used together.") - elif bam is not None and fn is None: - if isinstance(bam, str): - bam_files.append(bam) - else: - bam_files += bam - else: - bam_files += common.convert_file2list(fn) + bams = common.parse_list_or_file(bams) args = [] @@ -253,25 +236,25 @@ def from_bam( if bed is not None: args += ['-b', bed] - args += bam_files + args += bams s = pysam.depth(*args) headers = ['Chromosome', 'Position'] dtype = {'Chromosome': str,'Position': int} - for i, bam_file in enumerate(bam_files): + for i, bam in enumerate(bams): if names: name = names[i] else: - samples = pybam.tag_sm(bam_file) + samples = pybam.tag_sm(bam) if not samples: - basename = Path(bam_file).stem + basename = Path(bam).stem message = ( - f'SM tags were not found for {bam_file}, will use ' + f'SM tags were not found for {bam}, will use ' f'file name as sample name ({basename})' ) samples = [basename] warnings.warn(message) if len(samples) > 1: - m = f'multiple sample names detected: {bam_file}' + m = f'multiple sample names detected: {bam}' raise ValueError(m) name = samples[0] headers.append(name) diff --git a/fuc/cli/bam_depth.py b/fuc/cli/bam_depth.py index fdb2f45..6e3afab 100644 --- a/fuc/cli/bam_depth.py +++ b/fuc/cli/bam_depth.py @@ -34,16 +34,11 @@ def create_parser(subparsers): help='Compute read depth from SAM/BAM/CRAM files.', ) parser.add_argument( - '--bam', - metavar='PATH', + 'bams', nargs='+', - help='One or more alignment files. 
Cannot be used with --fn.' - ) - parser.add_argument( - '--fn', - metavar='PATH', - help='File containing one alignment file per line. Cannot \n' - 'be used with --bam.' + help="One or more input BAM files. Alternatively, you can provide \n" + "a text file (.txt, .tsv, .csv, or .list) containing one BAM \n" + "file per line." ) parser.add_argument( '--bed', @@ -64,7 +59,6 @@ def create_parser(subparsers): def main(args): cf = api.pycov.CovFrame.from_bam( - bam=args.bam, fn=args.fn, bed=args.bed, region=args.region, - zero=args.zero + args.bams, bed=args.bed, region=args.region, zero=args.zero ) sys.stdout.write(cf.to_string()) From 6fa67285ff8988e93763b56b0a7982f5fb9c210c Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 12 Feb 2022 10:10:20 +0900 Subject: [PATCH 06/22] Update docs --- fuc/api/pycov.py | 2 +- fuc/api/pyvcf.py | 2 +- fuc/cli/vcf_call.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fuc/api/pycov.py b/fuc/api/pycov.py index ba76940..5616f4b 100644 --- a/fuc/api/pycov.py +++ b/fuc/api/pycov.py @@ -169,7 +169,7 @@ def from_bam( names=None ): """ - Construct CovFrame from one or more SAM/BAM/CRAM files. + Construct CovFrame from BAM files. By default, the method will count all reads within the alignment files. You can specify target regions with either ``bed`` or diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 959fa5e..70648a8 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -141,7 +141,7 @@ def call( fasta, bams, path=None, regions=None, min_mq=1, max_depth=250 ): """ - Perform variant calling and filtering for BAM files. + Call variants (SNVs/indels) from BAM files. This method will run a fully customizable, bcftools-based pipeline for calling and filtering variants. diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index 68d7f9f..a055695 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -3,7 +3,7 @@ from .. 
import api description = f""" -Perform variant calling and filtering for BAM files. +Call variants (SNVs/indels) from BAM files. This command will run a fully customizable, bcftools-based pipeline for calling and filtering variants. @@ -20,7 +20,7 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Perform variant calling and filtering for BAM files.', + help='Call variants (SNVs/indels) from BAM files.', ) parser.add_argument( 'fasta', From e567b3a098671538c5e650e91879f79dbc540d92 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 12 Feb 2022 13:29:26 +0900 Subject: [PATCH 07/22] Update docs --- fuc/api/pybed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fuc/api/pybed.py b/fuc/api/pybed.py index 495fe42..6383dab 100644 --- a/fuc/api/pybed.py +++ b/fuc/api/pybed.py @@ -493,9 +493,9 @@ def to_regions(self, merge=True): ... 'End': [40, 50, 25, 35, 60, 80] ... } >>> bf = pybed.BedFrame.from_dict([], data) - >>> bf.regions() + >>> bf.to_regions() ['chr1:10-50', 'chr2:15-35', 'chr3:50-60', 'chr3:61-80'] - >>> bf.regions(merge=False) + >>> bf.to_regions(merge=False) ['chr1:10-40', 'chr1:30-50', 'chr2:15-25', 'chr2:25-35', 'chr3:50-60', 'chr3:61-80'] """ if merge: From 600aaf67e9f0c30b5198d62a65c9e9dae92dc05b Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 12 Feb 2022 13:29:40 +0900 Subject: [PATCH 08/22] Update docs --- fuc/api/pyvcf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 70648a8..f74692e 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -156,7 +156,7 @@ def call( path : str, optional Output VCF file. Writes to stdout when ``path='-'``. If None is provided the result is returned as a string. - regions : str, list, or pybed.BedFrame + regions : str, list, or pybed.BedFrame, optional Only call variants in given regions. 
Each region must have the format chrom:start-end and be a half-open interval with (start, end]. This means, for example, chr1:100-103 will extract positions 101, 102, and From f6171047afa897ad1d44f8104dfa30920af928a2 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 12 Feb 2022 13:49:45 +0900 Subject: [PATCH 09/22] Update `pycov.CovFrame.from_bam` and `bam-depth`: * Update :meth:`pycov.CovFrame.from_bam` method and :command:`bam-depth` command to automatically handle the 'chr' string. --- CHANGELOG.rst | 2 ++ README.rst | 4 +-- docs/cli.rst | 60 +++++++++++++++++-------------- fuc/api/pycov.py | 85 +++++++++++++++++++++++++++++--------------- fuc/cli/bam_depth.py | 60 +++++++++++++++++-------------- 5 files changed, 126 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3ecd7bf..97c1149 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,8 @@ Changelog * Fix bug in :meth:`pykallisto.KallistoFrame.compute_fold_change` method. * Add new method :meth:`pyvcf.call` and new command :command:`vcf-call`. * Combine optional arguments ``bam`` and ``fn`` into single positional argument ``bams`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bam`` and ``--fn`` into ``bams``). +* Combine optional arguments ``bed`` and ``region`` into single optional argument ``regions`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bed`` and ``--region`` into ``--regions``). +* Update :meth:`pycov.CovFrame.from_bam` method and :command:`bam-depth` command to automatically handle the 'chr' string. 0.30.0 (2022-02-05) ------------------- diff --git a/README.rst b/README.rst index b2246a7..7f66236 100644 --- a/README.rst +++ b/README.rst @@ -113,7 +113,7 @@ For getting help on the fuc CLI: positional arguments: COMMAND bam-aldepth Compute allelic depth from a SAM/BAM/CRAM file. - bam-depth Compute read depth from SAM/BAM/CRAM files. 
+ bam-depth Compute per-base read depth from BAM files. bam-head Print the header of a SAM/BAM/CRAM file. bam-index Index a SAM/BAM/CRAM file. bam-rename Rename the sample in a SAM/BAM/CRAM file. @@ -150,7 +150,7 @@ For getting help on the fuc CLI: tabix-slice Slice a GFF/BED/SAM/VCF file with Tabix. tbl-merge Merge two table files. tbl-sum Summarize a table file. - vcf-call Perform variant calling and filtering for BAM files. + vcf-call Call variants (SNVs/indels) from BAM files. vcf-filter Filter a VCF file. vcf-index Index a VCF file. vcf-merge Merge two or more VCF files. diff --git a/docs/cli.rst b/docs/cli.rst index d004d73..173a384 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -19,7 +19,7 @@ For getting help on the fuc CLI: positional arguments: COMMAND bam-aldepth Compute allelic depth from a SAM/BAM/CRAM file. - bam-depth Compute read depth from SAM/BAM/CRAM files. + bam-depth Compute per-base read depth from BAM files. bam-head Print the header of a SAM/BAM/CRAM file. bam-index Index a SAM/BAM/CRAM file. bam-rename Rename the sample in a SAM/BAM/CRAM file. @@ -56,7 +56,7 @@ For getting help on the fuc CLI: tabix-slice Slice a GFF/BED/SAM/VCF file with Tabix. tbl-merge Merge two table files. tbl-sum Summarize a table file. - vcf-call Perform variant calling and filtering for BAM files. + vcf-call Call variants (SNVs/indels) from BAM files. vcf-filter Filter a VCF file. vcf-index Index a VCF file. vcf-merge Merge two or more VCF files. @@ -107,37 +107,43 @@ bam-depth .. code-block:: text $ fuc bam-depth -h - usage: fuc bam-depth [-h] [--bed PATH] [--region TEXT] [--zero] - bams [bams ...] + usage: fuc bam-depth [-h] [--regions TEXT [TEXT ...]] [--zero] bams [bams ...] - Compute read depth from SAM/BAM/CRAM files. + Compute per-base read depth from BAM files. - By default, the command will count all reads within the alignment files. You - can specify regions of interest with --bed or --region. 
When you do this, pay - close attention to the 'chr' string in contig names (e.g. 'chr1' vs. '1'). - Note also that --region requires the input files be indexed. + Under the hood, the command computes read depth using the 'samtools depth' + command. Positional arguments: - bams One or more input BAM files. Alternatively, you can provide - a text file (.txt, .tsv, .csv, or .list) containing one BAM - file per line. + bams One or more input BAM files. Alternatively, you can + provide a text file (.txt, .tsv, .csv, or .list) + containing one BAM file per line. Optional arguments: - -h, --help Show this help message and exit. - --bed PATH BED file. Cannot be used with --region. - --region TEXT Target region ('chrom:start-end'). Cannot be used - with --bed. - --zero Output all positions including those with zero depth. - - [Example] To specify regions with a BED file: - $ fuc bam-depth \ - --bam 1.bam 2.bam \ - --bed in.bed > out.tsv + -h, --help Show this help message and exit. + --regions TEXT [TEXT ...] + By default, the command counts all reads in BAM + files, which can be excruciatingly slow for large + files (e.g. whole genome sequencing). Therefore, use + this argument to only output positions in given + regions. Each region must have the format + chrom:start-end and be a half-open interval with + (start, end]. This means, for example, chr1:100-103 + will extract positions 101, 102, and 103. + Alternatively, you can provide a BED file (compressed + or uncompressed) to specify regions. Note that the + 'chr' prefix in contig names (e.g. 'chr1' vs. '1') + will be automatically added or removed as necessary + to match the input BAM's contig names. + --zero Output all positions including those with zero depth. 
+ + [Example] Specify regions with a BED file: + $ fuc bam-depth 1.bam 2.bam \ + --regions in.bed > out.tsv - [Example] To specify regions manually: - $ fuc bam-depth \ - --fn bam.list \ - --region chr1:100-200 > out.tsv + [Example] Specify regions manually: + $ fuc bam-depth bam.list \ + --regions chr1:100-200 chr2:400-500 > out.tsv bam-head ======== @@ -1224,7 +1230,7 @@ vcf-call [--max-depth INT] fasta bams [bams ...] - Perform variant calling and filtering for BAM files. + Call variants (SNVs/indels) from BAM files. This command will run a fully customizable, bcftools-based pipeline for calling and filtering variants. diff --git a/fuc/api/pycov.py b/fuc/api/pycov.py index 5616f4b..f32fb96 100644 --- a/fuc/api/pycov.py +++ b/fuc/api/pycov.py @@ -95,7 +95,7 @@ class CovFrame: See Also -------- CovFrame.from_bam - Construct CovFrame from one or more SAM/BAM/CRAM files. + Construct CovFrame from BAM files. CovFrame.from_dict Construct CovFrame from dict of array-like or dicts. CovFrame.from_file @@ -165,41 +165,42 @@ def shape(self): @classmethod def from_bam( - cls, bams, bed=None, region=None, zero=False, map_qual=None, - names=None + cls, bams, regions=None, zero=False, map_qual=None, names=None ): """ Construct CovFrame from BAM files. - By default, the method will count all reads within the alignment - files. You can specify target regions with either ``bed`` or - ``region``, but not both. When you do this, pay close attention to - the 'chr' string in contig names (e.g. 'chr1' vs. '1'). Note also - that ``region`` requires the input files be indexed. - Under the hood, the method computes read depth using the :command:`samtools depth` command. - Sample name is extracted from the SM tag. If the tag is missing, the - method will set the filename as sample name. - Parameters ---------- bams : str or list One or more input BAM files. Alternatively, you can provide a text file (.txt, .tsv, .csv, or .list) containing one BAM file per line. 
- bed : str, optional - BED file. - region : str, optional - Target region ('chrom:start-end'). + regions : str, list, or pybed.BedFrame, optional + By default (``regions=None``), the method counts all reads in BAM + files, which can be excruciatingly slow for large files (e.g. + whole genome sequencing). Therefore, use this argument to only + output positions in given regions. Each region must have the + format chrom:start-end and be a half-open interval with (start, + end]. This means, for example, chr1:100-103 will extract + positions 101, 102, and 103. Alternatively, you can provide a BED + file (compressed or uncompressed) or a :class:`pybed.BedFrame` + object to specify regions. Note that the 'chr' prefix in contig + names (e.g. 'chr1' vs. '1') will be automatically added or + removed as necessary to match the input BAM's contig names. zero : bool, default: False - If True, output all positions (including those with zero depth). + If True, output all positions including those with zero depth. map_qual: int, optional Only count reads with mapping quality greater than or equal to this number. names : list, optional - Use these as sample names instead of the SM tags. + By default (``names=None``), sample name is extracted using SM + tag in BAM files. If the tag is missing, the method will set the + filename as sample name. Use this argument to manually provide + sample names. Returns ------- @@ -223,23 +224,45 @@ def from_bam( >>> cf = pycov.CovFrame.from_bam([bam1, bam2]) >>> cf = pycov.CovFrame.from_bam(bam, region='19:41497204-41524301') """ + # Parse input BAM files. bams = common.parse_list_or_file(bams) - args = [] + # Check the 'chr' prefix. + if all([pybam.has_chr_prefix(x) for x in bams]): + chr_prefix = 'chr' + else: + chr_prefix = '' + # Run the 'samtools depth' command. 
+ args = [] if zero: args += ['-a'] - if region is not None: - args += ['-r', region] if map_qual is not None: args += ['-Q', str(map_qual)] - if bed is not None: - args += ['-b', bed] + results = '' + if regions is None: + results += pysam.depth(*(bams + args)) + else: + if isinstance(regions, str): + regions = [regions] + elif isinstance(regions, list): + pass + elif isinstance(regions, pybed.BedFrame): + regions = bf.to_regions() + else: + raise TypeError("Incorrect type of argument 'regions'") + if '.bed' in regions[0]: + bf = pybed.BedFrame.from_file(regions[0]) + regions = bf.to_regions() + else: + regions = common.sort_regions(regions) + for region in regions: + region = chr_prefix + region.replace('chr', '') + results += pysam.depth(*(bams + args + ['-r', region])) - args += bams - s = pysam.depth(*args) headers = ['Chromosome', 'Position'] dtype = {'Chromosome': str,'Position': int} + for i, bam in enumerate(bams): if names: name = names[i] @@ -259,8 +282,12 @@ def from_bam( name = samples[0] headers.append(name) dtype[name] = int - df = pd.read_csv(StringIO(s), sep='\t', header=None, - names=headers, dtype=dtype) + + df = pd.read_csv( + StringIO(results), sep='\t', header=None, + names=headers, dtype=dtype + ) + return cls(df) @classmethod @@ -283,7 +310,7 @@ def from_dict(cls, data): CovFrame CovFrame object creation using constructor. CovFrame.from_bam - Construct CovFrame from one or more SAM/BAM/CRAM files. + Construct CovFrame from BAM files. CovFrame.from_file Construct CovFrame from a text file containing read depth data. @@ -333,7 +360,7 @@ def from_file(cls, fn, compression=False): CovFrame CovFrame object creation using constructor. CovFrame.from_bam - Construct CovFrame from one or more SAM/BAM/CRAM files. + Construct CovFrame from BAM files. CovFrame.from_dict Construct CovFrame from dict of array-like or dicts. 
diff --git a/fuc/cli/bam_depth.py b/fuc/cli/bam_depth.py index 6e3afab..b000ba2 100644 --- a/fuc/cli/bam_depth.py +++ b/fuc/cli/bam_depth.py @@ -5,24 +5,20 @@ import pysam description = """ -Compute read depth from SAM/BAM/CRAM files. +Compute per-base read depth from BAM files. -By default, the command will count all reads within the alignment files. You -can specify regions of interest with --bed or --region. When you do this, pay -close attention to the 'chr' string in contig names (e.g. 'chr1' vs. '1'). -Note also that --region requires the input files be indexed. +Under the hood, the command computes read depth using the 'samtools depth' +command. """ epilog = f""" -[Example] To specify regions with a BED file: - $ fuc {api.common._script_name()} \\ - --bam 1.bam 2.bam \\ - --bed in.bed > out.tsv +[Example] Specify regions with a BED file: + $ fuc {api.common._script_name()} 1.bam 2.bam \\ + --regions in.bed > out.tsv -[Example] To specify regions manually: - $ fuc {api.common._script_name()} \\ - --fn bam.list \\ - --region chr1:100-200 > out.tsv +[Example] Specify regions manually: + $ fuc {api.common._script_name()} bam.list \\ + --regions chr1:100-200 chr2:400-500 > out.tsv """ def create_parser(subparsers): @@ -31,34 +27,44 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Compute read depth from SAM/BAM/CRAM files.', + help="Compute per-base read depth from BAM files." ) parser.add_argument( 'bams', nargs='+', - help="One or more input BAM files. Alternatively, you can provide \n" - "a text file (.txt, .tsv, .csv, or .list) containing one BAM \n" - "file per line." + help= +"""One or more input BAM files. Alternatively, you can +provide a text file (.txt, .tsv, .csv, or .list) +containing one BAM file per line.""" ) parser.add_argument( - '--bed', - metavar='PATH', - help='BED file. Cannot be used with --region.' 
- ) - parser.add_argument( - '--region', + '--regions', + nargs='+', metavar='TEXT', - help="Target region ('chrom:start-end'). Cannot be used \n" - "with --bed." + help= +"""By default, the command counts all reads in BAM +files, which can be excruciatingly slow for large +files (e.g. whole genome sequencing). Therefore, use +this argument to only output positions in given +regions. Each region must have the format +chrom:start-end and be a half-open interval with +(start, end]. This means, for example, chr1:100-103 +will extract positions 101, 102, and 103. +Alternatively, you can provide a BED file (compressed +or uncompressed) to specify regions. Note that the +'chr' prefix in contig names (e.g. 'chr1' vs. '1') +will be automatically added or removed as necessary +to match the input BAM's contig names.""" ) parser.add_argument( '--zero', action='store_true', - help='Output all positions including those with zero depth.' + help= +"""Output all positions including those with zero depth.""" ) def main(args): cf = api.pycov.CovFrame.from_bam( - args.bams, bed=args.bed, region=args.region, zero=args.zero + args.bams, regions=args.regions, zero=args.zero ) sys.stdout.write(cf.to_string()) From fccf85e6fbc5c55fb1de02190cc18fd54b130980 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 12 Feb 2022 14:40:05 +0900 Subject: [PATCH 10/22] Update docs --- README.rst | 2 +- docs/cli.rst | 61 ++++++++++++++++++++++++------------------- fuc/api/pyvcf.py | 24 ++++++++++------- fuc/cli/bam_depth.py | 12 +++++---- fuc/cli/bam_rename.py | 2 +- fuc/cli/fuc_undetm.py | 2 +- fuc/cli/ngs_bam2fq.py | 2 +- fuc/cli/ngs_fq2bam.py | 2 +- fuc/cli/ngs_pon.py | 2 +- fuc/cli/ngs_quant.py | 2 +- fuc/cli/ngs_trim.py | 2 +- fuc/cli/vcf_call.py | 61 +++++++++++++++++++++++++++---------------- fuc/cli/vcf_merge.py | 2 +- fuc/cli/vcf_slice.py | 2 +- fuc/cli/vcf_split.py | 2 +- fuc/cli/vcf_vep.py | 2 +- 16 files changed, 105 insertions(+), 77 deletions(-) diff --git a/README.rst b/README.rst 
index 7f66236..12f95e2 100644 --- a/README.rst +++ b/README.rst @@ -150,7 +150,7 @@ For getting help on the fuc CLI: tabix-slice Slice a GFF/BED/SAM/VCF file with Tabix. tbl-merge Merge two table files. tbl-sum Summarize a table file. - vcf-call Call variants (SNVs/indels) from BAM files. + vcf-call Call SNVs and indels from BAM files. vcf-filter Filter a VCF file. vcf-index Index a VCF file. vcf-merge Merge two or more VCF files. diff --git a/docs/cli.rst b/docs/cli.rst index 173a384..dcc4aaa 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -56,7 +56,7 @@ For getting help on the fuc CLI: tabix-slice Slice a GFF/BED/SAM/VCF file with Tabix. tbl-merge Merge two table files. tbl-sum Summarize a table file. - vcf-call Call variants (SNVs/indels) from BAM files. + vcf-call Call SNVs and indels from BAM files. vcf-filter Filter a VCF file. vcf-index Index a VCF file. vcf-merge Merge two or more VCF files. @@ -107,7 +107,7 @@ bam-depth .. code-block:: text $ fuc bam-depth -h - usage: fuc bam-depth [-h] [--regions TEXT [TEXT ...]] [--zero] bams [bams ...] + usage: fuc bam-depth [-h] [-r TEXT [TEXT ...]] [--zero] bams [bams ...] Compute per-base read depth from BAM files. @@ -121,7 +121,7 @@ bam-depth Optional arguments: -h, --help Show this help message and exit. - --regions TEXT [TEXT ...] + -r TEXT [TEXT ...], --regions TEXT [TEXT ...] By default, the command counts all reads in BAM files, which can be excruciatingly slow for large files (e.g. whole genome sequencing). Therefore, use @@ -137,13 +137,13 @@ bam-depth to match the input BAM's contig names. --zero Output all positions including those with zero depth. 
- [Example] Specify regions with a BED file: + [Example] Specify regions manually: $ fuc bam-depth 1.bam 2.bam \ - --regions in.bed > out.tsv + -r chr1:100-200 chr2:400-500 > out.tsv - [Example] Specify regions manually: + [Example] Specify regions with a BED file: $ fuc bam-depth bam.list \ - --regions chr1:100-200 chr2:400-500 > out.tsv + -r in.bed > out.tsv bam-head ======== @@ -1226,40 +1226,47 @@ vcf-call .. code-block:: text $ fuc vcf-call -h - usage: fuc vcf-call [-h] [--regions TEXT [TEXT ...]] [--min-mq INT] - [--max-depth INT] + usage: fuc vcf-call [-h] [-r TEXT [TEXT ...]] [--min-mq INT] [--max-depth INT] fasta bams [bams ...] - Call variants (SNVs/indels) from BAM files. + Call SNVs and indels from BAM files. - This command will run a fully customizable, bcftools-based pipeline for - calling and filtering variants. + Under the hood, the command utilizes the bcftool program to call variants. Positional arguments: fasta Reference FASTA file. - bams One or more input BAM files. Alternatively, you can - provide a text file (.txt, .tsv, .csv, or .list) + bams One or more input BAM files. Alternatively, you can + provide a text file (.txt, .tsv, .csv, or .list) containing one BAM file per line. Optional arguments: -h, --help Show this help message and exit. - --regions TEXT [TEXT ...] - Only call variants in given regions. Each region must - have the format chrom:start-end and be a half-open - interval with (start, end]. This means, for example, - chr1:100-103 will extract positions 101, 102, and - 103. Alternatively, you can provide a BED file - (compressed or uncompressed) to specify regions. Note - that the 'chr' prefix in contig names (e.g. 'chr1' - vs. '1') will be automatically added or removed as - necessary to match the input VCF's contig names. - --min-mq INT Minimum mapping quality for an alignment to be used + -r TEXT [TEXT ...], --regions TEXT [TEXT ...] 
+ By default, the command looks at each genomic + position with coverage in BAM files, which can be + excruciatingly slow for large files (e.g. whole + genome sequencing). Therefore, use this argument to + only call variants in given regions. Each region must + have the format chrom:start-end and be a half-open + interval with (start, end]. This means, for example, + chr1:100-103 will extract positions 101, 102, and + 103. Alternatively, you can provide a BED file + (compressed or uncompressed) to specify regions. Note + that the 'chr' prefix in contig names (e.g. 'chr1' + vs. '1') will be automatically added or removed as + necessary to match the input BAM's contig names. + --min-mq INT Minimum mapping quality for an alignment to be used (default: 1). - --max-depth INT At a position, read maximally this number of reads + --max-depth INT At a position, read maximally this number of reads per input file (default: 250). [Example] Specify regions manually: - $ fuc vcf-call ref.fa in1.bam in2.bam -r chr1:100-200 chr2:300-400 > out.vcf + $ fuc vcf-call ref.fa 1.bam 2.bam \ + -r chr1:100-200 chr2:400-500 > out.vcf + + [Example] Specify regions with a BED file: + $ fuc vcf-call ref.fa bam.list \ + -r in.bed > out.vcf vcf-filter ========== diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index f74692e..1fe7431 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -141,10 +141,9 @@ def call( fasta, bams, path=None, regions=None, min_mq=1, max_depth=250 ): """ - Call variants (SNVs/indels) from BAM files. + Call SNVs and indels from BAM files. - This method will run a fully customizable, bcftools-based pipeline for - calling and filtering variants. + Under the hood, the method utilizes the bcftool program to call variants. Parameters ---------- @@ -157,13 +156,17 @@ def call( Output VCF file. Writes to stdout when ``path='-'``. If None is provided the result is returned as a string. regions : str, list, or pybed.BedFrame, optional - Only call variants in given regions. 
Each region must have the format - chrom:start-end and be a half-open interval with (start, end]. This - means, for example, chr1:100-103 will extract positions 101, 102, and - 103. Alternatively, you can provide a BED file (compressed or - uncompressed) to specify regions. Note that the 'chr' prefix in - contig names (e.g. 'chr1' vs. '1') will be automatically added or - removed as necessary to match the input VCF's contig names. + By default (``regions=None``), the method looks at each genomic + position with coverage in BAM files, which can be excruciatingly slow + for large files (e.g. whole genome sequencing). Therefore, use this + argument to only call variants in given regions. Each region must + have the format chrom:start-end and be a half-open interval with + (start, end]. This means, for example, chr1:100-103 will extract + positions 101, 102, and 103. Alternatively, you can provide a BED + file (compressed or uncompressed) or a :class:`pybed.BedFrame` object + to specify regions. Note that the 'chr' prefix in contig names (e.g. + 'chr1' vs. '1') will be automatically added or removed as necessary + to match the input VCF's contig names. min_mq : int, default: 1 Minimum mapping quality for an alignment to be used. max_depth : int, default: 250 @@ -174,6 +177,7 @@ def call( str VcfFrame object. """ + # Parse input BAM files. bams = common.parse_list_or_file(bams) # Parse target regions, if provided. 
diff --git a/fuc/cli/bam_depth.py b/fuc/cli/bam_depth.py index b000ba2..77f90e4 100644 --- a/fuc/cli/bam_depth.py +++ b/fuc/cli/bam_depth.py @@ -12,13 +12,13 @@ """ epilog = f""" -[Example] Specify regions with a BED file: +[Example] Specify regions manually: $ fuc {api.common._script_name()} 1.bam 2.bam \\ - --regions in.bed > out.tsv + -r chr1:100-200 chr2:400-500 > out.tsv -[Example] Specify regions manually: +[Example] Specify regions with a BED file: $ fuc {api.common._script_name()} bam.list \\ - --regions chr1:100-200 chr2:400-500 > out.tsv + -r in.bed > out.tsv """ def create_parser(subparsers): @@ -27,7 +27,8 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help="Compute per-base read depth from BAM files." + help= +"""Compute per-base read depth from BAM files.""" ) parser.add_argument( 'bams', @@ -38,6 +39,7 @@ def create_parser(subparsers): containing one BAM file per line.""" ) parser.add_argument( + '-r', '--regions', nargs='+', metavar='TEXT', diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py index 714c4c4..0ed6d79 100644 --- a/fuc/cli/bam_rename.py +++ b/fuc/cli/bam_rename.py @@ -5,7 +5,7 @@ import pysam -description = f""" +description = """ Rename the sample in a SAM/BAM/CRAM file. """ diff --git a/fuc/cli/fuc_undetm.py b/fuc/cli/fuc_undetm.py index a257419..30d10bf 100644 --- a/fuc/cli/fuc_undetm.py +++ b/fuc/cli/fuc_undetm.py @@ -2,7 +2,7 @@ from .. import api -description = f""" +description = """ Compute top unknown barcodes using undertermined FASTQ from bcl2fastq. This command will compute top unknown barcodes using undertermined FASTQ from diff --git a/fuc/cli/ngs_bam2fq.py b/fuc/cli/ngs_bam2fq.py index 7561c20..3e2bcb8 100644 --- a/fuc/cli/ngs_bam2fq.py +++ b/fuc/cli/ngs_bam2fq.py @@ -6,7 +6,7 @@ import pandas as pd -description = f""" +description = """ Pipeline for converting BAM files to FASTQ files. 
This pipeline will assume input BAM files consist of paired-end reads diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py index 8ae3588..ffdd5f4 100644 --- a/fuc/cli/ngs_fq2bam.py +++ b/fuc/cli/ngs_fq2bam.py @@ -6,7 +6,7 @@ import pandas as pd -description = f""" +description = """ Pipeline for converting FASTQ files to analysis-ready BAM files. Here, "analysis-ready" means that the final BAM files are: 1) aligned to a diff --git a/fuc/cli/ngs_pon.py b/fuc/cli/ngs_pon.py index 459942b..2cb0ed1 100644 --- a/fuc/cli/ngs_pon.py +++ b/fuc/cli/ngs_pon.py @@ -6,7 +6,7 @@ import pandas as pd -description = f""" +description = """ Pipeline for constructing a panel of normals (PoN). Dependencies: diff --git a/fuc/cli/ngs_quant.py b/fuc/cli/ngs_quant.py index 28d41de..a0f9196 100644 --- a/fuc/cli/ngs_quant.py +++ b/fuc/cli/ngs_quant.py @@ -6,7 +6,7 @@ import pandas as pd -description = f""" +description = """ Pipeline for running RNAseq quantification from FASTQ files with Kallisto. External dependencies: diff --git a/fuc/cli/ngs_trim.py b/fuc/cli/ngs_trim.py index e44a5b5..4384fc3 100644 --- a/fuc/cli/ngs_trim.py +++ b/fuc/cli/ngs_trim.py @@ -6,7 +6,7 @@ import pandas as pd -description = f""" +description = """ Pipeline for trimming adapters from FASTQ files. External dependencies: diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index a055695..d0d27c5 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -2,16 +2,20 @@ from .. import api -description = f""" -Call variants (SNVs/indels) from BAM files. +description = """ +Call SNVs and indels from BAM files. -This command will run a fully customizable, bcftools-based pipeline for -calling and filtering variants. +Under the hood, the command utilizes the bcftool program to call variants. 
""" epilog = f""" [Example] Specify regions manually: - $ fuc {api.common._script_name()} ref.fa in1.bam in2.bam -r chr1:100-200 chr2:300-400 > out.vcf + $ fuc {api.common._script_name()} ref.fa 1.bam 2.bam \\ + -r chr1:100-200 chr2:400-500 > out.vcf + +[Example] Specify regions with a BED file: + $ fuc {api.common._script_name()} ref.fa bam.list \\ + -r in.bed > out.vcf """ def create_parser(subparsers): @@ -20,48 +24,59 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Call variants (SNVs/indels) from BAM files.', + help= +"""Call SNVs and indels from BAM files.""" ) parser.add_argument( 'fasta', - help="Reference FASTA file." + help= +"""Reference FASTA file.""" ) parser.add_argument( 'bams', nargs='+', - help="One or more input BAM files. Alternatively, you can \n" - "provide a text file (.txt, .tsv, .csv, or .list) \n" - "containing one BAM file per line." + help= +"""One or more input BAM files. Alternatively, you can +provide a text file (.txt, .tsv, .csv, or .list) +containing one BAM file per line.""" ) parser.add_argument( + '-r', '--regions', nargs='+', metavar='TEXT', - help="Only call variants in given regions. Each region must \n" - "have the format chrom:start-end and be a half-open \n" - "interval with (start, end]. This means, for example, \n" - "chr1:100-103 will extract positions 101, 102, and \n" - "103. Alternatively, you can provide a BED file \n" - "(compressed or uncompressed) to specify regions. Note \n" - "that the 'chr' prefix in contig names (e.g. 'chr1' \n" - "vs. '1') will be automatically added or removed as \n" - "necessary to match the input VCF's contig names." + help= +"""By default, the command looks at each genomic +position with coverage in BAM files, which can be +excruciatingly slow for large files (e.g. whole +genome sequencing). Therefore, use this argument to +only call variants in given regions. 
Each region must +have the format chrom:start-end and be a half-open +interval with (start, end]. This means, for example, +chr1:100-103 will extract positions 101, 102, and +103. Alternatively, you can provide a BED file +(compressed or uncompressed) to specify regions. Note +that the 'chr' prefix in contig names (e.g. 'chr1' +vs. '1') will be automatically added or removed as +necessary to match the input BAM's contig names.""" ) parser.add_argument( '--min-mq', metavar='INT', type=int, default=1, - help="Minimum mapping quality for an alignment to be used \n" - "(default: 1)." + help= +"""Minimum mapping quality for an alignment to be used +(default: 1).""" ) parser.add_argument( '--max-depth', metavar='INT', type=int, default=250, - help="At a position, read maximally this number of reads \n" - "per input file (default: 250)." + help= +"""At a position, read maximally this number of reads +per input file (default: 250).""" ) def main(args): diff --git a/fuc/cli/vcf_merge.py b/fuc/cli/vcf_merge.py index 0f7a05b..60d5353 100644 --- a/fuc/cli/vcf_merge.py +++ b/fuc/cli/vcf_merge.py @@ -2,7 +2,7 @@ from .. import api -description = f""" +description = """ Merge two or more VCF files. """ diff --git a/fuc/cli/vcf_slice.py b/fuc/cli/vcf_slice.py index 3acfba2..388fcea 100644 --- a/fuc/cli/vcf_slice.py +++ b/fuc/cli/vcf_slice.py @@ -2,7 +2,7 @@ from .. import api -description = f""" +description = """ Slice a VCF file for specified regions. """ diff --git a/fuc/cli/vcf_split.py b/fuc/cli/vcf_split.py index 8761a09..8d14dd8 100644 --- a/fuc/cli/vcf_split.py +++ b/fuc/cli/vcf_split.py @@ -4,7 +4,7 @@ from .. import api -description = f""" +description = """ Split a VCF file by individual. """ diff --git a/fuc/cli/vcf_vep.py b/fuc/cli/vcf_vep.py index eb1774a..969b21f 100644 --- a/fuc/cli/vcf_vep.py +++ b/fuc/cli/vcf_vep.py @@ -2,7 +2,7 @@ from .. import api -description = f""" +description = """ Filter a VCF file by annotations from Ensembl VEP. 
""" From 2e596b84fc7ff8b987726c6c1e9efc37bad1e7bc Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Sat, 12 Feb 2022 15:26:11 +0900 Subject: [PATCH 11/22] Update `pyvcf.call` --- fuc/api/pyvcf.py | 35 ++++++++++++++++++++++++----------- fuc/cli/vcf_call.py | 2 +- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 1fe7431..4b643e4 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -64,7 +64,7 @@ import warnings import tempfile -from . import pybed, common, pymaf +from . import pybed, common, pymaf, pybam import numpy as np import pandas as pd @@ -138,7 +138,7 @@ } def call( - fasta, bams, path=None, regions=None, min_mq=1, max_depth=250 + fasta, bams, regions=None, path=None, min_mq=1, max_depth=250 ): """ Call SNVs and indels from BAM files. @@ -152,9 +152,6 @@ def call( bams : str or list One or more input BAM files. Alternatively, you can provide a text file (.txt, .tsv, .csv, or .list) containing one BAM file per line. - path : str, optional - Output VCF file. Writes to stdout when ``path='-'``. If None is - provided the result is returned as a string. regions : str, list, or pybed.BedFrame, optional By default (``regions=None``), the method looks at each genomic position with coverage in BAM files, which can be excruciatingly slow @@ -167,6 +164,9 @@ def call( to specify regions. Note that the 'chr' prefix in contig names (e.g. 'chr1' vs. '1') will be automatically added or removed as necessary to match the input VCF's contig names. + path : str, optional + Output VCF file. Writes to stdout when ``path='-'``. If None is + provided the result is returned as a string. min_mq : int, default: 1 Minimum mapping quality for an alignment to be used. max_depth : int, default: 250 @@ -180,14 +180,28 @@ def call( # Parse input BAM files. bams = common.parse_list_or_file(bams) + # Check the 'chr' prefix. 
+ if all([pybam.has_chr_prefix(x) for x in bams]): + chr_prefix = 'chr' + else: + chr_prefix = '' + # Parse target regions, if provided. if regions is not None: if isinstance(regions, str): regions = [regions] + elif isinstance(regions, list): + pass + elif isinstance(regions, pybed.BedFrame): + regions = bf.to_regions() + else: + raise TypeError("Incorrect type of argument 'regions'") if '.bed' in regions[0]: - parsed_regions = ['-R', regions[0]] + bf = pybed.BedFrame.from_file(regions[0]) + regions = bf.to_regions() else: - parsed_regions = ['-r'] + regions + regions = common.sort_regions(regions) + regions = [chr_prefix + x.replace('chr', '') for x in regions] with tempfile.TemporaryDirectory() as t: # Step 1: Get genotype likelihoods. @@ -195,10 +209,9 @@ def call( args += ['-q', str(min_mq)] args += ['--max-depth', str(max_depth)] args += ['-f', fasta] - if parsed_regions is not None: - args += parsed_regions - args += bams - results = bcftools.mpileup(*args) + if regions is not None: + args += ['-r', ','.join(regions)] + results = bcftools.mpileup(*(args + bams)) with open(f'{t}/likelihoods.bcf', 'wb') as f: f.write(results) diff --git a/fuc/cli/vcf_call.py b/fuc/cli/vcf_call.py index d0d27c5..b200783 100644 --- a/fuc/cli/vcf_call.py +++ b/fuc/cli/vcf_call.py @@ -81,6 +81,6 @@ def create_parser(subparsers): def main(args): api.pyvcf.call( - args.fasta, args.bams, path='-', regions=args.regions, + args.fasta, args.bams, regions=args.regions, path='-', min_mq=args.min_mq, max_depth=args.max_depth ) From 66fcfb859fd31fd0d1b944eb21ee16ad19595d82 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Tue, 15 Feb 2022 10:29:08 +0900 Subject: [PATCH 12/22] Rename `pyvcf.VcfFrame.variants` to `pyvcf.VcfFrame.to_variants` --- CHANGELOG.rst | 1 + fuc/api/pyvcf.py | 204 +++++++++++++++++++++++------------------------ 2 files changed, 103 insertions(+), 102 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 97c1149..77d36ea 100644 --- a/CHANGELOG.rst 
+++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Changelog * Combine optional arguments ``bam`` and ``fn`` into single positional argument ``bams`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bam`` and ``--fn`` into ``bams``). * Combine optional arguments ``bed`` and ``region`` into single optional argument ``regions`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bed`` and ``--region`` into ``--regions``). * Update :meth:`pycov.CovFrame.from_bam` method and :command:`bam-depth` command to automatically handle the 'chr' string. +* Rename :meth:`pyvcf.VcfFrame.variants` method to :meth:`pyvcf.VcfFrame.to_variants`. 0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 4b643e4..18ab30f 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -2344,6 +2344,70 @@ def copy(self): """Return a copy of the VcfFrame.""" return self.__class__(self.copy_meta(), self.copy_df()) + def to_bed(self): + """Write BedFrame from the VcfFrame. + + Returns + ------- + BedFrame + BedFrame. + + Examples + -------- + Assume we have the following data: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103, 104], + ... 'ID': ['.', '.', '.', '.', '.'], + ... 'REF': ['A', 'A', 'C', 'C', 'ACGT'], + ... 'ALT': ['C', 'T,G', 'G', 'A,G,CT', 'A'], + ... 'QUAL': ['.', '.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.', '.'], + ... 'FORMAT': ['GT:DP', 'GT:DP', 'GT:DP', 'GT:DP', 'GT:DP'], + ... 'Steven': ['0/1:32', './.:.', '0/1:27', '0/2:34', '0/0:31'], + ... 'Sara': ['0/0:28', '1/2:30', '1/1:29', '1/2:38', '0/1:27'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara + 0 chr1 100 . A C . . . GT:DP 0/1:32 0/0:28 + 1 chr1 101 . A T,G . . . GT:DP ./.:. 
1/2:30 + 2 chr1 102 . C G . . . GT:DP 0/1:27 1/1:29 + 3 chr1 103 . C A,G,CT . . . GT:DP 0/2:34 1/2:38 + 4 chr1 104 . ACGT A . . . GT:DP 0/0:31 0/1:27 + + We can construct BedFrame from the VcfFrame: + + >>> bf = vf.to_bed() + >>> bf.gr.df + Chromosome Start End + 0 chr1 100 100 + 1 chr1 101 101 + 2 chr1 102 102 + 3 chr1 103 103 + 4 chr1 103 104 + 5 chr1 105 107 + """ + def one_row(r): + if len(r.REF) == len(r.ALT) == 1: + start = r.POS + end = r.POS + elif len(r.REF) > len(r.ALT): + start = r.POS + 1 + end = r.POS + len(r.REF) - len(r.ALT) + else: + start = r.POS + end = r.POS + 1 + return pd.Series([r.CHROM, start, end]) + df = self.expand().df.apply(one_row, axis=1) + df.columns = ['Chromosome', 'Start', 'End'] + df = df.drop_duplicates() + bf = pybed.BedFrame.from_frame([], df) + return bf + def to_file(self, fn, compression=False): """ Write VcfFrame to a VCF file. @@ -2425,6 +2489,44 @@ def to_string(self): ).to_csv(index=False, sep='\t') return s + def to_variants(self): + """ + List unique variants in VcfFrame. + + Returns + ------- + list + List of unique variants. + + Examples + -------- + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr2'], + ... 'POS': [100, 101], + ... 'ID': ['.', '.'], + ... 'REF': ['G', 'T'], + ... 'ALT': ['A', 'A,C'], + ... 'QUAL': ['.', '.'], + ... 'FILTER': ['.', '.'], + ... 'INFO': ['.', '.'], + ... 'FORMAT': ['GT', 'GT'], + ... 'A': ['0/1', '1/2'] + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.variants() + ['chr1-100-G-A', 'chr2-101-T-A', 'chr2-101-T-C'] + """ + def one_row(r): + result = [] + for alt in r.ALT.split(','): + result.append(f'{r.CHROM}-{r.POS}-{r.REF}-{alt}') + return ','.join(result) + s = self.df.apply(one_row, axis=1) + s = ','.join(s) + return s.split(',') + def strip(self, format='GT', metadata=False): """ Remove any unnecessary data. 
@@ -4970,70 +5072,6 @@ def slice(self, region): df = df[df.POS <= end] return self.__class__(self.copy_meta(), df) - def to_bed(self): - """Write BedFrame from the VcfFrame. - - Returns - ------- - BedFrame - BedFrame. - - Examples - -------- - Assume we have the following data: - - >>> from fuc import pyvcf - >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'], - ... 'POS': [100, 101, 102, 103, 104], - ... 'ID': ['.', '.', '.', '.', '.'], - ... 'REF': ['A', 'A', 'C', 'C', 'ACGT'], - ... 'ALT': ['C', 'T,G', 'G', 'A,G,CT', 'A'], - ... 'QUAL': ['.', '.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.', '.'], - ... 'INFO': ['.', '.', '.', '.', '.'], - ... 'FORMAT': ['GT:DP', 'GT:DP', 'GT:DP', 'GT:DP', 'GT:DP'], - ... 'Steven': ['0/1:32', './.:.', '0/1:27', '0/2:34', '0/0:31'], - ... 'Sara': ['0/0:28', '1/2:30', '1/1:29', '1/2:38', '0/1:27'], - ... } - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara - 0 chr1 100 . A C . . . GT:DP 0/1:32 0/0:28 - 1 chr1 101 . A T,G . . . GT:DP ./.:. 1/2:30 - 2 chr1 102 . C G . . . GT:DP 0/1:27 1/1:29 - 3 chr1 103 . C A,G,CT . . . GT:DP 0/2:34 1/2:38 - 4 chr1 104 . ACGT A . . . GT:DP 0/0:31 0/1:27 - - We can construct BedFrame from the VcfFrame: - - >>> bf = vf.to_bed() - >>> bf.gr.df - Chromosome Start End - 0 chr1 100 100 - 1 chr1 101 101 - 2 chr1 102 102 - 3 chr1 103 103 - 4 chr1 103 104 - 5 chr1 105 107 - """ - def one_row(r): - if len(r.REF) == len(r.ALT) == 1: - start = r.POS - end = r.POS - elif len(r.REF) > len(r.ALT): - start = r.POS + 1 - end = r.POS + len(r.REF) - len(r.ALT) - else: - start = r.POS - end = r.POS + 1 - return pd.Series([r.CHROM, start, end]) - df = self.expand().df.apply(one_row, axis=1) - df.columns = ['Chromosome', 'Start', 'End'] - df = df.drop_duplicates() - bf = pybed.BedFrame.from_frame([], df) - return bf - def extract_format(self, k, func=None, as_nan=False): """ Extract data for the specified FORMAT key. 
@@ -6070,41 +6108,3 @@ def get_af(self, sample, variant): af = float(field.split(',')[j+1]) return af - - def variants(self): - """ - List unique variants in VcfFrame. - - Returns - ------- - list - List of unique variants. - - Examples - -------- - - >>> from fuc import pyvcf - >>> data = { - ... 'CHROM': ['chr1', 'chr2'], - ... 'POS': [100, 101], - ... 'ID': ['.', '.'], - ... 'REF': ['G', 'T'], - ... 'ALT': ['A', 'A,C'], - ... 'QUAL': ['.', '.'], - ... 'FILTER': ['.', '.'], - ... 'INFO': ['.', '.'], - ... 'FORMAT': ['GT', 'GT'], - ... 'A': ['0/1', '1/2'] - ... } - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.variants() - ['chr1-100-G-A', 'chr2-101-T-A', 'chr2-101-T-C'] - """ - def one_row(r): - result = [] - for alt in r.ALT.split(','): - result.append(f'{r.CHROM}-{r.POS}-{r.REF}-{alt}') - return ','.join(result) - s = self.df.apply(one_row, axis=1) - s = ','.join(s) - return s.split(',') From 5b98ffa2a558b862401f4de1c506ddc6a8926e5e Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Tue, 15 Feb 2022 10:58:52 +0900 Subject: [PATCH 13/22] Update docs --- fuc/api/pybed.py | 3 ++- fuc/api/pyfq.py | 3 ++- fuc/api/pymaf.py | 17 +++++++++++------ fuc/api/pysnpeff.py | 9 ++++++--- fuc/api/pyvcf.py | 41 +++++++++++++++++++++++++++-------------- fuc/api/pyvep.py | 15 ++++++++++----- 6 files changed, 58 insertions(+), 30 deletions(-) diff --git a/fuc/api/pybed.py b/fuc/api/pybed.py index 6383dab..d31b942 100644 --- a/fuc/api/pybed.py +++ b/fuc/api/pybed.py @@ -48,7 +48,8 @@ ] class BedFrame: - """Class for storing BED data. + """ + Class for storing BED data. Parameters ---------- diff --git a/fuc/api/pyfq.py b/fuc/api/pyfq.py index f88262b..a3e1d8b 100644 --- a/fuc/api/pyfq.py +++ b/fuc/api/pyfq.py @@ -30,7 +30,8 @@ def readlen(self): @classmethod def from_file(cls, fn): - """Construct FqFrame from a FASTQ file. + """ + Construct FqFrame from a FASTQ file. 
Parameters ---------- diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py index 4485bab..dfbe434 100644 --- a/fuc/api/pymaf.py +++ b/fuc/api/pymaf.py @@ -198,7 +198,8 @@ SNV_CLASS_ORDER = ['C>A', 'C>G', 'C>T', 'T>A', 'T>C', 'T>G'] class MafFrame: - """Class for storing MAF data. + """ + Class for storing MAF data. Parameters ---------- @@ -2367,7 +2368,8 @@ def plot_summary( legend_fontsize=12 ): - """Create a summary figure for MafFrame. + """ + Create a summary figure for MafFrame. Parameters ---------- @@ -2709,7 +2711,8 @@ def plot_vaf( return ax def plot_varcls(self, ax=None, figsize=None, **kwargs): - """Create a bar plot for the nonsynonymous variant classes. + """ + Create a bar plot for the nonsynonymous variant classes. Parameters ---------- @@ -3350,7 +3353,7 @@ def to_vcf( Returns ------- VcfFrame - The VcfFrame object. + VcfFrame object. Examples -------- @@ -3454,7 +3457,8 @@ def one_row(r): return vf def to_file(self, fn): - """Write MafFrame to a MAF file. + """ + Write MafFrame to a MAF file. Parameters ---------- @@ -3465,7 +3469,8 @@ def to_file(self, fn): f.write(self.to_string()) def to_string(self): - """Render MafFrame to a console-friendly tabular output. + """ + Render MafFrame to a console-friendly tabular output. Returns ------- diff --git a/fuc/api/pysnpeff.py b/fuc/api/pysnpeff.py index f37ae2b..1177d54 100644 --- a/fuc/api/pysnpeff.py +++ b/fuc/api/pysnpeff.py @@ -31,7 +31,8 @@ """ def row_firstann(r): - """Return the first SnpEff annotation for the row.""" + """ + Return the first SnpEff annotation for the row.""" ann = [x for x in r.INFO.split(';') if 'ANN=' in x] if not ann: return '' @@ -39,7 +40,8 @@ def row_firstann(r): return ann def filter_ann(vf, targets, include=True): - """Filter out rows based on the SnpEff annotations. + """ + Filter out rows based on the SnpEff annotations. Parameters ---------- @@ -71,7 +73,8 @@ def func(r): return vf def parseann(vf, idx, sep=' | '): - """Parse SnpEff annotations. 
+ """ + Parse SnpEff annotations. Parameters ---------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 18ab30f..67282c7 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -381,7 +381,8 @@ def gt_miss(g): return '.' in g.split(':')[0] def gt_polyp(g): - """Return True if sample genotype has a polyploid call. + """ + Return True if sample genotype has a polyploid call. Parameters ---------- @@ -1534,7 +1535,8 @@ def one_gt(g): return self.__class__(self.copy_meta(), df) def add_dp(self): - """Compute DP using AD and add it to the FORMAT field. + """ + Compute DP using AD and add it to the FORMAT field. Returns ------- @@ -1594,7 +1596,8 @@ def infunc(x): return vf def add_flag(self, flag, order='last', index=None): - """Add the given flag to the INFO field. + """ + Add the given flag to the INFO field. The default behavior is to add the flag to all rows in the VcfFrame. @@ -1697,7 +1700,8 @@ def f(r): return vf def cfilter_empty(self, opposite=False, as_list=False): - """Remove samples whose genotype calls are all missing. + """ + Remove samples whose genotype calls are all missing. Parameters ---------- @@ -1773,7 +1777,8 @@ def cfilter_empty(self, opposite=False, as_list=False): return self.subset(l) def collapse(self): - """Collapse duplicate records in the VcfFrame. + """ + Collapse duplicate records in the VcfFrame. Duplicate records have the identical values for CHROM, POS, and REF. They can result from merging two VCF files. @@ -2345,12 +2350,13 @@ def copy(self): return self.__class__(self.copy_meta(), self.copy_df()) def to_bed(self): - """Write BedFrame from the VcfFrame. + """ + Convert VcfFrame to BedFrame. Returns ------- BedFrame - BedFrame. + BedFrame obejct. Examples -------- @@ -3469,7 +3475,8 @@ def one_gt(g): return vf def expand(self): - """Expand each multiallelic locus to multiple rows. + """ + Expand each multiallelic locus to multiple rows. Only the GT subfield of FORMAT will be retained. 
@@ -3806,7 +3813,8 @@ def filter_indel(self, opposite=False, as_index=False): return self.__class__(self.copy_meta(), self.df[i]) def filter_flagall(self, flags, opposite=False, as_index=False): - """Select rows if all of the given INFO flags are present. + """ + Select rows if all of the given INFO flags are present. Parameters ---------- @@ -3889,7 +3897,8 @@ def f(r): return self.__class__(self.copy_meta(), self.df[i]) def filter_flagany(self, flags, opposite=False, as_index=False): - """Select rows if any one of the given INFO flags is present. + """ + Select rows if any one of the given INFO flags is present. Parameters ---------- @@ -4047,7 +4056,8 @@ def filter_multialt(self, opposite=False, as_index=False): return self.__class__(self.copy_meta(), self.df[i]) def filter_pass(self, opposite=False, as_index=False): - """Select rows with PASS in the FILTER field. + """ + Select rows with PASS in the FILTER field. Parameters ---------- @@ -4268,7 +4278,8 @@ def filter_polyp(self, opposite=False, as_index=False): return self.__class__(self.copy_meta(), self.df[i]) def filter_qual(self, threshold, opposite=False, as_index=False): - """Select rows with minimum QUAL value. + """ + Select rows with minimum QUAL value. Parameters ---------- @@ -4348,7 +4359,8 @@ def one_row(r): return self.__class__(self.copy_meta(), self.df[i]) def filter_sampall(self, samples=None, opposite=False, as_index=False): - """Select rows if all of the given samples have the variant. + """ + Select rows if all of the given samples have the variant. The default behavior is to use all samples in the VcfFrame. @@ -4544,7 +4556,8 @@ def filter_sampany(self, samples=None, opposite=False, as_index=False): return self.__class__(self.copy_meta(), self.df[i]) def filter_sampnum(self, threshold, opposite=False, as_index=False): - """Select rows if the variant is prevalent enough. + """ + Select rows if the variant is prevalent enough. 
Parameters ---------- diff --git a/fuc/api/pyvep.py b/fuc/api/pyvep.py index c29404b..69906c4 100644 --- a/fuc/api/pyvep.py +++ b/fuc/api/pyvep.py @@ -156,7 +156,8 @@ ] def row_firstann(r): - """Return the first result in the row. + """ + Return the first result in the row. Parameters ---------- @@ -208,7 +209,8 @@ def row_firstann(r): return results.split(',')[0] def row_mostsevere(r): - """Return result with the most severe consequence in the row. + """ + Return result with the most severe consequence in the row. Parameters ---------- @@ -260,7 +262,8 @@ def row_mostsevere(r): return sorted(results.split(','), key=f)[0] def parseann(vf, targets, sep=' | ', as_series=False): - """Parse variant annotations in VcfFrame. + """ + Parse variant annotations in VcfFrame. If there are multiple results per row, the method will use the first one for extracting requested data. @@ -391,7 +394,8 @@ def to_frame(vf, as_zero=False): return df def pick_result(vf, mode='mostsevere'): - """Return a new VcfFrame after picking one result per row. + """ + Return a new VcfFrame after picking one result per row. Parameters ---------- @@ -456,7 +460,8 @@ def pick_result(vf, mode='mostsevere'): return new_vf def annot_names(vf): - """Return the list of avaialble consequence annotations in the VcfFrame. + """ + Return the list of avaialble consequence annotations in the VcfFrame. 
Parameters ---------- From 72c5e40adb6b2fca7aaff0c3be66c7805768938d Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Tue, 15 Feb 2022 15:25:28 +0900 Subject: [PATCH 14/22] Update `pyvcf.row_updateinfo` --- CHANGELOG.rst | 1 + fuc/api/pyvcf.py | 260 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 198 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 77d36ea..152cbe1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,7 @@ Changelog * Combine optional arguments ``bed`` and ``region`` into single optional argument ``regions`` for :meth:`pycov.CovFrame.from_bam` method. The same goes for :command:`bam-depth` command (combine ``--bed`` and ``--region`` into ``--regions``). * Update :meth:`pycov.CovFrame.from_bam` method and :command:`bam-depth` command to automatically handle the 'chr' string. * Rename :meth:`pyvcf.VcfFrame.variants` method to :meth:`pyvcf.VcfFrame.to_variants`. +* Add new optional arguments ``force`` and ``missing`` to :meth:`pyvcf.row_updateinfo` method. 0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 67282c7..9bbb980 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -12,45 +12,117 @@ identifier (e.g. 'chr1'). See the VCF specification above for an example VCF file. -Genotype lines have nine required fields for storing variant information -and variable-length fields for storing sample genotype data. For some -fields, missing values are tolerated and can be specified with a dot ('.'). -The nine required fields are: - -+-----+--------+------------------------------------+------------------------+ -| No. 
| Name | Description | Examples | -+=====+========+====================================+========================+ -| 1 | CHROM | Chromosome or contig identifier | 'chr2', '2', 'chrM' | -+-----+--------+------------------------------------+------------------------+ -| 2 | POS | 1-based reference position | 10041, 23042 | -+-----+--------+------------------------------------+------------------------+ -| 3 | ID | ';'-separated variant identifiers | '.', 'rs35', 'rs9;rs53'| -+-----+--------+------------------------------------+------------------------+ -| 4 | REF | Reference allele | 'A', 'GT' | -+-----+--------+------------------------------------+------------------------+ -| 5 | ALT | ','-separated alternate alleles | 'T', 'ACT', 'C,T' | -+-----+--------+------------------------------------+------------------------+ -| 6 | QUAL | Phred-scaled quality score for ALT | '.', 67, 12 | -+-----+--------+------------------------------------+------------------------+ -| 7 | FILTER | ';'-separated filters that failed | '.', 'PASS', 'q10;s50' | -+-----+--------+------------------------------------+------------------------+ -| 8 | INFO | ';'-separated information fields | '.', 'DP=14;AF=0.5;DB' | -+-----+--------+------------------------------------+------------------------+ -| 9 | FORMAT | ':'-separated genotype fields | 'GT', 'GT:AD:DP' | -+-----+--------+------------------------------------+------------------------+ - -You will sometimes come across VCFs that have only eight columns, and contain -no FORMAT or sample-specific information. These are called "sites-only" VCFs, -and represent variation that has been observed in a population. Generally, -information about the population of origin should be included in the header. - -There are several common, reserved genotype keywords that are standards -across the community. 
Currently, the pyvcf submodule is aware of the -following: - -* AD - Total read depth for each allele (R, Integer) -* AF - Allele fraction of the event in the tumor (1, Float) -* DP - Read depth (1, Integer) +Genotype lines usually consist of nine columns for storing variant +information (all fixed and mandatory except FORMAT) plus additional columns +for storing sample genotype data. For some columns, missing values are +allowed and can be specified with a dot ('.'). The first nine columns are: + +.. list-table:: + :header-rows: 1 + + * - No. + - Column + - Description + - Required + - Missing + - Examples + * - 1 + - CHROM + - Chromosome or contig identifier + - ✅ + - ❌ + - 'chr2', '2', 'chrM' + * - 2 + - POS + - 1-based reference position + - ✅ + - ❌ + - 10041, 23042 + * - 3 + - ID + - ';'-separated variant identifiers + - ✅ + - ✅ + - '.', 'rs35', 'rs9;rs53' + * - 4 + - REF + - Reference allele + - ✅ + - ❌ + - 'A', 'GT' + * - 5 + - ALT + - ','-separated alternate alleles + - ✅ + - ❌ + - 'T', 'ACT', 'C,T' + * - 6 + - QUAL + - Phred-scaled quality score for ALT + - ✅ + - ✅ + - '.', 67, 12 + * - 7 + - FILTER + - ';'-separated filters that failed + - ✅ + - ✅ + - '.', 'PASS', 'q10;s50' + * - 8 + - INFO + - ';'-separated information fields + - ✅ + - ✅ + - '.', 'DP=14;AF=0.5;DB' + * - 9 + - FORMAT + - ':'-separated genotype fields + - ❌ + - ❌ + - 'GT', 'GT:AD:DP' + +You will sometimes come across VCF files that have only eight columns, and +do not contain the FORMAT column or sample-specific information. These are +called "sites-only" VCF files, and normally represent genetic variation that +has been observed in a large population. Generally, information about the +population of origin should be included in the header. + +There are several reserved keywords in the INFO and FORMAT columns that are +standards across the community. Popular keywords are listed below: + +.. 
list-table:: + :header-rows: 1 + + * - Column + - Key + - Number + - Type + - Description + * - INFO + - AC + - A + - Integer + - Allele count in genotypes, for each ALT allele, in the same order as listed + * - INFO + - AF + - A + - Float + - Allele frequency for each ALT allele in the same order as listed (estimated from primary data, not called genotypes) + * - FORMAT + - AD + - R + - Integer + - Total read depth for each allele + * - FORMAT + - AF + - 1 + - Float + - Allele fraction of the event in the tumor + * - FORMAT + - DP + - 1 + - Integer + - Read depth If sample annotation data are available for a given VCF file, use the :class:`common.AnnFrame` class to import the data. @@ -845,9 +917,9 @@ def one_gt(g): return '|' in g.split(':')[0] return r[9:].apply(one_gt).all() -def row_updateinfo(r, key, value): +def row_updateinfo(r, key, value, force=True, missing=False): """ - Update INFO data in the row that match the given key. + Update INFO field matching specified key. Parameters ---------- @@ -857,6 +929,12 @@ def row_updateinfo(r, key, value): INFO key. value : str New value to be assigned. + force : bool, default: True + If True, overwrite any existing data. + missing : bool, default: False + By default (``missing=False``), the method will not update INFO field + if there is missing value ('.'). Setting ``missing=True`` will stop + this behavior. Returns ------- @@ -868,36 +946,62 @@ def row_updateinfo(r, key, value): >>> from fuc import pyvcf >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], - ... 'POS': [100, 101, 102, 103], - ... 'ID': ['.', '.', '.', '.'], - ... 'REF': ['G', 'T', 'A', 'C'], - ... 'ALT': ['A', 'C', 'T', 'A'], - ... 'QUAL': ['.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.'], - ... 'INFO': ['DB;AC=0', 'DB;H2;AC=1', 'DB;H2;AC=1', '.'], - ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], - ... 'Steven': ['0/0', '0/1', '0/1', '0/0'], + ... 'CHROM': ['chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102], + ... 
'ID': ['.', '.', '.'], + ... 'REF': ['G', 'T', 'A'], + ... 'ALT': ['A', 'C', 'T'], + ... 'QUAL': ['.', '.', '.'], + ... 'FILTER': ['.', '.', '.'], + ... 'INFO': ['DB;AC=1', 'DB', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT'], + ... 'A': ['0/1', '1/1', '0/0'], + ... 'B': ['0/0', '0/1', '0/1'], ... } >>> vf = pyvcf.VcfFrame.from_dict([], data) >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . DB;AC=0 GT 0/0 - 1 chr1 101 . T C . . DB;H2;AC=1 GT 0/1 - 2 chr1 102 . A T . . DB;H2;AC=1 GT 0/1 - 3 chr1 103 . C A . . . GT 0/0 - >>> vf.df.apply(pyvcf.row_updateinfo, args=('AC', '4'), axis=1) - 0 DB;AC=4 - 1 DB;H2;AC=4 - 2 DB;H2;AC=4 - 3 . - dtype: object + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B + 0 chr1 100 . G A . . DB;AC=1 GT 0/1 0/0 + 1 chr1 101 . T C . . DB GT 1/1 0/1 + 2 chr1 102 . A T . . . GT 0/0 0/1 + >>> pyvcf.row_updateinfo(vf.df.iloc[0, :], 'AC', '100') + 'DB;AC=100' + >>> pyvcf.row_updateinfo(vf.df.iloc[0, :], 'AC', '100', force=False) + 'DB;AC=1' + >>> pyvcf.row_updateinfo(vf.df.iloc[1, :], 'AC', '100') + 'DB;AC=100' + >>> pyvcf.row_updateinfo(vf.df.iloc[1, :], 'AC', '100', force=False) + 'DB;AC=100' + >>> pyvcf.row_updateinfo(vf.df.iloc[2, :], 'AC', '100') + '.' + >>> pyvcf.row_updateinfo(vf.df.iloc[2, :], 'AC', '100', force=False) + '.' + >>> pyvcf.row_updateinfo(vf.df.iloc[2, :], 'AC', '100', missing=True) + 'AC=100' """ + data = f'{key}={value}' + + if not missing and r.INFO == '.': + return r.INFO + fields = r.INFO.split(';') + found = False + for i, field in enumerate(fields): if field.startswith(f'{key}='): - fields[i] = field[:len(key)+1] + value + found = True + if force: + fields[i] = data + else: + pass break + + if not found: + if r.INFO == '.': + fields = [data] + else: + fields.append(data) + return ';'.join(fields) def row_missval(r): @@ -1595,6 +1699,36 @@ def infunc(x): vf = self.__class__(self.copy_meta(), df) return vf + def add_info_ac(self, force=True): + """ + Add the AC field to the INFO column. 
+ + Parameters + ---------- + force : bool, default: True + If True, overwrite any existing data. + + Returns + ------- + VcfFrame + Updated VcfFrame. + """ + def one_row(r): + data = [] + for i, allele in enumerate(r.ALT.split(',')): + count = r[9:].apply(lambda x: x.split('/').count(str(i))) + data.append(str(sum(count))) + data = 'AC=' + ','.join(data) + if r.INFO == '.': + r.INFO = data + else: + r.INFO += ';' + data + return r + + df = self.df.apply(one_row, axis=1) + + return self.__class__(self.copy_meta(), df) + def add_flag(self, flag, order='last', index=None): """ Add the given flag to the INFO field. From 6f1e149a391a40a557d58c99b7c43191400aea37 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Tue, 15 Feb 2022 15:38:27 +0900 Subject: [PATCH 15/22] Update docs --- fuc/api/pyvep.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fuc/api/pyvep.py b/fuc/api/pyvep.py index 69906c4..9006495 100644 --- a/fuc/api/pyvep.py +++ b/fuc/api/pyvep.py @@ -415,6 +415,7 @@ def pick_result(vf, mode='mostsevere'): Examples -------- + >>> from fuc import pyvep, pyvcf >>> meta = [ ... '##fileformat=VCFv4.1', ... '##VEP="v104" time="2021-05-20 10:50:12" cache="/net/isilonP/public/ro/ensweb-data/latest/tools/grch37/e104/vep/cache/homo_sapiens/104_GRCh37" db="homo_sapiens_core_104_37@hh-mysql-ens-grch37-web" 1000genomes="phase3" COSMIC="92" ClinVar="202012" HGMD-PUBLIC="20204" assembly="GRCh37.p13" dbSNP="154" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2"', @@ -441,16 +442,16 @@ def pick_result(vf, mode='mostsevere'): >>> vf1 = pyvep.pick_result(vf, mode='mostsevere') >>> vf2 = pyvep.pick_result(vf, mode='firstann') >>> vf1.df.INFO - 0 CSQ=C|missense_variant|MODERATE|SPEN|ENSG00000... - 1 CSQ=T|missense_variant|MODERATE|NRAS|ENSG00000... - 2 CSQ=A|stop_gained&NMD_transcript_variant|HIGH|... - 3 CSQ=-|inframe_deletion|MODERATE|HRAS|ENSG00000... 
+ 0 CSQ=C|missense_variant|MODERATE|SPEN|ENSG00000065526|Transcript|ENST00000375759.3|protein_coding|12/15||||10322|10118|3373|Q/P|cAg/cCg|||1||HGNC|17575|||||tolerated_low_confidence(0.08)|possibly_damaging(0.718)|||||||||| + 1 CSQ=T|missense_variant|MODERATE|NRAS|ENSG00000213281|Transcript|ENST00000369535.4|protein_coding|3/7||||502|248|83|A/D|gCc/gAc|COSV99795232||-1||HGNC|7989|||||deleterious(0.02)|probably_damaging(0.946)|||1|1|||||| + 2 CSQ=A|stop_gained&NMD_transcript_variant|HIGH|NTRK1|ENSG00000198400|Transcript|ENST00000497019.2|nonsense_mediated_decay|10/16||||1183|1032|344|W/*|tgG/tgA|COSV62328771||1||HGNC|8031|||||||||1|1|||||| + 3 CSQ=-|inframe_deletion|MODERATE|HRAS|ENSG00000174775|Transcript|ENST00000311189.7|protein_coding|2/6||||198-200|26-28|9-10|VG/G|gTGGgc/ggc|rs1164486792||-1||HGNC|5173||||||||||1|||||| Name: INFO, dtype: object >>> vf2.df.INFO - 0 CSQ=C|downstream_gene_variant|MODIFIER|ZBTB17|... - 1 CSQ=T|downstream_gene_variant|MODIFIER|CSDE1|E... - 2 CSQ=A|missense_variant|MODERATE|NTRK1|ENSG0000... - 3 CSQ=-|upstream_gene_variant|MODIFIER|LRRC56|EN... 
+ 0 CSQ=C|downstream_gene_variant|MODIFIER|ZBTB17|ENSG00000116809|Transcript|ENST00000375733.2|protein_coding|||||||||||4617|-1||HGNC|12936|||||||||||||||| + 1 CSQ=T|downstream_gene_variant|MODIFIER|CSDE1|ENSG00000009307|Transcript|ENST00000261443.5|protein_coding||||||||||COSV99795232|3453|-1||HGNC|29905|||||||||1|1|||||| + 2 CSQ=A|missense_variant|MODERATE|NTRK1|ENSG00000198400|Transcript|ENST00000358660.3|protein_coding|10/16||||1296|1255|419|A/T|Gcc/Acc|COSV62328771||1||HGNC|8031|||||deleterious(0.01)|probably_damaging(0.999)|||1|1|||||| + 3 CSQ=-|upstream_gene_variant|MODIFIER|LRRC56|ENSG00000161328|Transcript|ENST00000270115.7|protein_coding||||||||||rs1164486792|3230|1||HGNC|25430||||||||||1|||||| Name: INFO, dtype: object """ funcs = {'mostsevere': row_mostsevere, 'firstann': row_firstann} From 3cb733ffea798ada17732d82d25f0573dd4ca8d1 Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Tue, 15 Feb 2022 17:26:39 +0900 Subject: [PATCH 16/22] Update `pyvcf`: * Add new method :meth:`pyvcf.gt_ploidy`. * Update :meth:`pyvcf.gt_polyp` method to use :meth:`pyvcf.gt_ploidy` method internally. * :issue:`53`: Add new methods :meth:`pyvcf.row_compute_info` and :meth:`pyvcf.VcfFrame.compute_info`. --- CHANGELOG.rst | 3 + fuc/api/pyvcf.py | 240 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 206 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 152cbe1..78075ec 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,6 +11,9 @@ Changelog * Update :meth:`pycov.CovFrame.from_bam` method and :command:`bam-depth` command to automatically handle the 'chr' string. * Rename :meth:`pyvcf.VcfFrame.variants` method to :meth:`pyvcf.VcfFrame.to_variants`. * Add new optional arguments ``force`` and ``missing`` to :meth:`pyvcf.row_updateinfo` method. +* Add new method :meth:`pyvcf.gt_ploidy`. +* Update :meth:`pyvcf.gt_polyp` method to use :meth:`pyvcf.gt_ploidy` method internally. 
+* :issue:`53`: Add new methods :meth:`pyvcf.row_compute_info` and :meth:`pyvcf.VcfFrame.compute_info`. 0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 9bbb980..a80fa59 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -103,6 +103,11 @@ - A - Integer - Allele count in genotypes, for each ALT allele, in the same order as listed + * - INFO + - AN + - 1 + - Integer + - Total number of alleles in called genotypes * - INFO - AF - A @@ -452,9 +457,50 @@ def gt_miss(g): """ return '.' in g.split(':')[0] +def gt_ploidy(g): + """ + For given genotype, determine its ploidy. + + Parameters + ---------- + g : str + Sample genotype. + + Returns + ------- + int + Ploidy number. + + Examples + -------- + + >>> from fuc import pyvcf + >>> pyvcf.gt_ploidy('1') + 1 + >>> pyvcf.gt_ploidy('.') + 1 + >>> pyvcf.gt_ploidy('0/1') + 2 + >>> pyvcf.gt_ploidy('./.') + 2 + >>> pyvcf.gt_ploidy('0|1') + 2 + >>> pyvcf.gt_ploidy('1|0|1') + 3 + >>> pyvcf.gt_ploidy('0/./1/1') + 4 + """ + gt = g.split(':')[0] + if '/' in gt: + return gt.count('/') + 1 + elif '|' in gt: + return gt.count('|') + 1 + else: + return 1 + def gt_polyp(g): """ - Return True if sample genotype has a polyploid call. + For given genotype, return True if it's polyploid. Parameters ---------- @@ -464,7 +510,7 @@ def gt_polyp(g): Returns ------- bool - True if sample genotype has a polyploid call. + True if genotype is polyploid. Examples -------- @@ -481,11 +527,7 @@ def gt_polyp(g): >>> pyvcf.gt_polyp('0/./1/1') True """ - gt = g.split(':')[0] - if '/' in gt: - return gt.count('/') > 1 - else: - return gt.count('|') > 1 + return gt_ploidy(g) > 2 def gt_hasvar(g): """ @@ -819,6 +861,106 @@ def row_hasindel(r): alt_has = max([len(x) for x in r['ALT'].split(',')]) > 1 return ref_has or alt_has + +def row_compute_info(r, key, decimals=3): + """ + For given row, compute AC/AN/AF in INFO column. + + Parameters + ---------- + r : pandas.Series + VCF row. 
+ key : {'AC', 'AN', 'AF'} + INFO key. + decimals : int, default: 3 + Number of decimals to display for AF. + + Returns + ------- + str + Requested INFO data. + + Example + ------- + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chrX'], + ... 'POS': [100, 101, 102, 100], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['G', 'T', 'A', 'A'], + ... 'ALT': ['A', 'C', 'T,G', 'G'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], + ... 'A': ['0/1', '0/0', '0/1', '0'], + ... 'B': ['1/1', './.', '0/0', '0/1'], + ... 'C': ['0/0', '0/0', '1/2', '1'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . . GT 0/1 1/1 0/0 + 1 chr1 101 . T C . . . GT 0/0 ./. 0/0 + 2 chr1 102 . A T,G . . . GT 0/1 0/0 1/2 + 3 chrX 100 . A G . . . GT 0 0/1 1 + >>> pyvcf.row_compute_info(vf.df.iloc[0, :], 'AC') + '3' + >>> pyvcf.row_compute_info(vf.df.iloc[0, :], 'AN') + '6' + >>> pyvcf.row_compute_info(vf.df.iloc[0, :], 'AF') + '0.500' + >>> pyvcf.row_compute_info(vf.df.iloc[1, :], 'AC') + '0' + >>> pyvcf.row_compute_info(vf.df.iloc[1, :], 'AN') + '4' + >>> pyvcf.row_compute_info(vf.df.iloc[1, :], 'AF') + '0.000' + >>> pyvcf.row_compute_info(vf.df.iloc[2, :], 'AC') + '2,1' + >>> pyvcf.row_compute_info(vf.df.iloc[2, :], 'AN') + '6' + >>> pyvcf.row_compute_info(vf.df.iloc[2, :], 'AF') + '0.333,0.167' + >>> pyvcf.row_compute_info(vf.df.iloc[3, :], 'AC') + '2' + >>> pyvcf.row_compute_info(vf.df.iloc[3, :], 'AN') + '4' + >>> pyvcf.row_compute_info(vf.df.iloc[3, :], 'AF') + '0.500' + """ + def get_ac(r): + counts = [] + for i, allele in enumerate(r.ALT.split(',')): + count = r[9:].apply(lambda x: x.split('/').count(str(i+1))) + counts.append(sum(count)) + return counts + + def get_an(r): + def one_gt(g): + if '.' 
in g: + return 0 + else: + return gt_ploidy(g) + return r[9:].apply(one_gt).sum() + + def get_af(r): + return [x / get_an(r) for x in get_ac(r)] + + methods = {'AC': get_ac, 'AN': get_an, 'AF': get_af} + + results = methods[key](r) + + if key == 'AC': + results = ','.join([str(x) for x in results]) + elif key == 'AN': + results = str(results) + else: + results = ','.join([f'{x:.{decimals}f}' for x in results]) + + return results + def row_parseinfo(r, key): """ Return INFO data in the row that match the given key. @@ -1699,36 +1841,6 @@ def infunc(x): vf = self.__class__(self.copy_meta(), df) return vf - def add_info_ac(self, force=True): - """ - Add the AC field to the INFO column. - - Parameters - ---------- - force : bool, default: True - If True, overwrite any existing data. - - Returns - ------- - VcfFrame - Updated VcfFrame. - """ - def one_row(r): - data = [] - for i, allele in enumerate(r.ALT.split(',')): - count = r[9:].apply(lambda x: x.split('/').count(str(i))) - data.append(str(sum(count))) - data = 'AC=' + ','.join(data) - if r.INFO == '.': - r.INFO = data - else: - r.INFO += ';' + data - return r - - df = self.df.apply(one_row, axis=1) - - return self.__class__(self.copy_meta(), df) - def add_flag(self, flag, order='last', index=None): """ Add the given flag to the INFO field. @@ -2036,6 +2148,60 @@ def raise_error(c): vf = self.__class__(self.copy_meta(), df) return vf + def compute_info(self, key): + """ + Compute AC/AN/AF in INFO column. + + The method will ignore and overwrite any existing data for selected + key. + + Returns + ------- + VcfFrame + Updated VcfFrame. + key : {'AC', 'AN', 'AF'} + INFO key. + + Example + ------- + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102], + ... 'ID': ['.', '.', '.'], + ... 'REF': ['G', 'T', 'A'], + ... 'ALT': ['A', 'C', 'T,G'], + ... 'QUAL': ['.', '.', '.'], + ... 'FILTER': ['.', '.', '.'], + ... 'INFO': ['AC=100', 'MQ=59', '.'], + ... 
'FORMAT': ['GT', 'GT', 'GT'], + ... 'A': ['0/1', '0/0', '0/1'], + ... 'B': ['1/1', '0/1', '0/0'], + ... 'C': ['0/0', '0/0', '1/2'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . AC=100 GT 0/1 1/1 0/0 + 1 chr1 101 . T C . . MQ=59 GT 0/0 0/1 0/0 + 2 chr1 102 . A T,G . . . GT 0/1 0/0 1/2 + >>> vf = vf.add_info_ac() + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . AC=3 GT 0/1 1/1 0/0 + 1 chr1 101 . T C . . MQ=59;AC=5 GT 0/0 0/1 0/0 + 2 chr1 102 . A T,G . . AC=3,2 GT 0/1 0/0 1/2 + """ + def one_row(r, key): + data = row_compute_info(r, key) + r.INFO = row_updateinfo(r, key, data, missing=True) + return r + + df = self.df.apply(one_row, args=(key,), axis=1) + + return self.__class__(self.copy_meta(), df) + @classmethod def from_dict(cls, meta, data): """ From b2239ad7ef0977636a244a86bd7f0c5763ab7e0f Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Tue, 15 Feb 2022 20:53:38 +0900 Subject: [PATCH 17/22] Update API --- CHANGELOG.rst | 2 +- fuc/api/pyvcf.py | 152 +++++++++++++++++++++++++++-------------------- 2 files changed, 89 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 78075ec..a3c7bb0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,7 +13,7 @@ Changelog * Add new optional arguments ``force`` and ``missing`` to :meth:`pyvcf.row_updateinfo` method. * Add new method :meth:`pyvcf.gt_ploidy`. * Update :meth:`pyvcf.gt_polyp` method to use :meth:`pyvcf.gt_ploidy` method internally. -* :issue:`53`: Add new methods :meth:`pyvcf.row_compute_info` and :meth:`pyvcf.VcfFrame.compute_info`. +* :issue:`53`: Add new methods to compute AC/AN/AF in the INFO column: :meth:`pyvcf.row_computeinfo` and :meth:`pyvcf.VcfFrame.compute_info`. 
0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index a80fa59..a48cbac 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -13,9 +13,10 @@ VCF file. Genotype lines usually consist of nine columns for storing variant -information (all fixed and mandatory except FORMAT) plus additional columns -for storing sample genotype data. For some columns, missing values are -allowed and can be specified with a dot ('.'). The first nine columns are: +information (all fixed and mandatory except for the FORMAT column) plus +additional sample-specific columns for expressing individual genotype calls +(e.g. '0/1'). Missing values are allowed in some cases and can be specified +with a dot ('.'). The first nine columns are: .. list-table:: :header-rows: 1 @@ -422,7 +423,7 @@ def rescue_filtered_variants(vfs, format='GT'): def gt_miss(g): """ - Return True if sample genotype is missing. + For given genotype, return True if it has missing value. Parameters ---------- @@ -432,7 +433,7 @@ def gt_miss(g): Returns ------- bool - True if sample genotype is missing. + True if genotype is missing. Examples -------- @@ -459,7 +460,7 @@ def gt_miss(g): def gt_ploidy(g): """ - For given genotype, determine its ploidy. + For given genotype, return its ploidy number. Parameters ---------- @@ -500,7 +501,7 @@ def gt_ploidy(g): def gt_polyp(g): """ - For given genotype, return True if it's polyploid. + For given genotype, return True if it is polyploid. Parameters ---------- @@ -531,7 +532,7 @@ def gt_polyp(g): def gt_hasvar(g): """ - Return True if sample genotype has at least one variant call. + For given genotype, return True if it has variation. Parameters ---------- @@ -541,7 +542,7 @@ def gt_hasvar(g): Returns ------- bool - True if sample genotype has a variant call. + True if genotype has variation. Examples -------- @@ -572,7 +573,7 @@ def gt_hasvar(g): def gt_unphase(g): """ - Return unphased sample genotype. 
+ For given genotype, return its unphased form. Parameters ---------- @@ -620,7 +621,7 @@ def gt_unphase(g): def gt_het(g): """ - Return True if genotype call is heterozygous. + For given genotype, return True if it is heterozygous. Parameters ---------- @@ -630,7 +631,7 @@ def gt_het(g): Returns ------- bool - True if genotype call is heterozygous. + True if genotype is heterozygous. Examples -------- @@ -657,7 +658,7 @@ def gt_het(g): def gt_pseudophase(g): """ - Return pseudophased genotype call. + For given genotype, return its pseudophased form. Parameters ---------- @@ -815,7 +816,7 @@ def merge( def row_hasindel(r): """ - Return True if the row has an indel. + For given row, return True if it has indel. Parameters ---------- @@ -862,9 +863,9 @@ def row_hasindel(r): return ref_has or alt_has -def row_compute_info(r, key, decimals=3): +def row_computeinfo(r, key, decimals=3): """ - For given row, compute AC/AN/AF in INFO column. + For given row, return AC/AN/AF calculation in INFO column. Parameters ---------- @@ -893,47 +894,54 @@ def row_compute_info(r, key, decimals=3): ... 'QUAL': ['.', '.', '.', '.'], ... 'FILTER': ['.', '.', '.', '.'], ... 'INFO': ['.', '.', '.', '.'], - ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], - ... 'A': ['0/1', '0/0', '0/1', '0'], - ... 'B': ['1/1', './.', '0/0', '0/1'], - ... 'C': ['0/0', '0/0', '1/2', '1'], + ... 'FORMAT': ['GT:DP', 'GT', 'GT', 'GT'], + ... 'A': ['1|0:29', '0|0', '1|0', '0'], + ... 'B': ['1/1:34', './.', '0/0', '0/1'], + ... 'C': ['0/0:23', '0/0', '1/2', '1'], ... } >>> vf = pyvcf.VcfFrame.from_dict([], data) >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C - 0 chr1 100 . G A . . . GT 0/1 1/1 0/0 - 1 chr1 101 . T C . . . GT 0/0 ./. 0/0 - 2 chr1 102 . A T,G . . . GT 0/1 0/0 1/2 - 3 chrX 100 . A G . . . GT 0 0/1 1 - >>> pyvcf.row_compute_info(vf.df.iloc[0, :], 'AC') + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . . GT:DP 1|0:29 1/1:34 0/0:23 + 1 chr1 101 . T C . . . GT 0|0 ./. 
0/0 + 2 chr1 102 . A T,G . . . GT 1|0 0/0 1/2 + 3 chrX 100 . A G . . . GT 0 0/1 1 + >>> pyvcf.row_computeinfo(vf.df.iloc[0, :], 'AC') '3' - >>> pyvcf.row_compute_info(vf.df.iloc[0, :], 'AN') + >>> pyvcf.row_computeinfo(vf.df.iloc[0, :], 'AN') '6' - >>> pyvcf.row_compute_info(vf.df.iloc[0, :], 'AF') + >>> pyvcf.row_computeinfo(vf.df.iloc[0, :], 'AF') '0.500' - >>> pyvcf.row_compute_info(vf.df.iloc[1, :], 'AC') + >>> pyvcf.row_computeinfo(vf.df.iloc[1, :], 'AC') '0' - >>> pyvcf.row_compute_info(vf.df.iloc[1, :], 'AN') + >>> pyvcf.row_computeinfo(vf.df.iloc[1, :], 'AN') '4' - >>> pyvcf.row_compute_info(vf.df.iloc[1, :], 'AF') + >>> pyvcf.row_computeinfo(vf.df.iloc[1, :], 'AF') '0.000' - >>> pyvcf.row_compute_info(vf.df.iloc[2, :], 'AC') + >>> pyvcf.row_computeinfo(vf.df.iloc[2, :], 'AC') '2,1' - >>> pyvcf.row_compute_info(vf.df.iloc[2, :], 'AN') + >>> pyvcf.row_computeinfo(vf.df.iloc[2, :], 'AN') '6' - >>> pyvcf.row_compute_info(vf.df.iloc[2, :], 'AF') + >>> pyvcf.row_computeinfo(vf.df.iloc[2, :], 'AF') '0.333,0.167' - >>> pyvcf.row_compute_info(vf.df.iloc[3, :], 'AC') + >>> pyvcf.row_computeinfo(vf.df.iloc[3, :], 'AC') '2' - >>> pyvcf.row_compute_info(vf.df.iloc[3, :], 'AN') + >>> pyvcf.row_computeinfo(vf.df.iloc[3, :], 'AN') '4' - >>> pyvcf.row_compute_info(vf.df.iloc[3, :], 'AF') + >>> pyvcf.row_computeinfo(vf.df.iloc[3, :], 'AF') '0.500' """ def get_ac(r): + def one_gt(g, i): + gt = g.split(':')[0] + if '/' in gt: + l = gt.split('/') + else: + l = gt.split('|') + return l.count(str(i + 1)) counts = [] for i, allele in enumerate(r.ALT.split(',')): - count = r[9:].apply(lambda x: x.split('/').count(str(i+1))) + count = r[9:].apply(one_gt, args=(i, )) counts.append(sum(count)) return counts @@ -963,7 +971,7 @@ def get_af(r): def row_parseinfo(r, key): """ - Return INFO data in the row that match the given key. + For given row, return requested data in INFO column. 
Parameters ---------- @@ -1015,7 +1023,7 @@ def row_parseinfo(r, key): def row_phased(r): """ - Return True if every genotype in the row is haplotype phased. + For given row, return True if all genotypes are phased. Parameters ---------- @@ -1061,7 +1069,7 @@ def one_gt(g): def row_updateinfo(r, key, value, force=True, missing=False): """ - Update INFO field matching specified key. + For given row, return updated data in INFO column. Parameters ---------- @@ -1148,7 +1156,7 @@ def row_updateinfo(r, key, value, force=True, missing=False): def row_missval(r): """ - Return the correctly formatted missing value for the row. + For given row, return formatted missing genotype. Parameters ---------- @@ -2167,34 +2175,50 @@ def compute_info(self, key): >>> from fuc import pyvcf >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr1'], - ... 'POS': [100, 101, 102], - ... 'ID': ['.', '.', '.'], - ... 'REF': ['G', 'T', 'A'], - ... 'ALT': ['A', 'C', 'T,G'], - ... 'QUAL': ['.', '.', '.'], - ... 'FILTER': ['.', '.', '.'], - ... 'INFO': ['AC=100', 'MQ=59', '.'], - ... 'FORMAT': ['GT', 'GT', 'GT'], - ... 'A': ['0/1', '0/0', '0/1'], - ... 'B': ['1/1', '0/1', '0/0'], - ... 'C': ['0/0', '0/0', '1/2'], + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chrX'], + ... 'POS': [100, 101, 102, 100], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['G', 'T', 'A', 'C'], + ... 'ALT': ['A', 'C', 'T,G', 'A'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['AC=100', 'MQ=59', '.', '.'], + ... 'FORMAT': ['GT:DP', 'GT', 'GT', 'GT'], + ... 'A': ['1|0:34', '0|0', '1|0', '0'], + ... 'B': ['1/1:23', '0/1', '0/0', '0/0'], + ... 'C': ['0/0:28', './.', '1/2', '1'], ... } >>> vf = pyvcf.VcfFrame.from_dict([], data) >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C - 0 chr1 100 . G A . . AC=100 GT 0/1 1/1 0/0 - 1 chr1 101 . T C . . MQ=59 GT 0/0 0/1 0/0 - 2 chr1 102 . A T,G . . . 
GT 0/1 0/0 1/2 - >>> vf = vf.add_info_ac() + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . AC=100 GT:DP 1|0:34 1/1:23 0/0:28 + 1 chr1 101 . T C . . MQ=59 GT 0|0 0/1 ./. + 2 chr1 102 . A T,G . . . GT 1|0 0/0 1/2 + 3 chrX 100 . C A . . . GT 0 0/0 1 + >>> vf = vf.compute_info('AC') + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . AC=1 GT:DP 1|0:34 1/1:23 0/0:28 + 1 chr1 101 . T C . . MQ=59;AC=1 GT 0|0 0/1 ./. + 2 chr1 102 . A T,G . . AC=1,1 GT 1|0 0/0 1/2 + 3 chrX 100 . C A . . AC=1 GT 0 0/0 1 + >>> vf = vf.compute_info('AN') + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . AC=1;AN=6 GT:DP 1|0:34 1/1:23 0/0:28 + 1 chr1 101 . T C . . MQ=59;AC=1;AN=4 GT 0|0 0/1 ./. + 2 chr1 102 . A T,G . . AC=1,1;AN=6 GT 1|0 0/0 1/2 + 3 chrX 100 . C A . . AC=1;AN=4 GT 0 0/0 1 + >>> vf = vf.compute_info('AF') >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C - 0 chr1 100 . G A . . AC=3 GT 0/1 1/1 0/0 - 1 chr1 101 . T C . . MQ=59;AC=5 GT 0/0 0/1 0/0 - 2 chr1 102 . A T,G . . AC=3,2 GT 0/1 0/0 1/2 + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C + 0 chr1 100 . G A . . AC=1;AN=6;AF=0.167 GT:DP 1|0:34 1/1:23 0/0:28 + 1 chr1 101 . T C . . MQ=59;AC=1;AN=4;AF=0.250 GT 0|0 0/1 ./. + 2 chr1 102 . A T,G . . AC=1,1;AN=6;AF=0.167,0.167 GT 1|0 0/0 1/2 + 3 chrX 100 . C A . . AC=1;AN=4;AF=0.250 GT 0 0/0 1 """ def one_row(r, key): - data = row_compute_info(r, key) + data = row_computeinfo(r, key) r.INFO = row_updateinfo(r, key, data, missing=True) return r From e957537e77eabb1fb96828c6a1be735461563cef Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Thu, 17 Feb 2022 17:26:39 +0900 Subject: [PATCH 18/22] Update `pyvcf.VcfFrame.cfilter_empty` (#54): * :issue:`54: Update :meth:`pyvcf.VcfFrame.cfilter_empty` method so that users can control missingness threshold for filtering samples. 
* Rename :meth:`pyvcf.VcfFrame.cfilter_empty` method to :meth:`pyvcf.VcfFrame.empty_samples`. --- CHANGELOG.rst | 2 + fuc/api/pyvcf.py | 374 ++++++++++++++++++++++++++--------------------- 2 files changed, 211 insertions(+), 165 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a3c7bb0..5da424e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,6 +14,8 @@ Changelog * Add new method :meth:`pyvcf.gt_ploidy`. * Update :meth:`pyvcf.gt_polyp` method to use :meth:`pyvcf.gt_ploidy` method internally. * :issue:`53`: Add new methods to compute AC/AN/AF in the INFO column: :meth:`pyvcf.row_computeinfo` and :meth:`pyvcf.VcfFrame.compute_info`. +* :issue:`54: Update :meth:`pyvcf.VcfFrame.cfilter_empty` method so that users can control missingness threshold for filtering samples. +* Rename :meth:`pyvcf.VcfFrame.cfilter_empty` method to :meth:`pyvcf.VcfFrame.empty_samples`. 0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index a48cbac..f3f87d7 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -1953,83 +1953,6 @@ def f(r): vf = self.__class__(self.copy_meta(), df) return vf - def cfilter_empty(self, opposite=False, as_list=False): - """ - Remove samples whose genotype calls are all missing. - - Parameters - ---------- - opposite : bool, default: False - If True, return samples that don't meet the said criteria. - as_list : bool, default: False - If True, return a list of sample names instead of a VcfFrame. - - Returns - ------- - VcfFrame - Filtered VcfFrame. - - Examples - -------- - Assume we have the following data: - - >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], - ... 'POS': [100, 101, 102, 103], - ... 'ID': ['.', '.', '.', '.'], - ... 'REF': ['G', 'T', 'G', 'T'], - ... 'ALT': ['A', 'C', 'C', 'C'], - ... 'QUAL': ['.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.'], - ... 'INFO': ['.', '.', '.', '.'], - ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], - ... 
'Steven': ['0/1', '1/1', '1/1', '1/1'], - ... 'Rachel': ['./.', './.', './.', './.'], - ... 'John': ['0/0', './.', '0/0', '0/0'], - ... 'Sara': ['./.', './.', './.', './.'], - ... } - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Rachel John Sara - 0 chr1 100 . G A . . . GT 0/1 ./. 0/0 ./. - 1 chr1 101 . T C . . . GT 1/1 ./. ./. ./. - 2 chr1 102 . G C . . . GT 1/1 ./. 0/0 ./. - 3 chr1 103 . T C . . . GT 1/1 ./. 0/0 ./. - - We can remove samples whose genotypes are all missing: - - >>> vf.cfilter_empty().df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven John - 0 chr1 100 . G A . . . GT 0/1 0/0 - 1 chr1 101 . T C . . . GT 1/1 ./. - 2 chr1 102 . G C . . . GT 1/1 0/0 - 3 chr1 103 . T C . . . GT 1/1 0/0 - - We can also select those samples: - - >>> vf.cfilter_empty(opposite=True).df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Rachel Sara - 0 chr1 100 . G A . . . GT ./. ./. - 1 chr1 101 . T C . . . GT ./. ./. - 2 chr1 102 . G C . . . GT ./. ./. - 3 chr1 103 . T C . . . GT ./. ./. - - Finally, we can return a list of sample names from the filtering: - - >>> vf.cfilter_empty(as_list=True) - ['Steven', 'John'] - """ - f = lambda r: r[9:].apply(gt_miss) - s = self.df.apply(f, axis=1).all() - if opposite: - s = s[s == True] - else: - s = s[s == False] - l = s.index.to_list() - if as_list: - return l - return self.subset(l) - def collapse(self): """ Collapse duplicate records in the VcfFrame. @@ -3798,6 +3721,101 @@ def one_gt(g): vf = self.__class__(self.copy_meta(), df) return vf + def empty_samples(self, threshold=0, opposite=False, as_list=False): + """ + Remove samples with high missingness. + + Samples with missingness >= threshold will be removed. + + Parameters + ---------- + threshold : int or float, default: 0 + Number or fraction of missing variants. By default + (``threshold=0``), only samples with 100% missingness will be + removed. 
+ opposite : bool, default: False + If True, return samples that don't meet the said criteria. + as_list : bool, default: False + If True, return a list of sample names instead of a VcfFrame. + + Returns + ------- + VcfFrame + Filtered VcfFrame. + + Examples + -------- + Assume we have the following data: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['G', 'T', 'G', 'T'], + ... 'ALT': ['A', 'C', 'C', 'C'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], + ... 'Steven': ['0/1', '1/1', '1/1', '1/1'], + ... 'Rachel': ['./.', './.', './.', './.'], + ... 'John': ['0/0', './.', '0/0', '0/0'], + ... 'Sara': ['./.', './.', './.', './.'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Rachel John Sara + 0 chr1 100 . G A . . . GT 0/1 ./. 0/0 ./. + 1 chr1 101 . T C . . . GT 1/1 ./. ./. ./. + 2 chr1 102 . G C . . . GT 1/1 ./. 0/0 ./. + 3 chr1 103 . T C . . . GT 1/1 ./. 0/0 ./. + + We can remove samples whose genotypes are all missing: + + >>> vf.cfilter_empty().df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven John + 0 chr1 100 . G A . . . GT 0/1 0/0 + 1 chr1 101 . T C . . . GT 1/1 ./. + 2 chr1 102 . G C . . . GT 1/1 0/0 + 3 chr1 103 . T C . . . GT 1/1 0/0 + + We can also select those samples: + + >>> vf.cfilter_empty(opposite=True).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Rachel Sara + 0 chr1 100 . G A . . . GT ./. ./. + 1 chr1 101 . T C . . . GT ./. ./. + 2 chr1 102 . G C . . . GT ./. ./. + 3 chr1 103 . T C . . . GT ./. ./. 
+ + Finally, we can return a list of sample names from the filtering: + + >>> vf.cfilter_empty(as_list=True) + ['Steven', 'John'] + """ + total = self.shape[0] + + if not threshold: + threshold = total + + if isinstance(threshold, int): + use_fraction = False + elif isinstance(threshold, float): + use_fraction = True + else: + raise TypeError("Incorrect argument type 'threshold'") + + def one_col(c): + data = sum(c.apply(gt_miss)) + if use_fraction: + data /= total + return data + + s = self.df.iloc[:, 9:].apply(one_col, axis=0) < threshold + + return self.subset(s[s].index.to_list()) + def expand(self): """ Expand each multiallelic locus to multiple rows. @@ -3876,7 +3894,9 @@ def one_gt(g, i): def filter_bed(self, bed, opposite=False, as_index=False): """ - Select rows that overlap with the given BED data. + Filter rows intersecting with given BED. + + Only variants intersecting with given BED data will remain. Parameters ---------- @@ -3968,7 +3988,9 @@ def filter_bed(self, bed, opposite=False, as_index=False): def filter_empty(self, threshold=0, opposite=False, as_index=False): """ - Remove rows with missing genotype calls. + Filter rows with high missingness. + + Variants with missingness >= threshold will be removed. Parameters ---------- @@ -4063,82 +4085,11 @@ def one_row(r): return self.__class__(self.copy_meta(), self.copy_df()) return self.__class__(self.copy_meta(), self.df[i]) - def filter_indel(self, opposite=False, as_index=False): - """ - Remove rows with an indel. - - Parameters - ---------- - opposite : bool, default: False - If True, return rows that don't meet the said criteria. - as_index : bool, default: False - If True, return boolean index array instead of VcfFrame. - - Returns - ------- - VcfFrame or pandas.Series - Filtered VcfFrame or boolean index array. - - Examples - -------- - Assume we have the following data: - - >>> from fuc import pyvcf - >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], - ... 
'POS': [100, 101, 102, 103], - ... 'ID': ['.', '.', '.', '.'], - ... 'REF': ['G', 'CT', 'A', 'C'], - ... 'ALT': ['A', 'C', 'C,AT', 'A'], - ... 'QUAL': ['.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.'], - ... 'INFO': ['.', '.', '.', '.'], - ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], - ... 'Steven': ['0/1', '0/1', '1/2', '0/1'], - ... } - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . . GT 0/1 - 1 chr1 101 . CT C . . . GT 0/1 - 2 chr1 102 . A C,AT . . . GT 1/2 - 3 chr1 103 . C A . . . GT 0/1 - - We can remove rows with an indel: - - >>> vf.filter_indel().df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . . GT 0/1 - 1 chr1 103 . C A . . . GT 0/1 - - We can also select those rows: - - >>> vf.filter_indel(opposite=True).df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 101 . CT C . . . GT 0/1 - 1 chr1 102 . A C,AT . . . GT 1/2 - - Finally, we can return boolean index array from the filtering: - - >>> vf.filter_indel(as_index=True) - 0 True - 1 False - 2 False - 3 True - dtype: bool - """ - i = ~self.df.apply(row_hasindel, axis=1) - if opposite: - i = ~i - if as_index: - return i - if i.empty: - return self.__class__(self.copy_meta(), self.copy_df()) - return self.__class__(self.copy_meta(), self.df[i]) - def filter_flagall(self, flags, opposite=False, as_index=False): """ - Select rows if all of the given INFO flags are present. + Filter rows with all given INFO flags. + + Only variants with all given INFO flags will remain. Parameters ---------- @@ -4222,7 +4173,9 @@ def f(r): def filter_flagany(self, flags, opposite=False, as_index=False): """ - Select rows if any one of the given INFO flags is present. + Filter rows with any given INFO flags. + + Only variants with any given INFO flags will remain. 
Parameters ---------- @@ -4304,9 +4257,86 @@ def f(r): return self.__class__(self.copy_meta(), self.copy_df()) return self.__class__(self.copy_meta(), self.df[i]) + def filter_indel(self, opposite=False, as_index=False): + """ + Filter rows with indel. + + Variants with indel will be removed. + + Parameters + ---------- + opposite : bool, default: False + If True, return rows that don't meet the said criteria. + as_index : bool, default: False + If True, return boolean index array instead of VcfFrame. + + Returns + ------- + VcfFrame or pandas.Series + Filtered VcfFrame or boolean index array. + + Examples + -------- + Assume we have the following data: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['G', 'CT', 'A', 'C'], + ... 'ALT': ['A', 'C', 'C,AT', 'A'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], + ... 'Steven': ['0/1', '0/1', '1/2', '0/1'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . . GT 0/1 + 1 chr1 101 . CT C . . . GT 0/1 + 2 chr1 102 . A C,AT . . . GT 1/2 + 3 chr1 103 . C A . . . GT 0/1 + + We can remove rows with an indel: + + >>> vf.filter_indel().df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . . GT 0/1 + 1 chr1 103 . C A . . . GT 0/1 + + We can also select those rows: + + >>> vf.filter_indel(opposite=True).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 101 . CT C . . . GT 0/1 + 1 chr1 102 . A C,AT . . . 
GT 1/2 + + Finally, we can return boolean index array from the filtering: + + >>> vf.filter_indel(as_index=True) + 0 True + 1 False + 2 False + 3 True + dtype: bool + """ + i = ~self.df.apply(row_hasindel, axis=1) + if opposite: + i = ~i + if as_index: + return i + if i.empty: + return self.__class__(self.copy_meta(), self.copy_df()) + return self.__class__(self.copy_meta(), self.df[i]) + def filter_multialt(self, opposite=False, as_index=False): """ - Remove rows with multiple ALT alleles. + Filter rows with multiple ALT alleles. + + Variants with multiple ALT alleles will be removed. Parameters ---------- @@ -4381,7 +4411,9 @@ def filter_multialt(self, opposite=False, as_index=False): def filter_pass(self, opposite=False, as_index=False): """ - Select rows with PASS in the FILTER field. + Filter rows with PASS in FILTER column. + + Only variants with PASS in the FILTER column will remain. Parameters ---------- @@ -4455,7 +4487,9 @@ def filter_pass(self, opposite=False, as_index=False): def filter_phased(self, opposite=False, as_index=False): """ - Remove rows with phased genotypes. + Filter rows with phased genotypes. + + Variants with phased genotypes will be removed. Parameters ---------- @@ -4529,7 +4563,9 @@ def filter_phased(self, opposite=False, as_index=False): def filter_polyp(self, opposite=False, as_index=False): """ - Remove rows with a polyploid genotype call. + Filter rows with polyploid genotypes. + + Variants with polyploid genotypes will be removed. Parameters ---------- @@ -4603,7 +4639,9 @@ def filter_polyp(self, opposite=False, as_index=False): def filter_qual(self, threshold, opposite=False, as_index=False): """ - Select rows with minimum QUAL value. + Filter rows with low QUAL values. + + Only variants with QUAL >= threshold will remain. Parameters ---------- @@ -4684,9 +4722,10 @@ def one_row(r): def filter_sampall(self, samples=None, opposite=False, as_index=False): """ - Select rows if all of the given samples have the variant. 
+ Filter rows where all given samples have variant. - The default behavior is to use all samples in the VcfFrame. + Only variants where all given samples have variant will remain. The + default behavior is to use all samples in the VcfFrame. Parameters ---------- @@ -4783,9 +4822,10 @@ def filter_sampall(self, samples=None, opposite=False, as_index=False): def filter_sampany(self, samples=None, opposite=False, as_index=False): """ - Select rows if any one of the given samples has the variant. + Filter rows where any given samples have variant. - The default behavior is to use all samples in the VcfFrame. + Only variants where any given samples have variant will remain. The + default behavior is to use all samples in the VcfFrame. Parameters ---------- @@ -4881,7 +4921,9 @@ def filter_sampany(self, samples=None, opposite=False, as_index=False): def filter_sampnum(self, threshold, opposite=False, as_index=False): """ - Select rows if the variant is prevalent enough. + Filter rows with high variant prevalence. + + Only variants with variant prevalence >= threshold will remain. Parameters ---------- @@ -4969,7 +5011,9 @@ def f(r): def filter_vcf(self, vcf, opposite=False, as_index=False): """ - Select rows that overlap with the other VCF. + Filter rows intersecting with given VCF. + + Only variants intersecting with given VCF data will remain. Parameters ---------- From fa1c705dae26279acdbf2b123ba6da01f12c819c Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Thu, 17 Feb 2022 20:04:40 +0900 Subject: [PATCH 19/22] Update docs --- CHANGELOG.rst | 2 +- fuc/api/pyvcf.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5da424e..4651da0 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -14,7 +14,7 @@ Changelog * Add new method :meth:`pyvcf.gt_ploidy`. * Update :meth:`pyvcf.gt_polyp` method to use :meth:`pyvcf.gt_ploidy` method internally. 
* :issue:`53`: Add new methods to compute AC/AN/AF in the INFO column: :meth:`pyvcf.row_computeinfo` and :meth:`pyvcf.VcfFrame.compute_info`. -* :issue:`54: Update :meth:`pyvcf.VcfFrame.cfilter_empty` method so that users can control missingness threshold for filtering samples. +* :issue:`54`: Update :meth:`pyvcf.VcfFrame.cfilter_empty` method so that users can control missingness threshold for filtering samples. * Rename :meth:`pyvcf.VcfFrame.cfilter_empty` method to :meth:`pyvcf.VcfFrame.empty_samples`. 0.30.0 (2022-02-05) diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index f3f87d7..d311353 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -865,7 +865,7 @@ def row_hasindel(r): def row_computeinfo(r, key, decimals=3): """ - For given row, return AC/AN/AF calculation in INFO column. + For given row, return AC/AN/AF calculation for INFO column. Parameters ---------- @@ -971,7 +971,7 @@ def get_af(r): def row_parseinfo(r, key): """ - For given row, return requested data in INFO column. + For given row, return requested data from INFO column. Parameters ---------- @@ -1069,7 +1069,7 @@ def one_gt(g): def row_updateinfo(r, key, value, force=True, missing=False): """ - For given row, return updated data in INFO column. + For given row, return updated data from INFO column. Parameters ---------- @@ -2081,7 +2081,7 @@ def raise_error(c): def compute_info(self, key): """ - Compute AC/AN/AF in INFO column. + Compute AC/AN/AF for INFO column. The method will ignore and overwrite any existing data for selected key. 
From 8e29646583545aac56e0788f81a9f0bc7334532f Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Thu, 17 Feb 2022 20:30:16 +0900 Subject: [PATCH 20/22] Update docs --- fuc/api/pyvcf.py | 83 ++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index d311353..1d4f1a3 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -3741,11 +3741,10 @@ def empty_samples(self, threshold=0, opposite=False, as_list=False): Returns ------- VcfFrame - Filtered VcfFrame. + Subsetted VcfFrame. Examples -------- - Assume we have the following data: >>> from fuc import pyvcf >>> data = { @@ -3758,41 +3757,45 @@ def empty_samples(self, threshold=0, opposite=False, as_list=False): ... 'FILTER': ['.', '.', '.', '.'], ... 'INFO': ['.', '.', '.', '.'], ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], - ... 'Steven': ['0/1', '1/1', '1/1', '1/1'], - ... 'Rachel': ['./.', './.', './.', './.'], - ... 'John': ['0/0', './.', '0/0', '0/0'], - ... 'Sara': ['./.', './.', './.', './.'], + ... 'A': ['0/0', '0/0', '0/0', '0/0'], + ... 'B': ['./.', '0/0', '0/0', '0/0'], + ... 'C': ['./.', './.', '0/0', '0/0'], + ... 'D': ['./.', './.', './.', '0/0'], + ... 'E': ['./.', './.', './.', './.'], ... } >>> vf = pyvcf.VcfFrame.from_dict([], data) >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Rachel John Sara - 0 chr1 100 . G A . . . GT 0/1 ./. 0/0 ./. - 1 chr1 101 . T C . . . GT 1/1 ./. ./. ./. - 2 chr1 102 . G C . . . GT 1/1 ./. 0/0 ./. - 3 chr1 103 . T C . . . GT 1/1 ./. 0/0 ./. - - We can remove samples whose genotypes are all missing: - - >>> vf.cfilter_empty().df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven John - 0 chr1 100 . G A . . . GT 0/1 0/0 - 1 chr1 101 . T C . . . GT 1/1 ./. - 2 chr1 102 . G C . . . GT 1/1 0/0 - 3 chr1 103 . T C . . . 
GT 1/1 0/0 - - We can also select those samples: - - >>> vf.cfilter_empty(opposite=True).df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Rachel Sara - 0 chr1 100 . G A . . . GT ./. ./. - 1 chr1 101 . T C . . . GT ./. ./. - 2 chr1 102 . G C . . . GT ./. ./. - 3 chr1 103 . T C . . . GT ./. ./. - - Finally, we can return a list of sample names from the filtering: - - >>> vf.cfilter_empty(as_list=True) - ['Steven', 'John'] + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C D E + 0 chr1 100 . G A . . . GT 0/0 ./. ./. ./. ./. + 1 chr1 101 . T C . . . GT 0/0 0/0 ./. ./. ./. + 2 chr1 102 . G C . . . GT 0/0 0/0 0/0 ./. ./. + 3 chr1 103 . T C . . . GT 0/0 0/0 0/0 0/0 ./. + >>> vf.empty_samples().df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B C D + 0 chr1 100 . G A . . . GT 0/0 ./. ./. ./. + 1 chr1 101 . T C . . . GT 0/0 0/0 ./. ./. + 2 chr1 102 . G C . . . GT 0/0 0/0 0/0 ./. + 3 chr1 103 . T C . . . GT 0/0 0/0 0/0 0/0 + >>> vf.empty_samples(threshold=2).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B + 0 chr1 100 . G A . . . GT 0/0 ./. + 1 chr1 101 . T C . . . GT 0/0 0/0 + 2 chr1 102 . G C . . . GT 0/0 0/0 + 3 chr1 103 . T C . . . GT 0/0 0/0 + >>> vf.empty_samples(threshold=0.5).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B + 0 chr1 100 . G A . . . GT 0/0 ./. + 1 chr1 101 . T C . . . GT 0/0 0/0 + 2 chr1 102 . G C . . . GT 0/0 0/0 + 3 chr1 103 . T C . . . GT 0/0 0/0 + >>> vf.empty_samples(threshold=0.5, opposite=True).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT C D E + 0 chr1 100 . G A . . . GT ./. ./. ./. + 1 chr1 101 . T C . . . GT ./. ./. ./. + 2 chr1 102 . G C . . . GT 0/0 ./. ./. + 3 chr1 103 . T C . . . GT 0/0 0/0 ./. 
+ >>> vf.empty_samples(threshold=0.5, opposite=True, as_list=True) + ['C', 'D', 'E'] """ total = self.shape[0] @@ -3814,7 +3817,17 @@ def one_col(c): s = self.df.iloc[:, 9:].apply(one_col, axis=0) < threshold - return self.subset(s[s].index.to_list()) + if opposite: + s = s[s == False] + else: + s = s[s == True] + + l = s.index.to_list() + + if as_list: + return l + + return self.subset(l) def expand(self): """ From 8f603d1f36940e427b31beaa7e6aa40add62246e Mon Sep 17 00:00:00 2001 From: "Seung-been \"Steven\" Lee" Date: Mon, 21 Feb 2022 17:11:31 +0900 Subject: [PATCH 21/22] Update `common.sort_regions`: * Update :meth:`common.sort_regions` method to support regions containing an ALT contig (e.g. chr16_KI270854v1_alt). --- CHANGELOG.rst | 1 + fuc/api/common.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4651da0..37899b7 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,7 @@ Changelog * :issue:`53`: Add new methods to compute AC/AN/AF in the INFO column: :meth:`pyvcf.row_computeinfo` and :meth:`pyvcf.VcfFrame.compute_info`. * :issue:`54`: Update :meth:`pyvcf.VcfFrame.cfilter_empty` method so that users can control missingness threshold for filtering samples. * Rename :meth:`pyvcf.VcfFrame.cfilter_empty` method to :meth:`pyvcf.VcfFrame.empty_samples`. +* Update :meth:`common.sort_regions` method to support regions containing an ALT contig (e.g. chr16_KI270854v1_alt). 
0.30.0 (2022-02-05) ------------------- diff --git a/fuc/api/common.py b/fuc/api/common.py index b352265..02cc5d0 100644 --- a/fuc/api/common.py +++ b/fuc/api/common.py @@ -1316,17 +1316,21 @@ def sort_regions(regions): -------- >>> from fuc import common - >>> regions = ['22:1000-1500', '16:100-200', '22:200-300'] + >>> regions = ['chr22:1000-1500', 'chr16:100-200', 'chr22:200-300', 'chr16_KI270854v1_alt', 'chr3_GL000221v1_random', 'HLA-A*02:10'] >>> sorted(regions) # Lexicographic sorting (not what we want) - ['16:100-200', '22:1000-1500', '22:200-300'] + ['HLA-A*02:10', 'chr16:100-200', 'chr16_KI270854v1_alt', 'chr22:1000-1500', 'chr22:200-300', 'chr3_GL000221v1_random'] >>> common.sort_regions(regions) - ['16:100-200', '22:200-300', '22:1000-1500'] + ['chr16:100-200', 'chr22:200-300', 'chr22:1000-1500', 'chr16_KI270854v1_alt', 'chr3_GL000221v1_random', 'HLA-A*02:10'] """ def func(x): chrom, start, end = parse_region(x) if chrom in pyvcf.CONTIGS: chrom = pyvcf.CONTIGS.index(chrom) - return (chrom, start, end) + alt = '' + else: + chrom = len(pyvcf.CONTIGS) + alt = chrom + return (chrom, alt, start, end) return sorted(regions, key=func) def update_chr_prefix(regions, mode='remove'): From c6b5e8f5807e6965d07e0d2a3a79e6a308885f28 Mon Sep 17 00:00:00 2001 From: Seung-been Lee Date: Tue, 1 Mar 2022 10:35:03 +0900 Subject: [PATCH 22/22] Update docs --- CHANGELOG.rst | 4 +- README.rst | 28 ++--- docs/cli.rst | 234 ++++++++++++++++++++--------------------- docs/create.py | 8 +- fuc/cli/bam_aldepth.py | 15 +-- fuc/cli/bam_head.py | 8 +- fuc/cli/bam_index.py | 8 +- fuc/cli/bam_rename.py | 17 ++- fuc/cli/bam_slice.py | 37 ++++--- fuc/cli/bed_intxn.py | 10 +- fuc/cli/bed_sum.py | 12 ++- fuc/cli/cov_concat.py | 12 ++- fuc/cli/cov_rename.py | 19 ++-- fuc/cli/fa_filter.py | 14 ++- fuc/cli/fq_count.py | 6 +- fuc/cli/fq_sum.py | 6 +- fuc/cli/fuc_bgzip.py | 6 +- fuc/cli/fuc_compf.py | 15 ++- fuc/cli/fuc_demux.py | 14 ++- fuc/cli/fuc_exist.py | 11 +- fuc/cli/fuc_find.py | 14 ++- 
fuc/cli/fuc_undetm.py | 11 +- fuc/cli/maf_maf2vcf.py | 18 ++-- fuc/cli/maf_oncoplt.py | 24 +++-- fuc/cli/maf_sumplt.py | 21 ++-- fuc/cli/maf_vcf2maf.py | 9 +- fuc/cli/ngs_bam2fq.py | 24 +++-- fuc/cli/ngs_fq2bam.py | 43 +++++--- fuc/cli/ngs_hc.py | 65 +++++++----- fuc/cli/ngs_m2.py | 30 ++++-- fuc/cli/ngs_pon.py | 27 +++-- fuc/cli/ngs_quant.py | 36 ++++--- fuc/cli/ngs_trim.py | 21 ++-- fuc/cli/tabix_index.py | 9 +- fuc/cli/tabix_slice.py | 9 +- fuc/cli/tbl_merge.py | 26 +++-- fuc/cli/tbl_sum.py | 55 ++++++---- fuc/cli/vcf_filter.py | 36 ++++--- fuc/cli/vcf_index.py | 10 +- fuc/cli/vcf_merge.py | 29 ++--- fuc/cli/vcf_rename.py | 24 +++-- fuc/cli/vcf_slice.py | 29 ++--- fuc/cli/vcf_split.py | 19 ++-- fuc/cli/vcf_vcf2bed.py | 6 +- fuc/cli/vcf_vep.py | 17 +-- 45 files changed, 662 insertions(+), 434 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 37899b7..45de339 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,8 +1,8 @@ Changelog ********* -0.31.0 (in development) ------------------------ +0.31.0 (2022-03-01) +------------------- * Fix bug in :meth:`pykallisto.KallistoFrame.compute_fold_change` method. * Add new method :meth:`pyvcf.call` and new command :command:`vcf-call`. diff --git a/README.rst b/README.rst index 12f95e2..d7f087e 100644 --- a/README.rst +++ b/README.rst @@ -46,10 +46,10 @@ Currently, fuc can be used to analyze, summarize, visualize, and manipulate the Additionally, fuc can be used to parse output data from the following programs: -- Ensembl Variant Effect Predictor (VEP) -- SnpEff -- bcl2fastq and bcl2fastq2 -- Kallisto +- `Ensembl Variant Effect Predictor (VEP) `__ +- `SnpEff `__ +- `bcl2fastq and bcl2fastq2 `__ +- `Kallisto `__ Your contributions (e.g. feature ideas, pull requests) are most welcome. @@ -112,38 +112,38 @@ For getting help on the fuc CLI: positional arguments: COMMAND - bam-aldepth Compute allelic depth from a SAM/BAM/CRAM file. + bam-aldepth Compute allelic depth from a BAM file. 
bam-depth Compute per-base read depth from BAM files. - bam-head Print the header of a SAM/BAM/CRAM file. - bam-index Index a SAM/BAM/CRAM file. - bam-rename Rename the sample in a SAM/BAM/CRAM file. - bam-slice Slice a SAM/BAM/CRAM file. + bam-head Print the header of a BAM file. + bam-index Index a BAM file. + bam-rename Rename the sample in a BAM file. + bam-slice Slice a BAM file. bed-intxn Find the intersection of BED files. bed-sum Summarize a BED file. cov-concat Concatenate depth of coverage files. cov-rename Rename the samples in a depth of coverage file. - fa-filter Filter sequence records in a FASTA file + fa-filter Filter sequence records in a FASTA file. fq-count Count sequence reads in FASTQ files. fq-sum Summarize a FASTQ file. fuc-bgzip Write a BGZF compressed file. fuc-compf Compare the contents of two files. fuc-demux Parse the Reports directory from bcl2fastq. fuc-exist Check whether certain files exist. - fuc-find Retrieve absolute paths of files whose name matches a + fuc-find Retrieve absolute paths of files whose name matches a specified pattern, optionally recursively. - fuc-undetm Compute top unknown barcodes using undertermined FASTQ from + fuc-undetm Compute top unknown barcodes using undertermined FASTQ from bcl2fastq. maf-maf2vcf Convert a MAF file to a VCF file. maf-oncoplt Create an oncoplot with a MAF file. maf-sumplt Create a summary plot with a MAF file. maf-vcf2maf Convert a VCF file to a MAF file. ngs-bam2fq Pipeline for converting BAM files to FASTQ files. - ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM + ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM files. ngs-hc Pipeline for germline short variant discovery. ngs-m2 Pipeline for somatic short variant discovery. ngs-pon Pipeline for constructing a panel of normals (PoN). - ngs-quant Pipeline for running RNAseq quantification from FASTQ files + ngs-quant Pipeline for running RNAseq quantification from FASTQ files with Kallisto. 
ngs-trim Pipeline for trimming adapters from FASTQ files. tabix-index Index a GFF/BED/SAM/VCF file with Tabix. diff --git a/docs/cli.rst b/docs/cli.rst index dcc4aaa..e815334 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -18,38 +18,38 @@ For getting help on the fuc CLI: positional arguments: COMMAND - bam-aldepth Compute allelic depth from a SAM/BAM/CRAM file. + bam-aldepth Compute allelic depth from a BAM file. bam-depth Compute per-base read depth from BAM files. - bam-head Print the header of a SAM/BAM/CRAM file. - bam-index Index a SAM/BAM/CRAM file. - bam-rename Rename the sample in a SAM/BAM/CRAM file. - bam-slice Slice a SAM/BAM/CRAM file. + bam-head Print the header of a BAM file. + bam-index Index a BAM file. + bam-rename Rename the sample in a BAM file. + bam-slice Slice a BAM file. bed-intxn Find the intersection of BED files. bed-sum Summarize a BED file. cov-concat Concatenate depth of coverage files. cov-rename Rename the samples in a depth of coverage file. - fa-filter Filter sequence records in a FASTA file + fa-filter Filter sequence records in a FASTA file. fq-count Count sequence reads in FASTQ files. fq-sum Summarize a FASTQ file. fuc-bgzip Write a BGZF compressed file. fuc-compf Compare the contents of two files. fuc-demux Parse the Reports directory from bcl2fastq. fuc-exist Check whether certain files exist. - fuc-find Retrieve absolute paths of files whose name matches a + fuc-find Retrieve absolute paths of files whose name matches a specified pattern, optionally recursively. - fuc-undetm Compute top unknown barcodes using undertermined FASTQ from + fuc-undetm Compute top unknown barcodes using undertermined FASTQ from bcl2fastq. maf-maf2vcf Convert a MAF file to a VCF file. maf-oncoplt Create an oncoplot with a MAF file. maf-sumplt Create a summary plot with a MAF file. maf-vcf2maf Convert a VCF file to a MAF file. ngs-bam2fq Pipeline for converting BAM files to FASTQ files. 
- ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM + ngs-fq2bam Pipeline for converting FASTQ files to analysis-ready BAM files. ngs-hc Pipeline for germline short variant discovery. ngs-m2 Pipeline for somatic short variant discovery. ngs-pon Pipeline for constructing a panel of normals (PoN). - ngs-quant Pipeline for running RNAseq quantification from FASTQ files + ngs-quant Pipeline for running RNAseq quantification from FASTQ files with Kallisto. ngs-trim Pipeline for trimming adapters from FASTQ files. tabix-index Index a GFF/BED/SAM/VCF file with Tabix. @@ -84,13 +84,13 @@ bam-aldepth $ fuc bam-aldepth -h usage: fuc bam-aldepth [-h] bam sites - Count allelic depth from a SAM/BAM/CRAM file. + Count allelic depth from a BAM file. Positional arguments: - bam Alignment file. - sites TSV file containing two columns, chromosome and position. - This can also be a BED or VCF file (compressed or - uncompressed) Input type will be detected automatically. + bam Input alignment file. + sites TSV file containing two columns, chromosome and position. This + can also be a BED or VCF file (compressed or uncompressed). + Input type will be detected automatically. Optional arguments: -h, --help Show this help message and exit. @@ -153,10 +153,10 @@ bam-head $ fuc bam-head -h usage: fuc bam-head [-h] bam - Print the header of a SAM/BAM/CRAM file. + Print the header of a BAM file. Positional arguments: - bam Alignment file. + bam Input alignment file. Optional arguments: -h, --help Show this help message and exit. @@ -172,10 +172,10 @@ bam-index $ fuc bam-index -h usage: fuc bam-index [-h] bam - Index a SAM/BAM/CRAM file. + Index a BAM file. Positional arguments: - bam Alignment file. + bam Input alignment file. Optional arguments: -h, --help Show this help message and exit. @@ -191,10 +191,10 @@ bam-rename $ fuc bam-rename -h usage: fuc bam-rename [-h] bam name - Rename the sample in a SAM/BAM/CRAM file. + Rename the sample in a BAM file. 
Positional arguments: - bam Alignment file. + bam Input alignment file. name New sample name. Optional arguments: @@ -212,24 +212,24 @@ bam-slice usage: fuc bam-slice [-h] [--format TEXT] [--fasta PATH] bam regions [regions ...] - Slice an alignment file (SAM/BAM/CRAM). + Slice a BAM file. Positional arguments: - bam Input alignment file must be already indexed (.bai) to allow - random access. You can index an alignment file with the + bam Input alignment file must be already indexed (.bai) to allow + random access. You can index an alignment file with the bam-index command. - regions One or more regions to be sliced. Each region must have the - format chrom:start-end and be a half-open interval with - (start, end]. This means, for example, chr1:100-103 will - extract positions 101, 102, and 103. Alternatively, you can - provide a BED file (compressed or uncompressed) to specify - regions. Note that the 'chr' prefix in contig names (e.g. - 'chr1' vs. '1') will be automatically added or removed as + regions One or more regions to be sliced. Each region must have the + format chrom:start-end and be a half-open interval with + (start, end]. This means, for example, chr1:100-103 will + extract positions 101, 102, and 103. Alternatively, you can + provide a BED file (compressed or uncompressed) to specify + regions. Note that the 'chr' prefix in contig names (e.g. + 'chr1' vs. '1') will be automatically added or removed as necessary to match the input BED's contig names. Optional arguments: -h, --help Show this help message and exit. - --format TEXT Output format (default: 'BAM') (choices: 'SAM', 'BAM', + --format TEXT Output format (default: 'BAM') (choices: 'SAM', 'BAM', 'CRAM'). --fasta PATH FASTA file. Required when --format is 'CRAM'. @@ -253,7 +253,7 @@ bed-intxn Find the intersection of BED files. Positional arguments: - bed BED files. + bed Input BED files. Optional arguments: -h, --help Show this help message and exit. 
@@ -279,7 +279,7 @@ bed-sum can, for example, use '--bases 1000' to display in kb. Positional arguments: - bed BED file. + bed Input BED file. Optional arguments: -h, --help Show this help message and exit. @@ -292,16 +292,16 @@ cov-concat .. code-block:: text $ fuc cov-concat -h - usage: fuc cov-concat [-h] [--axis INT] PATH [PATH ...] + usage: fuc cov-concat [-h] [--axis INT] tsv [tsv ...] Concatenate depth of coverage files. Positional arguments: - PATH One or more TSV files. + tsv Input TSV files. Optional arguments: -h, --help Show this help message and exit. - --axis INT The axis to concatenate along (default: 0) (choices: + --axis INT The axis to concatenate along (default: 0) (choices: 0, 1 where 0 is index and 1 is columns). [Example] Concatenate vertically: @@ -337,9 +337,9 @@ cov-rename -h, --help Show this help message and exit. --mode TEXT Renaming mode (default: 'MAP') (choices: 'MAP', 'INDEX', 'RANGE'). - --range INT INT Index range to use when renaming the samples. + --range INT INT Index range to use when renaming the samples. Applicable only with the 'RANGE' mode. - --sep TEXT Delimiter to use when reading the names file + --sep TEXT Delimiter to use when reading the names file (default: '\t'). [Example] Using the default 'MAP' mode: @@ -362,13 +362,13 @@ fa-filter Filter sequence records in a FASTA file. Positional arguments: - fasta FASTA file (compressed or uncompressed). + fasta Input FASTA file (compressed or uncompressed). Optional arguments: -h, --help Show this help message and exit. --contigs TEXT [TEXT ...] - One or more contigs to be selected. Alternatively, you can - provide a file containing one contig per line. + One or more contigs to be selected. Alternatively, you can + provide a file containing one contig per line. --exclude Exclude specified contigs. [Example] Select certain contigs: @@ -388,7 +388,7 @@ fq-count Count sequence reads in FASTQ files. 
Positional arguments: - fastq FASTQ files (compressed or uncompressed) (default: stdin). + fastq Input FASTQ files (compressed or uncompressed) (default: stdin). Optional arguments: -h, --help Show this help message and exit. @@ -414,7 +414,7 @@ fq-sum lengths, and the numbers of unique and duplicate sequences. Positional arguments: - fastq FASTQ file (zipped or unqzipped). + fastq Input FASTQ file (compressed or uncompressed). Optional arguments: -h, --help Show this help message and exit. @@ -439,7 +439,7 @@ fuc-bgzip formats (e.g. FASTA, FASTQ, GenBank, VCF, MAF). Positional arguments: - file File to be compressed (default: stdin). + file Input file to be compressed (default: stdin). Optional arguments: -h, --help Show this help message and exit. @@ -464,8 +464,8 @@ fuc-compf are identical and 'False' otherwise. Positional arguments: - left Left file. - right Right file. + left Input left file. + right Input right file. Optional arguments: -h, --help Show this help message and exit. @@ -497,12 +497,12 @@ fuc-demux reports.pdf files. Positional arguments: - reports Reports directory. + reports Input Reports directory. output Output directory (will be created). Optional arguments: -h, --help Show this help message and exit. - --sheet PATH SampleSheet.csv file. Used for sorting and/or subsetting + --sheet PATH SampleSheet.csv file. Used for sorting and/or subsetting samples. fuc-exist @@ -656,7 +656,7 @@ maf-oncoplt determined by the output file's extension. Positional arguments: - maf MAF file. + maf Input MAF file. out Output image file. Optional arguments: @@ -693,19 +693,19 @@ maf-sumplt determined by the output file's extension. Positional arguments: - maf MAF file. + maf Input MAF file. out Output image file. Optional arguments: -h, --help Show this help message and exit. --figsize FLOAT FLOAT - width, height in inches (default: [15, 10]) + Width, height in inches (default: [15, 10]). 
--title_fontsize FLOAT - font size of subplot titles (default: 16) + Font size of subplot titles (default: 16). --ticklabels_fontsize FLOAT - font size of tick labels (default: 12) + Font size of tick labels (default: 12). --legend_fontsize FLOAT - font size of legend texts (default: 12) + Font size of legend texts (default: 12). [Example] Output a PNG file: $ fuc maf-sumplt in.maf out.png @@ -757,9 +757,9 @@ ngs-bam2fq Positional arguments: manifest Sample manifest CSV file. output Output directory. - qsub SGE resoruce to request with qsub for BAM to FASTQ - conversion. Since this oppoeration supports multithreading, - it is recommended to speicfy a parallel environment (PE) + qsub SGE resoruce to request with qsub for BAM to FASTQ + conversion. Since this oppoeration supports multithreading, + it is recommended to speicfy a parallel environment (PE) to speed up the process (also see --thread). Optional arguments: @@ -815,7 +815,7 @@ ngs-fq2bam output Output directory. qsub SGE resoruce to request for qsub. java Java resoruce to request for GATK. - vcf One or more reference VCF files containing known variant + vcf One or more reference VCF files containing known variant sites (e.g. 1000 Genomes Project). Optional arguments: @@ -879,20 +879,20 @@ ngs-hc --bed PATH BED file. --dbsnp PATH VCF file from dbSNP. --thread INT Number of threads to use (default: 1). - --batch INT Batch size used for GenomicsDBImport (default: 0). This - controls the number of samples for which readers are - open at once and therefore provides a way to minimize - memory consumption. The size of 0 means no batching (i.e. + --batch INT Batch size used for GenomicsDBImport (default: 0). This + controls the number of samples for which readers are + open at once and therefore provides a way to minimize + memory consumption. The size of 0 means no batching (i.e. readers for all samples will be opened at once). --job TEXT Job submission ID for SGE. 
--force Overwrite the output directory if it already exists. --keep Keep temporary files. - --posix Set GenomicsDBImport to allow for optimizations to improve - the usability and performance for shared Posix Filesystems - (e.g. NFS, Lustre). If set, file level locking is disabled - and file system writes are minimized by keeping a higher - number of file descriptors open for longer periods of time. - Use with --batch if keeping a large number of file + --posix Set GenomicsDBImport to allow for optimizations to improve + the usability and performance for shared Posix Filesystems + (e.g. NFS, Lustre). If set, file level locking is disabled + and file system writes are minimized by keeping a higher + number of file descriptors open for longer periods of time. + Use with --batch if keeping a large number of file descriptors open is an issue. [Example] Specify queue: @@ -1028,8 +1028,8 @@ ngs-quant --bootstrap INT Number of bootstrap samples (default: 50). --job TEXT Job submission ID for SGE. --force Overwrite the output directory if it already exists. - --posix Set the environment variable HDF5_USE_FILE_LOCKING=FALSE - before running Kallisto. This is required for shared Posix + --posix Set the environment variable HDF5_USE_FILE_LOCKING=FALSE + before running Kallisto. This is required for shared Posix Filesystems (e.g. NFS, Lustre). [Example] Specify queue: @@ -1154,12 +1154,12 @@ tbl-merge method's documentation page. Positional arguments: - left Left file. - right Right file. + left Input left file. + right Input right file. Optional arguments: -h, --help Show this help message and exit. - --how TEXT Type of merge to be performed (default: 'inner') + --how TEXT Type of merge to be performed (default: 'inner') (choices: 'left', 'right', 'outer', 'inner', 'cross'). --on TEXT [TEXT ...] Column names to join on. --lsep TEXT Delimiter to use for the left file (default: '\t'). @@ -1194,27 +1194,27 @@ tbl-sum Optional arguments: -h, --help Show this help message and exit. 
--sep TEXT Delimiter to use (default: '\t'). - --skiprows TEXT Comma-separated line numbers to skip (0-indexed) or - number of lines to skip at the start of the file - (e.g. `--skiprows 1,` will skip the second line, - `--skiprows 2,4` will skip the third and fifth lines, + --skiprows TEXT Comma-separated line numbers to skip (0-indexed) or + number of lines to skip at the start of the file + (e.g. `--skiprows 1,` will skip the second line, + `--skiprows 2,4` will skip the third and fifth lines, and `--skiprows 10` will skip the first 10 lines). --na_values TEXT [TEXT ...] - Additional strings to recognize as NA/NaN (by - default, the following values are interpreted - as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', - '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', - '', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', + Additional strings to recognize as NA/NaN (by + default, the following values are interpreted + as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', + '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', + '', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'). - --keep_default_na Whether or not to include the default NaN values when + --keep_default_na Whether or not to include the default NaN values when parsing the data (see 'pandas.read_table' for details). - --expr TEXT Query the columns of a pandas.DataFrame with a + --expr TEXT Query the columns of a pandas.DataFrame with a boolean expression (e.g. `--query "A == 'yes'"`). --columns TEXT [TEXT ...] - Columns to be summarized (by default, all columns + Columns to be summarized (by default, all columns will be included). - --dtypes PATH File of column names and their data types (either - 'categorical' or 'numeric'); one tab-delimited pair of + --dtypes PATH File of column names and their data types (either + 'categorical' or 'numeric'); one tab-delimited pair of column name and data type per line. 
[Example] Summarize a table: @@ -1287,17 +1287,17 @@ vcf-filter Optional arguments: -h, --help Show this help message and exit. --expr TEXT Expression to evaluate. - --samples PATH File of sample names to apply the marking (one + --samples PATH File of sample names to apply the marking (one sample per line). --drop_duplicates [TEXT ...] - Only consider certain columns for identifying + Only consider certain columns for identifying duplicates, by default use all of the columns. - --greedy Use this flag to mark even ambiguous genotypes + --greedy Use this flag to mark even ambiguous genotypes as missing. - --opposite Use this flag to mark all genotypes that do not - satisfy the query expression as missing and leave + --opposite Use this flag to mark all genotypes that do not + satisfy the query expression as missing and leave those that do intact. - --filter_empty Use this flag to remove rows with no genotype + --filter_empty Use this flag to remove rows with no genotype calls at all. [Example] Mark genotypes with 0/0 as missing: @@ -1338,8 +1338,8 @@ vcf-index This command will create an index file (.tbi) for the input VCF. Positional arguments: - vcf Input VCF file to be indexed. When an uncompressed file is - given, the command will automatically create a BGZF + vcf Input VCF file to be indexed. When an uncompressed file is + given, the command will automatically create a BGZF compressed copy of the file (.gz) before indexing. Optional arguments: @@ -1365,19 +1365,19 @@ vcf-merge Positional arguments: vcf_files VCF files (compressed or uncompressed). Note that the 'chr' - prefix in contig names (e.g. 'chr1' vs. '1') will be - automatically added or removed as necessary to match the + prefix in contig names (e.g. 'chr1' vs. '1') will be + automatically added or removed as necessary to match the contig names of the first VCF. Optional arguments: -h, --help Show this help message and exit. 
- --how TEXT Type of merge as defined in pandas.DataFrame.merge + --how TEXT Type of merge as defined in pandas.DataFrame.merge (default: 'inner'). - --format TEXT FORMAT subfields to be retained (e.g. 'GT:AD:DP') + --format TEXT FORMAT subfields to be retained (e.g. 'GT:AD:DP') (default: 'GT'). - --sort Use this flag to turn off sorting of records + --sort Use this flag to turn off sorting of records (default: True). - --collapse Use this flag to collapse duplicate records + --collapse Use this flag to collapse duplicate records (default: False). [Example] Merge multiple VCF files: @@ -1411,9 +1411,9 @@ vcf-rename Optional arguments: -h, --help Show this help message and exit. - --mode TEXT Renaming mode (default: 'MAP') (choices: 'MAP', + --mode TEXT Renaming mode (default: 'MAP') (choices: 'MAP', 'INDEX', 'RANGE'). - --range INT INT Index range to use when renaming the samples. + --range INT INT Index range to use when renaming the samples. Applicable only with the 'RANGE' mode. --sep TEXT Delimiter to use for reading the 'names' file (default: '\t'). @@ -1438,17 +1438,17 @@ vcf-slice Slice a VCF file for specified regions. Positional arguments: - vcf Input VCF file must be already BGZF compressed (.gz) and - indexed (.tbi) to allow random access. A VCF file can be - compressed with the fuc-bgzip command and indexed with the + vcf Input VCF file must be already BGZF compressed (.gz) and + indexed (.tbi) to allow random access. A VCF file can be + compressed with the fuc-bgzip command and indexed with the vcf-index command. - regions One or more regions to be sliced. Each region must have the - format chrom:start-end and be a half-open interval with - (start, end]. This means, for example, chr1:100-103 will - extract positions 101, 102, and 103. Alternatively, you can - provide a BED file (compressed or uncompressed) to specify - regions. Note that the 'chr' prefix in contig names (e.g. - 'chr1' vs. 
'1') will be automatically added or removed as + regions One or more regions to be sliced. Each region must have the + format chrom:start-end and be a half-open interval with + (start, end]. This means, for example, chr1:100-103 will + extract positions 101, 102, and 103. Alternatively, you can + provide a BED file (compressed or uncompressed) to specify + regions. Note that the 'chr' prefix in contig names (e.g. + 'chr1' vs. '1') will be automatically added or removed as necessary to match the input VCF's contig names. Optional arguments: @@ -1479,8 +1479,8 @@ vcf-split Optional arguments: -h, --help Show this help message and exit. - --clean By default, the command will only return variants present in - each individual. Use the tag to stop this behavior and make + --clean By default, the command will only return variants present in + each individual. Use the tag to stop this behavior and make sure that all individuals have the same number of variants. --force Overwrite the output directory if it already exists. @@ -1522,7 +1522,7 @@ vcf-vep Optional arguments: -h, --help Show this help message and exit. - --opposite Use this flag to return only records that don't + --opposite Use this flag to return only records that don't meet the said criteria. --as_zero Use this flag to treat missing values as zero instead of NaN. diff --git a/docs/create.py b/docs/create.py index a036d7a..27d9cf9 100644 --- a/docs/create.py +++ b/docs/create.py @@ -74,10 +74,10 @@ Additionally, fuc can be used to parse output data from the following programs: -- Ensembl Variant Effect Predictor (VEP) -- SnpEff -- bcl2fastq and bcl2fastq2 -- Kallisto +- `Ensembl Variant Effect Predictor (VEP) `__ +- `SnpEff `__ +- `bcl2fastq and bcl2fastq2 `__ +- `Kallisto `__ Your contributions (e.g. feature ideas, pull requests) are most welcome. 
diff --git a/fuc/cli/bam_aldepth.py b/fuc/cli/bam_aldepth.py index 36c09a2..e715d62 100644 --- a/fuc/cli/bam_aldepth.py +++ b/fuc/cli/bam_aldepth.py @@ -3,7 +3,7 @@ from .. import api description = """ -Count allelic depth from a SAM/BAM/CRAM file. +Count allelic depth from a BAM file. """ epilog = f""" @@ -20,17 +20,20 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Compute allelic depth from a SAM/BAM/CRAM file.', + help= +"""Compute allelic depth from a BAM file.""" ) parser.add_argument( 'bam', - help='Alignment file.' + help= +"""Input alignment file.""" ) parser.add_argument( 'sites', - help='TSV file containing two columns, chromosome and position. \n' - 'This can also be a BED or VCF file (compressed or \n' - 'uncompressed) Input type will be detected automatically.' + help= +"""TSV file containing two columns, chromosome and position. This +can also be a BED or VCF file (compressed or uncompressed). +Input type will be detected automatically.""" ) def main(args): diff --git a/fuc/cli/bam_head.py b/fuc/cli/bam_head.py index ca3572b..385a4da 100644 --- a/fuc/cli/bam_head.py +++ b/fuc/cli/bam_head.py @@ -5,7 +5,7 @@ import pysam description = """ -Print the header of a SAM/BAM/CRAM file. +Print the header of a BAM file. """ epilog = f""" @@ -19,11 +19,13 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Print the header of a SAM/BAM/CRAM file.', + help= +"""Print the header of a BAM file.""" ) parser.add_argument( 'bam', - help='Alignment file.' + help= +"""Input alignment file.""" ) def main(args): diff --git a/fuc/cli/bam_index.py b/fuc/cli/bam_index.py index cdb443d..2c88dc1 100644 --- a/fuc/cli/bam_index.py +++ b/fuc/cli/bam_index.py @@ -3,7 +3,7 @@ import pysam description = """ -Index a SAM/BAM/CRAM file. +Index a BAM file. 
""" epilog = f""" @@ -17,11 +17,13 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Index a SAM/BAM/CRAM file.', + help= +"""Index a BAM file.""" ) parser.add_argument( 'bam', - help='Alignment file.' + help= +"""Input alignment file.""" ) def main(args): diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py index 0ed6d79..e0aa141 100644 --- a/fuc/cli/bam_rename.py +++ b/fuc/cli/bam_rename.py @@ -6,7 +6,7 @@ import pysam description = """ -Rename the sample in a SAM/BAM/CRAM file. +Rename the sample in a BAM file. """ epilog = f""" @@ -20,10 +20,19 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Rename the sample in a SAM/BAM/CRAM file.', + help= +"""Rename the sample in a BAM file.""" + ) + parser.add_argument( + 'bam', + help= +"""Input alignment file.""" + ) + parser.add_argument( + 'name', + help= +"""New sample name.""" ) - parser.add_argument('bam', help='Alignment file.') - parser.add_argument('name', help='New sample name.') def main(args): # Detect the input format. diff --git a/fuc/cli/bam_slice.py b/fuc/cli/bam_slice.py index 91324df..3f2816f 100644 --- a/fuc/cli/bam_slice.py +++ b/fuc/cli/bam_slice.py @@ -5,7 +5,7 @@ import pysam description = """ -Slice an alignment file (SAM/BAM/CRAM). +Slice a BAM file. """ epilog = f""" @@ -25,38 +25,43 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Slice a SAM/BAM/CRAM file.', + help= +"""Slice a BAM file.""" ) parser.add_argument( 'bam', - help="Input alignment file must be already indexed (.bai) to allow \n" - "random access. You can index an alignment file with the \n" - "bam-index command." + help= +"""Input alignment file must be already indexed (.bai) to allow +random access. You can index an alignment file with the +bam-index command.""" ) parser.add_argument( 'regions', nargs='+', - help="One or more regions to be sliced. 
Each region must have the \n" - "format chrom:start-end and be a half-open interval with \n" - "(start, end]. This means, for example, chr1:100-103 will \n" - "extract positions 101, 102, and 103. Alternatively, you can \n" - "provide a BED file (compressed or uncompressed) to specify \n" - "regions. Note that the 'chr' prefix in contig names (e.g. \n" - "'chr1' vs. '1') will be automatically added or removed as \n" - "necessary to match the input BED's contig names." + help= +"""One or more regions to be sliced. Each region must have the +format chrom:start-end and be a half-open interval with +(start, end]. This means, for example, chr1:100-103 will +extract positions 101, 102, and 103. Alternatively, you can +provide a BED file (compressed or uncompressed) to specify +regions. Note that the 'chr' prefix in contig names (e.g. +'chr1' vs. '1') will be automatically added or removed as +necessary to match the input BED's contig names.""" ) parser.add_argument( '--format', metavar='TEXT', default='BAM', choices=['SAM', 'BAM', 'CRAM'], - help="Output format (default: 'BAM') (choices: 'SAM', 'BAM', \n" - "'CRAM')." + help= +"""Output format (default: 'BAM') (choices: 'SAM', 'BAM', +'CRAM').""" ) parser.add_argument( '--fasta', metavar='PATH', - help="FASTA file. Required when --format is 'CRAM'." + help= +"""FASTA file. 
Required when --format is 'CRAM'.""" ) def main(args): diff --git a/fuc/cli/bed_intxn.py b/fuc/cli/bed_intxn.py index 6c78804..d673eb4 100644 --- a/fuc/cli/bed_intxn.py +++ b/fuc/cli/bed_intxn.py @@ -17,9 +17,15 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Find the intersection of BED files.', + help= +"""Find the intersection of BED files.""" + ) + parser.add_argument( + 'bed', + nargs='+', + help= +"""Input BED files.""" ) - parser.add_argument('bed', help='BED files.', nargs='+') def main(args): bfs = [api.pybed.BedFrame.from_file(x) for x in args.bed] diff --git a/fuc/cli/bed_sum.py b/fuc/cli/bed_sum.py index b72844d..d429baf 100644 --- a/fuc/cli/bed_sum.py +++ b/fuc/cli/bed_sum.py @@ -16,25 +16,29 @@ def create_parser(subparsers): subparsers, api.common._script_name(), description=description, - help='Summarize a BED file.', + help= +"""Summarize a BED file.""" ) parser.add_argument( 'bed', - help='BED file.' + help= +"""Input BED file.""" ) parser.add_argument( '--bases', metavar='INT', type=int, default=1, - help='Number to divide covered base pairs (default: 1).' + help= +"""Number to divide covered base pairs (default: 1).""" ) parser.add_argument( '--decimals', metavar='INT', type=int, default=0, - help='Number of decimals (default: 0).' + help= +"""Number of decimals (default: 0).""" ) def main(args): diff --git a/fuc/cli/cov_concat.py b/fuc/cli/cov_concat.py index fdae26c..69a4bea 100644 --- a/fuc/cli/cov_concat.py +++ b/fuc/cli/cov_concat.py @@ -22,21 +22,23 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Concatenate depth of coverage files.', + help= +"""Concatenate depth of coverage files.""" ) parser.add_argument( 'tsv', - metavar='PATH', nargs='+', - help='One or more TSV files.' 
+ help= +"""Input TSV files.""" ) parser.add_argument( '--axis', metavar='INT', default=0, type=int, - help='The axis to concatenate along (default: 0) (choices: \n' - '0, 1 where 0 is index and 1 is columns).' + help= +"""The axis to concatenate along (default: 0) (choices: +0, 1 where 0 is index and 1 is columns).""" ) def main(args): diff --git a/fuc/cli/cov_rename.py b/fuc/cli/cov_rename.py index 9498ec5..56ed8e2 100644 --- a/fuc/cli/cov_rename.py +++ b/fuc/cli/cov_rename.py @@ -33,15 +33,18 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Rename the samples in a depth of coverage file.', + help= +"""Rename the samples in a depth of coverage file.""" ) parser.add_argument( 'tsv', - help='TSV file (compressed or uncompressed).' + help= +"""TSV file (compressed or uncompressed).""" ) parser.add_argument( 'names', - help='Text file containing information for renaming the samples.' + help= +"""Text file containing information for renaming the samples.""" ) parser.add_argument( '--mode', @@ -56,15 +59,17 @@ def create_parser(subparsers): metavar='INT', type=int, nargs=2, - help="Index range to use when renaming the samples. \n" - "Applicable only with the 'RANGE' mode." + help= +"""Index range to use when renaming the samples. +Applicable only with the 'RANGE' mode.""" ) parser.add_argument( '--sep', metavar='TEXT', default='\t', - help="Delimiter to use when reading the names file \n" - "(default: '\\t')." 
+ help= +"""Delimiter to use when reading the names file +(default: '\\t').""" ) def main(args): diff --git a/fuc/cli/fa_filter.py b/fuc/cli/fa_filter.py index 83fcae0..dbe7316 100644 --- a/fuc/cli/fa_filter.py +++ b/fuc/cli/fa_filter.py @@ -24,23 +24,27 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Filter sequence records in a FASTA file', + help= +"""Filter sequence records in a FASTA file.""" ) parser.add_argument( 'fasta', - help='FASTA file (compressed or uncompressed).' + help= +"""Input FASTA file (compressed or uncompressed).""" ) parser.add_argument( '--contigs', metavar='TEXT', nargs='+', - help='One or more contigs to be selected. Alternatively, you can \n' - 'provide a file containing one contig per line. ' + help= +"""One or more contigs to be selected. Alternatively, you can +provide a file containing one contig per line.""" ) parser.add_argument( '--exclude', action='store_true', - help='Exclude specified contigs.' + help= +"""Exclude specified contigs.""" ) def main(args): diff --git a/fuc/cli/fq_count.py b/fuc/cli/fq_count.py index df4155f..d22856f 100644 --- a/fuc/cli/fq_count.py +++ b/fuc/cli/fq_count.py @@ -21,12 +21,14 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Count sequence reads in FASTQ files.', + help= +"""Count sequence reads in FASTQ files.""" ) parser.add_argument( 'fastq', nargs='*', - help='FASTQ files (compressed or uncompressed) (default: stdin).' 
+ help= +"""Input FASTQ files (compressed or uncompressed) (default: stdin).""" ) def main(args): diff --git a/fuc/cli/fq_sum.py b/fuc/cli/fq_sum.py index 70c519c..50bdb33 100644 --- a/fuc/cli/fq_sum.py +++ b/fuc/cli/fq_sum.py @@ -19,11 +19,13 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Summarize a FASTQ file.', + help= +"""Summarize a FASTQ file.""" ) parser.add_argument( 'fastq', - help='FASTQ file (zipped or unqzipped).' + help= +"""Input FASTQ file (compressed or uncompressed).""" ) def main(args): diff --git a/fuc/cli/fuc_bgzip.py b/fuc/cli/fuc_bgzip.py index b3a681b..6ef96ac 100644 --- a/fuc/cli/fuc_bgzip.py +++ b/fuc/cli/fuc_bgzip.py @@ -28,12 +28,14 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Write a BGZF compressed file.', + help= +"""Write a BGZF compressed file.""" ) parser.add_argument( 'file', nargs='*', - help='File to be compressed (default: stdin).' 
+ help= +"""Input file to be compressed (default: stdin).""" ) def main(args): diff --git a/fuc/cli/fuc_compf.py b/fuc/cli/fuc_compf.py index 8762b77..31c266a 100644 --- a/fuc/cli/fuc_compf.py +++ b/fuc/cli/fuc_compf.py @@ -20,10 +20,19 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Compare the contents of two files.', + help= +"""Compare the contents of two files.""" + ) + parser.add_argument( + 'left', + help= +"""Input left file.""" + ) + parser.add_argument( + 'right', + help= +"""Input right file.""" ) - parser.add_argument('left', help='Left file.') - parser.add_argument('right', help='Right file.') def main(args): print(filecmp.cmp(args.left, args.right)) diff --git a/fuc/cli/fuc_demux.py b/fuc/cli/fuc_demux.py index 0b92c6d..aed7dd8 100644 --- a/fuc/cli/fuc_demux.py +++ b/fuc/cli/fuc_demux.py @@ -36,22 +36,26 @@ def create_parser(subparsers): parser = api.common._add_parser( subparsers, api.common._script_name(), - help='Parse the Reports directory from bcl2fastq.', description=description, + help= +"""Parse the Reports directory from bcl2fastq.""" ) parser.add_argument( 'reports', - help='Reports directory.' + help= +"""Input Reports directory.""" ) parser.add_argument( 'output', - help='Output directory (will be created).' + help= +"""Output directory (will be created).""" ) parser.add_argument( '--sheet', metavar='PATH', - help='SampleSheet.csv file. Used for sorting and/or subsetting \n' - 'samples.' + help= +"""SampleSheet.csv file. 
Used for sorting and/or subsetting +samples.""" ) def main(args): diff --git a/fuc/cli/fuc_exist.py b/fuc/cli/fuc_exist.py index 4e6a1af..b54d4f5 100644 --- a/fuc/cli/fuc_exist.py +++ b/fuc/cli/fuc_exist.py @@ -27,10 +27,15 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Check whether certain files exist.', + help= +"""Check whether certain files exist.""" + ) + parser.add_argument( + 'files', + nargs='*', + help= +"""Files and directories to be tested (default: stdin).""" ) - parser.add_argument('files', nargs='*', - help='Files and directories to be tested (default: stdin).') def main(args): if args.files: diff --git a/fuc/cli/fuc_find.py b/fuc/cli/fuc_find.py index 39e2164..9659b92 100644 --- a/fuc/cli/fuc_find.py +++ b/fuc/cli/fuc_find.py @@ -25,25 +25,29 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Retrieve absolute paths of files whose name matches a \n' - 'specified pattern, optionally recursively.', + help= +"""Retrieve absolute paths of files whose name matches a +specified pattern, optionally recursively.""" ) parser.add_argument( 'pattern', - help='Filename pattern.' + help= +"""Filename pattern.""" ) parser.add_argument( '-r', '--recursive', action='store_true', - help='Turn on recursive retrieving.' + help= +"""Turn on recursive retrieving.""" ) parser.add_argument( '-d', '--directory', metavar='PATH', default=os.getcwd(), - help='Directory to search in (default: current directory).' + help= +"""Directory to search in (default: current directory).""" ) def main(args): diff --git a/fuc/cli/fuc_undetm.py b/fuc/cli/fuc_undetm.py index 30d10bf..4e71440 100644 --- a/fuc/cli/fuc_undetm.py +++ b/fuc/cli/fuc_undetm.py @@ -20,19 +20,22 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Compute top unknown barcodes using undertermined FASTQ from \n' - 'bcl2fastq.' 
+ help= +"""Compute top unknown barcodes using undertermined FASTQ from +bcl2fastq.""" ) parser.add_argument( 'fastq', - help='Undertermined FASTQ (compressed or uncompressed).' + help= +"""Undertermined FASTQ (compressed or uncompressed).""" ) parser.add_argument( '--count', metavar='INT', type=int, default=30, - help='Number of top unknown barcodes to return (default: 30).' + help= +"""Number of top unknown barcodes to return (default: 30).""" ) def main(args): diff --git a/fuc/cli/maf_maf2vcf.py b/fuc/cli/maf_maf2vcf.py index 7286135..a413720 100644 --- a/fuc/cli/maf_maf2vcf.py +++ b/fuc/cli/maf_maf2vcf.py @@ -44,33 +44,39 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Convert a MAF file to a VCF file.', + help= +"""Convert a MAF file to a VCF file.""" ) parser.add_argument( 'maf', - help='MAF file (compressed or uncompressed).' + help= +"""MAF file (compressed or uncompressed).""" ) parser.add_argument( '--fasta', metavar='PATH', - help='FASTA file (required to include INDELs in the output).' + help= +"""FASTA file (required to include INDELs in the output).""" ) parser.add_argument( '--ignore_indels', action='store_true', - help='Use this flag to exclude INDELs from the output.' + help= +"""Use this flag to exclude INDELs from the output.""" ) parser.add_argument( '--cols', metavar='TEXT', nargs='+', - help='Column(s) in the MAF file.' + help= +"""Column(s) in the MAF file.""" ) parser.add_argument( '--names', metavar='TEXT', nargs='+', - help='Name(s) to be displayed in the FORMAT field.' 
+ help= +"""Name(s) to be displayed in the FORMAT field.""" ) def main(args): diff --git a/fuc/cli/maf_oncoplt.py b/fuc/cli/maf_oncoplt.py index 15b786c..458e420 100644 --- a/fuc/cli/maf_oncoplt.py +++ b/fuc/cli/maf_oncoplt.py @@ -23,22 +23,26 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Create an oncoplot with a MAF file.', + help= +"""Create an oncoplot with a MAF file.""" ) parser.add_argument( 'maf', - help='MAF file.' + help= +"""Input MAF file.""" ) parser.add_argument( 'out', - help='Output image file.' + help= +"""Output image file.""" ) parser.add_argument( '--count', metavar='INT', type=int, default=10, - help='Number of top mutated genes to display (default: 10).' + help= +"""Number of top mutated genes to display (default: 10).""" ) parser.add_argument( '--figsize', @@ -46,28 +50,32 @@ def create_parser(subparsers): type=float, default=[15, 10], nargs=2, - help='Width, height in inches (default: [15, 10]).' + help= +"""Width, height in inches (default: [15, 10]).""" ) parser.add_argument( '--label_fontsize', metavar='FLOAT', type=float, default=15, - help='Font size of labels (default: 15).' + help= +"""Font size of labels (default: 15).""" ) parser.add_argument( '--ticklabels_fontsize', metavar='FLOAT', type=float, default=15, - help='Font size of tick labels (default: 15).' + help= +"""Font size of tick labels (default: 15).""" ) parser.add_argument( '--legend_fontsize', metavar='FLOAT', type=float, default=15, - help='Font size of legend texts (default: 15).' 
+ help= +"""Font size of legend texts (default: 15).""" ) def main(args): diff --git a/fuc/cli/maf_sumplt.py b/fuc/cli/maf_sumplt.py index 87ecad1..03f6e85 100644 --- a/fuc/cli/maf_sumplt.py +++ b/fuc/cli/maf_sumplt.py @@ -23,15 +23,18 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Create a summary plot with a MAF file.', + help= +"""Create a summary plot with a MAF file.""" ) parser.add_argument( 'maf', - help='MAF file.' + help= +"""Input MAF file.""" ) parser.add_argument( 'out', - help='Output image file.' + help= +"""Output image file.""" ) parser.add_argument( '--figsize', @@ -39,28 +42,32 @@ def create_parser(subparsers): type=float, default=[15, 10], nargs=2, - help='width, height in inches (default: [15, 10])' + help= +"""Width, height in inches (default: [15, 10]).""" ) parser.add_argument( '--title_fontsize', metavar='FLOAT', type=float, default=16, - help='font size of subplot titles (default: 16)' + help= +"""Font size of subplot titles (default: 16).""" ) parser.add_argument( '--ticklabels_fontsize', metavar='FLOAT', type=float, default=12, - help='font size of tick labels (default: 12)' + help= +"""Font size of tick labels (default: 12).""" ) parser.add_argument( '--legend_fontsize', metavar='FLOAT', type=float, default=12, - help='font size of legend texts (default: 12)' + help= +"""Font size of legend texts (default: 12).""" ) def main(args): diff --git a/fuc/cli/maf_vcf2maf.py b/fuc/cli/maf_vcf2maf.py index 6fc653a..5846652 100644 --- a/fuc/cli/maf_vcf2maf.py +++ b/fuc/cli/maf_vcf2maf.py @@ -17,9 +17,14 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Convert a VCF file to a MAF file.', + help= +"""Convert a VCF file to a MAF file.""" + ) + parser.add_argument( + 'vcf', + help= +"""Annotated VCF file.""" ) - parser.add_argument('vcf', help='Annotated VCF file.') def main(args): mf = api.pymaf.MafFrame.from_vcf(args.vcf) diff 
--git a/fuc/cli/ngs_bam2fq.py b/fuc/cli/ngs_bam2fq.py index 3e2bcb8..6dd92d1 100644 --- a/fuc/cli/ngs_bam2fq.py +++ b/fuc/cli/ngs_bam2fq.py @@ -44,36 +44,42 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for converting BAM files to FASTQ files.', + help= +"""Pipeline for converting BAM files to FASTQ files.""" ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.' + help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'qsub', type=str, - help="SGE resoruce to request with qsub for BAM to FASTQ \n" - "conversion. Since this oppoeration supports multithreading, \n" - "it is recommended to speicfy a parallel environment (PE) \n" - "to speed up the process (also see --thread)." + help= +"""SGE resource to request with qsub for BAM to FASTQ +conversion. Since this operation supports multithreading, +it is recommended to specify a parallel environment (PE) +to speed up the process (also see --thread).""" ) parser.add_argument( '--thread', metavar='INT', type=int, default=1, - help='Number of threads to use (default: 1).' + help= +"""Number of threads to use (default: 1).""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) def main(args): diff --git a/fuc/cli/ngs_fq2bam.py b/fuc/cli/ngs_fq2bam.py index ffdd5f4..0955113 100644 --- a/fuc/cli/ngs_fq2bam.py +++ b/fuc/cli/ngs_fq2bam.py @@ -54,73 +54,86 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for converting FASTQ files to analysis-ready BAM \n' - 'files.' + help= +"""Pipeline for converting FASTQ files to analysis-ready BAM +files.""" ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.'
+ help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'fasta', - help='Reference FASTA file.' + help= +"""Reference FASTA file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'qsub', type=str, - help='SGE resoruce to request for qsub.' + help= +"""SGE resoruce to request for qsub.""" ) parser.add_argument( 'java', - help='Java resoruce to request for GATK.' + help= +"""Java resoruce to request for GATK.""" ) parser.add_argument( 'vcf', type=str, nargs='+', - help='One or more reference VCF files containing known variant \n' - 'sites (e.g. 1000 Genomes Project).' + help= +"""One or more reference VCF files containing known variant +sites (e.g. 1000 Genomes Project).""" ) parser.add_argument( '--bed', metavar='PATH', type=str, - help='BED file.' + help= +"""BED file.""" ) parser.add_argument( '--thread', metavar='INT', type=int, default=1, - help='Number of threads to use (default: 1).' + help= +"""Number of threads to use (default: 1).""" ) parser.add_argument( '--platform', metavar='TEXT', type=str, default='Illumina', - help="Sequencing platform (default: 'Illumina')." + help= +"""Sequencing platform (default: 'Illumina').""" ) parser.add_argument( '--job', metavar='TEXT', type=str, - help='Job submission ID for SGE.' + help= +"""Job submission ID for SGE.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) parser.add_argument( '--keep', action='store_true', - help='Keep temporary files.' 
+ help= +"""Keep temporary files.""" ) def main(args): diff --git a/fuc/cli/ngs_hc.py b/fuc/cli/ngs_hc.py index b39e29d..3d437e1 100644 --- a/fuc/cli/ngs_hc.py +++ b/fuc/cli/ngs_hc.py @@ -45,92 +45,107 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for germline short variant discovery.', + help= +"""Pipeline for germline short variant discovery.""" ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.' + help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'fasta', - help='Reference FASTA file.' + help= +"""Reference FASTA file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'qsub', type=str, - help='SGE resoruce to request for qsub.' + help= +"""SGE resoruce to request for qsub.""" ) parser.add_argument( 'java1', type=str, - help='Java resoruce to request for single-sample variant calling.' + help= +"""Java resoruce to request for single-sample variant calling.""" ) parser.add_argument( 'java2', type=str, - help='Java resoruce to request for joint variant calling.' + help= +"""Java resoruce to request for joint variant calling.""" ) parser.add_argument( '--bed', metavar='PATH', type=str, - help='BED file.' + help= +"""BED file.""" ) parser.add_argument( '--dbsnp', metavar='PATH', type=str, - help='VCF file from dbSNP.' + help= +"""VCF file from dbSNP.""" ) parser.add_argument( '--thread', metavar='INT', type=int, default=1, - help='Number of threads to use (default: 1).' + help= +"""Number of threads to use (default: 1).""" ) parser.add_argument( '--batch', metavar='INT', type=int, default=0, - help='Batch size used for GenomicsDBImport (default: 0). This \n' - 'controls the number of samples for which readers are \n' - 'open at once and therefore provides a way to minimize \n' - 'memory consumption. The size of 0 means no batching (i.e. 
\n' - 'readers for all samples will be opened at once).' + help= +"""Batch size used for GenomicsDBImport (default: 0). This +controls the number of samples for which readers are +open at once and therefore provides a way to minimize +memory consumption. The size of 0 means no batching (i.e. +readers for all samples will be opened at once).""" ) parser.add_argument( '--job', metavar='TEXT', type=str, - help='Job submission ID for SGE.' + help= +"""Job submission ID for SGE.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) parser.add_argument( '--keep', action='store_true', - help='Keep temporary files.' + help= +"""Keep temporary files.""" ) parser.add_argument( '--posix', action='store_true', - help='Set GenomicsDBImport to allow for optimizations to improve \n' - 'the usability and performance for shared Posix Filesystems \n' - '(e.g. NFS, Lustre). If set, file level locking is disabled \n' - 'and file system writes are minimized by keeping a higher \n' - 'number of file descriptors open for longer periods of time. \n' - 'Use with --batch if keeping a large number of file \n' - 'descriptors open is an issue.' + help= +"""Set GenomicsDBImport to allow for optimizations to improve +the usability and performance for shared Posix Filesystems +(e.g. NFS, Lustre). If set, file level locking is disabled +and file system writes are minimized by keeping a higher +number of file descriptors open for longer periods of time. +Use with --batch if keeping a large number of file +descriptors open is an issue.""" ) def main(args): diff --git a/fuc/cli/ngs_m2.py b/fuc/cli/ngs_m2.py index 867cb31..618a90d 100644 --- a/fuc/cli/ngs_m2.py +++ b/fuc/cli/ngs_m2.py @@ -50,50 +50,60 @@ def create_parser(subparsers): ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.' 
+ help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'fasta', - help='Reference FASTA file.' + help= +"""Reference FASTA file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'pon', - help='PoN VCF file.' + help= +"""PoN VCF file.""" ) parser.add_argument( 'germline', - help='Germline VCF file.' + help= +"""Germline VCF file.""" ) parser.add_argument( 'qsub', type=str, - help='SGE resoruce to request for qsub.' + help= +"""SGE resoruce to request for qsub.""" ) parser.add_argument( 'java', type=str, - help='Java resoruce to request for GATK.' + help= +"""Java resoruce to request for GATK.""" ) parser.add_argument( '--bed', metavar='PATH', type=str, - help='BED file.' + help= +"""BED file.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) parser.add_argument( '--keep', action='store_true', - help='Keep temporary files.' + help= +"""Keep temporary files.""" ) def main(args): diff --git a/fuc/cli/ngs_pon.py b/fuc/cli/ngs_pon.py index 2cb0ed1..82809ae 100644 --- a/fuc/cli/ngs_pon.py +++ b/fuc/cli/ngs_pon.py @@ -40,45 +40,54 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for constructing a panel of normals (PoN).', + help= +"""Pipeline for constructing a panel of normals (PoN).""" ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.' + help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'fasta', - help='Reference FASTA file.' + help= +"""Reference FASTA file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'qsub', type=str, - help='SGE resoruce to request for qsub.' 
+ help= +"""SGE resoruce to request for qsub.""" ) parser.add_argument( 'java', - help='Java resoruce to request for GATK.' + help= +"""Java resoruce to request for GATK.""" ) parser.add_argument( '--bed', metavar='PATH', type=str, - help='BED file.' + help= +"""BED file.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) parser.add_argument( '--keep', action='store_true', - help='Keep temporary files.' + help= +"""Keep temporary files.""" ) def main(args): diff --git a/fuc/cli/ngs_quant.py b/fuc/cli/ngs_quant.py index a0f9196..e731995 100644 --- a/fuc/cli/ngs_quant.py +++ b/fuc/cli/ngs_quant.py @@ -35,58 +35,68 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for running RNAseq quantification from FASTQ files \n' - 'with Kallisto.', + help= +"""Pipeline for running RNAseq quantification from FASTQ files +with Kallisto.""" ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.' + help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'index', - help='Kallisto index file.' + help= +"""Kallisto index file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'qsub', type=str, - help='SGE resoruce to request for qsub.' + help= +"""SGE resoruce to request for qsub.""" ) parser.add_argument( '--thread', metavar='INT', type=int, default=1, - help='Number of threads to use (default: 1).' + help= +"""Number of threads to use (default: 1).""" ) parser.add_argument( '--bootstrap', metavar='INT', type=int, default=50, - help='Number of bootstrap samples (default: 50).' + help= +"""Number of bootstrap samples (default: 50).""" ) parser.add_argument( '--job', metavar='TEXT', type=str, - help='Job submission ID for SGE.' 
+ help= +"""Job submission ID for SGE.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) parser.add_argument( '--posix', action='store_true', - help='Set the environment variable HDF5_USE_FILE_LOCKING=FALSE \n' - 'before running Kallisto. This is required for shared Posix \n' - 'Filesystems (e.g. NFS, Lustre).' + help= +"""Set the environment variable HDF5_USE_FILE_LOCKING=FALSE +before running Kallisto. This is required for shared Posix +Filesystems (e.g. NFS, Lustre).""" ) def main(args): diff --git a/fuc/cli/ngs_trim.py b/fuc/cli/ngs_trim.py index 4384fc3..89f33bf 100644 --- a/fuc/cli/ngs_trim.py +++ b/fuc/cli/ngs_trim.py @@ -34,39 +34,46 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Pipeline for trimming adapters from FASTQ files.', + help= +"""Pipeline for trimming adapters from FASTQ files.""" ) parser.add_argument( 'manifest', - help='Sample manifest CSV file.' + help= +"""Sample manifest CSV file.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( 'qsub', type=str, - help='SGE resoruce to request for qsub.' + help= +"""SGE resoruce to request for qsub.""" ) parser.add_argument( '--thread', metavar='INT', type=int, default=1, - help='Number of threads to use (default: 1).' + help= +"""Number of threads to use (default: 1).""" ) parser.add_argument( '--job', metavar='TEXT', type=str, - help='Job submission ID for SGE.' + help= +"""Job submission ID for SGE.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' 
+ help= +"""Overwrite the output directory if it already exists.""" ) def main(args): diff --git a/fuc/cli/tabix_index.py b/fuc/cli/tabix_index.py index 993ee29..bc3e759 100644 --- a/fuc/cli/tabix_index.py +++ b/fuc/cli/tabix_index.py @@ -32,16 +32,19 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Index a GFF/BED/SAM/VCF file with Tabix.', + help= +"""Index a GFF/BED/SAM/VCF file with Tabix.""" ) parser.add_argument( 'file', - help='File to be indexed.' + help= +"""File to be indexed.""" ) parser.add_argument( '--force', action='store_true', - help='Force to overwrite the index file if it is present.' + help= +"""Force to overwrite the index file if it is present.""" ) def main(args): diff --git a/fuc/cli/tabix_slice.py b/fuc/cli/tabix_slice.py index d8f3bd7..dfc08e6 100644 --- a/fuc/cli/tabix_slice.py +++ b/fuc/cli/tabix_slice.py @@ -25,16 +25,19 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Slice a GFF/BED/SAM/VCF file with Tabix.', + help= +"""Slice a GFF/BED/SAM/VCF file with Tabix.""" ) parser.add_argument( 'file', - help='File to be sliced.' + help= +"""File to be sliced.""" ) parser.add_argument( 'regions', nargs='+', - help='One or more regions.' + help= +"""One or more regions.""" ) def main(args): diff --git a/fuc/cli/tbl_merge.py b/fuc/cli/tbl_merge.py index b98d6c4..9765883 100644 --- a/fuc/cli/tbl_merge.py +++ b/fuc/cli/tbl_merge.py @@ -32,47 +32,55 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Merge two table files.', + help= +"""Merge two table files.""" ) parser.add_argument( 'left', - help='Left file.' + help= +"""Input left file.""" ) parser.add_argument( 'right', - help='Right file.' 
+ help= +"""Input right file.""" ) parser.add_argument( '--how', metavar='TEXT', choices=CHOICES, default='inner', - help="Type of merge to be performed (default: 'inner') \n" - "(choices: 'left', 'right', 'outer', 'inner', 'cross')." + help= +"""Type of merge to be performed (default: 'inner') +(choices: 'left', 'right', 'outer', 'inner', 'cross').""" ) parser.add_argument( '--on', metavar='TEXT', nargs='+', - help='Column names to join on.' + help= +"""Column names to join on.""" ) parser.add_argument( '--lsep', metavar='TEXT', default='\t', - help="Delimiter to use for the left file (default: '\\t')." + help= +"""Delimiter to use for the left file (default: '\\t').""" ) parser.add_argument( '--rsep', metavar='TEXT', default='\t', - help="Delimiter to use for the right file (default: '\\t')." + help= +"""Delimiter to use for the right file (default: '\\t').""" ) parser.add_argument( '--osep', metavar='TEXT', default='\t', - help="Delimiter to use for the output file (default: '\\t')." + help= +"""Delimiter to use for the output file (default: '\\t').""" ) return parser diff --git a/fuc/cli/tbl_sum.py b/fuc/cli/tbl_sum.py index 1b46794..6cdebd0 100644 --- a/fuc/cli/tbl_sum.py +++ b/fuc/cli/tbl_sum.py @@ -18,63 +18,72 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Summarize a table file.', + help= +"""Summarize a table file.""" ) parser.add_argument( 'table_file', - help='Table file.' + help= +"""Table file.""" ) parser.add_argument( '--sep', metavar='TEXT', default='\t', - help="Delimiter to use (default: '\\t')." + help= +"""Delimiter to use (default: '\\t').""" ) parser.add_argument( '--skiprows', metavar='TEXT', - help='Comma-separated line numbers to skip (0-indexed) or \n' - 'number of lines to skip at the start of the file \n' - '(e.g. `--skiprows 1,` will skip the second line, \n' - '`--skiprows 2,4` will skip the third and fifth lines, \n' - 'and `--skiprows 10` will skip the first 10 lines).' 
+ help= +"""Comma-separated line numbers to skip (0-indexed) or +number of lines to skip at the start of the file +(e.g. `--skiprows 1,` will skip the second line, +`--skiprows 2,4` will skip the third and fifth lines, +and `--skiprows 10` will skip the first 10 lines).""" ) parser.add_argument( '--na_values', metavar='TEXT', nargs='+', - help='Additional strings to recognize as NA/NaN (by \n' - "default, the following values are interpreted \n" - "as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', \n" - "'-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', \n" - "'', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', \n" - "'null')." + help= +"""Additional strings to recognize as NA/NaN (by +default, the following values are interpreted +as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', +'-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', +'', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', +'null').""" ) parser.add_argument( '--keep_default_na', action='store_false', - help='Whether or not to include the default NaN values when \n' - "parsing the data (see 'pandas.read_table' for details)." + help= +"""Whether or not to include the default NaN values when +parsing the data (see 'pandas.read_table' for details).""" ) parser.add_argument( '--expr', metavar='TEXT', - help='Query the columns of a pandas.DataFrame with a \n' - """boolean expression (e.g. `--query "A == 'yes'"`).""" + help= +"""Query the columns of a pandas.DataFrame with a +boolean expression (e.g. `--query "A == 'yes'"`).""" ) parser.add_argument( '--columns', metavar='TEXT', nargs='+', - help='Columns to be summarized (by default, all columns \n' - 'will be included).' + help= +"""Columns to be summarized (by default, all columns +will be included).""" ) parser.add_argument( '--dtypes', metavar='PATH', - help="File of column names and their data types (either \n" - "'categorical' or 'numeric'); one tab-delimited pair of \n" - "column name and data type per line." 
+ help= +"""File of column names and their data types (either +'categorical' or 'numeric'); one tab-delimited pair of +column name and data type per line.""" ) def main(args): diff --git a/fuc/cli/vcf_filter.py b/fuc/cli/vcf_filter.py index c80be07..5de1933 100644 --- a/fuc/cli/vcf_filter.py +++ b/fuc/cli/vcf_filter.py @@ -41,48 +41,56 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Filter a VCF file.', + help= +"""Filter a VCF file.""" ) parser.add_argument( 'vcf', - help='VCF file (compressed or uncompressed).' + help= +"""VCF file (compressed or uncompressed).""" ) parser.add_argument( '--expr', metavar='TEXT', - help='Expression to evaluate.' + help= +"""Expression to evaluate.""" ) parser.add_argument( '--samples', metavar='PATH', - help='File of sample names to apply the marking (one \n' - 'sample per line).' + help= +"""File of sample names to apply the marking (one +sample per line).""" ) parser.add_argument( '--drop_duplicates', metavar='TEXT', nargs='*', - help='Only consider certain columns for identifying \n' - 'duplicates, by default use all of the columns.' + help= +"""Only consider certain columns for identifying +duplicates, by default use all of the columns.""" ) parser.add_argument( '--greedy', action='store_true', - help='Use this flag to mark even ambiguous genotypes \n' - 'as missing.' + help= +"""Use this flag to mark even ambiguous genotypes +as missing.""" ) parser.add_argument( '--opposite', action='store_true', - help='Use this flag to mark all genotypes that do not \n' - 'satisfy the query expression as missing and leave \n' - 'those that do intact.' + help= +"""Use this flag to mark all genotypes that do not +satisfy the query expression as missing and leave +those that do intact.""" ) parser.add_argument( '--filter_empty', action='store_true', - help='Use this flag to remove rows with no genotype \n' - 'calls at all.' 
+ help= +"""Use this flag to remove rows with no genotype +calls at all.""" ) def main(args): diff --git a/fuc/cli/vcf_index.py b/fuc/cli/vcf_index.py index 4ecd621..6f77ce0 100644 --- a/fuc/cli/vcf_index.py +++ b/fuc/cli/vcf_index.py @@ -28,14 +28,16 @@ def create_parser(subparsers): ) parser.add_argument( 'vcf', - help='Input VCF file to be indexed. When an uncompressed file is \n' - 'given, the command will automatically create a BGZF \n' - 'compressed copy of the file (.gz) before indexing.' + help= +"""Input VCF file to be indexed. When an uncompressed file is +given, the command will automatically create a BGZF +compressed copy of the file (.gz) before indexing.""" ) parser.add_argument( '--force', action='store_true', - help='Force to overwrite the index file if it is already present.' + help= +"""Force to overwrite the index file if it is already present.""" ) def main(args): diff --git a/fuc/cli/vcf_merge.py b/fuc/cli/vcf_merge.py index 60d5353..710cd25 100644 --- a/fuc/cli/vcf_merge.py +++ b/fuc/cli/vcf_merge.py @@ -25,36 +25,41 @@ def create_parser(subparsers): parser.add_argument( 'vcf_files', nargs='+', - help="VCF files (compressed or uncompressed). Note that the 'chr'\n" - "prefix in contig names (e.g. 'chr1' vs. '1') will be \n" - "automatically added or removed as necessary to match the \n" - "contig names of the first VCF." + help= +"""VCF files (compressed or uncompressed). Note that the 'chr' +prefix in contig names (e.g. 'chr1' vs. '1') will be +automatically added or removed as necessary to match the +contig names of the first VCF.""" ) parser.add_argument( '--how', metavar='TEXT', default='inner', - help="Type of merge as defined in pandas.DataFrame.merge \n" - "(default: 'inner')." + help= +"""Type of merge as defined in pandas.DataFrame.merge +(default: 'inner').""" ) parser.add_argument( '--format', metavar='TEXT', default='GT', - help="FORMAT subfields to be retained (e.g. 'GT:AD:DP') \n" - "(default: 'GT')." 
+ help= +"""FORMAT subfields to be retained (e.g. 'GT:AD:DP') +(default: 'GT').""" ) parser.add_argument( '--sort', action='store_false', - help='Use this flag to turn off sorting of records \n' - '(default: True).' + help= +"""Use this flag to turn off sorting of records +(default: True).""" ) parser.add_argument( '--collapse', action='store_true', - help='Use this flag to collapse duplicate records \n' - '(default: False).' + help= +"""Use this flag to collapse duplicate records +(default: False).""" ) def main(args): diff --git a/fuc/cli/vcf_rename.py b/fuc/cli/vcf_rename.py index d03db01..3d0f7a0 100644 --- a/fuc/cli/vcf_rename.py +++ b/fuc/cli/vcf_rename.py @@ -33,38 +33,44 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Rename the samples in a VCF file.', + help= +"""Rename the samples in a VCF file.""" ) parser.add_argument( 'vcf', - help='VCF file (compressed or uncompressed).' + help= +"""VCF file (compressed or uncompressed).""" ) parser.add_argument( 'names', - help='Text file containing information for renaming the samples.' + help= +"""Text file containing information for renaming the samples.""" ) parser.add_argument( '--mode', metavar='TEXT', default='MAP', choices=['MAP', 'INDEX', 'RANGE'], - help="Renaming mode (default: 'MAP') (choices: 'MAP', \n" - "'INDEX', 'RANGE')." + help= +"""Renaming mode (default: 'MAP') (choices: 'MAP', +'INDEX', 'RANGE').""" ) parser.add_argument( '--range', metavar='INT', type=int, nargs=2, - help="Index range to use when renaming the samples. \n" - "Applicable only with the 'RANGE' mode." + help= +"""Index range to use when renaming the samples. +Applicable only with the 'RANGE' mode.""" ) parser.add_argument( '--sep', metavar='TEXT', default='\t', - help="Delimiter to use for reading the 'names' file \n" - "(default: '\\t')." 
+ help= +"""Delimiter to use for reading the 'names' file +(default: '\\t').""" ) def main(args): diff --git a/fuc/cli/vcf_slice.py b/fuc/cli/vcf_slice.py index 388fcea..68d154c 100644 --- a/fuc/cli/vcf_slice.py +++ b/fuc/cli/vcf_slice.py @@ -23,26 +23,29 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Slice a VCF file for specified regions.', + help= +"""Slice a VCF file for specified regions.""" ) parser.add_argument( 'vcf', - help="Input VCF file must be already BGZF compressed (.gz) and \n" - "indexed (.tbi) to allow random access. A VCF file can be \n" - "compressed with the fuc-bgzip command and indexed with the \n" - "vcf-index command." + help= +"""Input VCF file must be already BGZF compressed (.gz) and +indexed (.tbi) to allow random access. A VCF file can be +compressed with the fuc-bgzip command and indexed with the +vcf-index command.""" ) parser.add_argument( 'regions', nargs='+', - help="One or more regions to be sliced. Each region must have the \n" - "format chrom:start-end and be a half-open interval with \n" - "(start, end]. This means, for example, chr1:100-103 will \n" - "extract positions 101, 102, and 103. Alternatively, you can \n" - "provide a BED file (compressed or uncompressed) to specify \n" - "regions. Note that the 'chr' prefix in contig names (e.g. \n" - "'chr1' vs. '1') will be automatically added or removed as \n" - "necessary to match the input VCF's contig names." + help= +"""One or more regions to be sliced. Each region must have the +format chrom:start-end and be a half-open interval with +(start, end]. This means, for example, chr1:100-103 will +extract positions 101, 102, and 103. Alternatively, you can +provide a BED file (compressed or uncompressed) to specify +regions. Note that the 'chr' prefix in contig names (e.g. +'chr1' vs. 
'1') will be automatically added or removed as +necessary to match the input VCF's contig names.""" ) def main(args): diff --git a/fuc/cli/vcf_split.py b/fuc/cli/vcf_split.py index 8d14dd8..23529f5 100644 --- a/fuc/cli/vcf_split.py +++ b/fuc/cli/vcf_split.py @@ -19,28 +19,33 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Split a VCF file by individual.', + help= +"""Split a VCF file by individual.""" ) parser.add_argument( 'vcf', - help="VCF file to be split." + help= +"""VCF file to be split.""" ) parser.add_argument( 'output', type=os.path.abspath, - help='Output directory.' + help= +"""Output directory.""" ) parser.add_argument( '--clean', action='store_false', - help="By default, the command will only return variants present in \n" - "each individual. Use the tag to stop this behavior and make \n" - "sure that all individuals have the same number of variants." + help= +"""By default, the command will only return variants present in +each individual. Use the tag to stop this behavior and make +sure that all individuals have the same number of variants.""" ) parser.add_argument( '--force', action='store_true', - help='Overwrite the output directory if it already exists.' + help= +"""Overwrite the output directory if it already exists.""" ) def main(args): diff --git a/fuc/cli/vcf_vcf2bed.py b/fuc/cli/vcf_vcf2bed.py index bda3991..2fb07f9 100644 --- a/fuc/cli/vcf_vcf2bed.py +++ b/fuc/cli/vcf_vcf2bed.py @@ -17,11 +17,13 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Convert a VCF file to a BED file.', + help= +"""Convert a VCF file to a BED file.""" ) parser.add_argument( 'vcf', - help='VCF file (compressed or uncompressed).' 
+ help= +"""VCF file (compressed or uncompressed).""" ) def main(args): diff --git a/fuc/cli/vcf_vep.py b/fuc/cli/vcf_vep.py index 969b21f..d0605c5 100644 --- a/fuc/cli/vcf_vep.py +++ b/fuc/cli/vcf_vep.py @@ -37,26 +37,31 @@ def create_parser(subparsers): api.common._script_name(), description=description, epilog=epilog, - help='Filter a VCF file by annotations from Ensembl VEP.', + help= +"""Filter a VCF file by annotations from Ensembl VEP.""" ) parser.add_argument( 'vcf', - help='VCF file annotated by Ensembl VEP (compressed or uncompressed).' + help= +"""VCF file annotated by Ensembl VEP (compressed or uncompressed).""" ) parser.add_argument( 'expr', - help='Query expression to evaluate.' + help= +"""Query expression to evaluate.""" ) parser.add_argument( '--opposite', action='store_true', - help="Use this flag to return only records that don't \n" - "meet the said criteria." + help= +"""Use this flag to return only records that don't +meet the said criteria.""" ) parser.add_argument( '--as_zero', action='store_true', - help='Use this flag to treat missing values as zero instead of NaN.' + help= +"""Use this flag to treat missing values as zero instead of NaN.""" ) def main(args):