From b1a3125386086825a26fbd29aaf3e8478040bd89 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 08:26:39 +0900
Subject: [PATCH 01/29] Bump up version number

---
 fuc/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuc/version.py b/fuc/version.py
index ef91994..a842d05 100644
--- a/fuc/version.py
+++ b/fuc/version.py
@@ -1 +1 @@
-__version__ = '0.14.0'
+__version__ = '0.15.0'

From b54e10e9271e2fb66bff5d342cad5b9199b14468 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 08:49:03 +0900
Subject: [PATCH 02/29] Update vcf_filter command

---
 docs/cli.rst          | 51 +++++++++++++++++------------
 fuc/cli/vcf_filter.py | 75 +++++++++++++++++++++++++++----------------
 2 files changed, 78 insertions(+), 48 deletions(-)

diff --git a/docs/cli.rst b/docs/cli.rst
index 4980249..23c2e50 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -558,36 +558,45 @@ vcf_filter
 .. code-block:: text
 
    $ fuc vcf_filter -h
-   usage: fuc vcf_filter [-h] [--greedy] [--opposite] [--samples PATH]
+   usage: fuc vcf_filter [-h] [--expr TEXT] [--samples PATH]
+                         [--drop_duplicates [TEXT ...]] [--greedy] [--opposite]
                          [--filter_empty]
-                         vcf expr
+                         vcf
    
    This command will filter a VCF file (both zipped and unzipped). It essentially
-   wraps the 'pyvcf.VcfFrame.markmiss' method from the fuc API.
+   wraps multiple methods from the fuc API.
    
    usage examples:
-     $ fuc vcf_filter in.vcf 'GT == "0/0"' > out.vcf
-     $ fuc vcf_filter in.vcf 'GT != "0/0"' > out.vcf
-     $ fuc vcf_filter in.vcf 'DP < 30' > out.vcf
-     $ fuc vcf_filter in.vcf 'DP < 30' --greedy > out.vcf
-     $ fuc vcf_filter in.vcf 'AD[1] < 10' --greedy > out.vcf
-     $ fuc vcf_filter in.vcf 'AD[1] < 10 and DP < 30' --greedy > out.vcf
-     $ fuc vcf_filter in.vcf 'AD[1] < 10 or DP < 30' --greedy > out.vcf
-     $ fuc vcf_filter in.vcf 'AD[1] < 10 or DP < 30' --opposite > out.vcf
-     $ fuc vcf_filter in.vcf 'np.mean(AD) < 10' --greedy --samples sample.list > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'GT == "0/0"' > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'GT != "0/0"' > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'DP < 30' > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'DP < 30' --greedy > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'AD[1] < 10' --greedy > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'AD[1] < 10 and DP < 30' --greedy > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'AD[1] < 10 or DP < 30' --greedy > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'AD[1] < 10 or DP < 30' --opposite > out.vcf
+     $ fuc vcf_filter in.vcf --expr 'np.mean(AD) < 10' --greedy --samples sample.list > out.vcf
+     $ fuc vcf_filter in.vcf --drop_duplicates CHROM POS REF ALT > out.vcf
+     $ fuc vcf_filter in.vcf --filter_empty > out.vcf
    
    positional arguments:
-     vcf             VCF file
-     expr            expression to evaluate
+     vcf                   VCF file
    
    optional arguments:
-     -h, --help      show this help message and exit
-     --greedy        use this flag to mark even ambiguous genotypes as missing
-     --opposite      use this flag to mark all genotypes that do not satisfy the
-                     query expression as missing and leave those that do intact
-     --samples PATH  file of sample names to apply the marking (one sample per
-                     line)
-     --filter_empty  use this flag to remove rows with no genotype calls at all
+     -h, --help            show this help message and exit
+     --expr TEXT           expression to evaluate
+     --samples PATH        file of sample names to apply the marking (one sample
+                           per line)
+     --drop_duplicates [TEXT ...]
+                           only consider certain columns for identifying
+                           duplicates, by default use all of the columns.
+     --greedy              use this flag to mark even ambiguous genotypes as
+                           missing
+     --opposite            use this flag to mark all genotypes that do not
+                           satisfy the query expression as missing and leave
+                           those that do intact
+     --filter_empty        use this flag to remove rows with no genotype calls at
+                           all
 
 vcf_merge
 =========
diff --git a/fuc/cli/vcf_filter.py b/fuc/cli/vcf_filter.py
index dc28ffc..9868a50 100644
--- a/fuc/cli/vcf_filter.py
+++ b/fuc/cli/vcf_filter.py
@@ -3,18 +3,20 @@
 
 description = f"""
 This command will filter a VCF file (both zipped and unzipped). It essentially
-wraps the 'pyvcf.VcfFrame.markmiss' method from the fuc API.
+wraps multiple methods from the fuc API.
 
 usage examples:
-  $ fuc {api.common._script_name()} in.vcf 'GT == "0/0"' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'GT != "0/0"' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'DP < 30' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'DP < 30' --greedy > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'AD[1] < 10' --greedy > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'AD[1] < 10 and DP < 30' --greedy > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'AD[1] < 10 or DP < 30' --greedy > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'AD[1] < 10 or DP < 30' --opposite > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'np.mean(AD) < 10' --greedy --samples sample.list > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'GT == "0/0"' > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'GT != "0/0"' > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'DP < 30' > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'DP < 30' --greedy > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'AD[1] < 10' --greedy > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'AD[1] < 10 and DP < 30' --greedy > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'AD[1] < 10 or DP < 30' --greedy > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'AD[1] < 10 or DP < 30' --opposite > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --expr 'np.mean(AD) < 10' --greedy --samples sample.list > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --drop_duplicates CHROM POS REF ALT > out.vcf
+  $ fuc {api.common._script_name()} in.vcf --filter_empty > out.vcf
 """
 
 def create_parser(subparsers):
@@ -25,7 +27,23 @@ def create_parser(subparsers):
         description=description,
     )
     parser.add_argument('vcf', help='VCF file')
-    parser.add_argument('expr', help='expression to evaluate')
+    parser.add_argument(
+        '--expr',
+        metavar='TEXT',
+        help='expression to evaluate'
+    )
+    parser.add_argument(
+        '--samples',
+        metavar='PATH',
+        help='file of sample names to apply the marking (one sample per line)'
+    )
+    parser.add_argument(
+        '--drop_duplicates',
+        metavar='TEXT',
+        nargs='*',
+        help='only consider certain columns for identifying duplicates, '
+            'by default use all of the columns.'
+    )
     parser.add_argument(
         '--greedy',
         action='store_true',
@@ -35,12 +53,7 @@ def create_parser(subparsers):
         '--opposite',
         action='store_true',
         help='use this flag to mark all genotypes that do not satisfy the '
-             'query expression as missing and leave those that do intact'
-    )
-    parser.add_argument(
-        '--samples',
-        metavar='PATH',
-        help='file of sample names to apply the marking (one sample per line)'
+            'query expression as missing and leave those that do intact'
     )
     parser.add_argument(
         '--filter_empty',
@@ -50,14 +63,22 @@ def create_parser(subparsers):
 
 def main(args):
     vf = api.pyvcf.VcfFrame.from_file(args.vcf)
-    if args.samples is None:
-        samples = None
-    else:
-        samples = pd.read_table(args.samples, header=None)[0].to_list()
-    filtered_vf = vf.markmiss(args.expr,
-                              greedy=args.greedy,
-                              opposite=args.opposite,
-                              samples=samples)
+
+    if args.expr is not None:
+        if args.samples is None:
+            samples = None
+        else:
+            samples = pd.read_table(args.samples, header=None)[0].to_list()
+
+        vf = vf.markmiss(args.expr,
+                         greedy=args.greedy,
+                         opposite=args.opposite,
+                         samples=samples)
+
+    if args.drop_duplicates is not None:
+        vf = vf.drop_duplicates(args.drop_duplicates)
+
     if args.filter_empty:
-        filtered_vf = filtered_vf.filter_empty()
-    print(filtered_vf.to_string(), end='')
+        vf = vf.filter_empty()
+
+    print(vf.to_string(), end='')

From 69e32adddde1acdb60f16ef3319aa2b2bb0612a9 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 10:53:19 +0900
Subject: [PATCH 03/29] Update tbl_sum command

---
 CHANGELOG.rst      |  6 ++++++
 fuc/cli/tbl_sum.py | 49 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e1a958f..309c51a 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,12 @@
 Changelog
 *********
 
+0.15.0 (in development)
+-----------------------
+
+* Update :command:`vcf_filter` command.
+* Update :command:`tbl_sum` command.
+
 0.14.0 (2021-06-20)
 -------------------
 
diff --git a/fuc/cli/tbl_sum.py b/fuc/cli/tbl_sum.py
index 85bf485..10a4952 100644
--- a/fuc/cli/tbl_sum.py
+++ b/fuc/cli/tbl_sum.py
@@ -9,6 +9,7 @@
 
 usage examples:
   $ fuc {api.common._script_name()} table.tsv
+  $ fuc {api.common._script_name()} table.csv --sep ,
 """
 
 def create_parser(subparsers):
@@ -51,10 +52,10 @@ def create_parser(subparsers):
         '--keep_default_na',
         action='store_false',
         help='whether or not to include the default NaN values when '
-             'parsing the data (see `pandas.read_table` for details)'
+             "parsing the data (see 'pandas.read_table' for details)"
     )
     parser.add_argument(
-        '--query',
+        '--expr',
         metavar='TEXT',
         help='query the columns of a pandas.DataFrame with a '
              """boolean expression (e.g. `--query "A == 'yes'"`)"""
@@ -66,6 +67,13 @@ def create_parser(subparsers):
         help='columns to be summarized (by default, all columns '
              'will be included)'
     )
+    parser.add_argument(
+        '--dtypes',
+        metavar='PATH',
+        help="file of column names and their data types (etheir "
+            "'categorical' or 'numeric'); one tab-delimited pair of column "
+            "name and data type per line"
+    )
 
 def main(args):
     if args.skiprows is None:
@@ -74,23 +82,44 @@ def main(args):
         skiprows = [int(x) for x in args.skiprows.split(',') if x]
     else:
         skiprows = int(args.skiprows)
+
     df = pd.read_table(args.table_file,
                        sep=args.sep,
                        skiprows=skiprows,
                        na_values=args.na_values,
                        keep_default_na=args.keep_default_na)
-    if args.query:
-        df = df.query(args.query)
+
+    if args.dtypes is not None:
+        with open(args.dtypes) as f:
+            for line in f:
+                fields = line.strip().split('\t')
+                col = fields[0]
+                dtype = fields[1]
+                if dtype == 'numeric':
+                    dtype = float
+                elif dtype == 'categorical':
+                    dtype = 'category'
+                else:
+                    raise TypeError("Data type must be either numeric or "
+                        "categorical.")
+                df[col] = df[col].astype(dtype)
+
+    if args.expr:
+        df = df.query(args.expr)
+
     if args.columns:
         headers = args.columns
     else:
         headers = df.columns
+
     print('='*70)
+
     for header in headers:
-        column = df[header]
-        if is_numeric_dtype(column):
-            print(column.describe())
-            print('='*70)
+        s = df[header]
+        print(f'Name: {header}')
+        print('Summary:')
+        if is_numeric_dtype(s):
+            print(s.describe().to_string())
         else:
-            print(column.value_counts())
-            print('='*70)
+            print(s.value_counts().sort_index().to_string())
+        print('='*70)

From 8fc919a352ec9831b40ab6595238d9ab7a6f747b Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 10:56:56 +0900
Subject: [PATCH 04/29] Update CLI

---
 fuc/cli/tbl_sum.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fuc/cli/tbl_sum.py b/fuc/cli/tbl_sum.py
index 10a4952..f984825 100644
--- a/fuc/cli/tbl_sum.py
+++ b/fuc/cli/tbl_sum.py
@@ -100,8 +100,8 @@ def main(args):
                 elif dtype == 'categorical':
                     dtype = 'category'
                 else:
-                    raise TypeError("Data type must be either numeric or "
-                        "categorical.")
+                    raise TypeError("Data type must be either 'numeric' or "
+                        "'categorical'.")
                 df[col] = df[col].astype(dtype)
 
     if args.expr:

From e0624f11ceff89fc4474d815bd8c5486e9165571 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 11:11:21 +0900
Subject: [PATCH 05/29] Update CLI

---
 fuc/cli/tbl_sum.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fuc/cli/tbl_sum.py b/fuc/cli/tbl_sum.py
index f984825..bfe6fa5 100644
--- a/fuc/cli/tbl_sum.py
+++ b/fuc/cli/tbl_sum.py
@@ -117,6 +117,7 @@ def main(args):
     for header in headers:
         s = df[header]
         print(f'Name: {header}')
+        print(f'Count: {len(s)}')
         print('Summary:')
         if is_numeric_dtype(s):
             print(s.describe().to_string())

From d045a5a62533c56e578da9d9dd900efeed7a0f3a Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 12:21:01 +0900
Subject: [PATCH 06/29] Update docs

---
 README.rst     |  2 +-
 docs/cli.rst   | 10 +++++++---
 docs/create.py |  2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index e383d93..dbb8106 100644
--- a/README.rst
+++ b/README.rst
@@ -249,7 +249,7 @@ To create read depth profile of a region from a CRAM file:
     >>> from fuc import pycov
     >>> cf = pycov.CovFrame.from_file('HG00525.final.cram', zero=True,
     ...    region='chr12:21161194-21239796', names=['HG00525'])
-    >>> cf.plot_region('chr12', start=21161194, end=21239796)
+    >>> cf.plot_region('chr12:21161194-21239796')
 
 .. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/coverage.png
 
diff --git a/docs/cli.rst b/docs/cli.rst
index 23c2e50..95a3133 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -517,7 +517,7 @@ tbl_sum
    $ fuc tbl_sum -h
    usage: fuc tbl_sum [-h] [--sep TEXT] [--skiprows TEXT]
                       [--na_values TEXT [TEXT ...]] [--keep_default_na]
-                      [--query TEXT] [--columns TEXT [TEXT ...]]
+                      [--expr TEXT] [--columns TEXT [TEXT ...]] [--dtypes PATH]
                       table_file
    
    This command will summarize a table file. It essentially wraps the
@@ -526,6 +526,7 @@ tbl_sum
    
    usage examples:
      $ fuc tbl_sum table.tsv
+     $ fuc tbl_sum table.csv --sep ,
    
    positional arguments:
      table_file            table file
@@ -545,12 +546,15 @@ tbl_sum
                            '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
                            'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null')
      --keep_default_na     whether or not to include the default NaN values when
-                           parsing the data (see `pandas.read_table` for details)
-     --query TEXT          query the columns of a pandas.DataFrame with a boolean
+                           parsing the data (see 'pandas.read_table' for details)
+     --expr TEXT           query the columns of a pandas.DataFrame with a boolean
                            expression (e.g. `--query "A == 'yes'"`)
      --columns TEXT [TEXT ...]
                            columns to be summarized (by default, all columns will
                            be included)
+     --dtypes PATH         file of column names and their data types (etheir
+                           'categorical' or 'numeric'); one tab-delimited pair of
+                           column name and data type per line
 
 vcf_filter
 ==========
diff --git a/docs/create.py b/docs/create.py
index 0e729e0..2daa2bc 100644
--- a/docs/create.py
+++ b/docs/create.py
@@ -237,7 +237,7 @@
     >>> from fuc import pycov
     >>> cf = pycov.CovFrame.from_file('HG00525.final.cram', zero=True,
     ...    region='chr12:21161194-21239796', names=['HG00525'])
-    >>> cf.plot_region('chr12', start=21161194, end=21239796)
+    >>> cf.plot_region('chr12:21161194-21239796')
 
 .. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/coverage.png
 

From da67fadc0e15e60aa1da9b4e0a44a9ee1fe66c62 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 17:20:20 +0900
Subject: [PATCH 07/29] Update API

---
 CHANGELOG.rst      |   3 ++
 docs/tutorials.rst |   6 +--
 fuc/api/pymaf.py   | 116 +++++++++++++++++++++++++++++++++++----------
 3 files changed, 97 insertions(+), 28 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 309c51a..eda066e 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,9 @@ Changelog
 
 * Update :command:`vcf_filter` command.
 * Update :command:`tbl_sum` command.
+* Add ``samples`` and ``shape`` attributes to :class:`pymaf.AnnFrame` class.
+* Rename :meth:`pymaf.MafFrame.compute_genes/tmb/waterfall` methods to :meth:`pymaf.MafFrame.matrix_genes/tmb/waterfall`.
+* Add ``keep_empty`` argument to :meth:`pymaf.MafFrame.matrix_waterfall/plot_oncoplot/plot_waterfall` methods.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
index e9e3032..b671fb7 100644
--- a/docs/tutorials.rst
+++ b/docs/tutorials.rst
@@ -36,14 +36,14 @@ We can (relatively) easily achieve above thanks to the LEGO block-like plotting
     fig.suptitle('Customized oncoplot', fontsize=20)
 
     # Create the TMB plot.
-    samples = list(mf.compute_waterfall(count).columns)
+    samples = list(mf.matrix_waterfall(count).columns)
     mf.plot_tmb(ax=ax1, samples=samples)
     ax1.set_xlabel('')
     ax1.spines['right'].set_visible(False)
     ax1.spines['top'].set_visible(False)
     ax1.spines['bottom'].set_visible(False)
     ax1.set_ylabel('TMB', fontsize=label_fontsize)
-    ax1.set_yticks([0, mf.compute_tmb().sum(axis=1).max()])
+    ax1.set_yticks([0, mf.matrix_tmb().sum(axis=1).max()])
     ax1.tick_params(axis='y', which='major', labelsize=ticklabels_fontsize)
 
     ax2.remove()
@@ -60,7 +60,7 @@ We can (relatively) easily achieve above thanks to the LEGO block-like plotting
     ax4.spines['top'].set_visible(False)
     ax4.set_yticks([])
     ax4.set_xlabel('Samples', fontsize=label_fontsize)
-    ax4.set_xticks([0, mf.compute_genes(10, mode='samples').sum(axis=1).max()])
+    ax4.set_xticks([0, mf.matrix_genes(10, mode='samples').sum(axis=1).max()])
     ax4.set_ylim(-0.5, count-0.5)
     ax4.tick_params(axis='x', which='major', labelsize=ticklabels_fontsize)
 
diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index 9a2b3f3..d89ac4b 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -283,8 +283,6 @@ class AnnFrame:
     Sara_N                  Sara  Normal   57
     Sara_T                  Sara   Tumor   57
     """
-    def __init__(self, df):
-        self._df = self._check_df(df)
 
     def _check_df(self, df):
         if type(df.index) == pd.RangeIndex:
@@ -292,6 +290,9 @@ def _check_df(self, df):
             raise ValueError(m)
         return df
 
+    def __init__(self, df):
+        self._df = self._check_df(df)
+
     @property
     def df(self):
         """pandas.DataFrame : DataFrame containing sample annotation data."""
@@ -301,6 +302,33 @@ def df(self):
     def df(self, value):
         self._df = self._check_df(value)
 
+    @property
+    def samples(self):
+        """list : List of the sample names."""
+        return list(self.df.index.to_list())
+
+    @property
+    def shape(self):
+        """tuple : Dimensionality of AnnFrame (samples, annotations)."""
+        return self.df.shape
+
+    def filter_mf(self, mf):
+        """
+        Filter the AnnFrame for the samples in the MafFrame.
+
+        Parameters
+        ----------
+        mf : MafFrame
+            MafFrame containing target samples.
+
+        Returns
+        -------
+        AnnFrame
+            Filtered AnnFrame object.
+        """
+        df = self.df.loc[mf.samples]
+        return self.__class__(df)
+
     @classmethod
     def from_dict(cls, data, sample_col='Tumor_Sample_Barcode'):
         """Construct AnnFrame from dict of array-like or dicts.
@@ -765,8 +793,10 @@ def one_row(r):
 
         return cls(df)
 
-    def compute_genes(self, count=10, mode='variants'):
-        """Compute a matrix of counts for genes and variant classifications.
+    def matrix_genes(self, count=10, mode='variants'):
+        """
+        Compute a matrix of variant counts with a shape of (genes, variant
+        classifications).
 
         Parameters
         ----------
@@ -796,7 +826,7 @@ def compute_genes(self, count=10, mode='variants'):
             df = df[:count]
             df = df.rename_axis(None, axis=1)
         elif mode == 'samples':
-            df = self.compute_waterfall(count)
+            df = self.matrix_waterfall(count)
             df = df.apply(lambda r: r.value_counts(), axis=1)
             for varcls in NONSYN_NAMES + ['Multi_Hit']:
                 if varcls not in df.columns:
@@ -807,8 +837,10 @@ def compute_genes(self, count=10, mode='variants'):
             raise ValueError(f'Found incorrect mode: {mode}')
         return df
 
-    def compute_tmb(self):
-        """Compute a matrix of counts for samples and variant classifications.
+    def matrix_tmb(self):
+        """
+        Compute a matrix of variant counts with a shape of (samples, variant
+        classifications).
 
         Returns
         -------
@@ -832,13 +864,22 @@ def compute_tmb(self):
         df = df.rename_axis(None, axis=1)
         return df
 
-    def compute_waterfall(self, count=10):
-        """Compute a matrix of variant classifications for genes and samples.
+    def matrix_waterfall(self, count=10, samples=None, keep_empty=False):
+        """
+        Compute a matrix of variant classifications with a shape of
+        (genes, samples).
+
+        If there are multiple variant classifications available for a given
+        cell, they will be replaced as 'Multi_Hit'.
 
         Parameters
         ----------
         count : int, default: 10
-            Number of top mutated genes to display.
+            Number of top mutated genes to include.
+        samples : list, optional
+            List of samples that should be used to compute the matrix.
+        keep_empty : bool, default: False
+            If True, keep samples with all ``NaN``'s.
 
         Returns
         -------
@@ -846,6 +887,10 @@ def compute_waterfall(self, count=10):
             Dataframe containing waterfall data.
         """
         df = self.df[self.df.Variant_Classification.isin(NONSYN_NAMES)]
+
+        if samples is not None:
+            df = df[df.Tumor_Sample_Barcode.isin(samples)]
+
         f = lambda x: ''.join(x) if len(x) == 1 else 'Multi_Hit'
         df = df.groupby(['Hugo_Symbol', 'Tumor_Sample_Barcode'])[
             'Variant_Classification'].apply(f).to_frame()
@@ -860,8 +905,9 @@ def compute_waterfall(self, count=10):
         # Select the top mutated genes.
         df = df[:count]
 
-        # Remove columns (samples) with all NaN's.
-        df = df.dropna(axis=1, how='all')
+        # Drop samples with all NaN's.
+        if not keep_empty:
+            df = df.dropna(axis=1, how='all')
 
         # Sort the columns (samples).
         c = df.applymap(lambda x: 0 if pd.isnull(x) else 1).sort_values(
@@ -921,7 +967,7 @@ def plot_genes(
             colors = NONSYN_COLORS + ['k']
         else:
             raise ValueError(f'Found incorrect mode: {mode}')
-        df = self.compute_genes(count, mode=mode)
+        df = self.matrix_genes(count, mode=mode)
         df = df.iloc[::-1]
         if ax is None:
             fig, ax = plt.subplots(figsize=figsize)
@@ -932,7 +978,7 @@ def plot_genes(
         return ax
 
     def plot_oncoplot(
-        self, count=10, figsize=(15, 10), label_fontsize=15,
+        self, count=10, keep_empty=False, figsize=(15, 10), label_fontsize=15,
         ticklabels_fontsize=15, legend_fontsize=15
     ):
         """Create an oncoplot.
@@ -941,6 +987,8 @@ def plot_oncoplot(
         ----------
         count : int, default: 10
             Number of top mutated genes to display.
+        keep_empty : bool, default: False
+            If True, display samples that do not have any mutations.
         figsize : tuple, default: (15, 10)
             Width, height in inches. Format: (float, float).
         label_fontsize : float, default: 15
@@ -967,14 +1015,14 @@ def plot_oncoplot(
         [[ax1, ax2], [ax3, ax4], [ax5, ax6]] = axes
 
         # Create the TMB plot.
-        samples = list(self.compute_waterfall(count).columns)
+        samples = list(self.matrix_waterfall(count=count, keep_empty=keep_empty).columns)
         self.plot_tmb(ax=ax1, samples=samples)
         ax1.set_xlabel('')
         ax1.spines['right'].set_visible(False)
         ax1.spines['top'].set_visible(False)
         ax1.spines['bottom'].set_visible(False)
         ax1.set_ylabel('TMB', fontsize=label_fontsize)
-        ax1.set_yticks([0, self.compute_tmb().sum(axis=1).max()])
+        ax1.set_yticks([0, self.matrix_tmb().sum(axis=1).max()])
         ax1.tick_params(axis='y', which='major',
                         labelsize=ticklabels_fontsize)
 
@@ -982,7 +1030,7 @@ def plot_oncoplot(
         ax2.remove()
 
         # Create the waterfall plot.
-        self.plot_waterfall(count=count, ax=ax3, linewidths=1)
+        self.plot_waterfall(count=count, ax=ax3, linewidths=1, keep_empty=keep_empty)
         ax3.set_xlabel('')
         ax3.tick_params(axis='y', which='major', labelrotation=0,
                         labelsize=ticklabels_fontsize)
@@ -994,7 +1042,7 @@ def plot_oncoplot(
         ax4.spines['top'].set_visible(False)
         ax4.set_yticks([])
         ax4.set_xlabel('Samples', fontsize=label_fontsize)
-        ax4.set_xticks([0, self.compute_genes(
+        ax4.set_xticks([0, self.matrix_genes(
             10, mode='samples').sum(axis=1).max()])
         ax4.set_ylim(-0.5, count-0.5)
         ax4.tick_params(axis='x', which='major',
@@ -1262,7 +1310,7 @@ def plot_summary(
                         labelsize=ticklabels_fontsize)
 
         # Create the 'Variants per sample' figure.
-        median = self.compute_tmb().sum(axis=1).median()
+        median = self.matrix_tmb().sum(axis=1).median()
         self.plot_tmb(ax=ax4)
         ax4.set_title(f'Variants per sample (median={median:.1f})',
                       fontsize=title_fontsize)
@@ -1333,7 +1381,7 @@ def plot_tmb(self, ax=None, figsize=None, samples=None, **kwargs):
             >>> mf.plot_tmb()
             >>> plt.tight_layout()
         """
-        df = self.compute_tmb()
+        df = self.matrix_tmb()
         if samples is not None:
             df = df.loc[samples]
         if ax is None:
@@ -1381,7 +1429,7 @@ def plot_vaf(self, col, count=10, ax=None, figsize=None, **kwargs):
             >>> mf.plot_vaf('i_TumorVAF_WU')
             >>> plt.tight_layout()
         """
-        genes = self.compute_genes(count=count).index.to_list()
+        genes = self.matrix_genes(count=count).index.to_list()
         s = self.df.groupby('Hugo_Symbol')[col].median()
         genes = s[genes].sort_values(ascending=False).index.to_list()
         df = self.df[self.df.Hugo_Symbol.isin(genes)]
@@ -1470,7 +1518,7 @@ def plot_varsum(self, flip=False, ax=None, figsize=None):
             >>> mf.plot_varsum()
             >>> plt.tight_layout()
         """
-        df = self.compute_tmb()
+        df = self.matrix_tmb()
         df = pd.melt(df, value_vars=df.columns)
         if ax is None:
             fig, ax = plt.subplots(figsize=figsize)
@@ -1527,20 +1575,30 @@ def plot_vartype(self, ax=None, figsize=None, **kwargs):
         ax.set_ylabel('')
         return ax
 
-    def plot_waterfall(self, count=10, ax=None, figsize=None, **kwargs):
+    def plot_waterfall(
+        self, count=10, keep_empty=False, af=None, sort_by=None,
+        sample_order=None, ax=None, figsize=None, **kwargs
+    ):
         """Create a waterfall plot.
 
         Parameters
         ----------
         count : int, default: 10
             Number of top mutated genes to display.
+        keep_empty : bool, default: False
+            If True, display samples that do not have any mutations.
+        af : AnnFrame, optional
+            AnnFrame containing sample annoation data for the MafFrame.
+        sort_by : list, optional
+            Columns in the AnnFrame to sort the samples by.
+        sample_order : list, optional
+            List of samples to display.
         ax : matplotlib.axes.Axes, optional
             Pre-existing axes for the plot. Otherwise, crete a new one.
         figsize : tuple, optional
             Width, height in inches. Format: (float, float).
         kwargs
             Other keyword arguments will be passed down to
-            :meth:`matplotlib.axes.Axes.pcolormesh()` and
             :meth:`seaborn.heatmap`.
 
         Returns
@@ -1561,7 +1619,15 @@ def plot_waterfall(self, count=10, ax=None, figsize=None, **kwargs):
             >>> mf.plot_waterfall(linewidths=0.5)
             >>> plt.tight_layout()
         """
-        df = self.compute_waterfall(count)
+        df = self.matrix_waterfall(count=count, keep_empty=keep_empty)
+
+        if sort_by is not None:
+            _ = af.df.loc[df.columns]
+            _ = _.sort_values(sort_by)
+            df = df[_.index]
+
+        if sample_order is not None:
+            df = df[sample_order]
 
         # Apply the mapping between items and integers.
         l = reversed(NONSYN_NAMES + ['Multi_Hit', 'None'])

From 8f4bb1cd70576ca2962709b9e12a7ade2273c009 Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Mon, 21 Jun 2021 20:29:16 +0900
Subject: [PATCH 08/29] Add pymaf.MafFrame.filter_annot method

---
 CHANGELOG.rst    |  1 +
 fuc/api/pymaf.py | 63 ++++++++++++++++++++++++++++++++----------------
 2 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index eda066e..8ec40c7 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,6 +9,7 @@ Changelog
 * Add ``samples`` and ``shape`` attributes to :class:`pymaf.AnnFrame` class.
 * Rename :meth:`pymaf.MafFrame.compute_genes/tmb/waterfall` methods to :meth:`pymaf.MafFrame.matrix_genes/tmb/waterfall`.
 * Add ``keep_empty`` argument to :meth:`pymaf.MafFrame.matrix_waterfall/plot_oncoplot/plot_waterfall` methods.
+* Add :meth:`pymaf.MafFrame.filter_annot` method.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index d89ac4b..23289ec 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -380,7 +380,8 @@ def from_dict(cls, data, sample_col='Tumor_Sample_Barcode'):
 
     @classmethod
     def from_file(cls, fn, sample_col='Tumor_Sample_Barcode', sep='\t'):
-        """Construct AnnFrame from a delimited text file.
+        """
+        Construct an AnnFrame from a delimited text file.
 
         The text file must have at least one column that represents
         sample names which are used as index for pandas.DataFrame.
@@ -864,7 +865,7 @@ def matrix_tmb(self):
         df = df.rename_axis(None, axis=1)
         return df
 
-    def matrix_waterfall(self, count=10, samples=None, keep_empty=False):
+    def matrix_waterfall(self, count=10, keep_empty=False):
         """
         Compute a matrix of variant classifications with a shape of
         (genes, samples).
@@ -876,8 +877,6 @@ def matrix_waterfall(self, count=10, samples=None, keep_empty=False):
         ----------
         count : int, default: 10
             Number of top mutated genes to include.
-        samples : list, optional
-            List of samples that should be used to compute the matrix.
         keep_empty : bool, default: False
             If True, keep samples with all ``NaN``'s.
 
@@ -888,9 +887,6 @@ def matrix_waterfall(self, count=10, samples=None, keep_empty=False):
         """
         df = self.df[self.df.Variant_Classification.isin(NONSYN_NAMES)]
 
-        if samples is not None:
-            df = df[df.Tumor_Sample_Barcode.isin(samples)]
-
         f = lambda x: ''.join(x) if len(x) == 1 else 'Multi_Hit'
         df = df.groupby(['Hugo_Symbol', 'Tumor_Sample_Barcode'])[
             'Variant_Classification'].apply(f).to_frame()
@@ -1576,8 +1572,7 @@ def plot_vartype(self, ax=None, figsize=None, **kwargs):
         return ax
 
     def plot_waterfall(
-        self, count=10, keep_empty=False, af=None, sort_by=None,
-        sample_order=None, ax=None, figsize=None, **kwargs
+        self, count=10, keep_empty=False, samples=None, ax=None, figsize=None, **kwargs
     ):
         """Create a waterfall plot.
 
@@ -1587,11 +1582,7 @@ def plot_waterfall(
             Number of top mutated genes to display.
         keep_empty : bool, default: False
             If True, display samples that do not have any mutations.
-        af : AnnFrame, optional
-            AnnFrame containing sample annoation data for the MafFrame.
-        sort_by : list, optional
-            Columns in the AnnFrame to sort the samples by.
-        sample_order : list, optional
+        samples : list, optional
             List of samples to display.
         ax : matplotlib.axes.Axes, optional
             Pre-existing axes for the plot. Otherwise, crete a new one.
@@ -1621,13 +1612,8 @@ def plot_waterfall(
         """
         df = self.matrix_waterfall(count=count, keep_empty=keep_empty)
 
-        if sort_by is not None:
-            _ = af.df.loc[df.columns]
-            _ = _.sort_values(sort_by)
-            df = df[_.index]
-
-        if sample_order is not None:
-            df = df[sample_order]
+        if samples is not None:
+            df = df[samples]
 
         # Apply the mapping between items and integers.
         l = reversed(NONSYN_NAMES + ['Multi_Hit', 'None'])
@@ -1802,3 +1788,38 @@ def to_string(self):
             String representation of MafFrame.
         """
         return self.df.to_csv(index=False, sep='\t')
+
+    def filter_annot(self, af, expr):
+        """
+        Filter the MafFrame using sample annotation data.
+
+        Samples are selected by querying the columns of an AnnFrame with a
+        boolean expression. Samples not present in the MafFrame will be
+        excluded automatically.
+
+        Parameters
+        ----------
+        af : AnnFrame
+            AnnFrame with sample annotaton data.
+        expr : str
+            Query expression to evaluate.
+
+        Returns
+        -------
+        MafFrame
+            Filtered MafFrame.
+
+        Examples
+        --------
+
+        >>> from fuc import common, pymaf
+        >>> common.load_dataset('tcga-laml')
+        >>> mf = pymaf.MafFrame.from_file('~/fuc-data/tcga-laml/tcga_laml.maf.gz')
+        >>> af = pymaf.AnnFrame.from_file('~/fuc-data/tcga-laml/tcga_laml_annot.tsv')
+        >>> filtered_mf = mf.filter_annot(af, "FAB_classification == 'M4')
+        """
+        samples = af.df.query(expr).index
+        i = self.df.Tumor_Sample_Barcode.isin(samples)
+        df = self.df[i]
+        mf = self.__class__(df)
+        return mf

From 5232ae8fefa596b8721066c63ac13369b767fd47 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 09:47:06 +0900
Subject: [PATCH 09/29] Fix typo

---
 fuc/api/pymaf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index 23289ec..4d24f5f 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -1816,7 +1816,7 @@ def filter_annot(self, af, expr):
         >>> common.load_dataset('tcga-laml')
         >>> mf = pymaf.MafFrame.from_file('~/fuc-data/tcga-laml/tcga_laml.maf.gz')
         >>> af = pymaf.AnnFrame.from_file('~/fuc-data/tcga-laml/tcga_laml_annot.tsv')
-        >>> filtered_mf = mf.filter_annot(af, "FAB_classification == 'M4')
+        >>> filtered_mf = mf.filter_annot(af, "FAB_classification == 'M4'")
         """
         samples = af.df.query(expr).index
         i = self.df.Tumor_Sample_Barcode.isin(samples)

From 6e73ac78efe663854cab9a20b1f35c4ea4e55926 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 10:33:40 +0900
Subject: [PATCH 10/29] Update docs

---
 docs/examples/customized_oncoplot_1.py | 90 +++++++++++++++++++++++
 docs/tutorials.rst                     | 99 ++------------------------
 fuc/api/pymaf.py                       | 33 +++++++--
 3 files changed, 121 insertions(+), 101 deletions(-)
 create mode 100644 docs/examples/customized_oncoplot_1.py

diff --git a/docs/examples/customized_oncoplot_1.py b/docs/examples/customized_oncoplot_1.py
new file mode 100644
index 0000000..42d7bea
--- /dev/null
+++ b/docs/examples/customized_oncoplot_1.py
@@ -0,0 +1,90 @@
+# File: customized_oncoplot_1.py
+
+import matplotlib.pyplot as plt
+from fuc import common, pymaf
+common.load_dataset('tcga-laml')
+mf = pymaf.MafFrame.from_file('~/fuc-data/tcga-laml/tcga_laml.maf.gz')
+af = pymaf.AnnFrame.from_file('~/fuc-data/tcga-laml/tcga_laml_annot.tsv')
+
+# Define the shared variables.
+count=10
+figsize=(15, 10)
+label_fontsize=13
+ticklabels_fontsize=12
+legend_fontsize=12
+
+# Create the figure. Getting the right height ratios can be tricky and often requires a trial-and-error process.
+fig, axes = plt.subplots(6, 2, figsize=figsize, gridspec_kw={'height_ratios': [1, 10, 1, 1, 1, 3.5], 'width_ratios': [10, 1]})
+[[ax1, ax2], [ax3, ax4], [ax5, ax6], [ax7, ax8], [ax9, ax10], [ax11, ax12]] = axes
+fig.suptitle('customized_oncoplot_1.py', fontsize=20)
+
+# Create the TMB plot.
+samples = list(mf.matrix_waterfall(count).columns)
+mf.plot_tmb(ax=ax1, samples=samples)
+ax1.set_xlabel('')
+ax1.spines['right'].set_visible(False)
+ax1.spines['top'].set_visible(False)
+ax1.spines['bottom'].set_visible(False)
+ax1.set_ylabel('TMB', fontsize=label_fontsize)
+ax1.set_yticks([0, mf.matrix_tmb().sum(axis=1).max()])
+ax1.tick_params(axis='y', which='major', labelsize=ticklabels_fontsize)
+
+ax2.remove()
+
+# Create the waterfall plot.
+mf.plot_waterfall(count=count, ax=ax3, linewidths=1)
+ax3.set_xlabel('')
+ax3.tick_params(axis='y', which='major', labelrotation=0, labelsize=ticklabels_fontsize)
+
+# Create the genes plot.
+mf.plot_genes(count=count, ax=ax4, mode='samples', width=0.95)
+ax4.spines['right'].set_visible(False)
+ax4.spines['left'].set_visible(False)
+ax4.spines['top'].set_visible(False)
+ax4.set_yticks([])
+ax4.set_xlabel('Samples', fontsize=label_fontsize)
+ax4.set_xticks([0, mf.matrix_genes(10, mode='samples').sum(axis=1).max()])
+ax4.set_ylim(-0.5, count-0.5)
+ax4.tick_params(axis='x', which='major', labelsize=ticklabels_fontsize)
+
+# Create the annotation plot for 'FAB_classification'.
+af.plot_annot('FAB_classification', samples=samples, ax=ax5, linewidths=1, cmap='Dark2')
+ax5.set_xlabel('')
+ax5.set_ylabel('')
+
+ax6.remove()
+
+# Create the annotation plot for 'days_to_last_followup'.
+af.plot_annot('days_to_last_followup', samples=samples, numeric=True, ax=ax7, linewidths=1, cmap='viridis')
+ax7.set_xlabel('')
+ax7.set_ylabel('')
+
+ax8.remove()
+
+# Create the annotation plot for 'Overall_Survival_Status'.
+af.plot_annot('Overall_Survival_Status', samples=samples, ax=ax9, linewidths=1)
+ax9.set_xlabel('Samples', fontsize=label_fontsize)
+ax9.set_ylabel('')
+
+ax10.remove()
+
+# Create the legends. Getting the right legend locations can be tricky and often requires a trial-and-error process.
+handles1 = pymaf.legend_handles(name='waterfall')
+handles2 = af.legend_handles('FAB_classification', samples=samples, cmap='Dark2')
+handles3 = af.legend_handles('days_to_last_followup', samples=samples, numeric=True, cmap='viridis')
+handles4 = af.legend_handles('Overall_Survival_Status', samples=samples)
+leg1 = ax11.legend(handles=handles1, loc=(0, 0), title='Variant_Classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg2 = ax11.legend(handles=handles2, loc=(0.43, 0), title='FAB_classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg3 = ax11.legend(handles=handles3, loc=(0.62, 0), title='days_to_last_followup', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg4 = ax11.legend(handles=handles4, loc=(0.82, 0), title='Overall_Survival_Status', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+ax11.add_artist(leg1)
+ax11.add_artist(leg2)
+ax11.add_artist(leg3)
+ax11.add_artist(leg4)
+ax11.axis('off')
+
+ax12.remove()
+
+plt.tight_layout()
+plt.subplots_adjust(wspace=0.01, hspace=0.01)
+plt.savefig('customized_oncoplot_1.png')
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
index b671fb7..ea7a28e 100644
--- a/docs/tutorials.rst
+++ b/docs/tutorials.rst
@@ -1,8 +1,8 @@
 Tutorials
 *********
 
-Create customized oncoplot
-==========================
+Create customized oncoplots
+===========================
 
 We can use either the :command:`maf_oncoplt` command or the :meth:`pymaf.plot_oncoplot` method to create a "standard" oncoplot like the one shown below.
 
@@ -10,97 +10,6 @@ We can use either the :command:`maf_oncoplt` command or the :meth:`pymaf.plot_on
 
 While the plot is pleasing to the eye, one may wish to customize it to add, for example, various annotation data for the samples like this:
 
-.. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/customized_oncoplot.png
+.. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/customized_oncoplot_1.png
 
-We can (relatively) easily achieve above thanks to the LEGO block-like plotting methods in the ``pymaf`` submodule:
-
-.. code:: python3
-
-    # File: customized_oncoplot.py
-    import matplotlib.pyplot as plt
-    from fuc import common, pymaf
-    common.load_dataset('tcga-laml')
-    mf = pymaf.MafFrame.from_file('~/fuc-data/tcga-laml/tcga_laml.maf.gz')
-    af = pymaf.AnnFrame.from_file('~/fuc-data/tcga-laml/tcga_laml_annot.tsv')
-
-    # Define the shared variables.
-    count=10
-    figsize=(15, 10)
-    label_fontsize=13
-    ticklabels_fontsize=12
-    legend_fontsize=12
-
-    # Create the figure. Getting the right height ratios can be tricky and often requires a trial-and-error process.
-    fig, axes = plt.subplots(6, 2, figsize=figsize, gridspec_kw={'height_ratios': [1, 10, 1, 1, 1, 3.5], 'width_ratios': [10, 1]})
-    [[ax1, ax2], [ax3, ax4], [ax5, ax6], [ax7, ax8], [ax9, ax10], [ax11, ax12]] = axes
-    fig.suptitle('Customized oncoplot', fontsize=20)
-
-    # Create the TMB plot.
-    samples = list(mf.matrix_waterfall(count).columns)
-    mf.plot_tmb(ax=ax1, samples=samples)
-    ax1.set_xlabel('')
-    ax1.spines['right'].set_visible(False)
-    ax1.spines['top'].set_visible(False)
-    ax1.spines['bottom'].set_visible(False)
-    ax1.set_ylabel('TMB', fontsize=label_fontsize)
-    ax1.set_yticks([0, mf.matrix_tmb().sum(axis=1).max()])
-    ax1.tick_params(axis='y', which='major', labelsize=ticklabels_fontsize)
-
-    ax2.remove()
-
-    # Create the waterfall plot.
-    mf.plot_waterfall(count=count, ax=ax3, linewidths=1)
-    ax3.set_xlabel('')
-    ax3.tick_params(axis='y', which='major', labelrotation=0, labelsize=ticklabels_fontsize)
-
-    # Create the genes plot.
-    mf.plot_genes(count=count, ax=ax4, mode='samples', width=0.95)
-    ax4.spines['right'].set_visible(False)
-    ax4.spines['left'].set_visible(False)
-    ax4.spines['top'].set_visible(False)
-    ax4.set_yticks([])
-    ax4.set_xlabel('Samples', fontsize=label_fontsize)
-    ax4.set_xticks([0, mf.matrix_genes(10, mode='samples').sum(axis=1).max()])
-    ax4.set_ylim(-0.5, count-0.5)
-    ax4.tick_params(axis='x', which='major', labelsize=ticklabels_fontsize)
-
-    # Create the annotation plot for 'FAB_classification'.
-    af.plot_annot('FAB_classification', samples=samples, ax=ax5, linewidths=1, cmap='Dark2')
-    ax5.set_xlabel('')
-    ax5.set_ylabel('')
-
-    ax6.remove()
-
-    # Create the annotation plot for 'days_to_last_followup'.
-    af.plot_annot('days_to_last_followup', samples=samples, numeric=True, ax=ax7, linewidths=1, cmap='viridis')
-    ax7.set_xlabel('')
-    ax7.set_ylabel('')
-
-    ax8.remove()
-
-    # Create the annotation plot for 'Overall_Survival_Status'.
-    af.plot_annot('Overall_Survival_Status', samples=samples, ax=ax9, linewidths=1)
-    ax9.set_xlabel('Samples', fontsize=label_fontsize)
-    ax9.set_ylabel('')
-
-    ax10.remove()
-
-    # Create the legends. Getting the right legend locations can be tricky and often requires a trial-and-error process.
-    handles1 = pymaf.legend_handles(name='waterfall')
-    handles2 = af.legend_handles('FAB_classification', samples=samples, cmap='Dark2')
-    handles3 = af.legend_handles('days_to_last_followup', samples=samples, numeric=True, cmap='viridis')
-    handles4 = af.legend_handles('Overall_Survival_Status', samples=samples)
-    leg1 = ax11.legend(handles=handles1, loc=(0, 0), title='Variant_Classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
-    leg2 = ax11.legend(handles=handles2, loc=(0.43, 0), title='FAB_classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
-    leg3 = ax11.legend(handles=handles3, loc=(0.62, 0), title='days_to_last_followup', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
-    leg4 = ax11.legend(handles=handles4, loc=(0.79, 0), title='Overall_Survival_Status', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
-    ax11.add_artist(leg1)
-    ax11.add_artist(leg2)
-    ax11.add_artist(leg3)
-    ax11.add_artist(leg4)
-    ax11.axis('off')
-
-    ax12.remove()
-
-    plt.tight_layout()
-    plt.subplots_adjust(wspace=0.01, hspace=0.01)
+We can (relatively) easily achieve above thanks to the LEGO block-like plotting methods in the ``pymaf`` submodule (:download:`click <examples/customized_oncoplot_1.py>`).
diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index 4d24f5f..e3588c1 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -240,10 +240,11 @@ def legend_handles(name='regular'):
     return handles
 
 class AnnFrame:
-    """Class for storing sample annotation data.
+    """
+    Class for storing sample annotation data.
 
-    This class stores sample annotation data as pandas.DataFrame with sample
-    names as index.
+    This class stores sample annotation data as :class:`pandas.DataFrame`
+    with sample names as index.
 
     Note that an AnnFrame can have a different set of samples than its
     accompanying MafFrame.
@@ -520,11 +521,12 @@ def plot_annot(
         self, col, samples=None, numeric=False, segments=5, decimals=0,
         cmap='Pastel1', ax=None, figsize=None, **kwargs
     ):
-        """Create a 1D categorical heatmap for the given column.
+        """
+        Create a 1D categorical heatmap of the column.
 
-        In the case of a numeric column, use ``numeric=True`` which will
+        In the case of a numeric column, set ``numeric`` as True which will
         divide the values into equal-sized intervals, with the number of
-        intervals determined by the `segments` option.
+        intervals determined by ``segments``.
 
         Parameters
         ----------
@@ -588,6 +590,25 @@ def plot_annot(
         ax.set_yticks([])
         return ax
 
+    def sorted_samples(self, by, mf=None, keep_empty=False, nonsyn=False):
+        """
+        Return a sorted list of sample names.
+
+        Parameters
+        ----------
+        df : str or list
+            Column or list of columns to sort by.
+        """
+        df = self.df.copy()
+
+        if nonsyn:
+            samples = mf.matrix_waterfall(keep_empty=keep_empty).columns
+            df = df.loc[samples]
+
+        df = df.sort_values(by=by)
+
+        return df.index.to_list()
+
 class MafFrame:
     """Class for storing MAF data.
 

From 63aef455a5edb9df86bb26c98ff5295783778bed Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 10:51:23 +0900
Subject: [PATCH 11/29] Update docs

---
 CHANGELOG.rst                          |  1 +
 docs/examples/customized_oncoplot_2.py | 90 ++++++++++++++++++++++++++
 docs/tutorials.rst                     |  6 +-
 fuc/api/pymaf.py                       | 15 ++++-
 4 files changed, 109 insertions(+), 3 deletions(-)
 create mode 100644 docs/examples/customized_oncoplot_2.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8ec40c7..2e092c1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -10,6 +10,7 @@ Changelog
 * Rename :meth:`pymaf.MafFrame.compute_genes/tmb/waterfall` methods to :meth:`pymaf.MafFrame.matrix_genes/tmb/waterfall`.
 * Add ``keep_empty`` argument to :meth:`pymaf.MafFrame.matrix_waterfall/plot_oncoplot/plot_waterfall` methods.
 * Add :meth:`pymaf.MafFrame.filter_annot` method.
+* Add :meth:`pymaf.AnnFrame.sorted_samples` method.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/docs/examples/customized_oncoplot_2.py b/docs/examples/customized_oncoplot_2.py
new file mode 100644
index 0000000..9c66e04
--- /dev/null
+++ b/docs/examples/customized_oncoplot_2.py
@@ -0,0 +1,90 @@
+# File: customized_oncoplot_2.py
+
+import matplotlib.pyplot as plt
+from fuc import common, pymaf
+common.load_dataset('tcga-laml')
+mf = pymaf.MafFrame.from_file('~/fuc-data/tcga-laml/tcga_laml.maf.gz')
+af = pymaf.AnnFrame.from_file('~/fuc-data/tcga-laml/tcga_laml_annot.tsv')
+
+# Define the shared variables.
+count=10
+figsize=(15, 10)
+label_fontsize=13
+ticklabels_fontsize=12
+legend_fontsize=12
+
+# Create the figure. Getting the right height ratios can be tricky and often requires a trial-and-error process.
+fig, axes = plt.subplots(6, 2, figsize=figsize, gridspec_kw={'height_ratios': [1, 10, 1, 1, 1, 3.5], 'width_ratios': [10, 1]})
+[[ax1, ax2], [ax3, ax4], [ax5, ax6], [ax7, ax8], [ax9, ax10], [ax11, ax12]] = axes
+fig.suptitle('customized_oncoplot_2.py', fontsize=20)
+
+# Create the TMB plot.
+samples = af.sorted_samples(['FAB_classification', 'Overall_Survival_Status'], mf=mf, nonsyn=True)
+mf.plot_tmb(ax=ax1, samples=samples)
+ax1.set_xlabel('')
+ax1.spines['right'].set_visible(False)
+ax1.spines['top'].set_visible(False)
+ax1.spines['bottom'].set_visible(False)
+ax1.set_ylabel('TMB', fontsize=label_fontsize)
+ax1.set_yticks([0, mf.matrix_tmb().sum(axis=1).max()])
+ax1.tick_params(axis='y', which='major', labelsize=ticklabels_fontsize)
+
+ax2.remove()
+
+# Create the waterfall plot.
+mf.plot_waterfall(count=count, ax=ax3, linewidths=1, samples=samples)
+ax3.set_xlabel('')
+ax3.tick_params(axis='y', which='major', labelrotation=0, labelsize=ticklabels_fontsize)
+
+# Create the genes plot.
+mf.plot_genes(count=count, ax=ax4, mode='samples', width=0.95)
+ax4.spines['right'].set_visible(False)
+ax4.spines['left'].set_visible(False)
+ax4.spines['top'].set_visible(False)
+ax4.set_yticks([])
+ax4.set_xlabel('Samples', fontsize=label_fontsize)
+ax4.set_xticks([0, mf.matrix_genes(10, mode='samples').sum(axis=1).max()])
+ax4.set_ylim(-0.5, count-0.5)
+ax4.tick_params(axis='x', which='major', labelsize=ticklabels_fontsize)
+
+# Create the annotation plot for 'FAB_classification'.
+af.plot_annot('FAB_classification', samples=samples, ax=ax5, linewidths=1, cmap='Dark2')
+ax5.set_xlabel('')
+ax5.set_ylabel('')
+
+ax6.remove()
+
+# Create the annotation plot for 'days_to_last_followup'.
+af.plot_annot('days_to_last_followup', samples=samples, numeric=True, ax=ax7, linewidths=1, cmap='viridis')
+ax7.set_xlabel('')
+ax7.set_ylabel('')
+
+ax8.remove()
+
+# Create the annotation plot for 'Overall_Survival_Status'.
+af.plot_annot('Overall_Survival_Status', samples=samples, ax=ax9, linewidths=1)
+ax9.set_xlabel('Samples', fontsize=label_fontsize)
+ax9.set_ylabel('')
+
+ax10.remove()
+
+# Create the legends. Getting the right legend locations can be tricky and often requires a trial-and-error process.
+handles1 = pymaf.legend_handles(name='waterfall')
+handles2 = af.legend_handles('FAB_classification', samples=samples, cmap='Dark2')
+handles3 = af.legend_handles('days_to_last_followup', samples=samples, numeric=True, cmap='viridis')
+handles4 = af.legend_handles('Overall_Survival_Status', samples=samples)
+leg1 = ax11.legend(handles=handles1, loc=(0, 0), title='Variant_Classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg2 = ax11.legend(handles=handles2, loc=(0.43, 0), title='FAB_classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg3 = ax11.legend(handles=handles3, loc=(0.62, 0), title='days_to_last_followup', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg4 = ax11.legend(handles=handles4, loc=(0.82, 0), title='Overall_Survival_Status', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+ax11.add_artist(leg1)
+ax11.add_artist(leg2)
+ax11.add_artist(leg3)
+ax11.add_artist(leg4)
+ax11.axis('off')
+
+ax12.remove()
+
+plt.tight_layout()
+plt.subplots_adjust(wspace=0.01, hspace=0.01)
+plt.savefig('customized_oncoplot_2.png')
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
index ea7a28e..6d1e298 100644
--- a/docs/tutorials.rst
+++ b/docs/tutorials.rst
@@ -12,4 +12,8 @@ While the plot is pleasing to the eye, one may wish to customize it to add, for
 
 .. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/customized_oncoplot_1.png
 
-We can (relatively) easily achieve above thanks to the LEGO block-like plotting methods in the ``pymaf`` submodule (:download:`click <examples/customized_oncoplot_1.py>`).
+We can (relatively) easily achieve above thanks to the LEGO block-like plotting methods in the ``pymaf`` submodule (:download:`customized_oncoplot_1.py <examples/customized_oncoplot_1.py>`).
+
+We can go one step further and sort the samples by one or more annotations (:download:`customized_oncoplot_2.py <examples/customized_oncoplot_2.py>`):
+
+.. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/customized_oncoplot_2.png
diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index e3588c1..91e5ae5 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -528,6 +528,9 @@ def plot_annot(
         divide the values into equal-sized intervals, with the number of
         intervals determined by ``segments``.
 
+        See the :ref:`tutorials:Create customized oncoplots` tutorial to
+        learn how to create customized oncoplots.
+
         Parameters
         ----------
         col : str
@@ -998,7 +1001,11 @@ def plot_oncoplot(
         self, count=10, keep_empty=False, figsize=(15, 10), label_fontsize=15,
         ticklabels_fontsize=15, legend_fontsize=15
     ):
-        """Create an oncoplot.
+        """
+        Create a standard oncoplot.
+
+        See the :ref:`tutorials:Create customized oncoplots` tutorial to
+        learn how to create customized oncoplots.
 
         Parameters
         ----------
@@ -1595,7 +1602,11 @@ def plot_vartype(self, ax=None, figsize=None, **kwargs):
     def plot_waterfall(
         self, count=10, keep_empty=False, samples=None, ax=None, figsize=None, **kwargs
     ):
-        """Create a waterfall plot.
+        """
+        Create a waterfall plot.
+
+        See the :ref:`tutorials:Create customized oncoplots` tutorial to
+        learn how to create customized oncoplots.
 
         Parameters
         ----------

From b1192988f75a7b8cec544ee3af25c6fef113842b Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 12:39:01 +0900
Subject: [PATCH 12/29] Update tutorial

---
 docs/examples/customized_oncoplot_3.py | 93 ++++++++++++++++++++++++++
 docs/tutorials.rst                     |  4 ++
 2 files changed, 97 insertions(+)
 create mode 100644 docs/examples/customized_oncoplot_3.py

diff --git a/docs/examples/customized_oncoplot_3.py b/docs/examples/customized_oncoplot_3.py
new file mode 100644
index 0000000..b294096
--- /dev/null
+++ b/docs/examples/customized_oncoplot_3.py
@@ -0,0 +1,93 @@
+# File: customized_oncoplot_3.py
+
+import matplotlib.pyplot as plt
+from fuc import common, pymaf
+common.load_dataset('tcga-laml')
+mf = pymaf.MafFrame.from_file('~/fuc-data/tcga-laml/tcga_laml.maf.gz')
+af = pymaf.AnnFrame.from_file('~/fuc-data/tcga-laml/tcga_laml_annot.tsv')
+
+# Filter the MafFrame.
+mf = mf.filter_annot(af, "Overall_Survival_Status == 1")
+
+# Define the shared variables.
+count=10
+figsize=(15, 10)
+label_fontsize=13
+ticklabels_fontsize=12
+legend_fontsize=12
+
+# Create the figure. Getting the right height ratios can be tricky and often requires a trial-and-error process.
+fig, axes = plt.subplots(6, 2, figsize=figsize, gridspec_kw={'height_ratios': [1, 10, 1, 1, 1, 3.5], 'width_ratios': [10, 1]})
+[[ax1, ax2], [ax3, ax4], [ax5, ax6], [ax7, ax8], [ax9, ax10], [ax11, ax12]] = axes
+fig.suptitle('customized_oncoplot_3.py', fontsize=20)
+
+# Create the TMB plot.
+samples = list(mf.matrix_waterfall(count).columns)
+mf.plot_tmb(ax=ax1, samples=samples)
+ax1.set_xlabel('')
+ax1.spines['right'].set_visible(False)
+ax1.spines['top'].set_visible(False)
+ax1.spines['bottom'].set_visible(False)
+ax1.set_ylabel('TMB', fontsize=label_fontsize)
+ax1.set_yticks([0, mf.matrix_tmb().sum(axis=1).max()])
+ax1.tick_params(axis='y', which='major', labelsize=ticklabels_fontsize)
+
+ax2.remove()
+
+# Create the waterfall plot.
+mf.plot_waterfall(count=count, ax=ax3, linewidths=1, samples=samples)
+ax3.set_xlabel('')
+ax3.tick_params(axis='y', which='major', labelrotation=0, labelsize=ticklabels_fontsize)
+
+# Create the genes plot.
+mf.plot_genes(count=count, ax=ax4, mode='samples', width=0.95)
+ax4.spines['right'].set_visible(False)
+ax4.spines['left'].set_visible(False)
+ax4.spines['top'].set_visible(False)
+ax4.set_yticks([])
+ax4.set_xlabel('Samples', fontsize=label_fontsize)
+ax4.set_xticks([0, mf.matrix_genes(10, mode='samples').sum(axis=1).max()])
+ax4.set_ylim(-0.5, count-0.5)
+ax4.tick_params(axis='x', which='major', labelsize=ticklabels_fontsize)
+
+# Create the annotation plot for 'FAB_classification'.
+af.plot_annot('FAB_classification', samples=samples, ax=ax5, linewidths=1, cmap='Dark2')
+ax5.set_xlabel('')
+ax5.set_ylabel('')
+
+ax6.remove()
+
+# Create the annotation plot for 'days_to_last_followup'.
+af.plot_annot('days_to_last_followup', samples=samples, numeric=True, ax=ax7, linewidths=1, cmap='viridis')
+ax7.set_xlabel('')
+ax7.set_ylabel('')
+
+ax8.remove()
+
+# Create the annotation plot for 'Overall_Survival_Status'.
+af.plot_annot('Overall_Survival_Status', samples=samples, ax=ax9, linewidths=1)
+ax9.set_xlabel('Samples', fontsize=label_fontsize)
+ax9.set_ylabel('')
+
+ax10.remove()
+
+# Create the legends. Getting the right legend locations can be tricky and often requires a trial-and-error process.
+handles1 = pymaf.legend_handles(name='waterfall')
+handles2 = af.legend_handles('FAB_classification', samples=samples, cmap='Dark2')
+handles3 = af.legend_handles('days_to_last_followup', samples=samples, numeric=True, cmap='viridis')
+handles4 = af.legend_handles('Overall_Survival_Status', samples=samples)
+leg1 = ax11.legend(handles=handles1, loc=(0, 0), title='Variant_Classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg2 = ax11.legend(handles=handles2, loc=(0.43, 0), title='FAB_classification', ncol=2, fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg3 = ax11.legend(handles=handles3, loc=(0.62, 0), title='days_to_last_followup', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+leg4 = ax11.legend(handles=handles4, loc=(0.82, 0), title='Overall_Survival_Status', fontsize=legend_fontsize, title_fontsize=legend_fontsize)
+ax11.add_artist(leg1)
+ax11.add_artist(leg2)
+ax11.add_artist(leg3)
+ax11.add_artist(leg4)
+ax11.axis('off')
+
+ax12.remove()
+
+plt.tight_layout()
+plt.subplots_adjust(wspace=0.01, hspace=0.01)
+plt.savefig('customized_oncoplot_3.png')
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
index 6d1e298..bd95c08 100644
--- a/docs/tutorials.rst
+++ b/docs/tutorials.rst
@@ -17,3 +17,7 @@ We can (relatively) easily achieve above thanks to the LEGO block-like plotting
 We can go one step further and sort the samples by one or more annotations (:download:`customized_oncoplot_2.py <examples/customized_oncoplot_2.py>`):
 
 .. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/customized_oncoplot_2.png
+
+Finally, we can also subset the samples with annotation data (:download:`customized_oncoplot_3.py <examples/customized_oncoplot_3.py>`):
+
+.. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/customized_oncoplot_3.png

From 40b15dc66a4514b26ffaafc2e90f5d98b28609a2 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 13:40:13 +0900
Subject: [PATCH 13/29] Updata pymaf.MafFrame.to_vcf

---
 fuc/api/pymaf.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index 91e5ae5..c66de3c 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -645,22 +645,31 @@ def genes(self):
 
     @classmethod
     def from_file(cls, fn):
-        """Construct MafFrame from a MAF file.
+        """
+        Construct a MafFrame from a MAF file.
 
         Parameters
         ----------
         fn : str
-            MAF file path (zipped or unzipped).
+            MAF file (zipped or unzipped).
 
         Returns
         -------
         MafFrame
-            MafFrame.
+            MafFrame object.
 
         See Also
         --------
         MafFrame
             MafFrame object creation using constructor.
+
+        Examples
+        --------
+
+        >>> from fuc import common, pymaf
+        >>> common.load_dataset('tcga-laml')
+        >>> fn = '~/fuc-data/tcga-laml/tcga_laml.maf.gz'
+        >>> mf = pymaf.MafFrame.from_file(fn)
         """
         # Read the MAF file.
         df = pd.read_table(fn)
@@ -684,6 +693,9 @@ def from_file(cls, fn):
         if case_dict:
             df = df.rename(columns=case_dict)
 
+        # Set the data types.
+        df.Chromosome = df.Chromosome.astype(str)
+
         return cls(df)
 
     @classmethod
@@ -1795,8 +1807,14 @@ def one_row(r):
                 return r
             df = df.apply(one_row, axis=1)
 
+        # Create the metadata.
+        meta = [
+            '##fileformat=VCFv4.3',
+            '##source=fuc.api.pymaf.MafFrame.to_vcf',
+        ]
+
         # Create the VcfFrame.
-        vf = pyvcf.VcfFrame(['##fileformat=VCFv4.3'], df)
+        vf = pyvcf.VcfFrame(meta, df)
 
         return vf
 

From 4de7ccbbf10d7d932d214f71cf87a4889c725f31 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 13:52:46 +0900
Subject: [PATCH 14/29] Update common.load_dataset

---
 fuc/api/common.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fuc/api/common.py b/fuc/api/common.py
index ea4beab..07833a1 100644
--- a/fuc/api/common.py
+++ b/fuc/api/common.py
@@ -79,6 +79,8 @@ def load_dataset(name, force=False):
             'tcga_cohort.txt.gz',
             'tcga_laml.maf.gz',
             'tcga_laml_annot.tsv',
+            'tcga_laml.vcf',
+            'tcga_laml_vep.vcf',
         ],
         'pyvcf': [
             'plot_comparison.vcf',

From 7a73014739c2bf5ff7006fcc0054b97320064191 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 14:36:29 +0900
Subject: [PATCH 15/29] Deprecate pyvep.filter_lof method

---
 CHANGELOG.rst      |  2 ++
 fuc/api/pyvep.py   | 64 +++++++++++++++-------------------------------
 fuc/cli/vcf_vep.py | 18 ++++++-------
 3 files changed, 31 insertions(+), 53 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2e092c1..ec0d447 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,6 +11,8 @@ Changelog
 * Add ``keep_empty`` argument to :meth:`pymaf.MafFrame.matrix_waterfall/plot_oncoplot/plot_waterfall` methods.
 * Add :meth:`pymaf.MafFrame.filter_annot` method.
 * Add :meth:`pymaf.AnnFrame.sorted_samples` method.
+* Fix minor bug in :meth:`pymaf.MafFrame.to_frame` method.
+* Deprecate :meth:`pyvep.filter_lof` method.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/fuc/api/pyvep.py b/fuc/api/pyvep.py
index 5f99eec..8b2fa19 100644
--- a/fuc/api/pyvep.py
+++ b/fuc/api/pyvep.py
@@ -505,7 +505,11 @@ def to_frame(vf, as_zero=False):
     else:
         df = df.replace('', np.nan)
     df.columns = annot_names(vf)
-    df = df.astype(DATA_TYPES)
+    d = {}
+    for col in df.columns:
+        if col in DATA_TYPES:
+            d[col] = DATA_TYPES[col]
+    df = df.astype(d)
     return df
 
 def pick_result(vf, mode='mostsevere'):
@@ -592,48 +596,6 @@ def annot_names(vf):
             l = re.search(r'Format: (.*?)">', vf.meta[i]).group(1).split('|')
     return l
 
-def filter_lof(vf, opposite=None, as_index=False):
-    """Select rows whose conseuqence annotation is deleterious and/or LoF.
-
-    Parameters
-    ----------
-    opposite : bool, default: False
-        If True, return rows that don't meet the said criteria.
-    as_index : bool, default: False
-        If True, return boolean index array instead of VcfFrame.
-
-    Returns
-    -------
-    VcfFrame or pandas.Series
-        Filtered VcfFrame or boolean index array.
-    """
-    consequence_index = annot_names(vf).index('Consequence')
-    impact_index = annot_names(vf).index('IMPACT')
-    polyphen_index = annot_names(vf).index('PolyPhen')
-    sift_index = annot_names(vf).index('SIFT')
-    def one_row(r):
-        l = row_firstann(r).split('|')
-        consequence = l[consequence_index]
-        impact = l[impact_index]
-        polyphen = l[polyphen_index]
-        sift = l[sift_index]
-        if impact not in ['HIGH', 'MODERATE']:
-            return False
-        if consequence == 'missense_variant':
-            if 'damaging' in polyphen or 'deleterious' in sift:
-                return True
-            else:
-                return False
-        return True
-    i = vf.df.apply(one_row, axis=1)
-    if opposite:
-        i = ~i
-    if as_index:
-        return i
-    df = vf.df[i]
-    vf = vf.__class__(vf.copy_meta(), df)
-    return vf
-
 def filter_query(vf, expr, opposite=None, as_index=False, as_zero=False):
     """
     Select rows that satisfy the query expression.
@@ -655,6 +617,22 @@ def filter_query(vf, expr, opposite=None, as_index=False, as_zero=False):
     -------
     VcfFrame or pandas.Series
         Filtered VcfFrame or boolean index array.
+
+    Examples
+    --------
+
+    >>> from fuc import common, pyvcf, pyvep
+    >>> common.load_dataset('tcga-laml')
+    >>> fn = '~/fuc-data/tcga-laml/tcga_laml_vep.vcf'
+    >>> vf = pyvcf.VcfFrame.from_file(fn)
+    >>> filtered_vf = pyvep.filter_query(vf, "SYMBOL == 'TP53'")
+    >>> filtered_vf = pyvep.filter_query(vf, "SYMBOL != 'TP53'")
+    >>> filtered_vf = pyvep.filter_query(vf, "SYMBOL != 'TP53'", opposite=True)
+    >>> filtered_vf = pyvep.filter_query(vf, "Consequence in ['splice_donor_variant', 'stop_gained']")
+    >>> filtered_vf = pyvep.filter_query(vf, "(SYMBOL == 'TP53') and (Consequence.str.contains('stop_gained'))")
+    >>> filtered_vf = pyvep.filter_query(vf, "gnomAD_AF < 0.001")
+    >>> filtered_vf = pyvep.filter_query(vf, "gnomAD_AF < 0.001", as_zero=True)
+    >>> filtered_vf = pyvep.filter_query(vf, "(IMPACT == 'HIGH') or (Consequence.str.contains('missense_variant') and (PolyPhen.str.contains('damaging') or SIFT.str.contains('deleterious')))")
     """
     df = to_frame(vf, as_zero=as_zero)
     i = vf.df.index.isin(df.query(expr).index)
diff --git a/fuc/cli/vcf_vep.py b/fuc/cli/vcf_vep.py
index a39ddbf..d4b8c54 100644
--- a/fuc/cli/vcf_vep.py
+++ b/fuc/cli/vcf_vep.py
@@ -6,15 +6,13 @@
 details on query expression, please visit the method's documentation page.
 
 usage examples:
-  $ fuc {api.common._script_name()} in.vcf 'SYMBOL == "TP53"' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'SYMBOL != "TP53"' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'SYMBOL == "TP53"' --opposite > out.vcf
-  $ fuc {api.common._script_name()} in.vcf \\
-      'Consequence in ["splice_donor_variant", "stop_gained"]' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf \\
-      '(SYMBOL == "TP53") and (Consequence.str.contains("stop_gained"))' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'gnomAD_AF < 0.001' > out.vcf
-  $ fuc {api.common._script_name()} in.vcf 'gnomAD_AF < 0.001' --as_zero > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "SYMBOL == 'TP53'" > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "SYMBOL != 'TP53'" > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "SYMBOL == 'TP53'" --opposite > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "Consequence in ['splice_donor_variant', 'stop_gained']" > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "(SYMBOL == 'TP53') and (Consequence.str.contains('stop_gained'))" > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "gnomAD_AF < 0.001" > out.vcf
+  $ fuc {api.common._script_name()} in.vcf "gnomAD_AF < 0.001" --as_zero > out.vcf
 """
 
 def create_parser(subparsers):
@@ -24,7 +22,7 @@ def create_parser(subparsers):
         help='[VCF] filter a VCF file annotated by Ensemble VEP',
         description=description,
     )
-    parser.add_argument('vcf', help='Ensemble VEP-annotated VCF file')
+    parser.add_argument('vcf', help='VCF file annotated with Ensemble VEP')
     parser.add_argument('expr', help='query expression to evaluate')
     parser.add_argument(
         '--opposite',

From b6af06c29cb85d5abf4e83adb12847f58932076b Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 14:39:26 +0900
Subject: [PATCH 16/29] Update docs

---
 docs/cli.rst | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/docs/cli.rst b/docs/cli.rst
index 95a3133..91c4538 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -729,18 +729,16 @@ vcf_vep
    details on query expression, please visit the method's documentation page.
    
    usage examples:
-     $ fuc vcf_vep in.vcf 'SYMBOL == "TP53"' > out.vcf
-     $ fuc vcf_vep in.vcf 'SYMBOL != "TP53"' > out.vcf
-     $ fuc vcf_vep in.vcf 'SYMBOL == "TP53"' --opposite > out.vcf
-     $ fuc vcf_vep in.vcf \
-         'Consequence in ["splice_donor_variant", "stop_gained"]' > out.vcf
-     $ fuc vcf_vep in.vcf \
-         '(SYMBOL == "TP53") and (Consequence.str.contains("stop_gained"))' > out.vcf
-     $ fuc vcf_vep in.vcf 'gnomAD_AF < 0.001' > out.vcf
-     $ fuc vcf_vep in.vcf 'gnomAD_AF < 0.001' --as_zero > out.vcf
+     $ fuc vcf_vep in.vcf "SYMBOL == 'TP53'" > out.vcf
+     $ fuc vcf_vep in.vcf "SYMBOL != 'TP53'" > out.vcf
+     $ fuc vcf_vep in.vcf "SYMBOL == 'TP53'" --opposite > out.vcf
+     $ fuc vcf_vep in.vcf "Consequence in ['splice_donor_variant', 'stop_gained']" > out.vcf
+     $ fuc vcf_vep in.vcf "(SYMBOL == 'TP53') and (Consequence.str.contains('stop_gained'))" > out.vcf
+     $ fuc vcf_vep in.vcf "gnomAD_AF < 0.001" > out.vcf
+     $ fuc vcf_vep in.vcf "gnomAD_AF < 0.001" --as_zero > out.vcf
    
    positional arguments:
-     vcf         Ensemble VEP-annotated VCF file
+     vcf         VCF file annotated with Ensemble VEP
      expr        query expression to evaluate
    
    optional arguments:

From 12e919b2dfcc90e126670f9097e1afb6ca5320cb Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 14:50:22 +0900
Subject: [PATCH 17/29] Deprecate pyvep.filter_clinsig method

---
 CHANGELOG.rst    |   2 +-
 fuc/api/pyvep.py | 122 -----------------------------------------------
 2 files changed, 1 insertion(+), 123 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index ec0d447..0f4848d 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,7 +12,7 @@ Changelog
 * Add :meth:`pymaf.MafFrame.filter_annot` method.
 * Add :meth:`pymaf.AnnFrame.sorted_samples` method.
 * Fix minor bug in :meth:`pymaf.MafFrame.to_frame` method.
-* Deprecate :meth:`pyvep.filter_lof` method.
+* Deprecate :meth:`pyvep.filter_lof/clinsig` methods.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/fuc/api/pyvep.py b/fuc/api/pyvep.py
index 8b2fa19..47981b8 100644
--- a/fuc/api/pyvep.py
+++ b/fuc/api/pyvep.py
@@ -259,128 +259,6 @@ def row_mostsevere(r):
     f = lambda x: SEVERITIY.index(x.split('|')[1].split('&')[0])
     return sorted(results.split(','), key=f)[0]
 
-def filter_clinsig(vf, whitelist=None, blacklist=None, opposite=False,
-                   index=False):
-    """Select rows based on the given CLIN_SIG values.
-
-    List of CLIN_SIG values:
-
-        - benign
-        - likely_benign
-        - pathogenic
-        - likely_pathogenic
-        - drug_response
-        - risk_factor
-        - uncertain_significance
-        - conflicting_interpretations_of_pathogenicity
-        - not_provided
-        - other
-        - benign/likely_benign
-        - pathogenic/likely_pathogenic
-        - ...
-
-    Parameters
-    ----------
-    vf : VcfFrame
-        VcfFrame.
-    whilelist : list, default: None
-        If these CLIN_SIG values are present, select the row.
-    blacklist : list, default: None
-        If these CLIN_SIG values are present, do not select the row.
-    opposite : bool, default: False
-        If True, return rows that don't meet the said criteria.
-    index : bool, default: False
-        If True, return boolean index array instead of VcfFrame.
-
-    Returns
-    -------
-    VcfFrame or pandas.Series
-        Filtered VcfFrame or boolean index array.
-
-    Examples
-    --------
-    Assume we have the following data:
-
-    >>> meta = [
-    ...     '##fileformat=VCFv4.1',
-    ...     '##VEP="v104" time="2021-05-20 10:50:12" cache="/net/isilonP/public/ro/ensweb-data/latest/tools/grch37/e104/vep/cache/homo_sapiens/104_GRCh37" db="homo_sapiens_core_104_37@hh-mysql-ens-grch37-web" 1000genomes="phase3" COSMIC="92" ClinVar="202012" HGMD-PUBLIC="20204" assembly="GRCh37.p13" dbSNP="154" gencode="GENCODE 19" genebuild="2011-04" gnomAD="r2.1" polyphen="2.2.2" regbuild="1.0" sift="sift5.2.2"',
-    ...     '##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|MANE_SELECT|MANE_PLUS_CLINICAL|TSL|APPRIS|SIFT|PolyPhen|AF|CLIN_SIG|SOMATIC|PHENO|PUBMED|MOTIF_NAME|MOTIF_POS|HIGH_INF_POS|MOTIF_SCORE_CHANGE|TRANSCRIPTION_FACTORS">'
-    ... ]
-    >>> data = {
-    ...     'CHROM': ['chr1', 'chr1', 'chr2', 'chr2'],
-    ...     'POS': [100, 101, 200, 201],
-    ...     'ID': ['.', '.', '.', '.'],
-    ...     'REF': ['G', 'C', 'CAG', 'C'],
-    ...     'ALT': ['T', 'T', 'C', 'T'],
-    ...     'QUAL': ['.', '.', '.', '.'],
-    ...     'FILTER': ['.', '.', '.', '.'],
-    ...     'INFO': [
-    ...         'CSQ=T|missense_variant|MODERATE|MTOR|ENSG00000198793|Transcript|ENST00000361445.4|protein_coding|47/58||||6721|6644|2215|S/Y|tCt/tAt|rs587777894&COSV63868278&COSV63868313||-1||HGNC|3942|||||deleterious(0)|possibly_damaging(0.876)||likely_pathogenic&pathogenic|0&1&1|1&1&1|26619011&27159400&24631838&26018084&27830187|||||',
-    ...         'CSQ=T|synonymous_variant|LOW|MTOR|ENSG00000198793|Transcript|ENST00000361445.4|protein_coding|49/58||||6986|6909|2303|L|ctG/ctA|rs11121691&COSV63870864||-1||HGNC|3942|||||||0.2206|benign|0&1|1&1|24996771|||||',
-    ...         'CSQ=-|frameshift_variant|HIGH|BRCA2|ENSG00000139618|Transcript|ENST00000380152.3|protein_coding|18/27||||8479-8480|8246-8247|2749|Q/X|cAG/c|rs80359701||1||HGNC|1101||||||||pathogenic||1|26467025&26295337&15340362|||||',
-    ...         'CSQ=T|missense_variant|MODERATE|MTOR|ENSG00000198793|Transcript|ENST00000361445.4|protein_coding|30/58||||4516|4439|1480|R/H|cGc/cAc|rs780930764&COSV63868373||-1||HGNC|3942|||||tolerated(0.13)|benign(0)||likely_benign|0&1|1&1||||||'
-    ...     ],
-    ...     'FORMAT': ['GT', 'GT', 'GT', 'GT'],
-    ...     'Steven': ['0/1', '0/1', '0/1', '0/1'],
-    ... }
-    >>> vf = pyvcf.VcfFrame.from_dict(meta, data)
-    >>> pyvep.parseann(vf, ['CLIN_SIG']).df
-      CHROM  POS ID  REF ALT QUAL FILTER                          INFO FORMAT Steven
-    0  chr1  100  .    G   T    .      .  likely_pathogenic&pathogenic     GT    0/1
-    1  chr1  101  .    C   T    .      .                        benign     GT    0/1
-    2  chr2  200  .  CAG   C    .      .                    pathogenic     GT    0/1
-    3  chr2  201  .    C   T    .      .                 likely_benign     GT    0/1
-
-    We can select rows with pathogenic or likely_pathogenic:
-
-    >>> whitelist=['pathogenic', 'likely_pathogenic']
-    >>> temp_vf = pyvep.filter_clinsig(vf, whitelist=whitelist)
-    >>> pyvep.parseann(temp_vf, ['CLIN_SIG']).df
-      CHROM  POS ID  REF ALT QUAL FILTER                          INFO FORMAT Steven
-    0  chr1  100  .    G   T    .      .  likely_pathogenic&pathogenic     GT    0/1
-    1  chr2  200  .  CAG   C    .      .                    pathogenic     GT    0/1
-
-    We can also remove those rows:
-
-    >>> temp_vf = pyvep.filter_clinsig(vf, whitelist=whitelist, opposite=True)
-    >>> pyvep.parseann(temp_vf, ['CLIN_SIG']).df
-      CHROM  POS ID REF ALT QUAL FILTER           INFO FORMAT Steven
-    0  chr1  101  .   C   T    .      .         benign     GT    0/1
-    1  chr2  201  .   C   T    .      .  likely_benign     GT    0/1
-
-    Finally, we can return boolean index array from the filtering:
-
-    >>> pyvep.filter_clinsig(vf, whitelist=whitelist, index=True)
-    0     True
-    1    False
-    2     True
-    3    False
-    dtype: bool
-    """
-    if whitelist is None and blacklist is None:
-        raise ValueError('must provide either whitelist or blcklist')
-    if whitelist is None:
-        whitelist = []
-    if blacklist is None:
-        blacklist = []
-    def func(r):
-        ann = row_firstann(r)
-        i = annot_names(vf).index('CLIN_SIG')
-        values = ann.split('|')[i].split('&')
-        if not list(set(values) & set(whitelist)):
-            return False
-        if list(set(values) & set(blacklist)):
-            return False
-        return True
-    i = vf.df.apply(func, axis=1)
-    if opposite:
-        i = ~i
-    if index:
-        return i
-    df = vf.df[i].reset_index(drop=True)
-    vf = vf.__class__(vf.copy_meta(), df)
-    return vf
-
 def parseann(vf, targets, sep=' | ', as_series=False):
     """Parse variant annotations in VcfFrame.
 

From 6b0c77e248bdc997070d10e72fdf831af5bac828 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 15:27:22 +0900
Subject: [PATCH 18/29] Update docs

---
 fuc/api/pymaf.py |  2 +-
 fuc/api/pyvep.py | 62 ++++++++++++++++++++++++------------------------
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index c66de3c..db79247 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -700,7 +700,7 @@ def from_file(cls, fn):
 
     @classmethod
     def from_vcf(cls, vcf):
-        """Construct MafFrame from a VCF file or VcfFrame.
+        """Construct a MafFrame from a VCF file or VcfFrame.
 
         The input VCF should already contain functional annotation data
         from a tool such as Ensemble VEP, SnpEff, and ANNOVAR. The
diff --git a/fuc/api/pyvep.py b/fuc/api/pyvep.py
index 47981b8..88b6720 100644
--- a/fuc/api/pyvep.py
+++ b/fuc/api/pyvep.py
@@ -13,37 +13,37 @@
 symbol to protein change. However, most of the analysis in pyvep uses the
 following fields:
 
-+-----+--------------------+---------------------+----------------------------------------------+
-| No. | Name               | Description         | Examples                                     |
-+=====+====================+=====================+==============================================+
-| 1   | Allele             | Variant allele      | 'C', 'T', 'CG'                               |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 2   | Consequence        | Consequence type    | 'missense_variant', 'stop_gained'            |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 3   | IMPACT             | Consequence impact  | 'MODERATE', 'HIGH'                           |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 4   | SYMBOL             | Gene symbol         | 'TP53', 'APC'                                |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 5   | Gene               | Ensembl ID          | 7157, 55294                                  |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 6   | BIOTYPE            | Transcript biotype  | 'protein_coding', 'pseudogene', 'miRNA'      |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 7   | EXON               | Exon number         | '8/17', '17/17'                              |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 8   | Protein_position   | Amino acid position | 234, 1510                                    |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 9   | Amino_acids        | Amino acid changes  | 'R/\*', 'A/X', 'V/A'                         |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 10  | Existing_variation | Variant identifier  | 'rs17851045', 'rs786201856&COSV57325157'     |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 11  | STRAND             | Genomic strand      | 1, -1                                        |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 12  | SIFT               | SIFT prediction     | 'deleterious(0)', 'tolerated(0.6)'           |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 13  | PolyPhen           | PolyPhen prediction | 'benign(0.121)', 'probably_damaging(0.999)'  |
-+-----+--------------------+---------------------+----------------------------------------------+
-| 14  | CLIN_SIG           | ClinVar prediction  | 'pathogenic', 'likely_pathogenic&pathogenic' |
-+-----+--------------------+---------------------+----------------------------------------------+
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| No. | Name               | Description         | Examples                                                                                                 |
++=====+====================+=====================+==========================================================================================================+
+| 1   | Allele             | Variant allele      | 'C', 'T', 'CG'                                                                                           |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 2   | Consequence        | Consequence type    | 'missense_variant', 'stop_gained'                                                                        |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 3   | IMPACT             | Consequence impact  | 'MODERATE', 'HIGH'                                                                                       |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 4   | SYMBOL             | Gene symbol         | 'TP53', 'APC'                                                                                            |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 5   | Gene               | Ensembl ID          | 7157, 55294                                                                                              |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 6   | BIOTYPE            | Transcript biotype  | 'protein_coding', 'pseudogene', 'miRNA'                                                                  |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 7   | EXON               | Exon number         | '8/17', '17/17'                                                                                          |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 8   | Protein_position   | Amino acid position | 234, 1510                                                                                                |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 9   | Amino_acids        | Amino acid changes  | 'R/\*', 'A/X', 'V/A'                                                                                     |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 10  | Existing_variation | Variant identifier  | 'rs17851045', 'rs786201856&COSV57325157'                                                                 |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 11  | STRAND             | Genomic strand      | 1, -1                                                                                                    |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 12  | SIFT               | SIFT prediction     | 'tolerated(0.6)', 'tolerated_low_confidence(0.37)', 'deleterious_low_confidence(0.03)', 'deleterious(0)' |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 13  | PolyPhen           | PolyPhen prediction | 'benign(0.121)', 'possibly_damaging(0.459)', 'probably_damaging(0.999)'                                  |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
+| 14  | CLIN_SIG           | ClinVar prediction  | 'pathogenic', 'likely_pathogenic&pathogenic'                                                             |
++-----+--------------------+---------------------+----------------------------------------------------------------------------------------------------------+
 """
 
 import re

From 89e5489800212f5036c117b9687e74a88d0caff6 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Tue, 22 Jun 2021 16:38:22 +0900
Subject: [PATCH 19/29] Update pymaf.MafFrame.from_vcf method to extract
 genotype keys

---
 CHANGELOG.rst    |  1 +
 fuc/api/pymaf.py | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 0f4848d..56f9705 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -13,6 +13,7 @@ Changelog
 * Add :meth:`pymaf.AnnFrame.sorted_samples` method.
 * Fix minor bug in :meth:`pymaf.MafFrame.to_frame` method.
 * Deprecate :meth:`pyvep.filter_lof/clinsig` methods.
+* Update :meth:`pymaf.MafFrame.from_vcf` method to extract genotype keys (e.g. DP, AD, AF).
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py
index db79247..1c878c3 100644
--- a/fuc/api/pymaf.py
+++ b/fuc/api/pymaf.py
@@ -699,7 +699,7 @@ def from_file(cls, fn):
         return cls(df)
 
     @classmethod
-    def from_vcf(cls, vcf):
+    def from_vcf(cls, vcf, keys=None, names=None):
         """Construct a MafFrame from a VCF file or VcfFrame.
 
         The input VCF should already contain functional annotation data
@@ -712,6 +712,10 @@ def from_vcf(cls, vcf):
         ----------
         vcf : str or VcfFrame
             VCF file or VcfFrame.
+        keys : str or list
+            Genotype key or list of genotype keys.
+        names : str or list
+            Column name or list of column names to use in the MafFrame.
 
         Examples
         --------
@@ -813,6 +817,10 @@ def one_row(r):
                 Tumor_Seq_Allele2 = tumor_seq_allele2,
                 Tumor_Sample_Barcode = tumor_sample_barcode,
                 Protein_Change = protein_change,
+                CHROM = r.CHROM,
+                POS = r.POS,
+                REF = r.REF,
+                ALT = r.ALT,
             )
 
             return pd.Series(d)
@@ -828,6 +836,32 @@ def one_row(r):
         del df['Tumor_Sample_Barcode']
         df = df.join(s)
 
+        # Append the genotype keys.
+        if keys is None:
+            keys = []
+        if names is None:
+            names = []
+        if isinstance(keys, str):
+            keys = [keys]
+        if isinstance(names, str):
+            names = [names]
+        for i, key in enumerate(keys):
+            temp_df = vf.extract(key)
+            temp_df = pd.concat([vf.df.iloc[:, :9], temp_df], axis=1)
+            temp_df = temp_df.drop(
+                columns=['ID', 'QUAL', 'FILTER', 'INFO', 'FORMAT'])
+            temp_df = pd.melt(
+                temp_df,
+                id_vars=['CHROM', 'POS', 'REF', 'ALT'],
+                var_name='Tumor_Sample_Barcode',
+            )
+            temp_df = temp_df[temp_df.value != '.']
+            df = df.merge(temp_df,
+                on=['CHROM', 'POS', 'REF', 'ALT', 'Tumor_Sample_Barcode'])
+            df = df.rename(columns={'value': names[i]})
+
+        df = df.drop(columns=['CHROM', 'POS', 'REF', 'ALT'])
+
         return cls(df)
 
     def matrix_genes(self, count=10, mode='variants'):

From 7debd86e81e23409390ef0b77044022a96381a5d Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 13:08:04 +0900
Subject: [PATCH 20/29] Fix typo in README

---
 README.rst     | 2 +-
 docs/create.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index dbb8106..301684c 100644
--- a/README.rst
+++ b/README.rst
@@ -177,7 +177,7 @@ To index a CRAM file:
 
 .. code-block:: text
 
-   $ fuc bam_head in.cram
+   $ fuc bam_index in.cram
 
 To slice a BAM file:
 
diff --git a/docs/create.py b/docs/create.py
index 2daa2bc..bb02ba0 100644
--- a/docs/create.py
+++ b/docs/create.py
@@ -165,7 +165,7 @@
 
 .. code-block:: text
 
-   $ fuc bam_head in.cram
+   $ fuc bam_index in.cram
 
 To slice a BAM file:
 

From 280eb1209e64a9b6a2b7cad67c120c1330190419 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 15:40:29 +0900
Subject: [PATCH 21/29] Update CLI

---
 CHANGELOG.rst          |   1 +
 README.rst             |  54 ++++++-------
 docs/cli.rst           | 180 +++++++++++++++++------------------------
 fuc/__main__.py        |  15 +++-
 fuc/api/common.py      |  17 +++-
 fuc/cli/bam_head.py    |   2 +-
 fuc/cli/bam_index.py   |   2 +-
 fuc/cli/bam_rename.py  |   2 +-
 fuc/cli/bam_slice.py   |  66 ++++++++++-----
 fuc/cli/bed_intxn.py   |   2 +-
 fuc/cli/bed_sum.py     |   2 +-
 fuc/cli/fq_count.py    |   2 +-
 fuc/cli/fq_sum.py      |   2 +-
 fuc/cli/fuc_compf.py   |   2 +-
 fuc/cli/fuc_demux.py   |   2 +-
 fuc/cli/fuc_exist.py   |   4 +-
 fuc/cli/fuc_find.py    |   2 +-
 fuc/cli/maf_maf2vcf.py |   2 +-
 fuc/cli/maf_oncoplt.py |   2 +-
 fuc/cli/maf_sumplt.py  |   2 +-
 fuc/cli/maf_vcf2maf.py |   2 +-
 fuc/cli/tbl_merge.py   |   2 +-
 fuc/cli/tbl_sum.py     |   2 +-
 fuc/cli/vcf_filter.py  |   2 +-
 fuc/cli/vcf_merge.py   |   2 +-
 fuc/cli/vcf_rename.py  |   2 +-
 fuc/cli/vcf_slice.py   |   2 +-
 fuc/cli/vcf_vcf2bed.py |   2 +-
 fuc/cli/vcf_vep.py     |   2 +-
 29 files changed, 200 insertions(+), 181 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 56f9705..23738fa 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -14,6 +14,7 @@ Changelog
 * Fix minor bug in :meth:`pymaf.MafFrame.to_frame` method.
 * Deprecate :meth:`pyvep.filter_lof/clinsig` methods.
 * Update :meth:`pymaf.MafFrame.from_vcf` method to extract genotype keys (e.g. DP, AD, AF).
+* Update :command:`bam_slice` command.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/README.rst b/README.rst
index 301684c..9cfdd45 100644
--- a/README.rst
+++ b/README.rst
@@ -107,35 +107,35 @@ For getting help on the fuc CLI:
    usage: fuc [-h] [-v] COMMAND ...
    
    positional arguments:
-     COMMAND        name of the command
-       bam_head     [BAM] print the header of a SAM/BAM/CRAM file
-       bam_index    [BAM] index a SAM/BAM/CRAM file
-       bam_rename   [BAM] rename the samples in a SAM/BAM/CRAM file
-       bam_slice    [BAM] slice a SAM/BAM/CRAM file
-       bed_intxn    [BED] find intersection of two or more BED files
-       bed_sum      [BED] summarize a BED file
-       fq_count     [FASTQ] count sequence reads in FASTQ files
-       fq_sum       [FASTQ] summarize a FASTQ file
-       fuc_compf    [FUC] compare contents of two files
-       fuc_demux    [FUC] parse Reports directory from bcl2fastq or bcl2fastq2
-       fuc_exist    [FUC] check whether files/directories exist
-       fuc_find     [FUC] find files with certain extension recursively
-       maf_maf2vcf  [MAF] convert a MAF file to a VCF file
-       maf_oncoplt  [MAF] create an oncoplot with a MAF file
-       maf_sumplt   [MAF] create a summary plot with a MAF file
-       maf_vcf2maf  [MAF] convert an annotated VCF file to a MAF file
-       tbl_merge    [TABLE] merge two table files
-       tbl_sum      [TABLE] summarize a table file
-       vcf_filter   [VCF] filter a VCF file
-       vcf_merge    [VCF] merge two or more VCF files
-       vcf_rename   [VCF] rename the samples in a VCF file.
-       vcf_slice    [VCF] slice a VCF file
-       vcf_vcf2bed  [VCF] convert a VCF file to a BED file
-       vcf_vep      [VCF] filter a VCF file annotated by Ensemble VEP
+     COMMAND
+       bam_head     [BAM] Print the header of a SAM/BAM/CRAM file.
+       bam_index    [BAM] Index a SAM/BAM/CRAM file.
+       bam_rename   [BAM] Rename the samples in a SAM/BAM/CRAM file.
+       bam_slice    [BAM] Slice a SAM/BAM/CRAM file.
+       bed_intxn    [BED] Find the intersection of two or more BED files.
+       bed_sum      [BED] Summarize a BED file.
+       fq_count     [FASTQ] Count sequence reads in FASTQ files.
+       fq_sum       [FASTQ] Summarize a FASTQ file.
+       fuc_compf    [FUC] Compare the contents of two files.
+       fuc_demux    [FUC] Parse the Reports directory from bcl2fastq.
+       fuc_exist    [FUC] Check whether certain files exist.
+       fuc_find     [FUC] Find all files with a certain extension recursively.
+       maf_maf2vcf  [MAF] Convert a MAF file to a VCF file.
+       maf_oncoplt  [MAF] Create an oncoplot with a MAF file.
+       maf_sumplt   [MAF] Create a summary plot with a MAF file.
+       maf_vcf2maf  [MAF] Convert a VCF file to a MAF file.
+       tbl_merge    [TABLE] Merge two table files.
+       tbl_sum      [TABLE] Summarize a table file.
+       vcf_filter   [VCF] Filter a VCF file.
+       vcf_merge    [VCF] Merge two or more VCF files.
+       vcf_rename   [VCF] Rename the samples in a VCF file.
+       vcf_slice    [VCF] Slice a VCF file.
+       vcf_vcf2bed  [VCF] Convert a VCF file to a BED file.
+       vcf_vep      [VCF] Filter a VCF file annotated by Ensemble VEP.
    
    optional arguments:
-     -h, --help     show this help message and exit
-     -v, --version  show the version number and exit
+     -h, --help     Show this help message and exit.
+     -v, --version  Show the version number and exit.
 
 For getting help on a specific command (e.g. vcf_merge):
 
diff --git a/docs/cli.rst b/docs/cli.rst
index 91c4538..d62b703 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -17,35 +17,35 @@ For getting help on the fuc CLI:
    usage: fuc [-h] [-v] COMMAND ...
    
    positional arguments:
-     COMMAND        name of the command
-       bam_head     [BAM] print the header of a SAM/BAM/CRAM file
-       bam_index    [BAM] index a SAM/BAM/CRAM file
-       bam_rename   [BAM] rename the samples in a SAM/BAM/CRAM file
-       bam_slice    [BAM] slice a SAM/BAM/CRAM file
-       bed_intxn    [BED] find intersection of two or more BED files
-       bed_sum      [BED] summarize a BED file
-       fq_count     [FASTQ] count sequence reads in FASTQ files
-       fq_sum       [FASTQ] summarize a FASTQ file
-       fuc_compf    [FUC] compare contents of two files
-       fuc_demux    [FUC] parse Reports directory from bcl2fastq or bcl2fastq2
-       fuc_exist    [FUC] check whether files/directories exist
-       fuc_find     [FUC] find files with certain extension recursively
-       maf_maf2vcf  [MAF] convert a MAF file to a VCF file
-       maf_oncoplt  [MAF] create an oncoplot with a MAF file
-       maf_sumplt   [MAF] create a summary plot with a MAF file
-       maf_vcf2maf  [MAF] convert an annotated VCF file to a MAF file
-       tbl_merge    [TABLE] merge two table files
-       tbl_sum      [TABLE] summarize a table file
-       vcf_filter   [VCF] filter a VCF file
-       vcf_merge    [VCF] merge two or more VCF files
-       vcf_rename   [VCF] rename the samples in a VCF file.
-       vcf_slice    [VCF] slice a VCF file
-       vcf_vcf2bed  [VCF] convert a VCF file to a BED file
-       vcf_vep      [VCF] filter a VCF file annotated by Ensemble VEP
+     COMMAND
+       bam_head     [BAM] Print the header of a SAM/BAM/CRAM file.
+       bam_index    [BAM] Index a SAM/BAM/CRAM file.
+       bam_rename   [BAM] Rename the samples in a SAM/BAM/CRAM file.
+       bam_slice    [BAM] Slice a SAM/BAM/CRAM file.
+       bed_intxn    [BED] Find the intersection of two or more BED files.
+       bed_sum      [BED] Summarize a BED file.
+       fq_count     [FASTQ] Count sequence reads in FASTQ files.
+       fq_sum       [FASTQ] Summarize a FASTQ file.
+       fuc_compf    [FUC] Compare the contents of two files.
+       fuc_demux    [FUC] Parse the Reports directory from bcl2fastq.
+       fuc_exist    [FUC] Check whether certain files exist.
+       fuc_find     [FUC] Find all files with a certain extension recursively.
+       maf_maf2vcf  [MAF] Convert a MAF file to a VCF file.
+       maf_oncoplt  [MAF] Create an oncoplot with a MAF file.
+       maf_sumplt   [MAF] Create a summary plot with a MAF file.
+       maf_vcf2maf  [MAF] Convert a VCF file to a MAF file.
+       tbl_merge    [TABLE] Merge two table files.
+       tbl_sum      [TABLE] Summarize a table file.
+       vcf_filter   [VCF] Filter a VCF file.
+       vcf_merge    [VCF] Merge two or more VCF files.
+       vcf_rename   [VCF] Rename the samples in a VCF file.
+       vcf_slice    [VCF] Slice a VCF file.
+       vcf_vcf2bed  [VCF] Convert a VCF file to a BED file.
+       vcf_vep      [VCF] Filter a VCF file annotated by Ensemble VEP.
    
    optional arguments:
-     -h, --help     show this help message and exit
-     -v, --version  show the version number and exit
+     -h, --help     Show this help message and exit.
+     -v, --version  Show the version number and exit.
 
 For getting help on a specific command (e.g. vcf_merge):
 
@@ -73,7 +73,7 @@ bam_head
      bam         SAM/BAM/CRAM file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 bam_index
 =========
@@ -95,7 +95,7 @@ bam_index
      bam         SAM/BAM/CRAM file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 bam_rename
 ==========
@@ -119,7 +119,7 @@ bam_rename
      out         output file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 bam_slice
 =========
@@ -127,27 +127,21 @@ bam_slice
 .. code-block:: text
 
    $ fuc bam_slice -h
-   usage: fuc bam_slice [-h] [--no_index] bam region out
+   usage: fuc bam_slice [-h] [--format TEXT] bam region [region ...]
    
-   This command will slice a SAM/BAM/CRAM file. It essentially wraps the
-   'pysam.view' method from the pysam package.
-   
-   By default, the command will index the output file. Use the '--no_index' flag
-   to skip indexing.
+   This command will slice the input SAM/BAM/CRAM file for the specified region(s).
    
    usage examples:
-     $ fuc bam_slice in.sam 4:300-400 out.sam
-     $ fuc bam_slice in.bam chr1:100-200 out.bam
-     $ fuc bam_slice in.cram chr1:100-200 out.cram --no_index
+     $ fuc bam_slice in.bam chr1:100-200 > out.bam
+     $ fuc bam_slice in.bam chr1:100-200 chr2:100-200 > out.bam
    
    positional arguments:
-     bam         SAM/BAM/CRAM file
-     region      region ('chrom:start-end')
-     out         output file
+     bam            SAM/BAM/CRAM file
+     region         space-separated regions ('chrom:start-end')
    
    optional arguments:
-     -h, --help  show this help message and exit
-     --no_index  use this flag to skip indexing
+     -h, --help     Show this help message and exit.
+     --format TEXT  output format (default: 'BAM') (choices: 'SAM', 'BAM', 'CRAM')
 
 bed_intxn
 =========
@@ -167,7 +161,7 @@ bed_intxn
      bed         BED files
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 bed_sum
 =======
@@ -191,7 +185,7 @@ bed_sum
      bed             BED file
    
    optional arguments:
-     -h, --help      show this help message and exit
+     -h, --help      Show this help message and exit.
      --bases INT     number to divide covered base pairs (default: 1)
      --decimals INT  number of decimals (default: 0)
 
@@ -214,7 +208,7 @@ fq_count
      fastq       FASTQ files (default: stdin)
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 fq_sum
 ======
@@ -236,7 +230,7 @@ fq_sum
      fastq       FASTQ file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 fuc_compf
 =========
@@ -258,7 +252,7 @@ fuc_compf
      right       right file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 fuc_demux
 =========
@@ -285,7 +279,7 @@ fuc_demux
      output_dir   output directory
    
    optional arguments:
-     -h, --help   show this help message and exit
+     -h, --help   Show this help message and exit.
 
 fuc_exist
 =========
@@ -295,7 +289,7 @@ fuc_exist
    $ fuc fuc_exist -h
    usage: fuc fuc_exist [-h] [files ...]
    
-   This command will check whether files/directories exist. It will return
+   This command will check whether files exist. It will return
    'True' if they exist and 'False' otherwise. The command will look for stdin
    if there are no arguments.
    
@@ -308,7 +302,7 @@ fuc_exist
      files       test files/directories (default: stdin)
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 fuc_find
 ========
@@ -330,7 +324,7 @@ fuc_find
      ext         file extension
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
      --dir PATH  directory to search in (default: current directory)
 
 maf_maf2vcf
@@ -374,7 +368,7 @@ maf_maf2vcf
      maf                   MAF file
    
    optional arguments:
-     -h, --help            show this help message and exit
+     -h, --help            Show this help message and exit.
      --fasta PATH          FASTA file (required to include INDELs in the output)
      --ignore_indels       use this flag to exclude INDELs from the output
      --cols TEXT [TEXT ...]
@@ -409,7 +403,7 @@ maf_oncoplt
      out                   image file
    
    optional arguments:
-     -h, --help            show this help message and exit
+     -h, --help            Show this help message and exit.
      --count INT           number of top mutated genes to display (default: 10)
      --figsize FLOAT FLOAT
                            width, height in inches (default: [15, 10])
@@ -446,7 +440,7 @@ maf_sumplt
      out                   output image file
    
    optional arguments:
-     -h, --help            show this help message and exit
+     -h, --help            Show this help message and exit.
      --figsize FLOAT FLOAT
                            width, height in inches (default: [15, 10])
      --title_fontsize FLOAT
@@ -474,7 +468,7 @@ maf_vcf2maf
      vcf         VCF file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 tbl_merge
 =========
@@ -501,9 +495,8 @@ tbl_merge
      right                 right file
    
    optional arguments:
-     -h, --help            show this help message and exit
-     --how TEXT            type of merge to be performed ['left', 'right',
-                           'outer', 'inner', 'cross'] (default: 'inner')
+     -h, --help            Show this help message and exit.
+     --how TEXT            type of merge to be performed ['left', 'right', 'outer', 'inner', 'cross'] (default: 'inner')
      --on TEXT [TEXT ...]  column names to join on
      --lsep TEXT           delimiter to use for the left file (default: '\t')
      --rsep TEXT           delimiter to use for the right file (default: '\t')
@@ -532,29 +525,16 @@ tbl_sum
      table_file            table file
    
    optional arguments:
-     -h, --help            show this help message and exit
+     -h, --help            Show this help message and exit.
      --sep TEXT            delimiter to use (default: '\t')
-     --skiprows TEXT       comma-separated line numbers to skip (0-indexed) or
-                           number of lines to skip at the start of the file (e.g.
-                           `--skiprows 1,` will skip the second line, `--skiprows
-                           2,4` will skip the third and fifth lines, and
-                           `--skiprows 10` will skip the first 10 lines)
+     --skiprows TEXT       comma-separated line numbers to skip (0-indexed) or number of lines to skip at the start of the file (e.g. `--skiprows 1,` will skip the second line, `--skiprows 2,4` will skip the third and fifth lines, and `--skiprows 10` will skip the first 10 lines)
      --na_values TEXT [TEXT ...]
-                           additional strings to recognize as NA/NaN (by default,
-                           the following values are interpreted as NaN: '',
-                           '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
-                           '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
-                           'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null')
-     --keep_default_na     whether or not to include the default NaN values when
-                           parsing the data (see 'pandas.read_table' for details)
-     --expr TEXT           query the columns of a pandas.DataFrame with a boolean
-                           expression (e.g. `--query "A == 'yes'"`)
+                           additional strings to recognize as NA/NaN (by default, the following values are interpreted as NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null')
+     --keep_default_na     whether or not to include the default NaN values when parsing the data (see 'pandas.read_table' for details)
+     --expr TEXT           query the columns of a pandas.DataFrame with a boolean expression (e.g. `--query "A == 'yes'"`)
      --columns TEXT [TEXT ...]
-                           columns to be summarized (by default, all columns will
-                           be included)
-     --dtypes PATH         file of column names and their data types (etheir
-                           'categorical' or 'numeric'); one tab-delimited pair of
-                           column name and data type per line
+                           columns to be summarized (by default, all columns will be included)
+     --dtypes PATH         file of column names and their data types (etheir 'categorical' or 'numeric'); one tab-delimited pair of column name and data type per line
 
 vcf_filter
 ==========
@@ -587,20 +567,14 @@ vcf_filter
      vcf                   VCF file
    
    optional arguments:
-     -h, --help            show this help message and exit
+     -h, --help            Show this help message and exit.
      --expr TEXT           expression to evaluate
-     --samples PATH        file of sample names to apply the marking (one sample
-                           per line)
+     --samples PATH        file of sample names to apply the marking (one sample per line)
      --drop_duplicates [TEXT ...]
-                           only consider certain columns for identifying
-                           duplicates, by default use all of the columns.
-     --greedy              use this flag to mark even ambiguous genotypes as
-                           missing
-     --opposite            use this flag to mark all genotypes that do not
-                           satisfy the query expression as missing and leave
-                           those that do intact
-     --filter_empty        use this flag to remove rows with no genotype calls at
-                           all
+                           only consider certain columns for identifying duplicates, by default use all of the columns.
+     --greedy              use this flag to mark even ambiguous genotypes as missing
+     --opposite            use this flag to mark all genotypes that do not satisfy the query expression as missing and leave those that do intact
+     --filter_empty        use this flag to remove rows with no genotype calls at all
 
 vcf_merge
 =========
@@ -625,11 +599,9 @@ vcf_merge
      vcf_files      VCF files
    
    optional arguments:
-     -h, --help     show this help message and exit
-     --how TEXT     type of merge as defined in `pandas.DataFrame.merge`
-                    (default: 'inner')
-     --format TEXT  FORMAT subfields to be retained (e.g. 'GT:AD:DP') (default:
-                    'GT')
+     -h, --help     Show this help message and exit.
+     --how TEXT     type of merge as defined in `pandas.DataFrame.merge` (default: 'inner')
+     --format TEXT  FORMAT subfields to be retained (e.g. 'GT:AD:DP') (default: 'GT')
      --sort         use this flag to turn off sorting of records (default: True)
      --collapse     use this flag to collapse duplicate records (default: False)
 
@@ -665,9 +637,8 @@ vcf_rename
      names            delimited text file
    
    optional arguments:
-     -h, --help       show this help message and exit
-     --mode TEXT      renaming mode (default: 'MAP') (choices: 'MAP', 'INDICIES',
-                      'RANGE')
+     -h, --help       Show this help message and exit.
+     --mode TEXT      renaming mode (default: 'MAP') (choices: 'MAP', 'INDICIES', 'RANGE')
      --range INT INT  specify an index range
      --sep TEXT       delimiter to use (default: '\t')
 
@@ -694,7 +665,7 @@ vcf_slice
      region      region ('chrom:start-end')
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 vcf_vcf2bed
 ===========
@@ -714,7 +685,7 @@ vcf_vcf2bed
      vcf         VCF file
    
    optional arguments:
-     -h, --help  show this help message and exit
+     -h, --help  Show this help message and exit.
 
 vcf_vep
 =======
@@ -742,8 +713,7 @@ vcf_vep
      expr        query expression to evaluate
    
    optional arguments:
-     -h, --help  show this help message and exit
-     --opposite  use this flag to return records that don’t meet the said
-                 criteria
+     -h, --help  Show this help message and exit.
+     --opposite  use this flag to return records that don’t meet the said criteria
      --as_zero   use this flag to treat missing values as zero instead of NaN
 
diff --git a/fuc/__main__.py b/fuc/__main__.py
index e2fcd13..492cf1e 100644
--- a/fuc/__main__.py
+++ b/fuc/__main__.py
@@ -4,18 +4,27 @@
 from .cli import commands
 
 def main():
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        add_help=False,
+        formatter_class=argparse.RawTextHelpFormatter,
+    )
+    parser.add_argument(
+        '-h',
+        '--help',
+        action='help',
+        default=argparse.SUPPRESS,
+        help='Show this help message and exit.',
+    )
     parser.add_argument(
         '-v',
         '--version',
         action='version',
         version=f'%(prog)s {__version__}',
-        help='show the version number and exit'
+        help='Show the version number and exit.'
     )
     subparsers = parser.add_subparsers(
         dest='command',
         metavar='COMMAND',
-        help='name of the command',
         required=True,
     )
     for name, command in commands.items():
diff --git a/fuc/api/common.py b/fuc/api/common.py
index 07833a1..353eaa1 100644
--- a/fuc/api/common.py
+++ b/fuc/api/common.py
@@ -13,7 +13,7 @@
 import pysam
 import warnings
 import inspect
-from argparse import RawDescriptionHelpFormatter
+from argparse import RawTextHelpFormatter, SUPPRESS
 
 FUC_PATH = pathlib.Path(__file__).parent.parent.parent.absolute()
 
@@ -24,8 +24,19 @@ def _script_name():
 
 def _add_parser(subparsers, name, **kwargs):
     """Return a formatted parser."""
-    parser = subparsers.add_parser(name,
-        formatter_class=RawDescriptionHelpFormatter, **kwargs)
+    parser = subparsers.add_parser(
+        name,
+        add_help=False,
+        formatter_class=RawTextHelpFormatter,
+        **kwargs,
+    )
+    parser.add_argument(
+        '-h',
+        '--help',
+        action='help',
+        default=SUPPRESS,
+        help='Show this help message and exit.',
+    )
     return parser
 
 def get_similarity(a, b):
diff --git a/fuc/cli/bam_head.py b/fuc/cli/bam_head.py
index ffb1ef6..fb50e5a 100644
--- a/fuc/cli/bam_head.py
+++ b/fuc/cli/bam_head.py
@@ -15,7 +15,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BAM] print the header of a SAM/BAM/CRAM file',
+        help='[BAM] Print the header of a SAM/BAM/CRAM file.',
         description=description,
     )
     parser.add_argument('bam', help='SAM/BAM/CRAM file')
diff --git a/fuc/cli/bam_index.py b/fuc/cli/bam_index.py
index c836472..4002ae0 100644
--- a/fuc/cli/bam_index.py
+++ b/fuc/cli/bam_index.py
@@ -15,7 +15,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BAM] index a SAM/BAM/CRAM file',
+        help='[BAM] Index a SAM/BAM/CRAM file.',
         description=description,
     )
     parser.add_argument('bam', help='SAM/BAM/CRAM file')
diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py
index c6e047c..6fe5109 100644
--- a/fuc/cli/bam_rename.py
+++ b/fuc/cli/bam_rename.py
@@ -14,7 +14,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BAM] rename the samples in a SAM/BAM/CRAM file',
+        help='[BAM] Rename the samples in a SAM/BAM/CRAM file.',
         description=description,
     )
     parser.add_argument('bam', help='SAM/BAM/CRAM file')
diff --git a/fuc/cli/bam_slice.py b/fuc/cli/bam_slice.py
index 0012476..337842b 100644
--- a/fuc/cli/bam_slice.py
+++ b/fuc/cli/bam_slice.py
@@ -1,35 +1,63 @@
+import sys
+
 from .. import api
+
 import pysam
 
 description = f"""
-This command will slice a SAM/BAM/CRAM file. It essentially wraps the
-'pysam.view' method from the pysam package.
-
-By default, the command will index the output file. Use the '--no_index' flag
-to skip indexing.
+This command will slice the input SAM/BAM/CRAM file for the specified region(s).
 
 usage examples:
-  $ fuc {api.common._script_name()} in.sam 4:300-400 out.sam
-  $ fuc {api.common._script_name()} in.bam chr1:100-200 out.bam
-  $ fuc {api.common._script_name()} in.cram chr1:100-200 out.cram --no_index
+  $ fuc {api.common._script_name()} in.bam chr1:100-200 > out.bam
+  $ fuc {api.common._script_name()} in.bam chr1:100-200 chr2:100-200 > out.bam
+  $ fuc {api.common._script_name()} in.bam chr1:100-200 --format SAM > out.sam
+  $ fuc {api.common._script_name()} in.bam chr1:100-200 --format CRAM --fasta ref.fa > out.cram
 """
 
 def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BAM] slice a SAM/BAM/CRAM file',
+        help='[BAM] Slice a SAM/BAM/CRAM file.',
         description=description,
     )
-    parser.add_argument('bam', help='SAM/BAM/CRAM file')
-    parser.add_argument('region', help="region ('chrom:start-end')")
-    parser.add_argument('out', help='output file')
-    parser.add_argument('--no_index', action='store_true',
-        help='use this flag to skip indexing')
+    parser.add_argument(
+        'bam',
+        help='SAM/BAM/CRAM file.'
+    )
+    parser.add_argument(
+        'region',
+        nargs='+',
+        help="Space-separated regions ('chrom:start-end')."
+    )
+    parser.add_argument(
+        '--format',
+        metavar='TEXT',
+        default='BAM',
+        choices=['SAM', 'BAM', 'CRAM'],
+        help=
+            "Output format (default: 'BAM') (choices: 'SAM', 'BAM', 'CRAM'). "
+            "A FASTA file ('--fasta') is required for 'CRAM'."
+    )
+    parser.add_argument(
+        '--fasta',
+        metavar='PATH',
+        help="FASTA file. Required when '--format' is 'CRAM'."
+    )
 
 def main(args):
-    b = pysam.view('-b', args.bam, args.region)
-    with open(args.out, 'wb') as f:
-        f.write(b)
-    if not args.no_index:
-        pysam.index(args.out)
+    options = []
+
+    # Determine the output format.
+    if args.format == 'BAM':
+        stdout_method = sys.stdout.buffer.write
+        options.append('-b')
+    elif args.format == 'CRAM':
+        stdout_method = sys.stdout.buffer.write
+        options.append('-C')
+    else:
+        stdout_method = sys.stdout.write
+
+    alignments = pysam.view(args.bam, *args.region, *options)
+
+    stdout_method(alignments)
diff --git a/fuc/cli/bed_intxn.py b/fuc/cli/bed_intxn.py
index 947e06d..2a13fe8 100644
--- a/fuc/cli/bed_intxn.py
+++ b/fuc/cli/bed_intxn.py
@@ -12,7 +12,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BED] find intersection of two or more BED files',
+        help='[BED] Find the intersection of two or more BED files.',
         description=description,
     )
     parser.add_argument('bed', help='BED files', nargs='+')
diff --git a/fuc/cli/bed_sum.py b/fuc/cli/bed_sum.py
index b46ca44..1cd668f 100644
--- a/fuc/cli/bed_sum.py
+++ b/fuc/cli/bed_sum.py
@@ -16,7 +16,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BED] summarize a BED file',
+        help='[BED] Summarize a BED file.',
         description=description,
     )
     parser.add_argument('bed', help='BED file')
diff --git a/fuc/cli/fq_count.py b/fuc/cli/fq_count.py
index 738c688..0c461b8 100644
--- a/fuc/cli/fq_count.py
+++ b/fuc/cli/fq_count.py
@@ -14,7 +14,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[FASTQ] count sequence reads in FASTQ files',
+        help='[FASTQ] Count sequence reads in FASTQ files.',
         description=description,
     )
     parser.add_argument('fastq', nargs='*',
diff --git a/fuc/cli/fq_sum.py b/fuc/cli/fq_sum.py
index 98698b4..dd9cc7d 100644
--- a/fuc/cli/fq_sum.py
+++ b/fuc/cli/fq_sum.py
@@ -14,7 +14,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[FASTQ] summarize a FASTQ file',
+        help='[FASTQ] Summarize a FASTQ file.',
         description=description,
     )
     parser.add_argument('fastq', help='FASTQ file')
diff --git a/fuc/cli/fuc_compf.py b/fuc/cli/fuc_compf.py
index be1bf03..2f02ad5 100644
--- a/fuc/cli/fuc_compf.py
+++ b/fuc/cli/fuc_compf.py
@@ -14,7 +14,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[FUC] compare contents of two files',
+        help='[FUC] Compare the contents of two files.',
         description=description,
     )
     parser.add_argument('left', help='left file')
diff --git a/fuc/cli/fuc_demux.py b/fuc/cli/fuc_demux.py
index 703dd3d..5c5fe1c 100644
--- a/fuc/cli/fuc_demux.py
+++ b/fuc/cli/fuc_demux.py
@@ -23,7 +23,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[FUC] parse Reports directory from bcl2fastq or bcl2fastq2',
+        help='[FUC] Parse the Reports directory from bcl2fastq.',
         description=description,
     )
     parser.add_argument('reports_dir', help='Reports directory')
diff --git a/fuc/cli/fuc_exist.py b/fuc/cli/fuc_exist.py
index 92ca8fe..0f7c484 100644
--- a/fuc/cli/fuc_exist.py
+++ b/fuc/cli/fuc_exist.py
@@ -3,7 +3,7 @@
 import sys
 
 description = f"""
-This command will check whether files/directories exist. It will return
+This command will check whether files exist. It will return
 'True' if they exist and 'False' otherwise. The command will look for stdin
 if there are no arguments.
 
@@ -17,7 +17,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[FUC] check whether files/directories exist',
+        help='[FUC] Check whether certain files exist.',
         description=description,
     )
     parser.add_argument('files', nargs='*',
diff --git a/fuc/cli/fuc_find.py b/fuc/cli/fuc_find.py
index c156fe9..075f9d9 100644
--- a/fuc/cli/fuc_find.py
+++ b/fuc/cli/fuc_find.py
@@ -17,7 +17,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[FUC] find files with certain extension recursively',
+        help='[FUC] Find all files with a certain extension recursively.',
         description=description,
     )
     parser.add_argument('ext', help='file extension')
diff --git a/fuc/cli/maf_maf2vcf.py b/fuc/cli/maf_maf2vcf.py
index e27e4a3..e707bf3 100644
--- a/fuc/cli/maf_maf2vcf.py
+++ b/fuc/cli/maf_maf2vcf.py
@@ -33,7 +33,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[MAF] convert a MAF file to a VCF file',
+        help='[MAF] Convert a MAF file to a VCF file.',
         description=description,
     )
     parser.add_argument('maf', help='MAF file')
diff --git a/fuc/cli/maf_oncoplt.py b/fuc/cli/maf_oncoplt.py
index 3ad6723..569f52a 100644
--- a/fuc/cli/maf_oncoplt.py
+++ b/fuc/cli/maf_oncoplt.py
@@ -18,7 +18,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[MAF] create an oncoplot with a MAF file',
+        help='[MAF] Create an oncoplot with a MAF file.',
         description=description,
     )
     parser.add_argument('maf', help='MAF file')
diff --git a/fuc/cli/maf_sumplt.py b/fuc/cli/maf_sumplt.py
index 69f5fde..b146064 100644
--- a/fuc/cli/maf_sumplt.py
+++ b/fuc/cli/maf_sumplt.py
@@ -18,7 +18,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[MAF] create a summary plot with a MAF file',
+        help='[MAF] Create a summary plot with a MAF file.',
         description=description,
     )
     parser.add_argument('maf', help='MAF file')
diff --git a/fuc/cli/maf_vcf2maf.py b/fuc/cli/maf_vcf2maf.py
index 1a950e2..3e8f633 100644
--- a/fuc/cli/maf_vcf2maf.py
+++ b/fuc/cli/maf_vcf2maf.py
@@ -12,7 +12,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[MAF] convert an annotated VCF file to a MAF file',
+        help='[MAF] Convert a VCF file to a MAF file.',
         description=description,
     )
     parser.add_argument('vcf', help='VCF file')
diff --git a/fuc/cli/tbl_merge.py b/fuc/cli/tbl_merge.py
index 244af3b..1e58c14 100644
--- a/fuc/cli/tbl_merge.py
+++ b/fuc/cli/tbl_merge.py
@@ -19,7 +19,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[TABLE] merge two table files',
+        help='[TABLE] Merge two table files.',
         description=description,
     )
     parser.add_argument('left', help='left file')
diff --git a/fuc/cli/tbl_sum.py b/fuc/cli/tbl_sum.py
index bfe6fa5..5819a9e 100644
--- a/fuc/cli/tbl_sum.py
+++ b/fuc/cli/tbl_sum.py
@@ -16,7 +16,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[TABLE] summarize a table file',
+        help='[TABLE] Summarize a table file.',
         description=description,
     )
     parser.add_argument(
diff --git a/fuc/cli/vcf_filter.py b/fuc/cli/vcf_filter.py
index 9868a50..f133e9c 100644
--- a/fuc/cli/vcf_filter.py
+++ b/fuc/cli/vcf_filter.py
@@ -23,7 +23,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[VCF] filter a VCF file',
+        help='[VCF] Filter a VCF file.',
         description=description,
     )
     parser.add_argument('vcf', help='VCF file')
diff --git a/fuc/cli/vcf_merge.py b/fuc/cli/vcf_merge.py
index 65a148b..3dd8c34 100644
--- a/fuc/cli/vcf_merge.py
+++ b/fuc/cli/vcf_merge.py
@@ -16,7 +16,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[VCF] merge two or more VCF files',
+        help='[VCF] Merge two or more VCF files.',
         description=description,
     )
     parser.add_argument('vcf_files', help='VCF files', nargs='+')
diff --git a/fuc/cli/vcf_rename.py b/fuc/cli/vcf_rename.py
index ea3dbb1..070fc04 100644
--- a/fuc/cli/vcf_rename.py
+++ b/fuc/cli/vcf_rename.py
@@ -25,7 +25,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[VCF] rename the samples in a VCF file.',
+        help='[VCF] Rename the samples in a VCF file.',
         description=description,
     )
     parser.add_argument('vcf', help='VCF file')
diff --git a/fuc/cli/vcf_slice.py b/fuc/cli/vcf_slice.py
index 821d5d0..8fe2c08 100644
--- a/fuc/cli/vcf_slice.py
+++ b/fuc/cli/vcf_slice.py
@@ -16,7 +16,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[VCF] slice a VCF file',
+        help='[VCF] Slice a VCF file.',
         description=description,
     )
     parser.add_argument('vcf', help='VCF file')
diff --git a/fuc/cli/vcf_vcf2bed.py b/fuc/cli/vcf_vcf2bed.py
index 1ab5d7f..e98caab 100644
--- a/fuc/cli/vcf_vcf2bed.py
+++ b/fuc/cli/vcf_vcf2bed.py
@@ -12,7 +12,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[VCF] convert a VCF file to a BED file',
+        help='[VCF] Convert a VCF file to a BED file.',
         description=description,
     )
     parser.add_argument('vcf', help='VCF file')
diff --git a/fuc/cli/vcf_vep.py b/fuc/cli/vcf_vep.py
index d4b8c54..083aa8b 100644
--- a/fuc/cli/vcf_vep.py
+++ b/fuc/cli/vcf_vep.py
@@ -19,7 +19,7 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[VCF] filter a VCF file annotated by Ensemble VEP',
+        help='[VCF] Filter a VCF file annotated by Ensemble VEP.',
         description=description,
     )
     parser.add_argument('vcf', help='VCF file annotated with Ensemble VEP')

From c103595d9087fc2375801c7e158aa578af262af1 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 15:58:55 +0900
Subject: [PATCH 22/29] Update CLI

---
 fuc/cli/bam_head.py   | 10 ++++++----
 fuc/cli/bam_index.py  |  6 +++---
 fuc/cli/bam_rename.py |  9 ++++-----
 fuc/cli/bam_slice.py  | 11 ++++++++---
 4 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/fuc/cli/bam_head.py b/fuc/cli/bam_head.py
index fb50e5a..150d297 100644
--- a/fuc/cli/bam_head.py
+++ b/fuc/cli/bam_head.py
@@ -1,9 +1,11 @@
+import sys
+
 from .. import api
+
 import pysam
 
 description = f"""
-This command will print the header of a SAM/BAM/CRAM file. It essentially
-wraps the 'pysam.view' method from the pysam package.
+This command will print the header of the input SAM/BAM/CRAM file.
 
 usage examples:
   $ fuc {api.common._script_name()} in.sam
@@ -18,8 +20,8 @@ def create_parser(subparsers):
         help='[BAM] Print the header of a SAM/BAM/CRAM file.',
         description=description,
     )
-    parser.add_argument('bam', help='SAM/BAM/CRAM file')
+    parser.add_argument('bam', help='SAM/BAM/CRAM file.')
 
 def main(args):
     header = pysam.view('-H', args.bam)
-    print(header, end='')
+    sys.stdout.write(header)
diff --git a/fuc/cli/bam_index.py b/fuc/cli/bam_index.py
index 4002ae0..4a4358f 100644
--- a/fuc/cli/bam_index.py
+++ b/fuc/cli/bam_index.py
@@ -1,9 +1,9 @@
 from .. import api
+
 import pysam
 
 description = f"""
-This command will index a SAM/BAM/CRAM file. It essentially wraps the
-'pysam.index' method from the pysam package.
+This command will index the input SAM/BAM/CRAM file.
 
 usage examples:
   $ fuc {api.common._script_name()} in.sam
@@ -18,7 +18,7 @@ def create_parser(subparsers):
         help='[BAM] Index a SAM/BAM/CRAM file.',
         description=description,
     )
-    parser.add_argument('bam', help='SAM/BAM/CRAM file')
+    parser.add_argument('bam', help='SAM/BAM/CRAM file.')
 
 def main(args):
     pysam.index(args.bam)
diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py
index 6fe5109..5814ef7 100644
--- a/fuc/cli/bam_rename.py
+++ b/fuc/cli/bam_rename.py
@@ -1,8 +1,7 @@
 from .. import api
 
 description = f"""
-This command will rename the samples in a SAM/BAM/CRAM file. It essentially
-wraps the 'pybam.rename' method from the fuc API.
+This command will rename the samples in a SAM/BAM/CRAM file.
 
 usage examples:
   $ fuc {api.common._script_name()} in.sam new_name out.sam
@@ -17,9 +16,9 @@ def create_parser(subparsers):
         help='[BAM] Rename the samples in a SAM/BAM/CRAM file.',
         description=description,
     )
-    parser.add_argument('bam', help='SAM/BAM/CRAM file')
-    parser.add_argument('name', help='sample name')
-    parser.add_argument('out', help='output file')
+    parser.add_argument('bam', help='SAM/BAM/CRAM file.')
+    parser.add_argument('name', help='Sample name.')
+    parser.add_argument('out', help='Output file.')
 
 def main(args):
     api.pybam.rename(args.bam, args.name, args.out)
diff --git a/fuc/cli/bam_slice.py b/fuc/cli/bam_slice.py
index 337842b..6e70ead 100644
--- a/fuc/cli/bam_slice.py
+++ b/fuc/cli/bam_slice.py
@@ -37,7 +37,7 @@ def create_parser(subparsers):
         choices=['SAM', 'BAM', 'CRAM'],
         help=
             "Output format (default: 'BAM') (choices: 'SAM', 'BAM', 'CRAM'). "
-            "A FASTA file ('--fasta') is required for 'CRAM'."
+            "A FASTA file must be specified with '--fasta' for 'CRAM'."
     )
     parser.add_argument(
         '--fasta',
@@ -51,10 +51,15 @@ def main(args):
     # Determine the output format.
     if args.format == 'BAM':
         stdout_method = sys.stdout.buffer.write
-        options.append('-b')
+        options += ['-b']
     elif args.format == 'CRAM':
         stdout_method = sys.stdout.buffer.write
-        options.append('-C')
+        if args.fasta is None:
+            raise ValueError(
+                "A FASTA file must be specified with '--fasta' "
+                "when '--format' is 'CRAM'."
+            )
+        options += ['-C', '-T', args.fasta]
     else:
         stdout_method = sys.stdout.write
 

From c9a4749f1c019a602d24319a2fe2bbd29acea3e6 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 16:50:10 +0900
Subject: [PATCH 23/29] Update `bam_rename` command

---
 CHANGELOG.rst         |  2 +-
 README.rst            |  2 +-
 docs/cli.rst          | 39 ++++++++++++++++++------------------
 fuc/api/pybam.py      | 45 ------------------------------------------
 fuc/cli/bam_rename.py | 46 ++++++++++++++++++++++++++++++++++---------
 5 files changed, 58 insertions(+), 76 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 23738fa..9de88bf 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -14,7 +14,7 @@ Changelog
 * Fix minor bug in :meth:`pymaf.MafFrame.to_frame` method.
 * Deprecate :meth:`pyvep.filter_lof/clinsig` methods.
 * Update :meth:`pymaf.MafFrame.from_vcf` method to extract genotype keys (e.g. DP, AD, AF).
-* Update :command:`bam_slice` command.
+* Update :command:`bam_slice` and :command:`bam_rename` commands.
 
 0.14.0 (2021-06-20)
 -------------------
diff --git a/README.rst b/README.rst
index 9cfdd45..0238e38 100644
--- a/README.rst
+++ b/README.rst
@@ -110,7 +110,7 @@ For getting help on the fuc CLI:
      COMMAND
        bam_head     [BAM] Print the header of a SAM/BAM/CRAM file.
        bam_index    [BAM] Index a SAM/BAM/CRAM file.
-       bam_rename   [BAM] Rename the samples in a SAM/BAM/CRAM file.
+       bam_rename   [BAM] Rename the samples in a BAM/CRAM file.
        bam_slice    [BAM] Slice a SAM/BAM/CRAM file.
        bed_intxn    [BED] Find the intersection of two or more BED files.
        bed_sum      [BED] Summarize a BED file.
diff --git a/docs/cli.rst b/docs/cli.rst
index d62b703..4400d22 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -20,7 +20,7 @@ For getting help on the fuc CLI:
      COMMAND
        bam_head     [BAM] Print the header of a SAM/BAM/CRAM file.
        bam_index    [BAM] Index a SAM/BAM/CRAM file.
-       bam_rename   [BAM] Rename the samples in a SAM/BAM/CRAM file.
+       bam_rename   [BAM] Rename the samples in a BAM/CRAM file.
        bam_slice    [BAM] Slice a SAM/BAM/CRAM file.
        bed_intxn    [BED] Find the intersection of two or more BED files.
        bed_sum      [BED] Summarize a BED file.
@@ -61,8 +61,7 @@ bam_head
    $ fuc bam_head -h
    usage: fuc bam_head [-h] bam
    
-   This command will print the header of a SAM/BAM/CRAM file. It essentially
-   wraps the 'pysam.view' method from the pysam package.
+   This command will print the header of the input SAM/BAM/CRAM file.
    
    usage examples:
      $ fuc bam_head in.sam
@@ -70,7 +69,7 @@ bam_head
      $ fuc bam_head in.cram
    
    positional arguments:
-     bam         SAM/BAM/CRAM file
+     bam         SAM/BAM/CRAM file.
    
    optional arguments:
      -h, --help  Show this help message and exit.
@@ -83,8 +82,7 @@ bam_index
    $ fuc bam_index -h
    usage: fuc bam_index [-h] bam
    
-   This command will index a SAM/BAM/CRAM file. It essentially wraps the
-   'pysam.index' method from the pysam package.
+   This command will index the input SAM/BAM/CRAM file.
    
    usage examples:
      $ fuc bam_index in.sam
@@ -92,7 +90,7 @@ bam_index
      $ fuc bam_index in.cram
    
    positional arguments:
-     bam         SAM/BAM/CRAM file
+     bam         SAM/BAM/CRAM file.
    
    optional arguments:
      -h, --help  Show this help message and exit.
@@ -103,20 +101,17 @@ bam_rename
 .. code-block:: text
 
    $ fuc bam_rename -h
-   usage: fuc bam_rename [-h] bam name out
+   usage: fuc bam_rename [-h] bam name
    
-   This command will rename the samples in a SAM/BAM/CRAM file. It essentially
-   wraps the 'pybam.rename' method from the fuc API.
+   This command will rename the sample(s) in the input BAM/CRAM file.
    
    usage examples:
-     $ fuc bam_rename in.sam new_name out.sam
-     $ fuc bam_rename in.bam new_name out.bam
-     $ fuc bam_rename in.cram new_name out.cram
+     $ fuc bam_rename in.bam NA12878 > out.bam
+     $ fuc bam_rename in.cram NA12878 > out.cram
    
    positional arguments:
-     bam         SAM/BAM/CRAM file
-     name        sample name
-     out         output file
+     bam         BAM/CRAM file.
+     name        New sample name.
    
    optional arguments:
      -h, --help  Show this help message and exit.
@@ -127,21 +122,25 @@ bam_slice
 .. code-block:: text
 
    $ fuc bam_slice -h
-   usage: fuc bam_slice [-h] [--format TEXT] bam region [region ...]
+   usage: fuc bam_slice [-h] [--format TEXT] [--fasta PATH]
+                        bam region [region ...]
    
    This command will slice the input SAM/BAM/CRAM file for the specified region(s).
    
    usage examples:
      $ fuc bam_slice in.bam chr1:100-200 > out.bam
      $ fuc bam_slice in.bam chr1:100-200 chr2:100-200 > out.bam
+     $ fuc bam_slice in.bam chr1:100-200 --format SAM > out.sam
+     $ fuc bam_slice in.bam chr1:100-200 --format CRAM --fasta ref.fa > out.cram
    
    positional arguments:
-     bam            SAM/BAM/CRAM file
-     region         space-separated regions ('chrom:start-end')
+     bam            SAM/BAM/CRAM file.
+     region         Space-separated regions ('chrom:start-end').
    
    optional arguments:
      -h, --help     Show this help message and exit.
-     --format TEXT  output format (default: 'BAM') (choices: 'SAM', 'BAM', 'CRAM')
+     --format TEXT  Output format (default: 'BAM') (choices: 'SAM', 'BAM', 'CRAM'). A FASTA file must be specified with '--fasta' for 'CRAM'.
+     --fasta PATH   FASTA file. Required when '--format' is 'CRAM'.
 
 bed_intxn
 =========
diff --git a/fuc/api/pybam.py b/fuc/api/pybam.py
index b646cdd..6866f6d 100644
--- a/fuc/api/pybam.py
+++ b/fuc/api/pybam.py
@@ -6,56 +6,11 @@
 """
 
 import pysam
-import tempfile
 
 def header(fn):
     """Return the header of a BAM file."""
     return pysam.view('-H', fn).strip()
 
-def rename(fn, name, out, index=True):
-    """Update the SM tags in a BAM file.
-
-    This method will write a new BAM file with updated SM tags. By default,
-    the method will also create an index file.
-
-    .. warning::
-        This method will only update the SM tags and nothing else;
-        therefore, the old sample name(s) may still be present in other
-        tags.
-
-    Parameters
-    ----------
-    fn : str
-        Path to the input BAM file.
-    name : str
-        New sample name.
-    out : str
-        Path to the output BAM file.
-    index : bool, default: True
-        If False, don't index the output BAM file.
-    """
-    lines1 = pysam.view('-H', fn).strip().split('\n')
-    lines2 = []
-    for line in lines1:
-        fields = line.split('\t')
-        if fields[0] != '@RG':
-            lines2.append(line)
-            continue
-        t = []
-        for field in fields:
-            if 'SM:' not in field:
-                t.append(field)
-                continue
-            t.append(f'SM:{name}')
-        lines2.append('\t'.join(t))
-    with tempfile.TemporaryDirectory() as t:
-        with open(f'{t}/header.sam', 'w') as f1:
-            f1.write('\n'.join(lines2))
-        with open(out, 'wb') as f2:
-            f2.write(pysam.reheader(f'{t}/header.sam', fn))
-    if index:
-        pysam.index(out)
-
 def tag_sm(fn):
     """Extract the SM tags (sample names) from a BAM file.
 
diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py
index 5814ef7..70fb80d 100644
--- a/fuc/cli/bam_rename.py
+++ b/fuc/cli/bam_rename.py
@@ -1,24 +1,52 @@
+import sys
+import tempfile
+
 from .. import api
 
+import pysam
+
 description = f"""
-This command will rename the samples in a SAM/BAM/CRAM file.
+This command will rename the sample(s) in the input BAM/CRAM file.
 
 usage examples:
-  $ fuc {api.common._script_name()} in.sam new_name out.sam
-  $ fuc {api.common._script_name()} in.bam new_name out.bam
-  $ fuc {api.common._script_name()} in.cram new_name out.cram
+  $ fuc {api.common._script_name()} in.bam NA12878 > out.bam
+  $ fuc {api.common._script_name()} in.cram NA12878 > out.cram
 """
 
 def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BAM] Rename the samples in a SAM/BAM/CRAM file.',
+        help='[BAM] Rename the samples in a BAM/CRAM file.',
         description=description,
     )
-    parser.add_argument('bam', help='SAM/BAM/CRAM file.')
-    parser.add_argument('name', help='Sample name.')
-    parser.add_argument('out', help='Output file.')
+    parser.add_argument('bam', help='BAM/CRAM file.')
+    parser.add_argument('name', help='New sample name.')
 
 def main(args):
-    api.pybam.rename(args.bam, args.name, args.out)
+    old_lines = pysam.view('-H', args.bam).strip().split('\n')
+    new_lines = []
+
+    for old_line in old_lines:
+        old_fields = old_line.split('\t')
+
+        if old_fields[0] != '@RG':
+            new_lines.append(old_line)
+            continue
+
+        new_fields = []
+
+        for old_field in old_fields:
+            if 'SM:' not in old_field:
+                new_fields.append(old_field)
+                continue
+
+            new_fields.append(f'SM:{args.name}')
+
+        new_lines.append('\t'.join(new_fields))
+
+    with tempfile.TemporaryDirectory() as t:
+        with open(f'{t}/header.sam', 'w') as f:
+            f.write('\n'.join(new_lines))
+
+        sys.stdout.buffer.write(pysam.reheader(f'{t}/header.sam', args.bam))

From f26afb08360efc0a881090701940c3fedaaedb5b Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 16:58:51 +0900
Subject: [PATCH 24/29] Update CHANGELOG.rst

---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 9de88bf..4331392 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,6 +15,7 @@ Changelog
 * Deprecate :meth:`pyvep.filter_lof/clinsig` methods.
 * Update :meth:`pymaf.MafFrame.from_vcf` method to extract genotype keys (e.g. DP, AD, AF).
 * Update :command:`bam_slice` and :command:`bam_rename` commands.
+* Deprecate :meth:`pybam.rename`.
 
 0.14.0 (2021-06-20)
 -------------------

From e4365a91432b2ffb3b29cbc6f2e4c50aaacafa0b Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 16:59:27 +0900
Subject: [PATCH 25/29] Update CHANGELOG.rst

---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 4331392..47b5d26 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,7 +15,7 @@ Changelog
 * Deprecate :meth:`pyvep.filter_lof/clinsig` methods.
 * Update :meth:`pymaf.MafFrame.from_vcf` method to extract genotype keys (e.g. DP, AD, AF).
 * Update :command:`bam_slice` and :command:`bam_rename` commands.
-* Deprecate :meth:`pybam.rename`.
+* Deprecate :meth:`pybam.rename` method.
 
 0.14.0 (2021-06-20)
 -------------------

From b2fc7cbde72ec69c7abecb5799fed24c38388db2 Mon Sep 17 00:00:00 2001
From: "Seung-been \"Steven\" Lee" <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 17:10:45 +0900
Subject: [PATCH 26/29] Update CLI

---
 fuc/cli/bam_head.py   | 2 +-
 fuc/cli/bam_rename.py | 5 +++--
 fuc/cli/bam_slice.py  | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/fuc/cli/bam_head.py b/fuc/cli/bam_head.py
index 150d297..287f94f 100644
--- a/fuc/cli/bam_head.py
+++ b/fuc/cli/bam_head.py
@@ -23,5 +23,5 @@ def create_parser(subparsers):
     parser.add_argument('bam', help='SAM/BAM/CRAM file.')
 
 def main(args):
-    header = pysam.view('-H', args.bam)
+    header = pysam.view('-H', args.bam, '--no-PG')
     sys.stdout.write(header)
diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py
index 70fb80d..6f563aa 100644
--- a/fuc/cli/bam_rename.py
+++ b/fuc/cli/bam_rename.py
@@ -24,7 +24,7 @@ def create_parser(subparsers):
     parser.add_argument('name', help='New sample name.')
 
 def main(args):
-    old_lines = pysam.view('-H', args.bam).strip().split('\n')
+    old_lines = pysam.view('-H', args.bam, '--no-PG').strip().split('\n')
     new_lines = []
 
     for old_line in old_lines:
@@ -49,4 +49,5 @@ def main(args):
         with open(f'{t}/header.sam', 'w') as f:
             f.write('\n'.join(new_lines))
 
-        sys.stdout.buffer.write(pysam.reheader(f'{t}/header.sam', args.bam))
+        alignments = pysam.reheader(f'{t}/header.sam', args.bam, '--no-PG')
+        sys.stdout.buffer.write(alignments)
diff --git a/fuc/cli/bam_slice.py b/fuc/cli/bam_slice.py
index 6e70ead..2c5536d 100644
--- a/fuc/cli/bam_slice.py
+++ b/fuc/cli/bam_slice.py
@@ -46,7 +46,7 @@ def create_parser(subparsers):
     )
 
 def main(args):
-    options = []
+    options = ['--no-PG']
 
     # Determine the output format.
     if args.format == 'BAM':

From 73384f6f90c201a89ed515e2edf509a6e881036e Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 20:11:29 +0900
Subject: [PATCH 27/29] Update `bam_slice` command

---
 fuc/cli/bam_rename.py | 43 ++++++++++++++++++++++++++++---------------
 fuc/cli/bam_slice.py  |  2 +-
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/fuc/cli/bam_rename.py b/fuc/cli/bam_rename.py
index 6f563aa..44e37c1 100644
--- a/fuc/cli/bam_rename.py
+++ b/fuc/cli/bam_rename.py
@@ -6,9 +6,10 @@
 import pysam
 
 description = f"""
-This command will rename the sample(s) in the input BAM/CRAM file.
+This command will rename the sample(s) in the input SAM/BAM/CRAM file.
 
 usage examples:
+  $ fuc {api.common._script_name()} in.sam NA12878 > out.sam
   $ fuc {api.common._script_name()} in.bam NA12878 > out.bam
   $ fuc {api.common._script_name()} in.cram NA12878 > out.cram
 """
@@ -17,37 +18,49 @@ def create_parser(subparsers):
     parser = api.common._add_parser(
         subparsers,
         api.common._script_name(),
-        help='[BAM] Rename the samples in a BAM/CRAM file.',
+        help='[BAM] Rename the samples in a SAM/BAM/CRAM file.',
         description=description,
     )
-    parser.add_argument('bam', help='BAM/CRAM file.')
+    parser.add_argument('bam', help='SAM/BAM/CRAM file.')
     parser.add_argument('name', help='New sample name.')
 
 def main(args):
+    # Detect the input format.
+    try:
+        with open(args.bam, 'r') as f:
+            for line in f:
+                is_sam = True
+                break
+    except UnicodeDecodeError:
+        is_sam = False
+
+    # Rename the sample(s).
     old_lines = pysam.view('-H', args.bam, '--no-PG').strip().split('\n')
     new_lines = []
-
     for old_line in old_lines:
         old_fields = old_line.split('\t')
-
         if old_fields[0] != '@RG':
             new_lines.append(old_line)
             continue
-
         new_fields = []
-
         for old_field in old_fields:
             if 'SM:' not in old_field:
                 new_fields.append(old_field)
                 continue
-
             new_fields.append(f'SM:{args.name}')
-
         new_lines.append('\t'.join(new_fields))
 
-    with tempfile.TemporaryDirectory() as t:
-        with open(f'{t}/header.sam', 'w') as f:
-            f.write('\n'.join(new_lines))
-
-        alignments = pysam.reheader(f'{t}/header.sam', args.bam, '--no-PG')
-        sys.stdout.buffer.write(alignments)
+    # Write the output file.
+    if is_sam:
+        for new_line in new_lines:
+            sys.stdout.write(new_line + '\n')
+        alignments = pysam.view(args.bam, '--no-PG')
+        sys.stdout.write(alignments)
+    else:
+        with tempfile.TemporaryDirectory() as t:
+            with open(f'{t}/header.sam', 'w') as f:
+                f.write('\n'.join(new_lines))
+            alignments = pysam.reheader(f'{t}/header.sam',
+                                        args.bam,
+                                        '--no-PG')
+            sys.stdout.buffer.write(alignments)
diff --git a/fuc/cli/bam_slice.py b/fuc/cli/bam_slice.py
index 2c5536d..e99f79f 100644
--- a/fuc/cli/bam_slice.py
+++ b/fuc/cli/bam_slice.py
@@ -46,7 +46,7 @@ def create_parser(subparsers):
     )
 
 def main(args):
-    options = ['--no-PG']
+    options = ['-h', '--no-PG']
 
     # Determine the output format.
     if args.format == 'BAM':

From 7604274aa5f8c64d4c7bccd7dfac73180336d104 Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Wed, 23 Jun 2021 20:28:06 +0900
Subject: [PATCH 28/29] Update docs

---
 README.rst     | 12 +++++++++---
 docs/cli.rst   |  7 ++++---
 docs/create.py | 10 ++++++++--
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/README.rst b/README.rst
index 0238e38..360538e 100644
--- a/README.rst
+++ b/README.rst
@@ -110,7 +110,7 @@ For getting help on the fuc CLI:
      COMMAND
        bam_head     [BAM] Print the header of a SAM/BAM/CRAM file.
        bam_index    [BAM] Index a SAM/BAM/CRAM file.
-       bam_rename   [BAM] Rename the samples in a BAM/CRAM file.
+       bam_rename   [BAM] Rename the samples in a SAM/BAM/CRAM file.
        bam_slice    [BAM] Slice a SAM/BAM/CRAM file.
        bed_intxn    [BED] Find the intersection of two or more BED files.
        bed_sum      [BED] Summarize a BED file.
@@ -165,7 +165,7 @@ For getting help on a specific submodule (e.g. pyvcf):
 CLI Examples
 ============
 
-**BAM**
+**SAM/BAM/CRAM**
 
 To print the header of a SAM file:
 
@@ -179,11 +179,17 @@ To index a CRAM file:
 
    $ fuc bam_index in.cram
 
+To rename the samples in a SAM file:
+
+.. code-block:: text
+
+   $ fuc bam_rename in.sam NA12878 > out.sam
+
 To slice a BAM file:
 
 .. code-block:: text
 
-   $ fuc bam_slice in.bam chr1:100-200 out.bam
+   $ fuc bam_slice in.bam chr1:100-200 > out.bam
 
 **BED**
 
diff --git a/docs/cli.rst b/docs/cli.rst
index 4400d22..b041882 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -20,7 +20,7 @@ For getting help on the fuc CLI:
      COMMAND
        bam_head     [BAM] Print the header of a SAM/BAM/CRAM file.
        bam_index    [BAM] Index a SAM/BAM/CRAM file.
-       bam_rename   [BAM] Rename the samples in a BAM/CRAM file.
+       bam_rename   [BAM] Rename the samples in a SAM/BAM/CRAM file.
        bam_slice    [BAM] Slice a SAM/BAM/CRAM file.
        bed_intxn    [BED] Find the intersection of two or more BED files.
        bed_sum      [BED] Summarize a BED file.
@@ -103,14 +103,15 @@ bam_rename
    $ fuc bam_rename -h
    usage: fuc bam_rename [-h] bam name
    
-   This command will rename the sample(s) in the input BAM/CRAM file.
+   This command will rename the sample(s) in the input SAM/BAM/CRAM file.
    
    usage examples:
+     $ fuc bam_rename in.sam NA12878 > out.sam
      $ fuc bam_rename in.bam NA12878 > out.bam
      $ fuc bam_rename in.cram NA12878 > out.cram
    
    positional arguments:
-     bam         BAM/CRAM file.
+     bam         SAM/BAM/CRAM file.
      name        New sample name.
    
    optional arguments:
diff --git a/docs/create.py b/docs/create.py
index bb02ba0..7850b2f 100644
--- a/docs/create.py
+++ b/docs/create.py
@@ -153,7 +153,7 @@
 CLI Examples
 ============
 
-**BAM**
+**SAM/BAM/CRAM**
 
 To print the header of a SAM file:
 
@@ -167,11 +167,17 @@
 
    $ fuc bam_index in.cram
 
+To rename the samples in a SAM file:
+
+.. code-block:: text
+
+   $ fuc bam_rename in.sam NA12878 > out.sam
+
 To slice a BAM file:
 
 .. code-block:: text
 
-   $ fuc bam_slice in.bam chr1:100-200 out.bam
+   $ fuc bam_slice in.bam chr1:100-200 > out.bam
 
 **BED**
 

From 4b0c492a69496ac6bad231ac40ebeaf4176e3644 Mon Sep 17 00:00:00 2001
From: Seung-been Lee <sbstevenlee@gmail.com>
Date: Thu, 24 Jun 2021 20:03:45 +0900
Subject: [PATCH 29/29] Update CHANGELOG.rst

---
 CHANGELOG.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 47b5d26..0770efd 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,8 +1,8 @@
 Changelog
 *********
 
-0.15.0 (in development)
------------------------
+0.15.0 (2021-06-24)
+-------------------
 
 * Update :command:`vcf_filter` command.
 * Update :command:`tbl_sum` command.