diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f6e5866..18679d3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,16 @@ Changelog ********* +0.11.0 (2021-06-10) +------------------- + +* :issue:`16`: Add new method :meth:`pyvcf.VcfFrame.cfilter_empty`. +* Add new methods :meth:`pyvep.filter_af/lof`. +* Add ``matplotlib-venn`` package as dependency for plotting Venn diagrams. +* Add new methods :meth:`pyvcf.plot_comparison/regplot/histplot`. +* :issue:`17`: Add new method :meth:`pyvep.filter_biotype`. +* Add new class :class:`pyvcf.AnnFrame`. + 0.10.0 (2021-06-03) ------------------- diff --git a/README.rst b/README.rst index cb15882..adb03b2 100644 --- a/README.rst +++ b/README.rst @@ -56,24 +56,36 @@ Your contributions (e.g. feature ideas, pull requests) are most welcome. CLI Examples ============ +SAM/BAM/CRAM +------------ + To print the header of a BAM file: .. code-block:: console $ fuc bam_head example.bam +BED +--- + To find intersection between BED files: .. code-block:: console $ fuc bed_intxn 1.bed 2.bed 3.bed > intersect.bed +FASTQ +----- + To count sequence reads in a FASTQ file: .. code-block:: console $ fuc fq_count example.fastq +FUC +--- + To check whether a file exists in the operating system: .. code-block:: console @@ -86,12 +98,18 @@ To find all VCF files within the current directory recursively: $ fuc fuc_find . vcf +TABLE +----- + To merge two tab-delimited files: .. code-block:: console $ fuc tbl_merge left.txt right.txt > merged.txt +VCF +--- + To merge VCF files: .. code-block:: console @@ -101,6 +119,9 @@ To merge VCF files: API Examples ============ +VCF +--- + To filter a VCF file based on a BED file: .. code:: python3 @@ -119,11 +140,63 @@ To remove indels from a VCF file: >>> filtered_vf = vf.filter_indel() >>> filtered_vf.to_file('no_indels.vcf') +To create a Venn diagram showing genotype concordance between groups: + +.. 
code:: python3 + + >>> from fuc import pyvcf, common + >>> common.load_dataset('pyvcf') + >>> f = '~/fuc-data/pyvcf/plot_comparison.vcf' + >>> vf = pyvcf.VcfFrame.from_file(f) + >>> a = ['Steven_A', 'John_A', 'Sara_A'] + >>> b = ['Steven_B', 'John_B', 'Sara_B'] + >>> c = ['Steven_C', 'John_C', 'Sara_C'] + >>> vf.plot_comparison(a, b, c) + +.. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/plot_comparison.png + +To create a histogram of tumor mutational burden (TMB) distribution: + +.. code:: python3 + + >>> from fuc import pyvcf + >>> vcf_data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103, 103], + ... 'ID': ['.', '.', '.', '.', '.'], + ... 'REF': ['T', 'T', 'T', 'T', 'T'], + ... 'ALT': ['C', 'C', 'C', 'C', 'C'], + ... 'QUAL': ['.', '.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT', 'GT'], + ... 'Steven_N': ['0/0', '0/0', '0/1', '0/0', '0/0'], + ... 'Steven_T': ['0/0', '0/1', '0/1', '0/1', '0/1'], + ... 'Sara_N': ['0/0', '0/1', '0/0', '0/0', '0/0'], + ... 'Sara_T': ['0/0', '0/0', '1/1', '1/1', '0/1'], + ... 'John_N': ['0/0', '0/0', '0/0', '0/0', '0/0'], + ... 'John_T': ['0/1', '0/0', '1/1', '1/1', '0/1'], + ... 'Rachel_N': ['0/0', '0/0', '0/0', '0/0', '0/0'], + ... 'Rachel_T': ['0/1', '0/1', '0/0', '0/1', '0/1'], + ... } + >>> annot_data = { + ... 'Sample': ['Steven_N', 'Steven_T', 'Sara_N', 'Sara_T', 'John_N', 'John_T', 'Rachel_N', 'Rachel_T'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara', 'John', 'John', 'Rachel', 'Rachel'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor', 'Normal', 'Tumor', 'Normal', 'Tumor'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], vcf_data) + >>> af = pyvcf.AnnFrame.from_dict(annot_data, 'Sample') + >>> vf.plot_histplot(hue='Type', af=af) + +.. 
image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/plot_histplot.png + +MAF +--- + To create an oncoplot with a MAF file: .. code:: python3 - >>> import matplotlib.pyplot as plt >>> from fuc import common, pymaf >>> common.load_dataset('tcga-laml') >>> f = '~/fuc-data/tcga-laml/tcga_laml.maf.gz' @@ -140,7 +213,6 @@ To create a summary figure for a MAF file: .. code:: python3 - >>> import matplotlib.pyplot as plt >>> from fuc import common, pymaf >>> common.load_dataset('tcga-laml') >>> f = '~/fuc-data/tcga-laml/tcga_laml.maf.gz' @@ -149,11 +221,13 @@ To create a summary figure for a MAF file: .. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/maf_summary.png +SAM/BAM/CRAM +------------ + To create read depth profile of a region from a CRAM file: .. code:: python3 - >>> import matplotlib.pyplot as plt >>> from fuc import pycov >>> cf = pycov.CovFrame.from_file('HG00525.final.cram', zero=True, ... region='chr12:21161194-21239796', names=['HG00525']) @@ -197,7 +271,7 @@ Finally, you can clone the GitHub repository and then install fuc this way: $ cd fuc $ pip install . -The nice thing about this approach is that you will have access to development versions that are not available in Anaconda or PyPI. For example, you can access a development branch with the ``git checkout`` command. +The nice thing about this approach is that you will have access to development versions that are not available in Anaconda or PyPI. For example, you can access a development branch with the ``git checkout`` command. When you do this, please make sure your environment already has all the dependencies installed. Getting Help ============ @@ -251,10 +325,10 @@ Below is the list of submodules available in API: - **pybed** : The pybed submodule is designed for working with BED files. It implements ``pybed.BedFrame`` which stores BED data as ``pandas.DataFrame`` via the `pyranges `_ package to allow fast computation and easy manipulation. 
The submodule strictly adheres to the standard `BED specification `_. - **pycov** : The pycov submodule is designed for working with depth of coverage data from sequence alingment files (SAM/BAM/CRAM). It implements ``pycov.CovFrame`` which stores read depth data as ``pandas.DataFrame`` via the `pysam `_ package to allow fast computation and easy manipulation. - **pyfq** : The pyfq submodule is designed for working with FASTQ files. It implements ``pyfq.FqFrame`` which stores FASTQ data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. -- **pymaf** : The pymaf submodule is designed for working with MAF files. It implements ``pymaf.MafFrame`` which stores MAF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The class also contains many useful plotting methods such as ``MafFrame.plot_varcls`` and ``MafFrame.plot_waterfall``. The submodule strictly adheres to the standard `MAF specification `_. -- **pysnpeff** : The pysnpeff submodule is designed for parsing VCF annotation data from the `SnpEff `_ program. It is designed to be used with ``pyvcf.VcfFrame``. -- **pyvcf** : The pyvcf submodule is designed for working with VCF files. It implements ``pyvcf.VcfFrame`` class which stores VCF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The submodule strictly adheres to the standard `VCF specification `_. -- **pyvep** : The pyvep submodule is designed for parsing VCF annotation data from the `Ensembl VEP `_. It is designed to be used with ``pyvcf.VcfFrame``. +- **pymaf** : The pymaf submodule is designed for working with MAF files. It implements ``pymaf.MafFrame`` which stores MAF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The ``pymaf.MafFrame`` class also contains many useful plotting methods such as ``MafFrame.plot_oncoplot`` and ``MafFrame.plot_summary``. The submodule strictly adheres to the standard `MAF specification `_. 
+- **pysnpeff** : The pysnpeff submodule is designed for parsing VCF annotation data from the `SnpEff `_ program. It should be used with ``pyvcf.VcfFrame``. +- **pyvcf** : The pyvcf submodule is designed for working with VCF files. It implements ``pyvcf.VcfFrame`` which stores VCF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The ``pyvcf.VcfFrame`` class also contains many useful plotting methods such as ``VcfFrame.plot_comparison`` and ``VcfFrame.plot_regplot``. The submodule strictly adheres to the standard `VCF specification `_. +- **pyvep** : The pyvep submodule is designed for parsing VCF annotation data from the `Ensembl VEP `_ program. It should be used with ``pyvcf.VcfFrame``. For getting help on a specific module (e.g. pyvcf): diff --git a/conda.yml b/conda.yml index c58df7e..3db176e 100644 --- a/conda.yml +++ b/conda.yml @@ -8,6 +8,7 @@ dependencies: - cython - lxml - matplotlib + - matplotlib-venn - notebook - numpy - pandas diff --git a/docs/api.rst b/docs/api.rst index 80e80fe..bc52021 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -16,10 +16,10 @@ Below is the list of submodules available in API: - **pybed** : The pybed submodule is designed for working with BED files. It implements ``pybed.BedFrame`` which stores BED data as ``pandas.DataFrame`` via the `pyranges `_ package to allow fast computation and easy manipulation. The submodule strictly adheres to the standard `BED specification `_. - **pycov** : The pycov submodule is designed for working with depth of coverage data from sequence alingment files (SAM/BAM/CRAM). It implements ``pycov.CovFrame`` which stores read depth data as ``pandas.DataFrame`` via the `pysam `_ package to allow fast computation and easy manipulation. - **pyfq** : The pyfq submodule is designed for working with FASTQ files. It implements ``pyfq.FqFrame`` which stores FASTQ data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. 
-- **pymaf** : The pymaf submodule is designed for working with MAF files. It implements ``pymaf.MafFrame`` which stores MAF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The class also contains many useful plotting methods such as ``MafFrame.plot_varcls`` and ``MafFrame.plot_waterfall``. The submodule strictly adheres to the standard `MAF specification `_. -- **pysnpeff** : The pysnpeff submodule is designed for parsing VCF annotation data from the `SnpEff `_ program. It is designed to be used with ``pyvcf.VcfFrame``. -- **pyvcf** : The pyvcf submodule is designed for working with VCF files. It implements ``pyvcf.VcfFrame`` class which stores VCF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The submodule strictly adheres to the standard `VCF specification `_. -- **pyvep** : The pyvep submodule is designed for parsing VCF annotation data from the `Ensembl VEP `_. It is designed to be used with ``pyvcf.VcfFrame``. +- **pymaf** : The pymaf submodule is designed for working with MAF files. It implements ``pymaf.MafFrame`` which stores MAF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The ``pymaf.MafFrame`` class also contains many useful plotting methods such as ``MafFrame.plot_oncoplot`` and ``MafFrame.plot_summary``. The submodule strictly adheres to the standard `MAF specification `_. +- **pysnpeff** : The pysnpeff submodule is designed for parsing VCF annotation data from the `SnpEff `_ program. It should be used with ``pyvcf.VcfFrame``. +- **pyvcf** : The pyvcf submodule is designed for working with VCF files. It implements ``pyvcf.VcfFrame`` which stores VCF data as ``pandas.DataFrame`` to allow fast computation and easy manipulation. The ``pyvcf.VcfFrame`` class also contains many useful plotting methods such as ``VcfFrame.plot_comparison`` and ``VcfFrame.plot_regplot``. The submodule strictly adheres to the standard `VCF specification `_. 
+- **pyvep** : The pyvep submodule is designed for parsing VCF annotation data from the `Ensembl VEP `_ program. It should be used with ``pyvcf.VcfFrame``. For getting help on a specific module (e.g. pyvcf): diff --git a/docs/conf.py b/docs/conf.py index 16d6ad4..ee990f2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,6 +49,8 @@ issues_github_path = 'sbslee/fuc' +napoleon_use_param = False + # Include the example source for plots in API docs plot_include_source = True plot_formats = [('png', 90)] diff --git a/docs/create.py b/docs/create.py index 0069e54..f639cfa 100644 --- a/docs/create.py +++ b/docs/create.py @@ -84,24 +84,36 @@ CLI Examples ============ +SAM/BAM/CRAM +------------ + To print the header of a BAM file: .. code-block:: console $ fuc bam_head example.bam +BED +--- + To find intersection between BED files: .. code-block:: console $ fuc bed_intxn 1.bed 2.bed 3.bed > intersect.bed +FASTQ +----- + To count sequence reads in a FASTQ file: .. code-block:: console $ fuc fq_count example.fastq +FUC +--- + To check whether a file exists in the operating system: .. code-block:: console @@ -114,12 +126,18 @@ $ fuc fuc_find . vcf +TABLE +----- + To merge two tab-delimited files: .. code-block:: console $ fuc tbl_merge left.txt right.txt > merged.txt +VCF +--- + To merge VCF files: .. code-block:: console @@ -129,6 +147,9 @@ API Examples ============ +VCF +--- + To filter a VCF file based on a BED file: .. code:: python3 @@ -147,11 +168,63 @@ >>> filtered_vf = vf.filter_indel() >>> filtered_vf.to_file('no_indels.vcf') +To create a Venn diagram showing genotype concordance between groups: + +.. code:: python3 + + >>> from fuc import pyvcf, common + >>> common.load_dataset('pyvcf') + >>> f = '~/fuc-data/pyvcf/plot_comparison.vcf' + >>> vf = pyvcf.VcfFrame.from_file(f) + >>> a = ['Steven_A', 'John_A', 'Sara_A'] + >>> b = ['Steven_B', 'John_B', 'Sara_B'] + >>> c = ['Steven_C', 'John_C', 'Sara_C'] + >>> vf.plot_comparison(a, b, c) + +.. 
image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/plot_comparison.png + +To create a histogram of tumor mutational burden (TMB) distribution: + +.. code:: python3 + + >>> from fuc import pyvcf + >>> vcf_data = {{ + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103, 103], + ... 'ID': ['.', '.', '.', '.', '.'], + ... 'REF': ['T', 'T', 'T', 'T', 'T'], + ... 'ALT': ['C', 'C', 'C', 'C', 'C'], + ... 'QUAL': ['.', '.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT', 'GT'], + ... 'Steven_N': ['0/0', '0/0', '0/1', '0/0', '0/0'], + ... 'Steven_T': ['0/0', '0/1', '0/1', '0/1', '0/1'], + ... 'Sara_N': ['0/0', '0/1', '0/0', '0/0', '0/0'], + ... 'Sara_T': ['0/0', '0/0', '1/1', '1/1', '0/1'], + ... 'John_N': ['0/0', '0/0', '0/0', '0/0', '0/0'], + ... 'John_T': ['0/1', '0/0', '1/1', '1/1', '0/1'], + ... 'Rachel_N': ['0/0', '0/0', '0/0', '0/0', '0/0'], + ... 'Rachel_T': ['0/1', '0/1', '0/0', '0/1', '0/1'], + ... }} + >>> annot_data = {{ + ... 'Sample': ['Steven_N', 'Steven_T', 'Sara_N', 'Sara_T', 'John_N', 'John_T', 'Rachel_N', 'Rachel_T'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara', 'John', 'John', 'Rachel', 'Rachel'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor', 'Normal', 'Tumor', 'Normal', 'Tumor'], + ... }} + >>> vf = pyvcf.VcfFrame.from_dict([], vcf_data) + >>> af = pyvcf.AnnFrame.from_dict(annot_data, 'Sample') + >>> vf.plot_histplot(hue='Type', af=af) + +.. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/plot_histplot.png + +MAF +--- + To create an oncoplot with a MAF file: .. code:: python3 - >>> import matplotlib.pyplot as plt >>> from fuc import common, pymaf >>> common.load_dataset('tcga-laml') >>> f = '~/fuc-data/tcga-laml/tcga_laml.maf.gz' @@ -168,7 +241,6 @@ .. 
code:: python3 - >>> import matplotlib.pyplot as plt >>> from fuc import common, pymaf >>> common.load_dataset('tcga-laml') >>> f = '~/fuc-data/tcga-laml/tcga_laml.maf.gz' @@ -177,11 +249,13 @@ .. image:: https://raw.githubusercontent.com/sbslee/fuc-data/main/images/maf_summary.png +SAM/BAM/CRAM +------------ + To create read depth profile of a region from a CRAM file: .. code:: python3 - >>> import matplotlib.pyplot as plt >>> from fuc import pycov >>> cf = pycov.CovFrame.from_file('HG00525.final.cram', zero=True, ... region='chr12:21161194-21239796', names=['HG00525']) @@ -225,7 +299,7 @@ $ cd fuc $ pip install . -The nice thing about this approach is that you will have access to development versions that are not available in Anaconda or PyPI. For example, you can access a development branch with the ``git checkout`` command. +The nice thing about this approach is that you will have access to development versions that are not available in Anaconda or PyPI. For example, you can access a development branch with the ``git checkout`` command. When you do this, please make sure your environment already has all the dependencies installed. 
Getting Help ============ diff --git a/docs/requirements.txt b/docs/requirements.txt index 6d20e7a..cb841c9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,6 +2,7 @@ sphinx_rtd_theme sphinx_issues autodocsumm matplotlib +matplotlib-venn numpy pandas pysam diff --git a/fuc/api/common.py b/fuc/api/common.py index e087619..8c524ca 100644 --- a/fuc/api/common.py +++ b/fuc/api/common.py @@ -69,8 +69,11 @@ def load_dataset(name, force=False): 'tcga-laml': [ 'tcga_cohort.txt.gz', 'tcga_laml.maf.gz', - 'tcga_laml_annot.tsv' - ] + 'tcga_laml_annot.tsv', + ], + 'pyvcf': [ + 'plot_comparison.vcf', + ], } base_url = ('https://raw.githubusercontent.com/sbslee/fuc-data/main') for f in datasets[name]: diff --git a/fuc/api/pymaf.py b/fuc/api/pymaf.py index 6c60ffe..67f9f18 100644 --- a/fuc/api/pymaf.py +++ b/fuc/api/pymaf.py @@ -1,11 +1,49 @@ """ The pymaf submodule is designed for working with MAF files. It implements ``pymaf.MafFrame`` which stores MAF data as ``pandas.DataFrame`` to allow -fast computation and easy manipulation. The class also contains many useful -plotting methods such as ``MafFrame.plot_varcls`` and -``MafFrame.plot_waterfall``. The submodule strictly adheres to the +fast computation and easy manipulation. The ``pymaf.MafFrame`` class also +contains many useful plotting methods such as ``MafFrame.plot_oncoplot`` and +``MafFrame.plot_summary``. The submodule strictly adheres to the standard `MAF specification `_. + +A typical MAF file contains many fields ranging from gene symbol to +protein change. However, most of the analysis in pymaf uses the +following fields: + ++------+------------------------+----------------------+-------------------------------+ +| No. 
| Name | Description | Examples | ++======+========================+======================+===============================+ +| 1 | Hugo_Symbol | HUGO gene symbol | 'TP53', 'Unknown' | ++------+------------------------+----------------------+-------------------------------+ +| 2 | Entrez_Gene_Id | Entrez or Ensembl ID | 0, 8714 | ++------+------------------------+----------------------+-------------------------------+ +| 3 | Center | Sequencing center | '.', 'genome.wustl.edu' | ++------+------------------------+----------------------+-------------------------------+ +| 4 | NCBI_Build | Genome assembly | '37', 'GRCh38' | ++------+------------------------+----------------------+-------------------------------+ +| 5 | Chromosome | Chromosome name | 'chr1' | ++------+------------------------+----------------------+-------------------------------+ +| 6 | Start_Position | Start coordinate | 119031351 | ++------+------------------------+----------------------+-------------------------------+ +| 7 | End_Position | End coordinate | 44079555 | ++------+------------------------+----------------------+-------------------------------+ +| 8 | Strand | Genomic strand | '+', '-' | ++------+------------------------+----------------------+-------------------------------+ +| 9 | Variant_Classification | Translational effect | 'Missense_Mutation', 'Silent' | ++------+------------------------+----------------------+-------------------------------+ +| 10 | Variant_Type | Mutation type | 'SNP', 'INS', 'DEL' | ++------+------------------------+----------------------+-------------------------------+ +| 11 | Reference_Allele | Reference allele | 'T', '-', 'ACAA' | ++------+------------------------+----------------------+-------------------------------+ +| 12 | Tumor_Seq_Allele1 | First tumor allele | 'A', '-', 'TCA' | ++------+------------------------+----------------------+-------------------------------+ +| 13 | Tumor_Seq_Allele2 | Second tumor allele | 'A', '-', 'TCA' | 
++------+------------------------+----------------------+-------------------------------+ +| 14 | Tumor_Sample_Barcode | Sample ID | 'TCGA-AB-3002' | ++------+------------------------+----------------------+-------------------------------+ +| 15 | Protein_Change | Protein change | 'p.L558Q' | ++------+------------------------+----------------------+-------------------------------+ """ import pandas as pd @@ -196,37 +234,131 @@ def legend_handles(name='regular'): return handles class AnnFrame: - """Class for storing annotation data. + """Class for storing sample annotation data. + + This class stores sample annotation data as pandas.DataFrame with sample + names as index. + + Note that an AnnFrame can have a different set of samples than its + accompanying MafFrame. Parameters ---------- df : pandas.DataFrame - DataFrame containing annotation data. The index must be - 'Tumor_Sample_Barcode'. + DataFrame containing sample annotation data. The index must be + sample names. See Also -------- + AnnFrame.from_dict + Construct AnnFrame from dict of array-like or dicts. AnnFrame.from_file - Construct AnnFrame from an annotation file. + Construct AnnFrame from a delimited text file. + + Examples + -------- + + >>> import pandas as pd + >>> from fuc import pymaf + >>> data = { + ... 'Tumor_Sample_Barcode': ['Steven_N', 'Steven_T', 'Sara_N', 'Sara_T'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor'], + ... 'Age': [30, 30, 57, 57] + ... } + >>> df = pd.DataFrame(data) + >>> df = df.set_index('Tumor_Sample_Barcode') + >>> af = pymaf.AnnFrame(df) + >>> af.df + Subject Type Age + Tumor_Sample_Barcode + Steven_N Steven Normal 30 + Steven_T Steven Tumor 30 + Sara_N Sara Normal 57 + Sara_T Sara Tumor 57 """ def __init__(self, df): - self.df = df + self._df = self._check_df(df) + + def _check_df(self, df): + if type(df.index) == pd.RangeIndex: + m = "Index must be sample names, not 'pandas.RangeIndex'." 
+ raise ValueError(m) + return df + + @property + def df(self): + """pandas.DataFrame : DataFrame containing sample annotation data.""" + return self._df + + @df.setter + def df(self, value): + self._df = self._check_df(value) + + @classmethod + def from_dict(cls, data, sample_col='Tumor_Sample_Barcode'): + """Construct AnnFrame from dict of array-like or dicts. + + The dictionary must have at least one column that represents sample + names which are used as index for pandas.DataFrame. + + Parameters + ---------- + data : dict + Of the form {field : array-like} or {field : dict}. + sample_col : str, default: 'Tumor_Sample_Barcode' + Column containing sample names. + + Returns + ------- + AnnFrame + AnnFrame object. + + See Also + -------- + AnnFrame + AnnFrame object creation using constructor. + AnnFrame.from_file + Construct AnnFrame from a delimited text file. + + Examples + -------- + + >>> from fuc import pymaf + >>> data = { + ... 'Tumor_Sample_Barcode': ['Steven_Normal', 'Steven_Tumor', 'Sara_Normal', 'Sara_Tumor'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor'], + ... 'Age': [30, 30, 57, 57] + ... } + >>> af = pymaf.AnnFrame.from_dict(data) + >>> af.df + Subject Type Age + Tumor_Sample_Barcode + Steven_Normal Steven Normal 30 + Steven_Tumor Steven Tumor 30 + Sara_Normal Sara Normal 57 + Sara_Tumor Sara Tumor 57 + """ + df = pd.DataFrame(data) + df = df.set_index(sample_col) + return cls(df) @classmethod - def from_file(cls, fn, sep='\t', sample_col=None): - """Construct AnnFrame from an annotation file. + def from_file(cls, fn, sample_col='Tumor_Sample_Barcode', sep='\t'): + """Construct AnnFrame from a delimited text file. - The input text file must contain a column that corresponds to - 'Tumor_Sample_Barcode'. + The text file must have at least one column that represents + sample names which are used as index for pandas.DataFrame. 
Parameters ---------- fn : str - Annotation file path (zipped or unzipped). - sep : str, default: '\t' + Text file path (zipped or unzipped). + sample_col : str, default: 'Tumor_Sample_Barcode' + Column containing sample names. + sep : str, default: '\\\\t' Delimiter to use. - sample_col : str, optional - If provided, use this column as 'Tumor_Sample_Barcode'. Returns ------- @@ -237,23 +369,44 @@ def from_file(cls, fn, sep='\t', sample_col=None): -------- AnnFrame AnnFrame object creation using constructor. - """ + AnnFrame.from_dict + Construct AnnFrame from dict of array-like or dicts. + + Examples + -------- + >>> from fuc import pymaf + >>> af1 = pymaf.AnnFrame.from_file('sample-annot-1.tsv') + >>> af2 = pymaf.AnnFrame.from_file('sample-annot-2.csv', sample_col='SampleID', sep=',') + """ df = pd.read_table(fn, sep=sep) - if sample_col is not None: - df = df.rename(columns={sample_col: 'Tumor_Sample_Barcode'}) - df = df.set_index('Tumor_Sample_Barcode') + df = df.set_index(sample_col) return cls(df) - def legend_handles(self, col, numeric=False, segments=5, samples=None, decimals=0, cmap='Pastel1'): + def legend_handles( + self, col, samples=None, numeric=False, segments=5, decimals=0, + cmap='Pastel1' + ): """Return legend handles for the given column. + In the case of a numeric column, use ``numeric=True`` which will + divide the values into equal-sized intervals, with the number of + intervals determined by the `segments` option. + Parameters ---------- col : str Column name. samples : list, optional If provided, these samples will be used to create legend handles. + numeric : bool, default: False + If True, the column will be treated as numeric. + segments : int, default: 5 + If ``numeric`` is True, the numbers will be divided + into this number of equal-sized segments. + decimals : int, default: 0 + If ``numeric`` is True, the numbers will be rounded up to this + number of decimals. cmap : str, default: 'Pastel1' Color map. 
@@ -262,6 +415,11 @@ def legend_handles(self, col, numeric=False, segments=5, samples=None, decimals= list Legend handles. + See Also + -------- + AnnFrame.plot_annot + Create a 1D categorical heatmap for the given column. + Examples -------- @@ -273,26 +431,42 @@ def legend_handles(self, col, numeric=False, segments=5, samples=None, decimals= >>> f = '~/fuc-data/tcga-laml/tcga_laml_annot.tsv' >>> af = pymaf.AnnFrame.from_file(f) >>> fig, ax = plt.subplots() - >>> handles1 = af.legend_handles('FAB_classification') - >>> handles2 = af.legend_handles('Overall_Survival_Status') - >>> leg1 = ax.legend(handles=handles1, title='FAB_classification', loc='center left') - >>> leg2 = ax.legend(handles=handles2, title='Overall_Survival_Status', loc='center right') + >>> handles1 = af.legend_handles('FAB_classification', + ... cmap='Dark2') + >>> handles2 = af.legend_handles('days_to_last_followup', + ... numeric=True, + ... cmap='viridis') + >>> handles3 = af.legend_handles('Overall_Survival_Status') + >>> leg1 = ax.legend(handles=handles1, + ... title='FAB_classification', + ... loc='center left') + >>> leg2 = ax.legend(handles=handles2, + ... title='days_to_last_followup', + ... loc='center') + >>> leg3 = ax.legend(handles=handles3, + ... title='Overall_Survival_Status', + ... 
loc='center right') >>> ax.add_artist(leg1) >>> ax.add_artist(leg2) + >>> ax.add_artist(leg3) >>> plt.tight_layout() """ - s, l = self._get_col(col, numeric=numeric, segments=segments, samples=samples) + s, l = self._get_col(col, numeric=numeric, segments=segments, + samples=samples) colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, len(l))) handles = [] for i, label in enumerate(l): handles.append(mpatches.Patch(color=colors[i], label=label)) return handles - def _get_col(self, col, numeric=False, segments=5, samples=None, decimals=0): + def _get_col( + self, col, numeric=False, segments=5, samples=None, decimals=0 + ): s = self.df[col] s = s.replace([np.inf, -np.inf], np.nan) if numeric: - boundaries = list(np.linspace(s.min(), s.max(), segments+1, endpoint=True)) + boundaries = list(np.linspace(s.min(), s.max(), + segments+1, endpoint=True)) intervals = list(zip(boundaries[:-1], boundaries[1:])) def f(x): if pd.isna(x): @@ -308,10 +482,66 @@ def f(x): return s, l def plot_annot( - self, col, numeric=False, segments=5, samples=None, decimals=0, cmap='Pastel1', ax=None, - figsize=None, **kwargs + self, col, samples=None, numeric=False, segments=5, decimals=0, + cmap='Pastel1', ax=None, figsize=None, **kwargs ): - s, l = self._get_col(col, numeric=numeric, segments=segments, samples=samples) + """Create a 1D categorical heatmap for the given column. + + In the case of a numeric column, use ``numeric=True`` which will + divide the values into equal-sized intervals, with the number of + intervals determined by the `segments` option. + + Parameters + ---------- + col : str + Column name. + samples : list, optional + If provided, these samples will be used to create legend handles. + numeric : bool, default: False + If True, the column will be treated as numeric. + segments : int, default: 5 + If ``numeric`` is True, the numbers will be divided + into this number of equal-sized segments. 
+ decimals : int, default: 0 + If ``numeric`` is True, the numbers will be rounded up to this + number of decimals. + cmap : str, default: 'Pastel1' + Color map. + + Returns + ------- + list + Legend handles. + + See Also + -------- + AnnFrame.legend_handles + Return legend handles for the given column. + + Examples + -------- + + .. plot:: + + >>> import matplotlib.pyplot as plt + >>> from fuc import common, pymaf + >>> common.load_dataset('tcga-laml') + >>> f = '~/fuc-data/tcga-laml/tcga_laml_annot.tsv' + >>> af = pymaf.AnnFrame.from_file(f) + >>> fig, [ax1, ax2, ax3] = plt.subplots(3, 1, figsize=(10, 5)) + >>> af.plot_annot('FAB_classification', ax=ax1, linewidths=1, cmap='Dark2') + >>> af.plot_annot('days_to_last_followup', ax=ax2, linewidths=1, cmap='viridis') + >>> af.plot_annot('Overall_Survival_Status', ax=ax3, linewidths=1) + >>> ax1.set_xlabel('') + >>> ax2.set_xlabel('') + >>> ax1.set_ylabel('') + >>> ax2.set_ylabel('') + >>> ax3.set_ylabel('') + >>> plt.tight_layout() + >>> plt.subplots_adjust(wspace=0.01, hspace=0.01) + """ + s, l = self._get_col(col, numeric=numeric, segments=segments, + samples=samples) d = {k: v for v, k in enumerate(l)} df = s.to_frame().applymap(lambda x: x if pd.isna(x) else d[x]) if ax is None: @@ -401,10 +631,22 @@ def from_file(cls, fn): def from_vcf(cls, vcf): """Construct MafFrame from a VCF file or VcfFrame. + The input VCF should already contain functional annotation data + from a tool such as Ensembl VEP, SnpEff, and ANNOVAR. The + recommended method is Ensembl VEP with "RefSeq transcripts" as the + transcript database and the filtering option "Show one selected + consequence per variant". + Parameters ---------- vcf : str or VcfFrame - VCF file path or VcfFrame. + VCF file or VcfFrame. + + Examples + -------- + + >>> from fuc import pymaf + >>> mf = pymaf.MafFrame.from_vcf('annotated.vcf') """ # Parse the input VCF.
if isinstance(vcf, str): diff --git a/fuc/api/pysnpeff.py b/fuc/api/pysnpeff.py index f81be48..f37ae2b 100644 --- a/fuc/api/pysnpeff.py +++ b/fuc/api/pysnpeff.py @@ -1,7 +1,7 @@ """ The pysnpeff submodule is designed for parsing VCF annotation data from -the `SnpEff `_ program. It is designed -to be used with ``pyvcf.VcfFrame``. +the `SnpEff `_ program. It should be +used with ``pyvcf.VcfFrame``. One VCF record can have several SnpEff annotations if, for example, the record is a multiallelic site or the variant is shared by diff --git a/fuc/api/pyvcf.py b/fuc/api/pyvcf.py index 3f8fa07..e9f62d6 100644 --- a/fuc/api/pyvcf.py +++ b/fuc/api/pyvcf.py @@ -1,18 +1,20 @@ """ The pyvcf submodule is designed for working with VCF files. It implements -``pyvcf.VcfFrame`` class which stores VCF data as -``pandas.DataFrame`` to allow fast computation and easy manipulation. -The submodule strictly adheres to the standard `VCF specification +``pyvcf.VcfFrame`` which stores VCF data as ``pandas.DataFrame`` to allow +fast computation and easy manipulation. The ``pyvcf.VcfFrame`` class also +contains many useful plotting methods such as ``VcfFrame.plot_comparison`` +and ``VcfFrame.plot_regplot``. The submodule strictly adheres to the +standard `VCF specification `_. -A VCF file contains metadata lines (prefixed with '##'), a header line -(prefixed with '#'), and genotype lines that begin with a chromosome +A typical VCF file contains metadata lines (prefixed with '##'), a header +line (prefixed with '#'), and genotype lines that begin with a chromosome identifier (e.g. 'chr1'). See the VCF specification above for an example VCF file. Genotype lines have nine required fields for storing variant information and variable-length fields for storing sample genotype data. For some -fields, missing values are tolerated and specified with a dot ('.'). +fields, missing values are tolerated and can be specified with a dot ('.'). 
The nine required fields are: +-----+--------+------------------------------------+------------------------+ @@ -38,10 +40,10 @@ +-----+--------+------------------------------------+------------------------+ There are several common, reserved genotype keywords that are standards -across the community. Currently, the module is aware of the +across the community. Currently, the pyvcf submodule is aware of the following: -* AD - Total read depth for each allele (R, Integer). +* AD - Total read depth for each allele (R, Integer) * AF - Allele fraction of the event in the tumor (1, Float) * DP - Read depth (1, Integer) """ @@ -52,6 +54,10 @@ from copy import deepcopy from Bio import bgzf from . import pybed +import matplotlib.pyplot as plt +from matplotlib_venn import venn2, venn3 +import os +import seaborn as sns CONTIGS = [ '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', @@ -510,7 +516,155 @@ def row_missval(r): m += ':.' return m -# -- VcfFrame ---------------------------------------------------------------- +class AnnFrame: + """Class for storing sample annotation data. + + This class stores sample annotation data as pandas.DataFrame with sample + names as index. + + Note that an AnnFrame can have a different set of samples than its + accompanying VcfFrame. + + Parameters + ---------- + df : pandas.DataFrame + DataFrame containing sample annotation data. Index must be + sample names. + + See Also + -------- + AnnFrame.from_dict + Construct AnnFrame from dict of array-like or dicts. + AnnFrame.from_file + Construct AnnFrame from a delimited text file. + + Examples + -------- + + >>> import pandas as pd + >>> from fuc import pyvcf + >>> data = { + ... 'Sample': ['Steven_N', 'Steven_T', 'Sara_N', 'Sara_T'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor'], + ... 'Age': [30, 30, 57, 57] + ... 
} + >>> df = pd.DataFrame(data) + >>> df = df.set_index('Sample') + >>> af = pyvcf.AnnFrame(df) + >>> af.df + Subject Type Age + Sample + Steven_N Steven Normal 30 + Steven_T Steven Tumor 30 + Sara_N Sara Normal 57 + Sara_T Sara Tumor 57 + """ + def _check_df(self, df): + if type(df.index) == pd.RangeIndex: + m = "Index must be sample names, not 'pandas.RangeIndex'." + raise ValueError(m) + return df + + def __init__(self, df): + self._df = self._check_df(df) + + @property + def df(self): + """pandas.DataFrame : DataFrame containing sample annotation data.""" + return self._df + + @df.setter + def df(self, value): + self._df = self._check_df(value) + + @classmethod + def from_dict(cls, data, sample_col): + """Construct AnnFrame from dict of array-like or dicts. + + The dictionary must have at least one column that represents sample + names which are used as index for pandas.DataFrame. + + Parameters + ---------- + data : dict + Of the form {field : array-like} or {field : dict}. + sample_col : str + Column containing sample names. + + Returns + ------- + AnnFrame + AnnFrame object. + + See Also + -------- + AnnFrame + AnnFrame object creation using constructor. + AnnFrame.from_file + Construct AnnFrame from a delimited text file. + + Examples + -------- + + >>> from fuc import pyvcf + >>> data = { + ... 'Sample': ['Steven_Normal', 'Steven_Tumor', 'Sara_Normal', 'Sara_Tumor'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor'], + ... 'Age': [30, 30, 57, 57] + ... } + >>> af = pyvcf.AnnFrame.from_dict(data, 'Sample') + >>> af.df + Subject Type Age + Sample + Steven_Normal Steven Normal 30 + Steven_Tumor Steven Tumor 30 + Sara_Normal Sara Normal 57 + Sara_Tumor Sara Tumor 57 + """ + df = pd.DataFrame(data) + df = df.set_index(sample_col) + return cls(df) + + @classmethod + def from_file(cls, fn, sample_col, sep='\t'): + """Construct AnnFrame from a delimited text file. 
+ + The text file must have at least one column that represents + sample names which are used as index for pandas.DataFrame. + + Parameters + ---------- + fn : str + Text file path (zipped or unzipped). + sample_col : str + Column containing sample names. + sep : str, default: '\\\\t' + Delimiter to use. + + Returns + ------- + AnnFrame + AnnFrame. + + See Also + -------- + AnnFrame + AnnFrame object creation using constructor. + AnnFrame.from_dict + Construct AnnFrame from dict of array-like or dicts. + + Examples + -------- + + >>> from fuc import pyvcf + >>> af1 = pyvcf.AnnFrame.from_file('sample-annot-1.tsv', 'Tumor_Sample_Barcode') + >>> af2 = pyvcf.AnnFrame.from_file('sample-annot-2.csv', 'SampleID', sep=',') + """ + df = pd.read_table(fn, sep=sep) + df = df.set_index(sample_col) + return cls(df) class VcfFrame: """Class for storing VCF data. @@ -561,33 +715,397 @@ def __init__(self, meta, df): self._meta = meta self._df = df.reset_index(drop=True) - @property - def meta(self): - """list : List of metadata lines.""" - return self._meta + @property + def meta(self): + """list : List of metadata lines.""" + return self._meta + + @meta.setter + def meta(self, value): + self._meta = value + + @property + def df(self): + """pandas.DataFrame : DataFrame containing VCF data.""" + return self._df + + @df.setter + def df(self, value): + self._df = value.reset_index(drop=True) + + @property + def samples(self): + """list : List of the sample names.""" + return self.df.columns[9:].to_list() + + @property + def shape(self): + """tuple : Dimensionality of VcfFrame (variants, samples).""" + return (self.df.shape[0], len(self.samples)) + + def add_dp(self): + """Compute DP using AD and add it to the FORMAT field. + + Returns + ------- + VcfFrame + Updated VcfFrame. + + Examples + -------- + Assume we have the following data: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr2', 'chr2'], + ... 'POS': [100, 100, 200, 200], + ... 
'ID': ['.', '.', '.', '.'], + ... 'REF': ['A', 'A', 'C', 'C'], + ... 'ALT': ['C', 'T', 'G', 'G,A'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.'], + ... 'FORMAT': ['GT:AD', 'GT:AD', 'GT:AD', 'GT:AD'], + ... 'Steven': ['0/1:12,15', '0/0:32,1', '0/1:16,12', './.:.'], + ... 'Sara': ['0/1:13,17', '0/1:14,15', './.:.', '1/2:0,11,17'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara + 0 chr1 100 . A C . . . GT:AD 0/1:12,15 0/1:13,17 + 1 chr1 100 . A T . . . GT:AD 0/0:32,1 0/1:14,15 + 2 chr2 200 . C G . . . GT:AD 0/1:16,12 ./.:. + 3 chr2 200 . C G,A . . . GT:AD ./.:. 1/2:0,11,17 + + We can add the DP subfield to our genotype data: + + >>> vf.add_dp().df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara + 0 chr1 100 . A C . . . GT:AD:DP 0/1:12,15:27 0/1:13,17:30 + 1 chr1 100 . A T . . . GT:AD:DP 0/0:32,1:33 0/1:14,15:29 + 2 chr2 200 . C G . . . GT:AD:DP 0/1:16,12:28 ./.:.:. + 3 chr2 200 . C G,A . . . GT:AD:DP ./.:.:. 1/2:0,11,17:28 + """ + def outfunc(r): + i = r.FORMAT.split(':').index('AD') + def infunc(x): + ad = x.split(':')[i].split(',') + dp = 0 + for depth in ad: + if depth == '.': + return f'{x}:.' + dp += int(depth) + return f'{x}:{dp}' + r.iloc[9:] = r.iloc[9:].apply(infunc) + r.FORMAT += ':DP' + return r + df = self.df.apply(outfunc, axis=1) + vf = self.__class__(self.copy_meta(), df) + return vf + + def add_flag(self, flag, order='last', index=None): + """Add the given flag to the INFO field. + + The default behavior is to add the flag to all rows in the VcfFrame. + + Parameters + ---------- + flag : str + INFO flag. + order : {'last', 'first', False}, default: 'last' + Determines the order in which the flag will be added. + + - ``last`` : Add to the end of the list. + - ``first`` : Add to the beginning of the list. + - ``False`` : Overwrite the existing field. 
+ + index : list or pandas.Series, optional + Boolean index array indicating which rows should be updated. + + Returns + ------- + VcfFrame + Updated VcfFrame. + + Examples + -------- + Assume we have the following data: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['G', 'T', 'A', 'C'], + ... 'ALT': ['A', 'C', 'T', 'A'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', 'DB', 'DB', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], + ... 'Steven': ['0/0', '0/1', '0/1', '1/1'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . . GT 0/0 + 1 chr1 101 . T C . . DB GT 0/1 + 2 chr1 102 . A T . . DB GT 0/1 + 3 chr1 103 . C A . . . GT 1/1 + + We can add the SOMATIC flag to the INFO field: + + >>> vf.add_flag('SOMATIC').df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . SOMATIC GT 0/0 + 1 chr1 101 . T C . . DB;SOMATIC GT 0/1 + 2 chr1 102 . A T . . DB;SOMATIC GT 0/1 + 3 chr1 103 . C A . . SOMATIC GT 1/1 + + Setting ``order='first'`` will append the flag at the beginning: + + >>> vf.add_flag('SOMATIC', order='first').df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . SOMATIC GT 0/0 + 1 chr1 101 . T C . . SOMATIC;DB GT 0/1 + 2 chr1 102 . A T . . SOMATIC;DB GT 0/1 + 3 chr1 103 . C A . . SOMATIC GT 1/1 + + Setting ``order=False`` will overwrite the INFO field: + + >>> vf.add_flag('SOMATIC', order=False).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . SOMATIC GT 0/0 + 1 chr1 101 . T C . . SOMATIC GT 0/1 + 2 chr1 102 . A T . . SOMATIC GT 0/1 + 3 chr1 103 . C A . . 
SOMATIC GT 1/1 + + We can also specify which rows should be updated: + + >>> vf.add_flag('SOMATIC', index=[True, True, False, False]).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven + 0 chr1 100 . G A . . SOMATIC GT 0/0 + 1 chr1 101 . T C . . DB;SOMATIC GT 0/1 + 2 chr1 102 . A T . . DB GT 0/1 + 3 chr1 103 . C A . . . GT 1/1 + """ + if index is None: + index = [True for i in range(self.shape[0])] + def f(r): + if not index[r.name]: + return r + if r.INFO == '.': + r.INFO = flag + elif not order: + r.INFO = flag + elif order == 'first': + r.INFO = f'{flag};{r.INFO}' + else: + r.INFO += f';{flag}' + return r + df = self.df.apply(f, axis=1) + vf = self.__class__(self.copy_meta(), df) + return vf + + def cfilter_empty(self, opposite=False, as_list=False): + """Remove samples whose genotype calls are all missing. + + Parameters + ---------- + opposite : bool, default: False + If True, return samples that don't meet the said criteria. + as_list : bool, default: False + If True, return a list of sample names instead of a VcfFrame. + + Returns + ------- + VcfFrame + Filtered VcfFrame. + + Examples + -------- + Assume we have the following data: + + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['G', 'T', 'G', 'T'], + ... 'ALT': ['A', 'C', 'C', 'C'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], + ... 'Steven': ['0/1', '1/1', '1/1', '1/1'], + ... 'Rachel': ['./.', './.', './.', './.'], + ... 'John': ['0/0', './.', '0/0', '0/0'], + ... 'Sara': ['./.', './.', './.', './.'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Rachel John Sara + 0 chr1 100 . G A . . . GT 0/1 ./. 0/0 ./. + 1 chr1 101 . T C . . . GT 1/1 ./. ./. ./. + 2 chr1 102 . G C . . . GT 1/1 ./. 0/0 ./. + 3 chr1 103 . T C . . . 
GT 1/1 ./. 0/0 ./. + + We can remove samples whose genotypes are all missing: + + >>> vf.cfilter_empty().df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven John + 0 chr1 100 . G A . . . GT 0/1 0/0 + 1 chr1 101 . T C . . . GT 1/1 ./. + 2 chr1 102 . G C . . . GT 1/1 0/0 + 3 chr1 103 . T C . . . GT 1/1 0/0 + + We can also select those samples: + + >>> vf.cfilter_empty(opposite=True).df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Rachel Sara + 0 chr1 100 . G A . . . GT ./. ./. + 1 chr1 101 . T C . . . GT ./. ./. + 2 chr1 102 . G C . . . GT ./. ./. + 3 chr1 103 . T C . . . GT ./. ./. + + Finally, we can return a list of sample names from the filtering: + + >>> vf.cfilter_empty(as_list=True) + ['Steven', 'John'] + """ + f = lambda r: r[9:].apply(gt_miss) + s = self.df.apply(f, axis=1).all() + if opposite: + s = s[s == True] + else: + s = s[s == False] + l = s.index.to_list() + if as_list: + return l + return self.subset(l) + + def collapse(self): + """Collapse duplicate records in the VcfFrame. + + Duplicate records have the identical values for CHROM, POS, and REF. + They can result from merging two VCF files. + + .. note:: + The method will sort the order of ALT alleles. + + Returns + ------- + VcfFrame + Collapsed VcfFrame. + + Examples + -------- + Assume we have the following data: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr2', 'chr2'], + ... 'POS': [100, 100, 200, 200], + ... 'ID': ['.', '.', '.', '.'], + ... 'REF': ['A', 'A', 'C', 'C'], + ... 'ALT': ['C', 'T', 'G', 'G,A'], + ... 'QUAL': ['.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.'], + ... 'FORMAT': ['GT:AD', 'GT:AD', 'GT:AD', 'GT:AD'], + ... 'Steven': ['0/1:12,15', './.:.', '0/1:16,12', './.:.'], + ... 'Sara': ['./.:.', '0/1:14,15', './.:.', '1/2:0,11,17'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara + 0 chr1 100 . A C . . . 
GT:AD 0/1:12,15 ./.:. + 1 chr1 100 . A T . . . GT:AD ./.:. 0/1:14,15 + 2 chr2 200 . C G . . . GT:AD 0/1:16,12 ./.:. + 3 chr2 200 . C G,A . . . GT:AD ./.:. 1/2:0,11,17 + + We collapse the VcfFrame: + + >>> vf.collapse().df + CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara + 0 chr1 100 . A C,T . . . GT:AD 0/1:12,15,0 0/2:14,0,15 + 2 chr2 200 . C A,G . . . GT:AD 0/2:16,0,12 1/2:0,17,11 + """ + df = self.df.copy() + dup_idx = df.duplicated(['CHROM', 'POS', 'REF'], keep=False) + dups = {} + for i, r in df[dup_idx].iterrows(): + name = f'{r.CHROM}:{r.POS}:{r.REF}' + if name not in dups: + dups[name] = [] + dups[name].append(i) + + def collapse_one(df): + ref_allele = df.REF.unique()[0] + alt_alleles = [] + for i, r in df.iterrows(): + alt_alleles += r.ALT.split(',') + alt_alleles = sorted(list(set(alt_alleles)), + key=lambda x: (len(x), x)) + all_alleles = [ref_allele] + alt_alleles - @meta.setter - def meta(self, value): - self._meta = value + def infunc(x, r_all_alleles, index_map): + if gt_miss(x): + return '' + old_fields = x.split(':') + old_gt = old_fields[0] + new_gt = '/'.join([str(x) for x in + sorted([index_map[int(i)] for i in old_gt.split('/')]) + ]) + new_fields = [new_gt] + for old_field in old_fields[1:]: + old_subfields = old_field.split(',') + new_subfields = ['0' for x in all_alleles] + if len(old_subfields) == len(r_all_alleles): + for i, old_subfield in enumerate(old_subfields): + new_subfields[index_map[i]] = old_subfield + new_fields.append(','.join(new_subfields)) + else: + new_fields.append(old_field) + return ':'.join(new_fields) - @property - def df(self): - """pandas.DataFrame : DataFrame containing VCF data.""" - return self._df + def outfunc(r): + r_alt_alleles = r.ALT.split(',') + r_all_alleles = [r.REF] + r_alt_alleles + old_indicies = [i for i in range(len(r_all_alleles))] + new_indicies = [all_alleles.index(x) for x in r_all_alleles] + index_map = dict(zip(old_indicies, new_indicies)) + r[9:] = r[9:].apply(infunc, 
args=(r_all_alleles, index_map)) + return r - @df.setter - def df(self, value): - self._df = value.reset_index(drop=True) + df2 = df.apply(outfunc, axis=1) - @property - def samples(self): - """list : List of the sample names.""" - return self.df.columns[9:].to_list() + def raise_error(c): + if sum(c.values != '') > 1: + message = ('cannot collapse following ' + f'records:\n{df.loc[c.index]}') + raise ValueError(message) - @property - def shape(self): - """tuple : Dimensionality of VcfFrame (variants, samples).""" - return (self.df.shape[0], len(self.samples)) + df2.iloc[:, 9:].apply(raise_error) + df2 = df2.groupby(['CHROM', 'POS', 'REF']).agg(''.join) + df2 = df2.reset_index() + cols = list(df2) + cols[2], cols[3] = cols[3], cols[2] + df2 = df2[cols] + df2.ID = df.ID.unique()[0] + df2.ALT = ','.join(alt_alleles) + df2.QUAL = df.QUAL.unique()[0] + df2.FILTER = df.FILTER.unique()[0] + df2.INFO = df.INFO.unique()[0] + df2.FORMAT = df.FORMAT.unique()[0] + s = df2.squeeze() + s = s.replace('', row_missval(s)) + return s + + for name, i in dups.items(): + df.iloc[i] = collapse_one(df.iloc[i]) + df.drop_duplicates(subset=['CHROM', 'POS', 'REF'], inplace=True) + + vf = self.__class__(self.copy_meta(), df) + return vf @classmethod def from_dict(cls, meta, data): @@ -673,6 +1191,8 @@ def from_file(cls, fn, compression=False): """ meta = [] skip_rows = 0 + if fn.startswith('~'): + fn = os.path.expanduser(fn) if fn.endswith('.gz') or compression: f = bgzf.open(fn, 'rt') else: @@ -740,199 +1260,74 @@ def compare(self, a, b, c=None): We compare Steven and Sara: >>> vf.compare('Steven', 'Sara') - (0, 1, 2, 1) - - Next, we compare all three: - - >>> vf.compare('Steven', 'Sara', 'James') - (0, 0, 1, 1, 0, 1, 1, 0) - """ - if c is None: - result = self._compare_two(a, b) - else: - result = self._compare_three(a, b, c) - return result - - def _compare_two(self, a, b): - a = a if isinstance(a, str) else self.samples[a] - b = b if isinstance(b, str) else self.samples[b] - def 
func(r): - a_has = gt_hasvar(r[a]) - b_has = gt_hasvar(r[b]) - if a_has and not b_has: - return 'Ab' - elif not a_has and b_has: - return 'aB' - elif a_has and b_has: - return 'AB' - else: - return 'ab' - d = self.df.apply(func, axis=1).value_counts().to_dict() - Ab = d['Ab'] if 'Ab' in d else 0 - aB = d['aB'] if 'aB' in d else 0 - AB = d['AB'] if 'AB' in d else 0 - ab = d['ab'] if 'ab' in d else 0 - return (Ab, aB, AB, ab) - - def _compare_three(self, a, b, c): - a = a if isinstance(a, str) else self.samples[a] - b = b if isinstance(b, str) else self.samples[b] - c = c if isinstance(c, str) else self.samples[c] - def func(r): - a_has = gt_hasvar(r[a]) - b_has = gt_hasvar(r[b]) - c_has = gt_hasvar(r[c]) - if a_has and not b_has and not c_has: - return 'Abc' - elif not a_has and b_has and not c_has: - return 'aBc' - elif a_has and b_has and not c_has: - return 'ABc' - elif not a_has and not b_has and c_has: - return 'abC' - elif a_has and not b_has and c_has: - return 'AbC' - elif not a_has and b_has and c_has: - return 'aBC' - elif a_has and b_has and c_has: - return 'ABC' - else: - return 'abc' - d = self.df.apply(func, axis=1).value_counts().to_dict() - Abc = d['Abc'] if 'Abc' in d else 0 - aBc = d['aBc'] if 'aBc' in d else 0 - ABc = d['ABc'] if 'ABc' in d else 0 - abC = d['abC'] if 'abC' in d else 0 - AbC = d['AbC'] if 'AbC' in d else 0 - aBC = d['aBC'] if 'aBC' in d else 0 - ABC = d['ABC'] if 'ABC' in d else 0 - abc = d['abc'] if 'abc' in d else 0 - return (Abc, aBc, ABc, abC, AbC, aBC, ABC, abc) - - def collapse(self): - """Collapse duplicate records in the VcfFrame. - - Duplicate records have the identical values for CHROM, POS, and REF. - They can result from merging two VCF files. - - .. note:: - The method will sort the order of ALT alleles. - - Returns - ------- - VcfFrame - Collapsed VcfFrame. - - Examples - -------- - Assume we have the following data: - - >>> from fuc import pyvcf - >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr2', 'chr2'], - ... 
'POS': [100, 100, 200, 200], - ... 'ID': ['.', '.', '.', '.'], - ... 'REF': ['A', 'A', 'C', 'C'], - ... 'ALT': ['C', 'T', 'G', 'G,A'], - ... 'QUAL': ['.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.'], - ... 'INFO': ['.', '.', '.', '.'], - ... 'FORMAT': ['GT:AD', 'GT:AD', 'GT:AD', 'GT:AD'], - ... 'Steven': ['0/1:12,15', './.:.', '0/1:16,12', './.:.'], - ... 'Sara': ['./.:.', '0/1:14,15', './.:.', '1/2:0,11,17'], - ... } - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara - 0 chr1 100 . A C . . . GT:AD 0/1:12,15 ./.:. - 1 chr1 100 . A T . . . GT:AD ./.:. 0/1:14,15 - 2 chr2 200 . C G . . . GT:AD 0/1:16,12 ./.:. - 3 chr2 200 . C G,A . . . GT:AD ./.:. 1/2:0,11,17 - - We collapse the VcfFrame: - - >>> vf.collapse().df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara - 0 chr1 100 . A C,T . . . GT:AD 0/1:12,15,0 0/2:14,0,15 - 2 chr2 200 . C A,G . . . GT:AD 0/2:16,0,12 1/2:0,17,11 - """ - df = self.df.copy() - dup_idx = df.duplicated(['CHROM', 'POS', 'REF'], keep=False) - dups = {} - for i, r in df[dup_idx].iterrows(): - name = f'{r.CHROM}:{r.POS}:{r.REF}' - if name not in dups: - dups[name] = [] - dups[name].append(i) - - def collapse_one(df): - ref_allele = df.REF.unique()[0] - alt_alleles = [] - for i, r in df.iterrows(): - alt_alleles += r.ALT.split(',') - alt_alleles = sorted(list(set(alt_alleles)), - key=lambda x: (len(x), x)) - all_alleles = [ref_allele] + alt_alleles - - def infunc(x, r_all_alleles, index_map): - if gt_miss(x): - return '' - old_fields = x.split(':') - old_gt = old_fields[0] - new_gt = '/'.join([str(x) for x in - sorted([index_map[int(i)] for i in old_gt.split('/')]) - ]) - new_fields = [new_gt] - for old_field in old_fields[1:]: - old_subfields = old_field.split(',') - new_subfields = ['0' for x in all_alleles] - if len(old_subfields) == len(r_all_alleles): - for i, old_subfield in enumerate(old_subfields): - new_subfields[index_map[i]] = old_subfield - 
new_fields.append(','.join(new_subfields)) - else: - new_fields.append(old_field) - return ':'.join(new_fields) - - def outfunc(r): - r_alt_alleles = r.ALT.split(',') - r_all_alleles = [r.REF] + r_alt_alleles - old_indicies = [i for i in range(len(r_all_alleles))] - new_indicies = [all_alleles.index(x) for x in r_all_alleles] - index_map = dict(zip(old_indicies, new_indicies)) - r[9:] = r[9:].apply(infunc, args=(r_all_alleles, index_map)) - return r - - df2 = df.apply(outfunc, axis=1) + (0, 1, 2, 1) - def raise_error(c): - if sum(c.values != '') > 1: - message = ('cannot collapse following ' - f'records:\n{df.loc[c.index]}') - raise ValueError(message) + Next, we compare all three: - df2.iloc[:, 9:].apply(raise_error) - df2 = df2.groupby(['CHROM', 'POS', 'REF']).agg(''.join) - df2 = df2.reset_index() - cols = list(df2) - cols[2], cols[3] = cols[3], cols[2] - df2 = df2[cols] - df2.ID = df.ID.unique()[0] - df2.ALT = ','.join(alt_alleles) - df2.QUAL = df.QUAL.unique()[0] - df2.FILTER = df.FILTER.unique()[0] - df2.INFO = df.INFO.unique()[0] - df2.FORMAT = df.FORMAT.unique()[0] - s = df2.squeeze() - s = s.replace('', row_missval(s)) - return s + >>> vf.compare('Steven', 'Sara', 'James') + (0, 0, 1, 1, 0, 1, 1, 0) + """ + if c is None: + result = self._compare_two(a, b) + else: + result = self._compare_three(a, b, c) + return result - for name, i in dups.items(): - df.iloc[i] = collapse_one(df.iloc[i]) - df.drop_duplicates(subset=['CHROM', 'POS', 'REF'], inplace=True) + def _compare_two(self, a, b): + a = a if isinstance(a, str) else self.samples[a] + b = b if isinstance(b, str) else self.samples[b] + def func(r): + a_has = gt_hasvar(r[a]) + b_has = gt_hasvar(r[b]) + if a_has and not b_has: + return 'Ab' + elif not a_has and b_has: + return 'aB' + elif a_has and b_has: + return 'AB' + else: + return 'ab' + d = self.df.apply(func, axis=1).value_counts().to_dict() + Ab = d['Ab'] if 'Ab' in d else 0 + aB = d['aB'] if 'aB' in d else 0 + AB = d['AB'] if 'AB' in d else 0 + ab 
= d['ab'] if 'ab' in d else 0 + return (Ab, aB, AB, ab) - vf = self.__class__(self.copy_meta(), df) - return vf + def _compare_three(self, a, b, c): + a = a if isinstance(a, str) else self.samples[a] + b = b if isinstance(b, str) else self.samples[b] + c = c if isinstance(c, str) else self.samples[c] + def func(r): + a_has = gt_hasvar(r[a]) + b_has = gt_hasvar(r[b]) + c_has = gt_hasvar(r[c]) + if a_has and not b_has and not c_has: + return 'Abc' + elif not a_has and b_has and not c_has: + return 'aBc' + elif a_has and b_has and not c_has: + return 'ABc' + elif not a_has and not b_has and c_has: + return 'abC' + elif a_has and not b_has and c_has: + return 'AbC' + elif not a_has and b_has and c_has: + return 'aBC' + elif a_has and b_has and c_has: + return 'ABC' + else: + return 'abc' + d = self.df.apply(func, axis=1).value_counts().to_dict() + Abc = d['Abc'] if 'Abc' in d else 0 + aBc = d['aBc'] if 'aBc' in d else 0 + ABc = d['ABc'] if 'ABc' in d else 0 + abC = d['abC'] if 'abC' in d else 0 + AbC = d['AbC'] if 'AbC' in d else 0 + aBC = d['aBC'] if 'aBC' in d else 0 + ABC = d['ABC'] if 'ABC' in d else 0 + abc = d['abc'] if 'abc' in d else 0 + return (Abc, aBc, ABc, abC, AbC, aBC, ABC, abc) def combine(self, a, b): """Combine the genotype data of Sample A and Sample B. @@ -1242,168 +1637,218 @@ def meta_keys(self): if '=>> from fuc import pyvcf - >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr2', 'chr2'], - ... 'POS': [100, 100, 200, 200], - ... 'ID': ['.', '.', '.', '.'], - ... 'REF': ['A', 'A', 'C', 'C'], - ... 'ALT': ['C', 'T', 'G', 'G,A'], - ... 'QUAL': ['.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.'], - ... 'INFO': ['.', '.', '.', '.'], - ... 'FORMAT': ['GT:AD', 'GT:AD', 'GT:AD', 'GT:AD'], - ... 'Steven': ['0/1:12,15', '0/0:32,1', '0/1:16,12', './.:.'], - ... 'Sara': ['0/1:13,17', '0/1:14,15', './.:.', '1/2:0,11,17'], - ... 
} - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara - 0 chr1 100 . A C . . . GT:AD 0/1:12,15 0/1:13,17 - 1 chr1 100 . A T . . . GT:AD 0/0:32,1 0/1:14,15 - 2 chr2 200 . C G . . . GT:AD 0/1:16,12 ./.:. - 3 chr2 200 . C G,A . . . GT:AD ./.:. 1/2:0,11,17 - We can add the DP subfield to our genotype data: + .. plot:: + :context: close-figs - >>> vf.add_dp().df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven Sara - 0 chr1 100 . A C . . . GT:AD:DP 0/1:12,15:27 0/1:13,17:30 - 1 chr1 100 . A T . . . GT:AD:DP 0/0:32,1:33 0/1:14,15:29 - 2 chr2 200 . C G . . . GT:AD:DP 0/1:16,12:28 ./.:.:. - 3 chr2 200 . C G,A . . . GT:AD:DP ./.:.:. 1/2:0,11,17:28 - """ - def outfunc(r): - i = r.FORMAT.split(':').index('AD') - def infunc(x): - ad = x.split(':')[i].split(',') - dp = 0 - for depth in ad: - if depth == '.': - return f'{x}:.' - dp += int(depth) - return f'{x}:{dp}' - r.iloc[9:] = r.iloc[9:].apply(infunc) - r.FORMAT += ':DP' - return r - df = self.df.apply(outfunc, axis=1) - vf = self.__class__(self.copy_meta(), df) - return vf + >>> from fuc import pyvcf, common + >>> common.load_dataset('pyvcf') + >>> f = '~/fuc-data/pyvcf/plot_comparison.vcf' + >>> vf = pyvcf.VcfFrame.from_file(f) + >>> a = ['Steven_A', 'John_A', 'Sara_A'] + >>> b = ['Steven_B', 'John_B', 'Sara_B'] + >>> c = ['Steven_C', 'John_C', 'Sara_C'] + >>> vf.plot_comparison(a, b) - def add_flag(self, flag, order='last', index=None): - """Add the given flag to the INFO field. + .. plot:: + :context: close-figs - The default behavior is to add the flag to all rows in the VcfFrame. 
+ >>> vf.plot_comparison(a, b, c) + """ + if len(a) != len(b): + raise ValueError('Groups A and B have different length.') + if c is not None and len(a) != len(c): + raise ValueError('Group C has unmatched length.') + if labels is None: + if c is None: + labels = ('A', 'B') + else: + labels = ('A', 'B', 'C') + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + venn_kws = dict(ax=ax, alpha=0.5, set_labels=labels) + if c is None: + out = self._plot_comparison_two(a, b, venn_kws) + else: + out = self._plot_comparison_three(a, b, c, venn_kws) + return ax, out + + def _plot_comparison_two(self, a, b, venn_kws): + n = [0, 0, 0, 0] + for i in range(len(a)): + n = [x + y for x, y in zip(n, self.compare(a[i], b[i]))] + out = venn2(subsets=n[:-1], **venn_kws) + return out + + def _plot_comparison_three(self, a, b, c, venn_kws): + n = [0, 0, 0, 0, 0, 0, 0, 0] + for i in range(len(a)): + n = [x + y for x, y in zip(n, self.compare(a[i], b[i], c[i]))] + out = venn3(subsets=n[:-1], **venn_kws) + return out + + def plot_histplot( + self, af=None, hue=None, kde=True, ax=None, figsize=None, **kwargs + ): + """Create a histogram of TMB distribution. Parameters ---------- - flag : str - INFO flag. - order : {'last', 'first', False}, default: 'last' - Determines the order in which the flag will be added. - - - ``last`` : Add to the end of the list. - - ``first`` : Add to the beginning of the list. - - ``False`` : Overwrite the existing field. - - index : list or pandas.Series, optional - Boolean index array indicating which rows should be updated. + af : pyvcf.AnnFrame + AnnFrame. + hue : list, optional + Grouping variable that will produce bars with different colors. + Requires the `af` option. + kde : bool, default: True + Compute a kernel density estimate to smooth the distribution. + ax : matplotlib.axes.Axes, optional + Pre-existing axes for the plot. Otherwise, create a new one. + figsize : tuple, optional + Width, height in inches. Format: (float, float). 
+ kwargs + Other keyword arguments will be passed down to + :meth:`seaborn.histplot`. Returns ------- - VcfFrame - Updated VcfFrame. + matplotlib.axes.Axes + The matplotlib axes containing the plot. Examples -------- - Assume we have the following data: - - >>> from fuc import pyvcf - >>> data = { - ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1'], - ... 'POS': [100, 101, 102, 103], - ... 'ID': ['.', '.', '.', '.'], - ... 'REF': ['G', 'T', 'A', 'C'], - ... 'ALT': ['A', 'C', 'T', 'A'], - ... 'QUAL': ['.', '.', '.', '.'], - ... 'FILTER': ['.', '.', '.', '.'], - ... 'INFO': ['.', 'DB', 'DB', '.'], - ... 'FORMAT': ['GT', 'GT', 'GT', 'GT'], - ... 'Steven': ['0/0', '0/1', '0/1', '1/1'], - ... } - >>> vf = pyvcf.VcfFrame.from_dict([], data) - >>> vf.df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . . GT 0/0 - 1 chr1 101 . T C . . DB GT 0/1 - 2 chr1 102 . A T . . DB GT 0/1 - 3 chr1 103 . C A . . . GT 1/1 - - We can add the SOMATIC flag to the INFO field: - - >>> vf.add_flag('SOMATIC').df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . SOMATIC GT 0/0 - 1 chr1 101 . T C . . DB;SOMATIC GT 0/1 - 2 chr1 102 . A T . . DB;SOMATIC GT 0/1 - 3 chr1 103 . C A . . SOMATIC GT 1/1 - Setting ``order='first'`` will append the flag at the beginning: + .. plot:: + :context: close-figs + + >>> from fuc import pyvcf + >>> vcf_data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102, 103, 103], + ... 'ID': ['.', '.', '.', '.', '.'], + ... 'REF': ['T', 'T', 'T', 'T', 'T'], + ... 'ALT': ['C', 'C', 'C', 'C', 'C'], + ... 'QUAL': ['.', '.', '.', '.', '.'], + ... 'FILTER': ['.', '.', '.', '.', '.'], + ... 'INFO': ['.', '.', '.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT', 'GT', 'GT'], + ... 'Steven_N': ['0/0', '0/0', '0/1', '0/0', '0/0'], + ... 'Steven_T': ['0/0', '0/1', '0/1', '0/1', '0/1'], + ... 'Sara_N': ['0/0', '0/1', '0/0', '0/0', '0/0'], + ... 'Sara_T': ['0/0', '0/0', '1/1', '1/1', '0/1'], + ... 
'John_N': ['0/0', '0/0', '0/0', '0/0', '0/0'], + ... 'John_T': ['0/1', '0/0', '1/1', '1/1', '0/1'], + ... 'Rachel_N': ['0/0', '0/0', '0/0', '0/0', '0/0'], + ... 'Rachel_T': ['0/1', '0/1', '0/0', '0/1', '0/1'], + ... } + >>> annot_data = { + ... 'Sample': ['Steven_N', 'Steven_T', 'Sara_N', 'Sara_T', 'John_N', 'John_T', 'Rachel_N', 'Rachel_T'], + ... 'Subject': ['Steven', 'Steven', 'Sara', 'Sara', 'John', 'John', 'Rachel', 'Rachel'], + ... 'Type': ['Normal', 'Tumor', 'Normal', 'Tumor', 'Normal', 'Tumor', 'Normal', 'Tumor'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], vcf_data) + >>> af = pyvcf.AnnFrame.from_dict(annot_data, 'Sample') + >>> vf.plot_histplot() + + .. plot:: + :context: close-figs + + >>> vf.plot_histplot(hue='Type', af=af) + """ + s = self.df.iloc[:, 9:].applymap(gt_hasvar).sum() + s.name = 'TMB' + if af is None: + df = s.to_frame() + else: + df = pd.concat([af.df, s], axis=1, join='inner') + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + sns.histplot(data=df, x='TMB', hue=hue, kde=kde, **kwargs) + return ax - >>> vf.add_flag('SOMATIC', order='first').df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . SOMATIC GT 0/0 - 1 chr1 101 . T C . . SOMATIC;DB GT 0/1 - 2 chr1 102 . A T . . SOMATIC;DB GT 0/1 - 3 chr1 103 . C A . . SOMATIC GT 1/1 + def plot_regplot(self, a, b, ax=None, figsize=None, **kwargs): + """Create a scatter plot showing TMB between paired samples. - Setting ``order=False`` will overwrite the INFO field: + Parameters + ---------- + a, b : list + Sample names. The lists must have the same shape. + ax : matplotlib.axes.Axes, optional + Pre-existing axes for the plot. Otherwise, create a new one. + figsize : tuple, optional + Width, height in inches. Format: (float, float). + kwargs + Other keyword arguments will be passed down to + :meth:`seaborn.regplot`. - >>> vf.add_flag('SOMATIC', order=False).df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . 
SOMATIC GT 0/0 - 1 chr1 101 . T C . . SOMATIC GT 0/1 - 2 chr1 102 . A T . . SOMATIC GT 0/1 - 3 chr1 103 . C A . . SOMATIC GT 1/1 + Returns + ------- + matplotlib.axes.Axes + The matplotlib axes containing the plot. - We can also specify which rows should be updated: + Examples + -------- - >>> vf.add_flag('SOMATIC', index=[True, True, False, False]).df - CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Steven - 0 chr1 100 . G A . . SOMATIC GT 0/0 - 1 chr1 101 . T C . . DB;SOMATIC GT 0/1 - 2 chr1 102 . A T . . DB GT 0/1 - 3 chr1 103 . C A . . . GT 1/1 + .. plot:: + + >>> from fuc import pyvcf + >>> data = { + ... 'CHROM': ['chr1', 'chr1', 'chr1'], + ... 'POS': [100, 101, 102], + ... 'ID': ['.', '.', '.'], + ... 'REF': ['G', 'T', 'T'], + ... 'ALT': ['A', 'C', 'C'], + ... 'QUAL': ['.', '.', '.'], + ... 'FILTER': ['.', '.', '.'], + ... 'INFO': ['.', '.', '.'], + ... 'FORMAT': ['GT', 'GT', 'GT'], + ... 'Steven_A': ['0/1', '0/1', '0/1'], + ... 'Steven_B': ['0/1', '0/1', '0/1'], + ... 'Sara_A': ['0/0', '1/1', '1/1'], + ... 'Sara_B': ['0/0', '1/1', '1/1'], + ... 'John_A': ['0/0', '0/0', '1/1'], + ... 'John_B': ['0/0', '0/0', '1/1'], + ... } + >>> vf = pyvcf.VcfFrame.from_dict([], data) + >>> vf.df + >>> a = ['Steven_A', 'Sara_A', 'John_A'] + >>> b = ['Steven_B', 'Sara_B', 'John_B'] + >>> vf.plot_regplot(a, b) """ - if index is None: - index = [True for i in range(self.shape[0])] - def f(r): - if not index[r.name]: - return r - if r.INFO == '.': - r.INFO = flag - elif not order: - r.INFO = flag - elif order == 'first': - r.INFO = f'{flag};{r.INFO}' - else: - r.INFO += f';{flag}' - return r - df = self.df.apply(f, axis=1) - vf = self.__class__(self.copy_meta(), df) - return vf + s = self.df.iloc[:, 9:].applymap(gt_hasvar).sum() + if ax is None: + fig, ax = plt.subplots(figsize=figsize) + sns.regplot(x=s[a], y=s[b], ax=ax, **kwargs) + return ax def markmiss_ad(self, threshold, samples=None, full=False, as_nan=False): """Mark genotypes whose AD is below threshold as missing. 
@@ -1428,9 +1873,9 @@ def markmiss_ad(self, threshold, samples=None, full=False, as_nan=False): See Also -------- - markmiss_af + VcfFrame.markmiss_af Similar method using AF. - markmiss_dp + VcfFrame.markmiss_dp Similar method using DP. Examples @@ -1530,9 +1975,9 @@ def markmiss_af(self, threshold, samples=None, full=False, as_nan=False): See Also -------- - markmiss_ad + VcfFrame.markmiss_ad Similar method using AD. - markmiss_dp + VcfFrame.markmiss_dp Similar method using DP. Examples @@ -1632,9 +2077,9 @@ def markmiss_dp(self, threshold, samples=None, full=False, as_nan=False): See Also -------- - markmiss_ad + VcfFrame.markmiss_ad Similar method using allele depth. - markmiss_af + VcfFrame.markmiss_af Similar method using allele fraction. Examples @@ -1723,7 +2168,7 @@ def expand(self): See Also -------- - collapse + VcfFrame.collapse Collapse duplicate records in the VcfFrame. Examples @@ -1786,7 +2231,7 @@ def one_gt(g, i): data.append(s) return self.__class__(self.copy_meta(), pd.concat(data, axis=1).T) - def filter_bed(self, bed, opposite=False, index=False): + def filter_bed(self, bed, opposite=False, as_index=False): """Select rows that overlap with the given BED data. Parameters @@ -1795,7 +2240,7 @@ def filter_bed(self, bed, opposite=False, index=False): BedFrame or path to a BED file. opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. 
Returns @@ -1855,7 +2300,7 @@ def filter_bed(self, bed, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_bed(bf, index=True) + >>> vf.filter_bed(bf, as_index=True) 0 True 1 False 2 True @@ -1871,18 +2316,18 @@ def filter_bed(self, bed, opposite=False, index=False): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_empty(self, opposite=False, index=False): + def filter_empty(self, opposite=False, as_index=False): """Remove rows with no genotype calls at all. Parameters ---------- opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -1933,7 +2378,7 @@ def filter_empty(self, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_empty(index=True) + >>> vf.filter_empty(as_index=True) 0 True 1 False 2 True @@ -1944,18 +2389,18 @@ def filter_empty(self, opposite=False, index=False): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_indel(self, opposite=False, index=False): + def filter_indel(self, opposite=False, as_index=False): """Remove rows with an indel. Parameters ---------- opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. 
Returns @@ -2004,7 +2449,7 @@ def filter_indel(self, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_indel(index=True) + >>> vf.filter_indel(as_index=True) 0 True 1 False 2 False @@ -2014,11 +2459,11 @@ def filter_indel(self, opposite=False, index=False): i = ~self.df.apply(row_hasindel, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_flagall(self, flags, opposite=False, index=False): + def filter_flagall(self, flags, opposite=False, as_index=False): """Select rows if all of the given INFO flags are present. Parameters @@ -2027,7 +2472,7 @@ def filter_flagall(self, flags, opposite=False, index=False): List of INFO flags. opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2037,7 +2482,7 @@ def filter_flagall(self, flags, opposite=False, index=False): See Also -------- - filter_flagany + VcfFrame.filter_flagany Similar method that selects rows if any one of the given INFO flags is present. @@ -2082,7 +2527,7 @@ def filter_flagall(self, flags, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_flagall(['H2', 'DB'], index=True) + >>> vf.filter_flagall(['H2', 'DB'], as_index=True) 0 False 1 True 2 True @@ -2095,11 +2540,11 @@ def f(r): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_flagany(self, flags, opposite=False, index=False): + def filter_flagany(self, flags, opposite=False, as_index=False): """Select rows if any one of the given INFO flags is present. Parameters @@ -2108,7 +2553,7 @@ def filter_flagany(self, flags, opposite=False, index=False): List of INFO flags. 
opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2118,7 +2563,7 @@ def filter_flagany(self, flags, opposite=False, index=False): See Also -------- - filter_flagall + VcfFrame.filter_flagall Similar method that selects rows if all of the given INFO flags are present. @@ -2163,7 +2608,7 @@ def filter_flagany(self, flags, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_flagany(['H2'], index=True) + >>> vf.filter_flagany(['H2'], as_index=True) 0 False 1 True 2 True @@ -2176,18 +2621,18 @@ def f(r): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_multialt(self, opposite=False, index=False): + def filter_multialt(self, opposite=False, as_index=False): """Remove rows with multiple ALT alleles. Parameters ---------- opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2238,7 +2683,7 @@ def filter_multialt(self, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_multialt(index=True) + >>> vf.filter_multialt(as_index=True) 0 False 1 True 2 True @@ -2248,18 +2693,18 @@ def filter_multialt(self, opposite=False, index=False): i = self.df.apply(lambda r: ',' not in r.ALT, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_pass(self, opposite=False, index=False): + def filter_pass(self, opposite=False, as_index=False): """Select rows with PASS in the FILTER field. 
Parameters ---------- opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2308,7 +2753,7 @@ def filter_pass(self, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_pass(index=True) + >>> vf.filter_pass(as_index=True) 0 True 1 False 2 True @@ -2319,18 +2764,18 @@ def filter_pass(self, opposite=False, index=False): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_phased(self, opposite=False, index=False): + def filter_phased(self, opposite=False, as_index=False): """Remove rows with phased genotypes. Parameters ---------- opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2379,7 +2824,7 @@ def filter_phased(self, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_phased(index=True) + >>> vf.filter_phased(as_index=True) 0 False 1 True 2 True @@ -2390,17 +2835,19 @@ def filter_phased(self, opposite=False, index=False): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_polyp(self, opposite=False, index=False): + def filter_polyp(self, opposite=False, as_index=False): """Remove rows with a polyploid genotype call. Parameters ---------- - include : bool, default: False - If True, include only such rows instead of excluding them. + opposite : bool, default: False + If True, return rows that don't meet the said criteria. + as_index : bool, default: False + If True, return boolean index array instead of VcfFrame. 
Returns ------- @@ -2448,7 +2895,7 @@ def filter_polyp(self, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_polyp(index=True) + >>> vf.filter_polyp(as_index=True) 0 False 1 True 2 False @@ -2459,11 +2906,11 @@ def filter_polyp(self, opposite=False, index=False): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_qual(self, threshold, opposite=False, index=False): + def filter_qual(self, threshold, opposite=False, as_index=False): """Select rows with minimum QUAL value. Parameters @@ -2472,7 +2919,7 @@ def filter_qual(self, threshold, opposite=False, index=False): Minimum QUAL value. opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2522,7 +2969,7 @@ def filter_qual(self, threshold, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_qual(30, index=True) + >>> vf.filter_qual(30, as_index=True) 0 False 1 True 2 False @@ -2537,11 +2984,11 @@ def one_row(r): i = self.df.apply(one_row, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_sampall(self, samples=None, opposite=False, index=False): + def filter_sampall(self, samples=None, opposite=False, as_index=False): """Select rows if all of the given samples have the variant. The default behavior is to use all samples in the VcfFrame. @@ -2552,7 +2999,7 @@ def filter_sampall(self, samples=None, opposite=False, index=False): List of sample names or indicies. opposite : bool, default: False If True, return rows that don't meet the said criteria. 
- index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2562,7 +3009,7 @@ def filter_sampall(self, samples=None, opposite=False, index=False): See Also -------- - filter_sampany + VcfFrame.filter_sampany Similar method that selects rows if any one of the given samples has the variant. @@ -2617,7 +3064,7 @@ def filter_sampall(self, samples=None, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_sampall(index=True) + >>> vf.filter_sampall(as_index=True) 0 True 1 False 2 False @@ -2633,11 +3080,11 @@ def filter_sampall(self, samples=None, opposite=False, index=False): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_sampany(self, samples=None, opposite=False, index=False): + def filter_sampany(self, samples=None, opposite=False, as_index=False): """Select rows if any one of the given samples has the variant. The default behavior is to use all samples in the VcfFrame. @@ -2648,7 +3095,7 @@ def filter_sampany(self, samples=None, opposite=False, index=False): List of sample names or indicies. opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2658,7 +3105,7 @@ def filter_sampany(self, samples=None, opposite=False, index=False): See Also -------- - filter_sampall + VcfFrame.filter_sampall Similar method that selects rows if all of the given samples have the variant. 
@@ -2712,7 +3159,7 @@ def filter_sampany(self, samples=None, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_sampany(index=True) + >>> vf.filter_sampany(as_index=True) 0 True 1 True 2 True @@ -2732,7 +3179,7 @@ def filter_sampany(self, samples=None, opposite=False, index=False): return i return self.__class__(self.copy_meta(), self.df[i]) - def filter_sampnum(self, threshold, opposite=False, index=False): + def filter_sampnum(self, threshold, opposite=False, as_index=False): """Select rows if the variant is prevalent enough. Parameters @@ -2741,7 +3188,7 @@ def filter_sampnum(self, threshold, opposite=False, index=False): Minimum number or fraction of samples with the variant. opposite : bool, default: False If True, return rows that don't meet the said criteria. - index : bool, default: False + as_index : bool, default: False If True, return boolean index array instead of VcfFrame. Returns @@ -2798,7 +3245,7 @@ def filter_sampnum(self, threshold, opposite=False, index=False): Finally, we can return boolean index array from the filtering: - >>> vf.filter_sampnum(2, index=True) + >>> vf.filter_sampnum(2, as_index=True) 0 True 1 True 2 False @@ -2813,7 +3260,7 @@ def f(r): i = self.df.apply(f, axis=1) if opposite: i = ~i - if index: + if as_index: return i vf = self.__class__(self.copy_meta(), self.df[i]) return vf diff --git a/fuc/api/pyvep.py b/fuc/api/pyvep.py index e176c58..3a724dc 100644 --- a/fuc/api/pyvep.py +++ b/fuc/api/pyvep.py @@ -1,7 +1,13 @@ """ The pyvep submodule is designed for parsing VCF annotation data from the -`Ensembl VEP `_. -It is designed to be used with ``pyvcf.VcfFrame``. +`Ensembl VEP `_ +program. It should be used with ``pyvcf.VcfFrame``. + +The input VCF should already contain functional annotation data from +Ensemble VEP. 
The recommended method is Ensemble VEP's +`web interface `_ with +“RefSeq transcripts” as the transcript database and the filtering option +“Show one selected consequence per variant”. """ import re @@ -47,14 +53,6 @@ 'intergenic_variant' ] -def _get_keys(vf): - """Return existing annotation keys (e.g. Allele, IMPACT).""" - l = [] - for i, line in enumerate(vf.meta): - if 'ID=CSQ' in line: - l = re.search(r'Format: (.*?)">', vf.meta[i]).group(1).split('|') - return l - def row_firstann(r): """Return the first result in the row. @@ -492,7 +490,7 @@ def parseann(vf, targets, sep=' | ', as_series=False): 3 MTOR dtype: object """ - _targets = [x if isinstance(x, int) else _get_keys(vf).index(x) for x in targets] + _targets = [x if isinstance(x, int) else annot_names(vf).index(x) for x in targets] def func(r): ann = row_firstann(r) if not ann: @@ -509,13 +507,13 @@ def func(r): def get_index(vf, target): """Return the index of the target field (e.g. CLIN_SIG).""" - headers = _get_keys(vf) + headers = annot_names(vf) return headers.index(target) def get_table(vf): """Write the VcfFrame as a tab-delimited text file.""" df = vf.df.copy() - headers = _get_keys(vf) + headers = annot_names(vf) def func(r): ann = row_firstann(r) if ann: @@ -597,3 +595,126 @@ def pick_result(vf, mode='mostsevere'): one_row = lambda r: pyvcf.row_updateinfo(r, 'CSQ', funcs[mode](r)) new_vf.df.INFO = vf.df.apply(one_row, axis=1) return new_vf + +def annot_names(vf): + """Return the list of avaialble consequence annotations in the VcfFrame. + + Parameters + ---------- + vf : VcfFrame + VcfFrame. + + Returns + ------- + list + List of consequence annotations. + """ + l = [] + for i, line in enumerate(vf.meta): + if 'ID=CSQ' in line: + l = re.search(r'Format: (.*?)">', vf.meta[i]).group(1).split('|') + return l + +def filter_af(vf, name, threshold, opposite=None, as_index=False): + """Select rows whose selected AF is below threshold. 
+ + Parameters + ---------- + name : str + Name of the consequence annotation representing AF (e.g. 'gnomAD_AF'). + threshold : float + Minimum AF. + opposite : bool, default: False + If True, return rows that don't meet the said criteria. + as_index : bool, default: False + If True, return boolean index array instead of VcfFrame. + + Returns + ------- + VcfFrame or pandas.Series + Filtered VcfFrame or boolean index array. + """ + i = annot_names(vf).index(name) + def one_row(r): + af = row_firstann(r).split('|')[i] + if not af: + af = 0 + return float(af) < threshold + i = vf.df.apply(one_row, axis=1) + if opposite: + i = ~i + if as_index: + return i + df = vf.df[i].reset_index(drop=True) + vf = vf.__class__(vf.copy_meta(), df) + return vf + +def filter_lof(vf, opposite=None, as_index=False): + """Select rows whose conseuqence annotation is deleterious and/or LoF. + + Parameters + ---------- + opposite : bool, default: False + If True, return rows that don't meet the said criteria. + as_index : bool, default: False + If True, return boolean index array instead of VcfFrame. + + Returns + ------- + VcfFrame or pandas.Series + Filtered VcfFrame or boolean index array. 
+ """ + consequence_index = annot_names(vf).index('Consequence') + impact_index = annot_names(vf).index('IMPACT') + polyphen_index = annot_names(vf).index('PolyPhen') + sift_index = annot_names(vf).index('SIFT') + def one_row(r): + l = row_firstann(r).split('|') + consequence = l[consequence_index] + impact = l[impact_index] + polyphen = l[polyphen_index] + sift = l[sift_index] + if impact not in ['HIGH', 'MODERATE']: + return False + if consequence == 'missense_variant': + if 'damaging' in polyphen or 'deleterious' in sift: + return True + else: + return False + return True + i = vf.df.apply(one_row, axis=1) + if opposite: + i = ~i + if as_index: + return i + df = vf.df[i] + vf = vf.__class__(vf.copy_meta(), df) + return vf + +def filter_biotype(vf, value, opposite=None, as_index=False): + """Select rows whose BIOTYPE matches the given value. + + Parameters + ---------- + opposite : bool, default: False + If True, return rows that don't meet the said criteria. + as_index : bool, default: False + If True, return boolean index array instead of VcfFrame. + + Returns + ------- + VcfFrame or pandas.Series + Filtered VcfFrame or boolean index array. + """ + i = annot_names(vf).index('BIOTYPE') + def one_row(r): + l = row_firstann(r).split('|') + return l[i] == value + i = vf.df.apply(one_row, axis=1) + if opposite: + i = ~i + if as_index: + return i + df = vf.df[i] + vf = vf.__class__(vf.copy_meta(), df) + return vf diff --git a/fuc/version.py b/fuc/version.py index 9d1bb72..f323a57 100644 --- a/fuc/version.py +++ b/fuc/version.py @@ -1 +1 @@ -__version__ = '0.10.0' +__version__ = '0.11.0' diff --git a/setup.py b/setup.py index 33161ea..dbc32f3 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ exec(open('fuc/version.py').read()) requirements = [ - 'biopython', 'lxml', 'matplotlib', 'numpy', 'pandas', + 'biopython', 'lxml', 'matplotlib', 'matplotlib-venn', 'numpy', 'pandas', 'pyranges', 'pysam', 'seaborn' ]