Skip to content

Commit

Permalink
Merge pull request #64 from sbslee/0.34.0-dev
Browse files Browse the repository at this point in the history
0.34.0 dev
  • Loading branch information
sbslee authored Jun 7, 2022
2 parents 4df48f8 + 25e163a commit de8ce31
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 8 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
*********

0.34.0 (2022-06-08)
-------------------

* Add new optional argument ``--stranded`` to :command:`ngs-quant` command.
* Add new method :meth:`pycov.CovFrame.merge`.
* Add new method :meth:`pycov.merge`.
* :issue:`61`: Update :meth:`pymaf.MafFrame.from_vcf` method to automatically detect the CSQ field in the INFO column (thanks `@lbeltrame <https://github.com/lbeltrame>`__).
* :issue:`63`: Update :meth:`pyvcf.VcfFrame.sort` method to handle contigs that are not pre-defined.

0.33.1 (2022-05-03)
-------------------

Expand Down
4 changes: 3 additions & 1 deletion docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,7 @@ ngs-quant
$ fuc ngs-quant -h
usage: fuc ngs-quant [-h] [--thread INT] [--bootstrap INT] [--job TEXT]
[--force] [--posix]
[--force] [--posix] [--stranded TEXT]
manifest index output qsub
Pipeline for running RNAseq quantification from FASTQ files with Kallisto.
Expand Down Expand Up @@ -1058,6 +1058,8 @@ ngs-quant
--posix Set the environment variable HDF5_USE_FILE_LOCKING=FALSE
before running Kallisto. This is required for shared Posix
Filesystems (e.g. NFS, Lustre).
--stranded TEXT Strand specific reads (default: 'none') (choices:
'none', 'forward', 'reverse').
[Example] Specify queue:
$ fuc ngs-quant \
Expand Down
178 changes: 178 additions & 0 deletions fuc/api/pycov.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,90 @@ def simulate(mode='wgs', loc=30, scale=5, size=1000):

return a

def merge(
    cfs, how='inner'
):
    """
    Merge CovFrame objects.

    The frames are merged pairwise from left to right by repeatedly calling
    :meth:`CovFrame.merge`, so the first CovFrame determines the contig
    naming style of the result.

    Parameters
    ----------
    cfs : list
        List (or any other iterable) of CovFrames to be merged. Note that
        the 'chr' prefix in contig names (e.g. 'chr1' vs. '1') will be
        automatically added or removed as necessary to match the contig
        names of the first CovFrame.
    how : str, default: 'inner'
        Type of merge as defined in :meth:`pandas.merge`.

    Returns
    -------
    CovFrame
        Merged CovFrame. If ``cfs`` holds a single CovFrame, that same
        object is returned as is.

    Raises
    ------
    IndexError
        If ``cfs`` is empty.

    See Also
    --------
    CovFrame.merge
        Merge self with another CovFrame.

    Examples
    --------
    Assume we have the following data:

    >>> import numpy as np
    >>> from fuc import pycov
    >>> data1 = {
    ...     'Chromosome': ['chr1'] * 5,
    ...     'Position': np.arange(100, 105),
    ...     'A': pycov.simulate(loc=35, scale=5, size=5),
    ...     'B': pycov.simulate(loc=25, scale=7, size=5),
    ... }
    >>> data2 = {
    ...     'Chromosome': ['1'] * 5,
    ...     'Position': np.arange(102, 107),
    ...     'C': pycov.simulate(loc=35, scale=5, size=5),
    ... }
    >>> cf1 = pycov.CovFrame.from_dict(data1)
    >>> cf2 = pycov.CovFrame.from_dict(data2)
    >>> cf1.df
      Chromosome  Position   A   B
    0       chr1       100  33  17
    1       chr1       101  36  20
    2       chr1       102  39  39
    3       chr1       103  31  19
    4       chr1       104  31  10
    >>> cf2.df
      Chromosome  Position   C
    0          1       102  41
    1          1       103  37
    2          1       104  35
    3          1       105  33
    4          1       106  39

    We can merge the two CovFrames with `how='inner'` (default):

    >>> pycov.merge([cf1, cf2]).df
      Chromosome  Position   A   B   C
    0       chr1       102  39  39  41
    1       chr1       103  31  19  37
    2       chr1       104  31  10  35

    We can also merge with `how='outer'`:

    >>> pycov.merge([cf1, cf2], how='outer').df
      Chromosome  Position     A     B     C
    0       chr1       100  33.0  17.0   NaN
    1       chr1       101  36.0  20.0   NaN
    2       chr1       102  39.0  39.0  41.0
    3       chr1       103  31.0  19.0  37.0
    4       chr1       104  31.0  10.0  35.0
    5       chr1       105   NaN   NaN  33.0
    6       chr1       106   NaN   NaN  39.0
    """
    # Materialize once so generators and other one-shot iterables work too.
    cfs = list(cfs)

    # Keep the exception type an empty list has always produced, but say
    # what is actually wrong instead of "list index out of range".
    if not cfs:
        raise IndexError("'cfs' must contain at least one CovFrame")

    merged_cf = cfs[0]
    for cf in cfs[1:]:
        merged_cf = merged_cf.merge(cf, how=how)
    return merged_cf

class CovFrame:
"""
Class for storing read depth data from one or more SAM/BAM/CRAM files.
Expand Down Expand Up @@ -1128,3 +1212,97 @@ def rename(self, names, indicies=None):
cf = self.copy()
cf.df.columns = columns
return cf

def merge(
    self, other, how='inner'
):
    """
    Merge with the other CovFrame.

    The two underlying DataFrames are joined on the 'Chromosome' and
    'Position' columns and the result is wrapped in a new instance of
    the same class as ``self``.

    Parameters
    ----------
    other : CovFrame
        Other CovFrame. Note that the 'chr' prefix in contig names (e.g.
        'chr1' vs. '1') will be automatically added or removed as
        necessary to match the contig names of ``self``.
    how : str, default: 'inner'
        Type of merge as defined in :meth:`pandas.DataFrame.merge`.

    Returns
    -------
    CovFrame
        Merged CovFrame.

    See Also
    --------
    merge
        Merge multiple CovFrame objects.

    Examples
    --------
    Assume we have the following data:

    >>> import numpy as np
    >>> from fuc import pycov
    >>> data1 = {
    ...     'Chromosome': ['chr1'] * 5,
    ...     'Position': np.arange(100, 105),
    ...     'A': pycov.simulate(loc=35, scale=5, size=5),
    ...     'B': pycov.simulate(loc=25, scale=7, size=5),
    ... }
    >>> data2 = {
    ...     'Chromosome': ['1'] * 5,
    ...     'Position': np.arange(102, 107),
    ...     'C': pycov.simulate(loc=35, scale=5, size=5),
    ... }
    >>> cf1 = pycov.CovFrame.from_dict(data1)
    >>> cf2 = pycov.CovFrame.from_dict(data2)
    >>> cf1.df
      Chromosome  Position   A   B
    0       chr1       100  40  27
    1       chr1       101  32  33
    2       chr1       102  32  22
    3       chr1       103  32  29
    4       chr1       104  37  22
    >>> cf2.df
      Chromosome  Position   C
    0          1       102  33
    1          1       103  29
    2          1       104  35
    3          1       105  27
    4          1       106  25

    We can merge the two CovFrames with `how='inner'` (default):

    >>> cf1.merge(cf2).df
      Chromosome  Position   A   B   C
    0       chr1       102  32  22  33
    1       chr1       103  32  29  29
    2       chr1       104  37  22  35

    We can also merge with `how='outer'`:

    >>> cf1.merge(cf2, how='outer').df
      Chromosome  Position     A     B     C
    0       chr1       100  40.0  27.0   NaN
    1       chr1       101  32.0  33.0   NaN
    2       chr1       102  32.0  22.0  33.0
    3       chr1       103  32.0  29.0  29.0
    4       chr1       104  37.0  22.0  35.0
    5       chr1       105   NaN   NaN  27.0
    6       chr1       106   NaN   NaN  25.0
    """
    self_prefixed = bool(self.has_chr_prefix)
    other_prefixed = bool(other.has_chr_prefix)

    # Only touch ``other`` when the two frames disagree on the 'chr'
    # prefix; ``self`` dictates the naming style of the merged result.
    if self_prefixed != other_prefixed:
        mode = 'add' if self_prefixed else 'remove'
        other = other.update_chr_prefix(mode)

    df = self.df.merge(other.df, on=['Chromosome', 'Position'], how=how)

    return self.__class__(df)
5 changes: 4 additions & 1 deletion fuc/api/pymaf.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,10 @@ def one_row(r):
tumor_seq_allele1 = r.ALT[1:]
tumor_seq_allele2 = r.ALT[1:]

fields = r.INFO.replace('CSQ=', '').split(',')[0].split('|')
# Extract annotation fields.
if has_annot:
csq = [x for x in r.INFO.split(';') if x.startswith('CSQ=')][0]
fields = csq.replace('CSQ=', '').split('|')

# Get the Strand data.
if has_annot:
Expand Down
20 changes: 15 additions & 5 deletions fuc/api/pyvcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ def merge(
names (e.g. 'chr1' vs. '1') will be automatically added or removed as
necessary to match the contig names of the first VCF.
how : str, default: 'inner'
Type of merge as defined in pandas.DataFrame.merge.
Type of merge as defined in :meth:`pandas.merge`.
format : str, default: 'GT'
FORMAT subfields to be retained (e.g. 'GT:AD:DP').
sort : bool, default: True
Expand Down Expand Up @@ -2897,7 +2897,7 @@ def merge(
'chr1' vs. '1') will be automatically added or removed as
necessary to match the contig names of ``self``.
how : str, default: 'inner'
Type of merge as defined in `pandas.DataFrame.merge`.
Type of merge as defined in :meth:`pandas.DataFrame.merge`.
format : str, default: 'GT'
FORMAT subfields to be retained (e.g. 'GT:AD:DP').
sort : bool, default: True
Expand All @@ -2910,6 +2910,11 @@ def merge(
VcfFrame
Merged VcfFrame.
See Also
--------
merge
Merge multiple VcfFrame objects.
Examples
--------
Assume we have the following data:
Expand Down Expand Up @@ -5273,10 +5278,15 @@ def sort(self):
2 chr2 101 . T C . . . GT:DP 0/0:29
3 chr10 100 . G A . . . GT:DP ./.:.
"""
df = self.df.sort_values(by=['CHROM', 'POS'], ignore_index=True,
key=lambda col: [CONTIGS.index(x) if isinstance(x, str)
else x for x in col])
def f(col):
    """
    Map one sort column to sortable keys.

    String values (contig names) are replaced by an integer rank;
    non-string values (e.g. positions) are returned unchanged.
    """
    # Rank of every pre-defined contig, built once so each row costs a
    # single dict lookup instead of two linear scans of CONTIGS.
    # ``setdefault`` keeps the first index, matching ``list.index``.
    # NOTE(review): assumes CONTIGS holds only strings -- confirm.
    ranks = {}
    for i, contig in enumerate(CONTIGS):
        ranks.setdefault(contig, i)

    # Contigs that are not pre-defined sort after all known ones. Each
    # gets its own rank (alphabetical by name); sharing one rank would
    # interleave records of different contigs by position.
    extras = sorted(
        {x for x in col if isinstance(x, str) and x not in ranks}
    )
    for i, contig in enumerate(extras, start=len(CONTIGS)):
        ranks[contig] = i

    return [ranks[x] if isinstance(x, str) else x for x in col]

df = self.df.sort_values(by=['CHROM', 'POS'],
ignore_index=True, key=f)
vf = self.__class__(self.copy_meta(), df)

return vf

def subset(self, samples, exclude=False):
Expand Down
15 changes: 15 additions & 0 deletions fuc/cli/ngs_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ def create_parser(subparsers):
before running Kallisto. This is required for shared Posix
Filesystems (e.g. NFS, Lustre)."""
)
parser.add_argument(
'--stranded',
metavar='TEXT',
default='none',
choices=['none', 'forward', 'reverse'],
help=
"""Strand specific reads (default: 'none') (choices:
'none', 'forward', 'reverse')."""
)

def main(args):
if os.path.exists(args.output) and args.force:
Expand Down Expand Up @@ -129,6 +138,12 @@ def main(args):
command += f' -o {args.output}/{r.Name}'
command += f' -b {args.bootstrap}'
command += f' -t {args.thread}'
if args.stranded == 'forward':
command += ' --fr-stranded'
elif args.stranded == 'reverse':
command += ' --rf-stranded'
else:
pass
command += f' {r.Read1}'
command += f' {r.Read2}'

Expand Down
2 changes: 1 addition & 1 deletion fuc/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.33.1'
__version__ = '0.34.0'

0 comments on commit de8ce31

Please sign in to comment.