-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathcpgs.py
34 lines (27 loc) · 798 Bytes
/
cpgs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""
Read a fastq file and count CpGs
"""
import os
import sys
import argparse
from roblib import bcolors
from roblib import stream_fastq
def countcpgs(fqfile):
    """
    Build a histogram of per-read 'CG' counts for a fastq file
    :param fqfile: the fastq file, passed straight to stream_fastq
    :return: a dict whose keys are the number of 'CG' occurrences in a read
             and whose values are how many reads had that many
    """

    histogram = {}
    # each record unpacks to four fields; only the sequence is used here
    for _seqid, _header, sequence, _qual in stream_fastq(fqfile):
        n_cg = sequence.count('CG')
        if n_cg in histogram:
            histogram[n_cg] += 1
        else:
            histogram[n_cg] = 1
    return histogram
if __name__ == '__main__':
    # Script entry point: parse the command line, count CGs in the fastq file
    # given with -f, and print one tab-separated row per distinct CG count.
    cli_parser = argparse.ArgumentParser(description='Count CGs in a fastq file')
    cli_parser.add_argument('-f', help='fastq file', required=True)
    # -v is accepted on the command line but nothing below consults it
    cli_parser.add_argument('-v', help='verbose output', action='store_true')
    opts = cli_parser.parse_args()

    count = countcpgs(opts.f)
    # iterating a dict yields its keys, so this orders the rows by CG count
    for c in sorted(count):
        print(f"{c}\t{count[c]}")