Skip to content

Commit

Permalink
added expected_unique, RPU
Browse files Browse the repository at this point in the history
  • Loading branch information
alfredsimkin committed Jul 2, 2020
1 parent 6e5b0b1 commit 52072c1
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions pypblat/te_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,29 @@ def windowed(seq, n=2):
class ReferenceStatistics:
    """Plain container for window statistics computed over a reference file.

    A new instance starts with every table empty; the attributes are meant
    to be filled in by the code that builds the statistics.
    """

    def __init__(self):
        # Windows that occur only once in the reference, keyed by window
        # sequence. NOTE(review): presumably each value is the list of
        # record ids owning that window -- confirm against the code that
        # populates it.
        self.unique_windows = {}
        # Dict[reference id, int]: how many unique windows each reference has.
        self.unique_counts = {}
        # Window sequence -> list of record ids, for windows that are not
        # unique.
        self.non_unique_windows = {}
        # Dict[TE Name, int]
        self.theoretical_counts = {}

def parse_unique(unique_windows):
    """Count how many unique windows belong to each reference.

    Args:
        unique_windows: Mapping from a window sequence to an iterable of
            the reference ids associated with that window.

    Returns:
        A plain ``dict`` mapping each reference id to the number of times
        it appears across all values of ``unique_windows``. Keys appear in
        first-seen order; an empty mapping yields an empty dict.
    """
    ref_counts = {}
    # Only the values matter here, so iterate them directly instead of
    # walking the keys and looking each one up again.
    for refs in unique_windows.values():
        for ref in refs:
            ref_counts[ref] = ref_counts.get(ref, 0) + 1
    return ref_counts

def make_reference_statistics(read_len: int, path: Path, seq_format: str = "fasta") -> ReferenceStatistics:

theoretical_counts = defaultdict(int)
stats = ReferenceStatistics()
all_windows = ((''.join(window), record.id)
all_windows = (((''.join(window)).upper(), record.id)
for record in SeqIO.parse(str(path), seq_format)
for window in windowed(record.seq, read_len))
sorted_windows = sorted(all_windows, key=lambda x: x[0])

for seq, seq_ids in groupby(sorted_windows, key=lambda x: x[0]):

seq_ids = list(x[1] for x in seq_ids)
Expand All @@ -45,6 +54,7 @@ def make_reference_statistics(read_len: int, path: Path, seq_format: str = "fast
continue

stats.non_unique_windows[seq] = seq_ids

stats.unique_counts=parse_unique(stats.unique_windows)
stats.theoretical_counts = dict(theoretical_counts)
print(stats.theoretical_counts)
return stats

0 comments on commit 52072c1

Please sign in to comment.