-
Notifications
You must be signed in to change notification settings - Fork 0
/
2023-02-10--plot-read-contig-starts-by-sample.py
executable file
·87 lines (66 loc) · 2.44 KB
/
2023-02-10--plot-read-contig-starts-by-sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
import sys
import glob
import json
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
samples = {} # accession_{1,2} -> vid -> f/r + loc -> count
for fname in sys.argv[1:]:
accession, _ = fname.split("_")
with open(fname) as inf:
samples[accession] = json.load(inf)
all_vids = set()
for vids in samples.values():
all_vids.update(vids)
all_vids = list(sorted(all_vids))
def to_slug(vid):
return vid.split(".")[0]
fig, axs = plt.subplots(#nrows=9, ncols=1,
#figsize=(4*1, 4*9),
nrows=6, ncols=3,
figsize=(12, 24),
constrained_layout=True)
plt.suptitle("read starts by location, per sample")
fig.supylabel("position along genome in this sample")
fig.supxlabel("reads starting at this position")
for direction in 'fr':
for row, vid in enumerate(all_vids):
slug = to_slug(vid)
ax = axs[(row%3)*2 + (direction == 'f')][row//3]
loc_vals = Counter()
max_loc = 0
for accession in samples:
for frloc, count in samples[accession][vid].items():
if frloc.startswith(direction):
loc = int(frloc[1:])
max_loc = max(max_loc, loc)
loc_vals[loc] += count
loc_ranks = {} # loc -> rank
for rank, (val, loc) in enumerate(sorted(
[(loc_vals[loc], loc) for loc in range(max_loc + 1)],
reverse=True)):
loc_ranks[loc] = rank
offset = 0
for accession in samples:
points = []
vals = Counter()
for frloc, count in samples[accession][vid].items():
if frloc.startswith(direction):
loc = int(frloc[1:])
if loc < 0:
continue
points.append((loc_ranks[loc], count))
if len(points) < max_loc / 4:
continue
points.sort()
xs = [x for (x,y) in points]
ys = [y for (x,y) in points]
ys = [y / sum(ys) for y in ys]
xs = [x + offset * (max(xs)/20) for x in xs]
ys = [y + offset * (max(ys)/20) for y in ys]
ax.plot(xs, ys)
offset += 1
ax.set_title("%s:%s" % (slug, direction))
fig.savefig("read-start-positions-by-sample.png", dpi=180)
plt.clf()