-
Notifications
You must be signed in to change notification settings - Fork 0
/
2023-03-17--qc-lengths.py
executable file
·65 lines (51 loc) · 1.63 KB
/
2023-03-17--qc-lengths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
import glob
import json
import numpy as np
import matplotlib.pyplot as plt
def length_histogram(lengths):
    """Expand a sparse length -> count mapping into dense plot series.

    Args:
        lengths: Mapping whose keys are lengths written as decimal strings
            and whose values are the amounts recorded for each length
            (used downstream as y values and averaging weights).

    Returns:
        An ``(xs, ys)`` pair of equal-length lists: ``xs`` is every integer
        from 0 through the largest key, and ``ys`` is the matching value
        from the mapping (0 where a length has no entry).  Returns ``None``
        when the mapping is empty, since there is no maximum to expand to.
    """
    if not lengths:
        return None
    max_length = max(int(length) for length in lengths)
    xs = list(range(max_length + 1))
    ys = [lengths.get(str(length), 0) for length in xs]
    return xs, ys


data = {}  # accession -> (xs, ys)
for fname in glob.glob("*.json"):
    # Strip only the final extension; unpacking fname.split(".") into two
    # names would raise ValueError for any filename with more than one dot.
    accession = fname.rsplit(".", 1)[0]
    with open(fname) as inf:
        qc = json.load(inf)
    # Files that carry no post-cleaning length data are skipped.
    if "post_cleaning" not in qc:
        continue
    post_cleaning = qc["post_cleaning"]
    if "lengths" not in post_cleaning:
        continue
    series = length_histogram(post_cleaning["lengths"])
    if series is None:
        # An empty "lengths" object is treated like a missing one.
        continue
    data[accession] = series
# Write one "<accession>.qc.png" line plot per accession, visiting the
# accessions in sorted order.
for name in sorted(data):
    x_values, y_values = data[name]
    figure, axes = plt.subplots(constrained_layout=True)
    axes.plot(x_values, y_values)
    figure.savefig("%s.qc.png" % name)
    # Close the figure once it is saved so open figures do not accumulate.
    plt.close(figure)
# Plot the histogram summed over all accessions ("overall.qc.png") and
# print each accession's weighted mean of xs (weights are its ys).
fig, ax = plt.subplots(constrained_layout=True)
# The x axis is every index up to the longest histogram.  It must not be
# accumulated across accessions: summing each accession's xs would scale
# the axis by the number of accessions contributing at each position.
n_points = max((len(ys) for _, ys in data.values()), default=0)
big_xs = np.arange(n_points)
big_ys = np.zeros(n_points)
for accession, (xs, ys) in sorted(data.items()):
    xs = np.array(xs)
    ys = np.array(ys)
    print("%s: %s" % (accession, np.average(xs, weights=ys)))
    # A shorter histogram only contributes to the positions it covers.
    big_ys[:len(ys)] += ys
ax.plot(big_xs, big_ys)
fig.savefig("overall.qc.png")
plt.close(fig)
# Overlay every accession's histogram on a single figure, with each curve
# divided by its own total so curves of different sizes are comparable,
# and save the result as "multi.qc.png".
figure, axes = plt.subplots(constrained_layout=True)
for name in sorted(data):
    x_values, y_values = data[name]
    total = sum(y_values)
    axes.plot(x_values, np.array(y_values) / total, label=name)
axes.legend()
figure.savefig("multi.qc.png")
plt.close(figure)