Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed a bug that was causing some custom sequencing error models to e… #128

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions neat/model_sequencing_error/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ def expand_counts(count_array: list, scores: list):
return np.array(ret_list)


def _make_gen(reader):
"""
solution from stack overflow to quickly count lines in a file.
https://stackoverflow.com/questions/19001402/how-to-count-the-total-number-of-lines-in-a-text-file-using-python

"""
b = reader(1024 * 1024)
while b:
yield b
b = reader(1024 * 1024)


def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offset: int, readlen: int):
"""
Parses an individual file for statistics
Expand Down Expand Up @@ -84,6 +96,13 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
line = fq_in.readline().strip()
readlens.append(len(line))

# solution from stack overflow to quickly count lines in a file.
# https://stackoverflow.com/questions/19001402/how-to-count-the-total-number-of-lines-in-a-text-file-using-python
if max_reads == np.inf:
f = open(input_file, 'rb')
f_gen = _make_gen(f.raw.read)
max_reads = sum(buf.count(b'\n') for buf in f_gen)

readlens = np.array(readlens)

# Using the statistical mode seems like the right approach here. We expect the readlens to be roughly the same.
Expand Down Expand Up @@ -153,8 +172,12 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
for i in range(read_length):
this_counts = q_count_by_pos[i]
expanded_counts = expand_counts(this_counts, quality_scores)
average_q = np.average(expanded_counts)
st_d_q = np.std(expanded_counts)
if not expanded_counts:
average_q = 0
st_d_q = 0
else:
average_q = np.average(expanded_counts)
st_d_q = np.std(expanded_counts)
avg_std_by_pos.append((average_q, st_d_q))

# TODO In progress, working on ensuring the error model produces the right shape
Expand Down Expand Up @@ -191,7 +214,7 @@ def plot_stuff(init_q, real_q, q_range, prob_q, actual_readlen, plot_path):
plt.figure(1)
z = np.array(init_q).T
x, y = np.meshgrid(range(0, len(z[0]) + 1), range(0, len(z) + 1))
plt.pcolormesh(x, Y, z, vmin=0., vmax=0.25)
plt.pcolormesh(x, y, z, vmin=0., vmax=0.25)
plt.axis([0, len(z[0]), 0, len(z)])
plt.yticks(range(0, len(z), 10), range(0, len(z), 10))
plt.xticks(range(0, len(z[0]), 10), range(0, len(z[0]), 10))
Expand Down
3 changes: 1 addition & 2 deletions neat/read_simulator/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,7 @@ def initialize_all_models(options: Options):
homozygous_freq=default_cancer_homozygous_freq,
variant_probs=default_cancer_variant_probs,
insert_len_model=default_cancer_insert_len_model,
is_cancer=True,
rng=options.rng
is_cancer=True
)

_LOG.debug("Mutation models loaded")
Expand Down
Loading