Skip to content

Commit

Permalink
Fixing a bug when requested read length is different than the model
Browse files Browse the repository at this point in the history
  • Loading branch information
joshfactorial committed Oct 24, 2024
1 parent 83c2d3b commit d297910
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 10 deletions.
1 change: 0 additions & 1 deletion neat/models/default_quality_score_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# All numbers from 1-42
default_quality_scores = np.arange(1, 43)


default_qual_score_probs = np.array([
[35.20551064, 5.39726466],
[35.05310236, 5.63622973],
Expand Down
16 changes: 8 additions & 8 deletions neat/models/error_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,27 +67,27 @@ def __init__(

def get_quality_scores(
self,
run_read_length: int,
model_read_length: int,
length: int,
rng
) -> np.ndarray:
"""
Takes a read_length and rng and returns an array of quality scores
Takes a length and rng and returns an array of quality scores
:param run_read_length: The desired length of the quality score array
:param model_read_length: the original read length for the model
:param length: The desired length of the quality score array
:param rng: random number generator.
:return: An array of quality scores.
"""
if self.uniform_quality_score:
return np.array([self.uniform_quality_score] * run_read_length)
return np.array([self.uniform_quality_score] * length)
else:
if run_read_length == model_read_length:
if length == model_read_length:
quality_index_map = np.arange(model_read_length)
else:
# This is basically a way to evenly spread the distribution across the number of bases in the read
quality_index_map = np.array(
[max([0, model_read_length * n // run_read_length]) for n in range(run_read_length)]
[max([0, model_read_length * n // length]) for n in range(length)]
)

temp_qual_array = []
Expand Down Expand Up @@ -206,7 +206,7 @@ def get_sequencing_errors(
# This is to prevent deletion error collisions and to keep there from being too many indel errors.
if 0 < index < self.read_length - max(
self.deletion_len_model) and total_indel_length > self.read_length // 4:
error_type = self.rng.choice(a=list(self.variant_probs), p=list(self.variant_probs.values()))
error_type = rng.choice(a=list(self.variant_probs), p=list(self.variant_probs.values()))

# Deletion error
if error_type == Deletion:
Expand All @@ -227,7 +227,7 @@ def get_sequencing_errors(
elif error_type == Insertion:
insertion_length = self.get_insertion_length()
insertion_reference = reference_segment[index]
insert_string = ''.join(self.rng.choice(ALLOWED_NUCL, size=insertion_length))
insert_string = ''.join(rng.choice(ALLOWED_NUCL, size=insertion_length))
insertion_alternate = insertion_reference + insert_string
introduced_errors.append(
ErrorContainer(Insertion, index, insertion_length, insertion_reference, insertion_alternate)
Expand Down
2 changes: 1 addition & 1 deletion neat/read_simulator/utils/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def finalize_read_and_write(
"""

# Generate quality scores for the read
self.quality_array = qual_model.get_quality_scores(err_model.read_length, self.run_read_length, rng)
self.quality_array = qual_model.get_quality_scores(err_model.read_length, len(self.reference_segment), rng)

# This replaces either hard or soft-masked reference segment with upper case or a standard repeat
# It updates the quality array and reference segment in place, including reversing them, if appropriate
Expand Down

0 comments on commit d297910

Please sign in to comment.