Skip to content

Commit

Permalink
Bugfix for classification when an SV type is not present (#37)
Browse files Browse the repository at this point in the history
* fix for model failing to classify when any SV type is not present

* version bump
  • Loading branch information
helrick authored Feb 14, 2024
1 parent 9b65935 commit 549f1f0
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 1 addition & 1 deletion savana/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from time import time
from datetime import datetime

__version__ = "1.0.4"
__version__ = "1.0.5"

samflag_desc_to_number = {
"BAM_CMATCH": 0, # M
Expand Down
6 changes: 5 additions & 1 deletion savana/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,12 @@ def format_data(data_matrix):

# convert the SVTYPE to 0/1
data_matrix['SVTYPE'] = data_matrix['SVTYPE'].map({'BND':0,'INS':1})
# ONE-HOT ENCODING
# one-hot-encoding of BP_NOTATION
sv_type_one_hot = pd.get_dummies(data_matrix['BP_NOTATION'])
# check to make sure all bp types are present
for bp_type in ["++","+-","-+","--"]:
if bp_type not in sv_type_one_hot:
sv_type_one_hot[bp_type] = False
data_matrix.drop('BP_NOTATION', axis=1)
data_matrix = data_matrix.join(sv_type_one_hot)

Expand Down

0 comments on commit 549f1f0

Please sign in to comment.