Skip to content

Commit

Permalink
update valid library_strategy values
Browse files (browse the repository at this point in the history)
  • Loading branch information
danlu1 committed Nov 5, 2024
1 parent 42b5ff8 commit 63a22f3
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 19 deletions.
9 changes: 4 additions & 5 deletions genie_registry/assay.py
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,11 @@
"""Assay information class"""

import os
import yaml

import pandas as pd

from genie.example_filetype_format import FileTypeFormat
import yaml
from genie import extract, load, process_functions
from genie.example_filetype_format import FileTypeFormat


class Assayinfo(FileTypeFormat):
Expand Down Expand Up @@ -128,7 +127,7 @@ def _get_dataframe(self, filepath_list):
all_panel_info = pd.concat([all_panel_info, assay_finaldf])
return all_panel_info

def _validate(self, assay_info_df, project_id):
def _validate(self, assay_info_df):
"""
Validates the values of assay information file
Expand Down Expand Up @@ -202,7 +201,7 @@ def _validate(self, assay_info_df, project_id):
warn, error = process_functions.check_col_and_values(
assay_info_df,
"library_strategy",
read_group_headers["library_strategy"]["enum"],
["Targeted Sequencing", "WXS"],
filename="Assay_information.yaml",
required=True,
)
Expand Down
27 changes: 13 additions & 14 deletions tests/test_assay.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,9 +5,8 @@

import pandas as pd
import pytest

from genie_registry.assay import Assayinfo
from genie import extract, process_functions
from genie_registry.assay import Assayinfo

GDC_DATA_DICT = {
"properties": {
Expand Down Expand Up @@ -45,7 +44,7 @@ def test_validinput__validate(assay_info):
assay_info_dict = {
"SEQ_ASSAY_ID": ["SAGE-1", "SAGE-3"],
"is_paired_end": [True, False],
"library_strategy": ["value1", "value2"],
"library_strategy": ["Targeted Sequencing", "WXS"],
"library_selection": ["value1", "value2"],
"platform": ["value1", "value2"],
"instrument_model": ["value1", "value2"],
Expand All @@ -68,18 +67,18 @@ def test_validinput__validate(assay_info):
), patch.object(
process_functions, "get_gdc_data_dictionary", return_value=test_dict
) as patch_get_gdc:
error, warning = assay_info._validate(assay_info_df, "syn9999")
error, warning = assay_info._validate(assay_info_df)
assert error == ""
assert warning == ""
patch_get_gdc.assert_called()


def test_case__validate(assay_info):
"""Valid input should have no errors or warnings"""
"""Valid input with lowercase SEQ_ASSAY_ID, should have no errors or warnings"""
assay_info_dict = {
"SEQ_ASSAY_ID": ["sage-1", "SAGE-3"],
"is_paired_end": [True, False],
"library_strategy": ["value1", "value2"],
"library_strategy": ["Targeted Sequencing", "WXS"],
"library_selection": ["value1", "value2"],
"platform": ["value1", "value2"],
"instrument_model": ["value1", "value2"],
Expand All @@ -102,18 +101,18 @@ def test_case__validate(assay_info):
), patch.object(
process_functions, "get_gdc_data_dictionary", return_value=test_dict
) as patch_get_gdc:
error, warning = assay_info._validate(assay_info_df, "syn9999")
error, warning = assay_info._validate(assay_info_df)
assert error == ""
assert warning == ""
patch_get_gdc.assert_called()


def test_underscore__validate(assay_info):
"""Valid input should have no errors or warnings"""
"""Valid input with underscore in SEQ_ASSAY_ID, should have no errors or warnings"""
assay_info_dict = {
"SEQ_ASSAY_ID": ["SAGE_1", "SAGE-3"],
"is_paired_end": [True, False],
"library_strategy": ["value1", "value2"],
"library_strategy": ["Targeted Sequencing", "WXS"],
"library_selection": ["value1", "value2"],
"platform": ["value1", "value2"],
"instrument_model": ["value1", "value2"],
Expand All @@ -136,7 +135,7 @@ def test_underscore__validate(assay_info):
), patch.object(
process_functions, "get_gdc_data_dictionary", return_value=test_dict
) as patch_get_gdc:
error, warning = assay_info._validate(assay_info_df, "syn9999")
error, warning = assay_info._validate(assay_info_df)
assert error == ""
assert warning == ""
patch_get_gdc.assert_called()
Expand All @@ -149,7 +148,7 @@ def test__missingcols__validate(assay_info):
with patch.object(
process_functions, "get_gdc_data_dictionary", return_value=test_dict
) as patch_get_gdc:
error, warning = assay_info._validate(assay_info_df, "syn99999")
error, warning = assay_info._validate(assay_info_df)
expected_errors = (
"Assay_information.yaml: Must have SEQ_ASSAY_ID column.\n"
"Assay_information.yaml: Must have is_paired_end column.\n"
Expand Down Expand Up @@ -230,7 +229,7 @@ def test_invalid__validate(assay_info):
assay_info_dict = {
"SEQ_ASSAY_ID": ["SAGE-1", "SAG-2"],
"is_paired_end": [True, "foo"],
"library_strategy": ["foo", "ChIP-Seq"],
"library_strategy": ["foo", "WXS"],
"library_selection": ["foo", "PCR"],
"platform": ["foo", "Illumina"],
"instrument_model": ["foo", "Illumina HiSeq 4000"],
Expand All @@ -256,7 +255,7 @@ def test_invalid__validate(assay_info):
), patch.object(
process_functions, "get_gdc_data_dictionary", return_value=test_dict
) as patch_get_gdc:
error, warning = assay_info._validate(assay_info_df, "syn9999")
error, warning = assay_info._validate(assay_info_df)
expected_errors = (
"Assay_information.yaml: "
"Please make sure all your SEQ_ASSAY_IDs start with your "
Expand All @@ -270,7 +269,7 @@ def test_invalid__validate(assay_info):
"This column must only be these values: value1, value2\n"
"Assay_information.yaml: "
"Please double check your library_strategy column. "
"This column must only be these values: value1, value2\n"
"This column must only be these values: Targeted Sequencing, WXS\n"
"Assay_information.yaml: "
"Please double check your platform column. "
"This column must only be these values: value1, value2\n"
Expand Down

0 comments on commit 63a22f3

Please sign in to comment.