From 1bccf9f52f68a70f5e533dbfffdad021e505a33d Mon Sep 17 00:00:00 2001
From: Anushya Muruganujan
Date: Wed, 7 Feb 2024 19:52:26 -0800
Subject: [PATCH] For #2064 - Added check for pipe in gene symbol

---
Reviewer notes (this area between the three-dash line and the diff is
commentary and is not part of the commit):

* The pipe check now reports gaf_line[DB_OBJECT_SYMBOL] as the offending
  value. The submitted version passed gaf_line[4]; for the test line added
  in this patch, index 4 is "GO:0000007", not the symbol being rejected.
* This patch had been flattened and its whitespace lost. Indentation below
  is reconstructed, and the whitespace-only -/+ pair on the `return` line
  could not be recovered exactly -- TODO confirm against the base blob
  before applying.
* The post-image id on the gafparser.py `index` line predates this
  amendment; regenerate the patch if exact blob ids matter.

 ontobio/io/gafparser.py | 5 ++++-
 tests/test_gafparser.py | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/ontobio/io/gafparser.py b/ontobio/io/gafparser.py
index 8064a67c..306bfde8 100644
--- a/ontobio/io/gafparser.py
+++ b/ontobio/io/gafparser.py
@@ -393,7 +393,10 @@ def to_association(gaf_line: List[str], report=None, group="unknown", dataset="u
         return assocparser.ParseResult(source_line, [], True, report=report)
     if gaf_line[DB_OBJECT_SYMBOL] == "":
         report.error(source_line, Report.INVALID_ID, "EMPTY", "col3 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
-        return assocparser.ParseResult(source_line, [], True, report=report)
+        return assocparser.ParseResult(source_line, [], True, report=report)
+    if '|' in gaf_line[DB_OBJECT_SYMBOL]:
+        report.error(source_line, Report.INVALID_SYMBOL, gaf_line[DB_OBJECT_SYMBOL], "Pipes are not allowed in gene symbol", taxon=gaf_line[TAXON_INDEX], rule=1)
+        return assocparser.ParseResult(source_line, [], True, report=report)
     if gaf_line[REFERENCE_INDEX] == "":
         report.error(source_line, Report.INVALID_ID, "EMPTY", "reference column 6 is empty", taxon=gaf_line[TAXON_INDEX], rule=1)
         return assocparser.ParseResult(source_line, [], True, report=report)
diff --git a/tests/test_gafparser.py b/tests/test_gafparser.py
index ed19cb32..9c907408 100644
--- a/tests/test_gafparser.py
+++ b/tests/test_gafparser.py
@@ -421,6 +421,12 @@ def test_bad_date():
     assert assoc_result.skipped == True
     assert assoc_result.associations == []
 
+def test_bad_gene_symbol():
+    p = GafParser()
+    assoc_result = p.parse_line("PomBase\tSPAC25B8.17\ta|pipeisnotallowed\t\tGO:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)")
+    assert assoc_result.skipped == True
+    assert assoc_result.associations == []
+
 def test_bad_go_id():
     p = GafParser()
     assoc_result = p.parse_line("PomBase\tSPAC25B8.17\typf1\t\tINVALID:0000007\tGO_REF:0000024\tISO\tSGD:S000001583\tC\tintramembrane aspartyl protease of the perinuclear ER membrane Ypf1 (predicted)\tppp81\tprotein\ttaxon:4896\t20231110\tPomBase\tfoo(X:1)")