-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Parse protein change case insensitive
- Loading branch information
Showing
8 changed files
with
800 additions
and
265 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
381 changes: 118 additions & 263 deletions
381
src/main/java/org/mskcc/oncokb/curation/util/AlterationUtils.java
Large diffs are not rendered by default.
Oops, something went wrong.
38 changes: 38 additions & 0 deletions
38
src/main/java/org/mskcc/oncokb/curation/util/parser/ParsingStatus.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
package org.mskcc.oncokb.curation.util.parser; | ||
|
||
import org.mskcc.oncokb.curation.domain.enumeration.EntityStatusType; | ||
|
||
public class ParsingStatus<T> { | ||
|
||
EntityStatusType status; | ||
String message; | ||
T entity; | ||
|
||
public Boolean isParsed() { | ||
return this.status != null; | ||
} | ||
|
||
public EntityStatusType getStatus() { | ||
return status; | ||
} | ||
|
||
public void setStatus(EntityStatusType status) { | ||
this.status = status; | ||
} | ||
|
||
public String getMessage() { | ||
return message; | ||
} | ||
|
||
public void setMessage(String message) { | ||
this.message = message; | ||
} | ||
|
||
public T getEntity() { | ||
return entity; | ||
} | ||
|
||
public void setEntity(T entity) { | ||
this.entity = entity; | ||
} | ||
} |
360 changes: 360 additions & 0 deletions
360
src/main/java/org/mskcc/oncokb/curation/util/parser/ProteinChangeParser.java
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package org.mskcc.oncokb.curation; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.File; | ||
import java.io.FileNotFoundException; | ||
import java.io.FileReader; | ||
|
||
public class TestHelper {

    /**
     * Opens a test data file for buffered, line-by-line reading.
     *
     * <p>The file is always decoded as UTF-8 so that fixtures are read the same way on every
     * platform and JDK, instead of depending on the platform default charset.
     *
     * @param filePath path of the file to open; may be {@code null}
     * @return a reader over the file, or {@code null} (after printing a hint to standard output)
     *     when {@code filePath} is {@code null}; the caller is responsible for closing the reader
     * @throws FileNotFoundException if the file does not exist or cannot be opened for reading
     */
    public static BufferedReader getTestFileBufferedReader(String filePath) throws FileNotFoundException {
        if (filePath == null) {
            System.out.println("Please specify the testing file path");
            return null;
        }

        File file = new File(filePath);
        // Fully qualified names keep this method self-contained (no change to the file's import list).
        // FileInputStream raises FileNotFoundException exactly as FileReader did.
        return new BufferedReader(
            new java.io.InputStreamReader(new java.io.FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8)
        );
    }
}
25 changes: 25 additions & 0 deletions
25
src/test/java/org/mskcc/oncokb/curation/test/AlterationUtilsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package org.mskcc.oncokb.curation.test; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.mskcc.oncokb.curation.util.AlterationUtils.parseProteinChange; | ||
|
||
import org.junit.jupiter.api.Test; | ||
import org.mskcc.oncokb.curation.domain.Alteration; | ||
import org.mskcc.oncokb.curation.domain.AlterationAnnotationStatus; | ||
import org.mskcc.oncokb.curation.domain.EntityStatus; | ||
|
||
public class AlterationUtilsTest { | ||
|
||
@Test | ||
public void testRevisedProteinChangeInParseProteinChange() { | ||
EntityStatus<Alteration> status = new AlterationAnnotationStatus(); | ||
parseProteinChange(status, "v600e"); | ||
assertEquals("V600E", status.getEntity().getProteinChange()); | ||
|
||
parseProteinChange(status, "*757kext*"); | ||
assertEquals("*757Kext*", status.getEntity().getProteinChange()); | ||
|
||
parseProteinChange(status, "T599delinsip"); | ||
assertEquals("T599delinsIP", status.getEntity().getProteinChange()); | ||
} | ||
} |
144 changes: 144 additions & 0 deletions
144
src/test/java/org/mskcc/oncokb/curation/test/ParseProteinChangeParameterizedTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
package org.mskcc.oncokb.curation.test; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.mskcc.oncokb.curation.TestHelper.getTestFileBufferedReader; | ||
import static org.mskcc.oncokb.curation.util.AlterationUtils.parseProteinChange; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.Parameterized; | ||
import org.mskcc.oncokb.curation.domain.Alteration; | ||
import org.mskcc.oncokb.curation.domain.AlterationAnnotationStatus; | ||
import org.mskcc.oncokb.curation.domain.EntityStatus; | ||
|
||
@RunWith(Parameterized.class) | ||
public class ParseProteinChangeParameterizedTest { | ||
|
||
private static String TEST_FILE_PATH = "src/test/resources/data/test_parse_protein_change.tsv"; | ||
|
||
private String proteinChange; | ||
private String expectedConsequence; | ||
private String expectedRefAllele; | ||
private String expectedVarAllele; | ||
private String expectedProteinStart; | ||
private String expectedProteinEnd; | ||
|
||
public ParseProteinChangeParameterizedTest( | ||
String proteinChange, | ||
String expectedConsequence, | ||
String expectedRefAllele, | ||
String expectedVarAllele, | ||
String expectedProteinStart, | ||
String expectedProteinEnd | ||
) { | ||
this.proteinChange = proteinChange; | ||
this.expectedConsequence = expectedConsequence; | ||
this.expectedRefAllele = expectedRefAllele; | ||
this.expectedVarAllele = expectedVarAllele; | ||
this.expectedProteinStart = expectedProteinStart; | ||
this.expectedProteinEnd = expectedProteinEnd; | ||
} | ||
|
||
@Parameterized.Parameters | ||
public static Collection<String[]> getParameters() throws IOException { | ||
return importer(); | ||
} | ||
|
||
private static List<String[]> importer() throws IOException { | ||
BufferedReader buf = getTestFileBufferedReader(TEST_FILE_PATH); | ||
String line = buf.readLine(); | ||
|
||
List<String[]> queries = new ArrayList<>(); | ||
int count = 0; | ||
while (line != null) { | ||
if (!line.startsWith("#") && line.trim().length() > 0) { | ||
try { | ||
String parts[] = line.split("\t"); | ||
if (parts.length < 1) { | ||
throw new IllegalArgumentException("Test case should have at least protein change. Current case: " + line); | ||
} | ||
String proteinChange = parts[0]; | ||
String expectedConsequence = parts.length > 1 ? parts[1].toUpperCase() : ""; | ||
String expectedRefAllele = parts.length > 2 ? parts[2] : ""; | ||
String expectedVarAllele = parts.length > 3 ? parts[3] : ""; | ||
String expectedProteinStart = parts.length > 4 ? parts[4] : ""; | ||
String expectedProteinEnd = parts.length > 5 ? parts[5] : ""; | ||
String[] query = { | ||
proteinChange, | ||
expectedConsequence, | ||
expectedRefAllele, | ||
expectedVarAllele, | ||
expectedProteinStart, | ||
expectedProteinEnd, | ||
}; | ||
queries.add(query); | ||
count++; | ||
} catch (Exception e) { | ||
System.err.println("Could not add line '" + line + "'. " + e); | ||
} | ||
} | ||
line = buf.readLine(); | ||
} | ||
System.err.println("Contains " + count + " queries."); | ||
System.err.println("Done."); | ||
|
||
return queries; | ||
} | ||
|
||
private void testSuite( | ||
Alteration annotatedAlteration, | ||
String proteinChange, | ||
String expectedConsequence, | ||
String expectedRefAllele, | ||
String expectedVarAllele, | ||
String expectedProteinStart, | ||
String expectedProteinEnd | ||
) { | ||
assertEquals( | ||
"Not expected consequence. Query: " + proteinChange, | ||
expectedConsequence, | ||
annotatedAlteration.getConsequence() == null ? "" : annotatedAlteration.getConsequence().getTerm() | ||
); | ||
assertEquals( | ||
"Not expected ref allele. Query: " + proteinChange, | ||
expectedRefAllele, | ||
StringUtils.isEmpty(annotatedAlteration.getRefResidues()) ? "" : annotatedAlteration.getRefResidues() | ||
); | ||
assertEquals( | ||
"Not expected var allele. Query: " + proteinChange, | ||
expectedVarAllele, | ||
StringUtils.isEmpty(annotatedAlteration.getVariantResidues()) ? "" : annotatedAlteration.getVariantResidues() | ||
); | ||
assertEquals( | ||
"Not expected protein start. Query: " + proteinChange, | ||
expectedProteinStart, | ||
annotatedAlteration.getStart() == null ? "" : Integer.toString(annotatedAlteration.getStart()) | ||
); | ||
assertEquals( | ||
"Not expected protein end. Query: " + proteinChange, | ||
expectedProteinEnd, | ||
annotatedAlteration.getEnd() == null ? "" : Integer.toString(annotatedAlteration.getEnd()) | ||
); | ||
} | ||
|
||
@Test | ||
public void testSummary() { | ||
EntityStatus<Alteration> alterationEntityStatus = new AlterationAnnotationStatus(); | ||
parseProteinChange(alterationEntityStatus, proteinChange); | ||
testSuite( | ||
alterationEntityStatus.getEntity(), | ||
proteinChange, | ||
expectedConsequence, | ||
expectedRefAllele, | ||
expectedVarAllele, | ||
expectedProteinStart, | ||
expectedProteinEnd | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# | ||
# List of alterations for testing. Each line has the following format: | ||
# | ||
# Protein Change\tExpected consequence\tExpected Reference Allele\tExpected Variant Allele\tExpected Protein Start\tExpected Protein End | ||
# | ||
|
||
# missense variants | ||
N505I missense_variant N I 505 505 | ||
E323_D324delinsKN missense_variant 323 324 | ||
814_852mis missense_variant 814 852 | ||
IK744KI missense_variant IK KI 744 745 | ||
|
||
# inframes | ||
Q58_Q59insL inframe_insertion 58 59 | ||
58_Q59insL inframe_insertion 58 59 | ||
Q58_59insL inframe_insertion 58 59 | ||
58_59insL inframe_insertion 58 59 | ||
P68_C77dup inframe_insertion 68 77 | ||
P68_77dup inframe_insertion 68 77 | ||
68_C77dup inframe_insertion 68 77 | ||
68_77dup inframe_insertion 68 77 | ||
T599delinsIP inframe_insertion T 599 599 | ||
599delinsIP inframe_insertion 599 599 | ||
599delins UNKNOWN 599 599 | ||
599delins2 UNKNOWN 599 599 | ||
I744KI inframe_insertion I KI 744 744 | ||
M1ext-1 inframe_insertion 1 1 | ||
1ext-1 inframe_insertion 1 1 | ||
1ext inframe_insertion 1 1 | ||
C359del inframe_deletion C 359 359 | ||
359del inframe_deletion 359 359 | ||
IK744A inframe_deletion IK A 744 745 | ||
1459_1468del inframe_deletion 1459 1468 | ||
A1459_D1468del inframe_deletion 1459 1468 | ||
1459_D1468del inframe_deletion 1459 1468 | ||
A1459_1468del inframe_deletion 1459 1468 | ||
D1161_S1172delinsE inframe_deletion 1161 1172 | ||
D1161_1172delinsE inframe_deletion 1161 1172 | ||
1161_S1172delinsE inframe_deletion 1161 1172 | ||
1161_1172delinsE inframe_deletion 1161 1172 | ||
|
||
# frame shifts | ||
S859Afs*12 frameshift_variant S 859 859 | ||
859Afs*12 frameshift_variant 859 859 | ||
S859fs*12 frameshift_variant S 859 859 | ||
G314fs frameshift_variant G 314 314 | ||
314fs frameshift_variant 314 314 | ||
S330_S352fs frameshift_variant 330 352 | ||
330_S352fs frameshift_variant 330 352 | ||
S330_352fs frameshift_variant 330 352 | ||
330_352fs frameshift_variant 330 352 | ||
|
||
# truncating mutations | ||
W143_A314trunc feature_truncation 143 314 | ||
W143_314trunc feature_truncation 143 314 | ||
143_A314trunc feature_truncation 143 314 | ||
422_605trunc feature_truncation 422 605 | ||
Truncating Mutations feature_truncation | ||
|
||
# splice variants | ||
596_619splice splice_region_variant 596 619 | ||
X1429_splice splice_region_variant 1429 1429 | ||
1429_splice splice_region_variant 1429 1429 | ||
M1I start_lost M I 1 1 | ||
1I start_lost I 1 1 | ||
M1? start_lost M ? 1 1 | ||
A149* stop_gained A * 149 149 | ||
149* stop_gained * 149 149 | ||
*149* stop_lost * * 149 149 | ||
|
||
# extension | ||
*757Kext*36 stop_lost * 757 757 | ||
757Kext*36 stop_lost * 757 757 | ||
*757ext*36 stop_lost * 757 757 | ||
*757Kext* stop_lost * 757 757 | ||
*757Kext*? stop_lost * 757 757 | ||
|
||
# synonymous variants | ||
K24K synonymous_variant K K 24 24 | ||
|
||
# any | ||
449_514mut any 449 514 | ||
|
||
# Unknown variants | ||
D399 NA D 399 399 | ||
MCUR1-AKT1 fusion UNKNOWN | ||
Fusions UNKNOWN | ||
Deletion UNKNOWN | ||
Amplification UNKNOWN | ||
Oncogenic Mutations UNKNOWN | ||
Oncogenic Mutations {excluding V600} UNKNOWN | ||
|