From e5ad4658eb051a717f6a33e7751a34bd3f810aee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 20 Mar 2024 11:16:59 +0000 Subject: [PATCH 01/11] storage: Add VariantAnnotatorExtension scaffolding. #TASK-5318 --- .../core/variant/VariantStorageOptions.java | 3 + .../DefaultVariantAnnotationManager.java | 9 +++ .../VariantAnnotatorExtensionTask.java | 58 +++++++++++++++++++ .../VariantAnnotatorExtensionsFactory.java | 53 +++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index caefbb5260e..6488e35bf8a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -80,6 +80,9 @@ public enum VariantStorageOptions implements ConfigurationOption { ANNOTATOR_CELLBASE_VARIANT_LENGTH_THRESHOLD("annotator.cellbase.variantLengthThreshold", Integer.MAX_VALUE), ANNOTATOR_CELLBASE_IMPRECISE_VARIANTS("annotator.cellbase.impreciseVariants", true), ANNOTATOR_CELLBASE_STAR_ALTERNATE("annotator.cellbase.starAlternate", false), + ANNOTATOR_EXTENSION_PREFIX("annotator.extension."), + ANNOTATOR_EXTENSION_LIST("annotator.extension.list"), + ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"), INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java index 78229d47ef6..16d3b071279 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java @@ -52,6 +52,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionsFactory; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter; import org.opencb.opencga.storage.core.variant.io.db.VariantDBReader; @@ -265,6 +267,13 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap return variantAnnotationList; }; + List extensions = new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(params); + for (VariantAnnotatorExtensionTask extension : extensions) { + extension.setup(outDir); + extension.checkAvailable(); + annotationTask = annotationTask.then(extension); + } + final DataWriter variantAnnotationDataWriter; if (avro) { //FIXME diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..7e044a901ce --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java @@ -0,0 +1,58 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.run.Task; + +import java.net.URI; +import java.util.List; + +public interface VariantAnnotatorExtensionTask extends Task { + + /** + * Set up the annotator extension. + * This method will be called before any other method. It might generate extra files or data needed for the annotation. + * + * @param output Output directory where the annotator extension should write the files + * @return List of URIs of generated files (if any) + */ + List setup(URI output); + + /** + * Check if the annotator extension is available for the given options. + * @throws IllegalArgumentException if the annotator extension is not available + */ + void checkAvailable() throws IllegalArgumentException; + + /** + * Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available. + * @return true if the annotator extension is available + */ + default boolean isAvailable() { + try { + checkAvailable(); + return true; + } catch (IllegalArgumentException e) { + return false; + } + } + + @Override + default void pre() throws Exception { + Task.super.pre(); + checkAvailable(); + } + + /** + * Get the options for the annotator extension. + * @return Options for the annotator extension + */ + ObjectMap getOptions(); + + /** + * Get the metadata for the annotator extension. + * @return Metadata for the annotator extension + */ + ObjectMap getMetadata(); + +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java new file mode 100644 index 00000000000..57626a34b43 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java @@ -0,0 +1,53 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; + +import java.lang.reflect.InvocationTargetException; +import java.util.LinkedList; +import java.util.List; + +public class VariantAnnotatorExtensionsFactory { + + public List getVariantAnnotatorExtensions(ObjectMap options) { + + List tasks = new LinkedList<>(); + for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { + VariantAnnotatorExtensionTask task = null; + switch (extensionId) { +// case CosmicVariantAnnotatorExtensionTask.ID: +// task = new CosmicVariantAnnotatorExtensionTask(options); +// break; + default: + String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); + if (extensionClass != null) { + task = getVariantAnnotatorExtension(extensionClass, options); + } else { + throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'"); + } + } + + if (task == null) { + throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'"); + } + + tasks.add(task); + } + return tasks; + } + + private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) { + try { + Class clazz = Class.forName(className); + return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); + } catch (ClassNotFoundException + | NoSuchMethodException + | InstantiationException + | IllegalAccessException + | InvocationTargetException e) { + throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); + } + } + + +} From abbc66a4d5f1808ad272bb8887ed963711c70148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 21 Mar 2024 17:55:10 +0100 Subject: [PATCH 02/11] core: implement the CosmicVariantAnnotatorExtensionTask, #TASK-5902, #TASK-5318 --- .../DefaultVariantAnnotationManager.java | 2 +- .../VariantAnnotatorExtensionTask.java | 3 +- .../cosmic/CosmicExtensionTaskCallback.java | 82 ++++++++ .../CosmicVariantAnnotatorExtensionTask.java | 194 ++++++++++++++++++ ...smicVariantAnnotatorExtensionTaskTest.java | 120 +++++++++++ .../custom_annotation/cosmic.small.tsv.gz | Bin 0 -> 8342 bytes 6 files changed, 399 insertions(+), 2 deletions(-) create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java create mode 100644 opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java create mode 100644 opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java create mode 100644 opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java index 16d3b071279..f8e7f4c4b90 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java @@ -295,7 +295,7 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap ParallelTaskRunner parallelTaskRunner = new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config); parallelTaskRunner.run(); - } catch (ExecutionException e) { + } catch (Exception e) { throw new VariantAnnotatorException("Error creating annotations", e); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java index 7e044a901ce..03255409123 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java @@ -15,8 +15,9 @@ public interface VariantAnnotatorExtensionTask extends Task setup(URI output); + List setup(URI output) throws Exception; /** * Check if the annotator extension is available for the given options. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java new file mode 100644 index 00000000000..c343e690dde --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java @@ -0,0 +1,82 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.collections4.CollectionUtils; +import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; +import org.opencb.biodata.models.sequence.SequenceLocation; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; +import org.opencb.biodata.tools.variant.VariantNormalizer; +import org.opencb.opencga.core.common.JacksonUtils; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class CosmicExtensionTaskCallback implements CosmicParserCallback { + + private RocksDB rdb; + private VariantNormalizer variantNormalizer; + private ObjectMapper defaultObjectMapper; + + private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class); + + private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|()|()|()|()|()"; + + public CosmicExtensionTaskCallback(RocksDB rdb) { + this.rdb = rdb; + this.variantNormalizer = new VariantNormalizer(); + this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); + } + + @Override + public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List evidenceEntries) { + // Add evidence entries in the RocksDB + // More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed + List normalisedVariantStringList; + try { + normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(), + sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate()); + if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) { + for (String normalisedVariantString : normalisedVariantStringList) { + rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries)); + } + return true; + } + return false; + } catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) { + e.printStackTrace(); + return false; + } + } + + protected List getNormalisedVariantString(String chromosome, int start, String reference, String alternate) + throws NonStandardCompliantSampleField { + Variant variant = new Variant(chromosome, start, reference, alternate); + return getNormalisedVariantString(variant); + } + + protected List getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField { + // Checks no weird characters are part of the reference & alternate alleles + if (isValid(variant)) { + List normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true); + return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList()); + } else { + logger.warn("Variant {} is not valid: skipping it!", variant); + } + + return Collections.emptyList(); + } + + protected boolean isValid(Variant variant) { + return (variant.getReference().matches(VARIANT_STRING_PATTERN) + && (variant.getAlternate().matches(VARIANT_STRING_PATTERN) + && !variant.getAlternate().equals(variant.getReference()))); + } +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..60dcaeaccf1 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -0,0 +1,194 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectReader; +import org.opencb.biodata.formats.variant.cosmic.CosmicParser; +import org.opencb.biodata.models.common.DataVersion; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; + +public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExtensionTask { + + private ObjectMap options; + + private Path cosmicFolder; + + private ObjectReader objectReader; + + private RocksDB rdb = null; + private Options dbOption = null; + private Path dbLocation = null; + + public static final String COSMIC_ANNOTATOR_INDEX_NAME = "cosmicAnnotatorIndex"; + public static final String COSMIC_VERSION_FILENAME = "cosmicVersion.json"; + + private static Logger logger = LoggerFactory.getLogger(CosmicVariantAnnotatorExtensionTask.class); + + public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { + this.options = options; + this.objectReader = JacksonUtils.getDefaultObjectMapper().readerFor(new TypeReference>() {}); + } + + @Override + public List setup(URI output) throws Exception { + // Check input path + cosmicFolder = Paths.get(output.getPath()); + if (cosmicFolder == null || !Files.exists(cosmicFolder)) { + throw new IllegalArgumentException("Path " + output + " does not exist"); + } + if (!cosmicFolder.toFile().isDirectory()) { + throw new IllegalArgumentException("Path " + output + " must be a directory with two files: the raw COSMIC file and the" + + " metadata file 'cosmicVersion.json'"); + } + + // Clean and init RocksDB + dbLocation = cosmicFolder.toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); + if (Files.exists(dbLocation)) { + // Skipping setup but init RocksDB + logger.info("Skipping setup, it was already done"); + initRockDB(false); + } else { + logger.info("Setup and populate RocksDB"); + File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); + if (!versionFile.exists()) { + throw new IllegalArgumentException("Path " + output + " does not contain the COSMIC metadata file: " + + COSMIC_VERSION_FILENAME); + } + DataVersion dataVersion; + try { + dataVersion = JacksonUtils.getDefaultObjectMapper().readValue(versionFile, DataVersion.class); + } catch (IOException e) { + throw new IllegalArgumentException("Error parsing the metadata file " + versionFile.getAbsolutePath(), e); + } + String cosmicFilename; + try { + cosmicFilename = dataVersion.getFiles().get(0); + } catch (Exception e) { + throw new IllegalArgumentException("Error getting the COSMIC file from the metadata file " + + versionFile.getAbsolutePath(), e); + } + File cosmicFile = cosmicFolder.resolve(cosmicFilename).toFile(); + if (!cosmicFile.exists()) { + throw new IllegalArgumentException("COSMIC file " + cosmicFile.getAbsolutePath() + " does not exist"); + } + + // Init RocksDB + initRockDB(true); + + // Call COSMIC parser + try { + CosmicExtensionTaskCallback callback = new CosmicExtensionTaskCallback(rdb); + CosmicParser.parse(cosmicFile.toPath(), dataVersion.getVersion(), dataVersion.getName(), dataVersion.getAssembly(), + callback); + } catch (IOException e) { + throw new ToolException(e); + } + } + return Collections.singletonList(dbLocation.toUri()); + } + + @Override + public void checkAvailable() throws IllegalArgumentException { + if (!isAvailable()) { + throw new IllegalArgumentException("COSMIC annotator extension is not available"); + } + } + + @Override + public boolean isAvailable() { + return (dbLocation != null && Files.exists(dbLocation)); + } + + @Override + public ObjectMap getOptions() { + return options; + } + + @Override + public ObjectMap getMetadata() { + File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); + if (!versionFile.exists()) { + throw new IllegalArgumentException("Metadata file " + versionFile + " does not exist"); + } + try { + return JacksonUtils.getDefaultObjectMapper().readValue(versionFile, ObjectMap.class); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + + @Override + public List apply(List list) throws Exception { + for (VariantAnnotation variantAnnotation : list) { + Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), + variantAnnotation.getAlternate()); + byte[] key = variant.toString().getBytes(); + byte[] dbContent = rdb.get(key); + if (dbContent != null) { + List evidenceEntryList = objectReader.readValue(dbContent); + if (variantAnnotation.getTraitAssociation() == null) { + variantAnnotation.setTraitAssociation(evidenceEntryList); + } else { + variantAnnotation.getTraitAssociation().addAll(evidenceEntryList); + } + } + } + return list; + } + + @Override + public void post() throws Exception { + closeRocksDB(); + } + + + private void closeRocksDB() { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + } + + private void initRockDB(boolean forceCreate) throws ToolException { + boolean indexingNeeded = forceCreate || !Files.exists(dbLocation); + // a static method that loads the RocksDB C++ library. + RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. + dbOption = new Options().setCreateIfMissing(true); + + rdb = null; + try { + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + rdb = RocksDB.open(dbOption, dbLocation.toAbsolutePath().toString()); + } else { + rdb = RocksDB.openReadOnly(dbOption, dbLocation.toAbsolutePath().toString()); + } + } catch (RocksDBException e) { + // Do some error handling + throw new ToolException("", e); + } + } +} diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java new file mode 100644 index 00000000000..aced5f62950 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -0,0 +1,120 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.Assert; +import org.junit.Test; +import org.opencb.biodata.models.common.DataVersion; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.common.TimeUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class CosmicVariantAnnotatorExtensionTaskTest { + + private final String COSMIC_VERSION = "v95"; + + @Test + public void testSetupCosmicVariantAnnotatorExtensionTask() throws Exception { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + + Assert.assertEquals(false, task.isAvailable()); + + // Setup COSMIC directory + Path cosmicPath = initCosmicPath(); + + // Set-up COSMIC variant annotator extension task, once + task.setup(cosmicPath.toUri()); + + // Set-up COSMIC variant annotator extension task, twice + task.setup(cosmicPath.toUri()); + + ObjectMap metadata = task.getMetadata(); + Assert.assertEquals(COSMIC_VERSION, metadata.get("version")); + + Assert.assertEquals(true, task.isAvailable()); + } + + @Test + public void testSCosmicVariantAnnotatorExtensionTask() { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + Assert.assertEquals(false, task.isAvailable()); + } + + @Test + public void testAnnotationCosmicVariantAnnotatorExtensionTask() throws Exception { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + + Assert.assertEquals(false, task.isAvailable()); + + // Setup COSMIC directory + Path cosmicPath = initCosmicPath(); + + // Set-up COSMIC variant annotator extension task, once + task.setup(cosmicPath.toUri()); + + List inputVariantAnnotations = new ArrayList<>(); + VariantAnnotation variantAnnotation1 = new VariantAnnotation(); + variantAnnotation1.setChromosome("12"); + variantAnnotation1.setStart(124402657); + variantAnnotation1.setEnd(124402657); + variantAnnotation1.setReference("G"); + variantAnnotation1.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation1); + VariantAnnotation variantAnnotation2 = new VariantAnnotation(); + variantAnnotation2.setChromosome("22"); + variantAnnotation2.setStart(124402657); + variantAnnotation2.setEnd(124402657); + variantAnnotation2.setReference("G"); + variantAnnotation2.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation2); + + List outputVariantAnnotations = task.apply(inputVariantAnnotations); + task.post(); + + Assert.assertEquals(inputVariantAnnotations.size(), outputVariantAnnotations.size()); + + // Checking variantAnnotation1 + Assert.assertEquals(1, outputVariantAnnotations.get(0).getTraitAssociation().size()); + Assert.assertEquals("COSV62300079", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getId()); + Assert.assertEquals("liver", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getPrimarySite()); + Assert.assertEquals("hepatocellular carcinoma", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getHistologySubtype()); + Assert.assertEquals("PMID:323", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getBibliography().get(0)); + + // Checking variantAnnotation2 + Assert.assertTrue(CollectionUtils.isEmpty(outputVariantAnnotations.get(1).getTraitAssociation())); + } + + private Path initCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(getClass().getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); + DataVersion cosmicDataVersion = new DataVersion("variant", "cosmic", COSMIC_VERSION, "20231212", + "hsapiens", "GRCh38", Collections.singletonList(cosmicFile.getFileName().toString()), + Collections.singletonList("http://cosmic.org"), null); + JacksonUtils.getDefaultObjectMapper().writeValue(cosmicPath.resolve(CosmicVariantAnnotatorExtensionTask.COSMIC_VERSION_FILENAME).toFile(), cosmicDataVersion); + Files.copy(cosmicFile, cosmicPath.resolve(cosmicDataVersion.getFiles().get(0))); + + return cosmicPath; + } + + private Path getTempPath() { + return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); + } +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz b/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..153129875feedfd109939484ff008b3f3f7a30b6 GIT binary patch literal 8342 zcmV;HAZgzpiwFom4EtpO17mMb;~t>MM^Y>}>>w$IV!=yrH^brFq|<;x=cqpOSQ z#(jya?QXf=Wzk(WUnbe>*=D{?qcOakZFb+buP^Bt-T#v9f60q~`8?lj*NgSjYjl~f zD!=3KJ=X9ZNBP&wH2&xO0iH3-(sUJ#KHYx4xf)JyzDx#_tIxMn*v|E*(O~%N-dlU? z-8R{xRzMTw7lT2$zDVJ>&tGs|zWK-N(;5`;8eM?qjc+-_vRNuK%f3H2ggNN(oWYP{Hu48_k8#Rz?&aC#W_enw<+R_3)=j^m2Yj zrBT<>c=v6yoo{zeNAcOse6vYcn>30dZX(Klq)c$ZsT6UsiemV42TJ}7Msqcc&aD-i zMPHL_o~*WK>&LU*>bKSUpQ~JXoKQohR8h3qWlw4L8b8keCw)MBpA)p4zDOBg@ZbU13z87E?=J4^M~mc)Ur!Ubr;`QrR$ePvROv% zHJQ%R#bUaAO&29PuN7z*po-8~Dy?9~L8C0w6rlm+Mn4S!ntLjl{*O8|v-QJ#^#slv zy#6+NPPcmy2~9{`tQth~mL43Cz$RNTv~W40wNic=&Y1gAUfc2!8rSL%1IGQQN!Y5b>ONrN37eCh`>=s zk6F5T{`bdh{YQ!qy8S$kUVQwGq-_Re^8zA{7R@Io>>~%oiv}?D@mIs4R1?I)GO7>@ zB{^ULF(K>pPr1sOHIga(;+9gut+HO_@S;KG#;_k4V_-z+qW&}b;?+~Lb~vq?*tl5j zTRWkgt1caL#=CSJH7cdLOVXr=|_aj`n2F#@9gdwc_LR(Dml!ZAA$dRngIkemz8AuVxy1(f1AB$pq+A$9JS z9fcvmExQ8(?o0^pmdA|Bztr9G?RwSZmKhguu{u;e13W*J$$)4RILM9e*vnsW#|**g zQ&Nt1hCTyf+H7FWz(cXlg zFyUU@$q}$kJUI}SJ5J8?I$I(bSvpH!w(IP)b7YLJqyoyP0!C?v4`gLN6##q;{rhS7 zC@lj%xXM!jYh_o+ed_^ofEYT-UYY?3l9zy>Ed5de7ds@0fsd;g?8YUl%+pK=A|DYu zz$W2QW$Z8>2Z{6(aFB3frtqR4QefVjiAO|Jdt!11XE0>Pu#z5;V`0P#s93A^@$oH* z*rB{74k*jCkj4MNpf@ZRG#*g-4;Rw4;Q}ZkRYnV|qIC7JUZ&e@zFRh(mg3v!*4lBg z6i$N6$GB4X+YuJaTu4`t4|w-AVbt2UrDL2kY2sp4BdC!1>28E#G}phep6z!5n~SznOFM3a5^!vMflna?!L$P&HuOQCo3|Yr!IUXUP)m zcK&$iy#&PSxL6$xDR0p|L=`7anmulSAuHR)k)80RyFx+WogOIsVAm zek5#seakTAV}=SzFpts1*!PxX|GLY5n{T&e{<3*lXUPNnBJ4xeOS2$Zg(`b40E~m8 zSL2ehylny0=rm#mdS)^%RtKq!QKGB10)}EF5bH9`)M_hjtqT;eg}UYYC^19H{(oWy zo|(a`KVl5S1IsaE3^21Qbeirt<0mCIf)nj&fnOaWW{K`hKKnq1!HDs;R444BtT>`z z{QzUYpdn0@e=B1xsLE9Wmb=+{lPsb~0B?~_pVHNQ3;Dm}aMORqhB3sehjg1}%lRtV zrvHXez>m(KR-lXNGF>E1Wl8`@K|l%(DT@!bAq!s1QVJF{8S!6@u`ca3xPGH^3l$DJ^WqqLyRQMgr z1*YNyLpev_DF=!Qgkuy!mKI`sT5?L132iA{g>nISp%1MVK$t>_TIy}N;1zBSqV%hM z`m-a-zwr!F^JknPp~4yBz+ghx1iUvwqnI-@cZP_yS0_QkC05Ie)uFTFxWTudA&^@% zcd79Bpm>Y4ne&BRl$;OcdPhg#&nO@0vV35Q@`0~Xqcd>v2WY<-wqkO~y?txTuMT0s zp#Aa11BgOS%-7};Xug{&A1C%56*uhGNWsc(jSAfw^D*5T?5|h4HNJzOq%aN}MMI}% z1QsU0o_1b16ErSXwGKOY%&rlXqrIEvfHPXWIsw6{G@=srQ7!6VWwd8M(#p_}L?&fi z8|F>@NXe-v`;m}sq0Ti?Tw-UJsu;2_;bVPkI}&`e@-+ars^vCB+ODwPv$I=~tl(Mh z={>Xlv7mX*Z+*^h*azkO4494fZAiv&I4F=Q3keM7Dcl(EbS^37q@7IFGfFt#JT6v; zPDg{U`=A2A<$Qy(wUMZN3zZbb_GP@6I9>qkQ^C1_Pz|I;1^3cm&>ecWmPkTFB~n8Y z1%WS&?kLIfW;-3#OuK5=gh}NGL)_|cFvLpR#fcy`VxA?UymYTRdh4`WO7sux*{vE5 zA({!VLlJbS+jZ!T+|@-^=`LGi9Gzug4J|=5C{_f;YH$}Jio#x*%MNh9!Z&-lChbvl zKRW<<<6?D~9T=rL3%qq*x6>JvsU>9QqF1s35T;+B_f}z=Z4LSrr4^;*lr-CYJzcFI z^62@-M(=*6O+%)wgj8Ypf#I3JsV|XJ9R%()kid>YICZOrMlblfPLLo&02kX{g*UW) zfQ_~?oqRLZ5jH{l`Je9{36#ns0XQO7L;^SGsq?h@4ycfBZ@$;(2svdmE>=xv$8`Fs zH$(zAXA82mwcT|sopuuSi^-rOqo?;2vlfnnmvD?M9I1{B`e8T2?5!o5h3{H9hoyVV7`jC9EzL=0k3M!0YKE2;$gm{ zN`3}1_MIVei#4^ zH1KJkjk#5gj+#z3`qnZAkbRH~Kxou{E`6bWCs5xZl9{33W^) zNC~NN9(4gJV5^V^K-LXGMRS)2VACFzJpr$CtVH@FDU$&q@G>a6!v zD+|_gWzLx&7C{O3N~S@tKbHHTh)gcps24KQXq-%>S}6_81UL}I(cESd^oE=aun^V? z-&mlGNea2Y!vdm!a9GeV`+P4nyEzdHY2#vbhy`q+EBpeXl40)8LGgp=6} zh~G>F5a|Zbdma@bVg}(dltv*1V|Y{qviu(TF`f&;z_q)A8%Tz?v=9r(YAMblP%cOe zhEOm5RCTW*Y{bQ?=7hQ@a*kEkfSNUHwIWubS$sN@%(rXy?t&w$On_usVHBp#!s7h7 z>kd4p2_{~%--7B+Df~+!tjVto8I4V9uUCOJ1ndsdA0*bP+SCfEBEnp(YHDS&^EL{{ zVINkZlOcjG(=D)rWCO%qvu4#+Bnz7L0t|a;_WH}OhiX*M7W`HOHc5zN!x{neK^YW^p)0w)GHC{8bvdk*7QSR~%F(@U){om_l6(tW+oliG#Lez?;4o9$Ad6e1Tt#kuEkw5dv_8Xmhxcp zxY~yUG||at&@?z+X6W|evJn&l5Q9$U#i*+t|GJkuB3jG5!b;nW=N2h0kbIozFs`(Z?Wgo;FD~4Ull`GOcyPsRh zLm}|-{N50E)7b`ZQ+z-t3iS*Kkw5R`oN1Hwrp@4?NJAe*6&#J;->ySJkYXo3N9D z<)Co=5OT5jeiDxfBw5s{AU%BCi;-GIhByRdnFfkS7Z30)*6zul2 z$PDRgoUe{wjyM{OS)=l?k_pM|X4R)X$dS9GJpw{0R<2d|=zdj% zs%hh;Lyh<5wKwhjD$v_an;H?QCZt1L75?AFS%Xs)n5079#E%W`PJV%H{*CX zx%bHyCqCH%cG8t>d3_%wTIupz_PSmri^DpN%x~`i)8$xoMnn{$u4$Fu*!{|gJp~Y; zdPIk#fU6oH+H@X?GS_Nr7Ve>dGPR%(1TP6F$kGply9>b};|)a|yy}KU91=!6w;61q zy@0dX0ZDzF&*sTux_Qac1o?YIO-Jc4%xQ_%EQAD$%_}e{#-#72|Cb>4*!^~&h-ngv zJZ^O`nnpW69qD+HXK$Xfgk-c>frOK`8gkf2k*A?(qh3+r&Q2j6pnNK)V?Mp7Q)CjZ zldF9vm-F78Tuf69Gd@ByqPe>V>dpk5zoN!Qr~Yr|8&|EhHdU*&bvCF@>()Ym=_s-7 z305t&$!Q@IHmG`}V#6UN!jA+}9C!jsDU5j&r35X84|^eaM}viKiPe~1i5N#Bd~K(d zQeycmE>`v46a)V!Y({T+QC!;ut4oyLYbjsBoIjl~{AXzbZMFd6}25}h&}B>&l5e+M{@O*jRr zf`g^Ss%gN9b~aXzm1&ym!jtR;C2Xq$gng831w%Nv zr|Kj(c8--`Mgl(>C_6c^GEegSA65XA`YNsK--x6A0GUrQO4}kCyVR z1h1OL#j1YEo{DbQ4Sw!+Z$Q4Kg;$ZKpc?qRepKUjczHLyxVjr*OO$a+6vN3qE%8d| zEM0B01ZP{b-%)>?*UdIv0`o}bE4T7$d|J#=r)!{xYoH|AdiJ`7H>RTuY=3%nbrmJ6 z1m`ce^I6UKCHFo^XrY*qp;lnsGzT@-gy90k`LX#W?}Lc@sLyJ{;=E{DDE=Va>;Y9G!JuB`SEZYys(=a68lI_nQ;&Pxne5CN9MnzF< zK{loVHAc@72z|7(KhG zzYs}Flg0PZWWm?I#bkB0b0-MPo2A4R%VRKNb)-%UR_9IDt?EMo^w=ev9wu9gRq#Q4 zbiZ2FC4NAYGg)f9$pXS{d}E2(?9{Q2vM1CU_&ArcOa(R1jaTS=jD-TrsjGKyjs*f+ zT&(H|i_o3r*}A#Xj=_~&kaBVbp4U&IqrcvM8cxCGF-tTOFa}f`FBg5AFXjMh=(Ig0 zi^aD^^6*fYYK2lPlSSw793>Lms>mm{AcZ$z%7NZd*XRM}=mXr(j2)er&dX1X#Ko%N zA~=3DL09oZDsJq-$HjrvvbpGS_ne||Y4hMk*g>TZh1J16koX(Mr2O06G#2|ejg!?n zOR>1oQje-R56DEZvPknwtBhl_(R&Z+QgXba5N4#MJaK>#C;sDNm46}}f7b>47Uho? zuP9bq#VH71DaA_p4rYy?3P*%3hUBmSvCyKs3x#8H*ER&m1_|yK%h82Nn41gP7 zoMios<%2H&+upVH#&KKWXaAKH6Yx&X<7VT?Mr1>F)=1w3!Byg-swCJkTJ+cVoWny7 zXLdL{!$wtxWaMRD;VdOE0MjMoPq(BXds& zw=$M36?=_1I(AuvT&~|bqXrkuKoGMU(B{bu0d0`4v4&#z&f#O?EjGNvzl+hlRT+l+ zhrfNEE1+}eVUyhg4{ypSkn_QTV()j0F+#9ujp*`ddcJK~UUb=GDrcI=smHCOI7PhL zh(lX|6&qNhF{s#oY0p+7*4yB^h(5&tL!dTCd7!g=DXmmwmE@hJGKSk z?^j9=QJhL%jAbq=PHfnHTN;rAO;IDNtPx#=-HOE#ze>3(=!(8sUUA7QKBfv0X9Zm= zTJiAl5hbhyU3nUX|40u>L(dRy1tq>P6+$hHx<9jpQMsapk%4Iqj^2N-oKQPj|3f(4 z&5|1Cq}N>@O`Z|&7oq9a<-4O!Fw24iQ1RD(gUc#gU&tTp5z=3tKHJ(P3?Fo7jfPQ6Z z^nBYjmdK%)HIOEr&Cpzm@|^)N^CioAX>UMf7m&`}{EPW0>b~}U!r=giYYQ?hEU@bG z==cGX%UWYoaqk$Eg9M%1n>3rU5nG=FxBcb!M!R8eyiI!(0q1&d3wxuST?I;yIY4c@ zJUX^Xm02vYXxx;GyL%L|(jL$A7!Yevv^KT!%j(}r@M|9wq@J)lm z0vh<`Jv{BmA~;2d0O=WP9!4vGfwe%Q5LoRc?5lZ&f&B(7*{s4SXHh`=fq>Upz&60( zFM1v10NG*kbr1xwC@~e!=>Nj4MvOxUcH8CAvC6=%#HT~fCfO?BDQOBNA%JVdmwkaz zF|5K#Ij_8@#*O z)g~Spkf;Wj$buK3e5P-tP$HzK0|wfhHyXQcf`#?3rlmgIKHmKCfCmP{5fvAMjyzU{ zW>7dAfh(QRI^$-M$V-@Tf?AdHXqsUSi!6jxzO;;n;uoN_qHqP4mb1PQ(eD$%dnfUvCB_ z^MO}V$BH^@*H_dLAB&32X|LcUT14@Qm}7!K9F!EX$n&|E-_?LgCPy?l<08Qnj#iMc zoLb3wd+@WDHC8E>I-gXMs12FqhnM@e*Ggfth{JYJc!OjBPqf~GH|=fTPV}LjO;iR# zE0gl{?Nqk`{O>Sh&L8=G+Ugjg!%7of7?hlVPn++ z?2tz0ue3w-kUPV@!dLLsZLA+ieXXZJUs z9@2QUmBTN7za7P7?;q~3|8e{A2JYtH|HJJfxncr2x!H8>;hp@Hf`y#e4T0 z_WD$Va&n)+KkV{orY>180uCX^fdt!i6T=>;E)ejt;x?pd6s|N#b_T)T&sW%>M?VSL zOp8ddhlHu;K${V=v{82O;RGYGgEik%P}Z6RyV!?y;Ux@p2GiwHYA4igG11w=YK|gR z!bwT;G5{M)qp7>!ODVYN0n--P^Z*9L786a|pb9W)X8uB8W&)G8e?c4W6qvNNeYDDG zG_64P%Pee-VA6wRLud}a{y$yHFf3*8c_}eP-$|r)hxa-x69vO78=}V>M@Ps8{zRQ? z!(xtQOUoI*e=aY8f0K9ttfnlcw_wRU4KIMxD$@%39DNf6(G4$1OvFF;ez!8jgT}nc z?uD6YV~|*q(oPCcNvWa3;cFV1CsH~b#vGcc-Y;Lil1f@e%9YHMW}XZw7oJj=Ojq0z zjzLI)Pqv5u9;o!^Qfck$b@dZZ8f+Y9LZ#6RSln_=&{onkkgbLdnx;^&JA*{KCaCEz zzZYH>dlAxJNTQ?#BP-=Jt=Y)A31++I;53eoy4?I?JZ>v)v8nI}rYRKo(N3GT5UBS- zWYA*b*GACdaAwBet0LpC7{?xqeG^r9Yw{U8cQohQNoSVy1Kecs$`6-5zjhF7`Wi+agHJ z+rzW-JhTj+9%ZTkLPzI}ace&QK5l3m`DP${ zmX5FWOo=`}Q`)6xib$4}&b^(6`(IjSsULe`1Xn1G$IVg+%%1*5hn3uoz$fkE<_tB g$MExKrGHZL)N}er{F(mwn(3ea0!i$rfjpN002~7OLjV8( literal 0 HcmV?d00001 From 0f835389388381413aa65148ddde8cec33c337b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 09:41:05 +0200 Subject: [PATCH 03/11] storage-core: configure normalization and use logger instead of printStackTrace, #TASK-5902, #TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java --- .../extensions/cosmic/CosmicExtensionTaskCallback.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java index c343e690dde..054ffaf4915 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; import org.opencb.biodata.models.sequence.SequenceLocation; import org.opencb.biodata.models.variant.Variant; @@ -31,7 +32,10 @@ public class CosmicExtensionTaskCallback implements CosmicParserCallback { public CosmicExtensionTaskCallback(RocksDB rdb) { this.rdb = rdb; - this.variantNormalizer = new VariantNormalizer(); + this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig() + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(false)); this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); } @@ -51,7 +55,7 @@ public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List Date: Wed, 5 Jun 2024 09:44:35 +0200 Subject: [PATCH 04/11] storage-core: add options to VariantStorageOptions and update the COSMIC annotator extension, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java --- .../core/variant/VariantStorageOptions.java | 1 + .../VariantAnnotatorExtensionsFactory.java | 15 ++-- .../CosmicVariantAnnotatorExtensionTask.java | 73 ++++++------------- 3 files changed, 32 insertions(+), 57 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index 6488e35bf8a..42d4851cf2a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -83,6 +83,7 @@ public enum VariantStorageOptions implements ConfigurationOption { ANNOTATOR_EXTENSION_PREFIX("annotator.extension."), ANNOTATOR_EXTENSION_LIST("annotator.extension.list"), ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"), + ANNOTATOR_EXTENSION_COSMIC_VERSION("annotator.extension.cosmic.version"), INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java index 57626a34b43..f20dadda289 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java @@ -2,6 +2,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.lang.reflect.InvocationTargetException; import java.util.LinkedList; @@ -15,9 +16,9 @@ public List getVariantAnnotatorExtensions(ObjectM for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { VariantAnnotatorExtensionTask task = null; switch (extensionId) { -// case CosmicVariantAnnotatorExtensionTask.ID: -// task = new CosmicVariantAnnotatorExtensionTask(options); -// break; + case CosmicVariantAnnotatorExtensionTask.ID: + task = new CosmicVariantAnnotatorExtensionTask(options); + break; default: String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); if (extensionClass != null) { @@ -41,10 +42,10 @@ private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String classN Class clazz = Class.forName(className); return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); } catch (ClassNotFoundException - | NoSuchMethodException - | InstantiationException - | IllegalAccessException - | InvocationTargetException e) { + | NoSuchMethodException + | InstantiationException + | IllegalAccessException + | InvocationTargetException e) { throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 60dcaeaccf1..49616124772 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -2,14 +2,15 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.variant.cosmic.CosmicParser; -import org.opencb.biodata.models.common.DataVersion; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; import org.rocksdb.Options; import org.rocksdb.RocksDB; @@ -17,7 +18,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; import java.io.IOException; import java.net.URI; import java.nio.file.Files; @@ -28,9 +28,12 @@ public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExtensionTask { + public static final String ID = "cosmic"; + private ObjectMap options; - private Path cosmicFolder; + private String cosmicVersion; + private String assembly; private ObjectReader objectReader; @@ -50,46 +53,28 @@ public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { @Override public List setup(URI output) throws Exception { - // Check input path - cosmicFolder = Paths.get(output.getPath()); - if (cosmicFolder == null || !Files.exists(cosmicFolder)) { - throw new IllegalArgumentException("Path " + output + " does not exist"); + // Sanity check + Path cosmicFile = Paths.get(options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key())); + if (!Files.exists(cosmicFile)) { + throw new IllegalArgumentException("COSMIC file " + cosmicFile + " does not exist"); + } + cosmicVersion = (String) options.getOrDefault(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), null); + if (StringUtils.isEmpty(cosmicVersion)) { + throw new IllegalArgumentException("Missing COSMIC version"); } - if (!cosmicFolder.toFile().isDirectory()) { - throw new IllegalArgumentException("Path " + output + " must be a directory with two files: the raw COSMIC file and the" - + " metadata file 'cosmicVersion.json'"); + assembly = (String) options.getOrDefault(VariantStorageOptions.ASSEMBLY.key(), null); + if (StringUtils.isEmpty(assembly)) { + throw new IllegalArgumentException("Missing assembly"); } // Clean and init RocksDB - dbLocation = cosmicFolder.toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); + dbLocation = Paths.get(output.getPath()).toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); if (Files.exists(dbLocation)) { // Skipping setup but init RocksDB logger.info("Skipping setup, it was already done"); initRockDB(false); } else { logger.info("Setup and populate RocksDB"); - File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); - if (!versionFile.exists()) { - throw new IllegalArgumentException("Path " + output + " does not contain the COSMIC metadata file: " - + COSMIC_VERSION_FILENAME); - } - DataVersion dataVersion; - try { - dataVersion = JacksonUtils.getDefaultObjectMapper().readValue(versionFile, DataVersion.class); - } catch (IOException e) { - throw new IllegalArgumentException("Error parsing the metadata file " + versionFile.getAbsolutePath(), e); - } - String cosmicFilename; - try { - cosmicFilename = dataVersion.getFiles().get(0); - } catch (Exception e) { - throw new IllegalArgumentException("Error getting the COSMIC file from the metadata file " - + versionFile.getAbsolutePath(), e); - } - File cosmicFile = cosmicFolder.resolve(cosmicFilename).toFile(); - if (!cosmicFile.exists()) { - throw new IllegalArgumentException("COSMIC file " + cosmicFile.getAbsolutePath() + " does not exist"); - } // Init RocksDB initRockDB(true); @@ -97,8 +82,7 @@ public List setup(URI output) throws Exception { // Call COSMIC parser try { CosmicExtensionTaskCallback callback = new CosmicExtensionTaskCallback(rdb); - CosmicParser.parse(cosmicFile.toPath(), dataVersion.getVersion(), dataVersion.getName(), dataVersion.getAssembly(), - callback); + CosmicParser.parse(cosmicFile, cosmicVersion, ID, assembly, callback); } catch (IOException e) { throw new ToolException(e); } @@ -108,16 +92,11 @@ public List setup(URI output) throws Exception { @Override public void checkAvailable() throws IllegalArgumentException { - if (!isAvailable()) { + if (dbLocation == null || !Files.exists(dbLocation)) { throw new IllegalArgumentException("COSMIC annotator extension is not available"); } } - @Override - public boolean isAvailable() { - return (dbLocation != null && Files.exists(dbLocation)); - } - @Override public ObjectMap getOptions() { return options; @@ -125,15 +104,9 @@ public ObjectMap getOptions() { @Override public ObjectMap getMetadata() { - File versionFile = cosmicFolder.resolve(COSMIC_VERSION_FILENAME).toFile(); - if (!versionFile.exists()) { - throw new IllegalArgumentException("Metadata file " + versionFile + " does not exist"); - } - try { - return JacksonUtils.getDefaultObjectMapper().readValue(versionFile, ObjectMap.class); - } catch (IOException e) { - throw new IllegalArgumentException(e); - } + return new ObjectMap("data", ID) + .append("version", cosmicVersion) + .append("assembly", assembly); } @Override From 79ac8874d293f1f168c04e45055c5c3832c6472e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 09:46:34 +0200 Subject: [PATCH 05/11] test: update COSMIC annotator extension JUnit tests, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java --- ...smicVariantAnnotatorExtensionTaskTest.java | 62 ++++++++++++++----- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index aced5f62950..f1fbaaf411f 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -11,6 +11,7 @@ import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.io.IOException; @@ -23,26 +24,40 @@ public class CosmicVariantAnnotatorExtensionTaskTest { + private final String ASSEMBLY ="GRCh38"; private final String COSMIC_VERSION = "v95"; @Test public void testSetupCosmicVariantAnnotatorExtensionTask() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + ObjectMap options = new ObjectMap(); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); Assert.assertEquals(false, task.isAvailable()); - // Setup COSMIC directory - Path cosmicPath = initCosmicPath(); - // Set-up COSMIC variant annotator extension task, once - task.setup(cosmicPath.toUri()); + task.setup(outPath.toUri()); // Set-up COSMIC variant annotator extension task, twice - task.setup(cosmicPath.toUri()); + task.setup(outPath.toUri()); ObjectMap metadata = task.getMetadata(); Assert.assertEquals(COSMIC_VERSION, metadata.get("version")); + Assert.assertEquals(CosmicVariantAnnotatorExtensionTask.ID, metadata.get("data")); + Assert.assertEquals(ASSEMBLY, metadata.get("assembly")); Assert.assertEquals(true, task.isAvailable()); } @@ -55,17 +70,29 @@ public void testSCosmicVariantAnnotatorExtensionTask() { } @Test - public void testAnnotationCosmicVariantAnnotatorExtensionTask() throws Exception { + public void testAnnotationCosmicVariantAnnotatorExtensionTaskUsingFactory() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + ObjectMap options = new ObjectMap(); - CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID); - Assert.assertEquals(false, task.isAvailable()); + CosmicVariantAnnotatorExtensionTask task = (CosmicVariantAnnotatorExtensionTask) new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(options).get(0); - // Setup COSMIC directory - Path cosmicPath = initCosmicPath(); + Assert.assertEquals(false, task.isAvailable()); // Set-up COSMIC variant annotator extension task, once - task.setup(cosmicPath.toUri()); + task.setup(outPath.toUri()); List inputVariantAnnotations = new ArrayList<>(); VariantAnnotation variantAnnotation1 = new VariantAnnotation(); @@ -105,13 +132,14 @@ private Path initCosmicPath() throws IOException { throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); } Path cosmicFile = Paths.get(getClass().getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); - DataVersion cosmicDataVersion = new DataVersion("variant", "cosmic", COSMIC_VERSION, "20231212", - "hsapiens", "GRCh38", Collections.singletonList(cosmicFile.getFileName().toString()), - Collections.singletonList("http://cosmic.org"), null); - JacksonUtils.getDefaultObjectMapper().writeValue(cosmicPath.resolve(CosmicVariantAnnotatorExtensionTask.COSMIC_VERSION_FILENAME).toFile(), cosmicDataVersion); - Files.copy(cosmicFile, cosmicPath.resolve(cosmicDataVersion.getFiles().get(0))); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } - return cosmicPath; + return targetPath; } private Path getTempPath() { From 41c155ca5034cbf2bb8f72689436f83442239073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 12:40:58 +0200 Subject: [PATCH 06/11] storage-core: check valid variants in COSMIC annotator extension, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java --- .../cosmic/CosmicVariantAnnotatorExtensionTask.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 49616124772..49a2a5f871e 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -112,8 +112,14 @@ public ObjectMap getMetadata() { @Override public List apply(List list) throws Exception { for (VariantAnnotation variantAnnotation : list) { - Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), - variantAnnotation.getAlternate()); + Variant variant; + try { + variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), + variantAnnotation.getAlternate()); + } catch (Exception e) { + logger.warn("Skipping variant: " + e.getMessage()); + continue; + } byte[] key = variant.toString().getBytes(); byte[] dbContent = rdb.get(key); if (dbContent != null) { From ca7458e56e75911c811e0f717dbfbbeaf061358a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 5 Jun 2024 12:42:52 +0200 Subject: [PATCH 07/11] test: add VariantAnnotationManager JUnit tests to check the COSMIC annotator extension, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java new file: opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz --- .../core/variant/VariantStorageBaseTest.java | 16 ++++ .../VariantAnnotationManagerTest.java | 71 ++++++++++++++++++ ...smicVariantAnnotatorExtensionTaskTest.java | 6 +- ...riant-test-file-annotator-extension.vcf.gz | Bin 0 -> 6726 bytes 4 files changed, 90 insertions(+), 3 deletions(-) create mode 100644 opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java index fe5313bdc80..33229775f45 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java @@ -97,6 +97,15 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari "22:16616084:G:A" ))); + public static final Set COSMIC_VARIANTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + "1:169607124:G:T", + "1:169611640:T:G", + "1:169617058:A:T", + "1:169617158:C:A", + "12:124372173:T:A", + "12:124336867:G:A" + ))); + public static final String VCF_TEST_FILE_NAME = "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"; protected static URI inputUri; @@ -106,6 +115,9 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari public static final String VCF_CORRUPTED_FILE_NAME = "variant-test-file-corrupted.vcf"; protected static URI corruptedInputUri; + public static final String ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME = "variant-test-file-annotator-extension.vcf.gz"; + protected static URI annotatorExtensionInputUri; + protected static URI outputUri; protected VariantStorageEngine variantStorageEngine; protected VariantStorageMetadataManager metadataManager; @@ -134,16 +146,20 @@ public static void _beforeClass() throws Exception { Path inputPath = rootDir.resolve(VCF_TEST_FILE_NAME); Path smallInputPath = rootDir.resolve(SMALL_VCF_TEST_FILE_NAME); Path corruptedInputPath = rootDir.resolve(VCF_CORRUPTED_FILE_NAME); + Path annotatorExtensionInputPath = rootDir.resolve(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_TEST_FILE_NAME), inputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(SMALL_VCF_TEST_FILE_NAME), smallInputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_CORRUPTED_FILE_NAME), corruptedInputPath, StandardCopyOption.REPLACE_EXISTING); + Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME), + annotatorExtensionInputPath, StandardCopyOption.REPLACE_EXISTING); inputUri = inputPath.toUri(); smallInputUri = smallInputPath.toUri(); corruptedInputUri = corruptedInputPath.toUri(); + annotatorExtensionInputUri = annotatorExtensionInputPath.toUri(); outputUri = rootDir.toUri(); // logger.info("count: " + count.getAndIncrement()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index dc2721882fa..0092e2889da 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -4,7 +4,9 @@ import org.apache.commons.lang.StringUtils; import org.junit.Assume; import org.junit.Test; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -17,10 +19,13 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantMatchers; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotatorFactory; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.CosmicVariantAnnotatorExtensionTaskTest; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.io.File; import java.net.URI; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; @@ -237,6 +242,72 @@ public void testMultiAnnotations() throws Exception { } + @Test + public void testCosmicAnnotatorExtensionWithCosmicAnnotation() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, COSMIC_VARIANTS.size()); + } + + @Test + public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, 0); + } + + public void checkCosmicVariants(DataResult annotationDataResult, int expected) { + int cosmicCount = 0; + for (VariantAnnotation va : annotationDataResult.getResults()) { + String variantId = va.getChromosome() + ":" + va.getStart() + ":" + va.getReference() + ":" + va.getAlternate(); + if (COSMIC_VARIANTS.contains(variantId)) { + if (va.getTraitAssociation() != null) { + for (EvidenceEntry entry : va.getTraitAssociation()) { + if (CosmicVariantAnnotatorExtensionTask.ID.equals(entry.getSource().getName())) { + cosmicCount++; + break; + } + } + } + } + } + assertEquals(expected, cosmicCount); + } + public void testQueries(VariantStorageEngine variantStorageEngine) throws StorageEngineException { long count = variantStorageEngine.count(new Query()).first(); long partialCount = 0; diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index f1fbaaf411f..5070e4922d7 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -126,12 +126,12 @@ public void testAnnotationCosmicVariantAnnotatorExtensionTaskUsingFactory() thro Assert.assertTrue(CollectionUtils.isEmpty(outputVariantAnnotations.get(1).getTraitAssociation())); } - private Path initCosmicPath() throws IOException { + public static Path initCosmicPath() throws IOException { Path cosmicPath = getTempPath(); if (!cosmicPath.toFile().mkdirs()) { throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); } - Path cosmicFile = Paths.get(getClass().getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); Files.copy(cosmicFile, targetPath); @@ -142,7 +142,7 @@ private Path initCosmicPath() throws IOException { return targetPath; } - private Path getTempPath() { + public static Path getTempPath() { return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); } } \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..fa19d5b2e4db3ff662592f94257e36b0a27b8ede GIT binary patch literal 6726 zcmV-M8oA{kiwFoFJz!=419o9@X<=@3Ep%mbbS-9SY-KHBZfeJ_6KEJC|e){>3FW&l;*KeQx_>aH+$GhL2!_&9_`}cRx z-+cb<{jdLZ{-M|Je*O4a2d^gou%G<8PX6x0$6r2A&;9Dd`@g>X@czF)e*E*N%kR8; za{*%FJ2vO#zx@2uABR8v^k(?})$sGT-wiK*csu;-zrFZjsEaUs|KoQ*4fVsrk1wFa z5b8hU{-03)ndJ!#Adz!`;oYZyVw6Iq3@?Y@zxwuvKMY}~Gi?ioZ-0FC-2PzEKlu9X zvsZ7Pz5Zbc{}-Nxok)20P1r?vs(%N3`c2uzIF5zjW1-ks(lOv&f#yW;v0TSWvaujf zzezhp0~;mekcQtr0ZWWjvP?Z z@M3t~fz4K6Wo*a{B-yZW;fSGo*-m#=|1a)xc`!E7;}>j8nJ`IU(Pe9(+e0{h<(7r( zn5Ez%=wb;gxEPDxlhR(elhpw~)-gW)hIi@gr{IprkG1{@I9`Xvc@=YoNA)@&XU-`= zRDjF^lI-%nebCNcEl}623QkZ$41OiJKuk%oz3AB4+t9%-<^96m1TRq%06TdGGiNHT zD?7UtK%>+uRce8w;AC6K7ncTH6mqnyBc=7tCpchI5l4zIl>{*)ZcCKL5r=1Z`cn@3 z*m*HIV-8NU`LYvfo;vHnis2G-UMbtob3^Q3e&jWLhFZq7qV6 zXg6GmgS?3O1On`j9M>Z^{=GTx^0zwtiG&VNg>ZI&NP)1#ZY|bx<{Fwp&m1OG`q8n) zn2X(qSz&htfOjxnuohZ@}QCfSu?ZKmrC4=uP;mvbr3# zLgjUjMfn>xM9}L{X2jIbT0hBDaCJEN=&M}Pzh zP_0OeSZsrtRMeH+!gsY0x;!ISn;IL1DqoLJ?c$FLO@btT){p$cY9O?ljBOZN&r zNG4V|y2+y?fF#~^@E)pV7rZ zsnt`W@2zSxRZDC2b3h9%9K{vN~=C<=8(xCcX zZ>hM#;s{cdICZk6lOeqq`DkO&rBFHWk>wLt5=E&BP3dOg1<&=h-p=!qzh2jAwgj_o zvg0?$Q%6``VL)iEx^0hLV{1zeYW;&aV?wlS!=a&EnP_miReq(j*GCw$ey%Kvja9DIV zqdRga6vjZ^X7$SC*oX-z4!sqIE+q~j_FApIMo~i&D>{sn&>285v%m93tnMvwY|1u? za`mnU$O{hG)fcY4tqkUZ7}W%arp34|bpkbLMUPx|4_(TEo5s$TaWDhNsv)u)Iz*;m zZ_}^aTn9;NTC*QHG?b;it1G7vEtoi&ViET+=}j#qNwoOujohT#%kj+4j&eG7owRYx zhLO>Q(fFp@6M9&w;|A9zlOr_o;#}(B)Ub; zC-8FS6rB~LYK+1$TSVe!ADPEw+#7L`&{QcT%Khti@<~HjB$;uYoq3|lX68F{C*(9& zE$D%&XUvySXfj-OIq!7LvF-EUDnjKZ;67{+L@K=rJ72)+G3P(q?HF?>C6L?*@8Hvd zIJpkheu&q*2xD^RL_*fpH=poen=4LLiV9}6(Kd*km}_s&qCM&j#U{+DurFWCoP(lU zU2$~j6bC03VfTZl*H(p0Gxv1M1PTQgnMp88O$E@U!&5KFoiDTVf zsFg3R!plW%+Jt*XAQCzm!RnD%8B|@6xW2G$rRK8r9l1V7fD8pn;J`K4DTdO_!`d>T zSQG?iRSY!{7NVMD3qp9iP>NXMWY#_P2(jvjkmbHW)ah02bH6viB~bl^BLNl5X1gH- zK+6e|lVfx9j5x$_8b!e|isBF%vDB<0H6IBV;$+*!C={F-V#p=e3I*k&+{~Ql4f6LK zy*MaW7x8rB6ivUjsOOfMY;j-O|7tP%%Ob#+Py)69b4AY>JgDiW)c8z{P|id%|-8sE);3 zQp8Sen#*If&p@ohWEt5D0ld{OM|1I#rR_R6RTp{` zfp3g2+VARzIfk)>qxvhxg05ymNcUS2dL*#H4IkBzd+NquP#9wcDaqL8sKOUh-c)~*28^txMI|dmLMjVs3s+Vlz{NdQ*+*wLnR&dSB-9!Dd^>G7H2Wn-nOBY zSzch^qO!f1oV0LOhId@-^UIqD45&g6s%4871ZsL4w~L16L3w+~H5C;iBx%-FYzs}dk)dYY zXI7cqqy+c;eA;n2q;mE|rIanWb0=2o2creKrS0lj`Mx$+4xp4cW*aGyXFYtk<)obC z>sy1q)IbZ>a!d)E8TV(Yxgf`Etb#Y1 zj#tyBF4Aa&SA2!WoNZmb407kz))ZzV!;|DQ&X1kVyG#YHrscSTrG)GuN7b^-(k3Wd z##B`H3ab=>?5Qcby{WgDcl9hm#TG*(3Ds$V8Mt{bdR_lLBtWq#D_N8c139Y0j`cp6Nroz)%oL`I5)o}s#3dQYqC zNU;2#kd;8WX`9A^fo84yb`d>?N5wL>r6%JSuWfiFhMB`FSw)K%AM1gWVChkP!P?dR z$p()&sO9OM{9UqFSg%qJ)1`Jk_dWX4vv|6wG5KS)7fR!VPgkM)%pD#a@bBvc=iC8ccHcBHj% z3XPF#$V~ZDWa}8XWjcO)-7Ia^I-$5zHb#M1T203`)vY+Fy+cHCbhxWkac|jHoG;q~ zo-@gC-;`pyY-{|c;Zy1axuoFhhinZYn<14c8Lz%`N5qW zV%Z`W?qn2DqxP}5@(i41fY$zF7!q(HVQgnfe^z|bB!yL zknh5GJ*_4FMu0`P)kCAfQqC9T-@{xN_u-QPiyCUt6CF*lNxZKx zpNBge`AVbd4vlY11AI*4~_N4Sdgs}j;mBV$;=gZ9RkdSTi-Nk9?v%D;*`6?-Wb zSicuTr&rX_seUNc1Y)m3{}QpsPH-h4#^QMv2`o1rHqO>>gR_uXfg0{6Z|#0xx@DS7 z*bmOf%fvejVQi#2?@>N}(-2y#2t9CKouq;|V{8^DZ7S`e3Y3(+RRoxV4XM|!GG)vh zeO6tY8;5L#Ka#^R_D!dUKg4j%hMwr}cKcPY=ma8pP@YW`KfFW9b+n={qtU^sgi}~{ zX)&o84o%BvCH9G@&d4S_wG#=c_M1%$)N{MGI29T4n3JaqhtjN?TH7_T2+kV1c{O)Z z#;K?8#xuwJAk)OI!Y;{KLHUjIPSfIPN7(@uQB77u$)yMjx>5V~?TdjB7jA7iDJ9~I zL~M6D?0?qz(8RP!XNF?bMNC74+Coh&AbTm|zF0vB^Ra?fYM0E`92~!C;7E#!WbZhO zAf97k$1jDruz1;aqyD~e--sc0kT*m@Uw!?Yb~1wq#9( zljqVZ7r^d^Q`V=QicK#=I`mDk=8=!3OX&zMhke1mYKeuF+b~Zc4LMMO=y%}!n=LqBhlj?ehrmQZ8c9VlmFKQkLRB+_+*)OvYo9TC(rlsZ~ zTT26|(H+apUr6U!dEr{pHxI?gdVP&VP1kJC8$77(hd3L?F?iQ!|Hu)Z*fWP%BV!19 zZ*w!1Y<0Izd-0-5IAHIBlavcv2cg>t$;Eqn7jN27+S?LpSCsR4S43;}n-w5Zs;$YC zY{n$LyW8srZLXdwhEScI`fsxEy4LDWu{OA*i$$qMj+s+!BXmz}yIE{f=HfTUr)68; zbbtx$EjJkZ90A-H`D{Hp*ATilO{?&7;mlsekeID%b@l!gol)a5NC10j!(^Uy6S}!U z367lzs+^)Tw;Uc`uq8|$7DFi@-HA|N0vlL*LdxzFaBPtVpcoRY1g>cbmq*F;)Uym4}_D_%a@V@hh(8LPZ zm@V13HgI7C^mjJys-cDXr0g=Q;@c^E`)^i+qNs8i=}F=*P}$QN6mxJ{l~^9Z$$jr@ z-wLkFTCCvgjm=s;lc?LT^JTzgNa7`UQ9#XEXuq7kVIbSVDJ!lu0!B>A;>)vFwkO=% zu=AE1#V--pxk<}j1;7m*0}z>Oy|#n`3GLTOCpdPHTlW<-t!N9nI={h4$FU%pGMB4*nIc44JFPn{WCofQVnG%n|D8+hLqV_joJFfVcWE|NY-(v(5rY$X zNuYY}(ZoO%T#ks0^>Jpa?YS=#g=9H+Jyn=G`!e(Kla8(k5RNaaMNHwor|55F#Jx*r z9(VApPt3NlxR3~Sr54KghQ_(SFylUWK^NIDSM!?HTZS;l5@bwHzNdRv5;_RCC59wCY%TEiJPUa5cZ zmE7_g*AhC3f;DNILeiz{e`ICt`W{1P&E3XVR@C4q?YyWdn>pPB?|NEe^^^dp1n=7JGow6t_nQ8w<-z6?`=BV+dVvpAqaYZGEsvq-`d&oE(@r~J*C(PgGJEX zfQ)LsEaa(d7iCdw43Vh%MU_vL&|5vTIuBKB5g?|Vt8axV#0C4Jvj(JRT7{Q0px@YH z>b7ZOY?;Pj*suf=sYbVol1%;Rdi;?MS*IEm#ETc|vp$F^FNnI^kJedB@E$#Z))Eix z857R|wU@)2O6<&OH}(UXScGHowMWq|9&e~maLB~N6PMpKd_)P-*y}x$HOyV*THNd} zYwxXEJgR2X*i1ELXu4Cj>`-Rlti4h64!OCcg%peJ%0LyFIqsB^c#wcIsD{1O)58Z| zIk~O%#n}lE3-kP-M%W`n3uCCw?JuhsiI1UQ8n5hRG;r3o3O#xj5v@PzV(F3E1odXt zZus?8S}5aKn(;Co<|!BP|63cu6SuzU2ure-A8uG!x7W!km0?n;s)24Ubi zc}vO>i&)Nc$QS1SFNs<>jy(lJRJ+Pzu2x+p%i&K{eAGBp4k|P=^b7(XvWTZ%}ioJ>&ftys#0GS~%WG%Zpq4YT>&?%-DHUJf--SmJJL4u-$pVgvp>|oS$|MHeN3^*msXCg90b3To zT2w+Uc(ywS%GqU Date: Thu, 6 Jun 2024 11:40:15 +0200 Subject: [PATCH 08/11] test: add JUnit tests to check COSMIC file format, missing COSMIC file, version and assembly; and assembly match, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java --- .../CosmicVariantAnnotatorExtensionTask.java | 5 +- .../VariantAnnotationManagerTest.java | 119 ++++++++++++++++++ ...smicVariantAnnotatorExtensionTaskTest.java | 16 +++ 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 49a2a5f871e..33246dc9658 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -8,6 +8,7 @@ import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.utils.FileUtils; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; @@ -55,9 +56,7 @@ public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { public List setup(URI output) throws Exception { // Sanity check Path cosmicFile = Paths.get(options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key())); - if (!Files.exists(cosmicFile)) { - throw new IllegalArgumentException("COSMIC file " + cosmicFile + " does not exist"); - } + FileUtils.checkFile(cosmicFile); cosmicVersion = (String) options.getOrDefault(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), null); if (StringUtils.isEmpty(cosmicVersion)) { throw new IllegalArgumentException("Missing COSMIC version"); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index 0092e2889da..0497d3bc96b 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -290,6 +290,125 @@ public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Excepti checkCosmicVariants(annotationDataResult, 0); } + @Test + public void testCosmicAnnotatorExtensionMissingInvalidCosmicFile() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initInvalidCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicFile() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95"); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicVersion() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMismatchAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh37") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + public void checkCosmicVariants(DataResult annotationDataResult, int expected) { int cosmicCount = 0; for (VariantAnnotation va : annotationDataResult.getResults()) { diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index 5070e4922d7..04a9f4b2e36 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -142,6 +142,22 @@ public static Path initCosmicPath() throws IOException { return targetPath; } + public static Path initInvalidCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/myannot.vcf").getPath()); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } + + return targetPath; + } + public static Path getTempPath() { return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); } From a628d20bcaae7150baabc1c77e7d9245c416f046 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 11 Jun 2024 16:50:25 +0200 Subject: [PATCH 09/11] test: rename JUnit test, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java --- .../core/variant/annotation/VariantAnnotationManagerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index 0497d3bc96b..68461f5d9c2 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -291,7 +291,7 @@ public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Excepti } @Test - public void testCosmicAnnotatorExtensionMissingInvalidCosmicFile() throws Exception { + public void testCosmicAnnotatorExtensionInvalidCosmicFile() throws Exception { // Setup COSMIC directory Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initInvalidCosmicPath(); System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); From 7f47c71f47b6d666c50f95d1e417a7932884fe37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 11 Jun 2024 16:52:23 +0200 Subject: [PATCH 10/11] test: set JUnit test as @Category(ShortTests.class), #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java --- .../extensions/CosmicVariantAnnotatorExtensionTaskTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java index 04a9f4b2e36..6f1a1f2825c 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -5,12 +5,14 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.Assert; import org.junit.Test; +import org.junit.experimental.categories.Category; import org.opencb.biodata.models.common.DataVersion; import org.opencb.biodata.models.variant.avro.VariantAnnotation; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.opencga.core.common.JacksonUtils; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.core.testclassification.duration.ShortTests; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; @@ -22,6 +24,7 @@ import java.util.Collections; import java.util.List; +@Category(ShortTests.class) public class CosmicVariantAnnotatorExtensionTaskTest { private final String ASSEMBLY ="GRCh38"; From 1dcb395bdac7fc803057814acc5d0a3356dd2828 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 11 Jun 2024 16:54:04 +0200 Subject: [PATCH 11/11] storage-core: fix sonnar issues, #TASK-5902, #TASK-5318 On branch TASK-5318 Changes to be committed: modified: opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java --- .../CosmicVariantAnnotatorExtensionTask.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java index 33246dc9658..421fb5fa87a 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -43,7 +43,6 @@ public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExte private Path dbLocation = null; public static final String COSMIC_ANNOTATOR_INDEX_NAME = "cosmicAnnotatorIndex"; - public static final String COSMIC_VERSION_FILENAME = "cosmicVersion.json"; private static Logger logger = LoggerFactory.getLogger(CosmicVariantAnnotatorExtensionTask.class); @@ -111,14 +110,8 @@ public ObjectMap getMetadata() { @Override public List apply(List list) throws Exception { for (VariantAnnotation variantAnnotation : list) { - Variant variant; - try { - variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getReference(), - variantAnnotation.getAlternate()); - } catch (Exception e) { - logger.warn("Skipping variant: " + e.getMessage()); - continue; - } + Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getEnd(), + variantAnnotation.getReference(), variantAnnotation.getAlternate()); byte[] key = variant.toString().getBytes(); byte[] dbContent = rdb.get(key); if (dbContent != null) { @@ -165,8 +158,7 @@ private void initRockDB(boolean forceCreate) throws ToolException { rdb = RocksDB.openReadOnly(dbOption, dbLocation.toAbsolutePath().toString()); } } catch (RocksDBException e) { - // Do some error handling - throw new ToolException("", e); + throw new ToolException("Error initializing RocksDB", e); } } }