diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java index 847ae860d70..73abcbae61f 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageOptions.java @@ -81,6 +81,10 @@ public enum VariantStorageOptions implements ConfigurationOption { ANNOTATOR_CELLBASE_VARIANT_LENGTH_THRESHOLD("annotator.cellbase.variantLengthThreshold", Integer.MAX_VALUE), ANNOTATOR_CELLBASE_IMPRECISE_VARIANTS("annotator.cellbase.impreciseVariants", true), ANNOTATOR_CELLBASE_STAR_ALTERNATE("annotator.cellbase.starAlternate", false), + ANNOTATOR_EXTENSION_PREFIX("annotator.extension."), + ANNOTATOR_EXTENSION_LIST("annotator.extension.list"), + ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"), + ANNOTATOR_EXTENSION_COSMIC_VERSION("annotator.extension.cosmic.version"), INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine. diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java index 78229d47ef6..f8e7f4c4b90 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/DefaultVariantAnnotationManager.java @@ -52,6 +52,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionsFactory; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter; import org.opencb.opencga.storage.core.variant.io.db.VariantDBReader; @@ -265,6 +267,13 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap return variantAnnotationList; }; + List extensions = new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(params); + for (VariantAnnotatorExtensionTask extension : extensions) { + extension.setup(outDir); + extension.checkAvailable(); + annotationTask = annotationTask.then(extension); + } + final DataWriter variantAnnotationDataWriter; if (avro) { //FIXME @@ -286,7 +295,7 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap ParallelTaskRunner parallelTaskRunner = new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config); parallelTaskRunner.run(); - } catch (ExecutionException e) { + } catch (Exception e) { throw new VariantAnnotatorException("Error creating annotations", e); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..03255409123 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionTask.java @@ -0,0 +1,59 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.run.Task; + +import java.net.URI; +import java.util.List; + +public interface VariantAnnotatorExtensionTask extends Task { + + /** + * Set up the annotator extension. + * This method will be called before any other method. It might generate extra files or data needed for the annotation. + * + * @param output Output directory where the annotator extension should write the files + * @return List of URIs of generated files (if any) + * @throws Exception if the annotator extension set up fails + */ + List setup(URI output) throws Exception; + + /** + * Check if the annotator extension is available for the given options. + * @throws IllegalArgumentException if the annotator extension is not available + */ + void checkAvailable() throws IllegalArgumentException; + + /** + * Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available. + * @return true if the annotator extension is available + */ + default boolean isAvailable() { + try { + checkAvailable(); + return true; + } catch (IllegalArgumentException e) { + return false; + } + } + + @Override + default void pre() throws Exception { + Task.super.pre(); + checkAvailable(); + } + + /** + * Get the options for the annotator extension. + * @return Options for the annotator extension + */ + ObjectMap getOptions(); + + /** + * Get the metadata for the annotator extension. + * @return Metadata for the annotator extension + */ + ObjectMap getMetadata(); + +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java new file mode 100644 index 00000000000..f20dadda289 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/VariantAnnotatorExtensionsFactory.java @@ -0,0 +1,54 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; + +import java.lang.reflect.InvocationTargetException; +import java.util.LinkedList; +import java.util.List; + +public class VariantAnnotatorExtensionsFactory { + + public List getVariantAnnotatorExtensions(ObjectMap options) { + + List tasks = new LinkedList<>(); + for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) { + VariantAnnotatorExtensionTask task = null; + switch (extensionId) { + case CosmicVariantAnnotatorExtensionTask.ID: + task = new CosmicVariantAnnotatorExtensionTask(options); + break; + default: + String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId); + if (extensionClass != null) { + task = getVariantAnnotatorExtension(extensionClass, options); + } else { + throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'"); + } + } + + if (task == null) { + throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'"); + } + + tasks.add(task); + } + return tasks; + } + + private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) { + try { + Class clazz = Class.forName(className); + return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options); + } catch (ClassNotFoundException + | NoSuchMethodException + | InstantiationException + | IllegalAccessException + | InvocationTargetException e) { + throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e); + } + } + + +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java new file mode 100644 index 00000000000..054ffaf4915 --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicExtensionTaskCallback.java @@ -0,0 +1,86 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback; +import org.opencb.biodata.models.sequence.SequenceLocation; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; +import org.opencb.biodata.tools.variant.VariantNormalizer; +import org.opencb.opencga.core.common.JacksonUtils; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class CosmicExtensionTaskCallback implements CosmicParserCallback { + + private RocksDB rdb; + private VariantNormalizer variantNormalizer; + private ObjectMapper defaultObjectMapper; + + private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class); + + private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|()|()|()|()|()"; + + public CosmicExtensionTaskCallback(RocksDB rdb) { + this.rdb = rdb; + this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig() + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(false)); + this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper(); + } + + @Override + public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List evidenceEntries) { + // Add evidence entries in the RocksDB + // More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed + List normalisedVariantStringList; + try { + normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(), + sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate()); + if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) { + for (String normalisedVariantString : normalisedVariantStringList) { + rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries)); + } + return true; + } + return false; + } catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) { + logger.warn(StringUtils.join(e.getStackTrace(), "\n")); + return false; + } + } + + protected List getNormalisedVariantString(String chromosome, int start, String reference, String alternate) + throws NonStandardCompliantSampleField { + Variant variant = new Variant(chromosome, start, reference, alternate); + return getNormalisedVariantString(variant); + } + + protected List getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField { + // Checks no weird characters are part of the reference & alternate alleles + if (isValid(variant)) { + List normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true); + return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList()); + } else { + logger.warn("Variant {} is not valid: skipping it!", variant); + } + + return Collections.emptyList(); + } + + protected boolean isValid(Variant variant) { + return (variant.getReference().matches(VARIANT_STRING_PATTERN) + && (variant.getAlternate().matches(VARIANT_STRING_PATTERN) + && !variant.getAlternate().equals(variant.getReference()))); + } +} diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java new file mode 100644 index 00000000000..421fb5fa87a --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/cosmic/CosmicVariantAnnotatorExtensionTask.java @@ -0,0 +1,164 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.variant.cosmic.CosmicParser; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.utils.FileUtils; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.List; + +public class CosmicVariantAnnotatorExtensionTask implements VariantAnnotatorExtensionTask { + + public static final String ID = "cosmic"; + + private ObjectMap options; + + private String cosmicVersion; + private String assembly; + + private ObjectReader objectReader; + + private RocksDB rdb = null; + private Options dbOption = null; + private Path dbLocation = null; + + public static final String COSMIC_ANNOTATOR_INDEX_NAME = "cosmicAnnotatorIndex"; + + private static Logger logger = LoggerFactory.getLogger(CosmicVariantAnnotatorExtensionTask.class); + + public CosmicVariantAnnotatorExtensionTask(ObjectMap options) { + this.options = options; + this.objectReader = JacksonUtils.getDefaultObjectMapper().readerFor(new TypeReference>() {}); + } + + @Override + public List setup(URI output) throws Exception { + // Sanity check + Path cosmicFile = Paths.get(options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key())); + FileUtils.checkFile(cosmicFile); + cosmicVersion = (String) options.getOrDefault(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), null); + if (StringUtils.isEmpty(cosmicVersion)) { + throw new IllegalArgumentException("Missing COSMIC version"); + } + assembly = (String) options.getOrDefault(VariantStorageOptions.ASSEMBLY.key(), null); + if (StringUtils.isEmpty(assembly)) { + throw new IllegalArgumentException("Missing assembly"); + } + + // Clean and init RocksDB + dbLocation = Paths.get(output.getPath()).toAbsolutePath().resolve(COSMIC_ANNOTATOR_INDEX_NAME); + if (Files.exists(dbLocation)) { + // Skipping setup but init RocksDB + logger.info("Skipping setup, it was already done"); + initRockDB(false); + } else { + logger.info("Setup and populate RocksDB"); + + // Init RocksDB + initRockDB(true); + + // Call COSMIC parser + try { + CosmicExtensionTaskCallback callback = new CosmicExtensionTaskCallback(rdb); + CosmicParser.parse(cosmicFile, cosmicVersion, ID, assembly, callback); + } catch (IOException e) { + throw new ToolException(e); + } + } + return Collections.singletonList(dbLocation.toUri()); + } + + @Override + public void checkAvailable() throws IllegalArgumentException { + if (dbLocation == null || !Files.exists(dbLocation)) { + throw new IllegalArgumentException("COSMIC annotator extension is not available"); + } + } + + @Override + public ObjectMap getOptions() { + return options; + } + + @Override + public ObjectMap getMetadata() { + return new ObjectMap("data", ID) + .append("version", cosmicVersion) + .append("assembly", assembly); + } + + @Override + public List apply(List list) throws Exception { + for (VariantAnnotation variantAnnotation : list) { + Variant variant = new Variant(variantAnnotation.getChromosome(), variantAnnotation.getStart(), variantAnnotation.getEnd(), + variantAnnotation.getReference(), variantAnnotation.getAlternate()); + byte[] key = variant.toString().getBytes(); + byte[] dbContent = rdb.get(key); + if (dbContent != null) { + List evidenceEntryList = objectReader.readValue(dbContent); + if (variantAnnotation.getTraitAssociation() == null) { + variantAnnotation.setTraitAssociation(evidenceEntryList); + } else { + variantAnnotation.getTraitAssociation().addAll(evidenceEntryList); + } + } + } + return list; + } + + @Override + public void post() throws Exception { + closeRocksDB(); + } + + + private void closeRocksDB() { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + } + + private void initRockDB(boolean forceCreate) throws ToolException { + boolean indexingNeeded = forceCreate || !Files.exists(dbLocation); + // a static method that loads the RocksDB C++ library. + RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. + dbOption = new Options().setCreateIfMissing(true); + + rdb = null; + try { + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + rdb = RocksDB.open(dbOption, dbLocation.toAbsolutePath().toString()); + } else { + rdb = RocksDB.openReadOnly(dbOption, dbLocation.toAbsolutePath().toString()); + } + } catch (RocksDBException e) { + throw new ToolException("Error initializing RocksDB", e); + } + } +} diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java index 23ece394c79..eef6d8a6edf 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/VariantStorageBaseTest.java @@ -97,6 +97,15 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari "22:16616084:G:A" ))); + public static final Set COSMIC_VARIANTS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + "1:169607124:G:T", + "1:169611640:T:G", + "1:169617058:A:T", + "1:169617158:C:A", + "12:124372173:T:A", + "12:124336867:G:A" + ))); + public static final String VCF_TEST_FILE_NAME = "10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz"; protected static URI inputUri; @@ -106,6 +115,9 @@ public abstract class VariantStorageBaseTest extends GenericTest implements Vari public static final String VCF_CORRUPTED_FILE_NAME = "variant-test-file-corrupted.vcf"; protected static URI corruptedInputUri; + public static final String ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME = "variant-test-file-annotator-extension.vcf.gz"; + protected static URI annotatorExtensionInputUri; + protected static URI outputUri; protected VariantStorageEngine variantStorageEngine; protected VariantStorageMetadataManager metadataManager; @@ -134,16 +146,20 @@ public static void _beforeClass() throws Exception { Path inputPath = rootDir.resolve(VCF_TEST_FILE_NAME); Path smallInputPath = rootDir.resolve(SMALL_VCF_TEST_FILE_NAME); Path corruptedInputPath = rootDir.resolve(VCF_CORRUPTED_FILE_NAME); + Path annotatorExtensionInputPath = rootDir.resolve(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_TEST_FILE_NAME), inputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(SMALL_VCF_TEST_FILE_NAME), smallInputPath, StandardCopyOption.REPLACE_EXISTING); Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(VCF_CORRUPTED_FILE_NAME), corruptedInputPath, StandardCopyOption.REPLACE_EXISTING); + Files.copy(VariantStorageEngineTest.class.getClassLoader().getResourceAsStream(ANNOTATOR_EXTENSION_VCF_TEST_FILE_NAME), + annotatorExtensionInputPath, StandardCopyOption.REPLACE_EXISTING); inputUri = inputPath.toUri(); smallInputUri = smallInputPath.toUri(); corruptedInputUri = corruptedInputPath.toUri(); + annotatorExtensionInputUri = annotatorExtensionInputPath.toUri(); outputUri = rootDir.toUri(); // logger.info("count: " + count.getAndIncrement()); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java index 00544a3d715..ba33f63dcff 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/VariantAnnotationManagerTest.java @@ -5,7 +5,9 @@ import org.junit.Assume; import org.junit.Test; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -18,10 +20,13 @@ import org.opencb.opencga.storage.core.variant.adaptors.VariantMatchers; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotatorFactory; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.CosmicVariantAnnotatorExtensionTaskTest; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; import java.io.File; import java.net.URI; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; @@ -238,6 +243,191 @@ public void testMultiAnnotations() throws Exception { } + @Test + public void testCosmicAnnotatorExtensionWithCosmicAnnotation() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, COSMIC_VARIANTS.size()); + } + + @Test + public void testCosmicAnnotatorExtensionWithoutCosmicAnnotation() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + variantStorageEngine.saveAnnotation("v1", new ObjectMap()); + + // Check that cosmic variants are annotated + DataResult annotationDataResult = variantStorageEngine.getAnnotation("v1", new Query(), new QueryOptions()); + checkCosmicVariants(annotationDataResult, 0); + } + + @Test + public void testCosmicAnnotatorExtensionInvalidCosmicFile() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initInvalidCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicFile() throws Exception { + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95"); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingCosmicVersion() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh38") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMissingAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + @Test + public void testCosmicAnnotatorExtensionMismatchAssembly() throws Exception { + // Setup COSMIC directory + Path cosmicFile = CosmicVariantAnnotatorExtensionTaskTest.initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + VariantStorageEngine variantStorageEngine = getVariantStorageEngine(); + runDefaultETL(annotatorExtensionInputUri, variantStorageEngine, newStudyMetadata(), + new ObjectMap(VariantStorageOptions.ANNOTATE.key(), false)); + + variantStorageEngine.getOptions() + .append(VariantStorageOptions.ANNOTATOR_CLASS.key(), DummyTestAnnotator.class.getName()) + .append(VariantStorageOptions.ANNOTATOR.key(), VariantAnnotatorFactory.AnnotationEngine.OTHER) + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID) + .append(VariantStorageOptions.ASSEMBLY.key(), "GRCh37") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), "v95") + .append(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + + URI annotOutdir = outputUri.resolve("annot1"); + Files.createDirectories(Paths.get(annotOutdir)); + + thrown.expect(VariantAnnotatorException.class); + variantStorageEngine.annotate(annotOutdir, new ObjectMap(DummyTestAnnotator.ANNOT_KEY, "v1").append(VariantStorageOptions.ANNOTATION_OVERWEITE.key(), true)); + } + + public void checkCosmicVariants(DataResult annotationDataResult, int expected) { + int cosmicCount = 0; + for (VariantAnnotation va : annotationDataResult.getResults()) { + String variantId = va.getChromosome() + ":" + va.getStart() + ":" + va.getReference() + ":" + va.getAlternate(); + if (COSMIC_VARIANTS.contains(variantId)) { + if (va.getTraitAssociation() != null) { + for (EvidenceEntry entry : va.getTraitAssociation()) { + if (CosmicVariantAnnotatorExtensionTask.ID.equals(entry.getSource().getName())) { + cosmicCount++; + break; + } + } + } + } + } + assertEquals(expected, cosmicCount); + } + public void testQueries(VariantStorageEngine variantStorageEngine) throws StorageEngineException { long count = variantStorageEngine.count(new Query()).first(); long partialCount = 0; diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java new file mode 100644 index 00000000000..6f1a1f2825c --- /dev/null +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/annotation/annotators/extensions/CosmicVariantAnnotatorExtensionTaskTest.java @@ -0,0 +1,167 @@ +package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions; + + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.junit.Assert; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.opencb.biodata.models.common.DataVersion; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.common.JacksonUtils; +import org.opencb.opencga.core.common.TimeUtils; +import org.opencb.opencga.core.exceptions.ToolException; +import org.opencb.opencga.core.testclassification.duration.ShortTests; +import org.opencb.opencga.storage.core.variant.VariantStorageOptions; +import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +@Category(ShortTests.class) +public class CosmicVariantAnnotatorExtensionTaskTest { + + private final String ASSEMBLY ="GRCh38"; + private final String COSMIC_VERSION = "v95"; + + @Test + public void testSetupCosmicVariantAnnotatorExtensionTask() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + ObjectMap options = new ObjectMap(); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + + Assert.assertEquals(false, task.isAvailable()); + + // Set-up COSMIC variant annotator extension task, once + task.setup(outPath.toUri()); + + // Set-up COSMIC variant annotator extension task, twice + task.setup(outPath.toUri()); + + ObjectMap metadata = task.getMetadata(); + Assert.assertEquals(COSMIC_VERSION, metadata.get("version")); + Assert.assertEquals(CosmicVariantAnnotatorExtensionTask.ID, metadata.get("data")); + Assert.assertEquals(ASSEMBLY, metadata.get("assembly")); + + Assert.assertEquals(true, task.isAvailable()); + } + + @Test + public void testSCosmicVariantAnnotatorExtensionTask() { + ObjectMap options = new ObjectMap(); + CosmicVariantAnnotatorExtensionTask task = new CosmicVariantAnnotatorExtensionTask(options); + Assert.assertEquals(false, task.isAvailable()); + } + + @Test + public void testAnnotationCosmicVariantAnnotatorExtensionTaskUsingFactory() throws Exception { + Path outPath = getTempPath(); + if (!outPath.toFile().mkdirs()) { + throw new IOException("Error creating the output path: " + outPath.toAbsolutePath()); + } + System.out.println("outPath = " + outPath.toAbsolutePath()); + + // Setup COSMIC directory + Path cosmicFile = initCosmicPath(); + System.out.println("cosmicFile = " + cosmicFile.toAbsolutePath()); + + ObjectMap options = new ObjectMap(); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_FILE.key(), cosmicFile); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_COSMIC_VERSION.key(), COSMIC_VERSION); + options.put(VariantStorageOptions.ASSEMBLY.key(), ASSEMBLY); + options.put(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key(), CosmicVariantAnnotatorExtensionTask.ID); + + CosmicVariantAnnotatorExtensionTask task = (CosmicVariantAnnotatorExtensionTask) new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(options).get(0); + + Assert.assertEquals(false, task.isAvailable()); + + // Set-up COSMIC variant annotator extension task, once + task.setup(outPath.toUri()); + + List inputVariantAnnotations = new ArrayList<>(); + VariantAnnotation variantAnnotation1 = new VariantAnnotation(); + variantAnnotation1.setChromosome("12"); + variantAnnotation1.setStart(124402657); + variantAnnotation1.setEnd(124402657); + variantAnnotation1.setReference("G"); + variantAnnotation1.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation1); + VariantAnnotation variantAnnotation2 = new VariantAnnotation(); + variantAnnotation2.setChromosome("22"); + variantAnnotation2.setStart(124402657); + variantAnnotation2.setEnd(124402657); + variantAnnotation2.setReference("G"); + variantAnnotation2.setAlternate("T"); + inputVariantAnnotations.add(variantAnnotation2); + + List outputVariantAnnotations = task.apply(inputVariantAnnotations); + task.post(); + + Assert.assertEquals(inputVariantAnnotations.size(), outputVariantAnnotations.size()); + + // Checking variantAnnotation1 + Assert.assertEquals(1, outputVariantAnnotations.get(0).getTraitAssociation().size()); + Assert.assertEquals("COSV62300079", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getId()); + Assert.assertEquals("liver", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getPrimarySite()); + Assert.assertEquals("hepatocellular carcinoma", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getSomaticInformation().getHistologySubtype()); + Assert.assertEquals("PMID:323", outputVariantAnnotations.get(0).getTraitAssociation().get(0).getBibliography().get(0)); + + // Checking variantAnnotation2 + Assert.assertTrue(CollectionUtils.isEmpty(outputVariantAnnotations.get(1).getTraitAssociation())); + } + + public static Path initCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/cosmic.small.tsv.gz").getPath()); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } + + return targetPath; + } + + public static Path initInvalidCosmicPath() throws IOException { + Path cosmicPath = getTempPath(); + if (!cosmicPath.toFile().mkdirs()) { + throw new IOException("Error creating the COSMIC path: " + cosmicPath.toAbsolutePath()); + } + Path cosmicFile = Paths.get(CosmicVariantAnnotatorExtensionTaskTest.class.getResource("/custom_annotation/myannot.vcf").getPath()); + Path targetPath = cosmicPath.resolve(cosmicFile.getFileName()); + Files.copy(cosmicFile, targetPath); + + if (!Files.exists(targetPath)) { + throw new IOException("Error copying COSMIC file to " + targetPath); + } + + return targetPath; + } + + public static Path getTempPath() { + return Paths.get("target/test-data").resolve(TimeUtils.getTimeMillis() + "_" + RandomStringUtils.random(8, true, false)); + } +} \ No newline at end of file diff --git a/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz b/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz new file mode 100644 index 00000000000..153129875fe Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/custom_annotation/cosmic.small.tsv.gz differ diff --git a/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz new file mode 100644 index 00000000000..fa19d5b2e4d Binary files /dev/null and b/opencga-storage/opencga-storage-core/src/test/resources/variant-test-file-annotator-extension.vcf.gz differ