Skip to content

Commit

Permalink
Merge pull request #2460 from opencb/TASK-5318
Browse files Browse the repository at this point in the history
TASK-5318 - Implement custom annotator to allow clients private files
  • Loading branch information
jtarraga authored Sep 26, 2024
2 parents 02f97f3 + 84d3ebe commit 862bed6
Show file tree
Hide file tree
Showing 11 changed files with 750 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ public enum VariantStorageOptions implements ConfigurationOption {
ANNOTATOR_CELLBASE_VARIANT_LENGTH_THRESHOLD("annotator.cellbase.variantLengthThreshold", Integer.MAX_VALUE),
ANNOTATOR_CELLBASE_IMPRECISE_VARIANTS("annotator.cellbase.impreciseVariants", true),
ANNOTATOR_CELLBASE_STAR_ALTERNATE("annotator.cellbase.starAlternate", false),
ANNOTATOR_EXTENSION_PREFIX("annotator.extension."),
ANNOTATOR_EXTENSION_LIST("annotator.extension.list"),
ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"),
ANNOTATOR_EXTENSION_COSMIC_VERSION("annotator.extension.cosmic.version"),

INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator;
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask;
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionsFactory;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter;
import org.opencb.opencga.storage.core.variant.io.db.VariantDBReader;
Expand Down Expand Up @@ -265,6 +267,13 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap
return variantAnnotationList;
};

List<VariantAnnotatorExtensionTask> extensions = new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(params);
for (VariantAnnotatorExtensionTask extension : extensions) {
extension.setup(outDir);
extension.checkAvailable();
annotationTask = annotationTask.then(extension);
}

final DataWriter<VariantAnnotation> variantAnnotationDataWriter;
if (avro) {
//FIXME
Expand All @@ -286,7 +295,7 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap
ParallelTaskRunner<Variant, VariantAnnotation> parallelTaskRunner =
new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config);
parallelTaskRunner.run();
} catch (ExecutionException e) {
} catch (Exception e) {
throw new VariantAnnotatorException("Error creating annotations", e);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions;

import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.run.Task;

import java.net.URI;
import java.util.List;

public interface VariantAnnotatorExtensionTask extends Task<VariantAnnotation, VariantAnnotation> {

/**
* Set up the annotator extension.
* This method will be called before any other method. It might generate extra files or data needed for the annotation.
*
* @param output Output directory where the annotator extension should write the files
* @return List of URIs of generated files (if any)
* @throws Exception if the annotator extension set up fails
*/
List<URI> setup(URI output) throws Exception;

/**
* Check if the annotator extension is available for the given options.
* @throws IllegalArgumentException if the annotator extension is not available
*/
void checkAvailable() throws IllegalArgumentException;

/**
* Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available.
* @return true if the annotator extension is available
*/
default boolean isAvailable() {
try {
checkAvailable();
return true;
} catch (IllegalArgumentException e) {
return false;
}
}

@Override
default void pre() throws Exception {
Task.super.pre();
checkAvailable();
}

/**
* Get the options for the annotator extension.
* @return Options for the annotator extension
*/
ObjectMap getOptions();

/**
* Get the metadata for the annotator extension.
* @return Metadata for the annotator extension
*/
ObjectMap getMetadata();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions;

import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.opencga.storage.core.variant.VariantStorageOptions;
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask;

import java.lang.reflect.InvocationTargetException;
import java.util.LinkedList;
import java.util.List;

public class VariantAnnotatorExtensionsFactory {

public List<VariantAnnotatorExtensionTask> getVariantAnnotatorExtensions(ObjectMap options) {

List<VariantAnnotatorExtensionTask> tasks = new LinkedList<>();
for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) {
VariantAnnotatorExtensionTask task = null;
switch (extensionId) {
case CosmicVariantAnnotatorExtensionTask.ID:
task = new CosmicVariantAnnotatorExtensionTask(options);
break;
default:
String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId);
if (extensionClass != null) {
task = getVariantAnnotatorExtension(extensionClass, options);
} else {
throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'");
}
}

if (task == null) {
throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'");
}

tasks.add(task);
}
return tasks;
}

private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) {
try {
Class<?> clazz = Class.forName(className);
return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options);
} catch (ClassNotFoundException
| NoSuchMethodException
| InstantiationException
| IllegalAccessException
| InvocationTargetException e) {
throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e);
}
}


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback;
import org.opencb.biodata.models.sequence.SequenceLocation;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.EvidenceEntry;
import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField;
import org.opencb.biodata.tools.variant.VariantNormalizer;
import org.opencb.opencga.core.common.JacksonUtils;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

public class CosmicExtensionTaskCallback implements CosmicParserCallback {

private RocksDB rdb;
private VariantNormalizer variantNormalizer;
private ObjectMapper defaultObjectMapper;

private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class);

private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|(<CNV[0-9]+>)|(<DUP>)|(<DEL>)|(<INS>)|(<INV>)";

public CosmicExtensionTaskCallback(RocksDB rdb) {
this.rdb = rdb;
this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig()
.setReuseVariants(true)
.setNormalizeAlleles(true)
.setDecomposeMNVs(false));
this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper();
}

@Override
public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List<EvidenceEntry> evidenceEntries) {
// Add evidence entries in the RocksDB
// More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed
List<String> normalisedVariantStringList;
try {
normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(),
sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate());
if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) {
for (String normalisedVariantString : normalisedVariantStringList) {
rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries));
}
return true;
}
return false;
} catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) {
logger.warn(StringUtils.join(e.getStackTrace(), "\n"));
return false;
}
}

protected List<String> getNormalisedVariantString(String chromosome, int start, String reference, String alternate)
throws NonStandardCompliantSampleField {
Variant variant = new Variant(chromosome, start, reference, alternate);
return getNormalisedVariantString(variant);
}

protected List<String> getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField {
// Checks no weird characters are part of the reference & alternate alleles
if (isValid(variant)) {
List<Variant> normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true);
return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList());
} else {
logger.warn("Variant {} is not valid: skipping it!", variant);
}

return Collections.emptyList();
}

protected boolean isValid(Variant variant) {
return (variant.getReference().matches(VARIANT_STRING_PATTERN)
&& (variant.getAlternate().matches(VARIANT_STRING_PATTERN)
&& !variant.getAlternate().equals(variant.getReference())));
}
}
Loading

0 comments on commit 862bed6

Please sign in to comment.