Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-4794 - Missing "FILTER" field in VCF generated (export step) while running Exomiser #2318

Merged
merged 7 commits into from
Jul 28, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import org.opencb.biodata.models.clinical.Phenotype;
import org.opencb.biodata.models.clinical.pedigree.Member;
import org.opencb.biodata.models.clinical.pedigree.Pedigree;
import org.opencb.biodata.models.core.SexOntologyTermAnnotation;
import org.opencb.biodata.models.pedigree.IndividualProperty;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.exec.Command;
Expand All @@ -21,12 +20,9 @@
import org.opencb.opencga.core.exceptions.ToolExecutorException;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -36,7 +32,6 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

@ToolExecutor(id = ExomiserWrapperAnalysisExecutor.ID,
tool = ExomiserWrapperAnalysis.ID,
Expand Down Expand Up @@ -133,7 +128,8 @@ public void run() throws ToolException {
.sample(sampleId)
.includeSample(samples)
.includeSampleData("GT")
.unknownGenotype("./.");
.unknownGenotype("./.")
.append("includeAllFromSampleIndex", true);

QueryOptions queryOptions = new QueryOptions(QueryOptions.INCLUDE, "id,studies.samples");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,15 @@ public List<IndexFieldConfiguration> getCustomFields() {
return customFields;
}

/**
 * Looks up a custom field by its source and key.
 *
 * @param source source the field must have (compared by identity)
 * @param key    key the field must have (compared with {@code equals})
 * @return the first matching entry of {@code customFields}, or {@code null} when none matches
 */
public IndexFieldConfiguration getCustomField(IndexFieldConfiguration.Source source, String key) {
    for (IndexFieldConfiguration candidate : customFields) {
        // Key is checked first; the source is only inspected for entries whose key matches.
        boolean sameKey = candidate.getKey().equals(key);
        if (sameKey && candidate.getSource() == source) {
            return candidate;
        }
    }
    return null;
}

/**
 * @return the current value of {@code filePositionBits}
 */
public int getFilePositionBits() {
    return this.filePositionBits;
}
Expand Down Expand Up @@ -705,4 +714,13 @@ public boolean equals(Object o) {
public int hashCode() {
    // Same 31-based fold over the two fields that Objects.hash(...) performs.
    int result = 1;
    result = 31 * result + Objects.hashCode(fileIndexConfiguration);
    result = 31 * result + Objects.hashCode(annotationIndexConfiguration);
    return result;
}

/**
 * @return a debug representation listing both nested index configurations
 */
@Override
public String toString() {
    return "SampleIndexConfiguration{"
            + "fileIndexConfiguration=" + fileIndexConfiguration
            + ", annotationIndexConfiguration=" + annotationIndexConfiguration
            + '}';
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
##fileformat=VCFv4.1
##FILTER=<ID=PASS,Description="All filters passed">
##FILTER=<ID=noPass,Description="No pass">
##FILTER=<ID=noPass2,Description="No pass other">
##FILTER=<ID=.,Description="unknown filter state">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype likelihoods">
##FORMAT=<ID=DS,Number=1,Type=Float,Description="">
##command=seq 1000000 500 3000000 | while read i ; do echo -e "chr1\t$i\t.\tA\tC\t$RANDOM\tPASS\t.\tGT\t0/1\t1/1\t1|0\t0|1" ; done
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19600 NA19660 NA19661 NA19685
chr1 1000000 . A C,T 5 noPass,noPass2 . GT 1/2 1/1 0|0 0|1
chr1 1000010 . A AC,CA 20 PASS . GT 1/2 1/1 0|0 0|1
chr1 1000020 . AT T,A 60 . . GT 1/2 1/1 0|0 0|1
chr1 1000030 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000040 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000050 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000060 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000070 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000080 . C G 60 . PASS GT 1/0 1/1 0|0 0|1
chr1 1000090 . C G 60 . PASS GT 1/0 1/1 0|0 0|1

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.MultiValueIndexFieldFilter;
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.SingleValueIndexFieldFilter;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

/**
Expand All @@ -22,7 +19,7 @@
* Value "0" represents NA.
*/
public class CategoricalIndexField<T> extends IndexField<T> implements IndexCodec<T> {
private final int numBits;
private final int bitLength;
private final IndexCodec<T> codec;

public static CategoricalIndexField<String> create(IndexFieldConfiguration configuration, int bitOffset) {
Expand All @@ -43,18 +40,18 @@ public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffse
numValues = values.length;
codec = new BasicCodec<>(values, valuesMapping);
}
this.numBits = Math.max(1, IndexUtils.log2(numValues - 1) + 1);
this.bitLength = Math.max(1, IndexUtils.log2(numValues - 1) + 1);
}

/**
 * Creates a categorical field that uses an externally supplied codec.
 *
 * @param configuration field configuration, forwarded to the superclass constructor
 * @param bitOffset     bit offset, forwarded to the superclass constructor
 * @param numValues     number of values; the bit length is derived as {@code log2(numValues - 1) + 1}
 * @param codec         codec kept in the {@code codec} field
 */
public CategoricalIndexField(IndexFieldConfiguration configuration, int bitOffset, int numValues, IndexCodec<T> codec) {
super(configuration, bitOffset);
// NOTE(review): numBits and bitLength receive the same value here; this looks like both sides of a
// numBits -> bitLength rename shown together — confirm which field is the live one.
this.numBits = IndexUtils.log2(numValues - 1) + 1;
this.bitLength = IndexUtils.log2(numValues - 1) + 1;
this.codec = codec;
}

/**
 * @return the bit length computed for this field in the constructor
 */
@Override
public int getBitLength() {
    // Single return: the former second return statement was unreachable.
    return bitLength;
}

@Override
Expand Down Expand Up @@ -124,6 +121,16 @@ public T decode(int code) {
public boolean ambiguous(int code) {
    // Direct table lookup: one flag per code.
    final boolean flagged = ambiguousValues[code];
    return flagged;
}

/**
 * @return a debug representation of the codec's values, reverse mapping and ambiguity flags
 */
@Override
public String toString() {
    return "BasicCodec{"
            + "values=" + Arrays.toString(values)
            + ", valuesMappingRev=" + valuesMappingRev
            + ", ambiguousValues=" + Arrays.toString(ambiguousValues)
            + '}';
}
}

private static class BasicCodecWithNa<T> implements IndexCodec<T> {
Expand Down Expand Up @@ -178,6 +185,20 @@ public T decode(int code) {
public boolean ambiguous(int code) {
    // Direct table lookup: one flag per code.
    final boolean flagged = ambiguousValues[code];
    return flagged;
}

/**
 * @return a debug representation of the codec's values, reverse mapping and ambiguity flags
 */
@Override
public String toString() {
    return "BasicCodecWithNa{"
            + "values=" + Arrays.toString(values)
            + ", valuesMappingRev=" + valuesMappingRev
            + ", ambiguousValues=" + Arrays.toString(ambiguousValues)
            + '}';
}
}

/**
 * @return the codec held by this field
 */
protected IndexCodec<T> getCodec() {
    return this.codec;
}

@Override
Expand All @@ -195,4 +216,14 @@ public boolean ambiguous(int code) {
return codec.ambiguous(code);
}

/**
 * @return a debug representation with configuration, bit offset, bit length and codec
 */
@Override
public String toString() {
    return "CategoricalIndexField{"
            + "configuration=" + getConfiguration()
            + ", bitOffset=" + getBitOffset()
            + ", bitLength=" + bitLength
            + ", codec=" + codec
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/
public class CategoricalMultiValuedIndexField<T> extends CategoricalIndexField<List<T>> {

private final int numBits;
private final int bitLength;

public static CategoricalMultiValuedIndexField<String> createMultiValued(IndexFieldConfiguration configuration, int bitOffset) {
return new CategoricalMultiValuedIndexField<>(
Expand All @@ -41,7 +41,7 @@ public CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, i

/**
 * Internal constructor: passes {@code values.length} and the mask codec to the superclass and
 * takes the bit length from the codec's {@code numBits}.
 *
 * @param configuration field configuration, forwarded to the superclass constructor
 * @param bitOffset     bit offset, forwarded to the superclass constructor
 * @param values        accepted values; only the array length is used here
 * @param codec         mask codec, forwarded to the superclass constructor
 */
private CategoricalMultiValuedIndexField(IndexFieldConfiguration configuration, int bitOffset, T[] values, MaskValueCodec<T> codec) {
super(configuration, bitOffset, values.length, codec);
// NOTE(review): numBits and bitLength receive the same value here; this looks like both sides of a
// numBits -> bitLength rename shown together — confirm which field is the live one.
numBits = codec.numBits;
bitLength = codec.numBits;
}

@Override
Expand All @@ -59,7 +59,7 @@ protected IndexFieldFilter getSingleValueIndexFilter(OpValue<List<T>> opValue) {

/**
 * @return the bit length taken from the mask codec in the constructor
 */
@Override
public int getBitLength() {
    // Single return: the former second return statement was unreachable.
    return bitLength;
}

/**
Expand Down Expand Up @@ -145,6 +145,28 @@ public List<T> decode(int code) {
public boolean ambiguous(int code) {
    // The NA code is always ambiguous.
    if (code == NA) {
        return true;
    }
    // Otherwise ambiguous when the code shares at least one bit with the ambiguous-values mask.
    return (code & ambiguousValues) != 0;
}

/**
 * @return a debug representation of the mask codec's internal state
 */
@Override
public String toString() {
    return "MaskValueCodec{"
            + "values=" + Arrays.toString(values)
            + ", otherValuePosition=" + otherValuePosition
            + ", valuesPosition=" + valuesPosition
            + ", numBits=" + numBits
            + ", ambiguousValues=" + ambiguousValues
            + '}';
}
}

/**
 * @return a debug representation with configuration, codec, bit offset and bit length
 */
@Override
public String toString() {
    return "CategoricalMultiValuedIndexField{"
            + "configuration=" + getConfiguration()
            + ", codec=" + getCodec()
            + ", bitOffset=" + getBitOffset()
            + ", bitLength=" + bitLength
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,12 @@ public BitBuffer read(BitBuffer buffer, int i) {
return buffer.getBitBuffer(i * indexSizeBits, indexSizeBits);
}

/**
 * @return a debug representation with the index size in bits and the schema fields
 */
@Override
public String toString() {
    return "FixedSizeIndexSchema{"
            + "indexSizeBits=" + indexSizeBits
            + ", fields=" + fields
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,12 @@ public R decode(int code) {
};
}

/**
 * @return a debug representation with the field configuration and bit offset
 */
@Override
public String toString() {
    return "IndexField{"
            + "configuration=" + configuration
            + ", bitOffset=" + bitOffset
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.IndexFieldFilter;
import org.opencb.opencga.storage.hadoop.variant.index.core.filters.RangeIndexFieldFilter;

import java.util.Arrays;
import java.util.List;

/**
Expand All @@ -18,7 +19,7 @@ public class RangeIndexField extends IndexField<Double> {
private final double[] thresholds;
private final double min;
private final double max;
private final int numBits;
private final int bitLength;
private final IndexCodec<Double> codec;
private int numRanges;

Expand All @@ -40,7 +41,7 @@ public RangeIndexField(IndexFieldConfiguration configuration, int bitOffset, dou
} else {
codec = new NonNullableRangeCodec();
}
numBits = Math.max(1, IndexUtils.log2(numRanges - 1) + 1);
bitLength = Math.max(1, IndexUtils.log2(numRanges - 1) + 1);
if (configuration.getType().equals(IndexFieldConfiguration.Type.RANGE_GT)) {
// Add one DELTA to each value to invert ranges from [s, e) to (s, e], therefore the operation ">" is exact
for (int i = 0; i < thresholds.length; i++) {
Expand Down Expand Up @@ -77,7 +78,7 @@ public double getMax() {

/**
 * @return the bit length computed for this range field in the constructor
 */
@Override
public int getBitLength() {
    // Single return: the former second return statement was unreachable.
    return bitLength;
}

@Override
Expand Down Expand Up @@ -120,13 +121,20 @@ public int encode(Double value) {

/**
 * Maps a range code back to a representative value.
 *
 * @param code range code
 * @return {@code min} for any code {@code <= 0}; otherwise {@code thresholds[code - 1]}
 */
@Override
public Double decode(int code) {
    // Only one mapping can be live; the earlier "thresholds[code]" variant was followed by this
    // one, which made the second return unreachable. The later variant is kept.
    // NOTE(review): confirm against encode() that code k > 0 corresponds to thresholds[k - 1].
    return code <= 0 ? min : thresholds[code - 1];
}

/**
 * Reports every code as ambiguous, regardless of its value.
 *
 * @param code range code (not inspected)
 * @return always {@code true}
 */
@Override
public boolean ambiguous(int code) {
return true;
}

/**
 * @return the fixed label of this codec; no state is included
 */
@Override
public String toString() {
    return "NonNullableRangeCodec{" + '}';
}
}

public class NullableRangeCodec extends NonNullableRangeCodec {
Expand All @@ -142,6 +150,13 @@ public int encode(Double value) {
public Double decode(int code) {
    // The NA code decodes to "no value".
    if (code == NA) {
        return null;
    }
    // Every other code is shifted down by one and decoded by the parent codec.
    return super.decode(code - 1);
}

/**
 * @return the fixed label of this codec; no state is included
 */
@Override
public String toString() {
    return "NullableRangeCodec{" + '}';
}
}

/**
Expand Down Expand Up @@ -174,4 +189,19 @@ public static boolean lessThan(double a, double b) {
/**
 * Approximate equality for doubles.
 *
 * @param a first value
 * @param b second value
 * @return {@code true} when the absolute difference is strictly below {@code DELTA / 10}
 */
public static boolean equalsTo(double a, double b) {
    final double distance = Math.abs(a - b);
    return distance < (DELTA / 10);
}

/**
 * @return a debug representation with configuration, bit layout, thresholds, bounds and codec
 */
@Override
public String toString() {
    return "RangeIndexField{"
            + "configuration=" + getConfiguration()
            + ", bitOffset=" + getBitOffset()
            + ", bitLength=" + bitLength
            + ", thresholds=" + Arrays.toString(thresholds)
            + ", min=" + min
            + ", max=" + max
            + ", codec=" + codec
            + ", numRanges=" + numRanges
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,12 @@ public int readFieldValue(BitBuffer buffer, int i) {
// return getField().read(read(buffer, i));
return buffer.getIntPartial(i * getBitsLength(), getBitsLength());
}

/**
 * @return a debug representation showing the single field of this schema
 */
@Override
public String toString() {
    return "SingleFieldIndexSchema{"
            + "field=" + field
            + '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,12 @@ public Variant next() {
@Override
public SampleVariantIndexEntry nextSampleVariantIndexEntry() {
AnnotationIndexEntry annotationIndexEntry = nextAnnotationIndexEntry();
BitBuffer fileIndex = null;
List<BitBuffer> filesIndex = new ArrayList<>();
if (hasFileIndex()) {
fileIndex = nextFileIndexEntry();
filesIndex.add(nextFileIndexEntry());
while (isMultiFileIndex()) {
filesIndex.add(nextMultiFileIndexEntry());
}
}
String genotype = nextGenotype();
int meCode = nextMendelianErrorCode();
Expand All @@ -136,7 +139,7 @@ public SampleVariantIndexEntry nextSampleVariantIndexEntry() {
parentsCode = nextParentsIndexEntry();
}
Variant variant = next();
return new SampleVariantIndexEntry(variant, fileIndex, genotype, annotationIndexEntry, parentsCode, meCode);
return new SampleVariantIndexEntry(variant, filesIndex, genotype, annotationIndexEntry, parentsCode, meCode);
}

@Override
Expand Down
Loading
Loading