From 717beacf09a137012c81a1123dba775e4c7ae511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Mon, 9 Sep 2024 16:28:19 +0100 Subject: [PATCH] storage: Add sampleIndexConfiguration.fileDataConfiguration. #TASK-6765 --- .../storage/SampleIndexConfiguration.java | 204 ++++++------------ .../index/sample/FileDataIndexSchema.java | 18 +- .../sample/HBaseToSampleIndexConverter.java | 7 +- .../index/sample/SampleIndexDBAdaptor.java | 1 + .../sample/SampleIndexEntryPutBuilder.java | 20 +- .../index/sample/SampleIndexSchema.java | 3 +- .../SampleVariantIndexEntryConverter.java | 5 +- .../variant/index/sample/SampleIndexTest.java | 16 +- 8 files changed, 109 insertions(+), 165 deletions(-) diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java index 21178f7fdf4..cb89309369c 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/SampleIndexConfiguration.java @@ -15,147 +15,12 @@ public class SampleIndexConfiguration { public static final int DEFAULT_FILE_POSITION_SIZE_BITS = 3; private static final double[] QUAL_THRESHOLDS = new double[]{10, 20, 30}; - private static final double[] DP_THRESHOLDS = new double[]{5, 10, 15, 20, 30, 40, 50}; private static final double[] DP_THRESHOLDS_NULLABLE = new double[]{5, 10, 15, 20, 30, 50}; + private final FileIndexConfiguration fileIndexConfiguration = new FileIndexConfiguration(); + private final FileDataConfiguration fileDataConfiguration = new FileDataConfiguration(); private final AnnotationIndexConfiguration annotationIndexConfiguration = new AnnotationIndexConfiguration(); - public static SampleIndexConfiguration backwardCompatibleConfiguration() { - double[] backwardCompatibleThresholds = new double[]{0.001, 0.005, 0.01}; - 
SampleIndexConfiguration sampleIndexConfiguration = new SampleIndexConfiguration() - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.FILE, - StudyEntry.FILTER, - IndexFieldConfiguration.Type.CATEGORICAL, - VCFConstants.PASSES_FILTERS_v4)) - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.FILE, StudyEntry.QUAL, QUAL_THRESHOLDS).setNullable(false)) - .addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.SAMPLE, VCFConstants.DEPTH_KEY, DP_THRESHOLDS).setNullable(false)); - sampleIndexConfiguration.getAnnotationIndexConfiguration().getPopulationFrequency() - .addPopulation(new Population(ParamConstants.POP_FREQ_1000G_CB_V4, "ALL")) - .addPopulation(new Population(ParamConstants.POP_FREQ_GNOMAD_GENOMES, "ALL")) - .setThresholds(backwardCompatibleThresholds); - - sampleIndexConfiguration.getFileIndexConfiguration().setFilePositionBits(4); - - // Ensure backward compatibility with these two params: - sampleIndexConfiguration.addFileIndexField(new IndexFieldConfiguration( - IndexFieldConfiguration.Source.SAMPLE, "padding", IndexFieldConfiguration.Type.CATEGORICAL, - "add_two_extra_bits", "to_allow_backward", "compatibility")); - sampleIndexConfiguration.getFileIndexConfiguration().setFixedFieldsFirst(false); - - IndexFieldConfiguration biotypeConfiguration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.ANNOTATION, - "biotype", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) - .setValues( - NONSENSE_MEDIATED_DECAY, - LINCRNA, - MIRNA, - RETAINED_INTRON, - SNRNA, - SNORNA, - "other_non_pseudo_gene", -// "other", - PROTEIN_CODING - ).setValuesMapping(new HashMap<>()); - biotypeConfiguration.getValuesMapping().put(LINCRNA, Arrays.asList( - "lncRNA", - NON_CODING, - LINCRNA, - "macro_lncRNA", - ANTISENSE, - SENSE_INTRONIC, - SENSE_OVERLAPPING, - THREEPRIME_OVERLAPPING_NCRNA, - "bidirectional_promoter_lncRNA")); - 
biotypeConfiguration.getValuesMapping().put("other_non_pseudo_gene", Arrays.asList( - PROCESSED_TRANSCRIPT, - NON_STOP_DECAY, - MISC_RNA, - RRNA, - MT_RRNA, - MT_TRNA, - IG_C_GENE, - IG_D_GENE, - IG_J_GENE, - IG_V_GENE, - TR_C_GENE, - TR_D_GENE, - TR_J_GENE, - TR_V_GENE, - NMD_TRANSCRIPT_VARIANT, - TRANSCRIBED_UNPROCESSED_PSEUDGENE, - AMBIGUOUS_ORF, - KNOWN_NCRNA, - RETROTRANSPOSED, - LRG_GENE - )); - biotypeConfiguration.setNullable(false); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setBiotype(biotypeConfiguration); - IndexFieldConfiguration consequenceType = new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, - "consequenceType", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE) - .setValues( - SPLICE_DONOR_VARIANT, - TRANSCRIPT_ABLATION, - TRANSCRIPT_AMPLIFICATION, - INITIATOR_CODON_VARIANT, - SPLICE_REGION_VARIANT, - INCOMPLETE_TERMINAL_CODON_VARIANT, - "utr", - "mirna_tfbs", - MISSENSE_VARIANT, - FRAMESHIFT_VARIANT, - INFRAME_DELETION, - INFRAME_INSERTION, - START_LOST, - STOP_GAINED, - STOP_LOST, - SPLICE_ACCEPTOR_VARIANT - ).setValuesMapping(new HashMap<>()); - consequenceType.getValuesMapping().put("mirna_tfbs", Arrays.asList( - TF_BINDING_SITE_VARIANT, - MATURE_MIRNA_VARIANT)); - consequenceType.getValuesMapping().put("utr", Arrays.asList( - THREE_PRIME_UTR_VARIANT, - FIVE_PRIME_UTR_VARIANT)); - consequenceType.setNullable(false); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setConsequenceType(consequenceType); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setTranscriptFlagIndexConfiguration( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, - "transcriptFlag", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, - "do_not_use" - ).setNullable(false)); - sampleIndexConfiguration.getAnnotationIndexConfiguration().setTranscriptCombination(false); - - sampleIndexConfiguration.getAnnotationIndexConfiguration().setClinicalSource( - new 
IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, "clinicalSource", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, "cosmic") - .setNullable(false)); - sampleIndexConfiguration.getAnnotationIndexConfiguration().setClinicalSignificance( - new IndexFieldConfiguration( - IndexFieldConfiguration.Source.ANNOTATION, "clinicalSignificance", - IndexFieldConfiguration.Type.CATEGORICAL_MULTI_VALUE, - ClinicalSignificance.likely_benign.toString(), - ClinicalSignificance.uncertain_significance.toString(), - ClinicalSignificance.likely_pathogenic.toString(), - ClinicalSignificance.pathogenic.toString(), - "unused_target_drug", - "unused_pgx", - "unused_bit8" - ).setNullable(false)); - - return sampleIndexConfiguration; - } - public static SampleIndexConfiguration defaultConfiguration() { return defaultConfiguration(false); } @@ -178,6 +43,9 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) sampleIndexConfiguration.getFileIndexConfiguration() .setFilePositionBits(DEFAULT_FILE_POSITION_SIZE_BITS); + sampleIndexConfiguration.getFileDataConfiguration() + .setIncludeOriginalCall(true) + .setIncludeSecondaryAlternates(true); IndexFieldConfiguration biotypeConfiguration = new IndexFieldConfiguration(IndexFieldConfiguration.Source.ANNOTATION, "biotype", @@ -312,10 +180,6 @@ public static SampleIndexConfiguration defaultConfiguration(boolean cellbaseV4) return sampleIndexConfiguration; } - public void validate() { - validate(null); - } - public void validate(String cellbaseVersion) { addMissingValues(defaultConfiguration("v4".equalsIgnoreCase(cellbaseVersion))); @@ -336,6 +200,13 @@ public void addMissingValues(SampleIndexConfiguration defaultConfiguration) { if (fileIndexConfiguration.getCustomFields().isEmpty()) { fileIndexConfiguration.getCustomFields().addAll(defaultConfiguration.fileIndexConfiguration.customFields); } + if (fileDataConfiguration.includeOriginalCall == null) { + fileDataConfiguration.includeOriginalCall = 
defaultConfiguration.fileDataConfiguration.includeOriginalCall; + } + if (fileDataConfiguration.includeSecondaryAlternates == null) { + fileDataConfiguration.includeSecondaryAlternates = defaultConfiguration.fileDataConfiguration.includeSecondaryAlternates; + } + if (annotationIndexConfiguration.getPopulationFrequency() == null) { annotationIndexConfiguration.setPopulationFrequency(defaultConfiguration.annotationIndexConfiguration.populationFrequency); } @@ -368,6 +239,53 @@ public void addMissingValues(SampleIndexConfiguration defaultConfiguration) { } } + public static class FileDataConfiguration { + private Boolean includeOriginalCall; + private Boolean includeSecondaryAlternates; + + public FileDataConfiguration() { + // By default, left as null. + // The defaultConfiguration will set it to true when constructed. + this.includeOriginalCall = null; + this.includeSecondaryAlternates = null; + } + + public Boolean getIncludeOriginalCall() { + return includeOriginalCall; + } + + public FileDataConfiguration setIncludeOriginalCall(Boolean includeOriginalCall) { + this.includeOriginalCall = includeOriginalCall; + return this; + } + + public boolean isIncludeOriginalCall() { + return includeOriginalCall != null && includeOriginalCall; + } + + public Boolean getIncludeSecondaryAlternates() { + return includeSecondaryAlternates; + } + + public FileDataConfiguration setIncludeSecondaryAlternates(Boolean includeSecondaryAlternates) { + this.includeSecondaryAlternates = includeSecondaryAlternates; + return this; + } + + public boolean isIncludeSecondaryAlternates() { + return includeSecondaryAlternates != null && includeSecondaryAlternates; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("FileDataConfiguration{"); + sb.append("includeOriginalCall=").append(includeOriginalCall); + sb.append(", includeSecondaryAlternates=").append(includeSecondaryAlternates); + sb.append('}'); + return sb.toString(); + } + } + public static class 
FileIndexConfiguration { private final List customFields = new ArrayList<>(); @@ -687,6 +605,9 @@ public FileIndexConfiguration getFileIndexConfiguration() { return fileIndexConfiguration; } + public FileDataConfiguration getFileDataConfiguration() { + return fileDataConfiguration; + } public SampleIndexConfiguration addFileIndexField(IndexFieldConfiguration fileIndex) { if (fileIndexConfiguration.getCustomFields().contains(fileIndex)) { @@ -719,6 +640,7 @@ public int hashCode() { public String toString() { final StringBuilder sb = new StringBuilder("SampleIndexConfiguration{"); sb.append("fileIndexConfiguration=").append(fileIndexConfiguration); + sb.append(", fileDataConfiguration=").append(fileDataConfiguration); sb.append(", annotationIndexConfiguration=").append(annotationIndexConfiguration); sb.append('}'); return sb.toString(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java index 8df0628647c..371657ae3bf 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/FileDataIndexSchema.java @@ -21,11 +21,11 @@ public class FileDataIndexSchema extends DataSchema { private final DataFieldWithContext originalCallField; private final DataFieldWithContext> secondaryAlternatesField; - private boolean includeOriginalCall = true; - private boolean includeSecondaryAlternates = true; + private final SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration; - public
FileDataIndexSchema(SampleIndexConfiguration.FileIndexConfiguration fileIndexConfiguration) { - if (includeOriginalCall) { + public FileDataIndexSchema(SampleIndexConfiguration.FileDataConfiguration fileDataConfiguration) { + this.fileDataConfiguration = fileDataConfiguration; + if (fileDataConfiguration.isIncludeOriginalCall()) { originalCallField = new VarBinaryDataField( new IndexFieldConfiguration(IndexFieldConfiguration.Source.FILE, "ORIGINAL_CALL", null)) .fromWithContext(new VariantOriginalCallToBytesConverter()); @@ -33,7 +33,7 @@ public FileDataIndexSchema(SampleIndexConfiguration.FileI } else { originalCallField = null; } - if (includeSecondaryAlternates) { + if (fileDataConfiguration.isIncludeSecondaryAlternates()) { secondaryAlternatesField = new VarBinaryDataField( new IndexFieldConfiguration(IndexFieldConfiguration.Source.STUDY, "SECONDARY_ALTERNATES", null)) .fromWithContext(new AlternateCoordinateToBytesConverter()); @@ -44,7 +44,7 @@ public FileDataIndexSchema(SampleIndexConfiguration.FileI } public boolean isIncludeOriginalCall() { - return includeOriginalCall; + return fileDataConfiguration.isIncludeOriginalCall(); } public DataFieldWithContext getOriginalCallField() { @@ -52,17 +52,13 @@ public DataFieldWithContext getOriginalCallField() { } public boolean isIncludeSecondaryAlternates() { - return includeSecondaryAlternates; + return fileDataConfiguration.isIncludeSecondaryAlternates(); } public DataFieldWithContext> getSecondaryAlternatesField() { return secondaryAlternatesField; } - public void writeOriginalCall(Variant variant, OriginalCall call, ByteBuffer bb) { - getOriginalCallField().write(variant, call, bb); - } - public OriginalCall readOriginalCall(ByteBuffer fileDataByteBuffer, Variant variant) { return readFieldAndDecode(fileDataByteBuffer, originalCallField, variant); } diff --git
a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java index f6d9fd64638..033105e49a9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/HBaseToSampleIndexConverter.java @@ -165,7 +165,12 @@ public Map> convertToMapSampleVariantIn BitBuffer fileIndexEntry; do { fileIndexEntry = fileIndexSchema.readEntry(fileIndexStream); - ByteBuffer fileDataEntry = fileDataSchema.readNextEntry(fileDataBuffer); + ByteBuffer fileDataEntry; + if (fileDataBuffer == null) { + fileDataEntry = null; + } else { + fileDataEntry = fileDataSchema.readNextEntry(fileDataBuffer); + } values.add(new SampleVariantIndexEntry(variant, fileIndexEntry, fileDataEntry)); } while (this.fileIndexSchema.isMultiFile(fileIndexEntry)); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 1280bc187d3..8218ca08a6c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -636,6 +636,7 @@ 
private Scan parse(SingleSampleIndexQuery query, LocusQuery locusQuery, boolean scan.setCaching(hBaseManager.getConf().getInt("hbase.client.scanner.caching", 100)); logger.info("---------"); + logger.info("Study = \"" + query.getStudy() + "\" (id=" + studyId + ")"); logger.info("Sample = \"" + query.getSample() + "\" (id=" + sampleId + ") , schema version = " + query.getSchema().getVersion()); logger.info("Table = " + getSampleIndexTableName(query)); printScan(scan); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java index e62c546120f..20e04c2c401 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexEntryPutBuilder.java @@ -181,10 +181,12 @@ public void build(Put put) { put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeCountColumn(gt), Bytes.toBytes(variants.size())); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toFileIndexColumn(gt), fileIndexBuffer.getBuffer()); int position = fileDataIndexBuffer.position(); - fileDataIndexBuffer.rewind(); - fileDataIndexBuffer.limit(position); - put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), put.getTimestamp(), - fileDataIndexBuffer); + if (position > 0) { + fileDataIndexBuffer.rewind(); + fileDataIndexBuffer.limit(position); + put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), put.getTimestamp(), + fileDataIndexBuffer); + } } } @@ -404,7 +406,9 @@ private void 
partialBuild(boolean flush) { fileIndexBuffer.setBitBuffer(gtEntry.getFilesIndex().get(0), offset); offset += fileIndexSchema.getBitsLength(); prev = gtEntry; - fileDataIndexSchema.writeEntry(fileDataBuffer, gtEntry.getFileData().get(0)); + if (!gtEntry.getFileData().isEmpty()) { + fileDataIndexSchema.writeEntry(fileDataBuffer, gtEntry.getFileData().get(0)); + } } // Do not write the whole buffer, but only the corresponding to the processed entries. @@ -422,8 +426,10 @@ public void build(Put put) { put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeColumn(gt), variantsBuffer); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toGenotypeCountColumn(gt), Bytes.toBytes(variantsCount)); put.addColumn(COLUMN_FAMILY, SampleIndexSchema.toFileIndexColumn(gt), fileIndexBuffer.toByteArray()); - put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), - put.getTimestamp(), fileDataBuffer.toByteByffer()); + if (fileDataBuffer.size() > 0) { + put.addColumn(COLUMN_FAMILY, ByteBuffer.wrap(SampleIndexSchema.toFileDataColumn(gt)), + put.getTimestamp(), fileDataBuffer.toByteByffer()); + } } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java index bd57ae196fb..2d4d700cf76 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexSchema.java @@ -144,7 +144,7 @@ public SampleIndexSchema(SampleIndexConfiguration configuration, int version) { this.version = version; this.configuration = configuration; fileIndex = 
new FileIndexSchema(configuration.getFileIndexConfiguration()); - fileData = new FileDataIndexSchema(configuration.getFileIndexConfiguration()); + fileData = new FileDataIndexSchema(configuration.getFileDataConfiguration()); // annotationSummaryIndexSchema = new AnnotationSummaryIndexSchema(); ctIndex = new ConsequenceTypeIndexSchema(configuration.getAnnotationIndexConfiguration().getConsequenceType()); biotypeIndex = new BiotypeIndexSchema(configuration.getAnnotationIndexConfiguration().getBiotype()); @@ -218,6 +218,7 @@ public String toString() { sb.append("version=").append(version); sb.append(", configuration=").append(configuration); sb.append(", fileIndex=").append(fileIndex); + sb.append(", fileData=").append(fileData); sb.append(", popFreqIndex=").append(popFreqIndex); sb.append(", ctIndex=").append(ctIndex); sb.append(", biotypeIndex=").append(biotypeIndex); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java index 59021d89ee8..8916ad975fe 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleVariantIndexEntryConverter.java @@ -140,9 +140,12 @@ private ByteBuffer createFileDataIndexValue(Variant variant, int filePosition, O if (fileDataSchema.isIncludeSecondaryAlternates()) { fileDataSize += fileDataSchema.getSecondaryAlternatesField().getByteLength(variant, secondaryAlternates); } + if (fileDataSize == 0) { + return null; + } ByteBuffer bb = 
ByteBuffer.allocate(fileDataSize); if (fileDataSchema.isIncludeOriginalCall()) { - fileDataSchema.writeOriginalCall(variant, call, bb); + fileDataSchema.getOriginalCallField().write(variant, call, bb); } if (fileDataSchema.isIncludeSecondaryAlternates()) { fileDataSchema.getSecondaryAlternatesField().write(variant, secondaryAlternates, bb); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index 98062c27b8f..2ee70568412 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -158,8 +158,12 @@ public void load() throws Exception { .append(VariantStorageOptions.STATS_CALCULATE.key(), false) .append(VariantStorageOptions.LOAD_SPLIT_DATA.key(), VariantStorageEngine.SplitData.MULTI); - versioned = metadataManager.addSampleIndexConfiguration(STUDY_NAME_2, SampleIndexConfiguration.defaultConfiguration() - .addFileIndexField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.SAMPLE, "DS", new double[]{0, 1, 2})), true); + SampleIndexConfiguration configuration = SampleIndexConfiguration.defaultConfiguration() + .addFileIndexField(new IndexFieldConfiguration(IndexFieldConfiguration.Source.SAMPLE, "DS", new double[]{0, 1, 2})); + configuration.getFileDataConfiguration() + .setIncludeOriginalCall(null) + .setIncludeSecondaryAlternates(null); + versioned = metadataManager.addSampleIndexConfiguration(STUDY_NAME_2, configuration, true); assertEquals(2, versioned.getVersion()); 
assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.STAGING, versioned.getStatus()); @@ -227,7 +231,7 @@ public void load() throws Exception { // Study 1 - extra sample index configuration, not staging, only one sample in that configuration - SampleIndexConfiguration configuration = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest().getConfiguration(); + configuration = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest().getConfiguration(); // Don't modify the configuration. versioned = engine.getMetadataManager().addSampleIndexConfiguration(STUDY_NAME, configuration, true); assertEquals(2, versioned.getVersion()); @@ -250,11 +254,17 @@ public void load() throws Exception { versioned = engine.getMetadataManager().getStudyMetadata(STUDY_NAME).getSampleIndexConfigurationLatest(false); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE, versioned.getStatus()); + // No fileData fields should be null + assertNotNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeOriginalCall()); + assertNotNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeSecondaryAlternates()); // Study 2 - Latest should be active versioned = metadataManager.getStudyMetadata(STUDY_NAME_2).getSampleIndexConfiguration(versioned.getVersion()); assertEquals(2, versioned.getVersion()); assertEquals(StudyMetadata.SampleIndexConfigurationVersioned.Status.ACTIVE, versioned.getStatus()); + // Both fileData fields should be null + assertNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeOriginalCall()); + assertNull(versioned.getConfiguration().getFileDataConfiguration().getIncludeSecondaryAlternates()); // Study 3 - Latest should be active versioned = metadataManager.getStudyMetadata(STUDY_NAME_3).getSampleIndexConfiguration(versioned.getVersion());