From 44df54037f5a10285c49aee18abe2a8597ac364a Mon Sep 17 00:00:00 2001 From: Douglas Myers-Turnbull Date: Sun, 21 Jan 2024 18:17:39 -0800 Subject: [PATCH] overhaul project --- .github/ISSUE_TEMPLATE/bug.yaml | 46 ++ .github/ISSUE_TEMPLATE/docs.yaml | 16 + .github/ISSUE_TEMPLATE/feature.yaml | 16 + .github/ISSUE_TEMPLATE/performance.yaml | 51 +++ .github/PULL_REQUEST_TEMPLATE/pull_request.md | 1 + .github/workflows/build.yaml | 43 ++ .github/workflows/checks.yaml | 30 ++ .gitignore | 3 + README.md | 42 +- bed/build.gradle | 4 +- bed/lombok.config | 2 - .../org/pharmgkb/parsers/bed/BedParser.java | 9 +- .../org/pharmgkb/parsers/bed/BedParserI.java | 8 + .../org/pharmgkb/parsers/bed/BedWriter.java | 48 +- .../org/pharmgkb/parsers/bed/BedWriterI.java | 5 + .../pharmgkb/parsers/bed/model/BedBlock.java | 52 +-- .../parsers/bed/model/BedFeature.java | 55 ++- .../parsers/bed/model/package-info.java | 1 + .../pharmgkb/parsers/bed/package-info.java | 1 + .../pharmgkb/parsers/bed/BedBlockTest.java | 11 +- .../pharmgkb/parsers/bed/BedFeatureTest.java | 18 +- .../pharmgkb/parsers/bed/BedParserTest.java | 7 +- .../pharmgkb/parsers/bed/BedWriterTest.java | 4 +- bgee/build.gradle | 2 +- bgee/lombok.config | 2 - .../parsers/bgee/BgeeExpressionParser.java | 7 +- .../parsers/bgee/BgeeExpressionParserI.java | 8 + .../pharmgkb/parsers/bgee/BgeeResource.java | 32 -- .../parsers/bgee/model/BgeeExpression.java | 104 +---- .../model/{Quality.java => BgeeQuality.java} | 10 +- .../parsers/bgee/model/package-info.java | 1 + .../pharmgkb/parsers/bgee/package-info.java | 1 + .../parsers/bgee/BgeeResourceTest.java | 16 - build.gradle | 36 +- chain/build.gradle | 4 +- chain/lombok.config | 2 - .../pharmgkb/parsers/chain/ChainResource.java | 26 -- .../parsers/chain/GenomeChainParser.java | 25 +- .../parsers/chain/GenomeChainParserI.java | 8 + .../parsers/chain/model/GenomeChain.java | 129 +++--- .../parsers/chain/model/GenomeChainI.java | 20 + .../parsers/chain/model/package-info.java | 1 + 
.../pharmgkb/parsers/chain/package-info.java | 1 + .../parsers/chain/GenomeChainParserTest.java | 4 +- .../parsers/chain/GenomeChainTest.java | 2 +- core/lombok.config | 2 - .../java/org/pharmgkb/parsers/LineParser.java | 11 +- .../pharmgkb/parsers/LineStructureParser.java | 4 +- .../pharmgkb/parsers/LineStructureWriter.java | 4 +- .../java/org/pharmgkb/parsers/LineWriter.java | 8 +- .../org/pharmgkb/parsers/MultilineParser.java | 6 +- .../org/pharmgkb/parsers/MultilineWriter.java | 72 +++ .../org/pharmgkb/parsers/WebResource.java | 89 ---- .../parsers/escape/BackslashEscaper.java | 4 +- .../parsers/escape/Rfc3986Escaper.java | 4 +- .../pharmgkb/parsers/escape/package-info.java | 1 + .../parsers/io/BufferedRandomAccessFile.java | 160 +++++++ .../parsers/io/BufferedRandomAccessFileI.java | 50 ++ .../parsers/io/CompressionFormat.java | 26 ++ .../{utils => io}/HttpHeadResponse.java | 68 +-- .../parsers/io/HttpHeadResponseI.java | 46 ++ .../InvalidResponseException.java | 7 +- .../java/org/pharmgkb/parsers/io/IoUtils.java | 196 ++++++++ .../{utils => io}/QuietBufferedReader.java | 6 +- .../org/pharmgkb/parsers/io/WebResource.java | 122 +++++ .../org/pharmgkb/parsers/io/WebResourceI.java | 31 ++ .../org/pharmgkb/parsers/io/package-info.java | 1 + .../pharmgkb/parsers/model/AminoAcidCode.java | 23 +- .../pharmgkb/parsers/model/AtomicElement.java | 52 +++ .../parsers/model/ChromosomeName.java | 126 +---- .../org/pharmgkb/parsers/model/CodeType.java | 7 +- .../pharmgkb/parsers/model/CommonSpecies.java | 17 +- .../parsers/model/GeneralizedBigDecimal.java | 126 +---- .../parsers/model/GeneralizedBigDecimalI.java | 138 ++++++ .../org/pharmgkb/parsers/model/Locus.java | 121 ++--- .../pharmgkb/parsers/model/LocusRange.java | 116 +++-- .../parsers/model/NucleotideCode.java | 25 +- .../parsers/model/StandardChromosomeName.java | 139 ++++++ .../org/pharmgkb/parsers/model/Strand.java | 8 +- .../pharmgkb/parsers/model/package-info.java | 1 + 
.../org/pharmgkb/parsers/package-info.java | 1 + .../org/pharmgkb/parsers/utils/IoUtils.java | 196 -------- .../pharmgkb/parsers/utils/OptPairList.java | 56 +++ .../org/pharmgkb/parsers/utils/PairList.java | 69 +++ .../parsers/utils/ReflectingConstructor.java | 31 +- .../parsers/utils/ReflectingConstructorI.java | 8 + .../utils/RuntimeReflectionException.java | 11 + .../java/org/pharmgkb/parsers/utils/Try.java | 431 +++++++++--------- .../java/org/pharmgkb/parsers/utils/TryI.java | 389 ++++++++++++++++ .../pharmgkb/parsers/utils/package-info.java | 1 + .../parsers/escape/BackslashEscaperTest.java | 4 +- .../parsers/escape/Rfc3986EscaperTest.java | 4 +- .../parsers/{utils => io}/IoUtilsTest.java | 3 +- .../parsers/model/ChromosomeNameNameTest.java | 40 ++ .../parsers/model/ChromosomeNameTest.java | 40 -- .../model/GeneralizedBigDecimalTest.java | 20 +- .../parsers/model/LocusRangeTest.java | 8 +- .../org/pharmgkb/parsers/utils/TryTest.java | 34 +- fasta/build.gradle | 9 +- fasta/lombok.config | 2 - .../parsers/fasta/FastaBaseReader.java | 239 ++++++++++ .../parsers/fasta/FastaBaseReaderI.java | 35 ++ .../parsers/fasta/FastaSequenceParser.java | 17 +- .../parsers/fasta/FastaSequenceParserI.java | 12 + .../parsers/fasta/FastaSequenceWriter.java | 9 +- .../parsers/fasta/FastaSequenceWriterI.java | 8 + .../fasta/MultilineFastaSequenceParser.java | 16 +- .../fasta/MultilineFastaSequenceParserI.java | 12 + .../parsers/fasta/MultilineFastqParser.java | 5 - .../fasta/RandomAccessFastaBaseReader.java | 238 +++++----- .../fasta/RandomAccessFastaBaseReaderI.java | 59 +++ .../parsers/fasta/SimpleFastaBaseReader.java | 213 --------- .../parsers/fasta/model/FastaAlignment.java | 40 ++ .../parsers/fasta/model/FastaAlignmentI.java | 63 +++ .../parsers/fasta/model/FastaSequence.java | 13 +- .../{HasSequence.java => FastaSequenceI.java} | 19 +- .../parsers/fasta/model/FastqSequence.java | 15 +- .../{HasScores.java => FastqSequenceI.java} | 7 +- .../fasta/model/IlluminaSequenceId.java 
| 84 ---- .../fasta/model/extra/IlluminaSequenceId.java | 73 +++ .../fasta/model/{ => extra}/Phred33Score.java | 16 +- .../fasta/model/extra/package-info.java | 1 + .../parsers/fasta/model/package-info.java | 1 + .../pharmgkb/parsers/fasta/package-info.java | 1 + ...aderTest.java => FastaBaseReaderTest.java} | 25 +- .../fasta/FastaSequenceParserTest.java | 4 +- .../fasta/FastaSequenceWriterTest.java | 4 +- .../MultilineFastaSequenceParserTest.java | 11 +- .../RandomAccessFastaBaseReaderTest.java | 26 +- genbank/build.gradle | 2 +- genbank/lombok.config | 2 - .../parsers/genbank/GenbankParser.java | 50 +- .../parsers/genbank/GenbankParserI.java | 8 + .../genbank/model/GenbankDivision.java | 4 +- .../parsers/genbank/model/GenbankFeature.java | 10 +- .../genbank/model/GenbankSequenceRange.java | 4 +- .../GenbankAccessionAnnotation.java} | 12 +- .../{ => annotations}/GenbankAnnotation.java | 2 +- .../GenbankCommentAnnotation.java} | 10 +- .../GenbankDefinitionAnnotation.java} | 11 +- .../GenbankFeaturesAnnotation.java} | 13 +- .../GenbankKeywordsAnnotation.java} | 10 +- .../GenbankLocusAnnotation.java} | 21 +- .../GenbankOriginAnnotation.java} | 12 +- .../GenbankReferenceAnnotation.java} | 24 +- .../GenbankSourceAnnotation.java} | 14 +- .../GenbankVersionAnnotation.java} | 12 +- .../model/annotations/package-info.java | 1 + .../parsers/genbank/model/package-info.java | 1 + .../parsers/genbank/package-info.java | 1 + .../genbank/GenbankSequenceRangeTest.java | 8 +- gff/build.gradle | 4 +- gff/lombok.config | 2 - .../org/pharmgkb/parsers/gff/Gff3Parser.java | 30 +- .../org/pharmgkb/parsers/gff/Gff3ParserI.java | 8 + .../org/pharmgkb/parsers/gff/Gff3Writer.java | 21 +- .../org/pharmgkb/parsers/gff/Gff3WriterI.java | 8 + .../parsers/gff/model/BaseGffFeature.java | 275 ----------- .../parsers/gff/model/Gff3Attribute.java | 2 +- .../{CdsPhase.java => Gff3CdsPhase.java} | 18 +- .../parsers/gff/model/Gff3Feature.java | 180 ++++++-- .../parsers/gff/model/Gff3FeatureI.java | 42 
++ .../model/{GffStrand.java => Gff3Strand.java} | 14 +- .../parsers/gff/model/package-info.java | 1 + .../pharmgkb/parsers/gff/package-info.java | 1 + .../parsers/gff/utils/Gff3Escapers.java | 4 +- .../parsers/gff/utils/package-info.java | 1 + .../parsers/gff/BaseGffFeatureTest.java | 74 --- .../pharmgkb/parsers/gff/Gff3StrandTest.java | 35 ++ .../pharmgkb/parsers/gff/GffStrandTest.java | 36 -- .../parsers/gff/gff3/Gff3FeatureTest.java | 24 +- .../parsers/gff/gff3/Gff3ParserTest.java | 34 +- .../parsers/gff/gff3/Gff3WriterTest.java | 12 +- lombok.config | 2 - pedigree/build.gradle | 4 +- pedigree/lombok.config | 2 - .../parsers/pedigree/PedigreeParser.java | 34 +- .../parsers/pedigree/PedigreeParserI.java | 8 + .../parsers/pedigree/PedigreeWriter.java | 49 +- .../parsers/pedigree/PedigreeWriterI.java | 8 + .../iterators/PedigreeBfsIterator.java | 31 ++ .../iterators/PedigreeDfsIterator.java | 31 ++ .../PedigreeTopologicalIterator.java | 40 ++ .../pedigree/iterators/package-info.java | 1 + .../parsers/pedigree/model/Family.java | 154 ------- .../parsers/pedigree/model/Individual.java | 216 --------- .../parsers/pedigree/model/Pedigree.java | 39 +- .../pedigree/model/PedigreeBuilder.java | 104 +++-- .../pedigree/model/PedigreeFamily.java | 109 +++++ .../pedigree/model/PedigreeFamilyI.java | 11 + .../parsers/pedigree/model/PedigreeI.java | 15 + .../pedigree/model/PedigreeIndividual.java | 164 +++++++ .../pedigree/model/PedigreeIndividualI.java | 27 ++ .../model/{Sex.java => PedigreeSex.java} | 2 +- .../pedigree/model/PedigreeSubtreeI.java | 42 ++ .../parsers/pedigree/model/PedigreeUtils.java | 51 --- .../parsers/pedigree/model/Subtree.java | 90 ---- .../parsers/pedigree/model/package-info.java | 1 + .../parsers/pedigree/package-info.java | 2 +- .../parsers/pedigree/PedigreeBuilderTest.java | 250 +++++----- .../parsers/pedigree/PedigreeParserTest.java | 53 ++- .../parsers/pedigree/PedigreeWriterTest.java | 43 +- settings.gradle | 5 +- text/build.gradle | 4 +- 
text/lombok.config | 2 - .../pharmgkb/parsers/text}/MatrixParser.java | 28 +- .../pharmgkb/parsers/text/MatrixParserI.java | 9 + .../pharmgkb/parsers/text}/MatrixWriter.java | 56 +-- .../pharmgkb/parsers/text/MatrixWriterI.java | 9 + .../pharmgkb/parsers/text/package-info.java | 1 + text/src/test/java/MatrixParserTest.java | 3 - text/src/test/java/MatrixWriterTest.java | 2 - .../parsers/text/MatrixParserTest.java | 5 + .../parsers/text/MatrixWriterTest.java | 4 + turtle/build.gradle | 4 +- turtle/lombok.config | 2 - .../parsers/turtle/ChemblRdfResource.java | 47 -- .../pharmgkb/parsers/turtle/TurtleParser.java | 67 ++- .../parsers/turtle/TurtleParserI.java | 13 + ...eGrouper.java => TurtleTripleGrouper.java} | 28 +- .../parsers/turtle/TurtleTripleGrouperI.java | 16 + .../pharmgkb/parsers/turtle/model/Node.java | 109 ----- .../pharmgkb/parsers/turtle/model/Prefix.java | 60 --- .../pharmgkb/parsers/turtle/model/Triple.java | 69 --- .../{TripleGroup.java => TurtleGroup.java} | 12 +- .../parsers/turtle/model/TurtleNode.java | 61 +++ .../parsers/turtle/model/TurtlePrefix.java | 37 ++ .../parsers/turtle/model/TurtleTriple.java | 22 + .../parsers/turtle/model/package-info.java | 1 + .../pharmgkb/parsers/turtle/package-info.java | 1 + vcf/build.gradle | 4 +- .../test/org/pharmgkb/parsers/vcf/example.vcf | 27 -- vcf/lombok.config | 2 - .../pharmgkb/parsers/vcf/VcfDataParser.java | 18 +- .../pharmgkb/parsers/vcf/VcfDataParserI.java | 8 + .../pharmgkb/parsers/vcf/VcfDataWriter.java | 34 +- .../pharmgkb/parsers/vcf/VcfDataWriterI.java | 8 + .../pharmgkb/parsers/vcf/VcfFileWriter.java | 61 ++- .../pharmgkb/parsers/vcf/VcfFileWriterI.java | 22 + .../parsers/vcf/VcfMetadataParser.java | 35 +- .../parsers/vcf/VcfMetadataParserI.java | 8 + .../parsers/vcf/VcfMetadataWriter.java | 23 +- .../parsers/vcf/VcfMetadataWriterI.java | 19 + .../pharmgkb/parsers/vcf/VcfValidator.java | 219 --------- .../VcfMetadataCollector.java | 7 +- .../vcf/builders/VcfMetadataCollectorI.java | 10 + 
.../vcf/builders/VcfPropertyMapBuilder.java | 45 ++ .../vcf/builders/VcfPropertyMapBuilderI.java | 17 + .../parsers/vcf/builders/package-info.java | 1 + .../VcfAlleleFactory.java | 17 +- .../vcf/factories/VcfAlleleFactoryI.java | 11 + .../VcfMetadataFactory.java | 23 +- .../vcf/factories/VcfMetadataFactoryI.java | 10 + .../vcf/factories/VcfPropertyFactory.java | 126 +++++ .../vcf/factories/VcfPropertyFactoryI.java | 40 ++ .../parsers/vcf/factories/package-info.java | 1 + .../pharmgkb/parsers/vcf/model/VcfInfo.java | 29 +- .../vcf/model/VcfMetadataCollection.java | 167 ++++--- .../vcf/model/VcfMetadataCollectionI.java | 49 ++ .../parsers/vcf/model/VcfPosition.java | 163 ++++--- .../pharmgkb/parsers/vcf/model/VcfSample.java | 37 +- .../vcf/model/allele/VcfBasesAllele.java | 11 +- .../vcf/model/allele/VcfBreakpointAllele.java | 33 +- .../vcf/model/allele/VcfDeletedAllele.java | 2 - .../vcf/model/allele/VcfSymbolicAllele.java | 4 +- .../vcf/model/allele/package-info.java | 1 + .../vcf/model/extra/GenotypeLikelihoods.java | 88 ---- ...iant.java => VcfAltStructuralVariant.java} | 58 +-- .../parsers/vcf/model/extra/VcfGenotype.java | 93 ++-- .../parsers/vcf/model/extra/VcfGenotypeI.java | 46 ++ .../model/extra/VcfGenotypeLikelihood.java | 85 ++++ .../extra/VcfGenotypeLikelihoodExtractor.java | 109 +++++ .../VcfGenotypeLikelihoodExtractorI.java | 14 + .../parsers/vcf/model/extra/package-info.java | 1 + .../vcf/model/metadata/VcfContigMetadata.java | 27 +- .../vcf/model/metadata/VcfFilterMetadata.java | 1 - .../vcf/model/metadata/VcfFormatMetadata.java | 18 +- .../vcf/model/metadata/VcfFormatNumber.java | 2 +- .../vcf/model/metadata/VcfHeaderMetadata.java | 9 +- .../metadata/VcfIdDescriptionMetadata.java | 14 +- .../vcf/model/metadata/VcfIdMetadata.java | 5 +- .../vcf/model/metadata/VcfInfoMetadata.java | 19 +- .../vcf/model/metadata/VcfMapMetadata.java | 37 +- .../vcf/model/metadata/VcfMetadata.java | 3 +- .../vcf/model/metadata/VcfMetadataType.java | 2 +- 
.../vcf/model/metadata/VcfNumberFlag.java | 2 +- .../model/metadata/VcfPedigreeMetadata.java | 2 +- .../vcf/model/metadata/VcfRawMetadata.java | 6 +- .../vcf/model/metadata/VcfSampleMetadata.java | 4 +- .../model/metadata/VcfVersionMetadata.java | 9 +- .../vcf/model/metadata/package-info.java | 1 + .../parsers/vcf/model/package-info.java | 1 + .../VcfReservedFormatProperty.java} | 85 +++- .../VcfReservedInfoProperty.java} | 18 +- .../VcfReservedProperty.java} | 10 +- .../VcfReservedStructuralVariantCode.java} | 16 +- .../vcf/model/reserved/package-info.java | 1 + .../pharmgkb/parsers/vcf/package-info.java | 1 + .../parsers/vcf/utils/PropertyMapBuilder.java | 44 -- .../parsers/vcf/utils/VcfConversionUtils.java | 126 ----- .../parsers/vcf/utils/VcfEscapers.java | 2 +- .../parsers/vcf/utils/VcfPatterns.java | 6 +- .../vcf/validation/VcfInvalidProperty.java | 15 + .../vcf/validation/VcfPropertyType.java | 11 + .../validation/VcfValidationException.java | 38 ++ .../parsers/vcf/validation/VcfValidator.java | 122 +++++ .../parsers/vcf/validation/VcfValidatorI.java | 13 + .../parsers/vcf/VcfDataIntegrationTest.java | 25 +- .../parsers/vcf/VcfValidatorTest.java | 3 +- .../parsers/vcf/model/VcfPositionTest.java | 14 +- 310 files changed, 6319 insertions(+), 4851 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug.yaml create mode 100644 .github/ISSUE_TEMPLATE/docs.yaml create mode 100644 .github/ISSUE_TEMPLATE/feature.yaml create mode 100644 .github/ISSUE_TEMPLATE/performance.yaml create mode 100644 .github/PULL_REQUEST_TEMPLATE/pull_request.md create mode 100644 .github/workflows/build.yaml create mode 100644 .github/workflows/checks.yaml delete mode 100644 bed/lombok.config create mode 100644 bed/src/main/java/org/pharmgkb/parsers/bed/BedParserI.java create mode 100644 bed/src/main/java/org/pharmgkb/parsers/bed/BedWriterI.java create mode 100644 bed/src/main/java/org/pharmgkb/parsers/bed/model/package-info.java create mode 100644 
bed/src/main/java/org/pharmgkb/parsers/bed/package-info.java delete mode 100644 bgee/lombok.config create mode 100644 bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParserI.java delete mode 100644 bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeResource.java rename bgee/src/main/java/org/pharmgkb/parsers/bgee/model/{Quality.java => BgeeQuality.java} (59%) create mode 100644 bgee/src/main/java/org/pharmgkb/parsers/bgee/model/package-info.java create mode 100644 bgee/src/main/java/org/pharmgkb/parsers/bgee/package-info.java delete mode 100644 bgee/src/test/java/org/pharmgkb/parsers/bgee/BgeeResourceTest.java delete mode 100644 chain/lombok.config delete mode 100644 chain/src/main/java/org/pharmgkb/parsers/chain/ChainResource.java create mode 100644 chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParserI.java create mode 100644 chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChainI.java create mode 100644 chain/src/main/java/org/pharmgkb/parsers/chain/model/package-info.java create mode 100644 chain/src/main/java/org/pharmgkb/parsers/chain/package-info.java delete mode 100644 core/lombok.config create mode 100644 core/src/main/java/org/pharmgkb/parsers/MultilineWriter.java delete mode 100644 core/src/main/java/org/pharmgkb/parsers/WebResource.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/escape/package-info.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFile.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFileI.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/CompressionFormat.java rename core/src/main/java/org/pharmgkb/parsers/{utils => io}/HttpHeadResponse.java (74%) create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponseI.java rename core/src/main/java/org/pharmgkb/parsers/{utils => io}/InvalidResponseException.java (55%) create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/IoUtils.java 
rename core/src/main/java/org/pharmgkb/parsers/{utils => io}/QuietBufferedReader.java (89%) create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/WebResource.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/WebResourceI.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/io/package-info.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/model/AtomicElement.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalI.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/model/StandardChromosomeName.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/model/package-info.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/package-info.java delete mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/IoUtils.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/OptPairList.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/PairList.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructorI.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/RuntimeReflectionException.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/TryI.java create mode 100644 core/src/main/java/org/pharmgkb/parsers/utils/package-info.java rename core/src/test/java/org/pharmgkb/parsers/{utils => io}/IoUtilsTest.java (80%) create mode 100644 core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameNameTest.java delete mode 100644 core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameTest.java delete mode 100644 fasta/lombok.config create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReader.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReaderI.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParserI.java create mode 100644 
fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterI.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserI.java delete mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastqParser.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderI.java delete mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReader.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignment.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignmentI.java rename fasta/src/main/java/org/pharmgkb/parsers/fasta/model/{HasSequence.java => FastaSequenceI.java} (57%) rename fasta/src/main/java/org/pharmgkb/parsers/fasta/model/{HasScores.java => FastqSequenceI.java} (67%) delete mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/model/IlluminaSequenceId.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/IlluminaSequenceId.java rename fasta/src/main/java/org/pharmgkb/parsers/fasta/model/{ => extra}/Phred33Score.java (79%) create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/package-info.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/model/package-info.java create mode 100644 fasta/src/main/java/org/pharmgkb/parsers/fasta/package-info.java rename fasta/src/test/java/org/pharmgkb/parsers/fasta/{SimpleFastaBaseReaderTest.java => FastaBaseReaderTest.java} (72%) delete mode 100644 genbank/lombok.config create mode 100644 genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParserI.java rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{AccessionAnnotation.java => annotations/GenbankAccessionAnnotation.java} (64%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{ => annotations}/GenbankAnnotation.java (71%) rename 
genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{CommentAnnotation.java => annotations/GenbankCommentAnnotation.java} (70%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{DefinitionAnnotation.java => annotations/GenbankDefinitionAnnotation.java} (70%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{FeaturesAnnotation.java => annotations/GenbankFeaturesAnnotation.java} (68%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{KeywordsAnnotation.java => annotations/GenbankKeywordsAnnotation.java} (70%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{LocusAnnotation.java => annotations/GenbankLocusAnnotation.java} (81%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{OriginAnnotation.java => annotations/GenbankOriginAnnotation.java} (73%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{ReferenceAnnotation.java => annotations/GenbankReferenceAnnotation.java} (80%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{SourceAnnotation.java => annotations/GenbankSourceAnnotation.java} (73%) rename genbank/src/main/java/org/pharmgkb/parsers/genbank/model/{VersionAnnotation.java => annotations/GenbankVersionAnnotation.java} (73%) create mode 100644 genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/package-info.java create mode 100644 genbank/src/main/java/org/pharmgkb/parsers/genbank/model/package-info.java create mode 100644 genbank/src/main/java/org/pharmgkb/parsers/genbank/package-info.java delete mode 100644 gff/lombok.config create mode 100644 gff/src/main/java/org/pharmgkb/parsers/gff/Gff3ParserI.java create mode 100644 gff/src/main/java/org/pharmgkb/parsers/gff/Gff3WriterI.java delete mode 100644 gff/src/main/java/org/pharmgkb/parsers/gff/model/BaseGffFeature.java rename gff/src/main/java/org/pharmgkb/parsers/gff/model/{CdsPhase.java => Gff3CdsPhase.java} (65%) create mode 100644 
gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3FeatureI.java rename gff/src/main/java/org/pharmgkb/parsers/gff/model/{GffStrand.java => Gff3Strand.java} (82%) create mode 100644 gff/src/main/java/org/pharmgkb/parsers/gff/model/package-info.java create mode 100644 gff/src/main/java/org/pharmgkb/parsers/gff/package-info.java create mode 100644 gff/src/main/java/org/pharmgkb/parsers/gff/utils/package-info.java delete mode 100644 gff/src/test/java/org/pharmgkb/parsers/gff/BaseGffFeatureTest.java create mode 100644 gff/src/test/java/org/pharmgkb/parsers/gff/Gff3StrandTest.java delete mode 100644 gff/src/test/java/org/pharmgkb/parsers/gff/GffStrandTest.java delete mode 100644 lombok.config delete mode 100644 pedigree/lombok.config create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParserI.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriterI.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeBfsIterator.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeDfsIterator.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeTopologicalIterator.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/package-info.java delete mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Family.java delete mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Individual.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamily.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamilyI.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeI.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividual.java create mode 100644 
pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividualI.java rename pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/{Sex.java => PedigreeSex.java} (84%) create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSubtreeI.java delete mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeUtils.java delete mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Subtree.java create mode 100644 pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/package-info.java delete mode 100644 text/lombok.config rename text/src/main/java/{ => org/pharmgkb/parsers/text}/MatrixParser.java (87%) create mode 100644 text/src/main/java/org/pharmgkb/parsers/text/MatrixParserI.java rename text/src/main/java/{ => org/pharmgkb/parsers/text}/MatrixWriter.java (68%) create mode 100644 text/src/main/java/org/pharmgkb/parsers/text/MatrixWriterI.java create mode 100644 text/src/main/java/org/pharmgkb/parsers/text/package-info.java delete mode 100644 text/src/test/java/MatrixParserTest.java delete mode 100644 text/src/test/java/MatrixWriterTest.java create mode 100644 text/src/test/java/org/pharmgkb/parsers/text/MatrixParserTest.java create mode 100644 text/src/test/java/org/pharmgkb/parsers/text/MatrixWriterTest.java delete mode 100644 turtle/lombok.config delete mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/ChemblRdfResource.java create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParserI.java rename turtle/src/main/java/org/pharmgkb/parsers/turtle/{TripleGrouper.java => TurtleTripleGrouper.java} (50%) create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouperI.java delete mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Node.java delete mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Prefix.java delete mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Triple.java rename 
turtle/src/main/java/org/pharmgkb/parsers/turtle/model/{TripleGroup.java => TurtleGroup.java} (74%) create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleNode.java create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtlePrefix.java create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleTriple.java create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/model/package-info.java create mode 100644 turtle/src/main/java/org/pharmgkb/parsers/turtle/package-info.java delete mode 100644 vcf/build/resources/test/org/pharmgkb/parsers/vcf/example.vcf delete mode 100644 vcf/lombok.config create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParserI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriterI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriterI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParserI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriterI.java delete mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfValidator.java rename vcf/src/main/java/org/pharmgkb/parsers/vcf/{utils => builders}/VcfMetadataCollector.java (79%) create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollectorI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilder.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilderI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/package-info.java rename vcf/src/main/java/org/pharmgkb/parsers/vcf/{utils => factories}/VcfAlleleFactory.java (60%) create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactoryI.java rename vcf/src/main/java/org/pharmgkb/parsers/vcf/{utils => factories}/VcfMetadataFactory.java (87%) create mode 100644 
vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactoryI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactory.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactoryI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/package-info.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollectionI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/package-info.java delete mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/GenotypeLikelihoods.java rename vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/{AltStructuralVariant.java => VcfAltStructuralVariant.java} (56%) create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihood.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractor.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractorI.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/package-info.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/package-info.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/package-info.java rename vcf/src/main/java/org/pharmgkb/parsers/vcf/model/{extra/ReservedFormatProperty.java => reserved/VcfReservedFormatProperty.java} (57%) rename vcf/src/main/java/org/pharmgkb/parsers/vcf/model/{extra/ReservedInfoProperty.java => reserved/VcfReservedInfoProperty.java} (92%) rename vcf/src/main/java/org/pharmgkb/parsers/vcf/model/{extra/ReservedProperty.java => reserved/VcfReservedProperty.java} (56%) rename vcf/src/main/java/org/pharmgkb/parsers/vcf/model/{extra/ReservedStructuralVariantCode.java => 
reserved/VcfReservedStructuralVariantCode.java} (73%) create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/package-info.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/package-info.java delete mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/PropertyMapBuilder.java delete mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfConversionUtils.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfInvalidProperty.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfPropertyType.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidationException.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidator.java create mode 100644 vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidatorI.java diff --git a/.github/ISSUE_TEMPLATE/bug.yaml b/.github/ISSUE_TEMPLATE/bug.yaml new file mode 100644 index 0000000..788477b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug.yaml @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD +# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd +# SPDX-License-Identifier: Apache-2.0 + +name: "🐛 Bug" +description: "Bug report" +labels: ["type: fix"] + +body: + - type: textarea + id: description + attributes: + label: What happened? + placeholder: Detail what you did and what happened. + validations: + required: true + - type: dropdown + id: os + attributes: + label: What OS types are you seeing the problem on? + multiple: true + options: + - Windows + - Linux + - macOS + validations: + required: false + - type: dropdown + id: browsers + attributes: + label: What browsers are you seeing the problem on? 
+ multiple: true + options: + - Chrome + - Firefox + - Edge + - Safari + - Opera + - Samsung Internet + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste any relevant log output. diff --git a/.github/ISSUE_TEMPLATE/docs.yaml b/.github/ISSUE_TEMPLATE/docs.yaml new file mode 100644 index 0000000..7ff11ca --- /dev/null +++ b/.github/ISSUE_TEMPLATE/docs.yaml @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD +# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd +# SPDX-License-Identifier: Apache-2.0 + +name: "📚 Documentation" +description: "Documentation issue" +labels: ["type: docs"] + +body: + - type: textarea + id: description + attributes: + label: What needs work? + placeholder: Describe the documentation issue. + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/feature.yaml b/.github/ISSUE_TEMPLATE/feature.yaml new file mode 100644 index 0000000..59c0584 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature.yaml @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD +# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd +# SPDX-License-Identifier: Apache-2.0 + +name: "✨ Feature" +description: "Feature request" +labels: ["type: feature"] + +body: + - type: textarea + id: description + attributes: + label: What is the feature request? + placeholder: Describe the feature you would like. 
+ validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/performance.yaml b/.github/ISSUE_TEMPLATE/performance.yaml new file mode 100644 index 0000000..22b542d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/performance.yaml @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD +# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd +# SPDX-License-Identifier: Apache-2.0 + +name: "🐢 Performance" +description: "Performance issue" +labels: ["type: performance"] + +body: + - type: textarea + id: description + attributes: + label: What is the performance issue? + placeholder: Detail your steps and where the performance dropped. + validations: + required: true + - type: input + id: version + attributes: + label: Version + description: What version are you running? + - type: dropdown + id: os + attributes: + label: What OS types are you seeing the problem on? + multiple: true + options: + - Windows + - Linux + - macOS + validations: + required: false + - type: dropdown + id: browsers + attributes: + label: What browsers are you seeing the problem on? + multiple: true + options: + - Chrome + - Firefox + - Edge + - Safari + - Opera + - Samsung Internet + validations: + required: false + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste any relevant log output. 
diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request.md b/.github/PULL_REQUEST_TEMPLATE/pull_request.md new file mode 100644 index 0000000..b4f949c --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request.md @@ -0,0 +1 @@ +# Pull request diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..ad31f78 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,43 @@ +# This workflow will build a Java project with Gradle +# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-gradle + +name: Test + +on: + workflow_call: + inputs: + ref: + type: string + description: Ref to checkout + default: main + +permissions: + contents: read + statuses: read + actions: read + security-events: write + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up JDK 20 + uses: actions/setup-java@v2 + with: + distribution: oracle + java-version: "21-ea" + - uses: gradle/wrapper-validation-action@v1 + - name: Set up and execute Gradle test + uses: gradle/gradle-build-action@v2 + with: + arguments: test + - uses: actions/upload-artifact@v3 + with: + name: package + path: build/libs + - uses: actions/upload-artifact@v3 + with: + name: build-reports + path: build/reports/ diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml new file mode 100644 index 0000000..681ef6a --- /dev/null +++ b/.github/workflows/checks.yaml @@ -0,0 +1,30 @@ +# Various static code analysis and checks +# Runs on commit or pull (to any branch) +# and on a schedule (weekly) +name: Static checks +on: + push: + pull_request: + schedule: + - cron: 0 7 * * 6 +jobs: + markdown-link-check: + name: Check Markdown links + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@master + - uses: gaurav-nelson/github-action-markdown-link-check@v1 + analyse: + name: Analyze with CodeQL + runs-on: ubuntu-latest + steps: + - name: Checkout 
repository + uses: actions/checkout@v2 + with: + fetch-depth: 2 + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + - name: Autobuild with CodeQL + uses: github/codeql-action/autobuild@v1 + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index 97a9c42..e2aedc4 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,9 @@ Gemfile.lock # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* +# IDE files +.idea + # Keep these config files !/.gitignore !/.travis.yml diff --git a/README.md b/README.md index 30b2cbd..3044559 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,26 @@ ![stability-stable](https://img.shields.io/badge/stability-stable-green.svg) ![Active](https://img.shields.io/static/v1?label=development&message=active&color=green) [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -![Latest release](https://img.shields.io/github/v/tag/dmyersturnbull/genomics-io) +![Latest release](https://img.shields.io/github/v/tag/dmyersturnbull/bioio) ![Java compatibility](https://img.shields.io/static/v1?label=Java&message=14%2b) -![Maven Central](https://img.shields.io/maven-central/v/dmyersturnbull/genomics-io) -![GitHub last commit](https://img.shields.io/github/last-commit/dmyersturnbull/genomics-io?color=green) +![Maven Central](https://img.shields.io/maven-central/v/dmyersturnbull/bioio) +![GitHub last commit](https://img.shields.io/github/last-commit/dmyersturnbull/bioio?color=green) Efficient, high-quality streaming parsers and writers for 12 text-based formats used in bioinformatics. The goal is to have the best possible parsers for the most problematic ancient formats. 
**Supported formats:** -VCF, FASTA, GenBank, BED, GFF/GTV/GVF, UCSC chain, -pre-MAKEPED, BGEE, Turtle/RDF, -matrices/tables/CSV/TSV + +- Variant calls: VCF +- Gene features: GenBank, BED, GFF3, GTF, GVF +- Sequences: FASTA, FASTA alignment, FASTQ +- Expression: BGEE +- Coordinate mapping: UCSC chain +- Phylogenetics/pedigree: pre-MAKEPED +- Protein structure: PDB +- Triples: Turtle, RDF +- Tabular: CSV, TSV, etc. **Features & choices:** @@ -23,6 +30,7 @@ matrices/tables/CSV/TSV - Parses every part of a format, leaving nothing as text unnecessarily. - Has a consistent API. Coordinates are always 0-indexed and text is always escaped (according to specifications). - Immutable, thread-safe, null-pointer-safe (`Optional<>`), and arbitrary-precision. +- All methods are either exposed through interfaces, or reside in records, enums, and final classes #### Example: @@ -33,7 +41,7 @@ import org.pharmgkb.parsers.vcf; Stream goodMitochondrialCalls = new VcfDataParser().parseFile(path) .filter(p -> p.chromosome.isMitochondial()) - .filter(VcfFilters.qualityAtLeast(10)) // converts to BigDecimal + .filter(VcfFilters.qualityAtLeast(10)); // converts to BigDecimal new VcfDataWriter().writeToFile(goodMitochondrialCalls, filteredPath); ``` @@ -67,7 +75,7 @@ implementation group: 'com.pharmgkb.bioio', name: 'bioio', version: '0.3.0' #### Pre-build JAR -[Releases](https://github.com/dmyersturnbull/genomics-io/releases) contain both _fat_ JARs (containing dependencies) +[Releases](https://github.com/dmyersturnbull/bioio/releases) contain both _fat_ JARs (containing dependencies) and _thin_ JARs (without dependencies), independently for each subproject (e.g. `bioio-vcf` for VCF, or `bioio-gff` for GFF/GTV/GVF). @@ -84,7 +92,7 @@ Formats listed in bold are currently implemented. 
- Variant calls: **VCF** - Gene features: **GenBank, BED, GFF3, GTF, GVF** -- Sequences: **FASTA**, EMBL, FASTA alignment, **FASTQ**, Seq, faidx (FASTQ indices) +- Sequences: **FASTA**, EMBL, **FASTA alignment**, **FASTQ**, Seq, faidx (FASTQ indices) - Expression: **BGEE** - Coordinate mapping: **UCSC chain** - Phylogenetics & pedigrees: **pre-MAKEPED**, LINKAGE, Nexus @@ -93,7 +101,7 @@ Formats listed in bold are currently implemented. - Protein structure: PDB (non-comprehensive) - RNA structure: Bpseq, Connect/CT, Vienna, Base-Paring, Dot-Bracket, Dot-Plot - Other: cytoband -- Misc: Matrices/tables/CSV/TSV, alignment, **Turtle (and RDF)** +- Misc: **Matrices/tables/CSV/TSV**, **Turtle (and RDF)** ### Extra things @@ -220,9 +228,9 @@ List topScores = parser.parseAll(Files.lines(fastaFile)) ```java // Stream Triples in Turtle format from a URL /* -@prefix myPrefix: . - "belongsTo" @myPrefix ; - "hasSynonym" . +@prefix myPrefix: . + "belongsTo" @myPrefix ; + "hasSynonym" . */ Stream input = null; try (BufferedReader reader = new BufferedReader(new InputStreamReader((HttpURLConnection) myUrl.openConnection()).getInputStream()))) { @@ -230,7 +238,7 @@ try (BufferedReader reader = new BufferedReader(new InputStreamReader((HttpURLCo } TripleParser parser = new TripleParser(true); // usePrefixes=true will replace prefixes Stream stream = input.map(new TripleParser()); -// contains: List[ http://abc#cat belongsTo http://abc#owner , http://abc#cat hasSynonym http://abc#feline ] +// contains: List[ https://abc#cat belongsTo https://abc#owner , https://abc#cat hasSynonym https://abc#feline ] List prefixes = parser.getPrefixes(); ``` @@ -257,18 +265,18 @@ Map genotypeCounts = new VcfDataParser().parseAll(input) ``` ```java -Stream MatrixParser.tabs().parseAll(file).map(BigDecimal::new); +Stream org.pharmgkb.parsers.text.MatrixParserI.tabs().parseAll(file).map(BigDecimal::new); ``` ### Guiding principles 1. 
Where possible, a parser is a `Function` or `Function, R>`, and writer is a `Function` or `Function>`. - [Java 8+ Streams](http://www.oracle.com/technetwork/articles/java/ma14-java-se-8-streams-2177646.html) + [Java 8+ Streams](https://www.oracle.com/technetwork/articles/java/ma14-java-se-8-streams-2177646.html) are expected to be used. 2. Null values are generally banned from public methods in favor of [`Optional`](https://download.java.net/java/early_access/jdk16/docs/api/java.base/java/util/Optional.html). - See http://www.oracle.com/technetwork/articles/java/java8-optional-2175753.html for more information. + See https://www.oracle.com/technetwork/articles/java/java8-optional-2175753.html for more information. 3. Most operations are thread-safe. Thread safety is annotated using `javax.annotation.concurrent`. 4. Top-level data classes are immutable, as annotated by or `javax.annotation.concurrent.Immutable`. 5. The builder pattern is used for non-trivial classes. Each builder has a copy constructor. 
diff --git a/bed/build.gradle b/bed/build.gradle index 1cf83f3..75740b8 100644 --- a/bed/build.gradle +++ b/bed/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') -} \ No newline at end of file + api project(':core') +} diff --git a/bed/lombok.config b/bed/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/bed/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/BedParser.java b/bed/src/main/java/org/pharmgkb/parsers/bed/BedParser.java index 2c51895..4b48397 100644 --- a/bed/src/main/java/org/pharmgkb/parsers/bed/BedParser.java +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/BedParser.java @@ -1,7 +1,6 @@ package org.pharmgkb.parsers.bed; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineParser; import org.pharmgkb.parsers.bed.model.BedFeature; import org.pharmgkb.parsers.model.Strand; import org.slf4j.Logger; @@ -42,10 +41,10 @@ * * @author Douglas Myers-Turnbull * @see BedFeature - * @see org.pharmgkb.parsers.bed.BedWriter + * @see BedWriter */ @ThreadSafe -public class BedParser implements LineParser { +public class BedParser implements BedParserI { private static final long sf_logEvery = 10000; private static final Pattern sf_comma = Pattern.compile(","); @@ -53,7 +52,7 @@ public class BedParser implements LineParser { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); @Nonnull @Override @@ -81,7 +80,7 @@ public BedFeature apply(@Nonnull String line) throws BadDataFormatException { builder.setScore(Integer.parseInt(parts[4])); } if (parts.length > 5) { - builder.setStrand(Strand.lookupBySymbol(parts[5])); + builder.setStrand(Strand.fromSymbol(parts[5])); } if (parts.length > 6) { 
builder.setThickStart(Long.parseLong(parts[6])); diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/BedParserI.java b/bed/src/main/java/org/pharmgkb/parsers/bed/BedParserI.java new file mode 100644 index 0000000..4d74fcc --- /dev/null +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/BedParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.bed; + +import org.pharmgkb.parsers.LineParser; +import org.pharmgkb.parsers.bed.model.BedFeature; + +public interface BedParserI extends LineParser { + +} diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriter.java b/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriter.java index e88067d..fdfef3b 100644 --- a/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriter.java +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriter.java @@ -24,7 +24,7 @@ * @see BedFeature */ @ThreadSafe -public class BedWriter implements LineWriter { +public class BedWriter implements LineWriter, BedWriterI { private static final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -39,51 +39,51 @@ public String apply(@Nonnull BedFeature feature) { sf_logger.debug("Writing line #{}", m_lineNumber); } - StringBuilder sb = new StringBuilder(feature.getChromosome()) - .append("\t").append(feature.getStart()) - .append("\t").append(feature.getEnd()); + StringBuilder sb = new StringBuilder(feature.chromosome()) + .append("\t").append(feature.start()) + .append("\t").append(feature.end()); - if (feature.getName().isPresent()) { - sb.append("\t").append(feature.getName().get()); + if (feature.name().isPresent()) { + sb.append("\t").append(feature.name().get()); } - if (feature.getScore().isPresent()) { - sb.append("\t").append(feature.getScore().get()); + if (feature.score().isPresent()) { + sb.append("\t").append(feature.score().get()); } - if (feature.getStrand().isPresent()) { - sb.append("\t").append(feature.getStrand().get().getSymbol()); + if 
(feature.strand().isPresent()) { + sb.append("\t").append(feature.strand().get().symbol()); } - if (feature.getThickStart().isPresent()) { - sb.append("\t").append(feature.getThickStart().get()); + if (feature.thickStart().isPresent()) { + sb.append("\t").append(feature.thickStart().get()); } - if (feature.getThickEnd().isPresent()) { - sb.append("\t").append(feature.getThickEnd().get()); + if (feature.thickEnd().isPresent()) { + sb.append("\t").append(feature.thickEnd().get()); } - if (feature.getColor().isPresent()) { - sb.append("\t").append(feature.getColor().get().getRed()) - .append(",").append(feature.getColor().get().getGreen()) - .append(",").append(feature.getColor().get().getBlue()); + if (feature.color().isPresent()) { + sb.append("\t").append(feature.color().get().getRed()) + .append(",").append(feature.color().get().getGreen()) + .append(",").append(feature.color().get().getBlue()); } - if (!feature.getBlocks().isEmpty()) { + if (!feature.blocks().isEmpty()) { // write count - sb.append("\t").append(feature.getBlocks().size()); + sb.append("\t").append(feature.blocks().size()); // write lengths sb.append("\t"); - for (int i = 0; i < feature.getBlocks().size(); i++) { + for (int i = 0; i < feature.blocks().size(); i++) { if (i > 0) { sb.append(","); } - sb.append(feature.getBlocks().get(i).getLength()); + sb.append(feature.blocks().get(i).length()); } // write starts sb.append("\t"); - for (int i = 0; i < feature.getBlocks().size(); i++) { + for (int i = 0; i < feature.blocks().size(); i++) { if (i > 0) { sb.append(","); } - sb.append(feature.getBlocks().get(i).getStart()); + sb.append(feature.blocks().get(i).start()); } } diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriterI.java b/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriterI.java new file mode 100644 index 0000000..930eb23 --- /dev/null +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/BedWriterI.java @@ -0,0 +1,5 @@ +package org.pharmgkb.parsers.bed; + +public interface 
BedWriterI { + +} diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedBlock.java b/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedBlock.java index f2119d1..0342a7a 100644 --- a/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedBlock.java +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedBlock.java @@ -10,56 +10,28 @@ * The start and end of a block in a {@link BedFeature}. * @author Douglas Myers-Turnbull */ -@Immutable -public class BedBlock { +public record BedBlock( + @Nonnegative long start, + @Nonnegative long end +) { - private final long m_start; - - private final long m_end; @SuppressWarnings("ConstantConditions") - public BedBlock(@Nonnegative long start, @Nonnegative long end) { - Preconditions.checkArgument(start > -1, "Block start" + start + " is negative"); - Preconditions.checkArgument(end > -1, "Block end" + end + " is negative"); - Preconditions.checkArgument(start <= end, "Block cannot start before it ends; start is " + start + " but end is " + end); - m_start = start; - m_end = end; - } - - @Nonnegative - public long getStart() { - return m_start; - } - - @Nonnegative - public long getLength() { - return m_end - m_start; + public BedBlock { + Preconditions.checkArgument(start() > -1, "Block start" + start() + " is negative"); + Preconditions.checkArgument(end() > -1, "Block end" + end() + " is negative"); + Preconditions.checkArgument(start() <= end(), + "Block cannot start before it ends; start is " + start() + " but end is " + end()); } @Nonnegative - public long getEnd() { - return m_end; + public long length() { + return end() - start(); } @Override public String toString() { - return m_start + "-" + m_end; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - BedBlock bedBlock = (BedBlock) o; - return Objects.equals(m_start, bedBlock.m_start) && Objects.equals(m_end, bedBlock.m_end); + return start() + "-" + 
end(); } - @Override - public int hashCode() { - return Objects.hash(m_start, m_end); - } } diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedFeature.java b/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedFeature.java index 49aae2d..ae3489f 100644 --- a/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedFeature.java +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/model/BedFeature.java @@ -23,7 +23,7 @@ * @author Douglas Myers-Turnbull */ @Immutable -public class BedFeature { +public final class BedFeature { private final String m_chromosome; @@ -52,54 +52,54 @@ public class BedFeature { private final ImmutableList m_blocks; @Nonnull - public String getChromosome() { + public String chromosome() { return m_chromosome; } @Nonnegative - public long getStart() { + public long start() { return m_start; } @Nonnegative - public long getEnd() { + public long end() { return m_end; } @Nonnull - public Optional getName() { + public Optional name() { return Optional.ofNullable(m_name); } @Nonnull - public Optional getScore() { + public Optional score() { return Optional.ofNullable(m_score); } @Nonnull - public Optional getStrand() { + public Optional strand() { return Optional.ofNullable(m_strand); } @Nonnull @Nonnegative - public Optional getThickStart() { + public Optional thickStart() { return Optional.ofNullable(m_thickStart); } @Nonnull @Nonnegative - public Optional getThickEnd() { + public Optional thickEnd() { return Optional.ofNullable(m_thickEnd); } @Nonnull - public Optional getColor() { + public Optional color() { return Optional.ofNullable(m_color); } @Nonnull - public List getBlocks() { + public List blocks() { return m_blocks; } @@ -202,16 +202,16 @@ public Builder(@Nonnull String chromosome, @Nonnegative long start, @Nonnegative } public Builder(@Nonnull BedFeature feature) { - m_chromosome = feature.getChromosome(); - m_start = feature.getStart(); - m_end = feature.getEnd(); - setName(feature.getName()); - setScore(feature.getScore()); - 
setStrand(feature.getStrand()); - setColor(feature.getColor()); - setThickStart(feature.getThickStart()); - setThickEnd(feature.getThickEnd()); - m_blocks = new ArrayList<>(feature.getBlocks()); // we need to copy so that it's mutable! + m_chromosome = feature.chromosome(); + m_start = feature.start(); + m_end = feature.end(); + setName(feature.name()); + setScore(feature.score()); + setStrand(feature.strand()); + setColor(feature.color()); + setThickStart(feature.thickStart()); + setThickEnd(feature.thickEnd()); + m_blocks = new ArrayList<>(feature.blocks()); // we need to copy so that it's mutable! } @Nonnull @@ -228,7 +228,6 @@ public Builder setChromosome(@Nonnull String chromosome) { return this; } - @SuppressWarnings("ConstantConditions") @Nonnull public Builder setStart(@Nonnegative long start) { Preconditions.checkArgument(start > -1, "Start " + start + " < 0"); @@ -236,7 +235,6 @@ public Builder setStart(@Nonnegative long start) { return this; } - @SuppressWarnings("ConstantConditions") @Nonnull public Builder setEnd(@Nonnegative long end) { Preconditions.checkArgument(end > -1, "End " + end + " < 0"); @@ -361,12 +359,12 @@ public Builder clearBlocks() { @Nonnull public Builder addBlock(@Nonnull BedBlock block) { Preconditions.checkArgument( - !m_blocks.isEmpty() || block.getStart() == 0, - "The first block starts at " + block.getStart() + " != 0" + !m_blocks.isEmpty() || block.start() == 0, + "The first block starts at " + block.start() + " != 0" ); for (BedBlock other : m_blocks) { Preconditions.checkArgument( - block.getStart() >= other.getEnd() || block.getEnd() <= other.getStart(), + block.start() >= other.end() || block.end() <= other.start(), "block " + other + " overlaps with block " + block ); } @@ -379,10 +377,11 @@ public Builder addBlock(@Nonnegative long start, @Nonnegative long end) { return addBlock(new BedBlock(start, end)); } - @Nonnull + @Override + @Nonnull public BedFeature build() { if (!m_blocks.isEmpty()) { - long blockEnd = 
m_blocks.get(m_blocks.size() - 1).getEnd(); + long blockEnd = m_blocks.get(m_blocks.size() - 1).end(); Preconditions.checkArgument( blockEnd == m_end - m_start, "Last block end " + blockEnd + " is not feature end " + (m_end - m_start) diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/model/package-info.java b/bed/src/main/java/org/pharmgkb/parsers/bed/model/package-info.java new file mode 100644 index 0000000..bb078b5 --- /dev/null +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.bed.model; diff --git a/bed/src/main/java/org/pharmgkb/parsers/bed/package-info.java b/bed/src/main/java/org/pharmgkb/parsers/bed/package-info.java new file mode 100644 index 0000000..90e6d4a --- /dev/null +++ b/bed/src/main/java/org/pharmgkb/parsers/bed/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.bed; diff --git a/bed/src/test/java/org/pharmgkb/parsers/bed/BedBlockTest.java b/bed/src/test/java/org/pharmgkb/parsers/bed/BedBlockTest.java index a37b9f3..2b0b950 100644 --- a/bed/src/test/java/org/pharmgkb/parsers/bed/BedBlockTest.java +++ b/bed/src/test/java/org/pharmgkb/parsers/bed/BedBlockTest.java @@ -3,8 +3,7 @@ import org.junit.jupiter.api.Test; import org.pharmgkb.parsers.bed.model.BedBlock; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link BedBlock}. 
@@ -15,9 +14,9 @@ public class BedBlockTest { @Test public void test() { BedBlock block = new BedBlock(5, 9); - assertEquals(5, block.getStart()); - assertEquals(9, block.getEnd()); - assertEquals(4, block.getLength()); + assertEquals(5, block.start()); + assertEquals(9, block.end()); + assertEquals(4, block.length()); assertEquals("5-9", block.toString()); } @@ -29,4 +28,4 @@ public void testEquals() { assertEquals(block1, block2); assertNotEquals(block1, block3); } -} \ No newline at end of file +} diff --git a/bed/src/test/java/org/pharmgkb/parsers/bed/BedFeatureTest.java b/bed/src/test/java/org/pharmgkb/parsers/bed/BedFeatureTest.java index 9ee7e52..aff1e01 100644 --- a/bed/src/test/java/org/pharmgkb/parsers/bed/BedFeatureTest.java +++ b/bed/src/test/java/org/pharmgkb/parsers/bed/BedFeatureTest.java @@ -22,12 +22,12 @@ public class BedFeatureTest { @Test public void testColor() { Optional color = new BedFeature.Builder("chr1", 1, 2) - .setColorFromString("2,3,4").build().getColor(); + .setColorFromString("2,3,4").build().color(); assertTrue(color.isPresent()); assertEquals(new Color(2, 3, 4), color.get()); assertFalse( new BedFeature.Builder("chr1", 1, 2) - .build().getColor().isPresent() + .build().color().isPresent() ); } @@ -99,7 +99,7 @@ public void testLargeScore() { @Test public void testNoBlocks() { assertTrue(new BedFeature.Builder("chr1", 0, 15).build() - .getBlocks().isEmpty()); + .blocks().isEmpty()); } @Test @@ -109,7 +109,7 @@ public void testBlocks() { assertEquals(Arrays.asList(block1, block2), new BedFeature.Builder("chr1", 0, 15) .addBlock(0, 8).addBlock(8, 15) - .build().getBlocks()); + .build().blocks()); } @Test @@ -158,11 +158,11 @@ public void testBadBlockEnd() { public void testRebuild() { BedFeature.Builder builder = new BedFeature.Builder("chr1", 0, 15).setScore(200); BedFeature one = builder.setScore(200).build(); - assertTrue(one.getScore().isPresent()); - assertEquals(200, (int)one.getScore().get()); + 
assertTrue(one.score().isPresent()); + assertEquals(200, (int)one.score().get()); BedFeature two = builder.setScore(500).build(); - assertTrue(two.getScore().isPresent()); - assertEquals(500, (int)two.getScore().get()); + assertTrue(two.score().isPresent()); + assertEquals(500, (int)two.score().get()); } @Test @@ -185,4 +185,4 @@ public void testCopyConstructorAlterBlocks() { assertEquals(one, two); } -} \ No newline at end of file +} diff --git a/bed/src/test/java/org/pharmgkb/parsers/bed/BedParserTest.java b/bed/src/test/java/org/pharmgkb/parsers/bed/BedParserTest.java index 7486f23..665a826 100644 --- a/bed/src/test/java/org/pharmgkb/parsers/bed/BedParserTest.java +++ b/bed/src/test/java/org/pharmgkb/parsers/bed/BedParserTest.java @@ -13,8 +13,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link BedParser}. 
@@ -25,7 +24,7 @@ public class BedParserTest { @Test public void testParse() throws Exception { Path file = Paths.get(getClass().getResource("bed1.bed").toURI()); - List features = Files.lines(file).map(new BedParser()).collect(Collectors.toList()); + List features = Files.lines(file).map(new BedParser()).toList(); assertEquals(3, features.size()); BedFeature first = new BedFeature.Builder("chr1", 0, 5).build(); BedFeature second = new BedFeature.Builder("chr2", 10, 20) @@ -70,4 +69,4 @@ public void testTransparentColor() { assertThrows(BadDataFormatException.class, () -> Stream.of(line).map(new BedParser()).collect(Collectors.toList())); } -} \ No newline at end of file +} diff --git a/bed/src/test/java/org/pharmgkb/parsers/bed/BedWriterTest.java b/bed/src/test/java/org/pharmgkb/parsers/bed/BedWriterTest.java index 05936b3..1d8483b 100644 --- a/bed/src/test/java/org/pharmgkb/parsers/bed/BedWriterTest.java +++ b/bed/src/test/java/org/pharmgkb/parsers/bed/BedWriterTest.java @@ -13,7 +13,7 @@ import java.util.List; import java.util.stream.Collectors; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** @@ -70,7 +70,7 @@ public void testWriteToFile() throws Exception { Path tmpFile = Files.createTempFile(getClass().getSimpleName(), ".bed"); BedWriter writer = new BedWriter(); writer.writeToFile(m_data, tmpFile); - List lines = Files.lines(tmpFile).collect(Collectors.toList()); + List lines = Files.lines(tmpFile).toList(); // TODO: Is this failing? 
// assertEquals(lines, m_expectedLines); } diff --git a/bgee/build.gradle b/bgee/build.gradle index 41bb3c5..75740b8 100644 --- a/bgee/build.gradle +++ b/bgee/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') + api project(':core') } diff --git a/bgee/lombok.config b/bgee/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/bgee/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParser.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParser.java index 8290077..cb0949f 100644 --- a/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParser.java +++ b/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParser.java @@ -3,9 +3,8 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableMap; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.MultilineParser; import org.pharmgkb.parsers.bgee.model.BgeeExpression; -import org.pharmgkb.parsers.bgee.model.Quality; +import org.pharmgkb.parsers.bgee.model.BgeeQuality; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -26,7 +25,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class BgeeExpressionParser implements MultilineParser { +public class BgeeExpressionParser implements BgeeExpressionParserI { private static final Splitter sf_tab = Splitter.on("\t"); private static final Pattern sf_barSplitter = Pattern.compile("^\"|\"$"); @@ -75,7 +74,7 @@ public Stream apply(@Nonnull String line) throws BadDataFormatEx unq.apply("Gene ID"), unq.apply("Gene name"), unq.apply("Anatomical entity ID"), unq.apply("Anatomical entity name"), unq.apply("Developmental stage ID"), unq.apply("Developmental stage name"), - unq.apply("Expression").equals("present"), Quality.find(unq.apply("Call quality").replace(" quality", "")), + 
unq.apply("Expression").equals("present"), BgeeQuality.find(unq.apply("Call quality").replace(" quality", "")), new BigDecimal(unq.apply("Expression rank")), ImmutableMap.copyOf(info) )); } catch (NumberFormatException | NullPointerException | IndexOutOfBoundsException e) { diff --git a/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParserI.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParserI.java new file mode 100644 index 0000000..65f6a49 --- /dev/null +++ b/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeExpressionParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.bgee; + +import org.pharmgkb.parsers.MultilineParser; +import org.pharmgkb.parsers.bgee.model.BgeeExpression; + +public interface BgeeExpressionParserI extends MultilineParser { + +} diff --git a/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeResource.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeResource.java deleted file mode 100644 index f448aa5..0000000 --- a/bgee/src/main/java/org/pharmgkb/parsers/bgee/BgeeResource.java +++ /dev/null @@ -1,32 +0,0 @@ -package org.pharmgkb.parsers.bgee; - -import org.pharmgkb.parsers.WebResource; -import org.pharmgkb.parsers.model.CommonSpecies; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.nio.file.Path; -import java.util.Optional; - -/** - * @author Douglas Myers-Turnbull - */ -@Immutable -public class BgeeResource extends WebResource { - - protected BgeeResource(@Nonnull String url, @Nonnull Optional cachePath) { - super(url, true, cachePath); - } - - @Nonnull - public static BgeeResource ofSpecies(@Nonnull CommonSpecies species) { - return ofSpecies(species.getFormalName()); - } - - @Nonnull - public static BgeeResource ofSpecies(@Nonnull String speciesFormalName) { - String url = "ftp://ftp.bgee.org/current/download/calls/expr_calls/Danio_rerio_expr_simple_development.tsv.gz"; - return new BgeeResource(url, Optional.empty()); - } - -} diff --git 
a/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeExpression.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeExpression.java index 80b3d00..52c9c30 100644 --- a/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeExpression.java +++ b/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeExpression.java @@ -2,9 +2,6 @@ import com.google.common.collect.ImmutableMap; -import javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; import java.math.BigDecimal; import java.util.Objects; @@ -12,97 +9,34 @@ /** * @author Douglas Myers-Turnbull */ -@Immutable -public class BgeeExpression { - - protected final String m_geneId; - protected final String m_geneName; - protected final String m_tissueId; - protected final String m_tissueName; - protected final String m_stageId; - protected final String m_stageName; - protected final boolean m_isExpressed; - protected final Quality m_quality; - protected final BigDecimal m_level; - protected final ImmutableMap m_fullInfo; - - public BgeeExpression(String geneId, String geneName, String tissueId, String tissueName, String stageId, String stageName, boolean isExpressed, Quality quality, BigDecimal level, ImmutableMap extendedInfo) { - this.m_geneId = geneId; - this.m_geneName = geneName; - this.m_tissueId = tissueId; - this.m_tissueName = tissueName; - this.m_stageId = stageId; - this.m_stageName = stageName; - this.m_isExpressed = isExpressed; - this.m_quality = quality; - this.m_level = level; - this.m_fullInfo = extendedInfo; - } - - @Nonnull - public String geneId() { - return m_geneId; - } - - @Nonnull - public String geneName() { - return m_geneName; - } - - @Nonnull - public String tissueId() { - return m_tissueId; - } - - @Nonnull - public String tissueName() { - return m_tissueName; - } - - @Nonnull - public String stageId() { - return m_stageId; - } - - @Nonnull - public String stageName() { - return m_stageName; - } - - public boolean 
isExpressed() { - return m_isExpressed; - } - - @Nonnull - public Quality getQuality() { - return m_quality; - } - - @Nonnegative - public BigDecimal getLevel() { - return m_level; - } - - @Nonnull - public ImmutableMap getFullInfo() { - return m_fullInfo; - } +public record BgeeExpression( + String geneId, + String geneName, + String tissueId, + String tissueName, + String stageId, + String stageName, + boolean isExpressed, + BgeeQuality quality, + BigDecimal level, + ImmutableMap fullInfo +) { @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - BgeeExpression that = (BgeeExpression) o; - return Objects.equals(m_fullInfo, that.m_fullInfo); + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + BgeeExpression o = (BgeeExpression) obj; + return Objects.equals(fullInfo, o.fullInfo); } @Override public int hashCode() { - return Objects.hash(m_fullInfo); + return Objects.hash(fullInfo); } @Override public String toString() { - return BgeeExpression.class.getSimpleName() + "[" + m_fullInfo + "]"; + return BgeeExpression.class.getSimpleName() + "[" + fullInfo + "]"; } } diff --git a/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/Quality.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeQuality.java similarity index 59% rename from bgee/src/main/java/org/pharmgkb/parsers/bgee/model/Quality.java rename to bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeQuality.java index a98639b..6d12544 100644 --- a/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/Quality.java +++ b/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/BgeeQuality.java @@ -5,12 +5,14 @@ /** * @author Douglas Myers-Turnbull */ -public enum Quality { +public enum BgeeQuality { - Silver, Gold, Unknown; + Silver, + Gold, + Unknown; - public static Quality find(String name) { - return Arrays.stream(Quality.values()) + 
public static BgeeQuality find(String name) { + return Arrays.stream(values()) .filter(e -> e.name().equalsIgnoreCase(name)).findAny() .orElse(Unknown); } diff --git a/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/package-info.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/package-info.java new file mode 100644 index 0000000..1d7508f --- /dev/null +++ b/bgee/src/main/java/org/pharmgkb/parsers/bgee/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.bgee.model; diff --git a/bgee/src/main/java/org/pharmgkb/parsers/bgee/package-info.java b/bgee/src/main/java/org/pharmgkb/parsers/bgee/package-info.java new file mode 100644 index 0000000..00c1dc8 --- /dev/null +++ b/bgee/src/main/java/org/pharmgkb/parsers/bgee/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.bgee; diff --git a/bgee/src/test/java/org/pharmgkb/parsers/bgee/BgeeResourceTest.java b/bgee/src/test/java/org/pharmgkb/parsers/bgee/BgeeResourceTest.java deleted file mode 100644 index 43b0a27..0000000 --- a/bgee/src/test/java/org/pharmgkb/parsers/bgee/BgeeResourceTest.java +++ /dev/null @@ -1,16 +0,0 @@ -package org.pharmgkb.parsers.bgee; - -import org.junit.jupiter.api.Test; - -import java.io.IOException; - -public class BgeeResourceTest { - - @Test - public void ofSpecies() throws IOException { - //try (BufferedReader reader = BgeeResource.ofSpecies(CommonSpecies.Zebrafish).newReader()) { - // reader.lines().flatMap(new BgeeExpressionParser()).forEach(System.out::println); - //} - } - -} \ No newline at end of file diff --git a/build.gradle b/build.gradle index 3786e0b..6fa5652 100644 --- a/build.gradle +++ b/build.gradle @@ -1,31 +1,16 @@ plugins { id 'java-library' id 'maven-publish' - id "com.jfrog.bintray" version "1.+" id 'signing' - id 'io.freefair.lombok' version '5.3.0' } -bintray { - user = System.getenv('BINTRAY_USER') - key = System.getenv('BINTRAY_KEY') - pkg { - repo = 'generic' - name = 'bioio' - userOrg = 'dmyersturnbull' - licenses = ['MPL-2.0'] - 
vcsUrl = 'https://github.com/dmyersturnbull/bioio' - } -} - allprojects { apply plugin: 'java-library' - apply plugin: 'io.freefair.lombok' group = 'org.pharmgkb.parsers' - version = '0.3' + version = '0.3.0' java { - sourceCompatibility = JavaVersion.VERSION_14 - targetCompatibility = JavaVersion.VERSION_14 + sourceCompatibility = JavaVersion.VERSION_21 + targetCompatibility = JavaVersion.VERSION_21 } } @@ -34,13 +19,14 @@ subprojects { mavenCentral() } dependencies { - implementation 'com.google.guava:guava:30.1-jre' - implementation 'org.slf4j:slf4j-api:1.7.30' + implementation 'com.google.guava:guava:33.+' + implementation 'org.slf4j:slf4j-api:2.+' implementation 'com.google.code.findbugs:jsr305:3.0.2' - testImplementation(platform('org.junit:junit-bom:5.7.1')) - testImplementation('org.junit.jupiter:junit-jupiter') - testImplementation 'org.assertj:assertj-core:3.19.0' - testImplementation 'org.assertj:assertj-guava:3.4.0' + testImplementation(platform('org.junit:junit-bom:5.+')) + testImplementation('org.junit.jupiter:junit-jupiter:5.+') + testRuntimeOnly 'org.junit.platform:junit-platform-launcher' + testImplementation 'org.assertj:assertj-core:3.+' + testImplementation 'org.assertj:assertj-guava:3.+' } } @@ -53,7 +39,7 @@ publishing { } repositories { maven { - url = uri("${buildDir}/publishing-repository") + url = layout.buildDirectory.dir("publishing-repository") } } } diff --git a/chain/build.gradle b/chain/build.gradle index 1cf83f3..c9508c4 100644 --- a/chain/build.gradle +++ b/chain/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') -} \ No newline at end of file + api project(':core') +} diff --git a/chain/lombok.config b/chain/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/chain/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/ChainResource.java
b/chain/src/main/java/org/pharmgkb/parsers/chain/ChainResource.java deleted file mode 100644 index 46ab26b..0000000 --- a/chain/src/main/java/org/pharmgkb/parsers/chain/ChainResource.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.pharmgkb.parsers.chain; - -import org.pharmgkb.parsers.WebResource; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.nio.file.Path; -import java.util.Optional; - -/** - * A chain file from GoldenPath at UCSC. - */ -@Immutable -public class ChainResource extends WebResource { - - protected ChainResource(@Nonnull String url, @Nonnull Optional cachePath) { - super(url, true, cachePath); - } - - public static ChainResource of(@Nonnull String genome1, @Nonnull String genome2) { - // http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToDasNov2.over.chain.gz - String url = "http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/$1To$2.over.chain.gz" - .replace("$1", genome1).replace("$2", genome2); - return new ChainResource(url, Optional.empty()); - } -} diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParser.java b/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParser.java index e983938..226abb7 100644 --- a/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParser.java +++ b/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParser.java @@ -2,12 +2,8 @@ import com.google.common.base.Preconditions; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineStructureParser; import org.pharmgkb.parsers.chain.model.GenomeChain; -import org.pharmgkb.parsers.model.ChromosomeName; -import org.pharmgkb.parsers.model.Locus; -import org.pharmgkb.parsers.model.LocusRange; -import org.pharmgkb.parsers.model.Strand; +import org.pharmgkb.parsers.model.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,7 +31,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe // note that this is still thread safe even though 
LineConsumer is not -public class GenomeChainParser implements LineStructureParser { +public class GenomeChainParser implements GenomeChainParserI { private static final long sf_logEvery = 10000; @@ -43,7 +39,7 @@ public class GenomeChainParser implements LineStructureParser { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0l); + private final AtomicLong m_lineNumber = new AtomicLong(0L); /** * @throws IllegalArgumentException If {@code stream} is parallel @@ -88,14 +84,14 @@ public void accept(@Nonnull String line) { if (line.startsWith("chain")) { - sourceChr = new ChromosomeName(parts[2]); - targetChr = new ChromosomeName(parts[7]); + sourceChr = new StandardChromosomeName(parts[2]); + targetChr = new StandardChromosomeName(parts[7]); sourcePosition = Long.parseLong(parts[5]); targetPosition = Long.parseLong(parts[10]); sourceEnd = Long.parseLong(parts[6]); targetEnd = Long.parseLong(parts[11]); - sourceStrand = Strand.lookupBySymbol(parts[4]); - targetStrand = Strand.lookupBySymbol(parts[9]); + sourceStrand = Strand.fromSymbol(parts[4]); + targetStrand = Strand.fromSymbol(parts[9]); //noinspection OptionalGetWithoutIsPresent sf_logger.trace( "\nCHAIN: {} ----> {}", @@ -107,8 +103,9 @@ public void accept(@Nonnull String line) { } else { int diagonal = Integer.parseInt(parts[0]); - int sourceGap = 0, targetGap = 0; - if (parts.length > 1) { + int sourceGap = 0; + int targetGap = 0; + if (parts.length > 1) { sourceGap = Integer.parseInt(parts[1]); targetGap = Integer.parseInt(parts[2]); } @@ -150,7 +147,7 @@ public void accept(@Nonnull String line) { throw new BadDataFormatException("Couldn't parse line #" + m_lineNumber, e); } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. 
+ // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. e.addSuppressed(new RuntimeException("Unexpectedly failed to parse line " + m_lineNumber)); throw e; } diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParserI.java b/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParserI.java new file mode 100644 index 0000000..554b7cf --- /dev/null +++ b/chain/src/main/java/org/pharmgkb/parsers/chain/GenomeChainParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.chain; + +import org.pharmgkb.parsers.LineStructureParser; +import org.pharmgkb.parsers.chain.model.GenomeChain; + +public interface GenomeChainParserI extends LineStructureParser { + +} diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChain.java b/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChain.java index e6bcfc7..876ab9d 100644 --- a/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChain.java +++ b/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChain.java @@ -4,7 +4,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSortedMap; import org.pharmgkb.parsers.ObjectBuilder; -import org.pharmgkb.parsers.model.ChromosomeName; +import org.pharmgkb.parsers.model.StandardChromosomeName; import org.pharmgkb.parsers.model.Locus; import org.pharmgkb.parsers.model.LocusRange; import org.slf4j.Logger; @@ -15,7 +15,6 @@ import javax.annotation.concurrent.NotThreadSafe; import java.lang.invoke.MethodHandles; import java.util.*; -import java.util.function.Function; /** * A mapping between reference coordinate sets according tot he UCSC "chain format". 
@@ -24,33 +23,31 @@ * @author Douglas Myers-Turnbull */ @Immutable -public class GenomeChain implements Function> { +public class GenomeChain implements GenomeChainI { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); // only compares loci of the same chromosome and strand - private static final Comparator sf_comparator = Comparator.comparingLong(o -> o.getEnd().getPosition()); + private static final Comparator sf_comparator = Comparator.comparingLong(o -> o.end().position()); - private final ImmutableMap> m_map; + private final ImmutableMap> m_map; - private GenomeChain(@Nonnull Builder builder) { + protected GenomeChain(@Nonnull Builder builder) { this(builder.m_map); } - private GenomeChain(@Nonnull Map> mutableMap) { - Map> map = new HashMap<>(2048); - mutableMap.forEach((name, values) -> map.put(name, ImmutableSortedMap.copyOfSorted(values))); - m_map = ImmutableMap.copyOf(map); + private GenomeChain(@Nonnull Map> map) { + Map> newMap = new HashMap<>(2048); + map.forEach((name, values) -> newMap.put(name, ImmutableSortedMap.copyOfSorted(values))); + m_map = ImmutableMap.copyOf(newMap); } - /** - * @return A new GenomeChain with the source and target assemblies swapped - */ - @Nonnull + @Override + @Nonnull public GenomeChain invert() { - Map> map = new HashMap<>(m_map.size()); - for (Map.Entry> e : m_map.entrySet()) { - ChromosomeName chr = e.getKey(); + Map> map = new HashMap<>(m_map.size()); + for (Map.Entry> e : m_map.entrySet()) { + StandardChromosomeName chr = e.getKey(); map.put(chr, new TreeMap<>()); for (Map.Entry entry : e.getValue().entrySet()) { map.get(chr).put(entry.getValue(), entry.getKey()); @@ -59,25 +56,30 @@ public GenomeChain invert() { return new GenomeChain(map); } - @Nonnull + @Override + @Nonnull public Optional apply(@Nonnull Locus locus) { - NavigableMap list = m_map.get(locus.getChromosome()); + NavigableMap list = + m_map.get(new 
StandardChromosomeName(locus.chromosome().original())); if (list == null) return Optional.empty(); for (LocusRange range : list.navigableKeySet()) { if (range.contains(locus)) { final LocusRange targetRange = list.get(range); - final long delta = locus.getPosition() - range.getStart().getPosition(); - return Optional.of(new Locus( - targetRange.getChromosome(), - targetRange.getStart().getPosition() + delta, - targetRange.getStrand() - )); + Objects.requireNonNull(targetRange, () -> "Error getting range for " + locus); + final long delta = locus.position() - range.start().position(); + return Optional.of( + new Locus( + targetRange.chromosome(), + targetRange.start().position() + delta, + targetRange.strand() + ) + ); } - if (range.getEnd().getPosition() > locus.getPosition()) { + if (range.end().position() > locus.position()) { return Optional.empty(); } @@ -86,16 +88,27 @@ public Optional apply(@Nonnull Locus locus) { return Optional.empty(); } + @Override + public int nChromosomes() { + return m_map.size(); + } + + @Override + public int nLoci() { + return m_map.values().stream().mapToInt(ImmutableSortedMap::size).sum(); + } + @NotThreadSafe public static class Builder implements ObjectBuilder { - private Map> m_map = new HashMap<>(); + private final Map> m_map; - public Builder() { - - } + public Builder() { + m_map = new HashMap<>(14); + } public Builder(@Nonnull Builder builder) { + m_map = new HashMap<>(builder.m_map.size()); builder.m_map.forEach((chr, map) -> { NavigableMap newMap = new TreeMap<>(map); m_map.put(chr, newMap); @@ -103,6 +116,7 @@ public Builder(@Nonnull Builder builder) { } public Builder(@Nonnull GenomeChain chain) { + m_map = new HashMap<>(chain.nChromosomes()); chain.m_map.forEach((chr, map) -> { NavigableMap newMap = new TreeMap<>(map); m_map.put(chr, newMap); @@ -114,7 +128,7 @@ public Builder(@Nonnull GenomeChain chain) { */ @Nonnull public Builder remove(@Nonnull LocusRange source) { - ChromosomeName chr = source.getChromosome(); + 
var chr = new StandardChromosomeName(source.chromosome().original()); if (m_map.containsKey(chr)) { m_map.get(chr).remove(source); } @@ -132,54 +146,65 @@ public Builder add(@Nonnull LocusRange source, @Nonnull LocusRange target) { source + " has size " + sourceSize + " but " + target + " has size " + targetSize ); - final ChromosomeName sourceChr = source.getChromosome(); + final StandardChromosomeName sourceChr = new StandardChromosomeName(source.chromosome().original()); if (!m_map.containsKey(sourceChr)) { m_map.put(sourceChr, new TreeMap<>(sf_comparator)); } for (LocusRange r : m_map.get(sourceChr).keySet()) { // can only compare when the strands are the same - if (r.getStrand() == target.getStrand() && r.overlapsWith(source)) { + if (r.strand() == target.strand() && r.overlapsWith(source)) { throw new IllegalArgumentException("Source locus " + source + " overlaps with " + r); } } for (LocusRange r : m_map.get(sourceChr).values()) { // really, this should never happen // can only compare when the strands are the same - if (r.getStrand() == target.getStrand() && r.overlapsWith(target)) { + if (r.strand() == target.strand() && r.overlapsWith(target)) { throw new IllegalArgumentException("Target locus " + target + " overlaps with " + r); } } m_map.get(sourceChr).put(source, target); - return this; } - @Nonnull + @Override + @Nonnull public GenomeChain build() { return new GenomeChain(this); } } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - GenomeChain that = (GenomeChain) o; - return Objects.equals(m_map, that.m_map); - } - - @Override - public int hashCode() { - return Objects.hash(m_map); - } - - @Override + @Override + public final boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (null == obj) { + return false; + } + // Object.equals must return false, never throw, for an argument of another type; + // throwing breaks callers that compare against arbitrary objects, + // such as Collection.contains or assertNotEquals. + // instanceof stays symmetric because this method is final.
+ if (!(obj instanceof GenomeChain)) { + return false; + } + final var o = (GenomeChain) obj; + return Objects.equals(m_map, o.m_map); + } + + @Override + public int hashCode() { + return Objects.hash(m_map); + } + + @Override public String toString() { return "GenomeChain{" + - "map=" + m_map + - '}'; + "map=" + m_map + + '}'; } } diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChainI.java b/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChainI.java new file mode 100644 index 0000000..f22298a --- /dev/null +++ b/chain/src/main/java/org/pharmgkb/parsers/chain/model/GenomeChainI.java @@ -0,0 +1,20 @@ +package org.pharmgkb.parsers.chain.model; + +import org.pharmgkb.parsers.model.Locus; + +import javax.annotation.Nonnull; +import java.util.Optional; +import java.util.function.Function; + +public interface GenomeChainI extends Function> { + + /** + * @return A new GenomeChain with the source and target assemblies swapped + */ + @Nonnull + GenomeChain invert(); + + int nChromosomes(); + + int nLoci(); +} diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/model/package-info.java b/chain/src/main/java/org/pharmgkb/parsers/chain/model/package-info.java new file mode 100644 index 0000000..15912a7 --- /dev/null +++ b/chain/src/main/java/org/pharmgkb/parsers/chain/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.chain.model; diff --git a/chain/src/main/java/org/pharmgkb/parsers/chain/package-info.java b/chain/src/main/java/org/pharmgkb/parsers/chain/package-info.java new file mode 100644 index 0000000..a44cb39 --- /dev/null +++ b/chain/src/main/java/org/pharmgkb/parsers/chain/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.chain; diff --git a/chain/src/test/java/org/pharmgkb/parsers/chain/GenomeChainParserTest.java
b/chain/src/test/java/org/pharmgkb/parsers/chain/GenomeChainParserTest.java @@ -9,7 +9,7 @@ import java.nio.file.Paths; import java.util.Optional; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link GenomeChainParser}. @@ -45,4 +45,4 @@ public void testFromChainFile() throws Exception { assertEquals(Optional.of(new Locus("chr2", 200, Strand.PLUS)), f_); } -} \ No newline at end of file +} diff --git a/chain/src/test/java/org/pharmgkb/parsers/chain/GenomeChainTest.java b/chain/src/test/java/org/pharmgkb/parsers/chain/GenomeChainTest.java index 3d01c46..fedec15 100644 --- a/chain/src/test/java/org/pharmgkb/parsers/chain/GenomeChainTest.java +++ b/chain/src/test/java/org/pharmgkb/parsers/chain/GenomeChainTest.java @@ -157,4 +157,4 @@ private void addToChain(GenomeChain.Builder chain, int sourceStart, int sourceSt LocusRange target = new LocusRange(new Locus("chr1", targetStart, Strand.PLUS), new Locus("chr1", targetStop, Strand.PLUS)); chain.add(source, target); } -} \ No newline at end of file +} diff --git a/core/lombok.config b/core/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/core/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/core/src/main/java/org/pharmgkb/parsers/LineParser.java b/core/src/main/java/org/pharmgkb/parsers/LineParser.java index f54691e..708da83 100644 --- a/core/src/main/java/org/pharmgkb/parsers/LineParser.java +++ b/core/src/main/java/org/pharmgkb/parsers/LineParser.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers; -import org.pharmgkb.parsers.utils.IoUtils; +import org.pharmgkb.parsers.io.IoUtils; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -25,7 +25,7 @@ default List collectAll(@Nonnull File file) throws UncheckedIOException, BadD @Nonnull default List collectAll(@Nonnull Path file) throws 
UncheckedIOException, BadDataFormatException { - return collectAll(IoUtils.readUtf8Lines(file)); + return collectAll(new IoUtils().readUtf8Lines(file)); } @Nonnull @@ -40,10 +40,11 @@ default Stream parseAll(@Nonnull File file) throws UncheckedIOException, BadD @Nonnull default Stream parseAll(@Nonnull Path file) throws UncheckedIOException, BadDataFormatException { - return parseAll(IoUtils.readUtf8Lines(file)); + return parseAll(new IoUtils().readUtf8Lines(file)); } /** + * Consider overriding this. * For example: * {@code * return stream.filter(s -> s.isEmpty() || s.startsWith("#")).map(this); @@ -52,7 +53,9 @@ default Stream parseAll(@Nonnull Path file) throws UncheckedIOException, BadD * @throws BadDataFormatException For most formatting errors */ @Nonnull - Stream parseAll(@Nonnull Stream stream) throws UncheckedIOException, BadDataFormatException; + default Stream parseAll(@Nonnull Stream stream) throws UncheckedIOException, BadDataFormatException { + return stream.map(this); + } @Nonnull @Override diff --git a/core/src/main/java/org/pharmgkb/parsers/LineStructureParser.java b/core/src/main/java/org/pharmgkb/parsers/LineStructureParser.java index 2a2ac24..3d5dd91 100644 --- a/core/src/main/java/org/pharmgkb/parsers/LineStructureParser.java +++ b/core/src/main/java/org/pharmgkb/parsers/LineStructureParser.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers; -import org.pharmgkb.parsers.utils.IoUtils; +import org.pharmgkb.parsers.io.IoUtils; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -23,7 +23,7 @@ default S parse(@Nonnull File file) throws UncheckedIOException, BadDataFormatEx @Nonnull default S parse(@Nonnull Path file) throws UncheckedIOException, BadDataFormatException { - return apply(IoUtils.readUtf8Lines(file)); + return apply(new IoUtils().readUtf8Lines(file)); } @Nonnull diff --git a/core/src/main/java/org/pharmgkb/parsers/LineStructureWriter.java b/core/src/main/java/org/pharmgkb/parsers/LineStructureWriter.java index 
6bf3a38..5a321f5 100644 --- a/core/src/main/java/org/pharmgkb/parsers/LineStructureWriter.java +++ b/core/src/main/java/org/pharmgkb/parsers/LineStructureWriter.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers; -import org.pharmgkb.parsers.utils.IoUtils; +import org.pharmgkb.parsers.io.IoUtils; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -20,7 +20,7 @@ default void writeToFile(@Nonnull S structure, @Nonnull Path file) throws Unchec writeToFile(structure, file.toFile()); } default void writeToFile(@Nonnull S structure, @Nonnull File file) throws UncheckedIOException { - IoUtils.writeUtf8Lines(file, apply(structure)); + new IoUtils().writeUtf8Lines(file, apply(structure)); } /** diff --git a/core/src/main/java/org/pharmgkb/parsers/LineWriter.java b/core/src/main/java/org/pharmgkb/parsers/LineWriter.java index cd85417..e6a74eb 100644 --- a/core/src/main/java/org/pharmgkb/parsers/LineWriter.java +++ b/core/src/main/java/org/pharmgkb/parsers/LineWriter.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers; -import org.pharmgkb.parsers.utils.IoUtils; +import org.pharmgkb.parsers.io.IoUtils; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -29,7 +29,7 @@ default void appendToFile(@Nonnull Iterable lines, @Nonnull File file) throws appendToFile(StreamSupport.stream(lines.spliterator(), false), file); } default void appendToFile(@Nonnull Stream stream, @Nonnull File file) throws UncheckedIOException { - IoUtils.appendUtf8Lines(file.toPath(), stream.map(Object::toString)); + new IoUtils().appendUtf8Lines(file.toPath(), stream.map(Object::toString)); } default void writeToFile(@Nonnull Stream stream, @Nonnull Path file) throws UncheckedIOException { writeToFile(stream, file.toFile()); @@ -41,10 +41,10 @@ default void writeToFile(@Nonnull Iterable lines, @Nonnull File file) throws writeToFile(StreamSupport.stream(lines.spliterator(), false), file); } default void writeToFile(@Nonnull Stream stream, @Nonnull File file) throws 
UncheckedIOException { - IoUtils.writeUtf8Lines(file.toPath(), stream.map(Object::toString)); + new IoUtils().writeUtf8Lines(file.toPath(), stream.map(Object::toString)); } default void writeToFile(@Nonnull Collection lines, @Nonnull Path file) throws UncheckedIOException { - IoUtils.writeUtf8Lines(file, lines.stream().map(Object::toString)); + new IoUtils().writeUtf8Lines(file, lines.stream().map(Object::toString)); } /** diff --git a/core/src/main/java/org/pharmgkb/parsers/MultilineParser.java b/core/src/main/java/org/pharmgkb/parsers/MultilineParser.java index 8aeb76d..7572a1f 100644 --- a/core/src/main/java/org/pharmgkb/parsers/MultilineParser.java +++ b/core/src/main/java/org/pharmgkb/parsers/MultilineParser.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers; -import org.pharmgkb.parsers.utils.IoUtils; +import org.pharmgkb.parsers.io.IoUtils; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -26,7 +26,7 @@ default List collectAll(@Nonnull File file) throws UncheckedIOException, BadD @Nonnull default List collectAll(@Nonnull Path file) throws UncheckedIOException, BadDataFormatException { - return collectAll(IoUtils.readUtf8Lines(file)); + return collectAll(new IoUtils().readUtf8Lines(file)); } @Nonnull @@ -41,7 +41,7 @@ default Stream parseAll(@Nonnull File file) throws UncheckedIOException, BadD @Nonnull default Stream parseAll(@Nonnull Path file) throws UncheckedIOException, BadDataFormatException { - return parseAll(IoUtils.readUtf8Lines(file)); + return parseAll(new IoUtils().readUtf8Lines(file)); } /** diff --git a/core/src/main/java/org/pharmgkb/parsers/MultilineWriter.java b/core/src/main/java/org/pharmgkb/parsers/MultilineWriter.java new file mode 100644 index 0000000..a1b7df0 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/MultilineWriter.java @@ -0,0 +1,72 @@ +package org.pharmgkb.parsers; + +import org.pharmgkb.parsers.io.IoUtils; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import 
java.io.File; +import java.io.UncheckedIOException; +import java.nio.file.Path; +import java.util.Collection; +import java.util.function.Function; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +/** + * A writer that transforms one or more data structures into one or more lines. + * To be used with {@link Stream#flatMap(Function)}. + * @author Douglas Myers-Turnbull + */ +public interface MultilineWriter extends Function, Stream> { + + @Override + Stream apply(Stream stream); + + default void appendToFile(@Nonnull Stream stream, @Nonnull Path file) throws UncheckedIOException { + new IoUtils().appendUtf8Lines(file, apply(stream)); + } + default void appendToFile(@Nonnull Iterable lines, @Nonnull Path file) throws UncheckedIOException { + appendToFile(StreamSupport.stream(lines.spliterator(), false), file); + } + default void appendToFile(@Nonnull Iterable lines, @Nonnull File file) throws UncheckedIOException { + appendToFile(StreamSupport.stream(lines.spliterator(), false), file.toPath()); + } + default void appendToFile(@Nonnull Stream stream, @Nonnull File file) throws UncheckedIOException { + appendToFile(stream, file.toPath()); + } + default void appendToFile(@Nonnull Collection lines, @Nonnull File file) throws UncheckedIOException { + appendToFile(lines.stream(), file.toPath()); + } + default void appendToFile(@Nonnull Collection lines, @Nonnull Path file) throws UncheckedIOException { + appendToFile(lines.stream(), file); + } + default void writeToFile(@Nonnull Stream stream, @Nonnull Path file) throws UncheckedIOException { + new IoUtils().writeUtf8Lines(file, apply(stream)); + } + default void writeToFile(@Nonnull Iterable lines, @Nonnull Path file) throws UncheckedIOException { + writeToFile(StreamSupport.stream(lines.spliterator(), false), file); + } + default void writeToFile(@Nonnull Iterable lines, @Nonnull File file) throws UncheckedIOException { + writeToFile(StreamSupport.stream(lines.spliterator(), false),
file.toPath()); + } + default void writeToFile(@Nonnull Stream stream, @Nonnull File file) throws UncheckedIOException { + writeToFile(stream, file.toPath()); + } + default void writeToFile(@Nonnull Collection lines, @Nonnull Path file) throws UncheckedIOException { + writeToFile(lines.stream(), file); + } + + /** + * Override this to add post- or pre- validation or processing. + */ + default @Nonnull Stream writeAll(@Nonnull Stream stream) { + return apply(stream); + } + + /** + * @return The total number of lines this writer processed since its creation + */ + @Nonnegative + long nLinesProcessed(); + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/WebResource.java b/core/src/main/java/org/pharmgkb/parsers/WebResource.java deleted file mode 100644 index dbd374f..0000000 --- a/core/src/main/java/org/pharmgkb/parsers/WebResource.java +++ /dev/null @@ -1,89 +0,0 @@ -package org.pharmgkb.parsers; - -import org.pharmgkb.parsers.utils.HttpHeadResponse; -import org.pharmgkb.parsers.utils.IoUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.io.UncheckedIOException; -import java.lang.invoke.MethodHandles; -import java.net.URL; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Optional; -import java.util.stream.Stream; - -/** - * A text resource that can be downloaded from a URL. - * Works with either text or gzipped text. 
- */ -@Immutable -public class WebResource> { - - private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private final URL m_url; - private final boolean m_isGzip; - private final Path m_path; - - protected WebResource(@Nonnull String url, boolean isGzip, @Nonnull Optional cachePath) { - this(IoUtils.getUrl(url), isGzip, cachePath); - } - protected WebResource(@Nonnull URL url, boolean isGzip, @Nonnull Optional cachePath) { - Path path = cachePath.orElse(null); - if (path != null && isGzip && !path.endsWith(".gzip") && !path.endsWith(".gz")) { - sf_logger.warn("Modifying cache path {} to end with .gz", path); - m_path = Paths.get(path + ".gz"); - } else if (path != null && !isGzip && (path.endsWith(".gzip") || path.endsWith(".gz"))) { - sf_logger.warn("Modifying cache path {} to end with .txt", path); - m_path = Paths.get(path + ".txt"); - } else { - m_path = path; - } - m_url = url; - m_isGzip = isGzip; - } - - public URL getUrl() { - return m_url; - } - - @Nonnull - public Optional getCachePath() { - return Optional.ofNullable(m_path); - } - - public boolean hasCache() { - return m_path != null; - } - public boolean isCached() { - return m_path != null && m_path.toFile().exists() && m_path.toFile().length() > 0; - } - - @Nonnull - public WebResource cacheTo(@Nonnull Path path) { - return new WebResource<>(m_url, m_isGzip, Optional.ofNullable(path)); - } - - @Nonnull - public Stream readLines() throws UncheckedIOException { - if (hasCache()) { - if (!isCached()) { - IoUtils.downloadBytesTo(m_url, m_path); - } - return IoUtils.readUtf8Lines(m_path); - } else if (m_isGzip) { - return IoUtils.readGzipUtf8LinesFromUrl(m_url); - } else { - return IoUtils.readUtf8LinesFromUrl(m_url); - } - } - - @Nonnull - public HttpHeadResponse queryHead() { - return IoUtils.getHeadResponse(getUrl()); - } - -} diff --git a/core/src/main/java/org/pharmgkb/parsers/escape/BackslashEscaper.java 
b/core/src/main/java/org/pharmgkb/parsers/escape/BackslashEscaper.java index 1c24eee..f7a51d6 100644 --- a/core/src/main/java/org/pharmgkb/parsers/escape/BackslashEscaper.java +++ b/core/src/main/java/org/pharmgkb/parsers/escape/BackslashEscaper.java @@ -12,7 +12,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class BackslashEscaper extends IllegalCharacterEscaper { +public final class BackslashEscaper extends IllegalCharacterEscaper { private BackslashEscaper(@Nonnull Builder builder) { super(builder.m_inverse, ImmutableSet.copyOf(builder.m_chars)); @@ -41,4 +41,4 @@ public BackslashEscaper build() { } } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/pharmgkb/parsers/escape/Rfc3986Escaper.java b/core/src/main/java/org/pharmgkb/parsers/escape/Rfc3986Escaper.java index be4c182..25b22da 100644 --- a/core/src/main/java/org/pharmgkb/parsers/escape/Rfc3986Escaper.java +++ b/core/src/main/java/org/pharmgkb/parsers/escape/Rfc3986Escaper.java @@ -25,7 +25,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class Rfc3986Escaper extends IllegalCharacterEscaper { +public final class Rfc3986Escaper extends IllegalCharacterEscaper { private static final Pattern sf_encodedPattern = Pattern.compile("%(?:\\d|[A-Fa-f]){2}"); @@ -64,4 +64,4 @@ public Rfc3986Escaper build() { } } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/pharmgkb/parsers/escape/package-info.java b/core/src/main/java/org/pharmgkb/parsers/escape/package-info.java new file mode 100644 index 0000000..76c22b6 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/escape/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.escape; diff --git a/core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFile.java b/core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFile.java new file mode 100644 index 0000000..0dd744a --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFile.java @@ -0,0 +1,160 
@@ +package org.pharmgkb.parsers.io; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; + +public class BufferedRandomAccessFile extends RandomAccessFile implements BufferedRandomAccessFileI { + + private static final int sf_defaultBufferSize = 8192; + private final byte[] buffer; + private int bufferEnd = 0; + private int bufferAt = 0; + private long realAt = 0L; + + private final int bufferSize; + + /** + * Creates a new instance of the BufferedRandomAccessFile. + * + * @param file The path of the file to open. + * @param mode Specifies the mode to use ("r", "rw", etc.) + * See the BufferedLineReader documentation for more info. + * @param bufferSize The buffer size (in bytes) to use. + * @throws IOException + */ + public BufferedRandomAccessFile(Path file, String mode, int bufferSize) throws IOException { + super(file.toString(), mode); + invalidate(); + this.bufferSize = bufferSize; + buffer = new byte[bufferSize]; + } + + public BufferedRandomAccessFile(String file, String mode, int bufferSize) throws IOException { + this(Paths.get(file), mode, bufferSize); + } + + public BufferedRandomAccessFile(Path file, String mode) throws IOException { + this(file, mode, sf_defaultBufferSize); + } + + public BufferedRandomAccessFile(String file, String mode) throws IOException { + this(Paths.get(file), mode, sf_defaultBufferSize); + } + + @Override + @Nullable + public final String fetchNextLine() throws IOException { + String str; + if (bufferEnd - bufferAt <= 0 && fillBuffer() < 0) { + return null; + } + int lineEnd = -1; // final position of the char considering \n + for (int i = bufferAt; i < bufferEnd; i++) { + if ('\n' == buffer[i]) { + lineEnd = i; + break; + } + // check for only '\r' as line end + if (0 < i - bufferAt && '\r' == buffer[i - 1]) { + lineEnd = i - 1; + break; + } + } + if (lineEnd < 0) { + var input = 
new StringBuilder(256); int c; + int lastC = 0; + while (-1 != (c = read()) && '\n' != c && '\r' != lastC) { + input.append((char) c); + lastC = c; + } + if (-1 == c && input.isEmpty()) { + return null; + } + return input.toString(); + } + if (0 < lineEnd && '\n' == buffer[lineEnd] && '\r' == buffer[lineEnd - 1] && 0 <= lineEnd - bufferAt - 1) { + str = new String(buffer, bufferAt, lineEnd - bufferAt - 1, StandardCharsets.UTF_8); + } else { + str = new String(buffer, bufferAt, lineEnd - bufferAt, StandardCharsets.UTF_8); + } + bufferAt = lineEnd + 1; + return str; + } + + @Override + public int read(byte[] b, int offset, int len) throws IOException { + int delta = bufferEnd - bufferAt; + if (len <= delta) { + System.arraycopy(buffer, bufferAt, b, offset, len); + bufferAt += len; + return len; + } + for (int i = 0; i < len; i++) { + int c = read(); + if (-1 == c) { + return 0 == i ? -1 : i; /* report end of file as -1 so that readFully() terminates instead of spinning on 0 */ + } + b[offset + i] = (byte) c; + } + return len; + } + + @Override + public final int read() throws IOException { + /* Refill when the buffer is empty or exhausted, including right after open or seek. */ + if (bufferAt >= bufferEnd && 0 >= fillBuffer()) { + return -1; + } + /* Mask to 0..255: a raw byte >= 0x80 would be sign-extended to a negative int, */ + /* and 0xFF would be indistinguishable from the -1 end-of-file marker. */ + return buffer[bufferAt++] & 0xFF; + } + + @Override + public long getFilePointer() { + return realAt - bufferEnd + bufferAt; + } + + @Override + public void seek(long pos) throws IOException { + int n = (int) (realAt - pos); + if (n >= 0 && n <= bufferEnd) { + bufferAt = bufferEnd - n; + } else { + super.seek(pos); + invalidate(); + } + } + + /** + * Reads the next BUF_SIZE bytes into the internal buffer. + * + * @throws IOException + */ + private int fillBuffer() throws IOException { + int n = super.read(buffer, 0, bufferSize); + if (-1 < n) { + realAt += (long)n; + bufferEnd = n; + bufferAt = 0; + } + return n; + } + + /** + * Clears the buffer.
+ * + * @throws IOException + */ + private void invalidate() throws IOException { + bufferEnd = 0; + bufferAt = 0; + realAt = super.getFilePointer(); + } + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFileI.java b/core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFileI.java new file mode 100644 index 0000000..04ceb86 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/BufferedRandomAccessFileI.java @@ -0,0 +1,50 @@ +package org.pharmgkb.parsers.io; + +import javax.annotation.Nullable; +import java.io.Closeable; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +public interface BufferedRandomAccessFileI extends DataOutput, DataInput, Closeable { + + /** + * Reads one byte from the current position. + * + * @return The read byte or -1 in case the end was reached. + */ + int read() throws IOException; + + /** + * Reads the set number of bytes into the passed buffer. + * + * @param b The buffer to read the bytes into. + * @param off Offset within {@code b} at which the first byte read is stored. + * @param len Number of bytes to read into the buffer. + * @return Number of bytes read. + */ + int read(byte[] b, int off, int len) throws IOException; + + /** + * Moves the internal pointer to the passed (byte) position in the file. + * + * @param pos The byte position to move to. + */ + void seek(long pos) throws IOException; + + /** + * Returns the current position of the pointer in the file. + * + * @return The byte position of the pointer in the file. + */ + long getFilePointer(); + + /** + * Returns the next line from the file. In case no data could be loaded (generally as the end of the file was + * reached) null is returned.
+ * + * @return The next string on the file or null in case the end of the file was reached + */ + @Nullable + String fetchNextLine() throws IOException; +} diff --git a/core/src/main/java/org/pharmgkb/parsers/io/CompressionFormat.java b/core/src/main/java/org/pharmgkb/parsers/io/CompressionFormat.java new file mode 100644 index 0000000..904cabd --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/CompressionFormat.java @@ -0,0 +1,26 @@ +package org.pharmgkb.parsers.io; + +import java.util.List; + +/** + * A compression format that bioio can handle. + */ +public enum CompressionFormat { + NONE(List.of("")), + GZIP(List.of(".gz", ".gzip")) + ; + + private final List suffixes; + + CompressionFormat(List suffixes) { + this.suffixes = suffixes; + } + + public String suffix() { + return suffixes.get(0); + } + + public List suffixes() { + return suffixes; + } +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/HttpHeadResponse.java b/core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponse.java similarity index 74% rename from core/src/main/java/org/pharmgkb/parsers/utils/HttpHeadResponse.java rename to core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponse.java index 8c73df1..a6f74f1 100644 --- a/core/src/main/java/org/pharmgkb/parsers/utils/HttpHeadResponse.java +++ b/core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponse.java @@ -1,6 +1,7 @@ -package org.pharmgkb.parsers.utils; +package org.pharmgkb.parsers.io; import com.google.common.collect.ImmutableMap; +import org.pharmgkb.parsers.utils.Try; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -25,7 +26,7 @@ * } */ @Immutable -public class HttpHeadResponse { +public class HttpHeadResponse implements HttpHeadResponseI { private final URL m_url; private final int m_code; @@ -59,57 +60,67 @@ public HttpHeadResponse( m_headers = Map.copyOf(headers); } - @Nonnegative - public int getCode() { + @Override + @Nonnegative + public int code() { return m_code; } - 
@Nonnull - public String getMessage() { + @Override + @Nonnull + public String message() { return m_message; } - @Nonnull - public URL getUrl() { + @Override + @Nonnull + public URL url() { return m_url; } - @Nonnull - public Optional getContentLength() throws InvalidResponseException { - return getSingle("content-length").map(r -> + @Override + @Nonnull + public Optional contentLength() throws InvalidResponseException { + return getSingleHeader("content-length").map(r -> Try.succeed(r, NumberFormatException.class) .compose(Long::parseLong) .require(v -> v < 0) .orElseThrow(new InvalidResponseException("Failed to parse content-length " + r))); } - @Nonnull - public Optional getExpiration() { + @Override + @Nonnull + public Optional expiration() { return getDatetime("expires"); } - @Nonnull - public Optional getDate() { + @Override + @Nonnull + public Optional date() { return getDatetime("date"); } - @Nonnull - public Optional getLastModified() { + @Override + @Nonnull + public Optional lastModified() { return getDatetime("last-modified"); } - @Nonnull - public Optional getContentType() { - return getSingle("content-type"); + @Override + @Nonnull + public Optional contentType() { + return getSingleHeader("content-type"); } - @Nonnull - public Optional getContentEncoding() { - return getSingle("content-encoding"); + @Override + @Nonnull + public Optional contentEncoding() { + return getSingleHeader("content-encoding"); } - @Nonnull - public Optional getSingle(@Nonnull String field) throws InvalidResponseException { + @Override + @Nonnull + public Optional getSingleHeader(@Nonnull String field) throws InvalidResponseException { List values = m_headers.get(field); if (values.isEmpty()) { return Optional.empty(); @@ -126,15 +137,16 @@ private Optional getDatetime(@Nonnull String field) throws Invali Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format */ - return getSingle("last-modified").map(z -> Try. 
+ return getSingleHeader(field).map(z -> Try. attempt(() -> ZonedDateTime.parse(z, DateTimeFormatter.ofPattern("EEE, dd MMM yyyy HH:mm:ss zzz")), DateTimeParseException.class) .recover(() -> ZonedDateTime.parse(z, DateTimeFormatter.ofPattern("EEEE, dd-MMM-yy HH:mm:ss zzz"))) .recover(() -> ZonedDateTime.parse(z + " GMT", DateTimeFormatter.ofPattern("EEE, MMM dd HH:mm:ss yyyy"))) .orElseThrow(new InvalidResponseException("Invalid date format " + z))); } - @Nonnull - public ImmutableMap> getHeaders() { + @Override + @Nonnull + public ImmutableMap> headers() { return ImmutableMap.copyOf(m_headers); } } diff --git a/core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponseI.java b/core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponseI.java new file mode 100644 index 0000000..98f5088 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/HttpHeadResponseI.java @@ -0,0 +1,46 @@ +package org.pharmgkb.parsers.io; + +import com.google.common.collect.ImmutableMap; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.net.URL; +import java.time.ZonedDateTime; +import java.util.List; +import java.util.Optional; + +public interface HttpHeadResponseI { + + @Nonnull + Optional getSingleHeader(@Nonnull String field) throws InvalidResponseException; + + @Nonnegative + int code(); + + @Nonnull + String message(); + + @Nonnull + URL url(); + + @Nonnull + Optional contentLength() throws InvalidResponseException; + + @Nonnull + Optional expiration(); + + @Nonnull + Optional date(); + + @Nonnull + Optional lastModified(); + + @Nonnull + Optional contentType(); + + @Nonnull + Optional contentEncoding(); + + @Nonnull + ImmutableMap> headers(); +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/InvalidResponseException.java b/core/src/main/java/org/pharmgkb/parsers/io/InvalidResponseException.java similarity index 55% rename from core/src/main/java/org/pharmgkb/parsers/utils/InvalidResponseException.java rename to
core/src/main/java/org/pharmgkb/parsers/io/InvalidResponseException.java index 53bc613..7c84fb3 100644 --- a/core/src/main/java/org/pharmgkb/parsers/utils/InvalidResponseException.java +++ b/core/src/main/java/org/pharmgkb/parsers/io/InvalidResponseException.java @@ -1,7 +1,6 @@ -package org.pharmgkb.parsers.utils; +package org.pharmgkb.parsers.io; public class InvalidResponseException extends RuntimeException { - public InvalidResponseException() { } public InvalidResponseException(String message) { super(message); @@ -14,8 +13,4 @@ public InvalidResponseException(String message, Throwable cause) { public InvalidResponseException(Throwable cause) { super(cause); } - - public InvalidResponseException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } } diff --git a/core/src/main/java/org/pharmgkb/parsers/io/IoUtils.java b/core/src/main/java/org/pharmgkb/parsers/io/IoUtils.java new file mode 100644 index 0000000..bf51994 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/IoUtils.java @@ -0,0 +1,196 @@ +package org.pharmgkb.parsers.io; + +import javax.annotation.Nonnull; +import java.io.*; +import java.net.*; +import java.nio.channels.Channels; +import java.nio.channels.FileChannel; +import java.nio.channels.ReadableByteChannel; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.stream.Stream; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +/** + * IO utilities. + * Use {@link IoUtils#readUtf8Lines}, {@link IoUtils#writeUtf8Lines(Path, Stream)}, and similar methods + * to read/write text or GZIP, depending on whether the filename extension ends with '.gz' or '.gzip'. 
+ */ +public class IoUtils { + + @Nonnull + public HttpHeadResponse getHeadResponse(@Nonnull String url) throws UncheckedIOException, InvalidResponseException { + return getHeadResponse(getUrl(url)); + } + + @Nonnull + public HttpHeadResponse getHeadResponse(@Nonnull URL url) throws UncheckedIOException, InvalidResponseException { + try { + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setRequestMethod("HEAD"); + huc.connect(); + int code = huc.getResponseCode(); + if (code >= 400) { /* only 4xx and 5xx are failures; success codes must reach the caller */ + throw new IOException("Response code is " + code); + } + return HttpHeadResponse.fromConnection(huc); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Downloads bytes. + * Works up to about 9 exabytes. + * @param url From + * @param path To + * @throws UncheckedIOException On any IO error + */ + public void downloadBytesTo(@Nonnull URL url, @Nonnull Path path) throws UncheckedIOException { + try ( + ReadableByteChannel in = Channels.newChannel(url.openStream()); + FileOutputStream out = new FileOutputStream(path.toFile()); + FileChannel channel = out.getChannel() + ) { + channel.transferFrom(in, 0, Long.MAX_VALUE); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public void downloadUtf8To(@Nonnull URL url, @Nonnull Path path) throws UncheckedIOException { + // doesn't handle gzip input + Stream stream = readUtf8LinesFromUrl(url); + writeUtf8Lines(path, stream); + } + + @Nonnull + public Stream readGzipUtf8LinesFromUrl(@Nonnull URL url) throws UncheckedIOException { + try (QuietBufferedReader br = openGzipUtf8ReaderFromUrl(url)) { + Stream stream = br.streamLinesQuietly(); + return br.streamLinesQuietly(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Nonnull + public Stream readUtf8LinesFromUrl(@Nonnull URL url) throws UncheckedIOException { + try (QuietBufferedReader br = openUtf8ReaderFromUrl(url)) { + Stream stream =
br.streamLinesQuietly(); + return br.streamLinesQuietly(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Nonnull + public QuietBufferedReader openGzipUtf8ReaderFromUrl(@Nonnull String url) throws UncheckedIOException { + return openGzipUtf8ReaderFromUrl(getUrl(url)); + } + + @Nonnull + public QuietBufferedReader openUtf8ReaderFromUrl(@Nonnull String url) throws UncheckedIOException { + return openUtf8ReaderFromUrl(getUrl(url)); + } + + @Nonnull + public QuietBufferedReader openGzipUtf8ReaderFromUrl(@Nonnull URL url) throws UncheckedIOException { + try { + return new QuietBufferedReader(new InputStreamReader(new GZIPInputStream(url.openStream()), StandardCharsets.UTF_8)); /* decode as UTF-8 explicitly, not with the platform default charset */ + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Nonnull + public QuietBufferedReader openUtf8ReaderFromUrl(@Nonnull URL url) throws UncheckedIOException { + try { + return new QuietBufferedReader(new InputStreamReader(url.openStream(), StandardCharsets.UTF_8)); /* decode as UTF-8 explicitly, not with the platform default charset */ + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Nonnull + public URL getUrl(@Nonnull String url) throws UncheckedIOException { + try { + return new URI(url).toURL(); + } catch (MalformedURLException e) { + throw new UncheckedIOException(e); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + @Nonnull + public Stream readUtf8Lines(@Nonnull Path path) throws UncheckedIOException { + try (QuietBufferedReader br = openUtf8Reader(path)) { + return br.streamLinesQuietly(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public void writeUtf8Lines(@Nonnull Path path, @Nonnull Stream lines) throws UncheckedIOException { + try (PrintWriter pw = openUtf8Writer(path, false)) { + lines.forEach(pw::println); + } + } + + public void appendUtf8Lines(@Nonnull Path path, @Nonnull Stream lines) throws UncheckedIOException { + try (PrintWriter pw = openUtf8Writer(path, true)) { + lines.forEach(pw::println); + } + } + + @Nonnull + public PrintWriter
openUtf8Writer(@Nonnull Path path, boolean append) throws UncheckedIOException { + Charset encoding = StandardCharsets.UTF_8; + try { + if (path.toString().endsWith(".gz") || path.toString().endsWith(".gzip")) { /* Path#endsWith matches whole name elements, so test the string form for the suffix */ + return new PrintWriter(new BufferedWriter(new OutputStreamWriter( + new GZIPOutputStream(new FileOutputStream(path.toFile(), append)), encoding + )), true); + } else { + return new PrintWriter(new BufferedWriter(new FileWriter(path.toString(), encoding, append)), true); /* honor the append flag so appendUtf8Lines does not truncate */ + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Nonnull + public QuietBufferedReader openUtf8Reader(@Nonnull Path path) throws UncheckedIOException { + Charset encoding = StandardCharsets.UTF_8; + try { + if (path.toString().endsWith(".gz") || path.toString().endsWith(".gzip")) { /* Path#endsWith matches whole name elements, so test the string form for the suffix */ + return new QuietBufferedReader(new InputStreamReader( + new GZIPInputStream(new FileInputStream(path.toFile())), + encoding + )); + } else { + return new QuietBufferedReader(new InputStreamReader(new FileInputStream(path.toFile()), encoding)); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + public void writeUtf8Lines(@Nonnull File file, @Nonnull Stream lines) throws UncheckedIOException { + writeUtf8Lines(file.toPath(), lines); + } + + @Nonnull + public PrintWriter openUtf8Writer(@Nonnull File file, boolean append) throws UncheckedIOException { + return openUtf8Writer(file.toPath(), append); + } + + @Nonnull + public QuietBufferedReader openUtf8Reader(@Nonnull File file) throws UncheckedIOException { + return openUtf8Reader(file.toPath()); + } + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/QuietBufferedReader.java b/core/src/main/java/org/pharmgkb/parsers/io/QuietBufferedReader.java similarity index 89% rename from core/src/main/java/org/pharmgkb/parsers/utils/QuietBufferedReader.java rename to core/src/main/java/org/pharmgkb/parsers/io/QuietBufferedReader.java index 82f1bdc..a222667 100644 --- a/core/src/main/java/org/pharmgkb/parsers/utils/QuietBufferedReader.java +++
b/core/src/main/java/org/pharmgkb/parsers/io/QuietBufferedReader.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.utils; +package org.pharmgkb.parsers.io; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -9,6 +9,10 @@ import java.util.stream.Stream; +/** + * A {@link BufferedReader} with extra methods that throw {@link UncheckedIOException} instead of {@link IOException}. + * @author Douglas Myers-Turnbull + */ public class QuietBufferedReader extends BufferedReader { public static final int DEFAULT_BUFFER_SIZE = 8192; diff --git a/core/src/main/java/org/pharmgkb/parsers/io/WebResource.java b/core/src/main/java/org/pharmgkb/parsers/io/WebResource.java new file mode 100644 index 0000000..097e547 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/WebResource.java @@ -0,0 +1,122 @@ +package org.pharmgkb.parsers.io; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; +import java.io.UncheckedIOException; +import java.lang.invoke.MethodHandles; +import java.net.URL; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Objects; +import java.util.Optional; +import java.util.StringJoiner; +import java.util.stream.Stream; + +/** + * A text resource that can be downloaded from a URL. + * Works with either text or gzipped text. 
+ * @author Douglas Myers-Turnbull + */ +@Immutable +public class WebResource> implements WebResourceI { + + private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final URL m_url; + private final CompressionFormat m_compression; + private final Path m_path; + + protected WebResource(@Nonnull String url, CompressionFormat compressionFormat, @Nonnull Optional cachePath) { + this(ioUtils().getUrl(url), compressionFormat, cachePath); + } + protected WebResource(@Nonnull URL url, CompressionFormat compressionFormat, @Nonnull Optional cachePath) { + Path path = cachePath.orElse(null); + if ( + path != null + && compressionFormat != CompressionFormat.NONE + && compressionFormat.suffixes().stream().noneMatch(path::endsWith) + ) { + sf_logger.warn("Modifying cache path {} to end with {}", path, compressionFormat.suffix()); + m_path = Paths.get(path + compressionFormat.suffix()); + } else { + m_path = path; + } + m_url = url; + m_compression = compressionFormat; + } + + protected static IoUtils ioUtils() { + return new IoUtils(); + } + + @Override + public URL url() { + return m_url; + } + + @Nonnull + public Optional cachePath() { + return Optional.ofNullable(m_path); + } + @Override + public boolean isCached() { + return m_path != null && m_path.toFile().exists() && m_path.toFile().length() > 0; + } + + @Override + @Nonnull + public WebResourceI cachingTo(@Nonnull Path path) { + return new WebResource<>(m_url, m_compression, Optional.of(path)); + } + + @Override + @Nonnull + public Stream get() throws UncheckedIOException { + if (cachePath().isPresent()) { + if (!isCached()) { + ioUtils().downloadBytesTo(m_url, m_path); + } + return ioUtils().readUtf8Lines(m_path); + } + return switch (m_compression) { + case GZIP -> ioUtils().readGzipUtf8LinesFromUrl(m_url); + case NONE -> ioUtils().readUtf8LinesFromUrl(m_url); + }; + } + + @Override + @Nonnull + public HttpHeadResponse queryHead() { + return 
ioUtils().getHeadResponse(url()); + } + + @Override + public String toString() { + return new StringJoiner(", ", WebResource.class.getSimpleName() + "[", "]") + .add("url=" + m_url) + .add("compression=" + m_compression) + .add("path=" + m_path) + .toString(); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (null == obj || getClass() != obj.getClass()) { + return false; + } + final var o = (WebResourceI) obj; + return Objects.equals(m_url, o.url()) && Objects.equals(m_path, o.cachePath().orElse(null)); /* value equality, consistent with hashCode() */ + } + + @Override + public int hashCode() { + return Objects.hash(m_url, m_path); + } + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/io/WebResourceI.java b/core/src/main/java/org/pharmgkb/parsers/io/WebResourceI.java new file mode 100644 index 0000000..8ee61b7 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/WebResourceI.java @@ -0,0 +1,31 @@ +package org.pharmgkb.parsers.io; + +import javax.annotation.Nonnull; +import java.io.UncheckedIOException; +import java.net.URL; +import java.nio.file.Path; +import java.util.Optional; +import java.util.function.Supplier; +import java.util.stream.Stream; + +/** + * A text resource that can be downloaded from a URL.
+ * @author Douglas Myers-Turnbull + * @param + */ +public interface WebResourceI> extends Supplier> { + URL url(); + + boolean isCached(); + + Optional cachePath(); + + @Nonnull + WebResourceI cachingTo(@Nonnull Path path); + + @Nonnull + Stream get() throws UncheckedIOException; + + @Nonnull + HttpHeadResponse queryHead(); +} diff --git a/core/src/main/java/org/pharmgkb/parsers/io/package-info.java b/core/src/main/java/org/pharmgkb/parsers/io/package-info.java new file mode 100644 index 0000000..8526713 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/io/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.io; diff --git a/core/src/main/java/org/pharmgkb/parsers/model/AminoAcidCode.java b/core/src/main/java/org/pharmgkb/parsers/model/AminoAcidCode.java index 1628d63..2cf71bd 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/AminoAcidCode.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/AminoAcidCode.java @@ -1,8 +1,7 @@ package org.pharmgkb.parsers.model; -import java.util.HashMap; -import java.util.Map; -import java.util.NoSuchElementException; +import java.util.*; +import java.util.stream.Collectors; public enum AminoAcidCode { @@ -37,25 +36,19 @@ public enum AminoAcidCode { public final String name; public final CodeType type; - @SuppressWarnings("ParameterHidesMemberVariable") AminoAcidCode(char character, String name, CodeType type) { this.character = character; this.name = name; this.type = type; } - private static Map sf_lookup = new HashMap<>(AminoAcidCode.values().length); - static { - for (AminoAcidCode aac : AminoAcidCode.values()) { - sf_lookup.put(aac.character, aac); - } - } + private static final Map sf_lookup = + Arrays.stream(values()) + .map(aac -> new AbstractMap.SimpleEntry<>(aac.character, aac)) + .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); public static AminoAcidCode fromChar(char character) { - if (sf_lookup.containsKey(character)) { - return sf_lookup.get(character); 
- } else { - throw new NoSuchElementException("No amino acid code " + character); - } + return Optional.ofNullable(sf_lookup.get(character)) + .orElseThrow(() -> new NoSuchElementException("No amino acid code " + character)); } } diff --git a/core/src/main/java/org/pharmgkb/parsers/model/AtomicElement.java b/core/src/main/java/org/pharmgkb/parsers/model/AtomicElement.java new file mode 100644 index 0000000..5d46f58 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/model/AtomicElement.java @@ -0,0 +1,52 @@ +package org.pharmgkb.parsers.model; + +public enum AtomicElement { + + Hydrogen("hydrogen", "H", 1), + Carbon("carbon", "C", 6), + Nitrogen("nitrogen", "N", 7), + Oxygen("oxygen", "O", 8), + Fluorine("fluorine", "F", 9), + Sodium("sodium", "Na", 11), + Magnesium("magnesium", "Mg", 12), + Phosphorus("phosphorus", "P", 15), + Sulfur("sulfur", "S", 16), + Chlorine("chlorine", "Cl", 17), + Potassium("potassium", "K", 19), + Calcium("calcium", "Ca", 20), + Manganese("manganese", "Mn", 25), + Iron("iron", "Fe", 26), + Cobalt("cobolt", "Co", 27), + Copper("copper", "Cu", 29), + Zinc("zinc", "Zn", 30), + Selenium("selenium", "Se", 34), + Bromine("bromine", "Br", 35), + Iodine("iodine", "I", 53), + Molybdenum("molybdenum", "Mo", 42), + Mercury("mercury", "Hg", 80), + + ; + + private final String name; + private final String symbol; + private final int atomicNumber; + + + AtomicElement(String name, String symbol, int atomicNumber) { + this.name = name; + this.symbol = symbol; + this.atomicNumber = atomicNumber; + } + + public String word() { + return name; + } + + public String symbol() { + return symbol; + } + + public int atomicNumber() { + return atomicNumber; + } +} diff --git a/core/src/main/java/org/pharmgkb/parsers/model/ChromosomeName.java b/core/src/main/java/org/pharmgkb/parsers/model/ChromosomeName.java index 2055c0e..d944a71 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/ChromosomeName.java +++ 
b/core/src/main/java/org/pharmgkb/parsers/model/ChromosomeName.java @@ -1,124 +1,26 @@ package org.pharmgkb.parsers.model; -import com.google.common.base.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.lang.invoke.MethodHandles; -import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Attempts to ensure consistency in chromosome names. - * @author Douglas Myers-Turnbull - */ -@Immutable -public class ChromosomeName implements Comparable { - - private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static final Pattern sf_pattern = Pattern.compile("^(?:chr)?(\\d{1,2}|X|Y|M|(?:MT))(_[A-Z]+[0-9]+v\\d+?(?:_(?:random)|(?:alt)))?$"); - - private final String m_originalName; - private final String m_name; - - @Nonnull - public static ChromosomeName ucscWithFailure(@Nonnull String name) { - Function standardizer = s -> { - Matcher matcher = sf_pattern.matcher(s); - Preconditions.checkArgument(matcher.matches(), "Chromosome name " + s + " does not match the required pattern"); - Preconditions.checkArgument(sf_pattern.matcher(s).matches(), "Chromosome name " + s + " is not standardized"); - String chrName = matcher.group(1).equals("MT")? "M" : matcher.group(1); // dbSNP and Ensembl use this, but neither GRC nor UCSC do - return "chr" + chrName + (matcher.group(2)==null? "" : matcher.group(2)); - }; - return standardized(name, standardizer); - } - - @Nonnull - public static ChromosomeName ucscWithWarning(@Nonnull String name) { - Function standardizer = s -> { - Matcher matcher = sf_pattern.matcher(s); - if (matcher.matches()) { - String chrName = matcher.group(1).equals("MT")? "M" : matcher.group(1); // dbSNP and Ensembl use this, but neither GRC nor UCSC do - return "chr" + chrName + (matcher.group(2)==null? 
"" : matcher.group(2)); - } else { - sf_logger.warn("Chromosome name {} is not standardized", s); - return s; - } - }; - return standardized(name, standardizer); - } - - @Nonnull - public static ChromosomeName standardized(@Nonnull String name, @Nonnull Function standardizer) { - return new ChromosomeName(name, standardizer.apply(name)); - } - - public ChromosomeName(@Nonnull String name) { - this(name, name); - } - - private ChromosomeName(@Nonnull String originalName, @Nonnull String standardizedName) { - Preconditions.checkNotNull(originalName); - Preconditions.checkNotNull(standardizedName); - m_originalName = originalName; - m_name = standardizedName; - } - - @Nonnull - public String getOriginalName() { - return m_originalName; - } - - @Nonnull - @Override - public String toString() { - return m_name; - } - - // TODO these aren't guaranteed to work for every convention - public boolean isMitochondial() { - return m_name.equals("chrM") || m_name.equals("M") || m_name.equals("MT") || m_name.equals("chrMT"); - } +public interface ChromosomeName extends Comparable { - public boolean isX() { - return m_name.equals("chrX") || m_name.equals("X"); - } + @Nonnull + default String original() { + return toString(); + } - public boolean isY() { - return m_name.equals("chrY") || m_name.equals("Y"); - } + boolean isMitochondial(); - public boolean isNonstandard() { - return m_name.contains("_"); - } + boolean isX(); - public boolean isAlt() { - return m_name.endsWith("alt"); - } + boolean isY(); - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - ChromosomeName that = (ChromosomeName) o; - return m_name.equals(that.m_name); - } + boolean isNonstandard(); - @Override - public int hashCode() { - return m_name.hashCode(); - } + boolean isAlt(); - @Override - public int compareTo(@Nonnull ChromosomeName o) { - return m_name.compareTo(o.m_name); - } + @Override + default int 
compareTo(@Nonnull ChromosomeName o) { + return toString().compareTo(o.toString()); + } } diff --git a/core/src/main/java/org/pharmgkb/parsers/model/CodeType.java b/core/src/main/java/org/pharmgkb/parsers/model/CodeType.java index 028477d..a4ff11f 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/CodeType.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/CodeType.java @@ -1,5 +1,10 @@ package org.pharmgkb.parsers.model; public enum CodeType { - CONCRETE, INEXACT, WILDCARD, GAP, STOP + CONCRETE, + INEXACT, + WILDCARD, + GAP, + STOP + ; } diff --git a/core/src/main/java/org/pharmgkb/parsers/model/CommonSpecies.java b/core/src/main/java/org/pharmgkb/parsers/model/CommonSpecies.java index 378a42b..2e1775b 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/CommonSpecies.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/CommonSpecies.java @@ -4,7 +4,6 @@ public enum CommonSpecies { - EColi("Escherichia coli", "E. coli"), YeastCerevisiae("Saccharomyces cerevisiae", "S. cerevisiae"), YeastPombe("Schizosaccharomyces pombe", "S. 
pombe"), @@ -35,21 +34,21 @@ public enum CommonSpecies { Chicken("Gallus gallus", "chiken") ; - private final String m_formalName; - private final String m_commonName; + private final String formalName; + private final String commonName; CommonSpecies(String formalName, String commonName) { - this.m_formalName = formalName; - this.m_commonName = commonName; + this.formalName = formalName; + this.commonName = commonName; } @Nonnull - public String getFormalName() { - return m_formalName; + public String formalName() { + return formalName; } @Nonnull - public String getCommonName() { - return m_commonName; + public String commonName() { + return commonName; } } diff --git a/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimal.java b/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimal.java index 7336313..440ce8e 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimal.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimal.java @@ -1,20 +1,18 @@ package org.pharmgkb.parsers.model; -import com.google.common.collect.Range; - import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; import java.math.BigDecimal; -import java.util.Arrays; import java.util.Optional; +import java.util.Set; /** * A {@link BigDecimal} that can be infinite or NaN. 
* @author Douglas Myers-Turnbull */ @Immutable -public class GeneralizedBigDecimal implements Comparable { +public class GeneralizedBigDecimal implements GeneralizedBigDecimalI { public static final GeneralizedBigDecimal NAN = new GeneralizedBigDecimal("NaN"); public static final GeneralizedBigDecimal POSITIVE_INFINITY = new GeneralizedBigDecimal("Inf"); @@ -33,7 +31,7 @@ public GeneralizedBigDecimal(long ell) { public GeneralizedBigDecimal(@Nonnull String string) { m_string = string; - if (Arrays.asList("Inf", "+Inf", "-Inf", "NaN").contains(string)) { + if (Set.of("Inf", "+Inf", "-Inf", "NaN").contains(string)) { m_digits = Optional.empty(); } else { m_digits = Optional.of(new BigDecimal(string)); @@ -45,37 +43,12 @@ public GeneralizedBigDecimal(@Nonnull BigDecimal bd) { m_digits = Optional.of(bd); } - @Nonnull - public Optional getValue() { + @Override + @Nonnull + public Optional value() { return m_digits; } - public boolean isInfinite() { - return m_string.endsWith("Inf"); - } - - public boolean isPositiveInfinity() { - return m_string.equals("Inf") || m_string.equals("+Inf"); - } - - public boolean isNegativeInfinity() { - return m_string.equals("-Inf"); - } - - public boolean isNan() { - return m_string.equals("NaN"); - } - - /** - * Returns one of the following: - *
    - *
  • {@code NaN}
  • - *
  • {@code Inf}
  • - *
  • {@code +Inf}
  • - *
  • {@code -Inf}
  • - *
  • the
  • - *
- */ @Nonnull @Override public String toString() { @@ -86,11 +59,11 @@ public String toString() { * {@code Inf} and {@code +Inf} are considered to be equal. */ @Override - public boolean equals(@Nullable Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - GeneralizedBigDecimal that = (GeneralizedBigDecimal) o; - return m_string.equals(that.m_string) || isPositiveInfinity() && that.isPositiveInfinity(); + public boolean equals(@Nullable Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + GeneralizedBigDecimal o = (GeneralizedBigDecimal) obj; + return m_string.equals(o.m_string) || isPositiveInfinity() && o.isPositiveInfinity(); } @Override @@ -98,81 +71,4 @@ public int hashCode() { return m_string.hashCode(); } - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - @Override - public int compareTo(@Nonnull GeneralizedBigDecimal o) { - if (isNan() || o.isNan()) { - throw new UnsupportedOperationException("Can't compare NaN to anything (including another NaN)"); - } - if (m_digits.isPresent() && o.m_digits.isPresent()) { - return m_digits.get().compareTo(o.m_digits.get()); - } - if (isPositiveInfinity() && o.isPositiveInfinity() || isNegativeInfinity() && o.isNegativeInfinity()) return 0; - return isPositiveInfinity()? 
1 : -1; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean lessThan(@Nonnull String o) { - return compareTo(new GeneralizedBigDecimal(o)) < 0; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean containedIn(@Nonnull Range range) { - return range.contains(this); - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean lessThanOrEqual(@Nonnull String o) { - return compareTo(new GeneralizedBigDecimal(o)) < 1; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean greaterThan(@Nonnull String o) { - return compareTo(new GeneralizedBigDecimal(o)) > 0; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean greaterThanOrEqual(@Nonnull String o) { - return compareTo(new GeneralizedBigDecimal(o)) > -1; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean lessThan(@Nonnull GeneralizedBigDecimal o) { - return compareTo(o) < 0; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean lessThanOrEqual(@Nonnull GeneralizedBigDecimal o) { - return compareTo(o) < 1; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean greaterThan(@Nonnull GeneralizedBigDecimal o) { - return compareTo(o) > 0; - } - - /** - * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} - */ - public boolean greaterThanOrEqual(@Nonnull GeneralizedBigDecimal o) { - return compareTo(o) > -1; - } } diff --git a/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalI.java b/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalI.java new file mode 100644 index 0000000..f9c45eb --- /dev/null +++ 
b/core/src/main/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalI.java @@ -0,0 +1,138 @@ +package org.pharmgkb.parsers.model; + +import com.google.common.collect.Range; + +import javax.annotation.Nonnull; +import java.math.BigDecimal; +import java.util.Optional; + +public interface GeneralizedBigDecimalI extends Comparable { + + @Nonnull + Optional value(); + + default boolean isInfinite() { + return toString().endsWith("Inf"); + } + + default boolean isPositiveInfinity() { + return "Inf".equals(toString()) || "+Inf".equals(toString()); + } + + default boolean isNegativeInfinity() { + return "-Inf".equals(toString()); + } + + default boolean isNan() { + return "NaN".equals(toString()); + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + @Override + default int compareTo(@Nonnull GeneralizedBigDecimalI o) { + if (isNan() || o.isNan()) { + throw new UnsupportedOperationException("Can't compare NaN to anything (including another NaN)"); + } + if (value().isPresent() && o.value().isPresent()) { + return value().get().compareTo(o.value().get()); + } + if (isPositiveInfinity() && o.isPositiveInfinity() || isNegativeInfinity() && o.isNegativeInfinity()) { + return 0; + } + return isPositiveInfinity()? 
1 : -1; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean containedIn(@Nonnull Range range) { + return range.contains(this); + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean lessThanOrEqual(@Nonnull String o) { + return compareTo(new GeneralizedBigDecimal(o)) < 1; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean greaterThan(@Nonnull String o) { + return compareTo(new GeneralizedBigDecimal(o)) > 0; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean greaterThanOrEqual(@Nonnull String o) { + return compareTo(new GeneralizedBigDecimal(o)) > -1; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean lessThanOrEqual(int o) { + return compareTo(new GeneralizedBigDecimal(o)) < 1; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean greaterThan(int o) { + return compareTo(new GeneralizedBigDecimal(o)) > 0; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean greaterThanOrEqual(int o) { + return compareTo(new GeneralizedBigDecimal(o)) > -1; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean lessThan(@Nonnull GeneralizedBigDecimalI o) { + return compareTo(o) < 0; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean lessThanOrEqual(@Nonnull GeneralizedBigDecimalI o) { + return compareTo(o) < 1; + } + + /** + * @throws UnsupportedOperationException If this or {@code o} is {@code NaN} + */ + default boolean greaterThan(@Nonnull GeneralizedBigDecimalI o) { + return compareTo(o) > 0; + } + + /** + * @throws UnsupportedOperationException 
If this or {@code o} is {@code NaN} + */ + default boolean greaterThanOrEqual(@Nonnull GeneralizedBigDecimalI o) { + return compareTo(o) > -1; + } + + /** + * Returns one of the following: + *
    + *
  • {@code NaN}
  • + *
  • {@code Inf}
  • + *
  • {@code +Inf}
  • + *
  • {@code -Inf}
  • + *
  • the
  • + *
+ */ + @Nonnull + @Override + String toString(); + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/model/Locus.java b/core/src/main/java/org/pharmgkb/parsers/model/Locus.java index 043095c..1cbceb8 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/Locus.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/Locus.java @@ -1,13 +1,10 @@ package org.pharmgkb.parsers.model; -import com.google.common.base.Preconditions; import com.google.common.collect.ComparisonChain; -import javax.annotation.Nonnegative; import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import javax.annotation.concurrent.Immutable; +import java.util.NoSuchElementException; import java.util.Objects; import java.util.Optional; import java.util.regex.Matcher; @@ -16,25 +13,26 @@ /** * A locus on a reference genome. As always in this package, the position is 0-based. Negative values are permitted. * @author Douglas Myers-Turnbull + * + * chromosome A chromosome name from GRCh38 (starts with "chr"; the mitochrondrial chromosome is "chrM") + * position A 0-based position on the chromosome + * strand "+", "-", "?" 
*/ -@Immutable -public class Locus implements Comparable { +public record Locus( + ChromosomeName chromosome, + long position, + Strand strand +) implements Comparable { - private static final Pattern sf_pattern = Pattern.compile("^(chr(?:(?:\\d{1,2})|X|Y|M))\\(([+\\-?])\\):(-?\\d+)$"); - - private final ChromosomeName m_chromosome; - - private final long m_position; - - private final Strand m_strand; + private static final Pattern sf_pattern = Pattern.compile("^(chr(?:\\d{1,2}|X|Y|M))\\(([+\\-?])\\):(-?\\d+)$"); @Nonnull public static Locus parse(@Nonnull String string) { - Preconditions.checkNotNull(string); + Objects.requireNonNull(string); Matcher matcher = sf_pattern.matcher(string); if (matcher.matches()) { - ChromosomeName chr = new ChromosomeName(matcher.group(1)); - Optional strand = Strand.lookupBySymbol(matcher.group(2)); + ChromosomeName chr = new StandardChromosomeName(matcher.group(1)); + Optional strand = Strand.fromSymbol(matcher.group(2)); if (strand.isPresent()) { long pos = Long.parseLong(matcher.group(3)); return new Locus(chr, pos, strand.get()); @@ -44,82 +42,39 @@ public static Locus parse(@Nonnull String string) { throw new IllegalArgumentException("String " + string + " is not a valid locus"); } - /** - * @param chromosome A chromosome name from GRCh38 (starts with "chr"; the mitochrondrial chromosome is "chrM") - * @param position A 0-based position on the chromosome - * @param strand "+", "-", "?" 
- */ - public Locus(@Nonnull String chromosome, @Nonnegative long position, @Nonnull String strand) { - Preconditions.checkNotNull(chromosome); - Preconditions.checkNotNull(strand); - Optional strandInstance = Strand.lookupBySymbol(strand); - Preconditions.checkArgument(strandInstance.isPresent(), "Unknown strand " + strand); - m_chromosome = new ChromosomeName(chromosome); - m_position = position; - m_strand = strandInstance.get(); - } - - /** - * @param chromosome A chromosome name from GRCh38 (starts with "chr"; the mitochrondrial chromosome is "chrM") - * @param position A 0-based position on the chromosome - */ - public Locus(@Nonnull ChromosomeName chromosome, long position, @Nonnull Strand strand) { - Preconditions.checkNotNull(chromosome); - Preconditions.checkNotNull(strand); - m_chromosome = chromosome; - m_position = position; - m_strand = strand; + public Locus { + Objects.requireNonNull(chromosome); + Objects.requireNonNull(strand); } - public Locus(@Nonnull String chromosome, long position, @Nonnull Strand strand) { - Preconditions.checkNotNull(chromosome); - Preconditions.checkNotNull(strand); - m_chromosome = new ChromosomeName(chromosome); - m_position = position; - m_strand = strand; - } - - /** - * @return A standard chromosome name - */ - @Nonnull - public ChromosomeName getChromosome() { - return m_chromosome; + public Locus(@Nonnull String chromosome, long position, @Nonnull String strand) { + this( + new StandardChromosomeName(chromosome), + position, + Strand.fromSymbol(strand).orElseThrow(() -> new NoSuchElementException("No strand named " + strand)) + ); + Objects.requireNonNull(chromosome); + Objects.requireNonNull(strand); } - /** - * @return A 0-based position on the chromosome. 
- */ - public long getPosition() { - return m_position; - } - - @Nonnull - public Strand getStrand() { - return m_strand; + public Locus(@Nonnull String chromosome, long position, @Nonnull Strand strand) { + this( + new StandardChromosomeName(chromosome), + position, + strand + ); + Objects.requireNonNull(chromosome); + Objects.requireNonNull(strand); } public boolean isCompatibleWith(@Nonnull Locus locus) { - return m_chromosome.equals(locus.m_chromosome) && m_strand == locus.m_strand; + return chromosome.equals(locus.chromosome) && strand == locus.strand; } @Nonnull @Override public String toString() { - return m_chromosome + "(" + m_strand.getSymbol() + ")" + ":" + m_position; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Locus locus = (Locus) o; - return m_position == locus.m_position && m_chromosome.equals(locus.m_chromosome) && m_strand == locus.m_strand; - } - - @Override - public int hashCode() { - return Objects.hash(m_chromosome, m_position, m_strand); + return chromosome + "(" + strand.symbol() + ")" + ":" + position; } /** @@ -128,9 +83,9 @@ public int hashCode() { @Override public int compareTo(@Nonnull Locus o) { return ComparisonChain.start() - .compare(m_chromosome, o.m_chromosome) - .compare(m_position, o.m_position) - .compare(m_strand, o.m_strand) + .compare(chromosome, o.chromosome) + .compare(position, o.position) + .compare(strand, o.strand) .result(); } } diff --git a/core/src/main/java/org/pharmgkb/parsers/model/LocusRange.java b/core/src/main/java/org/pharmgkb/parsers/model/LocusRange.java index 9cdc375..0f42b37 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/LocusRange.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/LocusRange.java @@ -5,9 +5,9 @@ import javax.annotation.Nonnegative; import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; import java.util.Objects; import 
java.util.Optional; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -15,60 +15,51 @@ * A range of {@link Locus loci}. Contains a start and an end, and provides methods for determining overlap. * @author Douglas Myers-Turnbull */ -@Immutable -public class LocusRange implements Comparable { +public record LocusRange( + @Nonnull Locus start, + @Nonnull Locus end +) implements Comparable { - private final static Pattern sf_pattern = Pattern.compile("^(chr(?:(?:\\d{1,2})|X|Y|M))\\(([+-])\\):(\\d+)-(\\d+)$"); - - private final Locus m_start; - private final Locus m_end; - - @Nonnull - public Strand getStrand() { - return m_start.getStrand(); - } - - @Nonnull - public ChromosomeName getChromosome() { - return m_start.getChromosome(); - } + private static final Pattern sf_pattern = Pattern.compile("^(chr(?:\\d{1,2}|X|Y|M))\\(([+-])\\):(\\d+)-(\\d+)$"); /** * @throws IllegalArgumentException If start and end belong to different chromosomes or strands, or if end comes before start */ - public LocusRange(@Nonnull Locus start, @Nonnull Locus end) { - if (!start.getChromosome().equals(end.getChromosome())) { + public LocusRange { + Objects.requireNonNull(start); + Objects.requireNonNull(end); + if (!start.chromosome().equals(end.chromosome())) { throw new IllegalArgumentException("Start and end must have the same chromosome"); } - if (start.getStrand() != end.getStrand()) { + if (start.strand() != end.strand()) { throw new IllegalArgumentException("Start and end must belong to the same strand"); } - if (start.getPosition() > end.getPosition()) { + if (start.position() > end.position()) { throw new IllegalArgumentException("End " + end + " was not after start " + start); } - m_start = start; - m_end = end; } - @Nonnull - public Locus getStart() { - return m_start; + @Nonnull + public Strand strand() { + return start.strand(); } - @Nonnull - public Locus getEnd() { - return m_end; + @Nonnull + public ChromosomeName 
chromosome() { + return start.chromosome(); } /** - * @throws java.lang.IllegalArgumentException If the strand of {@code locus} does not match the strand of this range + * @throws IllegalArgumentException If the strand of {@code locus} does not match the strand of this range */ public boolean contains(@Nonnull Locus locus) { - Preconditions.checkArgument(locus.getStrand() == getStrand(), - "Cannot compare loci " + "belonging to different strands"); - return locus.getChromosome().equals(getChromosome()) - && locus.getPosition() >= m_start.getPosition() - && locus.getPosition() <= m_end.getPosition(); + Preconditions.checkArgument( + locus.strand() == strand(), + "Cannot compare loci belonging to different strands" + ); + return locus.chromosome().equals(chromosome()) + && locus.position() >= start.position() + && locus.position() <= end.position(); } public boolean overlapsWith(@Nonnull LocusRange locusRange) { @@ -81,12 +72,12 @@ public boolean overlapsWith(@Nonnull LocusRange locusRange) { */ public long calcOverlappingDensity(@Nonnull LocusRange locusRange) { Preconditions.checkArgument( - locusRange.getStrand() == getStrand(), - "Cannot compare loci belonging to different strands" + locusRange.strand() == strand(), + "Cannot compare loci belonging to different strands" ); - if (!locusRange.getChromosome().equals(getChromosome())) return 0; - return Math.min(m_end.getPosition(), locusRange.getEnd().getPosition()) - - Math.max(m_start.getPosition(), locusRange.getStart().getPosition() + if (!locusRange.chromosome().equals(chromosome())) return 0; + return Math.min(end.position(), locusRange.end().position()) + - Math.max(start.position(), locusRange.start().position() ); } @@ -94,25 +85,12 @@ public long calcOverlappingDensity(@Nonnull LocusRange locusRange) { * @return True if and only if the chromosomes and strands are the same */ public boolean isCompatibleWith(@Nonnull LocusRange range) { - return getChromosome().equals(range.getChromosome()) && getStrand() == 
range.getStrand(); + return chromosome().equals(range.chromosome()) && strand() == range.strand(); } @Nonnegative public long length() { - return m_end.getPosition() - m_start.getPosition(); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - LocusRange that = (LocusRange) o; - return m_end.equals(that.m_end) && m_start.equals(that.m_start); - } - - @Override - public int hashCode() { - return Objects.hash(m_start, m_end); + return end.position() - start.position(); } /** @@ -120,10 +98,10 @@ public int hashCode() { */ @Override public String toString() { - return m_start.getChromosome() + - "(" + m_start.getStrand().getSymbol() + ")" - + ":" + m_start.getPosition() - + '-' + m_end.getPosition(); + return start.chromosome() + + "(" + start.strand().symbol() + ")" + + ":" + start.position() + + '-' + end.position(); } /** @@ -133,25 +111,33 @@ public String toString() { @Override public int compareTo(@Nonnull LocusRange o) { return ComparisonChain.start() - .compare(m_start, o.m_start) - .compare(m_end, o.m_end) + .compare(start, o.start) + .compare(end, o.end) .result(); } /** * @param string A string in the form chromosome(strand):start-end; e.g. chr1(+):5-10 */ - @Nonnull + @Nonnull public static LocusRange parse(@Nonnull String string) { + return parse(string, StandardChromosomeName::new); + } + + /** + * @param string A string in the form chromosome(strand):start-end; e.g. 
chr1(+):5-10 + */ + @Nonnull + public static LocusRange parse(@Nonnull String string, Function chromosomeNameFunction) { Matcher matcher = sf_pattern.matcher(string); Preconditions.checkArgument(matcher.matches(), "String " + string + " is not a valid locus range"); String chromosome = matcher.group(1); - Optional strand = Strand.lookupBySymbol(matcher.group(2)); + Optional strand = Strand.fromSymbol(matcher.group(2)); if (strand.isPresent()) { long startPosition = Long.parseLong(matcher.group(3)); long stopPosition = Long.parseLong(matcher.group(4)); - Locus start = new Locus(new ChromosomeName(chromosome), startPosition, strand.get()); - Locus stop = new Locus(new ChromosomeName(chromosome), stopPosition, strand.get()); + Locus start = new Locus(chromosomeNameFunction.apply(chromosome), startPosition, strand.get()); + Locus stop = new Locus(chromosomeNameFunction.apply(chromosome), stopPosition, strand.get()); return new LocusRange(start, stop); } throw new IllegalArgumentException("String " + string + " is not a valid locus range"); diff --git a/core/src/main/java/org/pharmgkb/parsers/model/NucleotideCode.java b/core/src/main/java/org/pharmgkb/parsers/model/NucleotideCode.java index 3922e66..8109eb8 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/NucleotideCode.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/NucleotideCode.java @@ -1,8 +1,7 @@ package org.pharmgkb.parsers.model; -import java.util.HashMap; -import java.util.Map; -import java.util.NoSuchElementException; +import java.util.*; +import java.util.stream.Collectors; public enum NucleotideCode { @@ -29,25 +28,19 @@ public enum NucleotideCode { public final String name; public final CodeType type; - @SuppressWarnings("ParameterHidesMemberVariable") NucleotideCode(char character, String name, CodeType type) { this.character = character; this.name = name; this.type = type; } - private static Map sf_lookup = new HashMap<>(NucleotideCode.values().length); - static { - for (NucleotideCode 
nc : NucleotideCode.values()) { - sf_lookup.put(nc.character, nc); - } - } + private static final Map sf_lookup = + Arrays.stream(values()) + .map(aac -> new AbstractMap.SimpleEntry(aac.character, aac)) + .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); + public static NucleotideCode fromChar(char character) { - if (sf_lookup.containsKey(character)) { - return sf_lookup.get(character); - } else { - throw new NoSuchElementException("No nucleotide code " + character); - } + return Optional.ofNullable(sf_lookup.get(character)) + .orElseThrow(() -> new NoSuchElementException("No nucleotide code " + character)); } } - diff --git a/core/src/main/java/org/pharmgkb/parsers/model/StandardChromosomeName.java b/core/src/main/java/org/pharmgkb/parsers/model/StandardChromosomeName.java new file mode 100644 index 0000000..1afa75e --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/model/StandardChromosomeName.java @@ -0,0 +1,139 @@ +package org.pharmgkb.parsers.model; + +import java.util.Objects; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; +import java.lang.invoke.MethodHandles; +import java.util.Set; +import java.util.function.Function; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Attempts to ensure consistency in chromosome names. 
+ * @author Douglas Myers-Turnbull + */ +@Immutable +public final class StandardChromosomeName implements ChromosomeName { + + private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final Pattern sf_pattern = Pattern.compile("^(?:chr)?(\\d{1,2}|X|Y|M|MT)(_[A-Z]+[0-9]+v\\d+?(?:_random|alt))?$"); + + private final String m_originalName; + private final String m_name; + + @Nonnull + public static StandardChromosomeName ucscWithFailure(@Nonnull String name) { + Function standardizer = s -> { + Matcher matcher = sf_pattern.matcher(s); + Preconditions.checkArgument(matcher.matches(), "Chromosome name " + s + " does not match the required pattern"); + Preconditions.checkArgument(sf_pattern.matcher(s).matches(), "Chromosome name " + s + " is not standardized"); + String chrName = "MT".equals(matcher.group(1))? "M" : matcher.group(1); // dbSNP and Ensembl use this, but neither GRC nor UCSC do + return "chr" + chrName + (matcher.group(2)==null? "" : matcher.group(2)); + }; + return standardized(name, standardizer); + } + + @Nonnull + public static StandardChromosomeName ucscWithWarning(@Nonnull String name) { + Function standardizer = s -> { + Matcher matcher = sf_pattern.matcher(s); + if (matcher.matches()) { + String chrName = "MT".equals(matcher.group(1))? "M" : matcher.group(1); // dbSNP and Ensembl use this, but neither GRC nor UCSC do + return "chr" + chrName + (matcher.group(2)==null? 
"" : matcher.group(2)); + } else { + sf_logger.warn("Chromosome name {} is not standardized", s); + return s; + } + }; + return standardized(name, standardizer); + } + + @Nonnull + public static StandardChromosomeName standardized( + @Nonnull String name, + @Nonnull Function standardizer + ) { + return new StandardChromosomeName(name, standardizer.apply(name)); + } + + public StandardChromosomeName(@Nonnull String name) { + this(name, name); + } + + private StandardChromosomeName(@Nonnull String originalName, @Nonnull String standardizedName) { + Objects.requireNonNull(originalName); + Objects.requireNonNull(standardizedName); + m_originalName = originalName; + m_name = standardizedName; + } + + @Override + @Nonnull + public String original() { + return m_originalName; + } + + // TODO these aren't guaranteed to work for every convention + + @Override + public boolean isMitochondial() { + return Set.of("chrM", "M", "MT", "chrMT").contains(m_name); + } + + @Override + public boolean isX() { + return "chrX".equals(m_name) || "X".equals(m_name); + } + + @Override + public boolean isY() { + return "chrY".equals(m_name) || "Y".equals(m_name); + } + + @Override + public boolean isNonstandard() { + return m_name.contains("_"); + } + + @Override + public boolean isAlt() { + return m_name.endsWith("alt"); + } + + @Nonnull + @Override + public String toString() { + return m_name; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (null == obj) { + return false; + } + if (!(obj instanceof ChromosomeName)) { + throw new IllegalArgumentException( + "Type " + + obj.getClass().getName() + + " is incompatible with " + + getClass().getName() + ); + } + final var o = (ChromosomeName) obj; + return Objects.equals(toString(), o.toString()); + } + + @Override + public int hashCode() { + return m_name.hashCode(); + } +} diff --git a/core/src/main/java/org/pharmgkb/parsers/model/Strand.java 
b/core/src/main/java/org/pharmgkb/parsers/model/Strand.java index 7e9a443..97c5b28 100644 --- a/core/src/main/java/org/pharmgkb/parsers/model/Strand.java +++ b/core/src/main/java/org/pharmgkb/parsers/model/Strand.java @@ -13,7 +13,9 @@ */ public enum Strand { - PLUS("+"), MINUS("-"); + PLUS("+"), + MINUS("-") + ; private final String m_symbol; @@ -22,12 +24,12 @@ public enum Strand { } @Nonnull - public String getSymbol() { + public String symbol() { return m_symbol; } @Nonnull - public static Optional lookupBySymbol(@Nonnull String symbol) { + public static Optional fromSymbol(@Nonnull String symbol) { return switch (symbol) { case "+" -> Optional.of(PLUS); case "-" -> Optional.of(MINUS); diff --git a/core/src/main/java/org/pharmgkb/parsers/model/package-info.java b/core/src/main/java/org/pharmgkb/parsers/model/package-info.java new file mode 100644 index 0000000..1ed1d5c --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.model; diff --git a/core/src/main/java/org/pharmgkb/parsers/package-info.java b/core/src/main/java/org/pharmgkb/parsers/package-info.java new file mode 100644 index 0000000..a038df2 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers; diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/IoUtils.java b/core/src/main/java/org/pharmgkb/parsers/utils/IoUtils.java deleted file mode 100644 index 13bd746..0000000 --- a/core/src/main/java/org/pharmgkb/parsers/utils/IoUtils.java +++ /dev/null @@ -1,196 +0,0 @@ -package org.pharmgkb.parsers.utils; - -import javax.annotation.Nonnull; -import java.io.*; -import java.net.HttpURLConnection; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; -import java.nio.channels.ReadableByteChannel; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import 
java.nio.file.Path; -import java.util.stream.Stream; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -/** - * IO utilities. - * Use {@link IoUtils#readUtf8Lines}, {@link IoUtils#writeUtf8Lines(Path, Stream)}, and similar methods - * to read/write text or GZIP, depending on whether the filename extension ends with '.gz' or '.gzip'. - */ -public class IoUtils { - - @Nonnull - public static HttpHeadResponse getHeadResponse(@Nonnull String url) throws UncheckedIOException, InvalidResponseException { - return getHeadResponse(getUrl(url)); - } - - @Nonnull - public static HttpHeadResponse getHeadResponse(@Nonnull URL url) throws UncheckedIOException, InvalidResponseException { - try { - HttpURLConnection huc = (HttpURLConnection) url.openConnection(); - huc.setRequestMethod("HEAD"); - huc.connect(); - int code = huc.getResponseCode(); - if (code < 400 || code >= 500) { - throw new IOException("Response code is " + huc.getResponseCode()); - } - return HttpHeadResponse.fromConnection(huc); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - /** - * Downloads bytes. - * Works up to about 9 exabytes. 
- * @param url From - * @param path To - * @throws UncheckedIOException On any IO error - */ - public static void downloadBytesTo(@Nonnull URL url, @Nonnull Path path) throws UncheckedIOException { - try { - ReadableByteChannel in = Channels.newChannel(url.openStream()); - try (FileOutputStream out = new FileOutputStream(path.toFile())) { - try (FileChannel channel = out.getChannel()) { - channel.transferFrom(in, 0, Long.MAX_VALUE); - } - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - public static void downloadUtf8To(@Nonnull URL url, @Nonnull Path path) throws UncheckedIOException { - // doesn't handle gzip input - Stream stream = readUtf8LinesFromUrl(url); - writeUtf8Lines(path, stream); - } - - @Nonnull - public static Stream readGzipUtf8LinesFromUrl(@Nonnull URL url) throws UncheckedIOException { - try (QuietBufferedReader br = openGzipUtf8ReaderFromUrl(url)) { - Stream stream = br.streamLinesQuietly(); - return br.streamLinesQuietly(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Nonnull - public static Stream readUtf8LinesFromUrl(@Nonnull URL url) throws UncheckedIOException { - try (QuietBufferedReader br = openUtf8ReaderFromUrl(url)) { - Stream stream = br.streamLinesQuietly(); - return br.streamLinesQuietly(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Nonnull - public static QuietBufferedReader openGzipUtf8ReaderFromUrl(@Nonnull String url) throws UncheckedIOException { - return openGzipUtf8ReaderFromUrl(getUrl(url)); - } - - @Nonnull - public static QuietBufferedReader openUtf8ReaderFromUrl(@Nonnull String url) throws UncheckedIOException { - return openUtf8ReaderFromUrl(getUrl(url)); - } - - @Nonnull - public static QuietBufferedReader openGzipUtf8ReaderFromUrl(@Nonnull URL url) throws UncheckedIOException { - try { - return new QuietBufferedReader((new InputStreamReader(new GZIPInputStream(url.openStream())))); - } catch (IOException e) { - throw new 
UncheckedIOException(e); - } - } - - @Nonnull - public static QuietBufferedReader openUtf8ReaderFromUrl(@Nonnull URL url) throws UncheckedIOException { - try { - return new QuietBufferedReader((new InputStreamReader(url.openStream()))); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Nonnull - public static URL getUrl(@Nonnull String url) throws UncheckedIOException { - try { - return new URL(url); - } catch (MalformedURLException e) { - throw new UncheckedIOException(e); - } - } - - @Nonnull - public static Stream readUtf8Lines(@Nonnull Path path) throws UncheckedIOException { - try (QuietBufferedReader br = IoUtils.openUtf8Reader(path)) { - return br.streamLinesQuietly(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - public static void writeUtf8Lines(@Nonnull Path path, @Nonnull Stream lines) throws UncheckedIOException { - try (PrintWriter pw = openUtf8Writer(path, false)) { - lines.forEach(pw::println); - } - } - - public static void appendUtf8Lines(@Nonnull Path path, @Nonnull Stream lines) throws UncheckedIOException { - try (PrintWriter pw = openUtf8Writer(path, true)) { - lines.forEach(pw::println); - } - } - - @Nonnull - public static PrintWriter openUtf8Writer(@Nonnull Path path, boolean append) throws UncheckedIOException { - Charset encoding = StandardCharsets.UTF_8; - try { - if (path.endsWith(".gz") || path.endsWith(".gzip")) { - return new PrintWriter(new BufferedWriter(new OutputStreamWriter( - new GZIPOutputStream(new FileOutputStream(path.toFile(), append)), encoding - )), true); - } else { - return new PrintWriter(new BufferedWriter(new FileWriter(path.toString(), encoding, false)), true); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Nonnull - public static QuietBufferedReader openUtf8Reader(@Nonnull Path path) throws UncheckedIOException { - Charset encoding = StandardCharsets.UTF_8; - try { - if (path.endsWith(".gz") || path.endsWith(".gzip")) { - 
return new QuietBufferedReader(new InputStreamReader( - new GZIPInputStream(new FileInputStream(path.toFile())), - encoding - )); - } else { - return new QuietBufferedReader(new InputStreamReader(new FileInputStream(path.toFile()), encoding)); - } - } catch (IOException e) { - throw new UncheckedIOException(e); - } - } - - @Nonnull - public static void writeUtf8Lines(@Nonnull File file, @Nonnull Stream lines) throws UncheckedIOException { - writeUtf8Lines(file.toPath(), lines); - } - @Nonnull - public static PrintWriter openUtf8Writer(@Nonnull File file, boolean append) throws UncheckedIOException { - return openUtf8Writer(file.toPath(), append); - } - @Nonnull - public static QuietBufferedReader openUtf8Reader(@Nonnull File file) throws UncheckedIOException { - return openUtf8Reader(file.toPath()); - } - -} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/OptPairList.java b/core/src/main/java/org/pharmgkb/parsers/utils/OptPairList.java new file mode 100644 index 0000000..6dad131 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/utils/OptPairList.java @@ -0,0 +1,56 @@ +package org.pharmgkb.parsers.utils; + +import javax.annotation.Nullable; +import java.util.Collection; +import java.util.Objects; +import java.util.Optional; + +/** + * A {@link PairList} of two optional values. + * Call {@link #getLeft()} and {@link #getRight()} to get a value (throwing an exception if empty). + * @param The value type (not an optional) + */ +public class OptPairList extends PairList> { + + /** + * Creates a new {@code OptPair} + * containing {@code Optional.ofNullable(left)} and {@code Optional.ofNullable(right)}. 
+ */ + public static OptPairList ofNullable(@Nullable T left, @Nullable T right) { + return new OptPairList<>(Optional.ofNullable(left), Optional.ofNullable(right)); + } + + public static OptPairList ofLeft(T left) { + return new OptPairList<>(Optional.of(left), Optional.empty()); + } + + public static OptPairList ofRight(T right) { + return new OptPairList<>(Optional.empty(), Optional.of(right)); + } + + public OptPairList(Optional left, Optional right) { + super(left, right); + } + + public OptPairList(Collection> collection) { + super(collection); + } + + public T getLeft() { + return left().orElseThrow(); + } + + public T getRight() { + return right().orElseThrow(); + } + + @Override + public String toString() { + return "(" + + left().map(Objects::toString).orElse("null") + + ", " + + right().map(Objects::toString).orElse("null") + + ")"; + } + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/PairList.java b/core/src/main/java/org/pharmgkb/parsers/utils/PairList.java new file mode 100644 index 0000000..06c15d9 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/utils/PairList.java @@ -0,0 +1,69 @@ +package org.pharmgkb.parsers.utils; + +import java.util.*; + +/** + * A list of exactly two items of the same type, which must be non-null. + * Call {@link #left()} and {@link #right()}. + * @param The element type + */ +public class PairList extends AbstractList { + private final T left; + private final T right; + + public PairList(T left, T right) { + this.left = left; + this.right = right; + } + + public PairList(Collection collection) { + if (collection.size() != 2) { + throw new IllegalArgumentException( + "Must contain exactly 2 elements, not " + collection.size() + ". Contents: [" + collection + "]." 
+ ); + } + List list = List.copyOf(collection); + left = list.get(0); + right = list.get(1); + } + + public T left() { + return left; + } + + public T right() { + return right; + } + + @Override + public T get(int index) { + return switch (index) { + case 0 -> left; + case 1 -> right; + default -> throw new IndexOutOfBoundsException("Index " + index + " > 2."); + }; + } + + @Override + public int size() { + return 2; + } + + @Override + public String toString() { + return "(" + left + ", " + right + ")"; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), left, right); + } + + @Override + public boolean equals(Object obj) { + return switch (obj) { + case PairList pair -> left == pair.left && right == pair.right; + default -> throw new IllegalArgumentException("Cannot compare type " + obj.getClass().getName() + " to Pair"); + }; + } +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructor.java b/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructor.java index 38c7a3a..4ce59f9 100644 --- a/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructor.java +++ b/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructor.java @@ -4,16 +4,7 @@ import javax.annotation.concurrent.Immutable; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; - -class RuntimeReflectionException extends RuntimeException { - public RuntimeReflectionException() { } - public RuntimeReflectionException(String message) { super(message); } - public RuntimeReflectionException(String message, Throwable cause) { super(message, cause); } - public RuntimeReflectionException(Throwable cause) { super(cause); } - public RuntimeReflectionException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - } -} +import java.util.StringJoiner; /** * Helper to deal with type erasure 
problems. @@ -35,31 +26,37 @@ public RuntimeReflectionException(String message, Throwable cause, boolean enabl * @param The type of the class */ @Immutable -public class ReflectingConstructor { +public final class ReflectingConstructor implements ReflectingConstructorI { private final Class m_clazz; - private final Class[] m_signature; - private final Constructor m_constructor; + private final Constructor m_constructor; public ReflectingConstructor(@Nonnull Class clazz, @Nonnull Class... signature) { m_clazz = clazz; - m_signature = signature; - try { + try { m_constructor = clazz.getConstructor(String.class); } catch (NoSuchMethodException e) { throw new UnsupportedOperationException(e); } } - @Nonnull + @Override + @Nonnull public C instance(@Nonnull Object... args) { try { return m_constructor.newInstance(args); } catch (InstantiationException | IllegalAccessException | SecurityException e) { throw new UnsupportedOperationException("Failed to find constructor with signature (String)", e); } catch (InvocationTargetException e) { - throw new RuntimeReflectionException("Failed calling constructor (String) for " + this.m_clazz.getName(), e); + throw new RuntimeReflectionException("Failed calling constructor (String) for " + m_clazz.getName(), e); } } + @Override + public String toString() { + return new StringJoiner(", ", ReflectingConstructor.class.getSimpleName() + "[", "]") + .add("clazz=" + m_clazz) + .add("constructor=" + m_constructor) + .toString(); + } } diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructorI.java b/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructorI.java new file mode 100644 index 0000000..26df877 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/utils/ReflectingConstructorI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.utils; + +import javax.annotation.Nonnull; + +public interface ReflectingConstructorI { + @Nonnull + C instance(@Nonnull Object... 
args); +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/RuntimeReflectionException.java b/core/src/main/java/org/pharmgkb/parsers/utils/RuntimeReflectionException.java new file mode 100644 index 0000000..ffb9999 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/utils/RuntimeReflectionException.java @@ -0,0 +1,11 @@ +package org.pharmgkb.parsers.utils; + +public class RuntimeReflectionException extends RuntimeException { + public RuntimeReflectionException() { } + public RuntimeReflectionException(String message) { super(message); } + public RuntimeReflectionException(String message, Throwable cause) { super(message, cause); } + public RuntimeReflectionException(Throwable cause) { super(cause); } + public RuntimeReflectionException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/Try.java b/core/src/main/java/org/pharmgkb/parsers/utils/Try.java index b6894b1..9ffc2cb 100644 --- a/core/src/main/java/org/pharmgkb/parsers/utils/Try.java +++ b/core/src/main/java/org/pharmgkb/parsers/utils/Try.java @@ -4,7 +4,9 @@ import javax.annotation.Nullable; import java.util.Objects; import java.util.Optional; +import java.util.function.BinaryOperator; import java.util.function.Function; +import java.util.function.Predicate; import java.util.function.Supplier; /** @@ -12,257 +14,262 @@ * Stores either the value, or the thrown exception. * Provides methods for mapping, composition, and recovery of the values. * Inspired by Scala's Try. + * + * This class is immutable, EXCEPT that some functions can add a suppressed exception to {@link #exception()}. + * Unfortunately, this is not really preventable. + * The affected methods are: + *
    + *
  1. {@link #recover(Object)}
  2. + *
  3. {@link #recover(Supplier)}
  4. + *
  5. {@link #recover(Function)}
  6. + *
  7. {@link #or(Try, BinaryOperator)}
  8. + *
  9. {@link #compose(Function)}
  10. + *
+ * * For example, this will download a resource, upload its text, and return the response: - * + * {@code * String response = Try.attempt(getWebResource) * .recover(getWebResourceFromMirror) * .map(resource -> resource.text) * .compose(text -> upload(text)) * .orElseGet(error -> "Failed! " + error.getMessage())); - * - * - * @param The type of the result of the operation + * } + * @param The type of the result of the operation */ -public class Try { +@SuppressWarnings("ProhibitedExceptionThrown") +public class Try implements TryI { - private final T value; + private final V value; private final E exception; - private final Class clazz; + private final Class clazz; - @Nonnull - public static Try succeed(@Nonnull U t) { - return new Try<>(t, null, Exception.class); - } - @Nonnull - public static Try succeed(@Nonnull U t, Class clazz) { - return new Try<>(t, null, clazz); - } - @Nonnull - public static Try fail(@Nonnull Exception e) { - return new Try<>(null, e, Exception.class); - } - @Nonnull - public static Try fail(@Nonnull V e, @Nonnull Class clazz) { - return new Try<>(null, e, clazz); - } + @Nonnull + public static Try succeed(@Nonnull T t) { + return new Try<>(t, null, Exception.class); + } - @Nonnull - public static Try attempt(@Nonnull Supplier t) { - return attempt(t, Exception.class); - } - public static Try attempt(@Nonnull Supplier t, @Nonnull Class clazz) { - try { - return new Try<>(t.get(), null, clazz); - } catch (Throwable e) { - if (clazz.isInstance(e)) { - return new Try<>(null, (V) e, clazz); - } else { - throw e; - } - } - } + @Nonnull + public static Try succeed(@Nonnull T t, @Nonnull Class allowedExceptionType) { + return new Try<>(t, null, allowedExceptionType); + } + + @Nonnull + public static Try fail(@Nonnull Exception e) { + return new Try<>(null, e, Exception.class); + } + + @Nonnull + public static Try fail(@Nonnull E e, @Nonnull Class allowedExceptionType) { + return new Try<>(null, e, allowedExceptionType); + } + + @Nonnull + public 
static Try attempt(@Nonnull Supplier t) { + return attempt(t, Exception.class); + } + + @SuppressWarnings("unchecked") + public static Try attempt( + @Nonnull Supplier t, @Nonnull Class clazz + ) { + try { + return new Try<>(t.get(), null, clazz); + } catch (Exception e) { + if (clazz.isInstance(e)) { + return new Try<>(null, (E) e, clazz); + } + throw e; + } + } - protected Try(@Nullable T value, @Nullable E exception, @Nonnull Class clazz) { - assert (value ==null)^(exception ==null); + protected Try(@Nullable V value, @Nullable E exception, Class clazz) { + assert null == value ^ null == exception; this.value = value; this.exception = exception; - this.clazz = clazz; + this.clazz = clazz; } - /** - * Maps the result of this Try through a function, if it's a success. - * Otherwise (if it's a failure), just returns a copy. - * Will re-throw exceptions thrown by {@code fn}. - * @param fn A function to apply to this if it's a success - * @param The new type - * @return A new Try - * @see Try#compose - */ - @Nonnull - public Try map(@Nonnull Function fn) { - if (value == null) { - return new Try<>(null, exception, clazz); - } else { - return new Try<>(fn.apply(value), null, clazz); - } - } + @Override + @Nonnull + public Try leftOr(@Nonnull Try other) { + if (null != value) { + return this; + } + if (null != other.value) { + return other; + } + return this; + } + + @Override + @Nonnull + public Try rightOr(@Nonnull Try other) { + if (null != other.value) { + return other; + } + if (null != value) { + return this; + } + return other; + } + + @Override + @Nonnull + public Try,E> or(@Nonnull Try other) { + if (null != exception && null != other.exception) { + exception.addSuppressed(other.exception); + return new Try<>(null, exception, clazz); + } + return new Try<>(OptPairList.ofNullable(value, other.value), null, clazz); + } + + @Override + @Nonnull + public Try or(@Nonnull Try other, @Nonnull BinaryOperator mergeSuccess) { + if (null != exception && null != 
other.exception) { + exception.addSuppressed(other.exception); + return this; + } + if (null != value && null != other.value) { + return new Try<>(mergeSuccess.apply(value, other.value), null, clazz); + } + if (null != other.value) { + return other; + } + return this; + } + + @Override + @Nonnull + public Try or( + @Nonnull Try other, + @Nonnull BinaryOperator mergeSuccess, + @Nonnull BinaryOperator mergeFailure + ) { + if (null != exception && null != other.exception) { + return new Try<>(null, mergeFailure.apply(exception, other.exception), clazz); + } + if (null != value && null != other.value) { + return new Try<>(mergeSuccess.apply(value, other.value), null, clazz); + } + if (null != other.value) { + return other; + } + return this; + } - /** - * Returns this try if it is a success. - * Otherwise, returns a successful Try containing {@code value2}. - * @param value2 A value to fill in - * @return A new Try - * @see Try#recover(Supplier) - * @see Try#recover(Function) - * @see Try#orElse(T) - */ - @Nonnull - public Try recover(@Nonnull T value2) { - if (value == null) { + @Override + @Nonnull + public Try recover(@Nonnull V value2) { + if (null == value) { return new Try<>(value2, null, clazz); - } else { - return this; } + return this; } - /** - * Attempt to recover failures. - * If this succeeded, returns it. - * If it failed, try filling it with {@code supplier.get()} instead. - * If calling {@code supplier()} throws Exception @{code e}, - * will return @{code Try.fail(e)} with {@code this.getException()} added as suppressed. 
- * @param sup Called when this Try failed - * @return A new Try - * @see Try#recover(T) - * @see Try#recover(Function) - * @see Try#orElseGet(Supplier) - */ - @Nonnull - public Try recover(@Nonnull Supplier sup) { - if (value == null) { - return _map(z -> sup.get()); - } else { - return this; + + @Override + @Nonnull + public Try recover(@Nonnull Supplier sup) { + if (null == value) { + return recoverInternal(z -> sup.get()); } + return this; } - /** - * Attempt to recover failures. - * If {@code this} is a success, returns it. - * If it is a failure, try filling it with {@code fn.apply(this.e)} instead. - * If calling {@code supplier()} throws Exception @{code e}, - * will return @{code Try.fail(e)} with {@code this.getException()} added as suppressed. - * @param fn Called when this Try failed - * @return A new Try - * @see Try#recover(Supplier) - * @see Try#recover(Object) - * @see Try#orElseGet(Function) - */ - @Nonnull - public Try recover(@Nonnull Function fn) { - if (value == null) { - return _map(z -> fn.apply(exception)); - } else { - return this; + @Override + @Nonnull + public Try recover(@Nonnull Function fn) { + if (null == value) { + return recoverInternal(z -> fn.apply(exception)); } + return this; } - /** - * You should mostly prefer calling {@code compose} instead. - * Composes this Try with a condition that must pass. - * If the condition fails, calls {@code this.clazz(String)}. 
- * @param condition A required condition - * @return A new Try - */ - public Try require(@Nonnull Function condition) throws UnsupportedOperationException, RuntimeReflectionException { - if (value == null || condition.apply(value)) return this; - return new Try<>(null, new ReflectingConstructor<>(clazz).instance("Failed requirement " + condition), clazz); + @Override + public Try require(@Nonnull Predicate predicate) { + if (null == value || predicate.test(value)) return this; + return new Try<>( + null, + new ReflectingConstructor<>(clazz).instance("Failed requirement " + predicate), + clazz + ); } - /** - * Compose this Try with a function. - * If {@code this} Try is a success, tries to map its result to {@code fn(this.value)}. - * If {@code this} Try is a failure, returns a copy of it (with type <Z>). - * If calling {@code fn} throws an exception {@code}, will add {@code this.exception} as a suppressed. - * @param fn A function to be called on {@code this.t} - * @param The new type - * @return A new @{code Try<Z>} - * @see Try#map - */ - @Nonnull - public Try compose(@Nonnull Function fn) { - if (value == null) { - return new Try<>(null, exception, clazz); - } else { - return _map(fn); - } - } + @Override + @Nonnull + public Try broaden() { + return new Try<>(value, exception, Exception.class); + } - @Nonnull - protected Try _map(@Nonnull Function fn) { - Z z; - try { - z = fn.apply(value); - } catch (Throwable e) { - if (!clazz.isInstance(e)) throw e; - if (this.exception != null) { // must always be true though! 
- e.addSuppressed(this.exception); - } - //noinspection unchecked - return new Try<>(null, (E) e, clazz); - } - return new Try<>(z, null, clazz); - } + @Override + @Nonnull + public Try compose(@Nonnull Function fn) { + if (null == value) { + return new Try<>(null, exception, clazz); + } + return mapInternal(fn); + } - public boolean isDefined() { - return this.value != null; - } - public boolean isEmpty() { - return this.value == null; - } - public boolean succeeded() { - return this.value != null; - } - public boolean failed() { - return this.value == null; - } + @Override + @Nonnull + public Try map(@Nonnull Function fn) { + if (null == value) { + return new Try<>(null, exception, clazz); + } + return new Try<>(fn.apply(value), null, clazz); + } - @Nonnull - public Optional get() { + @SuppressWarnings("unchecked") + @Nonnull + protected Try mapInternal(@Nonnull Function fn) { + assert null != value; + assert null == exception; + Z z; + try { + z = fn.apply(value); + } catch (Exception e) { + if (!clazz.isInstance(e)) + throw e; + return new Try<>(null, (E) e, clazz); + } + return new Try<>(z, null, clazz); + } + + @SuppressWarnings("unchecked") + @Nonnull + protected Try recoverInternal(@Nonnull Function fn) { + assert null == value; + assert null != exception; + V v; + try { + v = fn.apply(exception); + } catch (Exception e) { + if (!clazz.isInstance(e)) + throw e; + e.addSuppressed(exception); + return new Try<>(null, (E) e, clazz); + } + return new Try<>(v, null, clazz); + } + + @Override + @Nonnull + public Optional value() { return Optional.ofNullable(value); } - @Nonnull - public T orElse(@Nonnull T value2) { - return Optional.ofNullable(value).orElse(value2); - } - @Nonnull - public T orElseGet(@Nonnull Supplier supplier) { - return Optional.ofNullable(value).orElseGet(supplier); - } - @Nonnull - public T orElseGet(@Nonnull Function fn) { - if (value == null) { - return fn.apply(exception); - } else { - return value; - } - } - @Nonnull - public T 
orElseThrow() throws E { - if (value != null) { - return value; - } else { - throw exception; - } - } - @Nonnull - public T orElseThrow(@Nonnull RuntimeException e) throws RuntimeException { - if (value != null) { - return value; - } else { - throw e; - } - } - @Nonnull - public T orElseThrow(@Nonnull Function fn) throws E { - if (value != null) { - return value; - } else { - throw fn.apply(exception); - } - } - @Nonnull - public Optional getException() { + @Override + @Nonnull + public Optional exception() { return Optional.ofNullable(exception); } @Override public String toString() { - if (this.value != null) { - return "Success[" + value + "]"; - } else { - return "Failure[" + exception + "]"; - } + if (null == value) { + return "Failure[" + exception + "]"; + } + return "Success[" + value + "]"; } @Override diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/TryI.java b/core/src/main/java/org/pharmgkb/parsers/utils/TryI.java new file mode 100644 index 0000000..c2f0462 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/utils/TryI.java @@ -0,0 +1,389 @@ +package org.pharmgkb.parsers.utils; + +import javax.annotation.Nonnull; +import java.util.Optional; +import java.util.function.BinaryOperator; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.function.Supplier; + +public interface TryI { + + /** + * Returns a {@code Try} containing the {@link #value()} of {@code this} and {@code other}. + * The returned Try's {@link #exception()} will be: + *
    + *
  • this with other suppressed if both exceptions are non-empty
  • + *
  • this if only it is non-empty
  • + *
  • other if only it is non-empty
  • + *
  • {@code Optional.empty} if both are empty
  • + *
+ * + * This method is very rarely needed; + * see {@link #leftOr(Try)} and {@link #rightOr(Try)} instead. + * + * This method can add a suppressed exception to {@code this} {@link #exception()}. + * + * @param other A try to consider identically to {@code this} try, except that its {@link #exception()} + * will be added as a suppressed exception to {@code this} exception if both are non-empty. + * @return A new Try + * + * @see #or(Try, BinaryOperator) + * @see #or(Try, BinaryOperator, BinaryOperator) + * @see #leftOr(Try) + * @see #rightOr(Try) + * @see #recover(Function) + */ + @Nonnull + Try, E> or(@Nonnull Try other); + + /*** + * + * Returns a {@code Try} containing either the {@link #value()} of {@code this} or the {@link #value()} {@code other}, + * according to {@code mergeSuccess}. + * The returned Try's {@link #exception()} will be: + *
    + *
  • this with other suppressed if both exceptions are non-empty
  • + *
  • this if only it is non-empty
  • + *
  • other if only it is non-empty
  • + *
  • {@code Optional.empty} if both are empty
  • + *
+ * + * + * This method is very rarely needed; + * see {@link #leftOr(Try)} and {@link #rightOr(Try)} instead. + * + * This method can add a suppressed exception to {@code this} {@link #exception()}. + * + * @param other A try to consider identically to {@code this} try, except that its {@link #exception()} + * will be added as a suppressed exception to {@code this} exception if both are non-empty. + * @param mergeSuccess Takes this {@link #value()} as the first argument + * and other {@link #value()} as the second, + * and returns a {@link V} value to use + * @return A new Try + * @see #or(Try) + * @see #or(Try, BinaryOperator, BinaryOperator) + * @see #leftOr(Try) + * @see #rightOr(Try) + * @see #recover(Function) + */ + @Nonnull + Try or(@Nonnull Try other, @Nonnull BinaryOperator mergeSuccess); + + /*** + * + * Returns a {@code Try} containing either the {@link #value()} of {@code this} or the {@link #value()} {@code other}, + * according to {@code mergeSuccess}. + * The returned Try's {@link #exception()} will be: + *
    + *
  • this with other suppressed if both exceptions are non-empty
  • + *
  • this if only it is non-empty
  • + *
  • other if only it is non-empty
  • + *
  • {@code Optional.empty} if both are empty
  • + *
+ * + * + * This is an uncommonly needed method; + * see {@link #leftOr(Try)} and {@link #rightOr(Try)} instead. + * + * Unlike {@link #or(Try)} and {@link #or(Try, BinaryOperator)}, this method cannot affect the state of + * {@code this} or {@code other}. + * + * @param other A try to consider identically to {@code this} try + * @param mergeSuccess Takes this {@link #value()} as the first argument and other {@link #value()} + * as the second, and returns a {@link V} to use + * @param mergeFailure Takes this {@link #exception()} ()} as the first argument + * and other {@link #exception()} as the second, + * returns an {@link E} exception to use + * @return A new Try + * @see #or(Try) + * @see #or(Try, BinaryOperator) + * @see #leftOr(Try) + * @see #rightOr(Try) + * @see #recover(Function) + */ + @Nonnull + Try or(@Nonnull Try other, BinaryOperator mergeSuccess, BinaryOperator mergeFailure); + + /** + * Returns either {@code this} or {@code other}. + *
    + *
  • {@code other} if it contains a {@link #value() value} and {@code this} does not
  • + *
  • {@code this} otherwise
  • + *
+ * @param other Another Try, considered inferior to this one ({@code this} is preferred) + * @return Either {@code this} or {@code other} + * + * @see #rightOr(Try) + * @see #recover(Function) + * @see #or(Try, BinaryOperator, BinaryOperator) + */ + @Nonnull + Try leftOr(@Nonnull Try other); + + /** + * Returns either {@code this} or {@code other}. + *
    + *
  • {@code this} if it contains a {@link #value() value} and {@code other} does not
  • + *
  • {@code other} otherwise
  • + *
+ * @param other Another Try, considered superior to this one ({@code other} is preferred) + * @return Either {@code this} or {@code other} + * + * @see #leftOr(Try) + * @see #recover(Function) + * @see #or(Try, BinaryOperator, BinaryOperator) + */ + @Nonnull + Try rightOr(@Nonnull Try other); + + /** + * Returns this try if it is a success. + * Otherwise, returns a successful Try containing {@code value2}. + * + * This method can add a suppressed exception to {@code this} {@link #exception()}. + * + * @param value2 A value to fill in + * @return A new Try + * @see Try#recover(Supplier) + * @see Try#recover(Function) + * @see Try#orElse(V) + */ + @Nonnull + Try recover(@Nonnull V value2); + + /** + * Attempt to recover failures. If this succeeded, returns it. If it failed, try filling it with + * {@code supplier.get()} instead. If calling {@code supplier()} throws Exception @{code e}, will return @{code + * Try.fail(e)} with {@code this.getException()} added as suppressed. + * + * This method can add a suppressed exception to {@code this} {@link #exception()}. + * + * @param sup Called when this Try failed + * @return A new Try + * @see Try#recover(V) + * @see Try#recover(Function) + * @see Try#orElseGet(Supplier) + */ + @Nonnull + Try recover(@Nonnull Supplier sup); + + /** + * Attempt to recover failures. + * If {@code this} is a success, returns it. If it's a failure, fills it with {@code fn.apply(this.e)} instead. + * If calling {@code supplier()} throws Exception @{code e}, will return @{code Try.fail(e)} with {@code this + * .getException()} added as suppressed. + * + * This method can add a suppressed exception to {@code this} {@link #exception()}. + * + * @param fn Called when this Try failed + * @return A new Try + * @see Try#recover(Supplier) + * @see Try#recover(Object) + * @see Try#orElseGet(Function) + */ + @Nonnull + Try recover(@Nonnull Function fn); + + /** + * Composes this Try with a condition that must pass. 
+ * If the condition fails, throws a new exception + * + * @param predicate A required predicate + * @return A new Try + * @see #compose(Function) + */ + Try require(@Nonnull Predicate predicate); + + /** + * Broaden allowed exception type to {@link Exception}. + * Unfortunately, Java doesn't permit something like {@code E2 extends Exception && super E}, + * so we can only broaden precisely to {@code Exception}. + * + * Example: + * + * {@code + * // if we raise an exception that's not an UnsupportedOperationException here, it'll get thrown: + * Try try1 = Try.succeed("puppy", UnsupportedOperationException.class); + * // We're going to try composing it with another function that could fail -- but with another exception type + * Try try2 = try1 + * .broaden() + * .compose(Integer::parseInt); // throws NumberFormatException + * // The NumberFormatException got caught only because we called broaden() first + * System.out.println(try2); // Failure[Exception ... java.lang.NumberFormatException: For input string: "puppy" + * } + */ + @Nonnull + Try broaden(); + + /** + * Maps the result of this Try through a function, if it's a success. + * Otherwise (if it's a failure), just returns a copy. + * Will re-throw exceptions thrown by {@code fn}. + * + * This method is similar to {@link #compose(Function)}: + *
    + *
  • + * {@link #map(Function)} (this method) immediately re-throws + * any exception thrown by {@code fn}. + *
  • + *
  • + * {@link #compose(Function)} catches {@link E}-type exceptions + * thrown by {@code fn} and returns a new failed {@code Try}. + *
  • + *
+ * + * @param fn A function to apply to this if it's a success + * @param The new type + * @return A new Try + * @see Try#compose + */ + @Nonnull Try map(@Nonnull Function fn); + + /** + * Compose this Try with a function. + * If {@code this} Try is a success, tries to map its result to {@code fn(this.value)}. + * If {@code this} Try is a failure, returns a copy of it (with type <Z>). + * If calling {@code fn} throws an exception {@code}, adds {@code this.exception} as a suppressed. + * + * This method is similar to {@link #map(Function)}: + *
    + *
  • + * {@link #compose(Function)} (this method) catches {@link E}-type exceptions + * thrown by {@code fn} and returns a new failed {@code Try}. + *
  • + *
  • + * {@link #map(Function)} immediately re-throws any exception thrown by {@code fn}. + *
  • + *
+ * + * @param fn A function to be called on {@code this.t} + * @param The new type + * @return A new @{code Try<Z>} + * @see Try#map + * @see Try#leftOr(Try) + * @see Try#or(Try, BinaryOperator, BinaryOperator) + */ + @Nonnull + Try compose(@Nonnull Function fn); + + /** + * Gets the value if this Try was successful ({@link #succeeded()} is true. + * The returned {@link V} will be non-empty if and only if {@link #exception()} is empty. + */ + @Nonnull + Optional value(); + + /** + * Gets the exception if this Try was a failure ({@link #failed()} is true. + * The returned {@link E} will be non-empty if and only if {@link #value()} is empty. + */ + @Nonnull + Optional exception(); + + /** + * Returns {@code this} {@link #value()} if {@link #succeeded() success}, or {@code value2} otherwise. + * @param value2 The fallback value + */ + @Nonnull + default V orElse(@Nonnull V value2) { + return value().orElse(value2); + } + + /** + * Returns {@code this} {@link #value()} if {@link #succeeded() success}, or {@code supplier.get()} otherwise. + * @param supplier A function that produces the fallback value if necessary + */ + @Nonnull + default V orElseGet(@Nonnull Supplier supplier) { + return value().orElseGet(supplier); + } + + /** + * Returns {@code this} {@link #value()} if {@link #succeeded() success}, or {@code supplier.get()} otherwise. + * @param fn A function that produces the fallback value if necessary. + * It takes the {@link E} exception {{@link #exception()}} as an argument and returns a fallback value. + * If {@code fn} throws an exception {@code e}, this method will throw {@code e}. 
+ */ + @Nonnull + default V orElseGet(@Nonnull Function fn) { + return value().orElseGet(() -> fn.apply(exception().get())); + } + + /** + * Gets {@link #value()} if {@link #succeeded()}, otherwise throwing {@link #exception()} + * @throws E From {@link #exception()}, if {@link #failed()} + * @see #orElseThrow(Exception) + * @see #orElseThrow(Supplier) + * @see #orElseThrow(Function) + */ + @Nonnull + default V orElseThrow() throws E { + return value().orElseThrow(() -> exception().get()); + } + + /** + * Gets {@link #value()} if {@link #succeeded()}, otherwise throwing {@code supplier.get()}. + * @throws X From {@code supplier.get()}, if {@link #failed()} + * @see #orElseThrow() + * @see #orElseThrow(Exception) + * @see #orElseThrow(Function) + */ + @Nonnull + default V orElseThrow(@Nonnull Supplier supplier) throws X { + return value().orElseThrow(supplier); + } + + /** + * Gets {@link #value()} if {@link #succeeded()}, otherwise throwing {@code e}. + * @throws X From {@code e}, if {@link #failed()} + * @see #orElseThrow() + * @see #orElseThrow(Function) + */ + @Nonnull + default V orElseThrow(@Nonnull X e) throws X { + return value().orElseThrow(() -> e); + } + + /** + * Gets {@link #value()} if {@link #succeeded()}, otherwise throwing {@code fn(exception()}. + * @param fn Accepts {@link E} {@link #exception()} and returns a new exception + * @throws X From {@code fn}, if {@link #failed()} + * @see #orElseThrow() + * @see #orElseThrow(Exception) + * @see #orElseThrow(Supplier) + */ + @Nonnull + default V orElseThrow(@Nonnull Function fn) throws X { + return value().orElseThrow(() -> fn.apply(exception().get())); + } + + /** + * Whether this Try has a {@link #value()}; identical to {@link #succeeded()}. + */ + default boolean isDefined() { + return succeeded(); + } + + /** + * Whether this Try lacks a {@link #value()}; identical to {@link #failed()}. 
+ */ + default boolean isEmpty() { + return failed(); + } + + /** + * Whether this Try has a {@link #value()} (and lacks an {@link #exception()}). + */ + default boolean succeeded() { + return value().isPresent(); + } + + /** + * Whether this Try lacks a {@link #value()} (and has an {@link #exception()}). + */ + default boolean failed() { + return !value().isPresent(); + } + +} diff --git a/core/src/main/java/org/pharmgkb/parsers/utils/package-info.java b/core/src/main/java/org/pharmgkb/parsers/utils/package-info.java new file mode 100644 index 0000000..a542535 --- /dev/null +++ b/core/src/main/java/org/pharmgkb/parsers/utils/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.utils; diff --git a/core/src/test/java/org/pharmgkb/parsers/escape/BackslashEscaperTest.java b/core/src/test/java/org/pharmgkb/parsers/escape/BackslashEscaperTest.java index beec44e..730ef54 100644 --- a/core/src/test/java/org/pharmgkb/parsers/escape/BackslashEscaperTest.java +++ b/core/src/test/java/org/pharmgkb/parsers/escape/BackslashEscaperTest.java @@ -2,7 +2,7 @@ import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link BackslashEscaper}. @@ -20,4 +20,4 @@ public void testEscape() { public void testUnescape() { } -} \ No newline at end of file +} diff --git a/core/src/test/java/org/pharmgkb/parsers/escape/Rfc3986EscaperTest.java b/core/src/test/java/org/pharmgkb/parsers/escape/Rfc3986EscaperTest.java index d8461c7..91b9fab 100644 --- a/core/src/test/java/org/pharmgkb/parsers/escape/Rfc3986EscaperTest.java +++ b/core/src/test/java/org/pharmgkb/parsers/escape/Rfc3986EscaperTest.java @@ -2,7 +2,7 @@ import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link Rfc3986Escaper}.
@@ -45,4 +45,4 @@ public void testUnescapeInverse() { assertEquals("abc1", escaper.unescape("abc%31")); assertEquals("9abc1", escaper.unescape("%39abc%31")); } -} \ No newline at end of file +} diff --git a/core/src/test/java/org/pharmgkb/parsers/utils/IoUtilsTest.java b/core/src/test/java/org/pharmgkb/parsers/io/IoUtilsTest.java similarity index 80% rename from core/src/test/java/org/pharmgkb/parsers/utils/IoUtilsTest.java rename to core/src/test/java/org/pharmgkb/parsers/io/IoUtilsTest.java index fcbfb7c..1c4f3fc 100644 --- a/core/src/test/java/org/pharmgkb/parsers/utils/IoUtilsTest.java +++ b/core/src/test/java/org/pharmgkb/parsers/io/IoUtilsTest.java @@ -1,6 +1,7 @@ -package org.pharmgkb.parsers.utils; +package org.pharmgkb.parsers.io; import org.junit.jupiter.api.Test; +import org.pharmgkb.parsers.io.IoUtils; import java.io.File; import java.net.URISyntaxException; diff --git a/core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameNameTest.java b/core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameNameTest.java new file mode 100644 index 0000000..be48ffa --- /dev/null +++ b/core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameNameTest.java @@ -0,0 +1,40 @@ +package org.pharmgkb.parsers.model; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +public class ChromosomeNameNameTest { + + @Test + public void test1() { + assertEquals("chr1", new StandardChromosomeName("chr1").toString()); + } + + @Test + public void testUcsc() { + assertEquals("chr1", StandardChromosomeName.ucscWithFailure("1").toString()); + assertEquals("chrM", StandardChromosomeName.ucscWithFailure("MT").toString()); + } + + @Test + public void testId() { + assertEquals("chrX_AGAFASF55v22", new StandardChromosomeName("chrX_AGAFASF55v22").toString()); + } + + @Test + public void testRandom() { + assertEquals("chrX_AGAFASF55v22_random", new StandardChromosomeName("chrX_AGAFASF55v22_random").toString()); + } + + @Test + public void 
testAlt() { + assertEquals("chrX_AGAFASF55v22_alt", new StandardChromosomeName("chrX_AGAFASF55v22_alt").toString()); + } + + @Test + public void testNonstandard() { + assertEquals("CHR_HSCHR3_1_CTG2_1", new StandardChromosomeName("CHR_HSCHR3_1_CTG2_1").toString()); + assertEquals("HSCHR3_1_CTG2_1", new StandardChromosomeName("HSCHR3_1_CTG2_1").toString()); + } +} diff --git a/core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameTest.java b/core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameTest.java deleted file mode 100644 index f3aeabd..0000000 --- a/core/src/test/java/org/pharmgkb/parsers/model/ChromosomeNameTest.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.pharmgkb.parsers.model; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -public class ChromosomeNameTest { - - @Test - public void test1() { - assertEquals("chr1", new ChromosomeName("chr1").toString()); - } - - @Test - public void testUcsc() { - assertEquals("chr1", ChromosomeName.ucscWithFailure("1").toString()); - assertEquals("chrM", ChromosomeName.ucscWithFailure("MT").toString()); - } - - @Test - public void testId() { - assertEquals("chrX_AGAFASF55v22", new ChromosomeName("chrX_AGAFASF55v22").toString()); - } - - @Test - public void testRandom() { - assertEquals("chrX_AGAFASF55v22_random", new ChromosomeName("chrX_AGAFASF55v22_random").toString()); - } - - @Test - public void testAlt() { - assertEquals("chrX_AGAFASF55v22_alt", new ChromosomeName("chrX_AGAFASF55v22_alt").toString()); - } - - @Test - public void testNonstandard() { - assertEquals("CHR_HSCHR3_1_CTG2_1", new ChromosomeName("CHR_HSCHR3_1_CTG2_1").toString()); - assertEquals("HSCHR3_1_CTG2_1", new ChromosomeName("HSCHR3_1_CTG2_1").toString()); - } -} \ No newline at end of file diff --git a/core/src/test/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalTest.java b/core/src/test/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalTest.java index 5367f27..051e211 
100644 --- a/core/src/test/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalTest.java +++ b/core/src/test/java/org/pharmgkb/parsers/model/GeneralizedBigDecimalTest.java @@ -14,19 +14,19 @@ public class GeneralizedBigDecimalTest { @Test public void testGetValue() { - assertEquals(new GeneralizedBigDecimal("0.5").getValue().get(), new BigDecimal("0.5")); - assertFalse(new GeneralizedBigDecimal("Inf").getValue().isPresent()); - assertFalse(new GeneralizedBigDecimal("+Inf").getValue().isPresent()); - assertFalse(new GeneralizedBigDecimal("-Inf").getValue().isPresent()); - assertFalse(new GeneralizedBigDecimal("NaN").getValue().isPresent()); + assertEquals(new BigDecimal("0.5"), new GeneralizedBigDecimal("0.5").value().get()); + assertFalse(new GeneralizedBigDecimal("Inf").value().isPresent()); + assertFalse(new GeneralizedBigDecimal("+Inf").value().isPresent()); + assertFalse(new GeneralizedBigDecimal("-Inf").value().isPresent()); + assertFalse(new GeneralizedBigDecimal("NaN").value().isPresent()); } @Test public void testToString() { - assertEquals(new GeneralizedBigDecimal("0.5").toString(), "0.5"); - assertEquals(new GeneralizedBigDecimal("Inf").toString(), "Inf"); - assertEquals(new GeneralizedBigDecimal("+Inf").toString(), "+Inf"); - assertEquals(new GeneralizedBigDecimal("-Inf").toString(), "-Inf"); + assertEquals("0.5", new GeneralizedBigDecimal("0.5").toString()); + assertEquals("Inf", new GeneralizedBigDecimal("Inf").toString()); + assertEquals("+Inf", new GeneralizedBigDecimal("+Inf").toString()); + assertEquals("-Inf", new GeneralizedBigDecimal("-Inf").toString()); } @Test @@ -66,4 +66,4 @@ public void testCompareTo() { assertTrue(e2.getMessage().contains("NaN")); } -} \ No newline at end of file +} diff --git a/core/src/test/java/org/pharmgkb/parsers/model/LocusRangeTest.java b/core/src/test/java/org/pharmgkb/parsers/model/LocusRangeTest.java index 3c98289..bb8466c 100644 --- a/core/src/test/java/org/pharmgkb/parsers/model/LocusRangeTest.java +++ 
b/core/src/test/java/org/pharmgkb/parsers/model/LocusRangeTest.java @@ -25,7 +25,10 @@ public void testContains() { public void testContainsBad() { LocusRange range = new LocusRange(new Locus("chrX", 0, Strand.MINUS), new Locus("chrX", 5, Strand.MINUS)); assertTrue(range.contains(new Locus("chrX", 0, Strand.MINUS))); - assertThrows(IllegalArgumentException.class, () -> range.contains(new Locus("chrX", 3, Strand.PLUS))); + assertThrows( + IllegalArgumentException.class, + () -> range.contains(new Locus("chrX", 3, Strand.PLUS)) + ); } @Test @@ -77,5 +80,4 @@ public void testBad1() { public void testBad2() { assertThrows(IllegalArgumentException.class, () -> LocusRange.parse("chrX(-):-5-10")); } - -} \ No newline at end of file +} diff --git a/core/src/test/java/org/pharmgkb/parsers/utils/TryTest.java b/core/src/test/java/org/pharmgkb/parsers/utils/TryTest.java index ffc1b48..a2521e2 100644 --- a/core/src/test/java/org/pharmgkb/parsers/utils/TryTest.java +++ b/core/src/test/java/org/pharmgkb/parsers/utils/TryTest.java @@ -6,17 +6,17 @@ import java.util.function.Function; import java.util.function.Supplier; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; public class TryTest { @Test public void testSimple() { - assertEquals(Try.succeed(1).get(), Optional.of(1)); - assertEquals(Try.succeed(1).getException(), Optional.empty()); - assertEquals(Try.fail(new Exception("")).get(), Optional.empty()); - assertEquals(Try.fail(new Exception("")).getException().get().getMessage(), ""); + assertEquals(Optional.of(1), Try.succeed(1).value()); + assertEquals(Optional.empty(), Try.succeed(1).exception()); + assertEquals(Optional.empty(), Try.fail(new Exception("")).value()); + assertEquals("", Try.fail(new Exception("")).exception().get().getMessage()); } @Test @@ -25,34 +25,34 @@ public void testAttempt() { Supplier failing = () -> { throw new IllegalArgumentException(""); }; - assertEquals(Try.attempt(supplier).get(), 
Optional.of("abc")); - assertEquals(Try.attempt(supplier).getException(), Optional.empty()); - assertEquals(Try.attempt(failing).getException().get().getMessage(), ""); - assertEquals(Try.attempt(failing).get(), Optional.empty()); + assertEquals(Optional.of("abc"), Try.attempt(supplier).value()); + assertEquals(Optional.empty(), Try.attempt(supplier).exception()); + assertEquals("", Try.attempt(failing).exception().get().getMessage()); + assertEquals(Optional.empty(), Try.attempt(failing).value()); } @Test public void testMap() { - assertEquals(Try.succeed("").map(x -> x + "abc").get(), Optional.of("abc")); + assertEquals(Optional.of("abc"), Try.succeed("").map(x -> x + "abc").value()); Try failed = Try.fail(new IllegalArgumentException("")); - assertEquals(failed.map(x -> x + "abc").get(), Optional.empty()); + assertEquals(Optional.empty(), failed.map(x -> x + "abc").value()); } @Test public void testRecover() { - assertEquals(Try.succeed("").recover(() -> "abc").get(), Optional.of("")); + assertEquals(Optional.of(""), Try.succeed("").recover(() -> "abc").value()); Try failed = Try.fail(new IllegalArgumentException("")); - assertEquals(failed.recover(() -> "abc").get(), Optional.of("abc")); + assertEquals(Optional.of("abc"), failed.recover(() -> "abc").value()); } @Test public void testCompose() { - assertEquals(Try.succeed("").compose(x -> x + "abc").get(), Optional.of("abc")); + assertEquals(Optional.of("abc"), Try.succeed("").compose(x -> x + "abc").value()); Try failed = Try.fail(new IllegalArgumentException("")); - assertEquals(failed.compose(x -> x + "abc").get(), Optional.empty()); + assertEquals(Optional.empty(), failed.compose(x -> x + "abc").value()); Function succeeding = x -> x/2.0; Function failing = x -> { throw new IllegalArgumentException(""); }; - assertEquals(Try.succeed(1).compose(succeeding).get(), Optional.of(0.5)); - assertEquals(Try.succeed(1).compose(succeeding).get(), Optional.of(0.5)); + assertEquals(Optional.of(0.5), 
Try.succeed(1).compose(succeeding).value()); + assertEquals(Optional.of(0.5), Try.succeed(1).compose(succeeding).value()); } } diff --git a/fasta/build.gradle b/fasta/build.gradle index 614da49..75740b8 100644 --- a/fasta/build.gradle +++ b/fasta/build.gradle @@ -1,10 +1,3 @@ -repositories { - maven { - url 'https://www.ebi.ac.uk/~maven/m2repo/' - } -} - dependencies { - implementation 'uk.ac.ebi.pride.tools:braf:1.2.2' - compile project(':core') + api project(':core') } diff --git a/fasta/lombok.config b/fasta/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/fasta/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReader.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReader.java new file mode 100644 index 0000000..583cd32 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReader.java @@ -0,0 +1,239 @@ +package org.pharmgkb.parsers.fasta; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; +import javax.annotation.concurrent.ThreadSafe; +import java.io.*; +import java.nio.CharBuffer; +import java.nio.file.Path; +import java.util.Objects; +import java.util.Optional; + +/** + * A character stream for FASTA, which reads base-by-base. + * + * The FASTA grammar is taken to be: + *
+ *     fasta      ::= '>'header newline sequence (newline fasta)?
+ *     header     ::= [^\n\r]+
+ *     sequence   ::= [^\n\r]+
+ * 
+ * Where {@code newline} is taken to be the platform-dependent newline sequence. + * Notice that, even though the newline is platform-dependent, neither the header nor sequence can contain a CR or LF, + * which is a platform-independent choice. Also notice that comments and empty lines are not part of the grammar. + * + * + * Example usage: + *
+ * >gene_1
+ * AT
+ * >gene_2
+ * GC
+ * 
+ * {@code + * FastaStream stream = new FastaStream(file); + * stream.currentHeader() // returns Optional.empty() + * stream.readNextBase(); // returns 'A' + * stream.currentHeader() // returns "gene_1" + * stream.readNextBase(); // returns 'T' + * stream.currentHeader() // returns "gene_2" + * stream.readNextBase(); // returns 'G' + * stream.currentHeader() // returns "gene_2" + * stream.readNextBase(); // returns 'C' + * stream.readNextBase(); // returns Optional.empty() + * } + * + * @author Douglas Myers-Turnbull + */ +@ThreadSafe +public class FastaBaseReader implements FastaBaseReaderI { + + private final Reader m_reader; + + private final CharBuffer m_buffer; + private String m_header; + private long m_nBasesSinceHeader; + private long m_nBasesReadTotal; + private long m_nHeadersRead; + private long m_nBytesReadTotal; + + protected FastaBaseReader(@Nonnull Reader reader, @Nonnegative int nCharsInBuffer) throws IOException { + m_reader = Objects.requireNonNull(reader); + m_buffer = CharBuffer.allocate(nCharsInBuffer); + initBuffer(); + } + + @Override + public void close() throws IOException { + synchronized (m_buffer) { + m_reader.close(); + } + } + + @Override + @Nonnull + public Optional readNextBase() throws IOException { + synchronized (m_buffer) { + try { + Character base; + do { + base = doRead(); + if (base == null) { + return Optional.empty(); + } + } while (base == '\n' || base == '\r'); + if (base == '>') { + readHeader(); + base = doRead(); + } + // no matter what, we always read just one base + m_nBasesSinceHeader++; + m_nBasesReadTotal++; + return Optional.ofNullable(base); + } catch (RuntimeException e) { // record more info + throw new IOException( + "Error reading; " + m_nBytesReadTotal + " bytes read; on header " + m_header + + "; buffer has " + m_buffer.remaining() + " remaining", + e + ); + } + } + } + + @Override + @Nonnull + public Optional currentHeader() { + synchronized (m_buffer) { + return Optional.ofNullable(m_header); + } + } + + 
@Override + @Nonnegative + public long nHeadersRead() { + synchronized (m_buffer) { + return m_nHeadersRead; + } + } + + @Override + @Nonnegative + public long nBasesReadTotal() { + synchronized (m_buffer) { + return m_nBasesReadTotal; + } + } + + @Override + @Nonnegative + public long nBasesSinceHeader() { + synchronized (m_buffer) { + return m_nBasesSinceHeader; + } + } + + @Override + @Nonnegative + public long nBytesReadTotal() { + synchronized (m_buffer) { + return m_nBytesReadTotal; + } + } + + @Nullable + private Character doRead() throws IOException { + if (!m_buffer.hasRemaining()) { // fill buffer if it's empty + if (!initBuffer()) { // if the stream itself is empty + return null; + } + } + m_nBytesReadTotal++; + return m_buffer.get(); + } + + private boolean initBuffer() throws IOException { + m_buffer.clear(); + if (m_reader.read(m_buffer) == -1) { // actually read + return false; + } + m_buffer.flip(); + return true; + } + + private void readHeader() throws IOException { + StringBuilder header = new StringBuilder(256); + Character c; + synchronized (m_buffer) { + do { + c = doRead(); + if (c == null) { + throw new EOFException("Stream ended unexpectedly in header"); + } + if (c != '\n' && c != '\r') { + header.append(c); + } + } while (c != '\n' && c != '\r'); + m_header = header.toString(); + m_nBasesSinceHeader = 0; + m_nHeadersRead++; + } + } + + @NotThreadSafe + public static class Builder { + + private Reader m_reader; + private int m_nCharsInBuffer; + + public Builder(@Nonnull Path file) throws FileNotFoundException { + this(new FileReader(file.toFile())); + } + + @Nonnull + public Builder(@Nonnull File file) throws FileNotFoundException { + this(new FileReader(file)); + } + + @Nonnull + public Builder(@Nonnull Reader reader) { + m_reader = reader; + m_nCharsInBuffer = 2048; + } + + @Nonnull + public Builder setnCharsInBuffer(@Nonnegative int nCharsInBuffer) { + m_nCharsInBuffer = nCharsInBuffer; + return this; + } + + @Nonnull + public 
FastaBaseReaderI build() throws IOException { + return new FastaBaseReader(m_reader, m_nCharsInBuffer); + } + } + + @Override + public String toString() { + synchronized (m_buffer) { + return "SimpleFastaBaseReader{" + + "reader=" + + m_reader + + ", buffer=" + + m_buffer + + ", header='" + + m_header + + '\'' + + ", nBasesSinceHeader=" + + m_nBasesSinceHeader + + ", nBasesReadTotal=" + + m_nBasesReadTotal + + ", nHeadersRead=" + + m_nHeadersRead + + ", nBytesReadTotal=" + + m_nBytesReadTotal + + '}'; + } + } +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReaderI.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReaderI.java new file mode 100644 index 0000000..0a9170f --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaBaseReaderI.java @@ -0,0 +1,35 @@ +package org.pharmgkb.parsers.fasta; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.io.Closeable; +import java.io.IOException; +import java.util.Optional; + +public interface FastaBaseReaderI extends Closeable { + + /** + * @return The next base (nucleotide or amino acid) in the stream + * @throws IOException For IO errors + */ + @Nonnull + Optional readNextBase() throws IOException; + + /** + * @return The last header read, or null if nothing was read yet + */ + @Nonnull + Optional currentHeader(); + + @Nonnegative + long nHeadersRead(); + + @Nonnegative + long nBasesReadTotal(); + + @Nonnegative + long nBasesSinceHeader(); + + @Nonnegative + long nBytesReadTotal(); +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParser.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParser.java index fba295b..a3fe31f 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParser.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParser.java @@ -2,7 +2,6 @@ import com.google.common.base.Preconditions; import org.pharmgkb.parsers.BadDataFormatException; 
-import org.pharmgkb.parsers.MultilineParser; import org.pharmgkb.parsers.fasta.model.FastaSequence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,15 +42,15 @@ * * @author Douglas Myers-Turnbull */ -public class FastaSequenceParser implements MultilineParser { +public class FastaSequenceParser implements FastaSequenceParserI { - private static final long sf_logEvery = 10000; + private static final long sf_logEvery = 10000L; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); - private AtomicReference m_currentHeader = new AtomicReference<>(null); + private final AtomicReference m_currentHeader = new AtomicReference<>(null); @Nonnull @Override @@ -80,11 +79,9 @@ public Stream apply(@Nonnull String line) { return Stream.of(new FastaSequence(header, line)); } - /** - * @throws IllegalStateException If the last line processed was a header - */ - public void sanityCheckFinished() { - if (m_currentHeader != null) { + @Override + public void sanityCheckFinished() { + if (m_currentHeader.get() != null) { throw new IllegalStateException("The last line processed was a header on line #" + m_lineNumber); } } diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParserI.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParserI.java new file mode 100644 index 0000000..b442a74 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceParserI.java @@ -0,0 +1,12 @@ +package org.pharmgkb.parsers.fasta; + +import org.pharmgkb.parsers.MultilineParser; +import org.pharmgkb.parsers.fasta.model.FastaSequence; + +public interface FastaSequenceParserI extends MultilineParser { + + /** + * @throws IllegalStateException If the last line processed was a header + */ + void sanityCheckFinished(); +} diff --git 
a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriter.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriter.java index d12bc9c..ee38665 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriter.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriter.java @@ -1,6 +1,5 @@ package org.pharmgkb.parsers.fasta; -import org.pharmgkb.parsers.LineStructureWriter; import org.pharmgkb.parsers.fasta.model.FastaSequence; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,22 +16,20 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class FastaSequenceWriter implements LineStructureWriter { +public class FastaSequenceWriter implements FastaSequenceWriterI { private static final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); @Nonnull @Override public Stream apply(@Nonnull FastaSequence sequence) { - if (m_lineNumber.addAndGet(2) % sf_logEvery == 0) { sf_logger.debug("Writing line #{}", m_lineNumber); } - - return Stream.of(">" + sequence.getHeader(), sequence.getSequence()); + return Stream.of(">" + sequence.header(), sequence.sequence()); } @Nonnegative diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterI.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterI.java new file mode 100644 index 0000000..27326fb --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.fasta; + +import org.pharmgkb.parsers.LineStructureWriter; +import org.pharmgkb.parsers.fasta.model.FastaSequence; + +public interface FastaSequenceWriterI extends LineStructureWriter { + +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParser.java 
b/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParser.java index 2509c99..e37e5f7 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParser.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParser.java @@ -2,7 +2,6 @@ import com.google.common.base.Preconditions; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.MultilineParser; import org.pharmgkb.parsers.ObjectBuilder; import org.pharmgkb.parsers.fasta.model.FastaSequence; @@ -20,7 +19,7 @@ * @author Douglas Myers-Turnbull */ @NotThreadSafe -public class MultilineFastaSequenceParser implements MultilineParser { +public class MultilineFastaSequenceParser implements MultilineFastaSequenceParserI { private final boolean m_allowComments; private final boolean m_allowBlankLines; @@ -31,7 +30,7 @@ public class MultilineFastaSequenceParser implements MultilineParser { - if (!m_hitTerm) throw new IllegalStateException("Stream never hit terminal sequence; appears not to have completed"); + if (!m_hitTerm) { + throw new IllegalStateException("Stream never hit terminal sequence; appears not to have completed"); + } }; } @@ -121,7 +123,7 @@ public long nLinesProcessed() { } @NotThreadSafe - public static class Builder implements ObjectBuilder { + public static class Builder implements ObjectBuilder { private boolean m_allowComments = false; private boolean m_allowBlankLines = false; @@ -170,7 +172,7 @@ public Builder setTermination(@Nonnull String terminationString) { @Nonnull @Override - public MultilineFastaSequenceParser build() { + public MultilineFastaSequenceParserI build() { return new MultilineFastaSequenceParser(this); } } diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserI.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserI.java new file mode 100644 index 0000000..f0f213c --- /dev/null +++ 
b/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserI.java @@ -0,0 +1,12 @@ +package org.pharmgkb.parsers.fasta; + +import org.pharmgkb.parsers.MultilineParser; +import org.pharmgkb.parsers.fasta.model.FastaSequence; + +import javax.annotation.Nonnull; + +public interface MultilineFastaSequenceParserI extends MultilineParser { + + @Nonnull + Runnable getCloseHandler(); +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastqParser.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastqParser.java deleted file mode 100644 index 0bb9249..0000000 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/MultilineFastqParser.java +++ /dev/null @@ -1,5 +0,0 @@ -package org.pharmgkb.parsers.fasta; - -public class MultilineFastqParser { - -} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReader.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReader.java index 395a184..ff38d85 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReader.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReader.java @@ -1,24 +1,24 @@ package org.pharmgkb.parsers.fasta; -import com.google.common.collect.ImmutableList; import org.pharmgkb.parsers.BadDataFormatException; +import org.pharmgkb.parsers.io.BufferedRandomAccessFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.ThreadSafe; -import java.io.Closeable; -import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; +import java.util.Collections; import java.util.LinkedHashMap; import 
java.util.Optional; +import java.util.Set; /** * A buffered arbitrary-position interface to read FASTA bases. @@ -52,190 +52,162 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class RandomAccessFastaBaseReader implements Closeable { +public class RandomAccessFastaBaseReader implements RandomAccessFastaBaseReaderI { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final String sf_tempExtension = ".no_breaks"; - private File m_originalFile; + private final Path m_originalFile; - private File m_tempFile; + private final Path m_tempFile; - private LinkedHashMap m_headerToPosition = new LinkedHashMap<>(128); - private BufferedRandomAccessFile m_stream; + private final LinkedHashMap m_headerToPosition = new LinkedHashMap<>(128); + private final BufferedRandomAccessFile m_stream; private String m_currentHeader; - private RandomAccessFastaBaseReader( - @Nonnull File file, @Nonnegative int nBytesInBuffer, @Nonnull File tempFile, + protected RandomAccessFastaBaseReader( + @Nonnull Path file, @Nonnegative int nBytesInBuffer, @Nonnull Path temp, boolean keepTempFileOnExit ) throws IOException, BadDataFormatException { - init(file, nBytesInBuffer, tempFile, keepTempFileOnExit); - } - - private void init( - @Nonnull File file, @Nonnegative int nBytesInBuffer, @Nonnull File temp, - boolean keepTempFileOnExit - ) throws IOException { - m_originalFile = file; - if (temp.exists() && keepTempFileOnExit) { // for safety - throw new IllegalArgumentException("Temporary file " + temp.getPath() + " already exists; delete first"); - } else { - m_tempFile = temp; + if (Files.exists(temp) && keepTempFileOnExit) { // for safety + throw new IllegalArgumentException("Temporary file " + temp + " already exists; delete first"); } + m_tempFile = temp; if (!keepTempFileOnExit) { - m_tempFile.deleteOnExit(); + m_tempFile.toFile().deleteOnExit(); } // we're going to read a FASTA stream base-by-base while: // 1) writing 
each base and each new header // 2) keeping track of how many bytes into the new file we are - try (SimpleFastaBaseReader simple = new SimpleFastaBaseReader.Builder(file).build()) { - try (PrintWriter pw = new PrintWriter(m_tempFile, StandardCharsets.UTF_8)) { + try ( + FastaBaseReaderI simple = new FastaBaseReader.Builder(file).build(); + PrintWriter pw = new PrintWriter(m_tempFile.toFile(), StandardCharsets.UTF_8) + ) { - long nBytesInNewFile = 0; // we'll use this for header positions - String currentHeader = null; // we'll need this to know whether a header is new - Optional nextChar; + long nBytesInNewFile = 0; // we'll use this for header positions + String currentHeader = null; // we'll need this to know whether a header is new + Optional nextChar; - while (((nextChar = simple.readNextBase()).isPresent())) { + while ((nextChar = simple.readNextBase()).isPresent()) { - //noinspection OptionalGetWithoutIsPresent - final String headerRead = simple.currentHeader().get(); // not null because we read a base + final String headerRead = simple.currentHeader().get(); // not null because we read a base - // only write the header if we just read it - if (!headerRead.equals(currentHeader)) { + // only write the header if we just read it + if (!headerRead.equals(currentHeader)) { - if (currentHeader == null) { - // 1 for the >, and 1 for the \n = 2 - nBytesInNewFile += headerRead.length() + 2; - } else { - // 1 for the \n, 1 for the >, and 1 for the \n = 3 - nBytesInNewFile += headerRead.length() + 3; - pw.println(); - } + //noinspection VariableNotUsedInsideIf + if (currentHeader == null) { + // 1 for the >, and 1 for the \n = 2 + nBytesInNewFile += headerRead.length() + 2; + } else { + // 1 for the \n, 1 for the >, and 1 for the \n = 3 + nBytesInNewFile += headerRead.length() + 3; + pw.println(); + } - //noinspection OptionalGetWithoutIsPresent - m_headerToPosition.put(simple.currentHeader().get(), nBytesInNewFile); - sf_logger.debug("{} -----> {} in {}", 
simple.currentHeader(), nBytesInNewFile, m_tempFile); - //noinspection OptionalGetWithoutIsPresent - currentHeader = simple.currentHeader().get(); - pw.println('>' + currentHeader); + m_headerToPosition.put(simple.currentHeader().get(), nBytesInNewFile); + sf_logger.debug("{} -----> {} in {}", simple.currentHeader(), nBytesInNewFile, m_tempFile); + currentHeader = simple.currentHeader().get(); + pw.println('>' + currentHeader); - } + } - nBytesInNewFile++; - pw.print(nextChar.get()); - if (nBytesInNewFile % 1000 == 0) { - pw.flush(); - } - } + nBytesInNewFile++; + pw.print(nextChar.get()); + if (nBytesInNewFile % 1000 == 0) { + pw.flush(); + } + } - pw.flush(); + pw.flush(); - } } - m_stream = new BufferedRandomAccessFile(m_tempFile.getPath(), "r", nBytesInBuffer); + m_stream = new BufferedRandomAccessFile(m_tempFile, "r", nBytesInBuffer); } - /** - * @return The list of headers (everything after the > sign) in the FASTA file, in order - */ - @Nonnull - public ImmutableList getHeaders() { - return ImmutableList.copyOf(m_headerToPosition.keySet()); + @Override + @Nonnull + public Set headers() { + return Collections.unmodifiableSet(m_headerToPosition.keySet()); } - /** - * - * @param header The exact FASTA header, without an initial > sign - * @param position The number of bases, starting at 0, from the first - * @return The nucleotide or amino acid at that position - * @throws IOException IO errors - * @throws java.lang.IllegalArgumentException If a header with that name is not in the FASTA file - */ - public synchronized char read( - @Nonnull String header, - @Nonnegative long position - ) throws IOException { + @Override + public char read(@Nonnull String header, @Nonnegative long position) throws IOException { return read(header, position, 1).charAt(0); } - /** - * - * @param header The exact FASTA header, without an initial > sign - * @param position The number of bases, starting at 0, from the first - * @return The nucleotide or amino acid at that position - * 
@throws IOException IO errors - * @throws java.lang.IllegalArgumentException If a header with that name is not in the FASTA file - */ - public synchronized String read( - @Nonnull String header, - @Nonnegative long position, - @Nonnegative long length - ) throws IOException { + @Override + public String read( + @Nonnull String header, + @Nonnegative long position, + @Nonnegative long length + ) throws IOException { if (!m_headerToPosition.containsKey(header)) { throw new IllegalArgumentException("Header " + header + " not found in FASTA file " + m_originalFile); } - m_currentHeader = header; long start = m_headerToPosition.get(header); if (start + position < 0) { // throw IOException here to include seek offset throw new IOException("Negative seek offset of " + (start + position) + " reading FASTA file " + m_tempFile); } - m_stream.seek(start + position); - StringBuilder builder = new StringBuilder(512); - for (int i = 0; i < length; i++) { - builder.append((char)m_stream.read()); - } - return builder.toString(); + synchronized (m_stream) { + m_currentHeader = header; + m_stream.seek(start + position); + StringBuilder builder = new StringBuilder(1024); + for (int i = 0; i < length; i++) { + builder.append((char) m_stream.read()); + } + return builder.toString(); + } } - /** - * @return The header that was last read when calling {@link #read}, or null if nothing has been read yet - */ - @Nonnull - public synchronized Optional currentHeader() { - return Optional.of(m_currentHeader); + @Override + @Nonnull + public Optional currentHeader() { + synchronized (m_stream) { + return Optional.of(m_currentHeader); + } } - /** - * @return The position that was last read when calling {@link #read}, or 0 if nothing has been read yet - */ - @Nonnegative - public synchronized long currentPosition() throws IOException { - return m_stream.getFilePointer(); + @Override + @Nonnegative + public long currentPosition() throws IOException { + synchronized (m_stream) { + return 
m_stream.getFilePointer(); + } } @Override - public synchronized void close() throws IOException { - m_stream.close(); + public void close() throws IOException { + synchronized (m_stream) { + m_stream.close(); + } } @NotThreadSafe public static class Builder { - private File m_file; - private File m_tempFile; + private final Path m_file; + private Path m_tempFile; private int m_nCharsInBuffer; private boolean m_keepTempFileOnExit; public Builder(@Nonnull Path file) { - this(file.toFile()); - } - - public Builder(@Nonnull File file) { - m_file = file; - m_nCharsInBuffer = 2048; - m_tempFile = new File(file.getPath() + sf_tempExtension); + m_keepTempFileOnExit = false; + m_file = file; + m_nCharsInBuffer = 2048; + m_tempFile = Path.of(file + sf_tempExtension); } @Nonnull - public Builder setnCharsInBuffer(@Nonnegative int nCharsInBuffer) { + public Builder setNCharsInBuffer(@Nonnegative int nCharsInBuffer) { m_nCharsInBuffer = nCharsInBuffer; return this; } @Nonnull - public Builder setTempFile(@Nonnull File tempFile) { + public Builder setTempFile(@Nonnull Path tempFile) { m_tempFile = tempFile; return this; } @@ -247,19 +219,27 @@ public Builder keepTempFileOnExit() { } @Nonnull - public RandomAccessFastaBaseReader build() throws IOException { + public RandomAccessFastaBaseReaderI build() throws IOException { return new RandomAccessFastaBaseReader(m_file, m_nCharsInBuffer, m_tempFile, m_keepTempFileOnExit); } } @Override public String toString() { - return "RandomAccessFastaBaseReader{" + - "originalFile=" + m_originalFile + - ", tempFile=" + m_tempFile + - ", headerToPosition=" + m_headerToPosition + - ", stream=" + m_stream + - ", currentHeader='" + m_currentHeader + '\'' + - '}'; + synchronized (m_stream) { + return "RandomAccessFastaBaseReader{" + + "originalFile=" + + m_originalFile + + ", tempFile=" + + m_tempFile + + ", headerToPosition=" + + m_headerToPosition + + ", stream=" + + m_stream + + ", currentHeader='" + + m_currentHeader + + '\'' + + '}'; + } } } 
diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderI.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderI.java new file mode 100644 index 0000000..8b0ed8e --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderI.java @@ -0,0 +1,59 @@ +package org.pharmgkb.parsers.fasta; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.io.Closeable; +import java.io.IOException; +import java.util.Optional; +import java.util.Set; + +public interface RandomAccessFastaBaseReaderI extends Closeable { + + /** + * @param header The exact FASTA header, without an initial > sign + * @param position The number of bases, starting at 0, from the first + * @return The nucleotide or amino acid at that position + * @throws IOException IO errors + * @throws IllegalArgumentException If a header with that name is not in the FASTA file + */ + char read( + @Nonnull + String header, + @Nonnegative + long position + ) throws IOException; + + /** + * @param header The exact FASTA header, without an initial > sign + * @param position The number of bases, starting at 0, from the first + * @return The nucleotide or amino acid at that position + * @throws IOException IO errors + * @throws IllegalArgumentException If a header with that name is not in the FASTA file + */ + String read( + @Nonnull + String header, + @Nonnegative + long position, + @Nonnegative + long length + ) throws IOException; + + /** + * @return The header that was last read when calling {@link #read}, or null if nothing has been read yet + */ + @Nonnull + Optional currentHeader(); + + /** + * @return The position that was last read when calling {@link #read}, or 0 if nothing has been read yet + */ + @Nonnegative + long currentPosition() throws IOException; + + /** + * @return The list of headers (everything after the > sign) in the FASTA file, in order + */ + @Nonnull + Set headers(); +} diff --git 
a/fasta/src/main/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReader.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReader.java deleted file mode 100644 index 13542c4..0000000 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReader.java +++ /dev/null @@ -1,213 +0,0 @@ -package org.pharmgkb.parsers.fasta; - -import javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import javax.annotation.concurrent.NotThreadSafe; -import javax.annotation.concurrent.ThreadSafe; -import java.io.*; -import java.nio.CharBuffer; -import java.nio.file.Path; -import java.util.Optional; - -/** - * A character stream for FASTA, which reads base-by-base. - * - * The FASTA grammar is taken to be: - *
- *     fasta      ::= '>'header newline sequence (newline fasta)?
- *     header     ::= [^\n\r]+
- *     sequence   ::= [^\n\r]+
- * 
- * Where {@code newline} is taken to be the platform-dependent newline sequence. - * Notice that, even though the newline is platform-dependent, neither the header nor sequence can contain a CL or LF, - * which is a platform-independent choice. Also notice that comments and empty lines are not part of the grammar. - * - * - * Example usage: - *
- * >gene_1
- * AT
- * >gene_2
- * GC
- * 
- * {@code - * FastaStream stream = new FastaStream(file); - * stream.currentHeader() // returns Optional.empty() - * stream.readNextBase(); // returns 'A' - * stream.currentHeader() // returns "gene_1" - * stream.readNextBase(); // returns 'T' - * stream.currentHeader() // returns "gene_2" - * stream.readNextBase(); // returns 'G' - * stream.currentHeader() // returns "gene_2" - * stream.readNextBase(); // returns 'C' - * stream.readNextBase(); // returns Optional.empty() - * } - * - * @author Douglas Myers-Turnbull - */ -@ThreadSafe -public class SimpleFastaBaseReader implements Closeable { - - private final Reader m_reader; - - private CharBuffer m_buffer; - private String m_header; - private long m_nBasesSinceHeader; - private long m_nBasesReadTotal; - private long m_nHeadersRead; - private long m_nBytesReadTotal; - - private SimpleFastaBaseReader(@Nonnull Reader reader, @Nonnegative int nCharsInBuffer) throws IOException { - m_reader = reader; - m_buffer = CharBuffer.allocate(nCharsInBuffer); - initBuffer(); - } - - @Override - public synchronized void close() throws IOException { - m_reader.close(); - } - - /** - * @return The next base (nucleotide or amino acid) in the stream - * @throws IOException For IO errors - */ - @Nonnull - public synchronized Optional readNextBase() throws IOException { - try { - Character base; - do { - base = doRead(); - if (base == null) { - return Optional.empty(); - } - } while (base == '\n' || base == '\r'); - if (base == '>') { - readHeader(); - base = doRead(); - } - // no matter what, we always read just one base - m_nBasesSinceHeader++; - m_nBasesReadTotal++; - return Optional.ofNullable(base); - } catch (RuntimeException e) { // record more info - throw new IOException( - "Error reading; " + m_nBytesReadTotal + " bytes read; on header " + m_header - + "; buffer has " + m_buffer.remaining() + " remaining", - e - ); - } - } - - /** - * @return The last header read, or null if nothing was read yet - */ - @Nonnull - public 
synchronized Optional currentHeader() { - return Optional.ofNullable(m_header); - } - - @Nonnegative - public synchronized long nHeadersRead() { - return m_nHeadersRead; - } - - @Nonnegative - public synchronized long nBasesReadTotal() { - return m_nBasesReadTotal; - } - - @Nonnegative - public synchronized long nBasesSinceHeader() { - return m_nBasesSinceHeader; - } - - @Nonnegative - public synchronized long nBytesReadTotal() { - return m_nBytesReadTotal; - } - - @Nullable - private Character doRead() throws IOException { - if (!m_buffer.hasRemaining()) { // fill buffer if it's empty - if (!initBuffer()) { // if the stream itself is empty - return null; - } - } - m_nBytesReadTotal++; - return m_buffer.get(); - } - - private boolean initBuffer() throws IOException { - m_buffer.clear(); - if (m_reader.read(m_buffer) == -1) { // actually read - return false; - } - m_buffer.flip(); - return true; - } - - private void readHeader() throws IOException { - StringBuilder header = new StringBuilder(256); - Character c; - do { - c = doRead(); - if (c == null) { - throw new EOFException("Stream ended unexpectedly in header"); - } - if (c != '\n' && c != '\r') { - header.append(c); - } - } while (c != '\n' && c != '\r'); - m_header = header.toString(); - m_nBasesSinceHeader = 0; - m_nHeadersRead++; - } - - @NotThreadSafe - public static class Builder { - - private Reader m_reader; - private int m_nCharsInBuffer; - - public Builder(@Nonnull Path file) throws FileNotFoundException { - this(new FileReader(file.toFile())); - } - - @Nonnull - public Builder(@Nonnull File file) throws FileNotFoundException { - this(new FileReader(file)); - } - - @Nonnull - public Builder(@Nonnull Reader reader) { - m_reader = reader; - m_nCharsInBuffer = 2048; - } - - @Nonnull - public Builder setnCharsInBuffer(@Nonnegative int nCharsInBuffer) { - m_nCharsInBuffer = nCharsInBuffer; - return this; - } - - @Nonnull - public SimpleFastaBaseReader build() throws IOException { - return new 
SimpleFastaBaseReader(m_reader, m_nCharsInBuffer); - } - } - - @Override - public String toString() { - return "SimpleFastaBaseReader{" + - "reader=" + m_reader + - ", buffer=" + m_buffer + - ", header='" + m_header + '\'' + - ", nBasesSinceHeader=" + m_nBasesSinceHeader + - ", nBasesReadTotal=" + m_nBasesReadTotal + - ", nHeadersRead=" + m_nHeadersRead + - ", nBytesReadTotal=" + m_nBytesReadTotal + - '}'; - } -} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignment.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignment.java new file mode 100644 index 0000000..ebd4151 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignment.java @@ -0,0 +1,40 @@ +package org.pharmgkb.parsers.fasta.model; + +import com.google.common.base.Preconditions; + +import javax.annotation.Nonnull; + +public class FastaAlignment implements FastaAlignmentI { + + private final String m_header; + + private final String m_sequence; + + /** + * @throws IllegalArgumentException If the header or sequence contains either newline character LF (\n) or CR (\n) + */ + public FastaAlignment(@Nonnull String header, @Nonnull String sequence) { + Preconditions.checkArgument( + !header.contains("\n") && !header.contains("\r"), + "Header \"" + header + "\" contains a newline" + ); + Preconditions.checkArgument( + !sequence.contains("\n") && !sequence.contains("\r"), + "Sequence \"" + sequence + "\" contains a newline" + ); + m_header = header; + m_sequence = sequence; + } + + @Nonnull + @Override + public String header() { + return m_header; + } + + @Nonnull + @Override + public String sequence() { + return m_sequence; + } +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignmentI.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignmentI.java new file mode 100644 index 0000000..78f3033 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaAlignmentI.java @@ -0,0 +1,63 
@@ +package org.pharmgkb.parsers.fasta.model; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.util.List; +import java.util.Optional; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public interface FastaAlignmentI extends FastaSequenceI { + + @Nonnull + default Stream> sequenceAsOptionalStream() { + return sequence().chars() + .mapToObj(i -> '.' != i && '-' != i && '~' != i ? Optional.empty() : Optional.of((char)i)); + } + + @Nonnegative + default int alignedLength() { + return (int)sequence().chars() + .filter(i -> '.' != (char) i && '-' != (char) i && '~' != (char) i).count(); + } + + @Nonnegative + default int unalignedLength() { + return (int)sequence().chars() + .filter(i -> '.' == (char) i || '-' == (char) i || '~' == (char) i).count(); + } + + @Nonnegative + default int leadingLength() { + return (int)sequence().chars().takeWhile(i -> '~' == (char) i).count(); + } + + @Nonnegative + default int trailingLength() { + var seq = new StringBuilder(sequence()).reverse().toString(); + return (int)seq.chars().takeWhile(i -> '~' == (char) i).count(); + } + + @Nonnull + default List unalignedPositions() { + return IntStream.range(0, length()) + .map( + i -> '.' == sequence().charAt(i) + || '-' == sequence().charAt(i) + || '~' == sequence().charAt(i) + ? i : -1 + ).filter(i -> -1 < i).boxed().toList(); + } + + @Nonnull + default List alignedPositions() { + return IntStream.range(0, length()) + .map( + i -> '.' != sequence().charAt(i) + && '-' != sequence().charAt(i) + && '~' != sequence().charAt(i) + ? 
i : -1 + ).filter(i -> -1 < i).boxed().toList(); + } + +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequence.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequence.java index ac1d008..6d2ddb2 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequence.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequence.java @@ -13,7 +13,7 @@ * @author Douglas Myers-Turnbull */ @Immutable -public class FastaSequence implements Comparable, HasSequence { +public final class FastaSequence implements Comparable, FastaSequenceI { private final String m_header; @@ -37,13 +37,18 @@ public FastaSequence(@Nonnull String header, @Nonnull String sequence) { @Nonnull @Override - public String getHeader() { return m_header; } + public String header() { return m_header; } @Nonnull @Override - public String getSequence() { return m_sequence; } + public String sequence() { return m_sequence; } - @Nonnull + @Override + public int length() { + return sequence().length(); + } + + @Nonnull @Override public String toString() { return ">" + m_header + System.lineSeparator() + m_sequence; diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/HasSequence.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequenceI.java similarity index 57% rename from fasta/src/main/java/org/pharmgkb/parsers/fasta/model/HasSequence.java rename to fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequenceI.java index 91a2040..5481b45 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/HasSequence.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastaSequenceI.java @@ -1,39 +1,44 @@ package org.pharmgkb.parsers.fasta.model; import org.pharmgkb.parsers.BadDataFormatException; +import org.pharmgkb.parsers.fasta.model.extra.IlluminaSequenceId; import org.pharmgkb.parsers.model.AminoAcidCode; import org.pharmgkb.parsers.model.NucleotideCode; +import 
javax.annotation.Nonnegative; import javax.annotation.Nonnull; import java.util.stream.Stream; -public interface HasSequence { +public interface FastaSequenceI { @Nonnull - String getHeader(); + String header(); @Nonnull - String getSequence(); + String sequence(); @Nonnull default IlluminaSequenceId headerToIlluminaId() throws BadDataFormatException { - return IlluminaSequenceId.parse(this.getHeader()); + return IlluminaSequenceId.parse(header()); } @Nonnull default Stream sequenceAsStream() { - return getSequence().chars().mapToObj(i -> (char)i); + return sequence().chars().mapToObj(i -> (char)i); } @Nonnull default Stream sequenceToNucleotides() throws BadDataFormatException { - return this.sequenceAsStream().map(NucleotideCode::fromChar); + return sequenceAsStream().map(NucleotideCode::fromChar); } @Nonnull default Stream sequenceToAminoAcids() throws BadDataFormatException { - return this.sequenceAsStream().map(AminoAcidCode::fromChar); + return sequenceAsStream().map(AminoAcidCode::fromChar); } + @Nonnegative default int length() { + return sequence().length(); + } } diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequence.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequence.java index 5e63de5..0d86e05 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequence.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequence.java @@ -13,7 +13,7 @@ * @author Douglas Myers-Turnbull */ @Immutable -public class FastqSequence implements Comparable, HasSequence, HasScores { +public final class FastqSequence implements Comparable, FastaSequenceI, FastqSequenceI { private final String m_header; private final String m_sequence; @@ -38,20 +38,23 @@ public FastqSequence(@Nonnull String header, @Nonnull String sequence, @Nonnull @Nonnull @Override - public String getHeader() { return m_header; } + public String header() { return m_header; } @Nonnull @Override - public String getSequence() { 
return m_sequence; } + public String sequence() { return m_sequence; } - @Nonnull + @Nonnull @Override - public String getScores() { return m_scores; } + public String scores() { return m_scores; } @Nonnull @Override public String toString() { - return ">" + m_header + System.lineSeparator() + m_sequence + System.lineSeparator() + "+" + System.lineSeparator() + m_scores; + return ">" + m_header + System.lineSeparator() + + m_sequence + System.lineSeparator() + + "+" + System.lineSeparator() + + m_scores; } @Override diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/HasScores.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequenceI.java similarity index 67% rename from fasta/src/main/java/org/pharmgkb/parsers/fasta/model/HasScores.java rename to fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequenceI.java index 3de938c..7d43251 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/HasScores.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/FastqSequenceI.java @@ -1,18 +1,19 @@ package org.pharmgkb.parsers.fasta.model; import org.pharmgkb.parsers.BadDataFormatException; +import org.pharmgkb.parsers.fasta.model.extra.Phred33Score; import javax.annotation.Nonnull; import java.util.stream.Stream; -public interface HasScores { +public interface FastqSequenceI extends FastaSequenceI { @Nonnull - String getScores(); + String scores(); @Nonnull default Stream scoresAsStream() { - return getScores().chars().mapToObj(i -> (char)i); + return scores().chars().mapToObj(i -> (char)i); } @Nonnull diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/IlluminaSequenceId.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/IlluminaSequenceId.java deleted file mode 100644 index 49dee4c..0000000 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/IlluminaSequenceId.java +++ /dev/null @@ -1,84 +0,0 @@ -package org.pharmgkb.parsers.fasta.model; - -import com.google.common.base.Splitter; - 
-import javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import java.util.List; -import java.util.stream.Collectors; - -public class IlluminaSequenceId { - - private final String m_instrument; - private final String m_runId; - private final String m_flowcellId; - private final String m_flowcellLane; - private final String m_titleInFlowcellLane; - private final int m_clusterXCoordinateInTile; - private final int m_clusterYCoordinateInFile; - private final byte m_pairMember; - private final boolean m_readIsFiltered; - private final int m_controlBits; - private final String m_indexSequence; - - public IlluminaSequenceId( - @Nonnull String instrument, @Nonnull String runId, - @Nonnull String flowcellId, @Nonnull String flowcellLane, @Nonnull String titleInFlowcellLane, - int clusterXCoordinateInTile, int clusterYCoordinateInFile, - @Nonnegative byte pairMember, - boolean readIsFiltered, - @Nonnegative int controlBits, - @Nonnull String indexSequence - ) { - m_instrument = instrument; - m_runId = runId; - m_flowcellId = flowcellId; - m_flowcellLane = flowcellLane; - m_titleInFlowcellLane = titleInFlowcellLane; - m_clusterXCoordinateInTile = clusterXCoordinateInTile; - m_clusterYCoordinateInFile = clusterYCoordinateInFile; - m_pairMember = pairMember; - m_readIsFiltered = readIsFiltered; - m_controlBits = controlBits; - m_indexSequence = indexSequence; - } - - @Nonnull - public static IlluminaSequenceId parse(@Nonnull String header) { - List fields = Splitter.on(":") - .splitToList(header).stream() - .map(String::trim) - .collect(Collectors.toList()); - if (!fields.get(8).equals("Y") && !fields.get(8).equals("N")) { - throw new IllegalArgumentException("Value " + fields.get(8) + " must be Y or N"); - } - return new IlluminaSequenceId( - fields.get(0), - fields.get(1), - fields.get(2), - fields.get(3), - fields.get(4), - Integer.parseInt(fields.get(5)), - Integer.parseInt(fields.get(6)), - Byte.parseByte(fields.get(7)), - fields.get(8).equals("Y"), - 
Integer.parseUnsignedInt(fields.get(9)), - fields.get(10) - ); - } - // ex: @EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18: - - /* - EAS139 the unique instrument name - 136 the run id - FC706VJ the flowcell id - 2 flowcell lane - 2104 tile number within the flowcell lane - 15343 'x'-coordinate of the cluster within the tile - 197393 'y'-coordinate of the cluster within the tile - 1 the member of a pair, 1 or 2 (paired-end or mate-pair reads only) - Y Y if the read is filtered, N otherwise - 18 0 when none of the control bits are on, otherwise it is an even number - ATCACG index sequence - */ -} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/IlluminaSequenceId.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/IlluminaSequenceId.java new file mode 100644 index 0000000..9ebe17b --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/IlluminaSequenceId.java @@ -0,0 +1,73 @@ +package org.pharmgkb.parsers.fasta.model.extra; + +import com.google.common.base.Splitter; + +import javax.annotation.Nonnull; +import java.util.List; + +/** + * @param instrument + * @param runId + * @param flowcellId + * @param flowcellLane + * @param titleInFlowcellLane + * @param clusterXCoordinateInTile + * @param clusterYCoordinateInFile + * @param pairMember + * @param readIsFiltered + * @param controlBits + * @param indexSequence + */ +public record IlluminaSequenceId( + String instrument, + String runId, + String flowcellId, + String flowcellLane, + String titleInFlowcellLane, + int clusterXCoordinateInTile, + int clusterYCoordinateInFile, + byte pairMember, + boolean readIsFiltered, + int controlBits, + String indexSequence +) { + + @Nonnull + public static IlluminaSequenceId parse(@Nonnull String header) { + List fields = Splitter.on(":") + .splitToList(header).stream() + .map(String::trim) + .toList(); + if (!"Y".equals(fields.get(8)) && !"N".equals(fields.get(8))) { + throw new IllegalArgumentException("Value " + 
fields.get(8) + " must be Y or N"); + } + return new IlluminaSequenceId( + fields.get(0), + fields.get(1), + fields.get(2), + fields.get(3), + fields.get(4), + Integer.parseInt(fields.get(5)), + Integer.parseInt(fields.get(6)), + Byte.parseByte(fields.get(7)), + "Y".equals(fields.get(8)), + Integer.parseUnsignedInt(fields.get(9)), + fields.get(10) + ); + } + // ex: @EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18: + + /* + EAS139 the unique instrument name + 136 the run id + FC706VJ the flowcell id + 2 flowcell lane + 2104 tile number within the flowcell lane + 15343 'x'-coordinate of the cluster within the tile + 197393 'y'-coordinate of the cluster within the tile + 1 the member of a pair, 1 or 2 (paired-end or mate-pair reads only) + Y Y if the read is filtered, N otherwise + 18 0 when none of the control bits are on, otherwise it is an even number + ATCACG index sequence + */ +} diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/Phred33Score.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/Phred33Score.java similarity index 79% rename from fasta/src/main/java/org/pharmgkb/parsers/fasta/model/Phred33Score.java rename to fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/Phred33Score.java index 5c2d2e0..e8ddf70 100644 --- a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/Phred33Score.java +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/Phred33Score.java @@ -1,13 +1,15 @@ -package org.pharmgkb.parsers.fasta.model; +package org.pharmgkb.parsers.fasta.model.extra; import javax.annotation.Nonnegative; import javax.annotation.concurrent.Immutable; import java.math.BigDecimal; import java.util.Objects; -@SuppressWarnings("CharacterComparison") +/** + * + */ @Immutable -public class Phred33Score implements Comparable { +public final class Phred33Score implements Comparable { private final char m_character; @@ -26,18 +28,18 @@ private Phred33Score(char character) { m_character = character; } - public char 
getCharacter() { + public char character() { return m_character; } @Nonnegative - public int getValue() { + public int value() { return (int)m_character + 33; } @Nonnegative public BigDecimal toLog10SangerProbability() { - return new BigDecimal(getValue()).scaleByPowerOfTen(-1).negate(); + return new BigDecimal(value()).scaleByPowerOfTen(-1).negate(); } @Override @@ -60,6 +62,6 @@ public int hashCode() { @Override public int compareTo(Phred33Score o) { - return Integer.compare(getValue(), o.getValue()); + return Integer.compare(value(), o.value()); } } diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/package-info.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/package-info.java new file mode 100644 index 0000000..44002e3 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/extra/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.fasta.model.extra; diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/package-info.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/package-info.java new file mode 100644 index 0000000..50f3e90 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.fasta.model; diff --git a/fasta/src/main/java/org/pharmgkb/parsers/fasta/package-info.java b/fasta/src/main/java/org/pharmgkb/parsers/fasta/package-info.java new file mode 100644 index 0000000..5288d81 --- /dev/null +++ b/fasta/src/main/java/org/pharmgkb/parsers/fasta/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.fasta; diff --git a/fasta/src/test/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReaderTest.java b/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaBaseReaderTest.java similarity index 72% rename from fasta/src/test/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReaderTest.java rename to fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaBaseReaderTest.java index ba6f508..08c84c7 100644 --- 
a/fasta/src/test/java/org/pharmgkb/parsers/fasta/SimpleFastaBaseReaderTest.java +++ b/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaBaseReaderTest.java @@ -12,27 +12,26 @@ import java.util.Map; import java.util.Optional; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.*; /** - * Tests {@link SimpleFastaBaseReader}. + * Tests {@link FastaBaseReader}. * @author Douglas Myers-Turnbull */ -public class SimpleFastaBaseReaderTest { +public class FastaBaseReaderTest { @Test public void testReadNext() throws Exception { - File file = Paths.get(SimpleFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); - try (SimpleFastaBaseReader stream = new SimpleFastaBaseReader.Builder(file).build()) { + File file = Paths.get(FastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); + try (FastaBaseReaderI stream = new FastaBaseReader.Builder(file).build()) { test(stream); } } @Test public void testReadNextWithRefill() throws Exception { - File file = Paths.get(SimpleFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); - try (SimpleFastaBaseReader stream = new SimpleFastaBaseReader.Builder(file) + File file = Paths.get(FastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); + try (FastaBaseReaderI stream = new FastaBaseReader.Builder(file) .setnCharsInBuffer(3) .build()) { test(stream); @@ -41,16 +40,16 @@ public void testReadNextWithRefill() throws Exception { @Test public void testReadNextWithBlankLine() throws Exception { - File file = Paths.get(SimpleFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); + File file = Paths.get(FastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); String string = Files.asCharSource(file, Charset.defaultCharset()).read() + "\n"; - try (SimpleFastaBaseReader stream = new SimpleFastaBaseReader.Builder(new 
StringReader(string)) + try (FastaBaseReaderI stream = new FastaBaseReader.Builder(new StringReader(string)) .setnCharsInBuffer(3) .build()) { test(stream); } } - private void test(SimpleFastaBaseReader stream) throws IOException { + private void test(FastaBaseReaderI stream) throws IOException { Map expected = new LinkedHashMap<>(); expected.put("1", "atgc"); @@ -80,7 +79,7 @@ private void test(SimpleFastaBaseReader stream) throws IOException { assertEquals(1 + 4 + i, stream.nBasesReadTotal()); assertEquals(2, stream.nHeadersRead()); assertEquals(expected.get("2").charAt(i), (char)next.get()); - assertEquals(1 + 4 + prevBytesRead + i + (i / 4), stream.nBytesReadTotal()); + assertEquals(1 + 4 + prevBytesRead + i + i / 4, stream.nBytesReadTotal()); } for (int i = 0; i < 16 * 2; i++) { @@ -97,4 +96,4 @@ private void test(SimpleFastaBaseReader stream) throws IOException { private String repeat(String string, int n) { return String.valueOf(string).repeat(Math.max(0, n)); } -} \ No newline at end of file +} diff --git a/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceParserTest.java b/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceParserTest.java index 6b0b10b..cda3c46 100644 --- a/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceParserTest.java +++ b/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceParserTest.java @@ -74,9 +74,9 @@ public void testMissingSequence() { @Test public void testSanityCheckFinished() { - FastaSequenceParser parser = new FastaSequenceParser(); + FastaSequenceParserI parser = new FastaSequenceParser(); parser.parseAll(Stream.of(">xxx")).count(); IllegalStateException e = assertThrows(IllegalStateException.class, () -> parser.parseAll(Stream.of(">xxx")).count()); assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("last line processed")); } -} \ No newline at end of file +} diff --git a/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterTest.java 
b/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterTest.java index dd43e9a..b87db2c 100644 --- a/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterTest.java +++ b/fasta/src/test/java/org/pharmgkb/parsers/fasta/FastaSequenceWriterTest.java @@ -7,7 +7,7 @@ import java.util.List; import java.util.stream.Collectors; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link FastaSequenceWriter}. @@ -34,4 +34,4 @@ public void testApply() { List lines = seqs.stream().flatMap(new FastaSequenceWriter()).collect(Collectors.toList()); assertEquals(expected, lines); } -} \ No newline at end of file +} diff --git a/fasta/src/test/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserTest.java b/fasta/src/test/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserTest.java index bb86c0e..25cb42d 100644 --- a/fasta/src/test/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserTest.java +++ b/fasta/src/test/java/org/pharmgkb/parsers/fasta/MultilineFastaSequenceParserTest.java @@ -7,8 +7,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.*; /** * Reads a FASTA file line by line. 
@@ -20,7 +19,7 @@ public class MultilineFastaSequenceParserTest { @Test public void testApply1() { - MultilineFastaSequenceParser parser = new MultilineFastaSequenceParser.Builder().setTermination((char)0x01).build(); + MultilineFastaSequenceParserI parser = new MultilineFastaSequenceParser.Builder().setTermination((char)0x01).build(); List seqs = parser.collectAll(Stream.of(">header1", "ns1p1", "ns1p2", ">header2", "ns2p1").onClose(parser.getCloseHandler())); assertEquals(2, seqs.size()); assertEquals(new FastaSequence("header1", "ns1p1ns1p2"), seqs.get(0)); @@ -29,7 +28,7 @@ public void testApply1() { @Test public void testApply2() { - MultilineFastaSequenceParser parser = new MultilineFastaSequenceParser.Builder().setTermination((char)0x01).build(); + MultilineFastaSequenceParserI parser = new MultilineFastaSequenceParser.Builder().setTermination((char)0x01).build(); Stream lines = Stream.of(">header1", "ns1p1", "ns1p2", ">header2", "ns2p1"); List seqs = parser.parseAll(lines).collect(Collectors.toList()); assertEquals(2, seqs.size()); @@ -39,8 +38,8 @@ public void testApply2() { @Test public void testApply3() { - MultilineFastaSequenceParser parser = new MultilineFastaSequenceParser.Builder().setTermination((char)0x01).build(); + MultilineFastaSequenceParserI parser = new MultilineFastaSequenceParser.Builder().setTermination((char)0x01).build(); Stream stream = Stream.of(">header1", "ns1p1", "ns1p2", ">header2", "ns2p1").flatMap(parser); assertThrows(IllegalStateException.class, () -> stream.collect(Collectors.toList())); } -} \ No newline at end of file +} diff --git a/fasta/src/test/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderTest.java b/fasta/src/test/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderTest.java index 4d4159f..dcedaec 100644 --- a/fasta/src/test/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderTest.java +++ b/fasta/src/test/java/org/pharmgkb/parsers/fasta/RandomAccessFastaBaseReaderTest.java @@ -2,10 +2,10 @@ 
import org.junit.jupiter.api.Test; -import java.io.File; +import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Arrays; import java.util.Locale; +import java.util.Set; import static org.junit.jupiter.api.Assertions.*; @@ -17,25 +17,25 @@ public class RandomAccessFastaBaseReaderTest { @Test public void testHeaders() throws Exception { - File file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); - RandomAccessFastaBaseReader stream = new RandomAccessFastaBaseReader.Builder(file).setnCharsInBuffer(5).build(); - assertEquals(Arrays.asList("1", "2", "3"), stream.getHeaders()); + Path file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()); + RandomAccessFastaBaseReaderI stream = new RandomAccessFastaBaseReader.Builder(file).setNCharsInBuffer(5).build(); + assertEquals(Set.of("1", "2", "3"), stream.headers()); } @Test public void testFileExists() throws Exception { - File file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); - RandomAccessFastaBaseReader.Builder builder = new RandomAccessFastaBaseReader.Builder(file).setnCharsInBuffer(5) + Path file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()); + RandomAccessFastaBaseReader.Builder builder = new RandomAccessFastaBaseReader.Builder(file).setNCharsInBuffer(5) .keepTempFileOnExit(); IllegalArgumentException e = assertThrows(IllegalArgumentException.class, builder::build); assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("already exists")); - assertTrue(e.getMessage().toLowerCase(Locale.ROOT).startsWith("temporary file " + file.getPath() + ".no_breaks")); + assertTrue(e.getMessage().toLowerCase(Locale.ROOT).startsWith("temporary file " + file + ".no_breaks")); } @SuppressWarnings("UseOfSystemOutOrSystemErr") public void debugTest() throws Exception { - File file = 
Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); - RandomAccessFastaBaseReader stream = new RandomAccessFastaBaseReader.Builder(file).setnCharsInBuffer(5).build(); + Path file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()); + RandomAccessFastaBaseReaderI stream = new RandomAccessFastaBaseReader.Builder(file).setNCharsInBuffer(5).build(); System.out.println("=========================1========================="); for (int i = 0; i < 4; i++) { System.out.println(i + " = " + stream.read("1", i)); @@ -52,8 +52,8 @@ public void debugTest() throws Exception { @Test public void testRead() throws Exception { - File file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()).toFile(); - RandomAccessFastaBaseReader stream = new RandomAccessFastaBaseReader.Builder(file).setnCharsInBuffer(5).build(); + Path file = Paths.get(RandomAccessFastaBaseReaderTest.class.getResource("test1.fasta").toURI()); + RandomAccessFastaBaseReaderI stream = new RandomAccessFastaBaseReader.Builder(file).setNCharsInBuffer(5).build(); assertEquals(Character.valueOf('a'), (Character)stream.read("1", 0)); assertEquals(Character.valueOf('t'), (Character)stream.read("1", 1)); assertEquals(Character.valueOf('c'), (Character)stream.read("1", 3)); @@ -64,4 +64,4 @@ public void testRead() throws Exception { assertEquals(Character.valueOf('t'), (Character)stream.read("1", 1)); assertEquals(Character.valueOf('c'), (Character)stream.read("3", 16)); } -} \ No newline at end of file +} diff --git a/genbank/build.gradle b/genbank/build.gradle index 41bb3c5..75740b8 100644 --- a/genbank/build.gradle +++ b/genbank/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') + api project(':core') } diff --git a/genbank/lombok.config b/genbank/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/genbank/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 
'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParser.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParser.java index b78b4fc..0dabbdd 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParser.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParser.java @@ -4,8 +4,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.MultilineParser; import org.pharmgkb.parsers.genbank.model.*; +import org.pharmgkb.parsers.genbank.model.annotations.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,19 +30,20 @@ * @author Douglas Myers-Turnbull */ @NotThreadSafe -public class GenbankParser implements MultilineParser { +public class GenbankParser implements GenbankParserI { private static final Pattern sf_plusSpace = Pattern.compile(" +"); private static final Pattern sf_pattern = Pattern.compile("/([^=]+)=(\"?[^\"]+\"?)"); private static final long sf_logEvery = 10000L; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); private String m_currentLine = ""; @Nonnull @Override - public Stream parseAll(@Nonnull Stream stream) throws UncheckedIOException, BadDataFormatException { + public Stream parseAll(@Nonnull Stream stream) + throws UncheckedIOException, BadDataFormatException { return stream.flatMap(this); } @@ -63,7 +64,7 @@ public Stream apply(@Nonnull String line) { throw new BadDataFormatException("Couldn't parse line #" + m_lineNumber, e); } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. 
+ // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. e.addSuppressed(new RuntimeException("Unexpectedly failed to parse line " + m_lineNumber)); throw e; } @@ -76,8 +77,7 @@ private enum EntryType { FEATURE_LEVEL(5), FEATURE_PROPERTY_LEVEL(21) ; - @SuppressWarnings("FieldNamingConvention") - public int indentation; + public final int indentation; EntryType(int indent) { indentation = indent; } @@ -96,19 +96,19 @@ private GenbankAnnotation parse(String line) { String.join("-", dateChars), DateTimeFormatter.ofPattern("dd-MMM-yyyy") ); - return new LocusAnnotation(parts.get(0), parts.get(1), parts.get(3), parts.get(4), date); + return new GenbankLocusAnnotation(parts.get(0), parts.get(1), parts.get(3), parts.get(4), date); } case "DEFINITION" -> { - return new DefinitionAnnotation(e.trim()); + return new GenbankDefinitionAnnotation(e.trim()); } case "ACCESSION" -> { - return new AccessionAnnotation(e.trim()); + return new GenbankAccessionAnnotation(e.trim()); } case "VERSION" -> { List parts = e.splitAndTrim(); - return new VersionAnnotation(parts.get(0), parts.get(1)); + return new GenbankVersionAnnotation(parts.get(0), parts.get(1)); } case "KEYWORDS" -> { - List parts = e.trim().equals(".") ? Collections.emptyList() : e.splitAndTrim(); - return new KeywordsAnnotation(ImmutableList.copyOf(parts)); + List parts = ".".equals(e.trim()) ? 
Collections.emptyList() : e.splitAndTrim(); + return new GenbankKeywordsAnnotation(ImmutableList.copyOf(parts)); } case "COMMENT" -> { - return new CommentAnnotation(e.trim()); + return new GenbankCommentAnnotation(e.trim()); } case "SOURCE" -> { return parseSource(e); } case "REFERENCE" -> { @@ -122,16 +122,16 @@ private GenbankAnnotation parse(String line) { } } - private OriginAnnotation parseOrigin(Entry e) { + private static GenbankOriginAnnotation parseOrigin(Entry e) { StringBuilder builder = new StringBuilder(64); for (String s : e.textAsTrimmedLines()) { int ignore = sf_plusSpace.split(s)[0].length(); builder.append(s.substring(ignore).replace(" ", "")); } - return new OriginAnnotation(e.m_header, builder.toString()); + return new GenbankOriginAnnotation(e.m_header, builder.toString()); } - private SourceAnnotation parseSource(Entry e) { + private GenbankSourceAnnotation parseSource(Entry e) { Map asMap = parseSubEntriesAsMap(e, 2); List organismT = asMap.get("ORGANISM").textAfterDirectiveAsLines(); String organism = organismT.get(0); @@ -140,16 +140,16 @@ private SourceAnnotation parseSource(Entry e) { .flatMap(s -> Arrays.stream(s.split(";"))) .map(String::trim) .collect(Collectors.toList()); - return new SourceAnnotation( + return new GenbankSourceAnnotation( asMap.get("SOURCE").trim(), organism, ImmutableList.copyOf(lineage) ); } - private ReferenceAnnotation parseReference(Entry e) { + private GenbankReferenceAnnotation parseReference(Entry e) { Map asMap = parseSubEntriesAsMap(e, EntryType.MID_LEVEL.indentation); - return new ReferenceAnnotation( + return new GenbankReferenceAnnotation( e.m_header, Optional.ofNullable(asMap.get("AUTHORS")).map(Entry::trim), Optional.ofNullable(asMap.get("CONSORTIUM")).map(Entry::trim), @@ -160,13 +160,13 @@ private ReferenceAnnotation parseReference(Entry e) { ); } - private FeaturesAnnotation parseFeatures(Entry e) { + private GenbankFeaturesAnnotation parseFeatures(Entry e) { List entries = parseSubEntries(e, 
EntryType.FEATURE_LEVEL.indentation); List features = entries.stream() - .filter(e2 -> !e2.m_directive.equals("FEATURES")) + .filter(e2 -> !"FEATURES".equals(e2.m_directive)) .map(this::parseFeature) .collect(Collectors.toList()); - return new FeaturesAnnotation(e.m_header, ImmutableList.copyOf(features)); + return new GenbankFeaturesAnnotation(e.m_header, ImmutableList.copyOf(features)); } private GenbankFeature parseFeature(Entry e) { @@ -201,13 +201,13 @@ private List parseSubEntries(Entry e, int directiveIndents) { if (indent > directiveIndents) { current.append(line).append(System.lineSeparator()); } else { - if (current.length() > 0) { + if (!current.isEmpty()) { entries.add(Entry.extract(current.toString())); } current = new StringBuilder(line + System.lineSeparator()); } } - if (current.length() > 0) { + if (!current.isEmpty()) { entries.add(Entry.extract(current.toString())); } return entries; diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParserI.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParserI.java new file mode 100644 index 0000000..662e19e --- /dev/null +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/GenbankParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.genbank; + +import org.pharmgkb.parsers.MultilineParser; +import org.pharmgkb.parsers.genbank.model.annotations.GenbankAnnotation; + +public interface GenbankParserI extends MultilineParser { + +} diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankDivision.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankDivision.java index 762bd18..360da81 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankDivision.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankDivision.java @@ -26,14 +26,14 @@ public enum GenbankDivision { ENV("environmental sampling sequences"), NONSTANDARD("Divisions not speicified in the GenBank specification"); - private String 
m_description; + private final String m_description; GenbankDivision(@Nonnull String description) { m_description = description; } @Nonnull - public String getDescription() { + public String description() { return m_description; } } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankFeature.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankFeature.java index debdc5f..4aabf98 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankFeature.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankFeature.java @@ -12,7 +12,7 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class GenbankFeature { +public final class GenbankFeature { private final String m_kind; private final GenbankSequenceRange m_range; @@ -32,12 +32,12 @@ public GenbankFeature( } @Nonnull - public String getKind() { + public String kind() { return m_kind; } @Nonnull - public GenbankSequenceRange getRange() { + public GenbankSequenceRange range() { return m_range; } @@ -47,7 +47,7 @@ public GenbankSequenceRange getRange() { * Will be in the same order as the original. */ @Nonnull - public ImmutableMap getProperties() { + public ImmutableMap properties() { return m_properties; } @@ -55,7 +55,7 @@ public ImmutableMap getProperties() { * Lines at the end not conforming to GenBank format. Whitespace is still trimmed. 
*/ @Nonnull - public ImmutableList getExtraLines() { + public ImmutableList extraLines() { return m_extraLines; } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankSequenceRange.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankSequenceRange.java index 8a37a66..02f29bc 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankSequenceRange.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankSequenceRange.java @@ -11,7 +11,7 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class GenbankSequenceRange { +public final class GenbankSequenceRange { private static final Pattern pattern = Pattern.compile("(complement\\()?(<)?(-?\\d+)\\.{2}(-?\\d+)(>)?\\)?"); private static final Pattern startPattern = Pattern.compile("\\.{2}"); @@ -24,7 +24,7 @@ public GenbankSequenceRange(@Nonnull String text) { } @Nonnull - public String getText() { + public String text() { return m_text; } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/AccessionAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankAccessionAnnotation.java similarity index 64% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/AccessionAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankAccessionAnnotation.java index 8790602..a683896 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/AccessionAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankAccessionAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; @@ -10,16 +10,16 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class AccessionAnnotation implements GenbankAnnotation { +public final class GenbankAccessionAnnotation implements GenbankAnnotation { 
private final String m_accession; - public AccessionAnnotation(@Nonnull String accession) { + public GenbankAccessionAnnotation(@Nonnull String accession) { m_accession = accession; } @Nonnull - public String getAccession() { + public String accession() { return m_accession; } @@ -33,8 +33,8 @@ public String toString() { @Override public boolean equals(Object o) { if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - AccessionAnnotation that = (AccessionAnnotation) o; + if (null == o || getClass() != o.getClass()) return false; + GenbankAccessionAnnotation that = (GenbankAccessionAnnotation) o; return Objects.equals(m_accession, that.m_accession); } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankAnnotation.java similarity index 71% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankAnnotation.java index bb98521..300729b 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/GenbankAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import javax.annotation.concurrent.Immutable; diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/CommentAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankCommentAnnotation.java similarity index 70% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/CommentAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankCommentAnnotation.java index 5b531d6..9837074 100644 --- 
a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/CommentAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankCommentAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; @@ -10,16 +10,16 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class CommentAnnotation implements GenbankAnnotation { +public final class GenbankCommentAnnotation implements GenbankAnnotation { private final String m_text; - public CommentAnnotation(@Nonnull String text) { + public GenbankCommentAnnotation(@Nonnull String text) { m_text = text; } @Nonnull - public String getText() { + public String text() { return m_text; } @@ -34,7 +34,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - CommentAnnotation that = (CommentAnnotation) o; + GenbankCommentAnnotation that = (GenbankCommentAnnotation) o; return Objects.equals(m_text, that.m_text); } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/DefinitionAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankDefinitionAnnotation.java similarity index 70% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/DefinitionAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankDefinitionAnnotation.java index 4e8c6f5..2f01e11 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/DefinitionAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankDefinitionAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; @@ -10,16 +10,16 @@ * Author Douglas Myers-Turnbull */ @Immutable 
-public class DefinitionAnnotation implements GenbankAnnotation { +public final class GenbankDefinitionAnnotation implements GenbankAnnotation { private final String m_definition; - public DefinitionAnnotation(@Nonnull String definition) { + public GenbankDefinitionAnnotation(@Nonnull String definition) { m_definition = definition; } @Nonnull - public String getDefinition() { + public String definition() { return m_definition; } @@ -27,7 +27,7 @@ public String getDefinition() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - DefinitionAnnotation that = (DefinitionAnnotation) o; + GenbankDefinitionAnnotation that = (GenbankDefinitionAnnotation) o; return Objects.equals(m_definition, that.m_definition); } @@ -43,4 +43,3 @@ public String toString() { .toString(); } } - diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/FeaturesAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankFeaturesAnnotation.java similarity index 68% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/FeaturesAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankFeaturesAnnotation.java index 90471e3..6ad49ec 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/FeaturesAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankFeaturesAnnotation.java @@ -1,7 +1,8 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableList; +import org.pharmgkb.parsers.genbank.model.GenbankFeature; import javax.annotation.Nonnull; import javax.annotation.concurrent.Immutable; @@ -11,23 +12,23 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class FeaturesAnnotation implements GenbankAnnotation { +public final class 
GenbankFeaturesAnnotation implements GenbankAnnotation { private final String m_header; private final ImmutableList m_features; - public FeaturesAnnotation(@Nonnull String header, @Nonnull ImmutableList features) { + public GenbankFeaturesAnnotation(@Nonnull String header, @Nonnull ImmutableList features) { m_header = header; m_features = features; } @Nonnull - public String getHeader() { + public String header() { return m_header; } @Nonnull - public ImmutableList getFeatures() { + public ImmutableList features() { return m_features; } @@ -43,7 +44,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - FeaturesAnnotation that = (FeaturesAnnotation) o; + GenbankFeaturesAnnotation that = (GenbankFeaturesAnnotation) o; return Objects.equals(m_header, that.m_header) && Objects.equals(m_features, that.m_features); } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/KeywordsAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankKeywordsAnnotation.java similarity index 70% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/KeywordsAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankKeywordsAnnotation.java index efd0ca4..5e108f7 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/KeywordsAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankKeywordsAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableList; @@ -11,16 +11,16 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class KeywordsAnnotation implements GenbankAnnotation { +public final class GenbankKeywordsAnnotation implements GenbankAnnotation { private final 
ImmutableList m_keywords; - public KeywordsAnnotation(@Nonnull ImmutableList keywords) { + public GenbankKeywordsAnnotation(@Nonnull ImmutableList keywords) { m_keywords = keywords; } @Nonnull - public ImmutableList getKeywords() { + public ImmutableList keywords() { return m_keywords; } @@ -35,7 +35,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - KeywordsAnnotation that = (KeywordsAnnotation) o; + GenbankKeywordsAnnotation that = (GenbankKeywordsAnnotation) o; return Objects.equals(m_keywords, that.m_keywords); } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/LocusAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankLocusAnnotation.java similarity index 81% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/LocusAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankLocusAnnotation.java index c844f7f..ea64df6 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/LocusAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankLocusAnnotation.java @@ -1,7 +1,8 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.Enums; import com.google.common.base.MoreObjects; +import org.pharmgkb.parsers.genbank.model.GenbankDivision; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -13,7 +14,7 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class LocusAnnotation implements GenbankAnnotation { +public class GenbankLocusAnnotation implements GenbankAnnotation { private final String m_locusName; private final String m_sequenceLength; @@ -23,7 +24,7 @@ public class LocusAnnotation implements GenbankAnnotation { @SuppressWarnings("FieldNotUsedInToString") private final GenbankDivision 
m_standardDivision; - public LocusAnnotation( + public GenbankLocusAnnotation( @Nonnull String locusName, @Nonnull String sequenceLength, @Nonnull String moleculeType, @@ -39,33 +40,33 @@ public LocusAnnotation( } @Nonnull - public String getLocusName() { + public String locusName() { return m_locusName; } @Nonnull @Nonnegative - public String getSequenceLength() { + public String sequenceLength() { return m_sequenceLength; } @Nonnull - public String getMoleculeType() { + public String moleculeType() { return m_moleculeType; } @Nonnull - public String getDivision() { + public String division() { return m_division; } @Nonnull - public LocalDate getModificationDate() { + public LocalDate modificationDate() { return m_modificationDate; } @Nonnull - public GenbankDivision getStandardDivision() { + public GenbankDivision standardDivision() { return m_standardDivision; } @@ -73,7 +74,7 @@ public GenbankDivision getStandardDivision() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - LocusAnnotation that = (LocusAnnotation) o; + GenbankLocusAnnotation that = (GenbankLocusAnnotation) o; return Objects.equals(m_locusName, that.m_locusName) && Objects.equals(m_sequenceLength, that.m_sequenceLength) && Objects.equals(m_moleculeType, that.m_moleculeType) && diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/OriginAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankOriginAnnotation.java similarity index 73% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/OriginAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankOriginAnnotation.java index db24e50..20bd658 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/OriginAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankOriginAnnotation.java @@ -1,4 +1,4 @@ -package 
org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; @@ -10,23 +10,23 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class OriginAnnotation implements GenbankAnnotation { +public class GenbankOriginAnnotation implements GenbankAnnotation { private final String m_header; private final String m_sequence; - public OriginAnnotation(@Nonnull String data, @Nonnull String sequence) { + public GenbankOriginAnnotation(@Nonnull String data, @Nonnull String sequence) { m_header = data; m_sequence = sequence; } @Nonnull - public String getHeader() { + public String header() { return m_header; } @Nonnull - public String getSequence() { + public String sequence() { return m_sequence; } @@ -42,7 +42,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - OriginAnnotation that = (OriginAnnotation) o; + GenbankOriginAnnotation that = (GenbankOriginAnnotation) o; return Objects.equals(m_header, that.m_header) && Objects.equals(m_sequence, that.m_sequence); } diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/ReferenceAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankReferenceAnnotation.java similarity index 80% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/ReferenceAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankReferenceAnnotation.java index 79b24f1..9a6cb7c 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/ReferenceAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankReferenceAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; @@ -12,7 +12,7 @@ * Author Douglas 
Myers-Turnbull */ @Immutable -public class ReferenceAnnotation implements GenbankAnnotation { +public class GenbankReferenceAnnotation implements GenbankAnnotation { private final String m_header; private final Optional m_authors; @@ -22,7 +22,7 @@ public class ReferenceAnnotation implements GenbankAnnotation { private final Optional m_pubmedId; private final Optional m_remark; - public ReferenceAnnotation( + public GenbankReferenceAnnotation( @Nonnull String header, @Nonnull Optional authors, @Nonnull Optional consortium, @@ -41,42 +41,42 @@ public ReferenceAnnotation( } @Nonnull - public String getHeader() { + public String header() { return m_header; } @Nonnull - public Optional getAuthors() { + public Optional authors() { return m_authors; } @Nonnull - public Optional getTitle() { + public Optional title() { return m_title; } @Nonnull - public Optional getJournal() { + public Optional journal() { return m_journal; } @Nonnegative - public Optional getPubmedId() { + public Optional pubmedId() { return m_pubmedId; } @Nonnull - public Optional getConsortium() { + public Optional consortium() { return m_consortium; } @Nonnull - public Optional getRemark() { + public Optional remark() { return m_remark; } public boolean isDirectSubmission() { - return m_title.orElse("-").equals("Direct Submission"); + return "Direct Submission".equals(m_title.orElse("-")); } @Override @@ -96,7 +96,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - ReferenceAnnotation reference = (ReferenceAnnotation) o; + GenbankReferenceAnnotation reference = (GenbankReferenceAnnotation) o; return m_pubmedId == reference.m_pubmedId && Objects.equals(m_header, reference.m_header) && Objects.equals(m_authors, reference.m_authors) && diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/SourceAnnotation.java 
b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankSourceAnnotation.java similarity index 73% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/SourceAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankSourceAnnotation.java index ec8032f..7cd675e 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/SourceAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankSourceAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; import com.google.common.collect.ImmutableList; @@ -11,30 +11,30 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class SourceAnnotation implements GenbankAnnotation { +public class GenbankSourceAnnotation implements GenbankAnnotation { private final String m_name; private final String m_formalName; private final ImmutableList m_lineage; - public SourceAnnotation(@Nonnull String name, @Nonnull String formalName, @Nonnull ImmutableList lineage) { + public GenbankSourceAnnotation(@Nonnull String name, @Nonnull String formalName, @Nonnull ImmutableList lineage) { m_name = name; m_formalName = formalName; m_lineage = lineage; } @Nonnull - public String getName() { + public String name() { return m_name; } @Nonnull - public String getFormalName() { + public String formalName() { return m_formalName; } @Nonnull - public ImmutableList getLineage() { + public ImmutableList lineage() { return m_lineage; } @@ -51,7 +51,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - SourceAnnotation that = (SourceAnnotation) o; + GenbankSourceAnnotation that = (GenbankSourceAnnotation) o; return Objects.equals(m_name, that.m_name) && Objects.equals(m_formalName, that.m_formalName) && 
Objects.equals(m_lineage, that.m_lineage); diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/VersionAnnotation.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankVersionAnnotation.java similarity index 73% rename from genbank/src/main/java/org/pharmgkb/parsers/genbank/model/VersionAnnotation.java rename to genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankVersionAnnotation.java index 4adb104..a1c4a69 100644 --- a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/VersionAnnotation.java +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/GenbankVersionAnnotation.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.genbank.model; +package org.pharmgkb.parsers.genbank.model.annotations; import com.google.common.base.MoreObjects; @@ -10,23 +10,23 @@ * Author Douglas Myers-Turnbull */ @Immutable -public class VersionAnnotation implements GenbankAnnotation { +public class GenbankVersionAnnotation implements GenbankAnnotation { private final String m_accession; private final String m_versionNumber; - public VersionAnnotation(@Nonnull String accession, @Nonnull String versionNumber) { + public GenbankVersionAnnotation(@Nonnull String accession, @Nonnull String versionNumber) { m_accession = accession; m_versionNumber = versionNumber; } @Nonnull - public String getAccession() { + public String accession() { return m_accession; } @Nonnull - public String getVersionNumber() { + public String versionNumber() { return m_versionNumber; } @@ -42,7 +42,7 @@ public String toString() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - VersionAnnotation that = (VersionAnnotation) o; + GenbankVersionAnnotation that = (GenbankVersionAnnotation) o; return Objects.equals(m_accession, that.m_accession) && Objects.equals(m_versionNumber, that.m_versionNumber); } diff --git 
a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/package-info.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/package-info.java new file mode 100644 index 0000000..76d9a85 --- /dev/null +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/annotations/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.genbank.model.annotations; diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/package-info.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/package-info.java new file mode 100644 index 0000000..9d0080b --- /dev/null +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.genbank.model; diff --git a/genbank/src/main/java/org/pharmgkb/parsers/genbank/package-info.java b/genbank/src/main/java/org/pharmgkb/parsers/genbank/package-info.java new file mode 100644 index 0000000..a1e10e2 --- /dev/null +++ b/genbank/src/main/java/org/pharmgkb/parsers/genbank/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.genbank; diff --git a/genbank/src/test/java/org/pharmgkb/parsers/genbank/GenbankSequenceRangeTest.java b/genbank/src/test/java/org/pharmgkb/parsers/genbank/GenbankSequenceRangeTest.java index 1de5b07..7abda92 100644 --- a/genbank/src/test/java/org/pharmgkb/parsers/genbank/GenbankSequenceRangeTest.java +++ b/genbank/src/test/java/org/pharmgkb/parsers/genbank/GenbankSequenceRangeTest.java @@ -17,7 +17,7 @@ public void testSimple() { assertFalse(range.isComplement()); assertFalse(range.isEndPartial()); assertFalse(range.isStartPartial()); - assertEquals(text, range.getText()); + assertEquals(text, range.text()); assertEquals(-10L, range.start()); assertEquals(50, range.end()); } @@ -29,7 +29,7 @@ public void testPartial(){ assertFalse(range.isComplement()); assertTrue(range.isEndPartial()); assertTrue(range.isStartPartial()); - assertEquals(text, range.getText()); + assertEquals(text, 
range.text()); assertEquals(-10L, range.start()); assertEquals(50, range.end()); } @@ -41,8 +41,8 @@ public void testComplement() { assertTrue(range.isComplement()); assertTrue(range.isEndPartial()); assertTrue(range.isStartPartial()); - assertEquals(text, range.getText()); + assertEquals(text, range.text()); assertEquals(-10L, range.start()); assertEquals(50, range.end()); } -} \ No newline at end of file +} diff --git a/gff/build.gradle b/gff/build.gradle index 1cf83f3..c9508c4 100644 --- a/gff/build.gradle +++ b/gff/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') -} \ No newline at end of file + api project(':core') +} diff --git a/gff/lombok.config b/gff/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/gff/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Parser.java b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Parser.java index 0f054a2..4fa9ddc 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Parser.java +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Parser.java @@ -1,10 +1,9 @@ package org.pharmgkb.parsers.gff; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineParser; -import org.pharmgkb.parsers.gff.model.CdsPhase; +import org.pharmgkb.parsers.gff.model.Gff3CdsPhase; import org.pharmgkb.parsers.gff.model.Gff3Feature; -import org.pharmgkb.parsers.gff.model.GffStrand; +import org.pharmgkb.parsers.gff.model.Gff3Strand; import org.pharmgkb.parsers.gff.utils.Gff3Escapers; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -60,7 +59,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class Gff3Parser implements LineParser { +public class Gff3Parser implements Gff3ParserI { private static final long sf_logEvery = 10000; private static final Pattern sf_comma = Pattern.compile(","); @@ -70,11 +69,12 @@ 
public class Gff3Parser implements LineParser { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); @Nonnull @Override - public Stream parseAll(@Nonnull Stream stream) throws UncheckedIOException, BadDataFormatException { + public Stream parseAll(@Nonnull Stream stream) + throws UncheckedIOException, BadDataFormatException { return stream.filter(s -> !s.startsWith("#")).map(this); } @@ -91,7 +91,7 @@ public Gff3Feature apply(@Nonnull String line) throws BadDataFormatException { final String coordinateSystemId = parts[0]; - final Optional source = parts[1].equals(".")? + final Optional source = ".".equals(parts[1])? Optional.empty() : Optional.of(parts[1]); @@ -99,20 +99,20 @@ public Gff3Feature apply(@Nonnull String line) throws BadDataFormatException { final long start = Long.parseLong(parts[3]) - 1; final long end = Long.parseLong(parts[4]) - 1; - final Optional score = parts[5].equals(".")? + final Optional score = ".".equals(parts[5])? Optional.empty() : Optional.of(new BigDecimal(parts[5])); - if (GffStrand.lookupBySymbol(parts[6]).isEmpty()) { + if (Gff3Strand.fromSymbol(parts[6]).isEmpty()) { throw new IllegalArgumentException("Strand " + parts[6] + " is unrecognized"); } - final GffStrand strand = GffStrand.lookupBySymbol(parts[6]).get(); + final Gff3Strand strand = Gff3Strand.fromSymbol(parts[6]).get(); - final Optional phase = switch (parts[7]) { + final Optional phase = switch (parts[7]) { case "." 
-> Optional.empty(); - case "0" -> Optional.of(CdsPhase.ZERO); - case "1" -> Optional.of(CdsPhase.ONE); - case "2" -> Optional.of(CdsPhase.TWO); + case "0" -> Optional.of(Gff3CdsPhase.ZERO); + case "1" -> Optional.of(Gff3CdsPhase.ONE); + case "2" -> Optional.of(Gff3CdsPhase.TWO); default -> throw new IllegalArgumentException("Phase " + parts[7] + " is unrecognized"); }; @@ -140,7 +140,7 @@ public Gff3Feature apply(@Nonnull String line) throws BadDataFormatException { ); } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. + // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. e.addSuppressed(new RuntimeException("Unexpectedly failed to parse line " + m_lineNumber)); throw e; } diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3ParserI.java b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3ParserI.java new file mode 100644 index 0000000..936eb13 --- /dev/null +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3ParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.gff; + +import org.pharmgkb.parsers.LineParser; +import org.pharmgkb.parsers.gff.model.Gff3Feature; + +public interface Gff3ParserI extends LineParser { + +} diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Writer.java b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Writer.java index 8da5b87..dd11703 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Writer.java +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3Writer.java @@ -1,6 +1,5 @@ package org.pharmgkb.parsers.gff; -import org.pharmgkb.parsers.LineWriter; import org.pharmgkb.parsers.gff.model.Gff3Feature; import org.pharmgkb.parsers.gff.utils.Gff3Escapers; import org.slf4j.Logger; @@ -21,13 +20,13 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class Gff3Writer implements LineWriter { +public class Gff3Writer implements Gff3WriterI { private static 
final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); @Nonnull @Override @@ -38,14 +37,14 @@ public String apply(@Nonnull Gff3Feature feat) { } return tabify( - Gff3Escapers.COORDINATE_SYSTEM_IDS.escape(feat.getCoordinateSystemName()), - feat.getSource().map(Gff3Escapers.FIELDS::escape).orElse(null), - Gff3Escapers.FIELDS.escape(feat.getType()), - feat.getStart() + 1, feat.getEnd() + 1, - feat.getScore().orElse(null), - feat.getStrand().getSymbol(), - feat.getPhase().orElse(null), - mapToString(feat.getAttributes()) + Gff3Escapers.COORDINATE_SYSTEM_IDS.escape(feat.coordinateSystemId()), + feat.source().map(Gff3Escapers.FIELDS::escape).orElse(null), + Gff3Escapers.FIELDS.escape(feat.type()), + feat.start() + 1, feat.end() + 1, + feat.score().orElse(null), + feat.strand().symbol(), + feat.phase().orElse(null), + mapToString(feat.attributes()) ); } diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3WriterI.java b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3WriterI.java new file mode 100644 index 0000000..cf2deee --- /dev/null +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/Gff3WriterI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.gff; + +import org.pharmgkb.parsers.LineWriter; +import org.pharmgkb.parsers.gff.model.Gff3Feature; + +public interface Gff3WriterI extends LineWriter { + +} diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/BaseGffFeature.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/BaseGffFeature.java deleted file mode 100644 index a912cd7..0000000 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/model/BaseGffFeature.java +++ /dev/null @@ -1,275 +0,0 @@ -package org.pharmgkb.parsers.gff.model; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; - -import 
javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import javax.annotation.concurrent.Immutable; -import javax.annotation.concurrent.NotThreadSafe; -import java.math.BigDecimal; -import java.util.Objects; -import java.util.Optional; - -/** - * A line in a GFF2 file. - * See http://genome.ucsc.edu/FAQ/FAQformat.html#format3. - * @author Douglas Myers-Turnbull - */ -@Immutable -public abstract class BaseGffFeature { - - private final String m_coordinateSystemId; - - private final String m_type; - - private final long m_start; - - private final long m_end; - - private final Optional m_source; - - private final Optional m_score; - - private final GffStrand m_strand; - - private final Optional m_phase; - - protected BaseGffFeature(@Nonnull Builder builder) { - m_coordinateSystemId = builder.m_coordinateSystemId; - m_type = builder.m_type; - m_start = builder.m_start; - m_end = builder.m_end; - m_source = builder.m_source; - m_score = builder.m_score; - m_strand = builder.m_strand; - m_phase = builder.m_phase; - } - - /** - * @return Also known as the sequence ID - */ - @Nonnull - public String getCoordinateSystemName() { - return m_coordinateSystemId; - } - - /** - * 0-based: Note that this is the GFF file value minus 1. - */ - @Nonnegative - public long getStart() { - return m_start; - } - - /** - * 0-based: Note that this is the GFF file value minus 1. 
- */ - @Nonnegative - public long getEnd() { - return m_end; - } - - @Nonnull - public String getType() { - return m_type; - } - - @Nonnull - public Optional getSource() { - return m_source; - } - - @Nonnull - public Optional getScore() { - return m_score; - } - - @Nonnull - public GffStrand getStrand() { - return m_strand; - } - - @Nonnull - public Optional getPhase() { - return m_phase; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this).add("id", m_coordinateSystemId).add("type", m_type).add("source", m_source) - .add("start", m_start).add("end", m_end).add("score", m_score) - .add("strand", m_strand).add("phase", m_phase).toString(); - } - - @SuppressWarnings("EqualsCalledOnEnumConstant") - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - BaseGffFeature that = (BaseGffFeature) o; - return Objects.equals(m_coordinateSystemId, that.m_coordinateSystemId) - && Objects.equals(m_source, that.m_source) - && Objects.equals(m_type, that.m_type) - && Objects.equals(m_start, that.m_start) - && Objects.equals(m_end, that.m_end) - && Objects.equals(m_score, that.m_score) - && Objects.equals(m_strand, that.m_strand) - && Objects.equals(m_phase, that.m_phase); - } - - @Override - public int hashCode() { - return Objects.hash(m_coordinateSystemId, m_type, m_start, m_end, m_source, m_strand, m_phase, m_score); - } - - // TODO how can this implement implements ObjectBuilder ? - @NotThreadSafe - public abstract static class Builder> { - - @Nonnull - protected String m_coordinateSystemId; - - @Nonnull - protected String m_type; - - protected long m_start; - - protected long m_end; - - @Nonnull - protected Optional m_source; - - @Nonnull - protected Optional m_score; - - @Nonnull - protected GffStrand m_strand; - - @Nonnull - protected Optional m_phase; - - /** - * Note that the strand defaults to {@link GffStrand#UNSTRANDED}. 
- * @param coordinateSystemId Also known as the sequence ID - * @param type For example, "CDS" - * @param start 0-based - * @param end 0-based - */ - @SuppressWarnings("ConstantConditions") - public Builder(@Nonnull String coordinateSystemId, @Nonnull String type, @Nonnegative long start, @Nonnegative long end) { - Preconditions.checkArgument(start > -1, "Start " + start + " < 0"); - Preconditions.checkArgument(end > -1, "End " + end + " < 0"); - Preconditions.checkArgument(start <= end, "Start " + start + " comes before end " + end); - m_coordinateSystemId = coordinateSystemId; - m_type = type; - m_start = start; - m_end = end; - m_source = Optional.empty(); - m_score = Optional.empty(); - m_strand = GffStrand.UNSTRANDED; - m_phase = Optional.empty(); - } - - public Builder(@Nonnull Builder builder) { - m_coordinateSystemId = builder.m_coordinateSystemId; - m_type = builder.m_type; - m_start = builder.m_start; - m_end = builder.m_end; - m_source = builder.m_source; - m_score = builder.m_score; - m_strand = builder.m_strand; - m_phase = builder.m_phase; - } - - public Builder(@Nonnull BaseGffFeature feature) { - m_coordinateSystemId = feature.m_coordinateSystemId; - m_type = feature.m_type; - m_start = feature.m_start; - m_end = feature.m_end; - m_source = feature.m_source; - m_score = feature.m_score; - m_strand = feature.m_strand; - m_phase = feature.m_phase; - } - - @Nonnull - public Builder setCoordinateSystemId(@Nonnull String coordinateSystemId) { - m_coordinateSystemId = coordinateSystemId; - return this; - } - - @SuppressWarnings({"ConstantConditions", "unchecked"}) - @Nonnull - public B setType(@Nonnull String type) { - if (type == null) { - throw new NullPointerException("Type is null"); - } - m_type = type; - return (B) this; - } - - @SuppressWarnings({"unchecked", "ConstantConditions"}) - @Nonnull - public B setStart(@Nonnegative long start) { - Preconditions.checkArgument(start > -1, "Start " + start + " < 0"); - m_start = start; - return (B) this; - } 
- - @SuppressWarnings({"unchecked", "ConstantConditions"}) - @Nonnull - public B setEnd(@Nonnegative long end) { - Preconditions.checkArgument(end > -1, "End " + end + " < 0"); - m_end = end; - return (B) this; - } - - @Nonnull - public B setSource(@Nullable String source) { - return setSource(Optional.ofNullable(source)); - } - @SuppressWarnings("unchecked") - @Nonnull - public B setSource(@Nonnull Optional source) { - m_source = source; - return (B) this; - } - - @Nonnull - public B setScore(@Nullable BigDecimal score) { - return setScore(Optional.ofNullable(score)); - } - @SuppressWarnings("unchecked") - @Nonnull - public B setScore(@Nonnull Optional score) { - m_score = score; - return (B) this; - } - - @SuppressWarnings("unchecked") - @Nonnull - public B setStrand(@Nonnull GffStrand strand) { - m_strand = strand; - return (B) this; - } - - @Nonnull - public B setPhase(@Nullable CdsPhase phase) { - return setPhase(Optional.ofNullable(phase)); - } - @SuppressWarnings("unchecked") - @Nonnull - public B setPhase(@Nonnull Optional phase) { - m_phase = phase; - return (B) this; - } - - } - -} diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Attribute.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Attribute.java index 30c138c..cdf5d0e 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Attribute.java +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Attribute.java @@ -27,7 +27,7 @@ public enum Gff3Attribute { } @Nonnull - public String getId() { + public String id() { return m_id; } } diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/CdsPhase.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3CdsPhase.java similarity index 65% rename from gff/src/main/java/org/pharmgkb/parsers/gff/model/CdsPhase.java rename to gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3CdsPhase.java index 08563f8..765258d 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/model/CdsPhase.java +++ 
b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3CdsPhase.java @@ -9,35 +9,37 @@ * The number of bases that must be removed from the beginning of a feature to reach the first base of the next codon. * @author Douglas Myers-Turnbull */ -public enum CdsPhase { +public enum Gff3CdsPhase { - ZERO(0), ONE(0), TWO(2); + ZERO(0), + ONE(1), + TWO(2) + ; private final int m_n; - @SuppressWarnings("ConstantConditions") @Nonnull - public static CdsPhase fromOffset(@Nonnegative int offset) { + public static Gff3CdsPhase fromOffset(@Nonnegative int offset) { Preconditions.checkArgument(offset > -1, "Offset must be nonnegative but was " + offset); return switch (offset % 3) { case 0 -> ZERO; case 1 -> ONE; case 2 -> TWO; - default -> throw new RuntimeException("Impossible offset of " + offset); + default -> throw new IllegalArgumentException("Impossible offset of " + offset); }; } @Nonnull - public CdsPhase add(@Nonnull CdsPhase frame) { + public Gff3CdsPhase add(@Nonnull Gff3CdsPhase frame) { return fromOffset(m_n + frame.m_n); } - CdsPhase(@Nonnegative int n) { + Gff3CdsPhase(@Nonnegative int n) { m_n = n; } @Nonnegative - public int getOffset() { + public int offset() { return m_n; } } diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Feature.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Feature.java index 079c6a7..384da73 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Feature.java +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Feature.java @@ -1,19 +1,19 @@ package org.pharmgkb.parsers.gff.model; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; +import org.pharmgkb.parsers.ObjectBuilder; import org.pharmgkb.parsers.gff.Gff3Parser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; +import javax.annotation.Nullable; import 
javax.annotation.concurrent.NotThreadSafe; import java.lang.invoke.MethodHandles; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; +import java.math.BigDecimal; +import java.util.*; /** * A line of GFF3 data. @@ -21,57 +21,168 @@ * Note that coordinates here are 0-based, but they are 1-based in GFF3. * @author Douglas Myers-Turnbull */ -@Immutable -public class Gff3Feature extends BaseGffFeature { +public record Gff3Feature( + @Nonnull String coordinateSystemId, + @Nonnull String type, + @Nonnegative long start, + @Nonnegative long end, + @Nonnull Optional source, + @Nonnull Optional score, + @Nonnull Gff3Strand strand, + @Nonnull Optional phase, + @Nonnull ImmutableMap> attributes +) implements Gff3FeatureI { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private final ImmutableMap> m_attributes; - - private Gff3Feature(@Nonnull Builder builder) { - super(builder); - m_attributes = ImmutableMap.copyOf(builder.m_attributes); + public Gff3Feature(@Nonnull Builder builder) { + this( + builder.m_coordinateSystemId, + builder.m_type, + builder.m_start, + builder.m_end, + builder.m_source, + builder.m_score, + builder.m_strand, + builder.m_phase, + ImmutableMap.copyOf(builder.m_attributes) + ); } @Nonnull - public List getAttributes(@Nonnull Gff3Attribute key) { - return getAttributes(key.getId()); + public List attributes(@Nonnull Gff3Attribute key) { + return attributes(key.id()); } @Nonnull - public List getAttributes(@Nonnull String key) { - List found = m_attributes.get(key); + public List attributes(@Nonnull String key) { + List found = attributes.get(key); return found==null? 
Collections.emptyList() : found; } @Nonnull - public ImmutableMap> getAttributes() { - return m_attributes; + public ImmutableMap> attributes() { + return attributes; } - @NotThreadSafe - public static class Builder extends BaseGffFeature.Builder { - - private Map> m_attributes; - - @Nonnull - public Builder(@Nonnull String coordinateSystemId, @Nonnull String type, @Nonnegative long start, @Nonnegative long end) { - super(coordinateSystemId, type, start, end); + @NotThreadSafe + public static class Builder implements ObjectBuilder { + + private String m_coordinateSystemId; + private String m_type; + private long m_start; + private long m_end; + private Optional m_source = Optional.empty(); + private Optional m_score = Optional.empty(); + private Gff3Strand m_strand = Gff3Strand.UNKNOWN; + private Optional m_phase = Optional.empty(); + private final Map> m_attributes; + + public Builder( + @Nonnull String coordinateSystemId, + @Nonnull String type, + @Nonnegative long start, + @Nonnegative long end + ) { + m_coordinateSystemId = coordinateSystemId; + m_type = type; + m_start = start; + m_end = end; m_attributes = new TreeMap<>(); // for obvious sort order } public Builder(@Nonnull Builder builder) { - super(builder); + m_coordinateSystemId = builder.m_coordinateSystemId; + m_type = builder.m_type; + m_start = builder.m_start; + m_end = builder.m_end; + m_source = builder.m_source; + m_score = builder.m_score; + m_strand = builder.m_strand; + m_phase = builder.m_phase; m_attributes = new TreeMap<>(); // for obvious sort order - builder.m_attributes.forEach(m_attributes::put); + m_attributes.putAll(builder.m_attributes); } public Builder(@Nonnull Gff3Feature feature) { - super(feature); + m_coordinateSystemId = feature.coordinateSystemId(); + m_type = feature.type(); + m_start = feature.start(); + m_end = feature.end(); + m_source = feature.source(); + m_score = feature.score(); + m_strand = feature.strand(); + m_phase = feature.phase(); m_attributes = new 
TreeMap<>(); // for obvious sort order - feature.m_attributes.forEach(m_attributes::put); + m_attributes.putAll(feature.attributes()); } + @Nonnull + public Builder setCoordinateSystemId(@Nonnull String coordinateSystemId) { + m_coordinateSystemId = coordinateSystemId; + return this; + } + + @Nonnull + public Builder setType(@Nonnull String type) { + Objects.requireNonNull(type, "Type is null"); + m_type = type; + return this; + } + + @Nonnull + public Builder setStart(@Nonnegative long start) { + Preconditions.checkArgument(start > -1, "Start " + start + " < 0"); + m_start = start; + return this; + } + + @Nonnull + public Builder setEnd(@Nonnegative long end) { + Preconditions.checkArgument(end > -1, "End " + end + " < 0"); + m_end = end; + return this; + } + + @Nonnull + public Builder setSource(@Nullable String source) { + m_source = Optional.ofNullable(source); + return this; + } + @Nonnull + public Builder setSource(@Nonnull Optional source) { + m_source = source; + return this; + } + + @Nonnull + public Builder setScore(@Nullable BigDecimal score) { + m_score = Optional.ofNullable(score); + return this; + } + @Nonnull + public Builder setScore(@Nonnull Optional score) { + m_score = score; + return this; + } + + @Nonnull + public Builder setStrand(@Nonnull Gff3Strand strand) { + m_strand = strand; + return this; + } + + @Nonnull + public Builder setPhase(@Nullable Gff3CdsPhase phase) { + m_phase = Optional.ofNullable(phase); + return this; + } + @Nonnull + public Builder setPhase(@Nonnull Optional phase) { + m_phase = phase; + return this; + } + @Nonnull public Builder putAttributes(@Nonnull Map> attributes) { m_attributes.putAll(attributes); @@ -90,13 +201,16 @@ public Builder clearAttributes() { return this; } - @Nonnull + @Override + @Nonnull public Gff3Feature build() { if ("CDS".equalsIgnoreCase(m_type) && m_phase.isEmpty()) { - sf_logger.warn("The feature starting at {} and ending at {} is of type CDS but no phase is given", m_start, m_end); + 
sf_logger.warn( + "The feature starting at {} and ending at {} is of type CDS but no phase is given", + m_start, m_end + ); } return new Gff3Feature(this); } } - } diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3FeatureI.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3FeatureI.java new file mode 100644 index 0000000..895cc25 --- /dev/null +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3FeatureI.java @@ -0,0 +1,42 @@ +package org.pharmgkb.parsers.gff.model; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.math.BigDecimal; +import java.util.Optional; + +public interface Gff3FeatureI { + + /** + * @return Also known as the sequence ID + */ + @Nonnull + String coordinateSystemId(); + + /** + * 0-based: Note that this is the GFF file value minus 1. + */ + @Nonnegative + long start(); + + /** + * 0-based: Note that this is the GFF file value minus 1. + */ + @Nonnegative + long end(); + + @Nonnull + String type(); + + @Nonnull + Optional source(); + + @Nonnull + Optional score(); + + @Nonnull + Gff3Strand strand(); + + @Nonnull + Optional phase(); +} diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/GffStrand.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Strand.java similarity index 82% rename from gff/src/main/java/org/pharmgkb/parsers/gff/model/GffStrand.java rename to gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Strand.java index 84f75c8..1258daf 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/model/GffStrand.java +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/model/Gff3Strand.java @@ -17,18 +17,22 @@ * * @author Douglas Myers-Turnbull */ -public enum GffStrand { +public enum Gff3Strand { - PLUS("+"), MINUS("-"), UNSTRANDED("."), UNKNOWN("?"); + PLUS("+"), + MINUS("-"), + UNSTRANDED("."), + UNKNOWN("?") + ; private final String m_symbol; - GffStrand(@Nonnull String symbol) { + Gff3Strand(@Nonnull String symbol) { m_symbol = symbol; } @Nonnull - 
public String getSymbol() { + public String symbol() { return m_symbol; } @@ -44,7 +48,7 @@ public Optional toGeneralStrand() { } @Nonnull - public static Optional lookupBySymbol(@Nonnull String symbol) { + public static Optional fromSymbol(@Nonnull String symbol) { return switch (symbol) { case "+" -> Optional.of(PLUS); case "-" -> Optional.of(MINUS); diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/model/package-info.java b/gff/src/main/java/org/pharmgkb/parsers/gff/model/package-info.java new file mode 100644 index 0000000..efb811e --- /dev/null +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.gff.model; diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/package-info.java b/gff/src/main/java/org/pharmgkb/parsers/gff/package-info.java new file mode 100644 index 0000000..90de398 --- /dev/null +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.gff; diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/utils/Gff3Escapers.java b/gff/src/main/java/org/pharmgkb/parsers/gff/utils/Gff3Escapers.java index 7d398c8..641aed8 100644 --- a/gff/src/main/java/org/pharmgkb/parsers/gff/utils/Gff3Escapers.java +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/utils/Gff3Escapers.java @@ -11,7 +11,7 @@ * Therefore, use this class only if you're implementing a new GFF3-like format (e.g. GTF or GVF). 
* @author Douglas Myers-Turnbull */ -public class Gff3Escapers { +public final class Gff3Escapers { public static final Rfc3986Escaper FIELDS = new Rfc3986Escaper.Builder() .addChars('\n', '\t', '\r', '%', ';', '=', '&', ',') @@ -26,4 +26,6 @@ public class Gff3Escapers { .addCharRange(0x61, 0x7a) .build(); + private Gff3Escapers() { + } } diff --git a/gff/src/main/java/org/pharmgkb/parsers/gff/utils/package-info.java b/gff/src/main/java/org/pharmgkb/parsers/gff/utils/package-info.java new file mode 100644 index 0000000..9468b04 --- /dev/null +++ b/gff/src/main/java/org/pharmgkb/parsers/gff/utils/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.gff.utils; diff --git a/gff/src/test/java/org/pharmgkb/parsers/gff/BaseGffFeatureTest.java b/gff/src/test/java/org/pharmgkb/parsers/gff/BaseGffFeatureTest.java deleted file mode 100644 index f786015..0000000 --- a/gff/src/test/java/org/pharmgkb/parsers/gff/BaseGffFeatureTest.java +++ /dev/null @@ -1,74 +0,0 @@ -package org.pharmgkb.parsers.gff; - -import org.junit.jupiter.api.Test; -import org.pharmgkb.parsers.gff.model.BaseGffFeature; - -import javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import java.util.Locale; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * Test {@link BaseGffFeature}. 
- * @author Douglas Myers-Turnbull - */ -public class BaseGffFeatureTest { - - @Test - public void testBasic() { - Feature feature = new Builder("chr1", "type", 0, 1).build(); - assertEquals("chr1", feature.getCoordinateSystemName()); - assertEquals("type", feature.getType()); - assertEquals(0, feature.getStart()); - assertEquals(1, feature.getEnd()); - } - - @Test - public void testEscapeCoordinateSystemId() { - Feature feature = new Builder("this/needs/unescaping", "type", 0, 1).build(); - } - - @Test - public void testZeroLength() { - Feature feature = new Builder("chr1", "type", 0, 0).build(); - assertEquals(0, feature.getStart()); - assertEquals(0, feature.getEnd()); - } - - @Test - public void testNegativeStart() { - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> new Builder("chr1", "type", -1, 1)); - assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("start " + -1 + " < 0")); - } - - @Test - public void testNegativeEnd() { - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> new Builder("chr1", "type", 1, -1)); - assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("end " + -1 + " < 0")); - } - - @Test - public void testEndBeforeStart() { - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> new Builder("chr1", "type", 2, 1)); - assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("before")); - } - - private static class Feature extends BaseGffFeature { - protected Feature(@Nonnull Builder builder) { - super(builder); - } - } - - private static class Builder extends BaseGffFeature.Builder { - - public Builder(@Nonnull String coordinateSystemId, @Nonnull String type, @Nonnegative long start, @Nonnegative long end) { - super(coordinateSystemId, type, start, end); - } - - @Nonnull - public Feature build() { - return new Feature(this); - } - } -} \ No newline at end of file diff --git a/gff/src/test/java/org/pharmgkb/parsers/gff/Gff3StrandTest.java 
b/gff/src/test/java/org/pharmgkb/parsers/gff/Gff3StrandTest.java new file mode 100644 index 0000000..0a7574a --- /dev/null +++ b/gff/src/test/java/org/pharmgkb/parsers/gff/Gff3StrandTest.java @@ -0,0 +1,35 @@ +package org.pharmgkb.parsers.gff; + +import org.junit.jupiter.api.Test; +import org.pharmgkb.parsers.gff.model.Gff3Strand; +import org.pharmgkb.parsers.model.Strand; + +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests {@link Gff3Strand}. + * @author Douglas Myers-Turnbull + */ +public class Gff3StrandTest { + + @Test + public void testToGeneralStrand() { + assertEquals(Optional.of(Strand.PLUS), Gff3Strand.PLUS.toGeneralStrand()); + assertEquals(Optional.of(Strand.MINUS), Gff3Strand.MINUS.toGeneralStrand()); + assertFalse(Gff3Strand.UNSTRANDED.toGeneralStrand().isPresent()); + assertFalse(Gff3Strand.UNKNOWN.toGeneralStrand().isPresent()); + } + + @Test + public void testFromSymbol() { + + assertEquals(Optional.of(Gff3Strand.PLUS), Gff3Strand.fromSymbol("+")); + assertEquals(Optional.of(Gff3Strand.MINUS), Gff3Strand.fromSymbol("-")); + assertEquals(Optional.of(Gff3Strand.UNSTRANDED), Gff3Strand.fromSymbol(".")); + assertEquals(Optional.of(Gff3Strand.UNKNOWN), Gff3Strand.fromSymbol("?")); + + assertFalse(Gff3Strand.fromSymbol("*").isPresent()); + } +} diff --git a/gff/src/test/java/org/pharmgkb/parsers/gff/GffStrandTest.java b/gff/src/test/java/org/pharmgkb/parsers/gff/GffStrandTest.java deleted file mode 100644 index 599c29a..0000000 --- a/gff/src/test/java/org/pharmgkb/parsers/gff/GffStrandTest.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.pharmgkb.parsers.gff; - -import org.junit.jupiter.api.Test; -import org.pharmgkb.parsers.gff.model.GffStrand; -import org.pharmgkb.parsers.model.Strand; - -import java.util.Optional; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; - -/** - * Tests {@link GffStrand}. 
- * @author Douglas Myers-Turnbull - */ -public class GffStrandTest { - - @Test - public void testToGeneralStrand() { - assertEquals(Optional.of(Strand.PLUS), GffStrand.PLUS.toGeneralStrand()); - assertEquals(Optional.of(Strand.MINUS), GffStrand.MINUS.toGeneralStrand()); - assertFalse(GffStrand.UNSTRANDED.toGeneralStrand().isPresent()); - assertFalse(GffStrand.UNKNOWN.toGeneralStrand().isPresent()); - } - - @Test - public void testLookupBySymbol() { - - assertEquals(Optional.of(GffStrand.PLUS), GffStrand.lookupBySymbol("+")); - assertEquals(Optional.of(GffStrand.MINUS), GffStrand.lookupBySymbol("-")); - assertEquals(Optional.of(GffStrand.UNSTRANDED), GffStrand.lookupBySymbol(".")); - assertEquals(Optional.of(GffStrand.UNKNOWN), GffStrand.lookupBySymbol("?")); - - assertFalse(GffStrand.lookupBySymbol("*").isPresent()); - } -} \ No newline at end of file diff --git a/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3FeatureTest.java b/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3FeatureTest.java index 89d9f84..67da1bd 100644 --- a/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3FeatureTest.java +++ b/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3FeatureTest.java @@ -8,7 +8,7 @@ import java.util.Collections; import java.util.Optional; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Test {@link Gff3Feature}. 
@@ -22,9 +22,9 @@ public void testAttributes() throws Exception { .putAttributes("x", Arrays.asList("a", "b", "c")) .putAttributes("y", Collections.emptyList()) .build(); - assertEquals(Arrays.asList("a", "b", "c"), feature.getAttributes("x")); - assertEquals(Collections.emptyList(), feature.getAttributes("z")); - assertEquals(Collections.emptyList(), feature.getAttributes("notinthemap")); + assertEquals(Arrays.asList("a", "b", "c"), feature.attributes("x")); + assertEquals(Collections.emptyList(), feature.attributes("z")); + assertEquals(Collections.emptyList(), feature.attributes("notinthemap")); } @Test @@ -38,13 +38,13 @@ public void testCopyConstructor() throws Exception { .setEnd(10) .putAttributes("extra", Collections.singletonList("extra")) .build(); - assertEquals(0, b.getStart()); - assertEquals(10, b.getEnd()); - assertEquals(Optional.of(new BigDecimal("1")), b.getScore()); - assertEquals(Arrays.asList("a", "b", "c"), b.getAttributes("x")); - assertEquals(Collections.emptyList(), b.getAttributes("z")); - assertEquals(Collections.emptyList(), b.getAttributes("notinthemap")); - assertEquals(Collections.singletonList("extra"), b.getAttributes("extra")); + assertEquals(0, b.start()); + assertEquals(10, b.end()); + assertEquals(Optional.of(new BigDecimal("1")), b.score()); + assertEquals(Arrays.asList("a", "b", "c"), b.attributes("x")); + assertEquals(Collections.emptyList(), b.attributes("z")); + assertEquals(Collections.emptyList(), b.attributes("notinthemap")); + assertEquals(Collections.singletonList("extra"), b.attributes("extra")); } -} \ No newline at end of file +} diff --git a/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3ParserTest.java b/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3ParserTest.java index ec98373..640e2df 100644 --- a/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3ParserTest.java +++ b/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3ParserTest.java @@ -2,16 +2,16 @@ import org.junit.jupiter.api.Test; import 
org.pharmgkb.parsers.gff.Gff3Parser; -import org.pharmgkb.parsers.gff.model.CdsPhase; +import org.pharmgkb.parsers.gff.model.Gff3CdsPhase; import org.pharmgkb.parsers.gff.model.Gff3Feature; -import org.pharmgkb.parsers.gff.model.GffStrand; +import org.pharmgkb.parsers.gff.model.Gff3Strand; import java.math.BigDecimal; import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Test {@link Gff3Parser}. @@ -24,10 +24,10 @@ public void testSeveralLines() throws Exception { Path file = Paths.get(getClass().getResource("test.gff3").toURI()); List features = new Gff3Parser().collectAll(file); assertEquals(4, features.size()); - assertEquals("a", features.get(0).getCoordinateSystemName()); - assertEquals("b", features.get(1).getCoordinateSystemName()); - assertEquals("c", features.get(2).getCoordinateSystemName()); - assertEquals("d", features.get(3).getCoordinateSystemName()); + assertEquals("a", features.get(0).coordinateSystemId()); + assertEquals("b", features.get(1).coordinateSystemId()); + assertEquals("c", features.get(2).coordinateSystemId()); + assertEquals("d", features.get(3).coordinateSystemId()); } @Test @@ -36,8 +36,8 @@ public void testCorrectlySet() throws Exception { Gff3Feature expected = new Gff3Feature.Builder("the-seq-id", "the-type", 0, 10) .setSource("the-source") .setScore(new BigDecimal("5.3e-11")) - .setStrand(GffStrand.PLUS) - .setPhase(CdsPhase.ONE) + .setStrand(Gff3Strand.PLUS) + .setPhase(Gff3CdsPhase.ONE) .build(); assertEquals(expected, feature); } @@ -45,13 +45,13 @@ public void testCorrectlySet() throws Exception { @Test public void testUnescapeCoordinateSystemId() throws Exception { Gff3Feature feature = new Gff3Parser().apply("this%23has%7eother%7echars\t.\tgene\t1\t11\t.\t.\t.\t."); - assertEquals("this#has~other~chars", feature.getCoordinateSystemName()); + assertEquals("this#has~other~chars", 
feature.coordinateSystemId()); } @Test public void testUnescapeField() throws Exception { Gff3Feature feature = new Gff3Parser().apply("a\t.\tthis-has%3bsemicolons!\t1\t11\t.\t.\t.\t."); - assertEquals("this-has;semicolons!", feature.getType()); + assertEquals("this-has;semicolons!", feature.type()); } @Test @@ -61,18 +61,18 @@ public void testAttributes() throws Exception { expected.put("AAA", Collections.singletonList("1")); expected.put("BBB", Arrays.asList("1", "2", "3")); expected.put("CCC", Collections.singletonList("this=needs=escaping")); - assertEquals(expected, feature.getAttributes()); + assertEquals(expected, feature.attributes()); } @Test public void testStrand() throws Exception { - assertEquals(GffStrand.UNSTRANDED, + assertEquals(Gff3Strand.UNSTRANDED, new Gff3Parser().apply("a\t.\tgene\t1\t11\t.\t.\t.\t.") - .getStrand() + .strand() ); - assertEquals(GffStrand.UNKNOWN, + assertEquals(Gff3Strand.UNKNOWN, new Gff3Parser().apply("a\t.\tgene\t1\t11\t.\t?\t.\t.") - .getStrand() + .strand() ); } -} \ No newline at end of file +} diff --git a/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3WriterTest.java b/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3WriterTest.java index 99b28f7..c8d528b 100644 --- a/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3WriterTest.java +++ b/gff/src/test/java/org/pharmgkb/parsers/gff/gff3/Gff3WriterTest.java @@ -2,15 +2,15 @@ import org.junit.jupiter.api.Test; import org.pharmgkb.parsers.gff.Gff3Writer; -import org.pharmgkb.parsers.gff.model.CdsPhase; +import org.pharmgkb.parsers.gff.model.Gff3CdsPhase; import org.pharmgkb.parsers.gff.model.Gff3Feature; -import org.pharmgkb.parsers.gff.model.GffStrand; +import org.pharmgkb.parsers.gff.model.Gff3Strand; import java.math.BigDecimal; import java.util.Arrays; import java.util.Collections; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link Gff3Writer}. 
@@ -31,10 +31,10 @@ public void testSimple() throws Exception { @Test public void testMoreFields() throws Exception { Gff3Feature feature = new Gff3Feature.Builder("chr1", "ttt", 0, 5).setSource("source") - .setPhase(CdsPhase.ONE) + .setPhase(Gff3CdsPhase.ONE) .setScore(new BigDecimal("5.5e-11")) .setSource("the_source") - .setStrand(GffStrand.UNKNOWN) + .setStrand(Gff3Strand.UNKNOWN) .build(); String line = new Gff3Writer().apply(feature); assertEquals("chr1\tthe_source\tttt\t1\t6\t5.5E-11\t?\tONE\t.", line); @@ -49,4 +49,4 @@ public void testEscaping() throws Exception { String line = new Gff3Writer().apply(feature); assertEquals("has%23unescaped%23chars%09too\thas%3bsemicolons%0dtoo\tttt\t1\t6\t.\t.\t.\tx=a%3da,b%3db", line); } -} \ No newline at end of file +} diff --git a/lombok.config b/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/pedigree/build.gradle b/pedigree/build.gradle index 1cf83f3..c9508c4 100644 --- a/pedigree/build.gradle +++ b/pedigree/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') -} \ No newline at end of file + api project(':core') +} diff --git a/pedigree/lombok.config b/pedigree/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/pedigree/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParser.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParser.java index 04dd30e..1393219 100644 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParser.java +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParser.java @@ -2,11 +2,10 @@ import com.google.common.collect.ImmutableSet; import 
org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineStructureParser; import org.pharmgkb.parsers.ObjectBuilder; -import org.pharmgkb.parsers.pedigree.model.Pedigree; +import org.pharmgkb.parsers.pedigree.model.PedigreeI; import org.pharmgkb.parsers.pedigree.model.PedigreeBuilder; -import org.pharmgkb.parsers.pedigree.model.Sex; +import org.pharmgkb.parsers.pedigree.model.PedigreeSex; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,7 +47,7 @@ * } * @author Douglas Myers-Turnbull */ -public class PedigreeParser implements LineStructureParser { +public class PedigreeParser implements PedigreeParserI { private static final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -60,9 +59,9 @@ public class PedigreeParser implements LineStructureParser { private final ImmutableSet m_unknownCodes; private final boolean m_parentsAddedFirst; - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); - private PedigreeParser(@Nonnull Builder builder) { + protected PedigreeParser(@Nonnull Builder builder) { m_noParentMarker = builder.m_noParentMarker; m_fieldSeparator = builder.m_fieldSeparator; m_femaleCodes = ImmutableSet.copyOf(builder.m_femaleCodes); @@ -73,7 +72,7 @@ private PedigreeParser(@Nonnull Builder builder) { @Nonnull @Override - public Pedigree apply(@Nonnull Stream stream) { + public PedigreeI apply(@Nonnull Stream stream) { PedigreeBuilder builder = new PedigreeBuilder(m_parentsAddedFirst); stream.forEach(line -> { @@ -96,13 +95,13 @@ public Pedigree apply(@Nonnull Stream stream) { if (!parts[3].equals(m_noParentMarker)) { motherId = parts[3]; } - Sex sex; + PedigreeSex sex; if (m_femaleCodes.contains(parts[4])) { - sex = Sex.FEMALE; + sex = PedigreeSex.FEMALE; } else if (m_maleCodes.contains(parts[4])) { - sex = Sex.MALE; + sex = PedigreeSex.MALE; } else if 
(m_unknownCodes.contains(parts[4])) { - sex = Sex.UNKNOWN; + sex = PedigreeSex.UNKNOWN; } else { throw new IllegalArgumentException("Sex " + parts[4] + " not recognized"); } @@ -114,7 +113,7 @@ public Pedigree apply(@Nonnull Stream stream) { } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. + // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. e.addSuppressed(new RuntimeException("Unexpectedly failed to parse line " + m_lineNumber)); throw e; } @@ -129,16 +128,17 @@ public long nLinesProcessed() { } @NotThreadSafe - public static class Builder implements ObjectBuilder { + public static class Builder implements ObjectBuilder { private String m_noParentMarker = "0"; private Pattern m_fieldSeparator = Pattern.compile("\\s+"); - private Set m_femaleCodes = new HashSet<>(); - private Set m_maleCodes = new HashSet<>(); - private Set m_unknownCodes = new HashSet<>(); + private Set m_femaleCodes; + private Set m_maleCodes; + private Set m_unknownCodes; private boolean m_parentsAddedFirst; public Builder() { + m_parentsAddedFirst = false; m_maleCodes.add("1"); m_femaleCodes.add("2"); m_unknownCodes.add("3"); @@ -201,7 +201,7 @@ public Builder setParentsAddedFirst() { @Override @Nonnull - public PedigreeParser build() { + public PedigreeParserI build() { return new PedigreeParser(this); } } diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParserI.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParserI.java new file mode 100644 index 0000000..380fe70 --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.pedigree; + +import org.pharmgkb.parsers.LineStructureParser; +import org.pharmgkb.parsers.pedigree.model.PedigreeI; + +public interface PedigreeParserI extends LineStructureParser { + +} 
diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriter.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriter.java index ee1f262..d7e2aea 100644 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriter.java +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriter.java @@ -1,8 +1,8 @@ package org.pharmgkb.parsers.pedigree; -import org.pharmgkb.parsers.LineStructureWriter; import org.pharmgkb.parsers.ObjectBuilder; import org.pharmgkb.parsers.pedigree.model.Pedigree; +import org.pharmgkb.parsers.pedigree.model.PedigreeI; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,21 +18,21 @@ * Writes a {@link Pedigree} as a .ped file. * @author Douglas Myers-Turnbull */ -public class PedigreeWriter implements LineStructureWriter { +public class PedigreeWriter implements PedigreeWriterI { private static final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private String m_noParentMarker; - private String m_fieldSeparator; - private String m_femaleCode; - private String m_maleCode; - private String m_unknownCode; + private final String m_noParentMarker; + private final String m_fieldSeparator; + private final String m_femaleCode; + private final String m_maleCode; + private final String m_unknownCode; - private AtomicLong m_lineNumber = new AtomicLong(0l); + private final AtomicLong m_lineNumber = new AtomicLong(0L); - private PedigreeWriter(@Nonnull Builder builder) { + protected PedigreeWriter(@Nonnull Builder builder) { m_noParentMarker = builder.m_noParentMarker; m_fieldSeparator = builder.m_fieldSeparator; m_femaleCode = builder.m_femaleCode; @@ -42,10 +42,10 @@ private PedigreeWriter(@Nonnull Builder builder) { @Override @Nonnull - public Stream apply(@Nonnull Pedigree pedigree) throws UncheckedIOException { + public Stream apply(@Nonnull PedigreeI pedigree) throws UncheckedIOException { - return 
pedigree.getFamilies().values().parallelStream() - .flatMap(family -> family.topologicalOrderStream() + return pedigree.families().values().parallelStream() + .flatMap(family -> family.topologicalOrder() .map(individual -> { if (m_lineNumber.incrementAndGet() % sf_logEvery == 0) { @@ -53,23 +53,23 @@ public Stream apply(@Nonnull Pedigree pedigree) throws UncheckedIOExcept } StringBuilder sb = new StringBuilder(64); - sb.append(family.getId()).append(m_fieldSeparator); - sb.append(individual.getId()).append(m_fieldSeparator); + sb.append(family.id()).append(m_fieldSeparator); + sb.append(individual.id()).append(m_fieldSeparator); sb.append( - individual.getFather().isEmpty() ? + individual.father().isEmpty() ? m_noParentMarker - : individual.getFather().get().getId() + : individual.father().get().id() ).append(m_fieldSeparator); sb.append( - individual.getMother().isEmpty() ? + individual.mother().isEmpty() ? m_noParentMarker - : individual.getMother().get().getId() + : individual.mother().get().id() ).append(m_fieldSeparator); - switch (individual.getSex()) { - case MALE -> sb.append(m_maleCode); - case FEMALE, UNKNOWN -> sb.append(m_femaleCode); - } - for (String info : individual.getInfo()) { + sb.append(switch (individual.sex()) { + case MALE -> m_maleCode; + case FEMALE, UNKNOWN -> m_femaleCode; + }); + for (String info : individual.info()) { sb.append(m_fieldSeparator).append(info); } return sb.toString(); @@ -138,7 +138,8 @@ public Builder setUnknownCode(@Nonnull String unknownCode) { return this; } - @Nonnull + @Override + @Nonnull public PedigreeWriter build() { return new PedigreeWriter(this); } diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriterI.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriterI.java new file mode 100644 index 0000000..77177bf --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/PedigreeWriterI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.pedigree; + +import 
org.pharmgkb.parsers.LineStructureWriter; +import org.pharmgkb.parsers.pedigree.model.PedigreeI; + +public interface PedigreeWriterI extends LineStructureWriter { + +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeBfsIterator.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeBfsIterator.java new file mode 100644 index 0000000..ed992ac --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeBfsIterator.java @@ -0,0 +1,31 @@ +package org.pharmgkb.parsers.pedigree.iterators; + +import org.pharmgkb.parsers.pedigree.model.PedigreeIndividual; + +import java.util.*; + +public class PedigreeBfsIterator implements Iterator { + + + private final Queue queue; + + public PedigreeBfsIterator(PedigreeIndividual root) { + queue = new ArrayDeque<>(); + queue.add(root); + } + + @Override + public boolean hasNext() { + return !queue.isEmpty(); + } + + @Override + public PedigreeIndividual next() { + PedigreeIndividual current = queue.poll(); + if (current == null) { + throw new NoSuchElementException("BFS iterator done"); + } + queue.addAll(current.children()); + return current; + } +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeDfsIterator.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeDfsIterator.java new file mode 100644 index 0000000..b50a00c --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeDfsIterator.java @@ -0,0 +1,31 @@ +package org.pharmgkb.parsers.pedigree.iterators; + +import org.pharmgkb.parsers.pedigree.model.PedigreeIndividual; + +import java.util.*; + +public class PedigreeDfsIterator implements Iterator { + + private final Stack stack; + + public PedigreeDfsIterator(PedigreeIndividual root) { + stack = new Stack<>(); + stack.push(root); + } + + @Override + public boolean hasNext() { + return !stack.isEmpty(); + } + + @Override + public PedigreeIndividual 
next() { + try { + PedigreeIndividual next = stack.pop(); + next.children().forEach(stack::push); + return next; + } catch (EmptyStackException e) { + throw new NoSuchElementException("DFS iterator done", e); + } + } +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeTopologicalIterator.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeTopologicalIterator.java new file mode 100644 index 0000000..980645b --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/PedigreeTopologicalIterator.java @@ -0,0 +1,40 @@ +package org.pharmgkb.parsers.pedigree.iterators; + +import org.pharmgkb.parsers.pedigree.model.PedigreeIndividual; + +import java.util.*; + +public class PedigreeTopologicalIterator implements Iterator { + + private final Queue queue; + + private Set parents; + + public PedigreeTopologicalIterator(PedigreeIndividual root) { + parents = new HashSet<>(); + queue = new ArrayDeque<>(); + queue.add(root); + } + + @Override + public boolean hasNext() { + return !queue.isEmpty(); + } + + @Override + public PedigreeIndividual next() { + PedigreeIndividual current = queue.poll(); + if (current == null) { + throw new NoSuchElementException("Topological iterator done"); + } + current.children() + .stream() + .filter(child -> + (child.mother().isEmpty() || parents.contains(child.mother().get())) + && (child.father().isEmpty() || parents.contains(child.father().get())) + ) + .forEach(queue::add); + parents.add(current); + return current; + } +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/package-info.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/package-info.java new file mode 100644 index 0000000..849bf46 --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/iterators/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.pedigree.iterators; diff --git 
a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Family.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Family.java deleted file mode 100644 index 4c9ee2c..0000000 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Family.java +++ /dev/null @@ -1,154 +0,0 @@ -package org.pharmgkb.parsers.pedigree.model; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.util.*; - -/** - * A collection of related individuals. Associated with an {@link #getId() Id} that is unique for this {@link Pedigree}. - * - * This class models the family as a DAG, where each node can have zero, one, or two parents, and any number of children. - * @author Douglas Myers-Turnbull - */ -@Immutable -public class Family implements Subtree { - - private String m_id; - - private NavigableSet m_roots; - - Family(@Nonnull String id) { - m_id = id; - m_roots = new TreeSet<>(); - } - - /** - * @return The Id of this Family - */ - @Nonnull - public String getId() { - return m_id; - } - - /** - * An alias for {@link #inOrder()}. - */ - @Nonnull - @Override - public Iterator iterator() { - return inOrder(); - } - - /** - * Returns the {@link Individual} with the specified Id, or null if it doesn't exist. 
- */ - @Nonnull - @Override - public Optional find(@Nonnull String individualId) { - for (Individual root : m_roots) { - Optional found = root.find(individualId); - if (found.isPresent()) { - return found; - } - } - return Optional.empty(); - } - - @Nonnull - @Override - public Iterator breadthFirst() { - Set visited = new LinkedHashSet<>(); - Queue queue = new ArrayDeque<>(m_roots); - while (!queue.isEmpty()) { - Individual current = queue.poll(); - visited.add(current); - queue.addAll(new ArrayList<>(current.getChildrenRaw())); - } - return visited.iterator(); - } - - @Nonnull - @Override - public Iterator depthFirst() { - Set visited = new LinkedHashSet<>(16); - for (Individual root : m_roots) { - for (Individual child : root.getChildrenRaw()) { - Iterator ind = child.depthFirst(); - while (ind.hasNext()) { - visited.add(ind.next()); - } - } - visited.add(root); - } - return visited.iterator(); - } - - /** - * Returns the set of {@link Individual Individuals} with no parents. - */ - @Nonnull - public NavigableSet getRoots() { - return new TreeSet<>(m_roots); - } - - /** - * Returns an iterator that uses the lexigraphical ordering of the {@link Individual#getId() Individual Ids}. 
- * For example: a, b, c, d - */ - @Nonnull - @Override - public Iterator inOrder() { - Set visited = new TreeSet<>(); - for (Individual root : m_roots) { - Iterator children = root.inOrder(); - while (children.hasNext()) { - visited.add(children.next()); - } - } - return visited.iterator(); - } - - @Nonnull - @Override - public Iterator topologicalOrder() { - Set fathersRemoved = new HashSet<>(16); - Set mothersRemoved = new HashSet<>(16); - List visited = new ArrayList<>(16); - Queue queue = new ArrayDeque<>(m_roots); - for (Individual root : m_roots) { - PedigreeUtils.computeTopologicalOrdering(visited, queue, fathersRemoved, mothersRemoved, root, m_id); - } - return visited.iterator(); - } - - /** - * For speed, only checks the individual Ids, which is safe since - * {@link PedigreeBuilder} ensures that no two Families share the same Id. - */ - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Family family = (Family) o; - return m_id.equals(family.m_id); - } - - @Override - public int hashCode() { - return m_id.hashCode(); - } - - @Override - public String toString() { - return "Family{" + "m_id='" + m_id + '\'' + '}'; - } - - void setId(@Nonnull String id) { - m_id = id; - } - - @Nonnull - NavigableSet getRootsRaw() { - return m_roots; - } -} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Individual.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Individual.java deleted file mode 100644 index d2b70de..0000000 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Individual.java +++ /dev/null @@ -1,216 +0,0 @@ -package org.pharmgkb.parsers.pedigree.model; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import javax.annotation.concurrent.Immutable; -import java.util.*; -import java.util.stream.Collectors; - -/** - * A member of a {@link Family}. 
- * Associated with an {@link #getId() Id} that is unique for this {@link Pedigree}. - * - * Implements {@link Subtree} for search and traversals using this node as root. - * For example, if A is a child of B, and C is a child of B, calling B.find("C") will return C,but calling B.find("A") - * will return null. - * Similarly, the iterator B.breadthFirst() will contain (B, C); it will not contain A. - * @author Douglas Myers-Turnbull - */ -@Immutable -public class Individual implements Subtree, Comparable { - - private String m_id; - - private Family m_family; - - private Optional m_father = Optional.empty(); - - private Optional m_mother = Optional.empty(); - - private NavigableSet m_children; - - private Sex m_sex; - - private List m_info; - - Individual(@Nonnull String id, @Nonnull Sex sex, @Nonnull Family family, @Nonnull List info) { - m_id = id; - m_sex = sex; - m_family = family; - m_info = info; - m_children = new TreeSet<>(); - } - - @Nonnull - @Override - public Optional find(@Nonnull String individualId) { - for (Individual node : this) { - if (node.getId().equals(individualId)) { - return Optional.of(node); - } - } - return Optional.empty(); - } - - @Nonnull - @Override - public Iterator breadthFirst() { - List visited = new ArrayList<>(); - Queue queue = new ArrayDeque<>(); - queue.add(this); - while (!queue.isEmpty()) { - Individual current = queue.poll(); - visited.add(current); - queue.addAll(current.getChildrenRaw().stream().collect(Collectors.toList())); - } - return visited.iterator(); - } - - @Nonnull - @Override - public Iterator depthFirst() { - Set visited = new LinkedHashSet<>(); - for (Individual child : m_children) { - Iterator i = child.depthFirst(); - while (i.hasNext()) { - visited.add(i.next()); - } - } - visited.add(this); - return visited.iterator(); - } - - @Nonnull - @Override - public Iterator inOrder() { - List visited = new ArrayList<>(); - visited.add(this); - for (Individual child : m_children) { - Iterator i = child.inOrder(); 
- while (i.hasNext()) { - visited.add(i.next()); - } - } - return visited.iterator(); - } - - @Nonnull - @Override - public Iterator topologicalOrder() { - Set fathersRemoved = new HashSet<>(); - Set mothersRemoved = new HashSet<>(); - List visited = new ArrayList<>(); - Queue queue = new ArrayDeque<>(); - queue.add(this); - PedigreeUtils.computeTopologicalOrdering(visited, queue, fathersRemoved, mothersRemoved, this, m_id); - return visited.iterator(); - } - - @Nonnull - @Override - public Iterator iterator() { - return inOrder(); - } - - /** - * Additional annotations, such as genotype, phenotype, probrand status, and diseases state. - */ - @Nonnull - public List getInfo() { - return new ArrayList<>(m_info); - } - - @Nonnull - public String getId() { - return m_id; - } - - @Nonnull - public Family getFamily() { - return m_family; - } - - @Override - public int compareTo(Individual other) { - return m_id.compareTo(other.getId()); - } - - void setId(String id) { - m_id = id; - } - - @Nonnull - public Optional getFather() { - return m_father; - } - void setFather(@Nullable Individual test) { - setFather(Optional.ofNullable(test)); - } - void setFather(@Nonnull Optional father) { - m_father = father; - } - - @Nonnull - public Optional getMother() { - return m_mother; - } - void setMother(@Nullable Individual test) { - setMother(Optional.ofNullable(test)); - } - void setMother(@Nonnull Optional mother) { - m_mother = mother; - } - - @Nonnull - public Sex getSex() { - return m_sex; - } - - void setSex(Sex sex) { - m_sex = sex; - } - - @Nonnull - public NavigableSet getChildren() { - return new TreeSet<>(m_children); - } - - void setChildren(NavigableSet children) { - m_children = children; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Individual that = (Individual) o; - return m_id.equals(that.m_id) && m_family.equals(that.m_family); - } - - @Override - public int 
hashCode() { - return m_id.hashCode(); - } - - public String toSimpleString() { - return "Individual{" + "m_id='" + m_id + '\'' + '}'; - } - - @Override - public String toString() { - return "Individual{" + - "id='" + m_id + '\'' + - ", family=" + m_family + - ", father=" + m_father + - ", mother=" + m_mother + - ", children=" + m_children + - ", sex=" + m_sex + - ", info=" + m_info + - '}'; - } - - @SuppressWarnings("SuspiciousGetterSetter") - NavigableSet getChildrenRaw() { - return m_children; - } -} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Pedigree.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Pedigree.java index 5168d1a..a737345 100644 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Pedigree.java +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Pedigree.java @@ -1,29 +1,29 @@ package org.pharmgkb.parsers.pedigree.model; +import org.pharmgkb.parsers.pedigree.PedigreeParser; + import javax.annotation.Nonnull; import javax.annotation.concurrent.Immutable; -import java.util.Iterator; -import java.util.NavigableMap; -import java.util.TreeMap; +import java.util.*; /** - * An immutable pedigree containing a set of {@link Family Families}. + * An immutable pedigree containing a set of {@link PedigreeFamily Families}. * Individuals can only be related to individuals in the same family. 
* @see PedigreeBuilder For building a pedigree programmatically - * @see org.pharmgkb.parsers.pedigree.PedigreeParser For building a pedigree from a .ped/LINKAGE/QTDT format + * @see PedigreeParser For building a pedigree from a .ped/LINKAGE/QTDT format * @author Douglas Myers-Turnbull */ @Immutable -public class Pedigree implements Iterable { +public class Pedigree implements PedigreeI { - private NavigableMap m_families; + private NavigableMap m_families; Pedigree() { m_families = new TreeMap<>(); } @Nonnull - public NavigableMap getFamilies() { + public NavigableMap families() { return new TreeMap<>(m_families); } @@ -44,19 +44,30 @@ public int hashCode() { return m_families.hashCode(); } - @Nonnull - public Family getFamily(@Nonnull String familyId) { - return m_families.get(familyId); - } + @Override + @Nonnull + public Optional findFamily(@Nonnull String familyId) { + return Optional.ofNullable(m_families.get(familyId)); + } + + @Override + @Nonnull + public PedigreeFamily getFamily(@Nonnull String familyId) { + var x = m_families.get(familyId); + if (x == null) { + throw new NoSuchElementException("No family with ID " + familyId); + } + return x; + } @Override @Nonnull - public Iterator iterator() { + public Iterator iterator() { return m_families.values().iterator(); } @Nonnull - NavigableMap getFamiliesRaw() { + NavigableMap getFamiliesRaw() { return m_families; } diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeBuilder.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeBuilder.java index d9680a6..70aacdf 100644 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeBuilder.java +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeBuilder.java @@ -28,17 +28,18 @@ * @author Douglas Myers-Turnbull */ @NotThreadSafe -public class PedigreeBuilder implements ObjectBuilder { +public class PedigreeBuilder implements ObjectBuilder { private final boolean 
m_parentsAddedFirst; private Pedigree m_pedigree = new Pedigree(); - private Set m_individualIdsUsed = new HashSet<>(); - private Map m_fatherPlaceholders = new HashMap<>(); - private Map m_motherPlaceholders = new HashMap<>(); + private final Set m_individualIdsUsed = new HashSet<>(400); + private final Map m_fatherPlaceholders = new HashMap<>(240); + private final Map m_motherPlaceholders = new HashMap<>(240); /** - * @param parentsAddedFirst If true, construction is faster, but individuals must be added in order: if non-null, {@code fatherId} and {@code motherId}, must reference individuals that have already been added + * @param parentsAddedFirst If true, construction is faster, but individuals must be added in order: + * if non-null, {@code fatherId} and {@code motherId}, must reference individuals that have already been added */ public PedigreeBuilder(boolean parentsAddedFirst) { m_parentsAddedFirst = parentsAddedFirst; @@ -46,9 +47,11 @@ public PedigreeBuilder(boolean parentsAddedFirst) { /** * - * Adds an individual. If inOrder in the constructor is true, this individual's parents, if any, must already have been added. + * Adds an individual. If inOrder in the constructor is true, this individual's parents, + * if any, must already have been added. * - * Note that the sex of a parent must be correct: if A is the mother of B, then A must have {@link Sex} FEMALE (cannot be MALE or UNKNOWN). + * Note that the sex of a parent must be correct: if {@code A} is the mother of {@code B}, then {@code A} + * must have {@link PedigreeSex} FEMALE (cannot be MALE or UNKNOWN). 
* * @throws IllegalStateException If {@link #build()} was already called * @@ -57,9 +60,9 @@ public PedigreeBuilder(boolean parentsAddedFirst) { */ @Nonnull public PedigreeBuilder add( - @Nonnull String familyId, @Nonnull String individualId, - @Nullable String fatherId, @Nullable String motherId, - @Nonnull Sex sex, @Nonnull List info + @Nonnull String familyId, @Nonnull String individualId, + @Nullable String fatherId, @Nullable String motherId, + @Nonnull PedigreeSex sex, @Nonnull List info ) { return addIndividual( familyId, individualId, @@ -73,7 +76,7 @@ public PedigreeBuilder add( * * Adds an individual. If inOrder in the constructor is true, this individual's parents, if any, must already have been added. * - * Note that the sex of a parent must be correct: if A is the mother of B, then A must have {@link Sex} FEMALE (cannot be MALE or UNKNOWN). + * Note that the sex of a parent must be correct: if A is the mother of B, then A must have {@link PedigreeSex} FEMALE (cannot be MALE or UNKNOWN). 
* * @throws IllegalStateException If {@link #build()} was already called * @@ -82,13 +85,13 @@ public PedigreeBuilder add( */ @Nonnull public PedigreeBuilder addIndividual( - @Nonnull String familyId, @Nonnull String individualId, - @Nonnull Optional fatherId, @Nonnull Optional motherId, - @Nonnull Sex sex, @Nonnull List info + @Nonnull String familyId, @Nonnull String individualId, + @Nonnull Optional fatherId, @Nonnull Optional motherId, + @Nonnull PedigreeSex sex, @Nonnull List info ) { // enforces immutability; see build() - if (m_pedigree == null) { + if (null == m_pedigree) { throw new IllegalStateException("PedigreeBuilder.build() already called"); } @@ -97,18 +100,19 @@ public PedigreeBuilder addIndividual( } m_individualIdsUsed.add(individualId); - Family family; + PedigreeFamily family; if (m_pedigree.getFamiliesRaw().containsKey(familyId)) { family = m_pedigree.getFamily(familyId); } else { - family = new Family(familyId); + family = new PedigreeFamily(familyId); m_pedigree.getFamiliesRaw().put(familyId, family); } - Individual individual = new Individual(individualId, sex, family, info); + PedigreeIndividual individual = new PedigreeIndividual(individualId, sex, family); + individual.m_info.addAll(info); if (fatherId.isEmpty() && motherId.isEmpty()) { - family.getRootsRaw().add(individual); + family.m_roots.add(individual); } else { if (fatherId.isPresent()) { boolean fatherAttached = attachFather(individual, fatherId.get()); @@ -127,18 +131,17 @@ public PedigreeBuilder addIndividual( if (!m_parentsAddedFirst) { attachRemaining(false); } - return this; - } /** * Builds the {@link Pedigree}. After this method is called, calls to - * {@link #addIndividual(String, String, Optional, Optional, Sex, List)} will result in a + * {@link #addIndividual(String, String, Optional, Optional, PedigreeSex, List)} will result in a * {@link java.lang.IllegalStateException} being thrown. 
*/ - @Nonnull - public Pedigree build() { + @Override + @Nonnull + public PedigreeI build() { attachRemaining(true); Pedigree pedigree = m_pedigree; //noinspection AssignmentToNull @@ -146,70 +149,73 @@ public Pedigree build() { return pedigree; } - private boolean attachFather(@Nonnull Individual individual, @Nonnull String fatherId) { + private static boolean attachFather( + @Nonnull PedigreeIndividual individual, + @Nonnull String fatherId + ) { - for (Individual test : individual.getFamily()) { + for (PedigreeIndividual test : individual.family()) { - if (fatherId.equals(test.getId())) { - if (test.getSex() != Sex.MALE) { + if (fatherId.equals(test.id())) { + if (PedigreeSex.MALE != test.sex()) { throw new IllegalArgumentException( - "Individual " + individual.getId() + " must have a male father (Id: " + test.getId() + ")" + "Individual " + individual.id() + " must have a male father (Id: " + test.id() + ")" ); } - test.getChildrenRaw().add(individual); - individual.setFather(test); + test.m_children.add(individual); + individual.m_father = Optional.of(test); return true; } } - return false; } - private boolean attachMother(@Nonnull Individual individual, @Nonnull String motherId) { + private static boolean attachMother( + @Nonnull PedigreeIndividual individual, + @Nonnull String motherId + ) { - for (Individual test : individual.getFamily()) { + for (PedigreeIndividual test : individual.family()) { - if (motherId.equals(test.getId())) { - if (test.getSex() != Sex.FEMALE) { + if (motherId.equals(test.id())) { + if (PedigreeSex.FEMALE != test.sex()) { throw new IllegalArgumentException( - "Individual " +individual.getId()+ " must have a female mother (Id: " + test.getId() + ")"); + "Individual " + individual.id()+ " must have a female mother (Id: " + test.id() + ")"); } - test.getChildrenRaw().add(individual); - individual.setMother(test); + test.m_children.add(individual); + individual.m_mother = Optional.of(test); return true; } } - return false; } - private 
void attachRemaining(boolean require) { - for (Map.Entry entry : m_fatherPlaceholders.entrySet()) { - Individual individual = entry.getKey(); + for (Map.Entry entry : m_fatherPlaceholders.entrySet()) { + PedigreeIndividual individual = entry.getKey(); String fatherId = entry.getValue(); boolean attached = attachFather(individual, fatherId); if (require && !attached) { throw new IllegalArgumentException( - "Father of individual " + individual.getId() + "Father of individual " + individual.id() + " with father "+ fatherId + " does not exist in family " - + individual.getFamily().getId() + + individual.family().id() ); } } - for (Map.Entry entry : m_motherPlaceholders.entrySet()) { - Individual individual = entry.getKey(); + for (Map.Entry entry : m_motherPlaceholders.entrySet()) { + PedigreeIndividual individual = entry.getKey(); String motherId = entry.getValue(); boolean attached = attachMother(individual, motherId); if (require && !attached) { throw new IllegalArgumentException( - "Mother of individual " + individual.getId() + " with mother " + "Mother of individual " + individual.id() + " with mother " + motherId + " does not exist in family " - + individual.getFamily().getId() + + individual.family().id() ); } } diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamily.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamily.java new file mode 100644 index 0000000..600d3f9 --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamily.java @@ -0,0 +1,109 @@ +package org.pharmgkb.parsers.pedigree.model; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; +import java.util.*; +import java.util.stream.Stream; + +/** + * A collection of related individuals. Associated with an {@link #id() Id} that is unique for this {@link Pedigree}. 
+ * + * This class models the family as a DAG, where each node can have zero, one, or two parents, and any number of children. + * @author Douglas Myers-Turnbull + */ +@Immutable +public class PedigreeFamily implements PedigreeFamilyI { + + private final String m_id; + + protected final NavigableSet m_roots; + + protected PedigreeFamily(@Nonnull String id) { + m_id = id; + m_roots = new TreeSet<>(); + } + + /** + * @return The Id of this Family + */ + @Override + @Nonnull + public String id() { + return m_id; + } + + /** + * Returns the {@link PedigreeIndividual} with the specified Id, or an empty {@link Optional} if it doesn't exist. + */ + @Nonnull + @Override + public Optional find(@Nonnull String individualId) { + return m_roots.parallelStream().flatMap(i -> i.find(individualId).stream()).findAny(); + } + + @Nonnull + @Override + public Stream breadthFirstOrder() { + return m_roots.stream().flatMap(PedigreeIndividual::breadthFirstOrder); + } + + @Nonnull + @Override + public Stream depthFirstOrder() { + return m_roots.stream().flatMap(PedigreeIndividual::depthFirstOrder); + } + + /** + * Returns the set of {@link PedigreeIndividual Individuals} with no parents. + */ + @Override + @Nonnull + public NavigableSet roots() { + return new TreeSet<>(m_roots); + } + + /** + * Returns a stream that uses the lexicographical ordering of the {@link PedigreeIndividual#id() Individual Ids}. + * For example: a, b, c, d + */ + @Nonnull + @Override + public Stream lexigraphicalOrder() { + return m_roots.stream().sorted().flatMap(PedigreeIndividual::lexigraphicalOrder); + } + + @Nonnull + @Override + public Stream topologicalOrder() { + return m_roots.stream().flatMap(PedigreeIndividual::topologicalOrder); + } + + @Nonnull + @Override + public Stream anyOrder() { + return m_roots.parallelStream().flatMap(PedigreeIndividual::anyOrder); + } + + /** + * For speed, only checks the family Id, which is safe since + * {@link PedigreeBuilder} ensures that no two Families share the same Id.
+ */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PedigreeFamily family = (PedigreeFamily) o; + return m_id.equals(family.m_id); + } + + @Override + public int hashCode() { + return m_id.hashCode(); + } + + @Override + public String toString() { + return "Family{" + "m_id='" + m_id + '\'' + '}'; + } + +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamilyI.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamilyI.java new file mode 100644 index 0000000..67e87cf --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeFamilyI.java @@ -0,0 +1,11 @@ +package org.pharmgkb.parsers.pedigree.model; + +import javax.annotation.Nonnull; +import java.util.NavigableSet; + +public interface PedigreeFamilyI extends PedigreeSubtreeI { + + @Nonnull String id(); + @Nonnull + NavigableSet roots(); +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeI.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeI.java new file mode 100644 index 0000000..b5d3261 --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeI.java @@ -0,0 +1,15 @@ +package org.pharmgkb.parsers.pedigree.model; + +import javax.annotation.Nonnull; +import java.util.NavigableMap; +import java.util.Optional; + +public interface PedigreeI extends Iterable { + + @Nonnull NavigableMap families(); + + @Nonnull Optional findFamily(@Nonnull String familyId); + + @Nonnull + PedigreeFamily getFamily(@Nonnull String familyId); +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividual.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividual.java new file mode 100644 index 0000000..fc27969 --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividual.java @@ -0,0 +1,164 
@@ +package org.pharmgkb.parsers.pedigree.model; + +import org.pharmgkb.parsers.pedigree.iterators.PedigreeBfsIterator; +import org.pharmgkb.parsers.pedigree.iterators.PedigreeDfsIterator; +import org.pharmgkb.parsers.pedigree.iterators.PedigreeTopologicalIterator; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; +import java.util.*; +import java.util.stream.Stream; + +/** + * A member of a {@link PedigreeFamily}. + * Associated with an {@link #id() Id} that is unique for this {@link Pedigree}. + * + * Implements {@link PedigreeSubtreeI} for search and traversals using this node as root. + * For example, if B is a child of A, and C is a child of B, calling B.find("C") will return C, but calling B.find("A") + * will return an empty {@link Optional}. + * Similarly, the stream B.breadthFirstOrder() will contain (B, C); it will not contain A. + * @author Douglas Myers-Turnbull + */ +@SuppressWarnings({"NonFinalFieldInImmutable", "CompareToUsesNonFinalVariable", "ProtectedField"}) +@Immutable +public class PedigreeIndividual implements PedigreeIndividualI, Comparable { + + private String m_id; + + private final PedigreeFamily m_family; + + private final PedigreeSex m_sex; + + protected List m_info; + + protected Optional m_father; + + protected Optional m_mother; + + protected NavigableSet m_children; + + PedigreeIndividual( + @Nonnull String id, @Nonnull PedigreeSex sex, @Nonnull PedigreeFamily family + ) { + m_mother = Optional.empty(); + m_father = Optional.empty(); + m_id = id; + m_sex = sex; + m_family = family; + m_info = new ArrayList<>(8); + m_children = new TreeSet<>(); + } + + @Nonnull + @Override + public Optional find(@Nonnull String individualId) { + return anyOrder().filter(i -> i.id().equals(individualId)).findAny(); + } + + @Nonnull + @Override + public Stream breadthFirstOrder() { + return Stream.generate(() -> new PedigreeBfsIterator(this).next()); + } + + @Nonnull + @Override + public Stream depthFirstOrder() { + return Stream.generate(() ->
new PedigreeDfsIterator(this).next()); + } + + @Nonnull + @Override + public Stream lexigraphicalOrder() { + return Stream.concat(Stream.of(this), m_children.stream().flatMap(PedigreeIndividual::lexigraphicalOrder).sorted()); + } + + @Nonnull + @Override + public Stream anyOrder() { + return Stream.concat(Stream.of(this), m_children.parallelStream().flatMap(PedigreeIndividual::anyOrder)); + } + + @Nonnull + @Override + public Stream topologicalOrder() { + return Stream.generate(() -> new PedigreeTopologicalIterator(this).next()); + } + + /** + * Additional annotations, such as genotype, phenotype, proband status, and disease state. + */ + @Override + @Nonnull + public List info() { + return Collections.unmodifiableList(m_info); + } + + @Override + @Nonnull + public String id() { + return m_id; + } + + @Override + @Nonnull + public PedigreeFamily family() { + return m_family; + } + + @Override + public int compareTo(PedigreeIndividual other) { + return m_id.compareTo(other.id()); + } + + @Override + @Nonnull + public Optional father() { + return m_father; + } + + @Override + @Nonnull + public Optional mother() { + return m_mother; + } + + @Override + @Nonnull + public PedigreeSex sex() { + return m_sex; + } + + @Override + @Nonnull + public NavigableSet children() { + return new TreeSet<>(m_children); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (null == o || getClass() != o.getClass()) return false; + PedigreeIndividual that = (PedigreeIndividual) o; + return m_id.equals(that.m_id) && m_family.equals(that.m_family); + } + + @Override + public int hashCode() { + return m_id.hashCode(); + } + + @Override + public String toString() { + return "Individual{" + + "id='" + m_id + '\'' + + ", family=" + m_family + + ", father=" + m_father + + ", mother=" + m_mother + + ", children=" + m_children + + ", sex=" + m_sex + + ", info=" + m_info + + '}'; + } + +} diff --git
a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividualI.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividualI.java new file mode 100644 index 0000000..aff6c8d --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeIndividualI.java @@ -0,0 +1,27 @@ +package org.pharmgkb.parsers.pedigree.model; + +import javax.annotation.Nonnull; +import java.util.*; + +public interface PedigreeIndividualI extends PedigreeSubtreeI { + + /** + * Additional annotations, such as genotype, phenotype, proband status, and disease state. + */ + @Nonnull List info(); + + @Nonnull String id(); + + @Nonnull + PedigreeFamily family(); + + @Nonnull Optional father(); + + @Nonnull Optional mother(); + + @Nonnull + PedigreeSex sex(); + + @Nonnull NavigableSet children(); + +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Sex.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSex.java similarity index 84% rename from pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Sex.java rename to pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSex.java index e244a74..3c59bee 100644 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Sex.java +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSex.java @@ -4,7 +4,7 @@ * Male, female, or unknown.
* @author Douglas Myers-Turnbull */ -public enum Sex { +public enum PedigreeSex { MALE, FEMALE, diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSubtreeI.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSubtreeI.java new file mode 100644 index 0000000..85792a4 --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeSubtreeI.java @@ -0,0 +1,42 @@ +package org.pharmgkb.parsers.pedigree.model; + +import javax.annotation.Nonnull; +import java.util.Iterator; +import java.util.Optional; +import java.util.Spliterator; +import java.util.stream.Stream; + +/** + * A subtree of a {@link Pedigree} + * @param The nodes of the DAG; in this case, always {@link PedigreeIndividual} + * @author Douglas Myers-Turnbull + */ +public interface PedigreeSubtreeI extends Iterable { + + int SPLITERATOR_FLAGS = Spliterator.ORDERED | Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL; + + @Nonnull + Optional find(@Nonnull String individualId); + + @Nonnull + Stream breadthFirstOrder(); + + @Nonnull + Stream depthFirstOrder(); + + @Nonnull + Stream lexigraphicalOrder(); + + @Nonnull + Stream topologicalOrder(); + + @Nonnull + Stream anyOrder(); + + @Override + @Nonnull + default Iterator iterator() { + return topologicalOrder().iterator(); + } + +} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeUtils.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeUtils.java deleted file mode 100644 index 3ef8f08..0000000 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/PedigreeUtils.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.pharmgkb.parsers.pedigree.model; - -import javax.annotation.Nonnull; -import java.util.List; -import java.util.Queue; -import java.util.Set; - -/** - * Package-level utilities for building and traversing {@link Pedigree Pedigrees}. 
- * @author Douglas Myers-Turnbull - */ -class PedigreeUtils { - - /** - * Add the roots first! - */ - static void computeTopologicalOrdering( - @Nonnull List visited, @Nonnull Queue queue, - @Nonnull Set fathersRemoved, @Nonnull Set mothersRemoved, - @Nonnull Individual root, @Nonnull String id - ) { - while (!queue.isEmpty()) { - Individual current = queue.poll(); - visited.add(current); - for (Individual child : current.getChildren()) { - // assume that father iff male and mother iff female - // we check for this in the constructor and in PedigreeBuilder - if (current.getSex() == Sex.MALE) { - fathersRemoved.add(child); - } else { - mothersRemoved.add(child); - } - if ( - (child.getFather().isEmpty() || fathersRemoved.contains(child)) - && (child.getMother().isEmpty() || mothersRemoved.contains(child)) - ) { - // Because PedigreeBuilder only adds edges when it adds nodes, this is impossible - // Keep it here in case we add an alternate way to build pedigrees - if (queue.contains(child)) { - throw new IllegalStateException( - "Pedigree for " + id + " contains a cycle! 
See edge " - + current.getId() + " --> " + child.getId() - ); - } - queue.add(child); - } - } - } - } - -} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Subtree.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Subtree.java deleted file mode 100644 index bb6b7a0..0000000 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/Subtree.java +++ /dev/null @@ -1,90 +0,0 @@ -package org.pharmgkb.parsers.pedigree.model; - -import javax.annotation.Nonnull; -import java.util.Iterator; -import java.util.Optional; -import java.util.Spliterator; -import java.util.Spliterators; -import java.util.stream.Stream; -import java.util.stream.StreamSupport; - -/** - * A subtree of a {@link Pedigree} - * @param The nodes of the DAG; in this case, always {@link Individual} - * @author Douglas Myers-Turnbull - */ -public interface Subtree extends Iterable { - - int SPLITERATOR_FLAGS = Spliterator.ORDERED | Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL; - - @Nonnull - Optional find(@Nonnull String individualId); - - @Nonnull - Iterator breadthFirst(); - - @Nonnull - Iterator depthFirst(); - - @Nonnull - Iterator inOrder(); - - @Nonnull - Iterator topologicalOrder(); - - @Override - @Nonnull - default Iterator iterator() { - return inOrder(); - } - - @Nonnull - default Stream breadthFirstStream() { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(breadthFirst(), SPLITERATOR_FLAGS), - false - ); - } - - @Nonnull - default Stream depthFirstStream() { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(depthFirst(), SPLITERATOR_FLAGS), - false - ); - } - - @Nonnull - default Stream inOrderStream() { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(inOrder(), SPLITERATOR_FLAGS), - false - ); - } - - @Nonnull - default Stream topologicalOrderStream() { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(topologicalOrder(), SPLITERATOR_FLAGS), - false 
- ); - } - - @Nonnull - default Stream stream() { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(iterator(), SPLITERATOR_FLAGS), - false - ); - } - - @Nonnull - default Stream parallelStream() { - return StreamSupport.stream( - Spliterators.spliteratorUnknownSize(iterator(), SPLITERATOR_FLAGS), - true - ); - } - - -} diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/package-info.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/package-info.java new file mode 100644 index 0000000..44de03f --- /dev/null +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.pedigree.model; diff --git a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/package-info.java b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/package-info.java index ae7433f..0476e27 100644 --- a/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/package-info.java +++ b/pedigree/src/main/java/org/pharmgkb/parsers/pedigree/package-info.java @@ -10,4 +10,4 @@ * * @author Douglas Myers-Turnbull */ -package org.pharmgkb.parsers.pedigree; \ No newline at end of file +package org.pharmgkb.parsers.pedigree; diff --git a/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeBuilderTest.java b/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeBuilderTest.java index cfaecf7..00f5558 100644 --- a/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeBuilderTest.java +++ b/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeBuilderTest.java @@ -14,7 +14,7 @@ */ public class PedigreeBuilderTest { - private static Family m_family; + private PedigreeFamily m_family; @BeforeEach public void setUp() throws Exception { @@ -36,27 +36,27 @@ public void setUp() throws Exception { * | * gen 5: {A5_ua} */ - private static Family buildFamily() { + private static PedigreeFamily buildFamily() { PedigreeBuilder builder = new PedigreeBuilder(true); - 
builder.add("f1", "A0_fb", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A0_ma", null, null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A0_fa", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A0_fc", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A0_f_", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A1_ma", "A0_ma", "A0_fa", Sex.MALE, Collections.emptyList()); - builder.add("f1", "A2_ma", "A1_ma", null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A3_fa", "A2_ma", "A0_fb", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A3_fb", "A2_ma", "A0_fb", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A3_fc", "A2_ma", "A0_fb", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A3_ua", null, "A0_fb", Sex.UNKNOWN, Collections.emptyList()); - builder.add("f1", "A3_ma", "A2_ma", null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A4_ma", null, "A3_fc", Sex.MALE, Collections.emptyList()); - builder.add("f1", "A4_fa", "A3_ma", "A0_fc", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A5_ua", "A4_ma", "A4_fa", Sex.UNKNOWN, Collections.emptyList()); - - Pedigree pedigree = builder.build(); + builder.add("f1", "A0_fb", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A0_ma", null, null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A0_fa", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A0_fc", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A0_f_", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A1_ma", "A0_ma", "A0_fa", PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A2_ma", "A1_ma", null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A3_fa", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A3_fb", 
"A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A3_fc", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A3_ua", null, "A0_fb", PedigreeSex.UNKNOWN, Collections.emptyList()); + builder.add("f1", "A3_ma", "A2_ma", null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A4_ma", null, "A3_fc", PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A4_fa", "A3_ma", "A0_fc", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A5_ua", "A4_ma", "A4_fa", PedigreeSex.UNKNOWN, Collections.emptyList()); + + var pedigree = builder.build(); assertNotNull(pedigree); - Family family = pedigree.getFamily("f1"); + PedigreeFamily family = pedigree.getFamily("f1"); assertNotNull(family); return family; @@ -64,167 +64,167 @@ private static Family buildFamily() { @Test public void testRoots() { - Iterator roots = m_family.getRoots().iterator(); - assertEquals("A0_f_", roots.next().getId()); - assertEquals("A0_fa", roots.next().getId()); - assertEquals("A0_fb", roots.next().getId()); - assertEquals("A0_fc", roots.next().getId()); - assertEquals("A0_ma", roots.next().getId()); + Iterator roots = m_family.roots().iterator(); + assertEquals("A0_f_", roots.next().id()); + assertEquals("A0_fa", roots.next().id()); + assertEquals("A0_fb", roots.next().id()); + assertEquals("A0_fc", roots.next().id()); + assertEquals("A0_ma", roots.next().id()); assertFalse(roots.hasNext()); } @Test public void testBfs() { - Iterator breadthFirst = m_family.breadthFirst(); - assertEquals("A0_f_", breadthFirst.next().getId()); - assertEquals("A0_fa", breadthFirst.next().getId()); - assertEquals("A0_fb", breadthFirst.next().getId()); - assertEquals("A0_fc", breadthFirst.next().getId()); - assertEquals("A0_ma", breadthFirst.next().getId()); - assertEquals("A1_ma", breadthFirst.next().getId()); - assertEquals("A3_fa", breadthFirst.next().getId()); - assertEquals("A3_fb", breadthFirst.next().getId()); - 
assertEquals("A3_fc", breadthFirst.next().getId()); - assertEquals("A3_ua", breadthFirst.next().getId()); - assertEquals("A4_fa", breadthFirst.next().getId()); - assertEquals("A2_ma", breadthFirst.next().getId()); - assertEquals("A4_ma", breadthFirst.next().getId()); - assertEquals("A5_ua", breadthFirst.next().getId()); - assertEquals("A3_ma", breadthFirst.next().getId()); + Iterator breadthFirst = m_family.breadthFirstOrder().iterator(); + assertEquals("A0_f_", breadthFirst.next().id()); + assertEquals("A0_fa", breadthFirst.next().id()); + assertEquals("A0_fb", breadthFirst.next().id()); + assertEquals("A0_fc", breadthFirst.next().id()); + assertEquals("A0_ma", breadthFirst.next().id()); + assertEquals("A1_ma", breadthFirst.next().id()); + assertEquals("A3_fa", breadthFirst.next().id()); + assertEquals("A3_fb", breadthFirst.next().id()); + assertEquals("A3_fc", breadthFirst.next().id()); + assertEquals("A3_ua", breadthFirst.next().id()); + assertEquals("A4_fa", breadthFirst.next().id()); + assertEquals("A2_ma", breadthFirst.next().id()); + assertEquals("A4_ma", breadthFirst.next().id()); + assertEquals("A5_ua", breadthFirst.next().id()); + assertEquals("A3_ma", breadthFirst.next().id()); assertFalse(breadthFirst.hasNext()); } @Test public void testDfs() { - Iterator depthFirst = m_family.depthFirst(); - assertEquals("A0_f_", depthFirst.next().getId()); - assertEquals("A3_fa", depthFirst.next().getId()); - assertEquals("A3_fb", depthFirst.next().getId()); - assertEquals("A5_ua", depthFirst.next().getId()); - assertEquals("A4_ma", depthFirst.next().getId()); - assertEquals("A3_fc", depthFirst.next().getId()); - assertEquals("A4_fa", depthFirst.next().getId()); - assertEquals("A3_ma", depthFirst.next().getId()); - - assertEquals("A2_ma", depthFirst.next().getId()); - assertEquals("A1_ma", depthFirst.next().getId()); - assertEquals("A0_fa", depthFirst.next().getId()); - assertEquals("A3_ua", depthFirst.next().getId()); - assertEquals("A0_fb", 
depthFirst.next().getId()); - assertEquals("A0_fc", depthFirst.next().getId()); - assertEquals("A0_ma", depthFirst.next().getId()); + Iterator depthFirst = m_family.depthFirstOrder().iterator(); + assertEquals("A0_f_", depthFirst.next().id()); + assertEquals("A3_fa", depthFirst.next().id()); + assertEquals("A3_fb", depthFirst.next().id()); + assertEquals("A5_ua", depthFirst.next().id()); + assertEquals("A4_ma", depthFirst.next().id()); + assertEquals("A3_fc", depthFirst.next().id()); + assertEquals("A4_fa", depthFirst.next().id()); + assertEquals("A3_ma", depthFirst.next().id()); + + assertEquals("A2_ma", depthFirst.next().id()); + assertEquals("A1_ma", depthFirst.next().id()); + assertEquals("A0_fa", depthFirst.next().id()); + assertEquals("A3_ua", depthFirst.next().id()); + assertEquals("A0_fb", depthFirst.next().id()); + assertEquals("A0_fc", depthFirst.next().id()); + assertEquals("A0_ma", depthFirst.next().id()); assertFalse(depthFirst.hasNext()); } @Test public void testInOrder() { SortedSet set = new TreeSet<>(); - m_family.forEach(s -> set.add(s.getId())); - Iterator inOrder = m_family.inOrder(); + m_family.forEach(s -> set.add(s.id())); + Iterator inOrder = m_family.lexigraphicalOrder().iterator(); for (String s : set) { - assertEquals(s, inOrder.next().getId()); + assertEquals(s, inOrder.next().id()); } assertFalse(inOrder.hasNext()); } @Test public void testTopologicalOrder() { - Iterator topologicalOrder = m_family.topologicalOrder(); - assertEquals("A0_f_", topologicalOrder.next().getId()); - assertEquals("A0_fa", topologicalOrder.next().getId()); - assertEquals("A0_fb", topologicalOrder.next().getId()); - assertEquals("A0_fc", topologicalOrder.next().getId()); - assertEquals("A0_ma", topologicalOrder.next().getId()); - assertEquals("A3_ua", topologicalOrder.next().getId()); - assertEquals("A1_ma", topologicalOrder.next().getId()); - assertEquals("A2_ma", topologicalOrder.next().getId()); - assertEquals("A3_fa", topologicalOrder.next().getId()); - 
assertEquals("A3_fb", topologicalOrder.next().getId()); - assertEquals("A3_fc", topologicalOrder.next().getId()); - assertEquals("A3_ma", topologicalOrder.next().getId()); - assertEquals("A4_ma", topologicalOrder.next().getId()); - assertEquals("A4_fa", topologicalOrder.next().getId()); - assertEquals("A5_ua", topologicalOrder.next().getId()); + Iterator topologicalOrder = m_family.topologicalOrder().iterator(); + assertEquals("A0_f_", topologicalOrder.next().id()); + assertEquals("A0_fa", topologicalOrder.next().id()); + assertEquals("A0_fb", topologicalOrder.next().id()); + assertEquals("A0_fc", topologicalOrder.next().id()); + assertEquals("A0_ma", topologicalOrder.next().id()); + assertEquals("A3_ua", topologicalOrder.next().id()); + assertEquals("A1_ma", topologicalOrder.next().id()); + assertEquals("A2_ma", topologicalOrder.next().id()); + assertEquals("A3_fa", topologicalOrder.next().id()); + assertEquals("A3_fb", topologicalOrder.next().id()); + assertEquals("A3_fc", topologicalOrder.next().id()); + assertEquals("A3_ma", topologicalOrder.next().id()); + assertEquals("A4_ma", topologicalOrder.next().id()); + assertEquals("A4_fa", topologicalOrder.next().id()); + assertEquals("A5_ua", topologicalOrder.next().id()); assertFalse(topologicalOrder.hasNext()); } @Test public void buildFamilyOutOfOrder() { PedigreeBuilder builder = new PedigreeBuilder(false); - builder.add("f1", "A3_fb", "A2_ma", "A0_fb", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A0_fc", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A2_ma", "A1_ma", null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A0_f_", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A4_fa", "A3_ma", "A0_fc", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A1_ma", "A0_ma", "A0_fa", Sex.MALE, Collections.emptyList()); - builder.add("f1", "A3_ma", "A2_ma", null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A0_fa", null, null, 
Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A0_fb", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A4_ma", null, "A3_fc", Sex.MALE, Collections.emptyList()); - builder.add("f1", "A5_ua", "A4_ma", "A4_fa", Sex.UNKNOWN, Collections.emptyList()); - builder.add("f1", "A3_fa", "A2_ma", "A0_fb", Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A3_ua", null, "A0_fb", Sex.UNKNOWN, Collections.emptyList()); - builder.add("f1", "A0_ma", null, null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A3_fc", "A2_ma", "A0_fb", Sex.FEMALE, Collections.emptyList()); - - Pedigree pedigree = builder.build(); // this is really the check + builder.add("f1", "A3_fb", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A0_fc", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A2_ma", "A1_ma", null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A0_f_", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A4_fa", "A3_ma", "A0_fc", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A1_ma", "A0_ma", "A0_fa", PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A3_ma", "A2_ma", null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A0_fa", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A0_fb", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A4_ma", null, "A3_fc", PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A5_ua", "A4_ma", "A4_fa", PedigreeSex.UNKNOWN, Collections.emptyList()); + builder.add("f1", "A3_fa", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A3_ua", null, "A0_fb", PedigreeSex.UNKNOWN, Collections.emptyList()); + builder.add("f1", "A0_ma", null, null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A3_fc", "A2_ma", "A0_fb", 
PedigreeSex.FEMALE, Collections.emptyList()); + + var pedigree = builder.build(); // this is really the check assertNotNull(pedigree); - Family family = pedigree.getFamily("f1"); + PedigreeFamily family = pedigree.getFamily("f1"); assertNotNull(family); } @Test public void testFind1() { - Optional found = m_family.find("A2_ma"); + Optional found = m_family.find("A2_ma"); assertTrue(found.isPresent()); - assertEquals("A2_ma", found.get().getId()); - assertEquals(Sex.MALE, found.get().getSex()); + assertEquals("A2_ma", found.get().id()); + assertEquals(PedigreeSex.MALE, found.get().sex()); } @Test public void testFind2() { - Optional found = m_family.find("A5_ua"); + Optional found = m_family.find("A5_ua"); assertTrue(found.isPresent()); - assertEquals("A5_ua", found.get().getId()); - assertTrue(found.get().getChildren().isEmpty()); - assertEquals(Sex.UNKNOWN, found.get().getSex()); + assertEquals("A5_ua", found.get().id()); + assertTrue(found.get().children().isEmpty()); + assertEquals(PedigreeSex.UNKNOWN, found.get().sex()); } @Test public void testTwoFamilies() { PedigreeBuilder builder = new PedigreeBuilder(true); - builder.add("f1", "A0_ma", null, null, Sex.MALE, Collections.emptyList()); - builder.add("f1", "A0_fa", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f1", "A1_fa", "A0_ma", "A0_fa", Sex.FEMALE, Collections.emptyList()); + builder.add("f1", "A0_ma", null, null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f1", "A0_fa", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f1", "A1_fa", "A0_ma", "A0_fa", PedigreeSex.FEMALE, Collections.emptyList()); - builder.add("f2", "B0_ma", null, null, Sex.MALE, Collections.emptyList()); - builder.add("f2", "B0_fa", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f2", "B1_fa", "B0_ma", "B0_fa", Sex.FEMALE, Collections.emptyList()); + builder.add("f2", "B0_ma", null, null, PedigreeSex.MALE, Collections.emptyList()); + builder.add("f2", "B0_fa", 
null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f2", "B1_fa", "B0_ma", "B0_fa", PedigreeSex.FEMALE, Collections.emptyList()); - Pedigree pedigree = builder.build(); + var pedigree = builder.build(); assertNotNull(pedigree); - Family family1 = pedigree.getFamily("f1"); + PedigreeFamily family1 = pedigree.getFamily("f1"); assertNotNull(family1); - Iterator depthFirst1 = family1.depthFirst(); - assertEquals("A1_fa", depthFirst1.next().getId()); - assertEquals("A0_fa", depthFirst1.next().getId()); - assertEquals("A0_ma", depthFirst1.next().getId()); + Iterator depthFirst1 = family1.depthFirstOrder().iterator(); + assertEquals("A1_fa", depthFirst1.next().id()); + assertEquals("A0_fa", depthFirst1.next().id()); + assertEquals("A0_ma", depthFirst1.next().id()); - Family family2 = pedigree.getFamily("f2"); + PedigreeFamily family2 = pedigree.getFamily("f2"); assertNotNull(family2); - Iterator depthFirst2 = family2.depthFirst(); - assertEquals("B1_fa", depthFirst2.next().getId()); - assertEquals("B0_fa", depthFirst2.next().getId()); - assertEquals("B0_ma", depthFirst2.next().getId()); + Iterator depthFirst2 = family2.depthFirstOrder().iterator(); + assertEquals("B1_fa", depthFirst2.next().id()); + assertEquals("B0_fa", depthFirst2.next().id()); + assertEquals("B0_ma", depthFirst2.next().id()); } @Test public void testMissingFather() { PedigreeBuilder builder = new PedigreeBuilder(true); - builder.add("f1", "A0_fa", "asdf", null, Sex.FEMALE, Collections.emptyList()); // this is ok + builder.add("f1", "A0_fa", "asdf", null, PedigreeSex.FEMALE, Collections.emptyList()); // this is ok IllegalArgumentException e = assertThrows(IllegalArgumentException.class, builder::build); assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("father")); assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("does not exist")); @@ -233,7 +233,7 @@ public void testMissingFather() { @Test public void testMissingMother() { PedigreeBuilder builder = new 
PedigreeBuilder(true); - builder.add("f1", "A0_fa", null, "asdf", Sex.FEMALE, Collections.emptyList()); // this is ok + builder.add("f1", "A0_fa", null, "asdf", PedigreeSex.FEMALE, Collections.emptyList()); // this is ok IllegalArgumentException e = assertThrows(IllegalArgumentException.class, builder::build); assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("mother")); assertTrue(e.getMessage().toLowerCase(Locale.ROOT).contains("does not exist")); @@ -242,8 +242,8 @@ public void testMissingMother() { @Test public void testDifferentFamilies() { PedigreeBuilder builder = new PedigreeBuilder(true); - builder.add("f1", "A0_fa", null, null, Sex.FEMALE, Collections.emptyList()); - builder.add("f2", "B0_fa", null, "A0_fa", Sex.FEMALE, Collections.emptyList()); // this is ok + builder.add("f1", "A0_fa", null, null, PedigreeSex.FEMALE, Collections.emptyList()); + builder.add("f2", "B0_fa", null, "A0_fa", PedigreeSex.FEMALE, Collections.emptyList()); // this is ok assertThrows(IllegalArgumentException.class, builder::build); } -} \ No newline at end of file +} diff --git a/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeParserTest.java b/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeParserTest.java index 7e855ba..3a049e8 100644 --- a/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeParserTest.java +++ b/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeParserTest.java @@ -1,9 +1,8 @@ package org.pharmgkb.parsers.pedigree; import org.junit.jupiter.api.Test; -import org.pharmgkb.parsers.pedigree.model.Family; -import org.pharmgkb.parsers.pedigree.model.Individual; -import org.pharmgkb.parsers.pedigree.model.Pedigree; +import org.pharmgkb.parsers.pedigree.model.PedigreeFamily; +import org.pharmgkb.parsers.pedigree.model.PedigreeIndividual; import java.nio.file.Files; import java.nio.file.Path; @@ -45,34 +44,34 @@ public class PedigreeParserTest { public void testWithoutData() throws Exception { Path file = 
Paths.get(getClass().getResource("without_data.ped").toURI()); - PedigreeParser parser = new PedigreeParser.Builder().build(); - Pedigree pedigree = parser.apply(Files.lines(file)); - assertEquals(1, pedigree.getFamilies().size()); - Family family = pedigree.getFamily("1"); + PedigreeParserI parser = new PedigreeParser.Builder().build(); + var pedigree = parser.apply(Files.lines(file)); + assertEquals(1, pedigree.families().size()); + PedigreeFamily family = pedigree.getFamily("1"); assertNotNull(family); - Iterator roots = family.getRoots().iterator(); - assertEquals("1", roots.next().getId()); - assertEquals("2", roots.next().getId()); - assertEquals("3", roots.next().getId()); + Iterator roots = family.roots().iterator(); + assertEquals("1", roots.next().id()); + assertEquals("2", roots.next().id()); + assertEquals("3", roots.next().id()); assertFalse(roots.hasNext()); - Optional three = family.find("3"); - Optional four = family.find("4"); - Optional five = family.find("5"); - Optional six = family.find("6"); + Optional three = family.find("3"); + Optional four = family.find("4"); + Optional five = family.find("5"); + Optional six = family.find("6"); assertTrue(three.isPresent()); assertTrue(four.isPresent()); assertTrue(five.isPresent()); assertTrue(six.isPresent()); - assertEquals(four, five.get().getMother()); - assertEquals(three, five.get().getFather()); - assertTrue(three.get().getChildren().contains(five.get())); - assertTrue(three.get().getChildren().contains(six.get())); - assertTrue(four.get().getChildren().contains(five.get())); - assertTrue(four.get().getChildren().contains(six.get())); + assertEquals(four, five.get().mother()); + assertEquals(three, five.get().father()); + assertTrue(three.get().children().contains(five.get())); + assertTrue(three.get().children().contains(six.get())); + assertTrue(four.get().children().contains(five.get())); + assertTrue(four.get().children().contains(six.get())); } @@ -98,14 +97,14 @@ public void 
testWithoutData() throws Exception { public void testWithData() throws Exception { Path file = Paths.get(getClass().getResource("with_data.ped").toURI()); - PedigreeParser parser = new PedigreeParser.Builder().build(); - Pedigree pedigree = parser.apply(Files.lines(file)); - Family family = pedigree.getFamily("1"); + var parser = new PedigreeParser.Builder().build(); + var pedigree = parser.apply(Files.lines(file)); + PedigreeFamily family = pedigree.getFamily("1"); assertNotNull(family); - Optional six = family.find("6"); + Optional six = family.find("6"); assertTrue(six.isPresent()); List expected6 = Arrays.asList("4.321", "2", "4", "2", "2"); - assertEquals(expected6, six.get().getInfo()); + assertEquals(expected6, six.get().info()); } -} \ No newline at end of file +} diff --git a/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeWriterTest.java b/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeWriterTest.java index 7efe180..205bf5c 100644 --- a/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeWriterTest.java +++ b/pedigree/src/test/java/org/pharmgkb/parsers/pedigree/PedigreeWriterTest.java @@ -1,17 +1,14 @@ package org.pharmgkb.parsers.pedigree; import org.junit.jupiter.api.Test; -import org.pharmgkb.parsers.pedigree.model.Pedigree; import org.pharmgkb.parsers.pedigree.model.PedigreeBuilder; -import org.pharmgkb.parsers.pedigree.model.Sex; +import org.pharmgkb.parsers.pedigree.model.PedigreeSex; import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.stream.Collectors; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link PedigreeWriter}. 
@@ -23,27 +20,27 @@ public class PedigreeWriterTest { public void testWrite() { PedigreeBuilder builder = new PedigreeBuilder(true); - builder.add("f1", "A0_fb", null, null, Sex.FEMALE, Collections.singletonList("no disease")); - builder.add("f1", "A0_ma", null, null, Sex.MALE, Collections.singletonList("disease")); - builder.add("f1", "A0_fa", null, null, Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A0_fc", null, null, Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A0_f_", null, null, Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A1_ma", "A0_ma", "A0_fa", Sex.MALE, Collections.singletonList("disease")); - builder.add("f1", "A2_ma", "A1_ma", null, Sex.MALE, Collections.singletonList("disease")); - builder.add("f1", "A3_fa", "A2_ma", "A0_fb", Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A3_fb", "A2_ma", "A0_fb", Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A3_fc", "A2_ma", "A0_fb", Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A3_ua", null, "A0_fb", Sex.UNKNOWN, Collections.singletonList("disease")); - builder.add("f1", "A3_ma", "A2_ma", null, Sex.MALE, Collections.singletonList("disease")); - builder.add("f1", "A4_ma", null, "A3_fc", Sex.MALE, Collections.singletonList("disease")); - builder.add("f1", "A4_fa", "A3_ma", "A0_fc", Sex.FEMALE, Collections.singletonList("disease")); - builder.add("f1", "A5_ua", "A4_ma", "A4_fa", Sex.UNKNOWN, Arrays.asList("disease", "red hair")); + builder.add("f1", "A0_fb", null, null, PedigreeSex.FEMALE, Collections.singletonList("no disease")); + builder.add("f1", "A0_ma", null, null, PedigreeSex.MALE, Collections.singletonList("disease")); + builder.add("f1", "A0_fa", null, null, PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A0_fc", null, null, PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A0_f_", null, null, 
PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A1_ma", "A0_ma", "A0_fa", PedigreeSex.MALE, Collections.singletonList("disease")); + builder.add("f1", "A2_ma", "A1_ma", null, PedigreeSex.MALE, Collections.singletonList("disease")); + builder.add("f1", "A3_fa", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A3_fb", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A3_fc", "A2_ma", "A0_fb", PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A3_ua", null, "A0_fb", PedigreeSex.UNKNOWN, Collections.singletonList("disease")); + builder.add("f1", "A3_ma", "A2_ma", null, PedigreeSex.MALE, Collections.singletonList("disease")); + builder.add("f1", "A4_ma", null, "A3_fc", PedigreeSex.MALE, Collections.singletonList("disease")); + builder.add("f1", "A4_fa", "A3_ma", "A0_fc", PedigreeSex.FEMALE, Collections.singletonList("disease")); + builder.add("f1", "A5_ua", "A4_ma", "A4_fa", PedigreeSex.UNKNOWN, Arrays.asList("disease", "red hair")); - Pedigree pedigree = builder.build(); + var pedigree = builder.build(); assertNotNull(pedigree); - List lines = new PedigreeWriter.Builder().build().apply(pedigree).collect(Collectors.toList()); + List lines = new PedigreeWriter.Builder().build().apply(pedigree).toList(); assertEquals(15, lines.size()); assertEquals("f1\tA0_f_\t0\t0\t2\tdisease", lines.get(0)); assertEquals("f1\tA5_ua\tA4_ma\tA4_fa\t2\tdisease\tred hair", lines.get(14)); } -} \ No newline at end of file +} diff --git a/settings.gradle b/settings.gradle index 04fede6..fbce9bd 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,4 +1,3 @@ -rootProject.name = 'genomics-io' - -include 'core', 'bed', 'gff', 'fasta', 'pedigree', 'chain', 'vcf', 'genbank', 'turtle', 'bgee', 'text' +rootProject.name = 'bioio' +include 'core', 'bed', 'gff', 'fasta', 'pedigree', 'pdb', 'chain', 'vcf', 'genbank', 'turtle', 'bgee', 'text' 
diff --git a/text/build.gradle b/text/build.gradle index c0612d3..75740b8 100644 --- a/text/build.gradle +++ b/text/build.gradle @@ -1,3 +1,3 @@ dependencies { - compile project(':core') -} \ No newline at end of file + api project(':core') +} diff --git a/text/lombok.config b/text/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/text/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/text/src/main/java/MatrixParser.java b/text/src/main/java/org/pharmgkb/parsers/text/MatrixParser.java similarity index 87% rename from text/src/main/java/MatrixParser.java rename to text/src/main/java/org/pharmgkb/parsers/text/MatrixParser.java index 46c9bf2..8800cc6 100644 --- a/text/src/main/java/MatrixParser.java +++ b/text/src/main/java/org/pharmgkb/parsers/text/MatrixParser.java @@ -1,7 +1,8 @@ +package org.pharmgkb.parsers.text; + import com.google.common.base.MoreObjects; import com.google.common.base.Splitter; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineParser; import org.pharmgkb.parsers.ObjectBuilder; import org.pharmgkb.parsers.utils.ReflectingConstructor; import org.slf4j.Logger; @@ -30,7 +31,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class MatrixParser implements LineParser> { +public class MatrixParser implements MatrixParserI { private static final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -43,9 +44,9 @@ public class MatrixParser implements LineParser> { private final Splitter m_splitter; private final Set m_lengths; - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); - private MatrixParser(@Nonnull Builder builder) { + protected MatrixParser(@Nonnull Builder builder) { m_converter = builder.m_converter; m_delimiter = builder.m_delimiter; 
m_lineExtractor = builder.m_lineExtractor; @@ -57,7 +58,8 @@ private MatrixParser(@Nonnull Builder builder) { @Nonnull @Override - public Stream> parseAll(@Nonnull Stream stream) throws UncheckedIOException, BadDataFormatException { + public Stream> parseAll(@Nonnull Stream stream) + throws UncheckedIOException, BadDataFormatException { return stream.map(this); } @@ -116,9 +118,9 @@ public String toString() { } @NotThreadSafe - public static class Builder implements ObjectBuilder> { + public static class Builder implements ObjectBuilder> { - private static final Pattern sf_bracketed = Pattern.compile(" *^[\\[{(]?([^]})]*)[]})]? *$"); + private static final Pattern sf_bracketed = Pattern.compile("^ *[\\[{(]?([^]})]*)[]})]? *$"); private static final Pattern sf_quoted = Pattern.compile("^ *[\"']?([^\"']*)[\"']? *$"); private final Function m_converter; @@ -157,7 +159,9 @@ public Builder setDelimiter(@Nonnull String delimiter) { @Nonnull public Builder setLineExtractor(@Nonnull Pattern regexWithGroup1) { if (regexWithGroup1.matcher("").groupCount() != 1) { - throw new IllegalArgumentException("Line extractor " + regexWithGroup1 + " should have exactly 1 capture group"); + throw new IllegalArgumentException( + "Line extractor " + regexWithGroup1 + " should have exactly 1 capture group" + ); } m_lineExtractor = regexWithGroup1; return this; @@ -166,7 +170,9 @@ public Builder setLineExtractor(@Nonnull Pattern regexWithGroup1) { @Nonnull public Builder setValueExtractor(@Nonnull Pattern regexWithGroup1) { if (regexWithGroup1.matcher("").groupCount() != 1) { - throw new IllegalArgumentException("Value extractor " + regexWithGroup1 + " should have exactly 1 capture group"); + throw new IllegalArgumentException( + "Value extractor " + regexWithGroup1 + " should have exactly 1 capture group" + ); } m_valueExtractor = regexWithGroup1; return this; @@ -180,10 +186,8 @@ public Builder allowJagged(@Nonnull Pattern regexWithGroup1) { @Nonnull @Override - public MatrixParser 
build() { + public MatrixParserI build() { return new MatrixParser<>(this); } - } } - diff --git a/text/src/main/java/org/pharmgkb/parsers/text/MatrixParserI.java b/text/src/main/java/org/pharmgkb/parsers/text/MatrixParserI.java new file mode 100644 index 0000000..1eef9e6 --- /dev/null +++ b/text/src/main/java/org/pharmgkb/parsers/text/MatrixParserI.java @@ -0,0 +1,9 @@ +package org.pharmgkb.parsers.text; + +import org.pharmgkb.parsers.LineParser; + +import java.util.List; + +public interface MatrixParserI extends LineParser> { + +} diff --git a/text/src/main/java/MatrixWriter.java b/text/src/main/java/org/pharmgkb/parsers/text/MatrixWriter.java similarity index 68% rename from text/src/main/java/MatrixWriter.java rename to text/src/main/java/org/pharmgkb/parsers/text/MatrixWriter.java index eeccb4e..0bcb789 100644 --- a/text/src/main/java/MatrixWriter.java +++ b/text/src/main/java/org/pharmgkb/parsers/text/MatrixWriter.java @@ -1,7 +1,7 @@ +package org.pharmgkb.parsers.text; import com.google.common.base.MoreObjects; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineWriter; import org.pharmgkb.parsers.ObjectBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,7 +22,7 @@ */ @SuppressWarnings("ALL") @ThreadSafe -public class MatrixWriter implements LineWriter> { +public class MatrixWriter implements MatrixWriterI { private static final long sf_logEvery = 10000; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -36,14 +36,14 @@ public class MatrixWriter implements LineWriter> { private final String m_valueSuffix; private final boolean m_jaggedDimensions; - public MatrixWriter(@Nonnull Builder builder) { - this.m_converter = builder.m_converter; - this.m_delimiter = builder.m_delimiter; - this.m_valuePrefix = builder.m_valuePrefix; - this.m_valueSuffix = builder.m_valueSuffix; - this.m_linePrefix = builder.m_linePrefix; - this.m_lineSuffix = 
builder.m_lineSuffix; - this.m_jaggedDimensions = builder.m_jaggedDimensions; + protected MatrixWriter(@Nonnull Builder builder) { + m_converter = builder.m_converter; + m_delimiter = builder.m_delimiter; + m_valuePrefix = builder.m_valuePrefix; + m_valueSuffix = builder.m_valueSuffix; + m_linePrefix = builder.m_linePrefix; + m_lineSuffix = builder.m_lineSuffix; + m_jaggedDimensions = builder.m_jaggedDimensions; } @Nonnull @@ -52,8 +52,8 @@ public String apply(@Nonnull List row) { if (m_lineNumber.incrementAndGet() % sf_logEvery == 0) { sf_logger.debug("Writing line #{}", m_lineNumber); } - List strings = row.stream().map(this.m_converter).collect(Collectors.toList()); - List bad = (strings.stream().filter(this.m_delimiter::contains).collect(Collectors.toList())); + List strings = row.stream().map(m_converter).collect(Collectors.toList()); + List bad = (strings.stream().filter(m_delimiter::contains).collect(Collectors.toList())); if (bad.size() > 0 && m_valueSuffix.isEmpty() && m_valuePrefix.isEmpty()) { throw new BadDataFormatException("Values contain the delimiters: " + String.join(",", bad)); } else if (bad.size() > 0) { @@ -61,8 +61,8 @@ public String apply(@Nonnull List row) { } // don't trim; let them do it String values = row.stream() - .map(s -> this.m_valuePrefix + s + this.m_valueSuffix) - .collect(Collectors.joining(this.m_delimiter)); + .map(s -> m_valuePrefix + s + m_valueSuffix) + .collect(Collectors.joining(m_delimiter)); return m_linePrefix + values + m_lineSuffix; } @@ -85,7 +85,7 @@ public String toString() { } @NotThreadSafe - public static class Builder implements ObjectBuilder> { + public static class Builder implements ObjectBuilder> { private Function m_converter; private String m_delimiter; @@ -96,43 +96,43 @@ public static class Builder implements ObjectBuilder> { private boolean m_jaggedDimensions; public Builder() { - this.m_converter = v -> v.toString().trim(); - this.m_delimiter = "\t"; - this.m_linePrefix = ""; - this.m_lineSuffix = 
""; - this.m_valuePrefix = ""; - this.m_valueSuffix = ""; - this.m_jaggedDimensions = false; + m_converter = v -> v.toString().trim(); + m_delimiter = "\t"; + m_linePrefix = ""; + m_lineSuffix = ""; + m_valuePrefix = ""; + m_valueSuffix = ""; + m_jaggedDimensions = false; } @Nonnull - public MatrixWriter.Builder setDelimiter(String delimiter) { + public Builder setDelimiter(String delimiter) { m_delimiter = delimiter; return this; } @Nonnull - public MatrixWriter.Builder setConverter(Function converter) { + public Builder setConverter(Function converter) { m_converter = converter; return this; } @Nonnull - public MatrixWriter.Builder encloseLine(String prefix, String suffix) { + public Builder encloseLine(String prefix, String suffix) { m_linePrefix = prefix; m_lineSuffix = suffix; return this; } @Nonnull - public MatrixWriter.Builder encloseValue(String prefix, String suffix) { + public Builder encloseValue(String prefix, String suffix) { m_valuePrefix = prefix; m_valueSuffix = suffix; return this; } @Nonnull - public MatrixWriter.Builder allowJagged(Pattern regexWithGroup1) { + public Builder allowJagged(Pattern regexWithGroup1) { m_jaggedDimensions = true; return this; } @@ -143,4 +143,4 @@ public MatrixWriter build() { return new MatrixWriter(this); } } -} \ No newline at end of file +} diff --git a/text/src/main/java/org/pharmgkb/parsers/text/MatrixWriterI.java b/text/src/main/java/org/pharmgkb/parsers/text/MatrixWriterI.java new file mode 100644 index 0000000..0b3c09c --- /dev/null +++ b/text/src/main/java/org/pharmgkb/parsers/text/MatrixWriterI.java @@ -0,0 +1,9 @@ +package org.pharmgkb.parsers.text; + +import org.pharmgkb.parsers.LineWriter; + +import java.util.List; + +public interface MatrixWriterI extends LineWriter> { + +} diff --git a/text/src/main/java/org/pharmgkb/parsers/text/package-info.java b/text/src/main/java/org/pharmgkb/parsers/text/package-info.java new file mode 100644 index 0000000..768a949 --- /dev/null +++ 
b/text/src/main/java/org/pharmgkb/parsers/text/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.text; diff --git a/text/src/test/java/MatrixParserTest.java b/text/src/test/java/MatrixParserTest.java deleted file mode 100644 index 2dcb063..0000000 --- a/text/src/test/java/MatrixParserTest.java +++ /dev/null @@ -1,3 +0,0 @@ -public class MatrixParserTest { - -} diff --git a/text/src/test/java/MatrixWriterTest.java b/text/src/test/java/MatrixWriterTest.java deleted file mode 100644 index 9b61100..0000000 --- a/text/src/test/java/MatrixWriterTest.java +++ /dev/null @@ -1,2 +0,0 @@ -public class MatrixWriterTest { -} diff --git a/text/src/test/java/org/pharmgkb/parsers/text/MatrixParserTest.java b/text/src/test/java/org/pharmgkb/parsers/text/MatrixParserTest.java new file mode 100644 index 0000000..3cff80e --- /dev/null +++ b/text/src/test/java/org/pharmgkb/parsers/text/MatrixParserTest.java @@ -0,0 +1,5 @@ +package org.pharmgkb.parsers.text; + +public class MatrixParserTest { + +} diff --git a/text/src/test/java/org/pharmgkb/parsers/text/MatrixWriterTest.java b/text/src/test/java/org/pharmgkb/parsers/text/MatrixWriterTest.java new file mode 100644 index 0000000..a5768d1 --- /dev/null +++ b/text/src/test/java/org/pharmgkb/parsers/text/MatrixWriterTest.java @@ -0,0 +1,4 @@ +package org.pharmgkb.parsers.text; + +public class MatrixWriterTest { +} diff --git a/turtle/build.gradle b/turtle/build.gradle index 74caa33..da07db7 100644 --- a/turtle/build.gradle +++ b/turtle/build.gradle @@ -1,4 +1,4 @@ dependencies { - compile project(':core') -} \ No newline at end of file + api project(':core') +} diff --git a/turtle/lombok.config b/turtle/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/turtle/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/ChemblRdfResource.java 
b/turtle/src/main/java/org/pharmgkb/parsers/turtle/ChemblRdfResource.java deleted file mode 100644 index 925c320..0000000 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/ChemblRdfResource.java +++ /dev/null @@ -1,47 +0,0 @@ -package org.pharmgkb.parsers.turtle; - -import org.pharmgkb.parsers.WebResource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.lang.invoke.MethodHandles; -import java.nio.file.Path; -import java.util.Optional; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - - -@Immutable -public class ChemblRdfResource extends WebResource { - - private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static final Pattern versionPattern = Pattern.compile("([0-9]{2})\\.([0-9])"); - public static final Set KNOWN_VERSIONS = Set.of( - "16.0", "17.0", "18.0", "19.0", "20.0", "21.0", "22.0", "22.1", "23.0", "24.0", "24.1", "25.0", "26.0" - ); - public static final Set KNOWN_TYPES = Set.of( - "activity", "assay", "bindingsite", "biocmpt", "cellline", "complextarget_targetcmpt_ls", - "document", "grouptarget_targetcmpt_ls", "indication", "journal", "moa", "molecule", - "molecule_chebi_ls", "molhierarchy", "protclass", "singletarget_targetcmpt_ls", - "source", "target", "targetcmpt", "targetcmpt_uniprot_ls", "targetrel", "unichem" - ); - - protected ChemblRdfResource(@Nonnull String url, @Nonnull Optional cachePath) { - super(url, true, cachePath); - } - - public ChemblRdfResource of(@Nonnull String type, @Nonnull String version) { - Matcher matcher = versionPattern.matcher(version); - if (!matcher.matches() || (Integer.parseInt(matcher.group(1)) < 26 && !ChemblRdfResource.KNOWN_VERSIONS.contains(version))) { - sf_logger.warn("Probable invalid ChEMBL RDF version # {}", version); - } - // TODO damn, this is FTP! 
- String url = "http://ftp.ebi.ac.uk/pub/databases/chembl/ChEMBL-RDF/$1/chembl_$1_$2.ttl.gz" - .replace("$1", version).replace("$2", type); - return new ChemblRdfResource(url, Optional.empty()); - } - -} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParser.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParser.java index 8df2e00..3b17d79 100644 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParser.java +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParser.java @@ -1,10 +1,9 @@ package org.pharmgkb.parsers.turtle; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.MultilineParser; -import org.pharmgkb.parsers.turtle.model.Node; -import org.pharmgkb.parsers.turtle.model.Prefix; -import org.pharmgkb.parsers.turtle.model.Triple; +import org.pharmgkb.parsers.turtle.model.TurtleNode; +import org.pharmgkb.parsers.turtle.model.TurtlePrefix; +import org.pharmgkb.parsers.turtle.model.TurtleTriple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,7 +26,7 @@ * @author Douglas Myers-Turnbull */ @NotThreadSafe -public class TurtleParser implements MultilineParser { +public class TurtleParser implements TurtleParserI { private static final long sf_logEvery = 10000; private static final Pattern sf_prefixPattern = @@ -37,18 +36,18 @@ public class TurtleParser implements MultilineParser { private static final Pattern sf_nodePattern = Pattern.compile("[<\"]?([^<\"]+)[>\"]?(?:@([A-Za-z0-9\\-_:]+))?(?:\\^{2}([A-Za-z0-9\\-_:]+))?"); private static final Pattern sf_xPattern = - Pattern.compile("([<\"]?(?:[^<\"]+)[>\"]?(?:@(?:[A-Za-z0-9\\-_:]+))?(?:\\^{2}(?:[A-Za-z0-9\\-_:]+))?)"); + Pattern.compile("([<\"]?[^<\"]+[>\"]?(?:@[A-Za-z0-9\\-_:]+)?(?:\\^{2}[A-Za-z0-9\\-_:]+)?)"); private static final Pattern sf_triplePattern = - Pattern.compile("^[ ]*" + sf_xPattern.pattern() + "[ ]+" + sf_xPattern + "[ ]+" + sf_xPattern + "[ ]*[;.]$"); + Pattern.compile("^ *" + sf_xPattern.pattern() + 
" +" + sf_xPattern + " +" + sf_xPattern + " *[;.]$"); private static final Pattern sf_doublePattern = - Pattern.compile("^[ ]*" + sf_xPattern + "[ ]+" + sf_xPattern + "[ ]*[;.]$"); + Pattern.compile("^ *" + sf_xPattern + " +" + sf_xPattern + " *[;.]$"); private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private final boolean m_usePrefixes; private final AtomicLong m_lineNumber; - private final Map m_prefixes; - private final AtomicReference m_subject; + private final Map m_prefixes; + private final AtomicReference m_subject; public TurtleParser() { this(true); @@ -62,31 +61,30 @@ public TurtleParser(boolean usePrefixes) { @Nonnull @Override - public Stream parseAll(@Nonnull Stream stream) throws BadDataFormatException { + public Stream parseAll(@Nonnull Stream stream) throws BadDataFormatException { return stream.flatMap(this); } @Nonnull @Override - public Stream apply(@Nonnull String line) { + public Stream apply(@Nonnull String line) { try { m_lineNumber.addAndGet(1); - //noinspection AssignmentToMethodParameter line = line.trim(); // NOTE! if (line.isEmpty() || line.startsWith("#")) { return Stream.empty(); } if (line.startsWith("@prefix")) { if (m_usePrefixes) { - Prefix prefix = parsePrefix(line); - m_prefixes.put(prefix.getPrefix(), prefix); + TurtlePrefix prefix = parsePrefix(line); + m_prefixes.put(prefix.prefix(), prefix); } return Stream.empty(); } - @Nullable Node subject = m_subject.get(); - @Nonnull Triple triple = (subject == null) ? parseTriple(line) : parsePartTriple(line, subject); + @Nullable TurtleNode subject = m_subject.get(); + @Nonnull TurtleTriple triple = subject == null ? 
parseTriple(line) : parsePartTriple(line, subject); if (line.endsWith(";")) { - m_subject.set(triple.getSubject()); + m_subject.set(triple.subject()); } else if (line.endsWith(".")) { m_subject.set(null); } else { @@ -98,7 +96,7 @@ public Stream apply(@Nonnull String line) { throw new BadDataFormatException("Couldn't parse line #" + m_lineNumber, e); } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. + // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. e.addSuppressed(new RuntimeException("Unexpectedly failed to parse line " + m_lineNumber)); throw e; } @@ -109,48 +107,49 @@ public long nLinesProcessed() { return m_lineNumber.get(); } - @Nonnull - public Map getPrefixes() { + @Override + @Nonnull + public Map prefixes() { return Collections.unmodifiableMap(m_prefixes); } - protected Prefix parsePrefix(String line) { + protected TurtlePrefix parsePrefix(String line) { Matcher matcher = sf_prefixPattern.matcher(line); if (!matcher.matches()) { throw new IllegalArgumentException("Prefix line '" + line + "' not understood"); } - return new Prefix(matcher.group(1).trim(), matcher.group(2).trim()); + return new TurtlePrefix(matcher.group(1).trim(), matcher.group(2).trim()); } - protected Triple parseTriple(String line) { + protected TurtleTriple parseTriple(String line) { Matcher matcher = sf_triplePattern.matcher(line); if (!matcher.matches()) { throw new IllegalArgumentException("Triple '" + line + "' not understood"); } - Node subject = parseNode(matcher.group(1).trim(), "subject"); - Node predicate = parseNode(matcher.group(2).trim(), "predicate"); - Node object = parseNode(matcher.group(3).trim(), "object"); - return new Triple(subject, predicate, object); + TurtleNode subject = parseNode(matcher.group(1).trim(), "subject"); + TurtleNode predicate = parseNode(matcher.group(2).trim(), "predicate"); + 
TurtleNode object = parseNode(matcher.group(3).trim(), "object"); + return new TurtleTriple(subject, predicate, object); } - protected Triple parsePartTriple(String line, Node subject) { + protected TurtleTriple parsePartTriple(String line, TurtleNode subject) { Matcher matcher = sf_doublePattern.matcher(line); if (!matcher.matches()) { throw new IllegalArgumentException("Triple (with preceding subject) '" + line + "' not understood"); } - Node predicate = parseNode(matcher.group(1).trim(), "predicate"); - Node object = parseNode(matcher.group(2).trim(), "object"); - return new Triple(subject, predicate, object); + TurtleNode predicate = parseNode(matcher.group(1).trim(), "predicate"); + TurtleNode object = parseNode(matcher.group(2).trim(), "object"); + return new TurtleTriple(subject, predicate, object); } - protected Node parseNode(String string, String label) { + protected TurtleNode parseNode(String string, String label) { Matcher matcher = sf_nodePattern.matcher(string); if (!matcher.matches()) { throw new IllegalArgumentException( "Failed to parse " + label + " '" + string + "' with regex " + sf_nodePattern.pattern() ); } - return new Node( + return new TurtleNode( matcher.group(1).trim(), Optional.ofNullable(matcher.group(2)).map(String::trim), Optional.ofNullable(matcher.group(3)).map(String::trim) diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParserI.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParserI.java new file mode 100644 index 0000000..2467c64 --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleParserI.java @@ -0,0 +1,13 @@ +package org.pharmgkb.parsers.turtle; + +import org.pharmgkb.parsers.MultilineParser; +import org.pharmgkb.parsers.turtle.model.TurtlePrefix; +import org.pharmgkb.parsers.turtle.model.TurtleTriple; + +import javax.annotation.Nonnull; +import java.util.Map; + +public interface TurtleParserI extends MultilineParser { + @Nonnull + Map prefixes(); +} diff --git 
a/turtle/src/main/java/org/pharmgkb/parsers/turtle/TripleGrouper.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouper.java similarity index 50% rename from turtle/src/main/java/org/pharmgkb/parsers/turtle/TripleGrouper.java rename to turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouper.java index 9604265..044dd59 100644 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/TripleGrouper.java +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouper.java @@ -1,15 +1,14 @@ package org.pharmgkb.parsers.turtle; import com.google.common.collect.ImmutableMap; -import org.pharmgkb.parsers.turtle.model.Node; -import org.pharmgkb.parsers.turtle.model.Triple; -import org.pharmgkb.parsers.turtle.model.TripleGroup; +import org.pharmgkb.parsers.turtle.model.TurtleNode; +import org.pharmgkb.parsers.turtle.model.TurtleTriple; +import org.pharmgkb.parsers.turtle.model.TurtleGroup; import javax.annotation.Nonnull; import javax.annotation.concurrent.ThreadSafe; import java.util.HashMap; import java.util.Map; -import java.util.function.Function; import java.util.stream.Stream; @@ -18,22 +17,23 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class TripleGrouper implements Function> { +public class TurtleTripleGrouper implements TurtleTripleGrouperI { private String m_previousSubject = null; private String m_subject = null; - private Map m_list = new HashMap<>(16); + private final Map m_list = new HashMap<>(16); - @Nonnull - public Stream apply(@Nonnull Triple triple) { - if (!triple.getSubject().getValue().equals(m_subject)) { - m_subject = triple.getSubject().getValue(); + @Override + @Nonnull + public Stream apply(@Nonnull TurtleTriple triple) { + if (!triple.subject().value().equals(m_subject)) { + m_subject = triple.subject().value(); if (!m_list.isEmpty()) { - return Stream.of(new TripleGroup(m_previousSubject, ImmutableMap.copyOf(m_list))); + return Stream.of(new TurtleGroup(m_previousSubject, 
ImmutableMap.copyOf(m_list))); } m_previousSubject = m_subject; } - m_list.put(triple.getPredicate().getValue(), triple.getObject()); + m_list.put(triple.predicate().value(), triple.object()); return Stream.empty(); } @@ -46,8 +46,4 @@ public String toString() { '}'; } - @Nonnull - public Stream convert(@Nonnull Stream triples) { - return triples.flatMap(this); - } } diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouperI.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouperI.java new file mode 100644 index 0000000..9658db3 --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/TurtleTripleGrouperI.java @@ -0,0 +1,16 @@ +package org.pharmgkb.parsers.turtle; + +import org.pharmgkb.parsers.turtle.model.TurtleTriple; +import org.pharmgkb.parsers.turtle.model.TurtleGroup; + +import javax.annotation.Nonnull; +import java.util.function.Function; +import java.util.stream.Stream; + +public interface TurtleTripleGrouperI extends Function> { + + @Nonnull + default Stream convert(@Nonnull Stream triples) { + return triples.flatMap(this); + } +} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Node.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Node.java deleted file mode 100644 index 0ca31d9..0000000 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Node.java +++ /dev/null @@ -1,109 +0,0 @@ -package org.pharmgkb.parsers.turtle.model; - -import com.google.common.base.MoreObjects; -import org.pharmgkb.parsers.BadDataFormatException; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Objects; -import java.util.Optional; - - -@Immutable -public class Node { - - private final String m_value; - private final Optional m_language; - private final Optional m_dataType; - - public Node(@Nonnull String value, @Nonnull Optional language, @Nonnull Optional dataType) { - 
m_value = value; - m_language = language; - m_dataType = dataType; - } - - @Nonnull - public String getValue() { - return m_value; - } - - @Nonnull - public Optional getLanguage() { - return m_language; - } - - @Nonnull - public Optional getDataType() { - return m_dataType; - } - - @Nonnull - public String asString() { - return m_value + m_dataType.map(s -> "^^" + s).orElse("") + m_language + m_language.map(s -> "@" + s).orElse(""); - } - - /** - * @return The value of the literal (without enclosing quotes), unless it's a full URI - * @throws BadDataFormatException If the value is neither a URI nor a literal - */ - @Nonnull - public Optional asLiteral() { - if (m_value.startsWith("\"") && m_value.endsWith("\"")) { - return Optional.of(m_value.substring(1, m_value.length() - 1)); - } else if (m_value.startsWith("<") && m_value.endsWith(">")) { - return Optional.empty(); - } else { - throw new BadDataFormatException("Neither a literal nor a URI: " + m_value); - } - } - - /** - * @return A URI of the value unless it's a true literal - * @throws BadDataFormatException If the value should be a URI but isn't valid, or if it's neither a URI nor a literal - */ - @Nonnull - public Optional asUri() { - if (m_value.startsWith("<") && m_value.endsWith(">")) { - String trimmed = m_value.substring(1, m_value.length() - 1); - try { - return Optional.of(new URI(trimmed)); - } catch (URISyntaxException e) { - throw new BadDataFormatException("Failed to parse URI " + trimmed); - } - } else if (m_value.startsWith("\"") && m_value.endsWith("\"")) { - return Optional.empty(); - } else { - throw new BadDataFormatException("Neither a literal nor a URI: " + m_value); - } - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Node node = (Node) o; - return Objects.equals(m_value, node.m_value) && - Objects.equals(m_language, node.m_language) && - Objects.equals(m_dataType, node.m_dataType); - } - - 
@Override - public int hashCode() { - return Objects.hash(m_value, m_language, m_dataType); - } - - @Override - public String toString() { - MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this) - .add("", m_value); - if (m_language.isPresent()) { - helper = helper.add("language", m_language.get()); - } - if (m_dataType.isPresent()) { - helper.add("dataType", m_dataType.get()); - } - return helper.toString(); - } -} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Prefix.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Prefix.java deleted file mode 100644 index fc441f1..0000000 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Prefix.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.pharmgkb.parsers.turtle.model; - -import com.google.common.base.MoreObjects; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.util.Objects; - - -/** - * @author Douglas Myers-Turnbull - */ -@Immutable -public class Prefix { - - private final String m_prefix; - private final String m_uri; - - public Prefix(@Nonnull String prefix, @Nonnull String uri) { - m_prefix = prefix; - m_uri = uri; - } - - @Nonnull - public String getPrefix() { - return m_prefix; - } - - @Nonnull - public String getUri() { - return m_uri; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Prefix prefix = (Prefix) o; - return Objects.equals(m_prefix, prefix.m_prefix) && - Objects.equals(m_uri, prefix.m_uri); - } - - @Override - public int hashCode() { - return Objects.hash(m_prefix, m_uri); - } - - @Nonnull - public String asString() { - return "@prefix " + m_prefix + ": <" + m_uri + "> ."; - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("prefix", m_prefix) - .add("uri", m_uri) - .toString(); - } -} diff --git 
a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Triple.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Triple.java deleted file mode 100644 index 1e5e9ce..0000000 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/Triple.java +++ /dev/null @@ -1,69 +0,0 @@ -package org.pharmgkb.parsers.turtle.model; - -import com.google.common.base.MoreObjects; - -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import java.util.Objects; - - -/** - * @author Douglas Myers-Turnbull - */ -@Immutable -public class Triple { - - private final Node m_subject; - private final Node m_predicate; - private final Node m_object; - - public Triple(@Nonnull Node subject, @Nonnull Node predicate, @Nonnull Node object) { - m_subject = subject; - m_predicate = predicate; - m_object = object; - } - - @Nonnull - public Node getSubject() { - return m_subject; - } - - @Nonnull - public Node getPredicate() { - return m_predicate; - } - - @Nonnull - public Node getObject() { - return m_object; - } - - @Nonnull - public String asLine() { - return m_subject.asString() + " " + m_predicate.asString() + " " + m_object.asString() + " ."; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Triple triple = (Triple) o; - return Objects.equals(m_subject, triple.m_subject) && - Objects.equals(m_predicate, triple.m_predicate) && - Objects.equals(m_object, triple.m_object); - } - - @Override - public int hashCode() { - return Objects.hash(m_subject, m_predicate, m_object); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("subject", m_subject) - .add("predicate", m_predicate) - .add("object", m_object) - .toString(); - } -} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TripleGroup.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleGroup.java similarity index 74% rename from 
turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TripleGroup.java rename to turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleGroup.java index 3a9bc26..316ff82 100644 --- a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TripleGroup.java +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleGroup.java @@ -11,23 +11,23 @@ * @author Douglas Myers-Turnbull */ @Immutable -public class TripleGroup { +public class TurtleGroup { private final String m_subject; - private final ImmutableMap m_triples; + private final ImmutableMap m_triples; - public TripleGroup(@Nonnull String subject, @Nonnull ImmutableMap triples) { + public TurtleGroup(@Nonnull String subject, @Nonnull ImmutableMap triples) { m_subject = subject; m_triples = triples; } @Nonnull - public String getSubject() { + public String subject() { return m_subject; } @Nonnull - public ImmutableMap getTriples() { + public ImmutableMap triples() { return m_triples; } @@ -35,7 +35,7 @@ public ImmutableMap getTriples() { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - TripleGroup that = (TripleGroup) o; + TurtleGroup that = (TurtleGroup) o; return Objects.equals(m_subject, that.m_subject) && Objects.equals(m_triples, that.m_triples); } diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleNode.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleNode.java new file mode 100644 index 0000000..a703b07 --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleNode.java @@ -0,0 +1,61 @@ +package org.pharmgkb.parsers.turtle.model; + +import org.pharmgkb.parsers.BadDataFormatException; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Optional; + + +@Immutable +public record TurtleNode( + @Nonnull String value, + @Nonnull Optional language, + @Nonnull 
Optional dataType + +) { + + @Nonnull + public String asString() { + return value + dataType.map(s -> "^^" + s).orElse("") + + language + language.map(s -> "@" + s).orElse(""); + } + + /** + * @return The value of the literal (without enclosing quotes), unless it's a full URI + * @throws BadDataFormatException If the value is neither a URI nor a literal + */ + @Nonnull + public Optional asLiteral() { + if (value.startsWith("\"") && value.endsWith("\"")) { + return Optional.of(value.substring(1, value.length() - 1)); + } + if (value.startsWith("<") && value.endsWith(">")) { + return Optional.empty(); + } + throw new BadDataFormatException("Neither a literal nor a URI: " + value); + } + + /** + * @return A URI of the value unless it's a true literal + * @throws BadDataFormatException If the value should be a URI but isn't valid, or if it's neither a URI nor a literal + */ + @Nonnull + public Optional asUri() { + if (value.startsWith("<") && value.endsWith(">")) { + String trimmed = value.substring(1, value.length() - 1); + try { + return Optional.of(new URI(trimmed)); + } catch (URISyntaxException e) { + throw new BadDataFormatException("Failed to parse URI " + trimmed, e); + } + } + if (value.startsWith("\"") && value.endsWith("\"")) { + return Optional.empty(); + } + throw new BadDataFormatException("Neither a literal nor a URI: " + value); + } + +} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtlePrefix.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtlePrefix.java new file mode 100644 index 0000000..32fd18e --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtlePrefix.java @@ -0,0 +1,37 @@ +package org.pharmgkb.parsers.turtle.model; + +import org.pharmgkb.parsers.BadDataFormatException; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; +import java.net.URI; +import java.net.URISyntaxException; + + +/** + * @author Douglas Myers-Turnbull + */ +@Immutable +public 
record TurtlePrefix( + @Nonnull String prefix, + @Nonnull String uri +) { + + public TurtlePrefix { + uriInstance(); + } + + public URI uriInstance() { + try { + return new URI(uri); + } catch (URISyntaxException e) { + throw new BadDataFormatException(e); + } + } + + @Nonnull + public String asString() { + return "@prefix " + prefix + ": <" + uri + "> ."; + } + +} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleTriple.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleTriple.java new file mode 100644 index 0000000..b19eadf --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/TurtleTriple.java @@ -0,0 +1,22 @@ +package org.pharmgkb.parsers.turtle.model; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; + + +/** + * @author Douglas Myers-Turnbull + */ +@Immutable +public record TurtleTriple( + TurtleNode subject, + TurtleNode predicate, + TurtleNode object +) { + + @Nonnull + public String asLine() { + return subject.asString() + " " + predicate.asString() + " " + object.asString() + " ."; + } + +} diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/package-info.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/package-info.java new file mode 100644 index 0000000..5532691 --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.turtle.model; diff --git a/turtle/src/main/java/org/pharmgkb/parsers/turtle/package-info.java b/turtle/src/main/java/org/pharmgkb/parsers/turtle/package-info.java new file mode 100644 index 0000000..3cc347f --- /dev/null +++ b/turtle/src/main/java/org/pharmgkb/parsers/turtle/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.turtle; diff --git a/vcf/build.gradle b/vcf/build.gradle index 74caa33..da07db7 100644 --- a/vcf/build.gradle +++ b/vcf/build.gradle @@ -1,4 +1,4 @@ dependencies { - compile project(':core') -} \ No newline 
at end of file + api project(':core') +} diff --git a/vcf/build/resources/test/org/pharmgkb/parsers/vcf/example.vcf b/vcf/build/resources/test/org/pharmgkb/parsers/vcf/example.vcf deleted file mode 100644 index 9cf28a8..0000000 --- a/vcf/build/resources/test/org/pharmgkb/parsers/vcf/example.vcf +++ /dev/null @@ -1,27 +0,0 @@ -##fileformat=VCFv4.2 -##fileDate=20090805 -##source=myImputationProgramV3.1 -##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta -##contig= -##phasing=partial -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##PEDIGREE= -##SAMPLE= -##pedigreeDB= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 -20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. -20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. -20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. -20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2:.,. 
-20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3 diff --git a/vcf/lombok.config b/vcf/lombok.config deleted file mode 100644 index 6aa51d7..0000000 --- a/vcf/lombok.config +++ /dev/null @@ -1,2 +0,0 @@ -# This file is generated by the 'io.freefair.lombok' Gradle plugin -config.stopBubbling = true diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParser.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParser.java index 2256884..29d9d91 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParser.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParser.java @@ -2,11 +2,11 @@ import com.google.common.base.Splitter; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineParser; import org.pharmgkb.parsers.model.GeneralizedBigDecimal; import org.pharmgkb.parsers.vcf.model.VcfPosition; import org.pharmgkb.parsers.vcf.model.VcfSample; import org.pharmgkb.parsers.vcf.utils.VcfEscapers; +import org.pharmgkb.parsers.vcf.validation.VcfValidator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -27,7 +27,7 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class VcfDataParser implements LineParser { +public class VcfDataParser implements VcfDataParserI { private static final long sf_logEvery = 10000; @@ -38,7 +38,7 @@ public class VcfDataParser implements LineParser { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0l); + private final AtomicLong m_lineNumber = new AtomicLong(0l); @Nonnull @Override @@ -79,7 +79,7 @@ public VcfPosition apply(@Nonnull String line) throws BadDataFormatException { VcfPosition.Builder builder = new VcfPosition.Builder(chromosome, position, ref); // ID - if (!data.get(2).equals(".")) { + if (!".".equals(data.get(2))) { builder.addIds( sf_semicolon.splitToList(data.get(2)).stream() .map(VcfEscapers.ID::unescape) @@ 
-88,12 +88,12 @@ public VcfPosition apply(@Nonnull String line) throws BadDataFormatException { } // ALT - if (!data.get(4).equals(".")) { + if (!".".equals(data.get(4))) { builder.addAlts(sf_comma.splitToList(data.get(4))); } // QUAL - if (!data.get(5).equals(".")) { + if (!".".equals(data.get(5))) { try { builder.setQuality(Optional.of(new GeneralizedBigDecimal(data.get(5)))); } catch (NumberFormatException e) { @@ -102,7 +102,7 @@ public VcfPosition apply(@Nonnull String line) throws BadDataFormatException { } // FILTER - if (!data.get(6).equals(".")) { + if (!".".equals(data.get(6))) { Stream unescaped = sf_semicolon.splitToList(data.get(6)).stream() .map(VcfEscapers.FILTER::unescape); builder.addFilters(unescaped.collect(Collectors.toList())); @@ -110,7 +110,7 @@ public VcfPosition apply(@Nonnull String line) throws BadDataFormatException { // INFO Set keysUsed = new HashSet<>(); - if (!data.get(7).equals(".")) { + if (!".".equals(data.get(7))) { List props = sf_semicolon.splitToList(data.get(7)); for (String prop : props) { int index = prop.indexOf('='); @@ -156,7 +156,7 @@ public VcfPosition apply(@Nonnull String line) throws BadDataFormatException { ); } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. + // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. 
e.addSuppressed(new RuntimeException("Failed on line " + m_lineNumber)); throw e; } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParserI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParserI.java new file mode 100644 index 0000000..6f7931a --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.vcf; + +import org.pharmgkb.parsers.LineParser; +import org.pharmgkb.parsers.vcf.model.VcfPosition; + +public interface VcfDataParserI extends LineParser { + +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriter.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriter.java index 4a56ab7..d48677e 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriter.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriter.java @@ -1,18 +1,14 @@ package org.pharmgkb.parsers.vcf; -import org.pharmgkb.parsers.LineWriter; import org.pharmgkb.parsers.model.GeneralizedBigDecimal; import org.pharmgkb.parsers.vcf.model.VcfPosition; import org.pharmgkb.parsers.vcf.model.VcfSample; import org.pharmgkb.parsers.vcf.model.allele.VcfAllele; import org.pharmgkb.parsers.vcf.utils.VcfEscapers; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; import javax.annotation.concurrent.ThreadSafe; -import java.lang.invoke.MethodHandles; import java.util.Collections; import java.util.List; import java.util.Map; @@ -24,51 +20,47 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class VcfDataWriter implements LineWriter { +public class VcfDataWriter implements VcfDataWriterI { - private static final long sf_logEvery = 10000; - - private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); @Nonnull @Override public String 
apply(@Nonnull VcfPosition position) { StringBuilder sb = new StringBuilder(128) - .append(position.getChromosome()) + .append(position.chromosome()) .append("\t") - .append(position.getPosition() + 1) // VCF is 1-based + .append(position.position() + 1) // VCF is 1-based .append("\t") .append(orDot( - position.getIds().stream() + position.ids().stream() .map(VcfEscapers.ID::escape) .collect(Collectors.toList()), ",") ) .append("\t") - .append(position.getRef().toVcfString()) + .append(position.ref().toVcfString()) .append("\t") .append(orDot( - position.getAlts().stream() + position.alts().stream() .map(VcfAllele::toVcfString) .collect(Collectors.toList()), ",") ) .append("\t") - .append(position.getQuality() + .append(position.quality() .map(GeneralizedBigDecimal::toString) .orElse(".")) .append("\t") .append(orDot( - position.getFilters().stream() + position.filters().stream() .map(VcfEscapers.FILTER::escape) .collect(Collectors.toList() ), ";") ) .append("\t") .append( - position.getInfo().asMap().entrySet().stream() + position.info().asMap().entrySet().stream() .map(e -> e.getKey() + ( Collections.singletonList("").containsAll(e.getValue())? 
"" @@ -78,12 +70,12 @@ public String apply(@Nonnull VcfPosition position) { )) .collect(Collectors.joining(";")) ); - if (!position.getFormat().isEmpty()) { + if (!position.format().isEmpty()) { sb.append("\t") - .append(position.getFormat().stream() + .append(position.format().stream() .map(VcfEscapers.FORMAT::escape) .collect(Collectors.joining(":"))); - for (VcfSample sample : position.getSamples()) { + for (VcfSample sample : position.samples()) { sb.append("\t") .append( sample.entrySet().stream() diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriterI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriterI.java new file mode 100644 index 0000000..29cfe59 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfDataWriterI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.vcf; + +import org.pharmgkb.parsers.LineWriter; +import org.pharmgkb.parsers.vcf.model.VcfPosition; + +public interface VcfDataWriterI extends LineWriter { + +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriter.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriter.java index 010c58a..b577189 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriter.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriter.java @@ -10,7 +10,9 @@ import javax.annotation.Nonnull; import javax.annotation.concurrent.NotThreadSafe; import java.io.*; +import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.util.Objects; import java.util.stream.Stream; /** @@ -18,33 +20,44 @@ * @author Douglas Myers-Turnbull */ @NotThreadSafe -public class VcfFileWriter implements Closeable { +public class VcfFileWriter implements VcfFileWriterI { private final PrintWriter m_writer; private final int m_flushEvery; - @Nonnull - public static Stream concat(@Nonnull VcfMetadataCollection metadata, @Nonnull Stream positions) { - return Stream.concat( - metadata.getLines().stream().map(new VcfMetadataWriter()), - 
positions.map(new VcfDataWriter()) - ); - } - - private VcfFileWriter(@Nonnull Builder builder) { + protected VcfFileWriter(@Nonnull Builder builder) { m_writer = builder.m_writer; m_flushEvery = builder.m_flushEvery; } - public void write(@Nonnull VcfMetadataCollection metadata, @Nonnull Stream positions) { - Preconditions.checkNotNull(positions, "Positions cannot be null"); - Preconditions.checkNotNull(metadata, "Metadata cannot be null"); - write(metadata.getLines().stream(), positions); + @Nonnull + public static Stream concat( + @Nonnull VcfMetadataCollection metadata, + @Nonnull Stream positions + ) { + return Stream.concat( + metadata.lines().stream().map(new VcfMetadataWriter()), + positions.map(new VcfDataWriter()) + ); + } + + @Override + public void write( + @Nonnull VcfMetadataCollection metadata, + @Nonnull Stream positions + ) { + Objects.requireNonNull(positions, "Positions cannot be null"); + Objects.requireNonNull(metadata, "Metadata cannot be null"); + write(metadata.lines().stream(), positions); } - public void write(@Nonnull Stream metadata, @Nonnull Stream positions) { - Preconditions.checkNotNull(metadata, "Metadata cannot be null"); - Preconditions.checkNotNull(positions, "Positions cannot be null"); + @Override + public void write( + @Nonnull Stream metadata, + @Nonnull Stream positions + ) { + Objects.requireNonNull(metadata, "Metadata cannot be null"); + Objects.requireNonNull(positions, "Positions cannot be null"); metadata.map(new VcfMetadataWriter()) .forEach(m_writer::println); m_writer.flush(); @@ -63,24 +76,24 @@ public void close() { } @NotThreadSafe - public static class Builder implements ObjectBuilder { + public static class Builder implements ObjectBuilder { private final PrintWriter m_writer; private int m_flushEvery = 10000; public Builder(@Nonnull PrintWriter writer) { - Preconditions.checkNotNull(writer, "Writer cannot be null"); + Objects.requireNonNull(writer, "Writer cannot be null"); m_writer = writer; } public 
Builder(@Nonnull File file) throws IOException { - Preconditions.checkNotNull(file, "File cannot be null"); - m_writer = new PrintWriter(new FileWriter(file)); + Objects.requireNonNull(file, "File cannot be null"); + m_writer = new PrintWriter(new FileWriter(file, StandardCharsets.UTF_8)); } public Builder(@Nonnull Path file) throws IOException { - Preconditions.checkNotNull(file, "File cannot be null"); - m_writer = new PrintWriter(new FileWriter(file.toFile())); + Objects.requireNonNull(file, "File cannot be null"); + m_writer = new PrintWriter(new FileWriter(file.toFile(), StandardCharsets.UTF_8)); } /** @@ -95,7 +108,7 @@ public Builder setFlushEvery(@Nonnegative int flushEvery) { @Nonnull @Override - public VcfFileWriter build() { + public VcfFileWriterI build() { return new VcfFileWriter(this); } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriterI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriterI.java new file mode 100644 index 0000000..ffe6ba9 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfFileWriterI.java @@ -0,0 +1,22 @@ +package org.pharmgkb.parsers.vcf; + +import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; +import org.pharmgkb.parsers.vcf.model.VcfPosition; +import org.pharmgkb.parsers.vcf.model.metadata.VcfMetadata; + +import javax.annotation.Nonnull; +import java.io.Closeable; +import java.util.stream.Stream; + +public interface VcfFileWriterI extends Closeable { + + void write( + @Nonnull VcfMetadataCollection metadata, + @Nonnull Stream positions + ); + + void write( + @Nonnull Stream metadata, + @Nonnull Stream positions + ); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParser.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParser.java index d4ea5bb..1eab61b 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParser.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParser.java @@ -1,11 +1,9 @@ package 
org.pharmgkb.parsers.vcf; -import com.google.common.base.Preconditions; -import com.google.common.base.Splitter; import org.pharmgkb.parsers.BadDataFormatException; -import org.pharmgkb.parsers.LineStructureParser; import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; -import org.pharmgkb.parsers.vcf.utils.VcfMetadataFactory; +import org.pharmgkb.parsers.vcf.factories.VcfMetadataFactory; +import org.pharmgkb.parsers.vcf.factories.VcfMetadataFactoryI; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -13,6 +11,7 @@ import javax.annotation.Nonnull; import javax.annotation.concurrent.ThreadSafe; import java.lang.invoke.MethodHandles; +import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Stream; @@ -21,19 +20,31 @@ * @author Douglas Myers-Turnbull */ @ThreadSafe -public class VcfMetadataParser implements LineStructureParser { +public class VcfMetadataParser implements VcfMetadataParserI { private static final long sf_logEvery = 10000; - private static final Splitter sf_tab = Splitter.on("\t"); private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private AtomicLong m_lineNumber = new AtomicLong(0L); + private final AtomicLong m_lineNumber = new AtomicLong(0L); - @Nonnull + private final VcfMetadataFactoryI m_factory; + + public VcfMetadataParser(VcfMetadataFactoryI factory) { + m_factory = factory; + } + + public VcfMetadataParser() { + this(new VcfMetadataFactory()); + } + + @Nonnull @Override - public VcfMetadataCollection apply(@Nonnull Stream stream) throws BadDataFormatException { - Preconditions.checkNotNull(stream, "Stream cannot be null"); + public VcfMetadataCollection apply( + @Nonnull + Stream stream + ) throws BadDataFormatException { + Objects.requireNonNull(stream, "Stream cannot be null"); final VcfMetadataCollection.Builder builder = new VcfMetadataCollection.Builder(); stream.takeWhile(s -> s.startsWith("#")) .forEachOrdered(line -> { @@ 
-48,13 +59,13 @@ public VcfMetadataCollection apply(@Nonnull Stream stream) throws BadDat if (m_lineNumber.get() == 1L && !line.startsWith("##fileformat=VCFv")) { throw new BadDataFormatException("First line is " + line + "; doesn't appear to be VCF"); } - builder.addLine(VcfMetadataFactory.translate(line)); + builder.addLine(m_factory.translate(line)); } catch (IllegalArgumentException | IllegalStateException e) { throw new BadDataFormatException("Couldn't parse line #" + m_lineNumber, e); } catch (RuntimeException e) { // this is a little weird, but it's helpful - // not that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. + // note that we're not throwing a BadDataFormatException because we don't expect AIOOB, e.g. e.addSuppressed(new RuntimeException("Failed on line " + m_lineNumber)); throw e; } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParserI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParserI.java new file mode 100644 index 0000000..a1db9b2 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataParserI.java @@ -0,0 +1,8 @@ +package org.pharmgkb.parsers.vcf; + +import org.pharmgkb.parsers.LineStructureParser; +import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; + +public interface VcfMetadataParserI extends LineStructureParser { + +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriter.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriter.java index 05a2213..eb736ef 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriter.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriter.java @@ -1,17 +1,12 @@ package org.pharmgkb.parsers.vcf; -import com.google.common.base.Preconditions; -import org.pharmgkb.parsers.LineWriter; import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; import org.pharmgkb.parsers.vcf.model.metadata.VcfMetadata; -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; import javax.annotation.concurrent.NotThreadSafe; -import java.lang.invoke.MethodHandles; -import java.util.stream.Stream; +import java.util.Objects; /** * @@ -29,30 +24,20 @@ * @author Douglas Myers-Turnbull */ @NotThreadSafe -public class VcfMetadataWriter implements LineWriter { - - private static final long sf_logEvery = 10000; - - private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); +public class VcfMetadataWriter implements VcfMetadataWriterI { private long m_lineNumber = 0L; - @Nonnegative @Override + @Nonnegative public long nLinesProcessed() { return m_lineNumber; } - @Nonnull - public Stream apply(@Nonnull VcfMetadataCollection collection) { - Preconditions.checkNotNull(collection, "Metadata cannot be null"); - return collection.getLines().stream().map(this); - } - @Nonnull @Override public String apply(@Nonnull VcfMetadata vcfMetadata) { - Preconditions.checkNotNull(vcfMetadata, "Metadata cannot be null"); + Objects.requireNonNull(vcfMetadata, "Metadata cannot be null"); m_lineNumber++; return vcfMetadata.toVcfLine(); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriterI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriterI.java new file mode 100644 index 0000000..df657bd --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfMetadataWriterI.java @@ -0,0 +1,19 @@ +package org.pharmgkb.parsers.vcf; + +import org.pharmgkb.parsers.LineWriter; +import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; +import org.pharmgkb.parsers.vcf.model.metadata.VcfMetadata; + +import javax.annotation.Nonnull; +import java.util.Objects; +import java.util.stream.Stream; + +public interface VcfMetadataWriterI extends LineWriter { + + @Nonnull + default Stream apply(@Nonnull VcfMetadataCollection collection) { + Objects.requireNonNull(collection, "Metadata cannot be null"); + 
return collection.lines().stream().map(this); + } + +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfValidator.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfValidator.java deleted file mode 100644 index 5cc1be0..0000000 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/VcfValidator.java +++ /dev/null @@ -1,219 +0,0 @@ -package org.pharmgkb.parsers.vcf; - -import com.google.common.base.MoreObjects; -import com.google.common.base.Preconditions; -import org.pharmgkb.parsers.ObjectBuilder; -import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; -import org.pharmgkb.parsers.vcf.model.VcfPosition; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import javax.annotation.concurrent.Immutable; -import javax.annotation.concurrent.NotThreadSafe; -import javax.annotation.concurrent.ThreadSafe; -import java.io.Serial; -import java.lang.invoke.MethodHandles; -import java.util.Objects; -import java.util.function.Consumer; - -/** - * Checks errors arising from a contradiction between metadata and VCF positions. - * This class is implemented to {@link Consumer consume} a {@link VcfPosition VcfPositions} and perform a specified action for each error found. - * The recommended use is with {@link java.util.stream.Stream#peek(Consumer)} before reading or before writing (but before both is likely unnecessary). 
- * @author Douglas Myers-Turnbull - */ -@ThreadSafe -public class VcfValidator implements Consumer { - - private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - private final Consumer m_action; - - private final VcfMetadataCollection m_metadata; - - private VcfValidator(@Nonnull Consumer action, @Nonnull VcfMetadataCollection metadata) { - m_action = action; - m_metadata = metadata; - } - - @Override - public void accept(@Nonnull VcfPosition position) { - Preconditions.checkNotNull(position, "VcfPosition cannot be null"); - m_metadata.getSample().keySet().stream() - .filter(k -> m_metadata.getHeader().getSampleNames().contains(k)) - .map(s -> new InvalidProperty(position.getChromosome(), position.getPosition(), s, PropertyType.SAMPLE)) - .forEach(m_action); - m_metadata.getHeader().getSampleNames().stream() - .filter(k -> m_metadata.getSample().containsKey(k)) - .map(s -> new InvalidProperty(position.getChromosome(), position.getPosition(), s, PropertyType.SAMPLE)) - .forEach(m_action); - position.getFilters().stream() - .filter(s -> !m_metadata.getFilter().containsKey(s)) - .map(s -> new InvalidProperty(position.getChromosome(), position.getPosition(), s, PropertyType.FILTER)) - .forEach(m_action); - position.getFormat().stream() - .filter(s -> !m_metadata.getFormat().containsKey(s)) - .map(s -> new InvalidProperty(position.getChromosome(), position.getPosition(), s, PropertyType.FORMAT)) - .forEach(m_action); - position.getInfo().entries().stream() - .filter(e -> !m_metadata.getFormat().containsKey(e.getKey())) - .map(e -> new InvalidProperty(position.getChromosome(), position.getPosition(), e.getKey(), PropertyType.INFO)) - .forEach(m_action); - } - - @NotThreadSafe - public static class Builder implements ObjectBuilder { - - private final VcfMetadataCollection m_metadata; - - private Consumer m_action; - - /** - * The default action is to throw a {@link ValidationException} for the first invalid property. 
- */ - public Builder(@Nonnull VcfMetadataCollection metadata) { - Preconditions.checkNotNull(metadata, "Metadata cannot be null"); - m_action = error -> {throw new ValidationException(error);}; - m_metadata = metadata; - } - - /** - * Sets the {@link #setAction(Consumer) action} to logging a warning for each error. - */ - @Nonnull - public Builder warnOnly() { - m_action = error -> sf_logger.warn("Bad {}: \"{}\" for position {}:{}", error.getSource(), error.getKey(), error.getChromosome(), error.getPosition()); - return this; - } - - /** - * Replaces the action with a new one. - */ - @Nonnull - public Builder setAction(@Nonnull Consumer action) { - Preconditions.checkNotNull(action, "Action cannot be null"); - m_action = action; - return this; - } - - @Nonnull - @Override - public VcfValidator build() { - return new VcfValidator(m_action, m_metadata); - } - } - - /** - * An aspect of a VCF position that is wrong because it contradicts the metadata. - */ - @Immutable - public static class InvalidProperty { - - private final String m_chromosome; - private final long m_position; - private final String m_key; - private final PropertyType m_source; - - public InvalidProperty( - @Nonnull String chromosome, - @Nonnegative long position, - @Nonnull String key, - @Nonnull PropertyType source - ) { - m_chromosome = chromosome; - m_position = position; - m_key = key; - m_source = source; - } - - @Nonnull - public String getChromosome() { - return m_chromosome; - } - - public long getPosition() { - return m_position; - } - - @Nonnull - public PropertyType getSource() { - return m_source; - } - - @Nonnull - public String getKey() { - return m_key; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - InvalidProperty that = (InvalidProperty) o; - return m_position == that.m_position && - Objects.equals(m_chromosome, that.m_chromosome) && - Objects.equals(m_key, that.m_key) && - m_source 
== that.m_source; - } - - @Override - public int hashCode() { - return Objects.hash(m_chromosome, m_position, m_key, m_source); - } - - @Nonnull - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("chromosome", m_chromosome) - .add("position", m_position) - .add("key", m_key) - .add("source", m_source) - .toString(); - } - } - - /** - * What property is wrong: INFO, FORMAT, FILTER, or SAMPLE. - */ - public enum PropertyType { - INFO, FORMAT, FILTER, SAMPLE - } - - /** - * An exception caused by an {@link InvalidProperty}. - */ - public static class ValidationException extends RuntimeException { - - private final InvalidProperty m_invalid; - - public InvalidProperty getInvalidProperty() { - return m_invalid; - } - - public ValidationException(InvalidProperty error) { - super("Bad " + error.getSource() + ": \"" + error.getKey() + "\" for position " + error.getChromosome() + ":" + error.getPosition()); - m_invalid = error; - } - - @Serial - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - throw new java.io.NotSerializableException("org.pharmgkb.parsers.vcf.VcfValidator.ValidationException"); - } - - @Serial - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - throw new java.io.NotSerializableException("org.pharmgkb.parsers.vcf.VcfValidator.ValidationException"); - } - } - - @Override - public String toString() { - return "VcfValidator{" + - "action=" + m_action + - ", metadata: " + m_metadata.getLines().size() + " lines" + - '}'; - } -} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfMetadataCollector.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollector.java similarity index 79% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfMetadataCollector.java rename to vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollector.java index 1b51eb9..6479b60 100644 --- 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfMetadataCollector.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollector.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.vcf.utils; +package org.pharmgkb.parsers.vcf.builders; import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; import org.pharmgkb.parsers.vcf.model.metadata.VcfMetadata; @@ -9,15 +9,14 @@ import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.Supplier; -import java.util.stream.Collector; /** * Collects a stream of {@link VcfMetadata} into a {@link VcfMetadataCollection}. * @author Douglas Myers-Turnbull */ -public class VcfMetadataCollector implements Collector { +public class VcfMetadataCollector implements VcfMetadataCollectorI { - private VcfMetadataCollection.Builder m_builder = new VcfMetadataCollection.Builder(); + private final VcfMetadataCollection.Builder m_builder = new VcfMetadataCollection.Builder(); @Override public Supplier supplier() { diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollectorI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollectorI.java new file mode 100644 index 0000000..3ab9895 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfMetadataCollectorI.java @@ -0,0 +1,10 @@ +package org.pharmgkb.parsers.vcf.builders; + +import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; +import org.pharmgkb.parsers.vcf.model.metadata.VcfMetadata; + +import java.util.stream.Collector; + +public interface VcfMetadataCollectorI + extends Collector { +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilder.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilder.java new file mode 100644 index 0000000..4f3a5de --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilder.java @@ -0,0 +1,45 @@ +package 
org.pharmgkb.parsers.vcf.builders; + + +import com.google.common.collect.ImmutableMap; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import javax.annotation.concurrent.NotThreadSafe; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +/** + * A builder for string-to-string maps. + * @author Douglas Myers-Turnbull + */ +@NotThreadSafe +public class VcfPropertyMapBuilder extends ImmutableMap.Builder + implements VcfPropertyMapBuilderI { + + public VcfPropertyMapBuilder() {} + + public VcfPropertyMapBuilder(@Nonnull Map map) { + map.forEach(this::put); + } + + @Override + @Nonnull + public VcfPropertyMapBuilderI put(@Nonnull String key, @Nonnull Optional value) { + Objects.requireNonNull(key, "Key cannot be null"); + Objects.requireNonNull(value, "Value cannot be null"); + value.ifPresent(s -> super.put(key, s)); + return this; + } + + @Nonnull + @Override + public VcfPropertyMapBuilder put(@Nonnull String key, @Nullable String value) { + Objects.requireNonNull(key, "Key cannot be null"); + if (null != value) { + super.put(key, value); + } + return this; + } +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilderI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilderI.java new file mode 100644 index 0000000..82491b3 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/VcfPropertyMapBuilderI.java @@ -0,0 +1,17 @@ +package org.pharmgkb.parsers.vcf.builders; + +import com.google.common.collect.ImmutableMap; +import org.pharmgkb.parsers.ObjectBuilder; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.Optional; + +public interface VcfPropertyMapBuilderI extends ObjectBuilder> { + + @Nonnull + VcfPropertyMapBuilderI put(@Nonnull String key, @Nonnull Optional value); + + @Nonnull + VcfPropertyMapBuilderI put(@Nonnull String key, @Nullable String value); +} diff --git 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/package-info.java new file mode 100644 index 0000000..7e0b7f4 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/builders/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.builders; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfAlleleFactory.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactory.java similarity index 60% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfAlleleFactory.java rename to vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactory.java index 59716be..1459880 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfAlleleFactory.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactory.java @@ -1,28 +1,27 @@ -package org.pharmgkb.parsers.vcf.utils; +package org.pharmgkb.parsers.vcf.factories; -import com.google.common.base.Preconditions; import org.pharmgkb.parsers.vcf.model.allele.*; import javax.annotation.Nonnull; +import java.util.Objects; /** * Converts a VCF ALT allele string to the appropriate subclass of {@link VcfAllele}. 
* @author Douglas Myers-Turnbull */ -public class VcfAlleleFactory { +public class VcfAlleleFactory implements VcfAlleleFactoryI { - private VcfAlleleFactory() {} - - @Nonnull - public static VcfAllele translate(@Nonnull String string) { - Preconditions.checkNotNull(string, "Allele string cannot be null"); + @Override + @Nonnull + public VcfAllele translate(@Nonnull String string) { + Objects.requireNonNull(string, "Allele string cannot be null"); if (string.startsWith("[") || string.startsWith("]")) { return VcfBreakpointAllele.fromVcfAlt(string); } if (string.startsWith("<")) { return VcfSymbolicAllele.fromVcfAlt(string); } - if (string.equals("*")) { + if ("*".equals(string)) { return VcfDeletedAllele.DELETED; } return VcfBasesAllele.fromVcf(string); diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactoryI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactoryI.java new file mode 100644 index 0000000..9e82dfb --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfAlleleFactoryI.java @@ -0,0 +1,11 @@ +package org.pharmgkb.parsers.vcf.factories; + +import org.pharmgkb.parsers.vcf.model.allele.VcfAllele; + +import javax.annotation.Nonnull; + +public interface VcfAlleleFactoryI { + + @Nonnull + VcfAllele translate(@Nonnull String string); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfMetadataFactory.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactory.java similarity index 87% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfMetadataFactory.java rename to vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactory.java index 1e87c8f..d595d14 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfMetadataFactory.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactory.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.vcf.utils; +package org.pharmgkb.parsers.vcf.factories; import 
com.google.common.base.Preconditions; import com.google.common.base.Splitter; @@ -16,9 +16,7 @@ * Converts a VCF metadata line (as a string) to the appropriate subclass of {@link VcfMetadata}/ * @author Douglas Myers-Turnbull */ -public class VcfMetadataFactory { - - private VcfMetadataFactory() {} +public class VcfMetadataFactory implements VcfMetadataFactoryI { private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -26,13 +24,14 @@ private VcfMetadataFactory() {} private static final Splitter sf_equalsSplitter = Splitter.on('='); private static final Splitter sf_tabSplitter = Splitter.on('\t'); - @Nonnull - public static VcfMetadata translate(@Nonnull String line) { - Preconditions.checkNotNull(line, "Metadata line cannot be null"); + @Override + @Nonnull + public VcfMetadata translate(@Nonnull String line) { + Objects.requireNonNull(line, "Metadata line cannot be null"); Preconditions.checkArgument(line.startsWith("#"), "Metadata line does not start with #; was [[[" + line + "]]]"); Matcher matcher = sf_pattern.matcher(line); if (matcher.matches()) { - switch(matcher.group(1)) { + switch (matcher.group(1)) { // we can't pull the call to build() out without an exception if it's a raw metadata that // happens to start with ##xxx= case VcfMetadataType.ALT_ID: return new VcfAltMetadata(build(matcher.group(2))); @@ -42,11 +41,13 @@ public static VcfMetadata translate(@Nonnull String line) { case VcfMetadataType.SAMPLE_ID: return new VcfSampleMetadata(build(matcher.group(2))); case VcfMetadataType.CONTIG_ID: return new VcfContigMetadata(build(matcher.group(2))); case VcfMetadataType.PEDIGREE_ID: return new VcfPedigreeMetadata(build(matcher.group(2))); - } + default: + throw new IllegalStateException("Unexpected value: " + matcher.group(1)); + } } if (line.startsWith("##fileformat=VCFv")) { String version = line.substring("##fileformat=VCFv".length()); - if (!version.equals("4.3")) { + if (!"4.3".equals(version)) { 
sf_logger.warn("This package is only guaranteed to work for VCF version 4.3; this version is {}", version); } return new VcfVersionMetadata(version); @@ -87,7 +88,7 @@ private static List split(@Nonnull String input, char delimiter) { if (input.charAt(current) == '\"') { inQuotes = !inQuotes; } - boolean atLastChar = (current == input.length() - 1); + boolean atLastChar = current == input.length() - 1; if (atLastChar) { result.add(input.substring(start)); } else if (input.charAt(current) == delimiter && !inQuotes) { diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactoryI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactoryI.java new file mode 100644 index 0000000..f02c2f2 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfMetadataFactoryI.java @@ -0,0 +1,10 @@ +package org.pharmgkb.parsers.vcf.factories; + +import org.pharmgkb.parsers.vcf.model.metadata.*; + +import javax.annotation.Nonnull; + +public interface VcfMetadataFactoryI { + @Nonnull + VcfMetadata translate(@Nonnull String line); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactory.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactory.java new file mode 100644 index 0000000..860d909 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactory.java @@ -0,0 +1,126 @@ +package org.pharmgkb.parsers.vcf.factories; + +import org.pharmgkb.parsers.model.GeneralizedBigDecimal; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedProperty; +import org.pharmgkb.parsers.vcf.model.metadata.VcfFormatType; +import org.pharmgkb.parsers.vcf.model.metadata.VcfInfoType; + +import javax.annotation.Nonnull; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +public class VcfPropertyFactory implements VcfPropertyFactoryI { + + @Override + @Nonnull + public 
Optional convertProperty( + @Nonnull VcfReservedProperty key, + @Nonnull Optional value + ) { + return convertProperty(key.type(), value, key.isList()); + } + + @Override + @Nonnull + public Optional convertProperty( + @Nonnull Class clas, + @Nonnull Optional value, boolean isList + ) { + if (value.isEmpty()) { + return Optional.empty(); + } + if (!isList) { + try { + //noinspection unchecked + return Optional.of((T) convertElement(clas, value)); + } catch (ClassCastException e) { + throw new IllegalArgumentException("Wrong type specified", e); + } + } + List list = Arrays.stream(value.get().split(",")) + .map(part -> convertElement(clas, Optional.of(part))) + .collect(Collectors.toCollection(() -> new ArrayList<>(64))); + try { + //noinspection unchecked + return Optional.of((T) list); + } catch (ClassCastException e) { + throw new IllegalArgumentException("Wrong type specified", e); + } + } + + @Override + @Nonnull + public Optional convertProperty( + @Nonnull VcfFormatType type, + @Nonnull Optional value + ) { + Class clas = switch (type) { + case Integer -> Long.class; + case Float -> GeneralizedBigDecimal.class; + case Character -> Character.class; + case String -> String.class; + }; + return convertProperty(clas, value, false); + } + + @Override + @Nonnull + public Optional convertProperty( + @Nonnull VcfInfoType type, + @Nonnull Optional value + ) { + Class clas = switch (type) { + case Integer -> Long.class; + case Float -> GeneralizedBigDecimal.class; + case Character -> Character.class; + case String -> String.class; + case Flag -> Boolean.class; + }; + return convertProperty(clas, value, false); + } + + @Nonnull + private Optional convertElement(@Nonnull Class clas, @Nonnull Optional value) { + if (value.isEmpty()) { + return Optional.empty(); + } + String val = value.get(); + if (String.class == clas) { + return Optional.of(value); + } + if (Character.class == clas) { + if (val.length() == 1) { + return Optional.of(value); + } + throw new 
IllegalArgumentException("Invalid character value '" + value + "'"); + } + if (Boolean.class == clas) { + if ("0".equals(val) || "false".equalsIgnoreCase(val)) { + return Optional.of(false); + } + if ("1".equals(val) || "true".equalsIgnoreCase(val)) { + return Optional.of(true); + } + throw new IllegalArgumentException("Invalid boolean value: '" + value + "'"); + + } + if (GeneralizedBigDecimal.class == clas) { + try { + return Optional.of(new GeneralizedBigDecimal(val)); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Expected float; got " + value, e); + } + } else if (clas == Long.class) { + try { + return Optional.of(Long.parseLong(val)); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Expected integer; got " + value, e); + } + } + throw new UnsupportedOperationException("Type " + clas + " unrecognized"); + } + +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactoryI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactoryI.java new file mode 100644 index 0000000..8d6f5e2 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/VcfPropertyFactoryI.java @@ -0,0 +1,40 @@ +package org.pharmgkb.parsers.vcf.factories; + +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedProperty; +import org.pharmgkb.parsers.vcf.model.metadata.VcfFormatType; +import org.pharmgkb.parsers.vcf.model.metadata.VcfInfoType; + +import javax.annotation.Nonnull; +import java.util.Optional; + +/** + * A kit of methods for converting VCF strings to their expected types. + * @author Douglas Myers-Turnbull + */ +public interface VcfPropertyFactoryI { + + /** + * Converts a String representation of a property into a more useful type. Specifically, can return: + *
 <ul>
+ * <li>String</li>
+ * <li>Long</li>
+ * <li>GeneralizedBigDecimal</li>
+ * <li>The Boolean true (for flags)</li>
+ * <li>A List of any of the above types</li>
+ * </ul>
+ */ + @Nonnull + Optional convertProperty(@Nonnull VcfReservedProperty key, @Nonnull Optional value); + + /** + * @see #convertProperty(VcfReservedProperty, Optional) + */ + @Nonnull + Optional convertProperty(@Nonnull Class clas, @Nonnull Optional value, boolean isList); + + @Nonnull + Optional convertProperty(@Nonnull VcfFormatType type, @Nonnull Optional value); + + @Nonnull + Optional convertProperty(@Nonnull VcfInfoType type, @Nonnull Optional value); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/package-info.java new file mode 100644 index 0000000..022103b --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/factories/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.factories; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfInfo.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfInfo.java index f738da7..428573d 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfInfo.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfInfo.java @@ -1,9 +1,8 @@ package org.pharmgkb.parsers.vcf.model; -import com.google.common.base.Preconditions; import com.google.common.collect.*; -import org.pharmgkb.parsers.vcf.model.extra.ReservedInfoProperty; -import org.pharmgkb.parsers.vcf.utils.VcfConversionUtils; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedInfoProperty; +import org.pharmgkb.parsers.vcf.factories.VcfPropertyFactory; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -16,7 +15,7 @@ /** * A map for the INFO column of a VCF position. - * This class is just a wrapper for a {@link ImmutableMultimap} that provides conversion utilities (see {@link VcfConversionUtils}). + * This class is just a wrapper for a {@link ImmutableMultimap} that provides conversion utilities (see {@link VcfPropertyFactory}). 
* @author Douglas Myers-Turnbull */ public class VcfInfo { @@ -28,7 +27,7 @@ public VcfInfo(@Nonnull ImmutableMultimap info) { } @Nonnull - public ImmutableMultimap getMap() { + public ImmutableMultimap map() { return m_info; } @@ -36,16 +35,16 @@ public ImmutableCollection get(@Nonnull String key) { return m_info.get(key); } - public ImmutableCollection get(@Nonnull ReservedInfoProperty key) { - return m_info.get(key.getId()); + public ImmutableCollection get(@Nonnull VcfReservedInfoProperty key) { + return m_info.get(key.id()); } @Nonnull - @SuppressWarnings("unchecked") - public ImmutableCollection getConverted(@Nonnull ReservedInfoProperty key) { - return ImmutableList.copyOf( // needed for consistency of API - m_info.get(key.getId()).stream() - .map(s -> (T) VcfConversionUtils.convertProperty(key, Optional.of(s)).get()) + public ImmutableCollection getConverted(@Nonnull VcfReservedInfoProperty key) { + //noinspection unchecked + return ImmutableList.copyOf( // needed for consistency of API + m_info.get(key.id()).stream() + .map(s -> (T) new VcfPropertyFactory().convertProperty(key, Optional.of(s)).get()) .collect(Collectors.toList()) ); } @@ -77,8 +76,8 @@ public boolean containsKey(@Nonnull String key) { return m_info.containsKey(key); } - public boolean containsKey(@Nonnull ReservedInfoProperty key) { - return m_info.containsKey(key.getId()); + public boolean containsKey(@Nonnull VcfReservedInfoProperty key) { + return m_info.containsKey(key.id()); } @Nonnull @@ -92,7 +91,7 @@ public int size() { } public boolean containsEntry(@Nonnull String key, @Nonnull String value) { - Preconditions.checkNotNull(key, "Info key cannot be null"); + Objects.requireNonNull(key, "Info key cannot be null"); return m_info.containsEntry(key, value); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollection.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollection.java index 3cb7750..63a745c 100644 --- 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollection.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollection.java @@ -9,18 +9,15 @@ import javax.annotation.Nonnull; import javax.annotation.concurrent.Immutable; import javax.annotation.concurrent.NotThreadSafe; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Stores an ordered list of VCF metadata lines, including the {@code ##vcfVersion} and header ({@code #CHROM...}) lines. - * Also provides fast access to reserved metatadata types. For example, you can get all INFO metadata by {@link #getInfo()}. + * Also provides fast access to reserved metatadata types. For example, you can get all INFO metadata by {@link #info()}. * @author Douglas Myers-Turnbull */ @Immutable -public class VcfMetadataCollection { +public class VcfMetadataCollection implements VcfMetadataCollectionI { private final VcfVersionMetadata m_vcfVersion; private final VcfHeaderMetadata m_header; @@ -38,79 +35,88 @@ public class VcfMetadataCollection { private final ImmutableList m_assembly; private final ImmutableList m_pedigreeDb; - /** - * @return The VCF header line, minus the {@code ##vcfVersion=} - */ - @Nonnull - public String getVcfVersion() { - return m_vcfVersion.getVersionNumber(); + @Override + @Nonnull + public String vcfVersion() { + return m_vcfVersion.versionNumber(); } - @Nonnull - public ImmutableList getLines() { + @Override + @Nonnull + public ImmutableList lines() { return m_lines; } - @Nonnull - public ImmutableMap getAlt() { + @Override + @Nonnull + public ImmutableMap alt() { return m_alt; } - @Nonnull - public ImmutableMap getFilter() { + @Override + @Nonnull + public ImmutableMap filter() { return m_filter; } - @Nonnull - public ImmutableMap getFormat() { + @Override + @Nonnull + public ImmutableMap format() { return m_format; } - @Nonnull - public ImmutableMap getInfo() { + @Override + @Nonnull 
+ public ImmutableMap info() { return m_info; } - @Nonnull - public ImmutableMap getSample() { + @Override + @Nonnull + public ImmutableMap sample() { return m_sample; } - @Nonnull - public ImmutableMap getContig() { + @Override + @Nonnull + public ImmutableMap contig() { return m_contig; } - @Nonnull - public ImmutableList getPedigree() { + @Override + @Nonnull + public ImmutableList pedigree() { return m_pedigree; } - @Nonnull - public ImmutableList getAssembly() { + @Override + @Nonnull + public ImmutableList assembly() { return m_assembly; } - @Nonnull - public ImmutableList getPedigreeDb() { + @Override + @Nonnull + public ImmutableList pedigreeDb() { return m_pedigreeDb; } - @Nonnull - public VcfHeaderMetadata getHeader() { + @Override + @Nonnull + public VcfHeaderMetadata header() { return m_header; } /** - * Convenience method for {@link VcfHeaderMetadata#getSampleNames()}. + * Convenience method for {@link VcfHeaderMetadata#sampleNames()}. * @return The names of the VCF samples, in order */ @Nonnull - public ImmutableList getSampleNames() { - return m_header.getSampleNames(); + public ImmutableList sampleNames() { + return m_header.sampleNames(); } - private VcfMetadataCollection(@Nonnull Builder builder) { + protected VcfMetadataCollection(@Nonnull Builder builder) { m_vcfVersion = builder.m_vcfVersion; @@ -134,19 +140,17 @@ public static class Builder implements ObjectBuilder { private VcfVersionMetadata m_vcfVersion = null; private VcfHeaderMetadata m_header = null; - - private List m_lines = new ArrayList<>(); - - private Map m_alt = new LinkedHashMap<>(); - private Map m_filter = new LinkedHashMap<>(); - private Map m_format = new LinkedHashMap<>(); - private Map m_info = new LinkedHashMap<>(); - private Map m_sample = new LinkedHashMap<>(); - private Map m_contig = new LinkedHashMap<>(); - - private List m_pedigree = new ArrayList<>(); - private List m_assembly = new ArrayList<>(); - private List m_pedigreeDb = new ArrayList<>(); + private final List 
m_lines = new ArrayList<>(); + private final Map m_alt = new LinkedHashMap<>(); + private final Map m_filter = new LinkedHashMap<>(); + private final Map m_format = new LinkedHashMap<>(); + private final Map m_info = new LinkedHashMap<>(); + private final Map m_sample = new LinkedHashMap<>(); + private final Map m_contig = new LinkedHashMap<>(); + + private final List m_pedigree = new ArrayList<>(); + private final List m_assembly = new ArrayList<>(); + private final List m_pedigreeDb = new ArrayList<>(); public Builder() { @@ -157,7 +161,7 @@ public Builder() { */ public Builder(@Nonnull VcfMetadataCollection... collections) { for (VcfMetadataCollection collection : collections) { - Preconditions.checkNotNull(collection, "VcfMetadataCollection cannot be null"); + Objects.requireNonNull(collection, "VcfMetadataCollection cannot be null"); collection.m_lines.forEach(this::addLine); } } @@ -167,48 +171,61 @@ public Builder(@Nonnull VcfMetadataCollection... collections) { */ public Builder(@Nonnull Builder... 
builders) { for (Builder builder : builders) { - Preconditions.checkNotNull(builder, "Builder cannot be null"); + Objects.requireNonNull(builder, "Builder cannot be null"); builder.m_lines.forEach(this::addLine); } } - @SuppressWarnings("OverlyStrongTypeCast") @Nonnull public Builder addLine(@Nonnull VcfMetadata line) { - Preconditions.checkNotNull(line, "VcfMetadata cannot be null"); + Objects.requireNonNull(line, "VcfMetadata cannot be null"); m_lines.add(line); if (line instanceof VcfVersionMetadata) { - if (m_vcfVersion != null) throw new IllegalArgumentException("Duplicate VCF version line"); - m_vcfVersion = ((VcfVersionMetadata)line); + if (null != m_vcfVersion) { + throw new IllegalArgumentException("Duplicate VCF version line"); + } + m_vcfVersion = (VcfVersionMetadata)line; } if (line instanceof VcfAltMetadata) { - String id = ((VcfAltMetadata)line).getId(); - if (m_alt.containsKey(id)) throw new IllegalArgumentException("Duplicate ALT metadata with ID " + id); + String id = ((VcfAltMetadata)line).id(); + if (m_alt.containsKey(id)) { + throw new IllegalArgumentException("Duplicate ALT metadata with ID " + id); + } m_alt.put(id, (VcfAltMetadata)line); } if (line instanceof VcfFilterMetadata) { - String id = ((VcfFilterMetadata)line).getId(); - if (m_filter.containsKey(id)) throw new IllegalArgumentException("Duplicate FILTER metadata with ID " + id); + String id = ((VcfFilterMetadata)line).id(); + if (m_filter.containsKey(id)) { + throw new IllegalArgumentException("Duplicate FILTER metadata with ID " + id); + } m_filter.put(id, (VcfFilterMetadata)line); } if (line instanceof VcfFormatMetadata) { - String id = ((VcfFormatMetadata)line).getId(); - if (m_format.containsKey(id)) throw new IllegalArgumentException("Duplicate FORMAT metadata with ID " + id); + String id = ((VcfFormatMetadata)line).id(); + if (m_format.containsKey(id)) { + throw new IllegalArgumentException("Duplicate FORMAT metadata with ID " + id); + } m_format.put(id, 
(VcfFormatMetadata)line); } if (line instanceof VcfInfoMetadata) { - String id = ((VcfInfoMetadata)line).getId(); - if (m_info.containsKey(id)) throw new IllegalArgumentException("Duplicate INFO metadata with ID " + id); + String id = ((VcfInfoMetadata)line).id(); + if (m_info.containsKey(id)) { + throw new IllegalArgumentException("Duplicate INFO metadata with ID " + id); + } m_info.put(id, (VcfInfoMetadata) line); } if (line instanceof VcfSampleMetadata) { - String id = ((VcfSampleMetadata)line).getId(); - if (m_sample.containsKey(id)) throw new IllegalArgumentException("Duplicate SAMPLE metadata with ID " + id); + String id = ((VcfSampleMetadata)line).id(); + if (m_sample.containsKey(id)) { + throw new IllegalArgumentException("Duplicate SAMPLE metadata with ID " + id); + } m_sample.put(id, (VcfSampleMetadata)line); } if (line instanceof VcfContigMetadata) { - String id = ((VcfContigMetadata)line).getId(); - if (m_contig.containsKey(id)) throw new IllegalArgumentException("Duplicate CONTIG metadata with ID " + id); + String id = ((VcfContigMetadata)line).id(); + if (m_contig.containsKey(id)) { + throw new IllegalArgumentException("Duplicate CONTIG metadata with ID " + id); + } m_contig.put(id, (VcfContigMetadata)line); } if (line instanceof VcfPedigreeMetadata) { @@ -223,7 +240,7 @@ public Builder addLine(@Nonnull VcfMetadata line) { } if (line instanceof VcfHeaderMetadata) { if (m_header != null) throw new IllegalArgumentException("Duplicate VCF header"); - m_header = ((VcfHeaderMetadata)line); + m_header = (VcfHeaderMetadata)line; } return this; } @@ -234,8 +251,14 @@ public Builder addLine(@Nonnull VcfMetadata line) { @Nonnull @Override public VcfMetadataCollection build() { - Preconditions.checkState(m_vcfVersion != null, "VCF version was not set"); - Preconditions.checkState(m_header != null, "VCF header line (starting with a single #) was not set"); + Preconditions.checkState( + null != m_vcfVersion, + "VCF version was not set" + ); + 
Preconditions.checkState( + null != m_header, + "VCF header line (starting with a single #) was not set" + ); return new VcfMetadataCollection(this); } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollectionI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollectionI.java new file mode 100644 index 0000000..e50c158 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfMetadataCollectionI.java @@ -0,0 +1,49 @@ +package org.pharmgkb.parsers.vcf.model; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.pharmgkb.parsers.vcf.model.metadata.*; + +import javax.annotation.Nonnull; + +public interface VcfMetadataCollectionI { + + /** + * @return The VCF header line, minus the {@code ##vcfVersion=} + */ + @Nonnull + String vcfVersion(); + + @Nonnull + ImmutableList lines(); + + @Nonnull + ImmutableMap alt(); + + @Nonnull + ImmutableMap filter(); + + @Nonnull + ImmutableMap format(); + + @Nonnull + ImmutableMap info(); + + @Nonnull + ImmutableMap sample(); + + @Nonnull + ImmutableMap contig(); + + @Nonnull + ImmutableList pedigree(); + + @Nonnull + ImmutableList assembly(); + + @Nonnull + ImmutableList pedigreeDb(); + + @Nonnull + VcfHeaderMetadata header(); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfPosition.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfPosition.java index 74e0178..6b43282 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfPosition.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfPosition.java @@ -1,7 +1,6 @@ package org.pharmgkb.parsers.vcf.model; import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.*; import org.pharmgkb.parsers.ObjectBuilder; @@ -10,22 +9,23 @@ import org.pharmgkb.parsers.model.Strand; import org.pharmgkb.parsers.vcf.model.allele.VcfAllele; 
import org.pharmgkb.parsers.vcf.model.allele.VcfBasesAllele; -import org.pharmgkb.parsers.vcf.model.extra.ReservedFormatProperty; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedFormatProperty; import org.pharmgkb.parsers.vcf.model.extra.VcfGenotype; -import org.pharmgkb.parsers.vcf.utils.VcfAlleleFactory; +import org.pharmgkb.parsers.vcf.factories.VcfAlleleFactory; +import org.pharmgkb.parsers.vcf.model.extra.VcfGenotypeI; import org.pharmgkb.parsers.vcf.utils.VcfPatterns; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; -import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; import javax.annotation.concurrent.NotThreadSafe; import java.lang.invoke.MethodHandles; import java.util.*; import java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.IntStream; /** * Stores the entire contents of a single non-metadata (and non-header) VCF line. @@ -49,12 +49,12 @@ public class VcfPosition { private final ImmutableList m_samples; @Nonnull - public ImmutableList getFormat() { + public ImmutableList format() { return m_format; } @Nonnull - public String getChromosome() { + public String chromosome() { return m_chromosome; } @@ -62,7 +62,7 @@ public String getChromosome() { * Returns the 0-based position. * @return May be -1 for telomers; nonnegative otherwise */ - public long getPosition() { + public long position() { return m_position; } @@ -70,60 +70,59 @@ public long getPosition() { * @return The 0-based locus; note that the {@link Strand} is always {@link Strand#PLUS +}. 
*/ @Nonnull - public Locus getLocus() { + public Locus locus() { return new Locus(m_chromosome, m_position, Strand.PLUS); } @Nonnull - public ImmutableList getIds() { + public ImmutableList ids() { return m_ids; } @Nonnull - public ImmutableList getAllAlleles() { + public ImmutableList allAlleles() { // note that this isn't fast return new ImmutableList.Builder().add(m_ref).addAll(m_alts).build(); } @Nonnull - public VcfAllele getRef() { + public VcfAllele ref() { return m_ref; } @Nonnull - public ImmutableList getAlts() { + public ImmutableList alts() { return m_alts; } @Nonnull - public Optional getQuality() { + public Optional quality() { return m_quality; } @Nonnull - public ImmutableList getFilters() { + public ImmutableList filters() { return m_filters; } @Nonnull - public VcfInfo getInfo() { + public VcfInfo info() { return m_info; } @Nonnull - public ImmutableList getSamples() { + public ImmutableList samples() { return m_samples; } /** - * @return A list with one element per sample; an element is {@link Optional#empty()} iff the {@link ReservedFormatProperty#Genotype GT} is not specified for that sample + * @return A list with one element per sample; an element is {@link Optional#empty()} + * iff the {@link VcfReservedFormatProperty#Genotype GT} is not specified for that sample */ @Nonnull - public ImmutableList> getGenotypes() { - ImmutableList.Builder> builder = new ImmutableList.Builder<>(); - for (int i = 0; i < m_samples.size(); i++) { - builder.add(getGenotype(i)); - } + public ImmutableList> genotypes() { + ImmutableList.Builder> builder = new ImmutableList.Builder<>(); + IntStream.range(0, m_samples.size()).mapToObj(this::getGenotype).forEach(builder::add); return builder.build(); } @@ -131,11 +130,11 @@ public ImmutableList> getGenotypes() { * @param index Starting at 0 */ @Nonnull - public Optional getGenotype(@Nonnegative int index) { - return VcfGenotype.fromGtString(this, m_samples.get(index).get(ReservedFormatProperty.Genotype)); + public 
Optional getGenotype(@Nonnegative int index) { + return VcfGenotype.fromGtString(this, m_samples.get(index).get(VcfReservedFormatProperty.Genotype)); } - private VcfPosition(@Nonnull Builder builder) { + protected VcfPosition(@Nonnull Builder builder) { m_chromosome = builder.m_chromosome; m_position = builder.m_position; m_ids = ImmutableList.copyOf(builder.m_ids); @@ -148,29 +147,43 @@ private VcfPosition(@Nonnull Builder builder) { m_samples = ImmutableList.copyOf(builder.m_samples); } - @Override - public boolean equals(@Nullable Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - VcfPosition that = (VcfPosition) o; - return com.google.common.base.Objects.equal(m_position, that.m_position) && - Objects.equal(m_chromosome, that.m_chromosome) && - Objects.equal(m_ids, that.m_ids) && - Objects.equal(m_ref, that.m_ref) && - Objects.equal(m_alts, that.m_alts) && - Objects.equal(m_quality, that.m_quality) && - Objects.equal(m_filters, that.m_filters) && - Objects.equal(m_info, that.m_info) && - Objects.equal(m_format, that.m_format) && - Objects.equal(m_samples, that.m_samples); - } - - @Override - public int hashCode() { - return Objects.hashCode(m_chromosome, m_position, m_ids, m_ref, m_alts, m_quality, m_filters, m_info, m_format, m_samples); - } - - @Nonnull + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (null == obj || getClass() != obj.getClass()) { + return false; + } + final var o = (VcfPosition) obj; + return m_position == o.m_position + && Objects.equals(m_chromosome, o.m_chromosome) + && Objects.equals(m_ids, o.m_ids) + && Objects.equals(m_ref, o.m_ref) && Objects.equals(m_alts, o.m_alts) + && Objects.equals(m_quality, o.m_quality) + && Objects.equals(m_filters, o.m_filters) + && Objects.equals(m_info, o.m_info) + && Objects.equals(m_format, o.m_format) + && Objects.equals(m_samples, o.m_samples); + } + + @Override + public int hashCode() { + return 
Objects.hash( + m_chromosome, + m_position, + m_ids, + m_ref, + m_alts, + m_quality, + m_filters, + m_info, + m_format, + m_samples + ); + } + + @Nonnull @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -195,24 +208,23 @@ public String toString() { public static class Builder implements ObjectBuilder { // don't even both checking this; it's HUGE - private static final Range sf_forbiddenRange = Range.closed((long)-2E31, (long)-2E31 + 7); + private static final Range sf_forbiddenRange = Range.closed((long)-2.0E31, (long)-2.0E31 + 7); // these are effectively final private String m_chromosome; private long m_position; private VcfBasesAllele m_ref; - private List m_ids = new ArrayList<>(); - private List m_alts = new ArrayList<>(); + private final List m_ids = new ArrayList<>(); + private final List m_alts = new ArrayList<>(); private Optional m_quality = Optional.empty(); - private List m_filters = new ArrayList<>(); - private LinkedListMultimap m_info = LinkedListMultimap.create(); + private final List m_filters = new ArrayList<>(); + private final LinkedListMultimap m_info = LinkedListMultimap.create(); private List m_format = new ArrayList<>(); private List m_samples = new ArrayList<>(); - @SuppressWarnings("DuplicatedCode") public Builder(@Nonnull Builder builder) { - Preconditions.checkNotNull(builder, "Builder cannot be null"); + Objects.requireNonNull(builder, "Builder cannot be null"); m_chromosome = builder.m_chromosome; m_position = builder.m_position; m_ids.addAll(builder.m_ids); @@ -228,7 +240,7 @@ public Builder(@Nonnull Builder builder) { @SuppressWarnings("DuplicatedCode") public Builder(@Nonnull VcfPosition position) { - Preconditions.checkNotNull(position, "VcfPosition cannot be null"); + Objects.requireNonNull(position, "VcfPosition cannot be null"); m_chromosome = position.m_chromosome; m_position = position.m_position; m_ids.addAll(position.m_ids); @@ -243,8 +255,11 @@ public Builder(@Nonnull VcfPosition position) { } 
public Builder(@Nonnull String chromosome, long position, @Nonnull String ref) { - VcfAllele translatedRef = VcfAlleleFactory.translate(ref); - Preconditions.checkArgument(translatedRef instanceof VcfBasesAllele, "REF must match [ATGCNatgcn] but was " + ref); + VcfAllele translatedRef = new VcfAlleleFactory().translate(ref); + Preconditions.checkArgument( + translatedRef instanceof VcfBasesAllele, + "REF must match [ATGCNatgcn] but was " + ref + ); init(chromosome, position, (VcfBasesAllele)translatedRef); } @@ -253,8 +268,8 @@ public Builder(@Nonnull String chromosome, long position, @Nonnull VcfBasesAllel } private void init(@Nonnull String chromosome, long position, @Nonnull VcfBasesAllele ref) { - Preconditions.checkNotNull(chromosome, "Chromosome cannot be null"); - Preconditions.checkNotNull(ref, "REF cannot be null"); + Objects.requireNonNull(chromosome, "Chromosome cannot be null"); + Objects.requireNonNull(ref, "REF cannot be null"); Preconditions.checkArgument(position > -2, "Position must be -1 or higher (0-based), and -1 only indicates the position is telomeric"); Preconditions.checkArgument(!sf_forbiddenRange.contains(position), "Int types cannot be between -2E31 and -2E31+7"); m_chromosome = chromosome; @@ -279,7 +294,7 @@ public Builder addIdsIfNotPresent(@Nonnull Collection ids) { @Nonnull public Builder addId(@Nonnull String id) { - Preconditions.checkNotNull(id, "ID cannot be null"); + Objects.requireNonNull(id, "ID cannot be null"); m_ids.add(id); return this; } @@ -289,7 +304,7 @@ public Builder addId(@Nonnull String id) { */ @Nonnull public Builder addIdIfNotPresent(@Nonnull String id) { - Preconditions.checkNotNull(id, "ID cannot be null"); + Objects.requireNonNull(id, "ID cannot be null"); if (!m_ids.contains(id)) m_ids.add(id); return this; } @@ -302,13 +317,13 @@ public Builder addAlts(@Nonnull Collection alts) { @Nonnull public Builder addAlt(@Nonnull String alt) { - Preconditions.checkNotNull(alt, "ALT cannot be null"); - return 
addAlt(VcfAlleleFactory.translate(alt)); + Objects.requireNonNull(alt, "ALT cannot be null"); + return addAlt(new VcfAlleleFactory().translate(alt)); } @Nonnull public Builder addAlt(@Nonnull VcfAllele alt) { - Preconditions.checkNotNull(alt, "ALT cannot be null"); + Objects.requireNonNull(alt, "ALT cannot be null"); m_alts.add(alt); return this; } @@ -318,7 +333,7 @@ public Builder addAlt(@Nonnull VcfAllele alt) { */ @Nonnull public Builder setQuality(@Nonnull Optional quality) { - Preconditions.checkNotNull(quality, "Quality cannot be null"); + Objects.requireNonNull(quality, "Quality cannot be null"); m_quality = quality; return this; } @@ -337,16 +352,16 @@ public Builder addFiltersIfNotPresent(@Nonnull Collection filters) { @Nonnull public Builder addFilter(@Nonnull String filter) { - Preconditions.checkNotNull(filter, "FILTER cannot be null"); - Preconditions.checkArgument(!filter.equals("0"), "Filter cannot be 0"); + Objects.requireNonNull(filter, "FILTER cannot be null"); + Preconditions.checkArgument(!"0".equals(filter), "Filter cannot be 0"); m_filters.add(filter); return this; } @Nonnull public Builder addFilterIfNotPresent(@Nonnull String filter) { - Preconditions.checkNotNull(filter, "FILTER cannot be null"); - Preconditions.checkArgument(!filter.equals("0"), "Filter cannot be 0"); + Objects.requireNonNull(filter, "FILTER cannot be null"); + Preconditions.checkArgument(!"0".equals(filter), "Filter cannot be 0"); if (!m_filters.contains(filter)) m_filters.add(filter); return this; } @@ -375,8 +390,8 @@ public Builder putInfo(@Nonnull String key, @Nonnull Collection values) @Nonnull public Builder putInfo(@Nonnull String key, @Nonnull String value) { - Preconditions.checkNotNull(key, "INFO key cannot be null"); - Preconditions.checkNotNull(value, "INFO value cannot be null"); + Objects.requireNonNull(key, "INFO key cannot be null"); + Objects.requireNonNull(value, "INFO value cannot be null"); Preconditions.checkArgument(!value.isEmpty() || 
!m_info.containsKey(key), "INFO value can only be a singleton list of an empty string or a list of non-empty strings"); check("INFO key", key, VcfPatterns.SINGLE_INFO_KEY_PATTERN); @@ -392,7 +407,7 @@ public Builder addFormats(@Nonnull Collection formats) { @Nonnull public Builder addFormat(@Nonnull String format) { - Preconditions.checkNotNull(format, "FORMAT cannot be null"); + Objects.requireNonNull(format, "FORMAT cannot be null"); check("FORMAT", format, VcfPatterns.SINGLE_FORMAT_PATTERN); m_format.add(format); return this; @@ -406,7 +421,7 @@ public Builder addSamples(@Nonnull Collection samples) { @Nonnull public Builder addSample(@Nonnull VcfSample sample) { - Preconditions.checkNotNull(sample, "Sample cannot be null"); + Objects.requireNonNull(sample, "Sample cannot be null"); m_samples.add(sample); return this; } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfSample.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfSample.java index c347c44..52cc682 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfSample.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/VcfSample.java @@ -1,15 +1,14 @@ package org.pharmgkb.parsers.vcf.model; import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.collect.ImmutableCollection; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.pharmgkb.parsers.ObjectBuilder; -import org.pharmgkb.parsers.vcf.model.extra.ReservedFormatProperty; -import org.pharmgkb.parsers.vcf.utils.VcfConversionUtils; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedFormatProperty; +import org.pharmgkb.parsers.vcf.factories.VcfPropertyFactory; import org.pharmgkb.parsers.vcf.utils.VcfPatterns; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -45,16 +44,16 @@ public Optional get(@Nonnull String key) 
{ } @Nonnull - public Optional get(@Nonnull ReservedFormatProperty key) { - return Optional.ofNullable(m_properties.get(key.getId())); + public Optional get(@Nonnull VcfReservedFormatProperty key) { + return Optional.ofNullable(m_properties.get(key.id())); } /** - * Gets the property and converts it to the correct class; see {@link VcfConversionUtils}. + * Gets the property and converts it to the correct class; see {@link VcfPropertyFactory}. */ @Nonnull - public Optional getConverted(@Nonnull ReservedFormatProperty key) { - return VcfConversionUtils.convertProperty(key, Optional.ofNullable(m_properties.get(key.getId()))); + public Optional getConverted(@Nonnull VcfReservedFormatProperty key) { + return new VcfPropertyFactory().convertProperty(key, Optional.ofNullable(m_properties.get(key.id()))); } @Nonnull @@ -70,8 +69,8 @@ public ImmutableSet keySet() { return m_properties.keySet(); } - public boolean containsKey(@Nonnull ReservedFormatProperty key) { - return m_properties.containsKey(key.getId()); + public boolean containsKey(@Nonnull VcfReservedFormatProperty key) { + return m_properties.containsKey(key.id()); } public boolean containsKey(@Nonnull String key) { @@ -99,7 +98,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; VcfSample vcfSample = (VcfSample) o; - return Objects.equal(m_properties, vcfSample.m_properties); + return Objects.equals(m_properties, vcfSample.m_properties); } @Override @@ -133,8 +132,8 @@ public Builder() { * This is a weird constructor provided only for use by the parser. 
*/ public Builder(@Nonnull Collection keys, @Nonnull Collection values) { - Preconditions.checkNotNull(keys, "Set of keys cannot be null"); - Preconditions.checkNotNull(values, "Set of values cannot be null"); + Objects.requireNonNull(keys, "Set of keys cannot be null"); + Objects.requireNonNull(values, "Set of values cannot be null"); // apparently, trailing fields can be dropped Preconditions.checkArgument(keys.size() >= values.size(), "Number of FORMAT properties (" + keys.size() + ") is less than the number of SAMPLE values (" + values.size() + ")"); @@ -146,27 +145,27 @@ public Builder(@Nonnull Collection keys, @Nonnull Collection val } public Builder(@Nonnull Builder builder) { - Preconditions.checkNotNull(builder, "Builder cannot be null"); + Objects.requireNonNull(builder, "Builder cannot be null"); builder.m_properties.forEach((key, value) -> m_properties.put(key, value)); } public Builder(@Nonnull VcfSample sample) { - Preconditions.checkNotNull(sample, "VcfSample cannot be null"); + Objects.requireNonNull(sample, "VcfSample cannot be null"); sample.m_properties.forEach((key, value) -> m_properties.put(key, value)); } @Nonnull public Builder put(@Nonnull String key, @Nullable String value) { - Preconditions.checkNotNull(key, "Sample key cannot be null"); + Objects.requireNonNull(key, "Sample key cannot be null"); return put(key, Optional.ofNullable(value)); } @Nonnull public Builder put(@Nonnull String key, @Nonnull Optional value) { - Preconditions.checkNotNull(key, "Sample key cannot be null"); - Preconditions.checkNotNull(value, "Sample value cannot be null"); + Objects.requireNonNull(key, "Sample key cannot be null"); + Objects.requireNonNull(value, "Sample value cannot be null"); Preconditions.checkArgument(VcfPatterns.SINGLE_FORMAT_PATTERN.matcher(key).matches()); - if (key.equals(ReservedFormatProperty.Genotype.getId()) && !m_properties.isEmpty()) { + if (key.equals(VcfReservedFormatProperty.Genotype.id()) && !m_properties.isEmpty()) { 
sf_logger.warn("VCF specification requires GT to be the first key in the FORMAT/SAMPLE fields if it is present"); } value.ifPresent(s -> m_properties.put(key, s)); diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBasesAllele.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBasesAllele.java index a72e21c..9cc89b5 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBasesAllele.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBasesAllele.java @@ -5,6 +5,7 @@ import javax.annotation.Nonnegative; import javax.annotation.Nonnull; +import java.util.Objects; import java.util.regex.Pattern; /** @@ -18,7 +19,7 @@ public class VcfBasesAllele implements VcfAllele { @Nonnull public static VcfBasesAllele fromVcf(@Nonnull String string) { - Preconditions.checkNotNull(string, "Allele string cannot be null"); + Objects.requireNonNull(string, "Allele string cannot be null"); return new VcfBasesAllele(string); } @@ -26,9 +27,11 @@ public static VcfBasesAllele fromVcf(@Nonnull String string) { * @param string A string following the VCF specification for the REF or ALT columns */ public VcfBasesAllele(@Nonnull String string) { - Preconditions.checkNotNull(string, "Allele string cannot be null"); - Preconditions.checkArgument(sf_pattern.matcher(string).matches(), - string + " does not look like an allele"); + Objects.requireNonNull(string, "Allele string cannot be null"); + Preconditions.checkArgument( + sf_pattern.matcher(string).matches(), + string + " does not look like an allele" + ); m_string = string; } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBreakpointAllele.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBreakpointAllele.java index d6080f0..e210e68 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBreakpointAllele.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfBreakpointAllele.java @@ -25,29 +25,29 @@ 
public class VcfBreakpointAllele implements VcfAllele { private final Orientation m_orientation; @Nonnull - public String getReplacementString() { + public String replacementString() { return m_replacementString; } @Nonnull - public Locus getLocus() { + public Locus locus() { return m_locus; } @Nonnull - public JoinSequencePlacement getPlacement() { + public JoinSequencePlacement placement() { return m_placement; } @Nonnull - public Orientation getOrientation() { + public Orientation orientation() { return m_orientation; } @Nonnull @Override public String toVcfString() { - String locusString = handleSymbolicChromsomeName(m_locus) + ":" + (m_locus.getPosition() + 1); + String locusString = handleSymbolicChromsomeName(m_locus) + ":" + (m_locus.position() + 1); char bracket = m_orientation == Orientation.Forward? '[' : ']'; return switch (m_placement) { case Prefix -> bracket + locusString + bracket + m_replacementString; @@ -58,7 +58,7 @@ public String toVcfString() { @Nonnull public static VcfBreakpointAllele fromVcfAlt(@Nonnull String string) { - Preconditions.checkNotNull(string, "Allele string cannot be null"); + Objects.requireNonNull(string, "Allele string cannot be null"); Preconditions.checkArgument(VcfPatterns.ALT_BREAKPOINT_PATTERN.matcher(string).matches(), "Invalid VCF breakpoint " + string); Orientation orientation = string.contains("[")? Orientation.Forward : Orientation.Reverse; @@ -77,7 +77,7 @@ public static VcfBreakpointAllele fromVcfAlt(@Nonnull String string) { placement = JoinSequencePlacement.Prefix; } - locus = new Locus(handleSymbolicChromsomeName(locus), locus.getPosition() - 1, Strand.PLUS); + locus = new Locus(handleSymbolicChromsomeName(locus), locus.position() - 1, Strand.PLUS); return new VcfBreakpointAllele(replacementString, locus, placement, orientation); } @@ -85,7 +85,7 @@ public static VcfBreakpointAllele fromVcfAlt(@Nonnull String string) { * Handle unescaping. 
*/ private static String handleSymbolicChromsomeName(@Nonnull Locus locus) { - String chrName = locus.getChromosome().getOriginalName(); + String chrName = locus.chromosome().original(); try { return VcfSymbolicAllele.fromVcfAlt(chrName).toVcfString(); } catch (IllegalArgumentException ignored) { @@ -93,12 +93,14 @@ private static String handleSymbolicChromsomeName(@Nonnull Locus locus) { } } - public VcfBreakpointAllele(@Nonnull String replacementString, @Nonnull Locus locus, - @Nonnull JoinSequencePlacement placement, @Nonnull Orientation orientation) { - Preconditions.checkNotNull(locus, "Locus cannot be null"); - Preconditions.checkNotNull(replacementString, "Replacement string cannot be null"); - Preconditions.checkNotNull(placement, "Placement cannot be null"); - Preconditions.checkNotNull(orientation, "Orientation cannot be null"); + public VcfBreakpointAllele( + @Nonnull String replacementString, @Nonnull Locus locus, + @Nonnull JoinSequencePlacement placement, @Nonnull Orientation orientation + ) { + Objects.requireNonNull(locus, "Locus cannot be null"); + Objects.requireNonNull(replacementString, "Replacement string cannot be null"); + Objects.requireNonNull(placement, "Placement cannot be null"); + Objects.requireNonNull(orientation, "Orientation cannot be null"); m_replacementString = replacementString; m_locus = locus; m_placement = placement; @@ -137,6 +139,7 @@ public enum Orientation { } public enum JoinSequencePlacement { - Prefix, Suffix + Prefix, + Suffix } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfDeletedAllele.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfDeletedAllele.java index 3aea7f4..ea62264 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfDeletedAllele.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfDeletedAllele.java @@ -13,8 +13,6 @@ public class VcfDeletedAllele implements VcfAllele, Serializable { public static final VcfDeletedAllele 
DELETED = new VcfDeletedAllele(); - private VcfDeletedAllele() { } - @Nonnull @Override public String toVcfString() { diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfSymbolicAllele.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfSymbolicAllele.java index ea6b771..9b65e3e 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfSymbolicAllele.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/VcfSymbolicAllele.java @@ -18,7 +18,7 @@ public class VcfSymbolicAllele implements VcfAllele { @Nonnull public static VcfSymbolicAllele fromVcfAlt(@Nonnull String string) { - Preconditions.checkNotNull(string, "Allele string cannot be null"); + Objects.requireNonNull(string, "Allele string cannot be null"); Preconditions.checkArgument(string.startsWith("<") && string.endsWith(">")); return new VcfSymbolicAllele( VcfEscapers.SYMBOLIC_ALT_ID.unescape(string.substring(1, string.length() - 1)) @@ -26,7 +26,7 @@ public static VcfSymbolicAllele fromVcfAlt(@Nonnull String string) { } public VcfSymbolicAllele(@Nonnull String id) { - Preconditions.checkNotNull(id, "Symbolic allele ID cannot be null"); + Objects.requireNonNull(id, "Symbolic allele ID cannot be null"); m_id = VcfEscapers.SYMBOLIC_ALT_ID.unescape(id); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/package-info.java new file mode 100644 index 0000000..9f1b6aa --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/allele/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.model.allele; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/GenotypeLikelihoods.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/GenotypeLikelihoods.java deleted file mode 100644 index e56a396..0000000 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/GenotypeLikelihoods.java +++ /dev/null @@ -1,88 +0,0 @@ -package 
org.pharmgkb.parsers.vcf.model.extra; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import org.pharmgkb.parsers.model.GeneralizedBigDecimal; -import org.pharmgkb.parsers.vcf.model.VcfPosition; - -import javax.annotation.Nonnegative; -import javax.annotation.Nonnull; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -/** - * Parses out the {@code GL} string into a map from {@link VcfGenotype genotypes} to their likelihoods. - * @author Douglas Myers-Turnbull - */ -public class GenotypeLikelihoods { - - private final VcfPosition m_position; - - public GenotypeLikelihoods(@Nonnull VcfPosition position) { - m_position = position; - } - - /** - * Returns a map from each {@link VcfGenotype} to its likelihood, or {@link Optional#empty()} if the {@code GL} is not present. - * TODO Fix - */ - @Nonnull - public Optional> getLikelihoods(@Nonnegative int index) { - - VcfGenotype genotype = m_position.getGenotype(index).orElse(null); - int ploidy = genotype==null? 
2 : genotype.ploidy(); // VCF spec says assume diploid - boolean isPhased = genotype != null && genotype.isPhased(); - - List likelihoods = - (List) m_position.getSamples() - .get(index) - .getConverted(ReservedFormatProperty.GenotypeLikelihoods) - .orElse(null); - if (likelihoods == null) return Optional.empty(); - - List genotypes = - ordering(ploidy, m_position.getAlts().size() + 1, ImmutableList.of(), new ArrayList<>(ploidy*(m_position.getAlts().size()+1))) - .stream() - .map(l -> new VcfGenotype.Builder(m_position, isPhased) - .requirePloidy(ploidy) -// .addAlleles() // TODO - .build() - ).collect(Collectors.toList()); - - ImmutableMap.Builder map = new ImmutableMap.Builder<>(); - if (genotypes.size() != likelihoods.size()) { - throw new IllegalArgumentException("Length of GL does not match length expected from GT"); - } - for (int i = 0; i < genotypes.size(); i++) { - map.put(genotypes.get(i), likelihoods.get(i)); - } - - return Optional.of(ImmutableMap.copyOf(map.build())); - } - - private List> ordering(int p, int n, List suffix, List> results) { - /* - Ordering (P , N , suffix =""): - for a in 0 . . . 
N - if (P == 1) println str (a) + suffix - if (P > 1) Ordering (P -1, a, str (a) + suffix ) - */ - for (int i = 0; i < n; i++) { - ImmutableList appended = new ImmutableList.Builder() - .add(i).addAll(suffix) - .build(); - if (p == 1) results.add(appended); - if (p > 1) return ordering(p - 1, n, appended, results); - } - return results; - } - - @Override - public String toString() { - return "GenotypeLikelihoods{" + - "position=" + m_position + - '}'; - } -} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/AltStructuralVariant.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfAltStructuralVariant.java similarity index 56% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/AltStructuralVariant.java rename to vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfAltStructuralVariant.java index 45b433f..50bc990 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/AltStructuralVariant.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfAltStructuralVariant.java @@ -2,13 +2,12 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableList; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedStructuralVariantCode; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; import javax.annotation.concurrent.Immutable; -import java.util.ArrayList; import java.util.List; -import java.util.Optional; /** * A strictly validated VCF metadata ALT code of the form: @@ -16,7 +15,7 @@ * ##ALT= * } * Where {@code ID} is a colon-delimited list of identifiers. Some of these identifiers are reserved, as coded in the - * {@link ReservedStructuralVariantCode} class. The first identifier (at level 0) is required to be reserved. + * {@link VcfReservedStructuralVariantCode} class. The first identifier (at level 0) is required to be reserved. * As explicitly stated in the spec, these codes are case-sensitive. 
* Example: * {@code @@ -29,55 +28,44 @@ * @author Douglas Myers-Turnbull */ @Immutable -public class AltStructuralVariant { +public class VcfAltStructuralVariant { - public AltStructuralVariant(@Nonnull List components) { + public VcfAltStructuralVariant(@Nonnull List components) { if (components instanceof ImmutableList) { - //noinspection AssignmentOrReturnOfFieldWithMutableType - m_components = ((ImmutableList)components); + //noinspection AssignmentOrReturnOfFieldWithMutableType + m_components = (ImmutableList)components; } else { m_components = ImmutableList.copyOf(components); } + m_topLevel = m_components.stream() + .findFirst() + .flatMap(VcfReservedStructuralVariantCode::fromId) + .orElseThrow(() -> new IllegalArgumentException( + "Top-level structural variant code must be a top-level reserved code (e.g. DEL or CNV)" + )); } private static final Splitter sf_colon = Splitter.on(":"); private final ImmutableList m_components; - private ReservedStructuralVariantCode m_topLevel; // effectively final + private final VcfReservedStructuralVariantCode m_topLevel; /** * @param string The full code (e.g. 
INS:ME:LINE:type-a1) */ - public AltStructuralVariant(@Nonnull String string) { - - if (string.isEmpty()) { - throw new IllegalArgumentException("Structural variant code must not be empty"); - } - + public VcfAltStructuralVariant(@Nonnull String string) { + assert !string.isEmpty() : "Structural variant code must not be empty"; List components = sf_colon.splitToList(string); - List comps = new ArrayList<>(components.size()); - - //noinspection NonConstantStringShouldBeStringBuffer - String stringFromTop = ""; - int level = 0; - // TODO: This is extremely confusing - for (; level < components.size(); level++) { - stringFromTop += components.get(level); - Optional topLevel = ReservedStructuralVariantCode.fromId(stringFromTop); - topLevel.ifPresent(reservedStructuralVariantCode -> m_topLevel = reservedStructuralVariantCode); - comps.add(components.get(level)); - } - - // Make sure the top-level code exists - if (m_topLevel == null) { - throw new IllegalArgumentException("Top-level structural variant code must be a top-level reserved code (e.g. DEL or CNV)"); - } - - m_components = ImmutableList.copyOf(comps); + m_topLevel = components.stream().findFirst() + .flatMap(VcfReservedStructuralVariantCode::fromId) + .orElseThrow(() -> new IllegalArgumentException( + "Top-level structural variant code must be a top-level reserved code (e.g. 
DEL or CNV)" + )); + m_components = ImmutableList.copyOf(components); } @Nonnull - public ReservedStructuralVariantCode getTopLevelCode() { + public VcfReservedStructuralVariantCode topLevelCode() { return m_topLevel; } @@ -85,7 +73,7 @@ public ReservedStructuralVariantCode getTopLevelCode() { * @return The list of codes in order from level 0 to level n; for example ("INS", "ME", "LINE") */ @Nonnull - public ImmutableList getComponents() { + public ImmutableList components() { return m_components; } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotype.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotype.java index aa79bc3..8a7c9eb 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotype.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotype.java @@ -8,7 +8,7 @@ import org.pharmgkb.parsers.model.Locus; import org.pharmgkb.parsers.vcf.model.VcfPosition; import org.pharmgkb.parsers.vcf.model.allele.VcfAllele; -import org.pharmgkb.parsers.vcf.utils.VcfAlleleFactory; +import org.pharmgkb.parsers.vcf.factories.VcfAlleleFactory; import javax.annotation.Nonnegative; import javax.annotation.Nonnull; @@ -19,17 +19,8 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; -/** - * A multiploid genotype in VCF. - * Note that this class has both a builder and static factory methods. 
- * Example use: - * {@code - * VcfGenotype genotype = VcfGenotype.fromGtString(position, sample.get(ReservedFormatProperty.Genotype)); - * } - * @author Douglas Myers-Turnbull - */ @Immutable -public class VcfGenotype { +public class VcfGenotype implements VcfGenotypeI { private final ImmutableList> m_alleles; @@ -40,9 +31,9 @@ public class VcfGenotype { private final boolean m_isPhased; @Nonnull - public static Optional fromGtString(@Nonnull VcfPosition position, @Nonnull Optional gtString) { - Preconditions.checkNotNull(position, "VcfPosition cannot be null"); - Preconditions.checkNotNull(gtString, "Genotype string cannot be null"); + public static Optional fromGtString(@Nonnull VcfPosition position, @Nonnull Optional gtString) { + Objects.requireNonNull(position, "VcfPosition cannot be null"); + Objects.requireNonNull(gtString, "Genotype string cannot be null"); return gtString.map(gt -> new VcfGenotype.Builder(position, gt.contains("|")) .addAlleles(gt) .build() @@ -50,41 +41,46 @@ public static Optional fromGtString(@Nonnull VcfPosition position, } @Nonnull - public static VcfGenotype fromGtString(@Nonnull VcfPosition position, @Nonnull String gtString) { - Preconditions.checkNotNull(position, "VcfPosition cannot be null"); - Preconditions.checkNotNull(gtString, "Genotype string cannot be null"); + public static VcfGenotypeI fromGtString(@Nonnull VcfPosition position, @Nonnull String gtString) { + Objects.requireNonNull(position, "VcfPosition cannot be null"); + Objects.requireNonNull(gtString, "Genotype string cannot be null"); return new VcfGenotype.Builder(position, gtString.contains("|")) .addAlleles(gtString) .build(); } - private VcfGenotype(@Nonnull Builder builder) { + protected VcfGenotype(@Nonnull Builder builder) { m_isPhased = builder.m_isPhased; m_alleles = ImmutableList.copyOf(builder.m_alleles); m_indices = ImmutableList.copyOf(builder.m_indices); } - @Nonnegative + @Override + @Nonnegative public int ploidy() { return m_alleles.size(); } - 
@Nonnull - public ImmutableList> getAlleles() { + @Override + @Nonnull + public ImmutableList> alleles() { return m_alleles; } - @Nonnull - public ImmutableList getIndices() { + @Override + @Nonnull + public ImmutableList indices() { return m_indices; } - @Nonnull + @Override + @Nonnull public Optional getAllele(@Nonnegative int index) { return m_alleles.get(index); } - public boolean isPhased() { + @Override + public boolean isPhased() { return m_isPhased; } @@ -113,11 +109,8 @@ public int hashCode() { return Objects.hash(m_alleles, m_indices, m_isPhased); } - /** - * @return A string like {@code A||*}; - * in other words, like {@link #toVcfString()} except with {@link VcfAllele#toVcfString()} instead of the numeric index. - */ - @Nonnull + @Override + @Nonnull public String toSimpleString() { String delimiter = m_isPhased? "|" : "/"; return m_alleles.stream() @@ -125,10 +118,8 @@ public String toSimpleString() { .collect(Collectors.joining(delimiter)); } - /** - * @return A string like {@code 0|1} or {@code 0/0}. - */ - @Nonnull + @Override + @Nonnull public String toVcfString() { String delimiter = m_isPhased? "|" : "/"; return m_indices.stream() @@ -137,7 +128,7 @@ public String toVcfString() { } @NotThreadSafe - public static class Builder implements ObjectBuilder { + public static class Builder implements ObjectBuilder { private static final Splitter sf_slashOrBar = Splitter.on(Pattern.compile("[|/]")); @@ -150,28 +141,28 @@ public static class Builder implements ObjectBuilder { private List m_indices; // indicies in REF and ALT, REF=0, first ALT=1, ... 
public Builder(@Nonnull VcfPosition position, boolean isPhased) { - Preconditions.checkNotNull(position, "VcfPosition cannot be null"); + Objects.requireNonNull(position, "VcfPosition cannot be null"); m_isPhased = isPhased; m_ploidy = Optional.empty(); - m_knownAlleles = position.getAllAlleles(); - m_locus = position.getLocus(); + m_knownAlleles = position.allAlleles(); + m_locus = position.locus(); m_alleles = new ArrayList<>(2); m_indices = new ArrayList<>(2); } public Builder(@Nonnull VcfGenotype genotype, @Nonnull VcfPosition position) { - Preconditions.checkNotNull(genotype, "Genotype cannot be null"); - Preconditions.checkNotNull(position, "VcfPosition cannot be null"); + Objects.requireNonNull(genotype, "Genotype cannot be null"); + Objects.requireNonNull(position, "VcfPosition cannot be null"); m_ploidy = Optional.of(genotype.ploidy()); - m_knownAlleles = position.getAllAlleles(); - m_locus = position.getLocus(); + m_knownAlleles = position.allAlleles(); + m_locus = position.locus(); m_alleles = new ArrayList<>(genotype.m_alleles); m_indices = new ArrayList<>(genotype.m_indices); m_isPhased = genotype.m_isPhased; } public Builder(@Nonnull Builder builder) { - Preconditions.checkNotNull(builder, "Builder cannot be null"); + Objects.requireNonNull(builder, "Builder cannot be null"); m_ploidy = builder.m_ploidy; m_knownAlleles = builder.m_knownAlleles; m_locus = builder.m_locus; @@ -204,8 +195,8 @@ public Builder addNullAllele() { @Nonnull public Builder addAllele(@Nonnull String allele) { - Preconditions.checkNotNull(allele, "Allele cannot be null"); - return addAlleles(VcfAlleleFactory.translate(allele)); + Objects.requireNonNull(allele, "Allele cannot be null"); + return addAlleles(new VcfAlleleFactory().translate(allele)); } /** @@ -214,7 +205,7 @@ public Builder addAllele(@Nonnull String allele) { @Nonnull public Builder addAlleles(@Nonnull VcfAllele... 
alleles) { for (VcfAllele allele : alleles) { - Preconditions.checkNotNull(allele, "Allele cannot be null"); + Objects.requireNonNull(allele, "Allele cannot be null"); int index = m_knownAlleles.indexOf(allele); Preconditions.checkArgument(index > 0, "Allele " + allele + " not contained in position " + m_locus); @@ -243,7 +234,7 @@ public Builder addAlleles(@Nonnull int... indices) { */ @Nonnull public Builder addAlleles(@Nonnull Collection indices) { - Preconditions.checkNotNull(indices, "Indicies collection cannot be null"); + Objects.requireNonNull(indices, "Indicies collection cannot be null"); List alleles = m_knownAlleles; for (int index : indices) { Preconditions.checkArgument(index > -1 && index < alleles.size(), @@ -266,7 +257,7 @@ public Builder removeAlleles(@Nonnull int... indices) { */ @Nonnull public Builder removeAlleles(@Nonnull Collection indices) { - Preconditions.checkNotNull(indices, "Indicies collection cannot be null"); + Objects.requireNonNull(indices, "Indicies collection cannot be null"); for (int index : indices) m_alleles.remove(index); m_indices.removeAll(indices); return this; @@ -277,7 +268,7 @@ public Builder removeAlleles(@Nonnull Collection indices) { */ @Nonnull @Override - public VcfGenotype build() { + public VcfGenotypeI build() { Preconditions.checkState( m_ploidy.isEmpty() || m_alleles.size() != m_ploidy.get(), "Required ploidy " + m_ploidy + " but got " + m_alleles.size() @@ -288,7 +279,7 @@ public VcfGenotype build() { @Nonnull public Builder addAlleles(@Nonnull String gtString) { - Preconditions.checkNotNull(gtString, "Genotype string cannot be null"); + Objects.requireNonNull(gtString, "Genotype string cannot be null"); boolean hasBar = gtString.contains("|"); boolean hasSlash = gtString.contains("/"); @@ -300,7 +291,7 @@ public Builder addAlleles(@Nonnull String gtString) { ); for (String s : sf_slashOrBar.splitToList(gtString)) { - if (s.equals(".")) { + if (".".equals(s)) { addNullAllele(); } else { try { diff --git 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeI.java new file mode 100644 index 0000000..0268877 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeI.java @@ -0,0 +1,46 @@ +package org.pharmgkb.parsers.vcf.model.extra; + +import com.google.common.collect.ImmutableList; +import org.pharmgkb.parsers.vcf.model.allele.VcfAllele; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.util.Optional; + +/** + * A multiploid genotype in VCF. + * Note that this class has both a builder and static factory methods. + * Example use: + * {@code + * VcfGenotype genotype = VcfGenotype.fromGtString(position, sample.get(ReservedFormatProperty.Genotype)); + * } + * @author Douglas Myers-Turnbull + */ +public interface VcfGenotypeI { + @Nonnegative + int ploidy(); + + @Nonnull + ImmutableList> alleles(); + + @Nonnull + ImmutableList indices(); + + @Nonnull + Optional getAllele(@Nonnegative int index); + + boolean isPhased(); + + /** + * @return A string like {@code A||*}; + * in other words, like {@link #toVcfString()} except with {@link VcfAllele#toVcfString()} instead of the numeric index. + */ + @Nonnull + String toSimpleString(); + + /** + * @return A string like {@code 0|1} or {@code 0/0}. 
+ */ + @Nonnull + String toVcfString(); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihood.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihood.java new file mode 100644 index 0000000..8fa7e06 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihood.java @@ -0,0 +1,85 @@ +package org.pharmgkb.parsers.vcf.model.extra; + +import com.google.common.collect.ImmutableCollection; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import org.pharmgkb.parsers.model.GeneralizedBigDecimalI; + +import javax.annotation.Nonnull; +import java.util.*; + +public class VcfGenotypeLikelihood extends AbstractMap { + private final ImmutableMap map; + + public VcfGenotypeLikelihood(ImmutableMap map) { + this.map = map; + } + + public ImmutableMap map() { + return map; + } + + public boolean isEmpty() { + return map.isEmpty(); + } + + public boolean containsKey(@Nonnull VcfGenotypeI key) { + return map.containsKey(key); + } + + public boolean containsValue(@Nonnull GeneralizedBigDecimalI value) { + return map.containsValue(value); + } + + @Nonnull + public GeneralizedBigDecimalI get(@Nonnull VcfGenotypeI key) { + GeneralizedBigDecimalI value = map.get(key); + if (null == value) { + throw new NoSuchElementException("No value for key " + key); + } + return value; + } + + @Nonnull + public GeneralizedBigDecimalI getOrDefault( + @Nonnull VcfGenotypeI key, + @Nonnull GeneralizedBigDecimalI defaultValue + ) { + return map.getOrDefault(key, defaultValue); + } + + @Nonnull + public ImmutableSet> entrySet() { + return map.entrySet(); + } + + @Nonnull + public ImmutableSet keySet() { + return map.keySet(); + } + + @Nonnull + public ImmutableCollection values() { + return map.values(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + 
VcfGenotypeLikelihood that = (VcfGenotypeLikelihood) o; + return Objects.equals(map, that.map); + } + + @Override + public int hashCode() { + return Objects.hash(map); + } + + @Override + public String toString() { + return new StringJoiner(", ", VcfGenotypeLikelihood.class.getSimpleName() + "[", "]") + .add("map=" + map) + .toString(); + } +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractor.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractor.java new file mode 100644 index 0000000..959c74e --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractor.java @@ -0,0 +1,109 @@ +package org.pharmgkb.parsers.vcf.model.extra; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.pharmgkb.parsers.model.GeneralizedBigDecimal; +import org.pharmgkb.parsers.vcf.model.VcfPosition; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedFormatProperty; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.IntStream; + +/** + * Parses out the {@code GL} string into a map from {@link VcfGenotype genotypes} to their likelihoods. 
+ * @author Douglas Myers-Turnbull + */ +public class VcfGenotypeLikelihoodExtractor implements VcfGenotypeLikelihoodExtractorI { + + private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final VcfPosition m_position; + + public VcfGenotypeLikelihoodExtractor(@Nonnull VcfPosition position) { + m_position = position; + sf_logger.warn("{} is not fully functional yet", getClass().getName()); + } + + /** + * Returns a map from each {@link VcfGenotype} to its likelihood, + * or {@link Optional#empty()} if the {@code GL} is not present. + * TODO Fix + */ + @Override + @Nonnull + public Optional getLikelihoods(@Nonnegative int index) { + + VcfGenotypeI genotype = m_position.getGenotype(index).orElse(null); + // VCF spec says assume diploid + int ploidy = Optional.ofNullable(genotype).map(VcfGenotypeI::ploidy).orElse(2); + boolean isPhased = genotype != null && genotype.isPhased(); + + List likelihoods = + (List) m_position.samples() + .get(index) + .getConverted(VcfReservedFormatProperty.GenotypeLikelihoods) + .orElse(null); + if (likelihoods == null) return Optional.empty(); + + List genotypes = + ordering( + ploidy, + m_position.alts().size() + 1, + ImmutableList.of(), + new ArrayList<>(ploidy*(m_position.alts().size()+1)) + ) + .stream() + .map(l -> new VcfGenotype.Builder(m_position, isPhased) + .requirePloidy(ploidy) +// .addAlleles() // TODO + .build() + ).toList(); + + ImmutableMap.Builder map = new ImmutableMap.Builder<>(); + if (genotypes.size() != likelihoods.size()) { + throw new IllegalArgumentException("Length of GL does not match length expected from GT"); + } + IntStream.range(0, genotypes.size()).forEach(i -> map.put(genotypes.get(i), likelihoods.get(i))); + + return Optional.of(new VcfGenotypeLikelihood(ImmutableMap.copyOf(map.build()))); + } + + @Override + @Nonnull + public VcfPosition position() { + return m_position; + } + + private List> ordering( + int p, int n, List suffix, List> results 
+ ) { + /* + Ordering (P , N , suffix =""): + for a in 0 . . . N + if (P == 1) println str (a) + suffix + if (P > 1) Ordering (P -1, a, str (a) + suffix ) + */ + for (int i = 0; i < n; i++) { + ImmutableList appended = new ImmutableList.Builder() + .add(i).addAll(suffix) + .build(); + if (p == 1) results.add(appended); + if (p > 1) return ordering(p - 1, n, appended, results); + } + return results; + } + + @Override + public String toString() { + return "GenotypeLikelihoods{" + + "position=" + m_position + + '}'; + } +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractorI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractorI.java new file mode 100644 index 0000000..9403d6d --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/VcfGenotypeLikelihoodExtractorI.java @@ -0,0 +1,14 @@ +package org.pharmgkb.parsers.vcf.model.extra; + +import org.pharmgkb.parsers.vcf.model.VcfPosition; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; +import java.util.Optional; + +public interface VcfGenotypeLikelihoodExtractorI { + @Nonnull + VcfPosition position(); + @Nonnull + Optional getLikelihoods(@Nonnegative int index); +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/package-info.java new file mode 100644 index 0000000..fb7fbd0 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.model.extra; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfContigMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfContigMetadata.java index dae58fb..364e5cb 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfContigMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfContigMetadata.java @@ 
-3,8 +3,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.pharmgkb.parsers.ObjectBuilder; -import org.pharmgkb.parsers.vcf.model.extra.ReservedStructuralVariantCode; -import org.pharmgkb.parsers.vcf.utils.PropertyMapBuilder; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedStructuralVariantCode; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilder; import org.pharmgkb.parsers.vcf.utils.VcfPatterns; import javax.annotation.Nonnegative; @@ -12,6 +12,7 @@ import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; import javax.annotation.concurrent.NotThreadSafe; +import java.util.Arrays; import java.util.Map; import java.util.Optional; @@ -33,9 +34,7 @@ public class VcfContigMetadata extends VcfIdMetadata { private static final ImmutableSet m_forbiddenIds; static { ImmutableSet.Builder builder = new ImmutableSet.Builder<>(); - for (ReservedStructuralVariantCode code : ReservedStructuralVariantCode.values()) { - builder.add(code.name()); - } + Arrays.stream(VcfReservedStructuralVariantCode.values()).map(Enum::name).forEach(builder::add); m_forbiddenIds = builder.build(); } @@ -46,11 +45,11 @@ public VcfContigMetadata(@Nonnull Map props) { super.require(ID, LENGTH); super.ensureNoExtras(ID, LENGTH, ASSEMBLY, MD5, SPECIES, TAXONOMY, URL); Preconditions.checkArgument( - VcfPatterns.CONTIG_ID_PATTERN.matcher(getId()).matches(), + VcfPatterns.CONTIG_ID_PATTERN.matcher(id()).matches(), "CONTIG ID must match " + VcfPatterns.CONTIG_ID_PATTERN.pattern() ); Preconditions.checkArgument( - !m_forbiddenIds.contains(getId()), + !m_forbiddenIds.contains(id()), "CONTIG ID cannot be a reserved structural variant code" ); m_length = Long.parseLong(props.get(LENGTH)); @@ -60,7 +59,7 @@ public VcfContigMetadata( @Nonnull String id, long length, @Nonnull String assembly, @Nullable String md5, @Nullable String species, @Nullable String taxonomy, @Nullable String url ) { - this(new 
PropertyMapBuilder() + this(new VcfPropertyMapBuilder() .put(ID, id) .put(LENGTH, String.valueOf(length)) .put(MD5, md5) @@ -74,32 +73,32 @@ public VcfContigMetadata( } - public long getLength() { + public long length() { return m_length; } @Nonnull - public String getAssembly() { + public String assembly() { return getPropertyRaw(ASSEMBLY).orElseThrow(() -> new IllegalStateException("Contig is missing required property " + ASSEMBLY)); } @Nonnull - public Optional getTaxonomy() { + public Optional taxonomy() { return getPropertyRaw(TAXONOMY); } @Nonnull - public Optional getSpecies() { + public Optional species() { return getPropertyUnquoted(SPECIES); } @Nonnull - public Optional getMd5() { + public Optional md5() { return getPropertyRaw(MD5); } @Nonnull - public Optional getUrl() { + public Optional url() { return getPropertyRaw(URL); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFilterMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFilterMetadata.java index 14a0f8e..5429574 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFilterMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFilterMetadata.java @@ -22,5 +22,4 @@ public VcfFilterMetadata(@Nonnull String id, @Nonnull String description) { super(VcfMetadataType.Filter, id, description); } - } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatMetadata.java index ecf6a70..b0867dc 100755 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatMetadata.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers.vcf.model.metadata; -import org.pharmgkb.parsers.vcf.utils.PropertyMapBuilder; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilder; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -29,15 +29,17 @@ public class VcfFormatMetadata extends VcfIdMetadata { public VcfFormatMetadata(@Nonnull Map props) { super(VcfMetadataType.Format, props); - super.require(ID, DESCRIPTION, NUMBER, TYPE); - super.ensureNoExtras(ID, DESCRIPTION, NUMBER, TYPE); + require(ID, DESCRIPTION, NUMBER, TYPE); + ensureNoExtras(ID, DESCRIPTION, NUMBER, TYPE); m_type = VcfFormatType.valueOf(props.get(TYPE)); m_number = new VcfFormatNumber(props.get(NUMBER)); } - public VcfFormatMetadata(@Nonnull String id, long length, @Nonnull String description, @Nonnull String number, - @Nonnull VcfFormatType type) { - this(new PropertyMapBuilder() + public VcfFormatMetadata( + @Nonnull String id, long length, @Nonnull String description, @Nonnull String number, + @Nonnull VcfFormatType type + ) { + this(new VcfPropertyMapBuilder() .put(ID, id) .put(DESCRIPTION, description) .put(NUMBER, number) @@ -49,12 +51,12 @@ public VcfFormatMetadata(@Nonnull String id, long length, @Nonnull String descri } @Nonnull - public VcfFormatNumber getNumber() { + public VcfFormatNumber number() { return m_number; } @Nonnull - public VcfFormatType getType() { + public VcfFormatType type() { return m_type; } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatNumber.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatNumber.java index 11c6cf5..4b6e83a 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatNumber.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfFormatNumber.java @@ -43,7 +43,7 @@ public Optional asNumber() { @Override public String toString() { //noinspection OptionalGetWithoutIsPresent - return m_flag.map(VcfNumberFlag::getId).orElseGet(() -> String.valueOf(m_number.get())); + return m_flag.map(VcfNumberFlag::id).orElseGet(() -> String.valueOf(m_number.get())); } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfHeaderMetadata.java 
b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfHeaderMetadata.java index 34ef95b..cc5e5a6 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfHeaderMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfHeaderMetadata.java @@ -1,8 +1,6 @@ package org.pharmgkb.parsers.vcf.model.metadata; import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.pharmgkb.parsers.vcf.utils.VcfEscapers; @@ -10,6 +8,7 @@ import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; /** @@ -23,12 +22,12 @@ public class VcfHeaderMetadata implements VcfMetadata { private final ImmutableList m_sampleNames; public VcfHeaderMetadata(@Nonnull List sampleNames) { - Preconditions.checkNotNull(sampleNames, "List of sample names can be empty but not null"); + Objects.requireNonNull(sampleNames, "List of sample names can be empty but not null"); m_sampleNames = ImmutableList.copyOf(sampleNames); } @Nonnull - public ImmutableList getSampleNames() { + public ImmutableList sampleNames() { return m_sampleNames; } @@ -46,7 +45,7 @@ public boolean equals(@Nullable Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; VcfHeaderMetadata that = (VcfHeaderMetadata) o; - return Objects.equal(m_sampleNames, that.m_sampleNames); + return Objects.equals(m_sampleNames, that.m_sampleNames); } @Override diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdDescriptionMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdDescriptionMetadata.java index 59febc2..4001613 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdDescriptionMetadata.java +++ 
b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdDescriptionMetadata.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers.vcf.model.metadata; -import org.pharmgkb.parsers.vcf.utils.PropertyMapBuilder; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilder; import javax.annotation.Nonnull; import javax.annotation.concurrent.Immutable; @@ -16,18 +16,18 @@ public abstract class VcfIdDescriptionMetadata extends VcfIdMetadata { public static final String ID = "ID"; public static final String DESCRIPTION = "Description"; - public VcfIdDescriptionMetadata(@Nonnull VcfMetadataType type, @Nonnull Map props) { + protected VcfIdDescriptionMetadata(@Nonnull VcfMetadataType type, @Nonnull Map props) { super(type, props); - super.require(ID, DESCRIPTION); - super.ensureNoExtras(ID, DESCRIPTION); + require(ID, DESCRIPTION); + ensureNoExtras(ID, DESCRIPTION); } - public VcfIdDescriptionMetadata(@Nonnull VcfMetadataType type, @Nonnull String id, @Nonnull String description) { - this(type, new PropertyMapBuilder().put(ID, id).put(DESCRIPTION, description).build()); + protected VcfIdDescriptionMetadata(@Nonnull VcfMetadataType type, @Nonnull String id, @Nonnull String description) { + this(type, new VcfPropertyMapBuilder().put(ID, id).put(DESCRIPTION, description).build()); } @Nonnull - public String getDescription() { + public String description() { //noinspection OptionalGetWithoutIsPresent return getPropertyRaw(DESCRIPTION).get(); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdMetadata.java index 349b291..3210815 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfIdMetadata.java @@ -13,13 +13,12 @@ public abstract class VcfIdMetadata extends VcfMapMetadata { public static final String ID = "ID"; - public VcfIdMetadata(@Nonnull VcfMetadataType 
type, @Nonnull Map properties) { + protected VcfIdMetadata(@Nonnull VcfMetadataType type, @Nonnull Map properties) { super(type, properties); } - @SuppressWarnings("OptionalGetWithoutIsPresent") @Nonnull - public String getId() { + public String id() { return getPropertyRaw(ID).get(); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfInfoMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfInfoMetadata.java index 8349cb6..80ae617 100755 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfInfoMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfInfoMetadata.java @@ -1,7 +1,7 @@ package org.pharmgkb.parsers.vcf.model.metadata; import org.pharmgkb.parsers.ObjectBuilder; -import org.pharmgkb.parsers.vcf.utils.PropertyMapBuilder; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,8 +35,8 @@ public class VcfInfoMetadata extends VcfIdMetadata { public VcfInfoMetadata(@Nonnull Map props) { super(VcfMetadataType.Info, props); - super.require(ID, DESCRIPTION, NUMBER, TYPE); - super.ensureNoExtras(ID, DESCRIPTION, NUMBER, TYPE, SOURCE, VERSION); + require(ID, DESCRIPTION, NUMBER, TYPE); + ensureNoExtras(ID, DESCRIPTION, NUMBER, TYPE, SOURCE, VERSION); m_type = VcfInfoType.valueOf(props.get(TYPE)); m_number = new VcfFormatNumber(props.get(NUMBER)); } @@ -46,7 +46,7 @@ public VcfInfoMetadata( @Nonnull String number, @Nonnull VcfInfoType type, @Nullable String source, @Nullable String version ) { - this(new PropertyMapBuilder() + this(new VcfPropertyMapBuilder() .put(ID, id) .put(DESCRIPTION, description) .put(NUMBER, number) @@ -60,12 +60,12 @@ public VcfInfoMetadata( } @Nonnull - public VcfFormatNumber getNumber() { + public VcfFormatNumber number() { return m_number; } @Nonnull - public VcfInfoType getType() { + public VcfInfoType type() { return m_type; } @@ -80,7 +80,12 @@ public static class Builder 
implements ObjectBuilder { private String m_source = null; private String m_version = null; - public Builder(@Nonnull String id, @Nonnull String description, @Nonnull String number, @Nonnull VcfInfoType type) { + public Builder( + @Nonnull String id, + @Nonnull String description, + @Nonnull String number, + @Nonnull VcfInfoType type + ) { m_id = id; m_description = description; m_number = number; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMapMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMapMetadata.java index f725027..047b869 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMapMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMapMetadata.java @@ -4,7 +4,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import org.pharmgkb.parsers.vcf.utils.PropertyMapBuilder; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilder; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilderI; import org.pharmgkb.parsers.vcf.utils.VcfEscapers; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -28,9 +29,9 @@ public abstract class VcfMapMetadata implements VcfMetadata { private final VcfMetadataType m_type; private final ImmutableMap m_properties; - public VcfMapMetadata(@Nonnull VcfMetadataType type, @Nonnull Map properties) { + protected VcfMapMetadata(@Nonnull VcfMetadataType type, @Nonnull Map properties) { m_type = type; - PropertyMapBuilder builder = new PropertyMapBuilder(); + VcfPropertyMapBuilderI builder = new VcfPropertyMapBuilder(); for (Map.Entry entry : properties.entrySet()) { String value = ifUnquoted(VcfEscapers.METADATA::unescape, entry.getValue()); builder.put(entry.getKey(), value); @@ -55,7 +56,7 @@ public Optional getPropertyRaw(@Nonnull String key) { } @Nonnull - public ImmutableSet getPropertyKeys() { + public ImmutableSet 
propertyKeys() { return m_properties.keySet(); } @@ -68,13 +69,13 @@ private void check() { } protected void require(@Nonnull String... names) { - Arrays.asList(names).forEach(s -> { - if (!m_properties.containsKey(s)) { - IllegalArgumentException x = new IllegalArgumentException("Missing required property " + s); - x.addSuppressed(new Exception(toString())); - throw x; - } - }); + Arrays.stream(names) + .filter(s -> !m_properties.containsKey(s)) + .map(s -> new IllegalArgumentException("Missing required property " + s)) + .forEach(x -> { + x.addSuppressed(new Exception(toString())); + throw x; + }); } /** @@ -83,19 +84,21 @@ protected void require(@Nonnull String... names) { * @param names An array of permitted property keys */ protected void ensureNoExtras(@Nonnull String... names) { - Set set = new HashSet<>(); + Set set = new HashSet<>(names.length); Collections.addAll(set, names); - m_properties.keySet().stream().filter(property -> !set.contains(property)).forEach(property -> - sf_logger.warn("Metadata line contains unexpected property {}", property)); + m_properties.keySet().stream().filter( + property -> !set.contains(property)) + .forEach(property -> + sf_logger.warn("Metadata line contains unexpected property {}", property) + ); } - @SuppressWarnings("EqualsCalledOnEnumConstant") @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; VcfMapMetadata that = (VcfMapMetadata) o; - return com.google.common.base.Objects.equal(m_type, that.m_type) && + return m_type == that.m_type && Objects.equal(m_properties, that.m_properties); } @@ -116,7 +119,7 @@ public String toString() { @Nonnull @Override public String toVcfLine() { - return "##" + m_type.getId() + "=<" + return "##" + m_type.id() + "=<" + m_properties.entrySet().stream() .map(e -> e.getKey() + "=" + ifUnquoted(VcfEscapers.METADATA::escape, e.getValue())) .collect(Collectors.joining(",")) diff --git 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadata.java index 379ce97..0bebcbe 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadata.java @@ -9,7 +9,8 @@ public interface VcfMetadata { /** - * @return The full VCF-formatted text of the line, including {@code #} or {@code ##}, and already escaped + * @return The full VCF-formatted text of the line, including {@code #} or {@code ##}, + * and already escaped */ @Nonnull String toVcfLine(); diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadataType.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadataType.java index e10fb81..0a1832b 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadataType.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfMetadataType.java @@ -33,7 +33,7 @@ public enum VcfMetadataType { } @Nonnull - public String getId() { + public String id() { return m_id; } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfNumberFlag.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfNumberFlag.java index a80559b..cd7cfd8 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfNumberFlag.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfNumberFlag.java @@ -32,7 +32,7 @@ public static Optional fromId(@Nonnull String id) { } @Nonnull - public String getId() { + public String id() { return m_id; } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfPedigreeMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfPedigreeMetadata.java index ad87740..1a169b7 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfPedigreeMetadata.java +++ 
b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfPedigreeMetadata.java @@ -13,6 +13,6 @@ public class VcfPedigreeMetadata extends VcfIdMetadata { public VcfPedigreeMetadata(@Nonnull Map properties) { super(VcfMetadataType.Pedigree, properties); - super.require(ID); + require(ID); } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfRawMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfRawMetadata.java index 4db1abb..d476623 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfRawMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfRawMetadata.java @@ -1,13 +1,13 @@ package org.pharmgkb.parsers.vcf.model.metadata; import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; import com.google.common.base.Preconditions; import org.pharmgkb.parsers.vcf.utils.VcfEscapers; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; +import java.util.Objects; /** * A VCF metadata line that is either not mentioned in the specification or has no defined structure. 
@@ -19,7 +19,7 @@ public class VcfRawMetadata implements VcfMetadata { private final String m_line; public VcfRawMetadata(@Nonnull String line) { - Preconditions.checkNotNull(line, "Metadata line cannot be null"); + Objects.requireNonNull(line, "Metadata line cannot be null"); m_line = VcfEscapers.METADATA.unescape(line); } @@ -34,7 +34,7 @@ public boolean equals(@Nullable Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; VcfRawMetadata that = (VcfRawMetadata) o; - return Objects.equal(m_line, that.m_line); + return Objects.equals(m_line, that.m_line); } @Override diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfSampleMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfSampleMetadata.java index fba62e6..d096733 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfSampleMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfSampleMetadata.java @@ -1,6 +1,6 @@ package org.pharmgkb.parsers.vcf.model.metadata; -import org.pharmgkb.parsers.vcf.utils.PropertyMapBuilder; +import org.pharmgkb.parsers.vcf.builders.VcfPropertyMapBuilder; import javax.annotation.Nonnull; import java.util.Map; @@ -19,7 +19,7 @@ public VcfSampleMetadata(@Nonnull Map props) { } public VcfSampleMetadata(@Nonnull String id, @Nonnull String description) { - super(VcfMetadataType.Sample, new PropertyMapBuilder().put(ID, id).put(DESCRIPTION, description).build()); + super(VcfMetadataType.Sample, new VcfPropertyMapBuilder().put(ID, id).put(DESCRIPTION, description).build()); } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfVersionMetadata.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfVersionMetadata.java index a294a49..3364252 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfVersionMetadata.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/VcfVersionMetadata.java 
@@ -1,12 +1,11 @@ package org.pharmgkb.parsers.vcf.model.metadata; import com.google.common.base.MoreObjects; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.annotation.concurrent.Immutable; +import java.util.Objects; /** * The first metadata line in a VCF file, starting with {@code ##fileFormat=VCFv}. @@ -18,7 +17,7 @@ public class VcfVersionMetadata implements VcfMetadata { private final String m_versionNumber; public VcfVersionMetadata(@Nonnull String versionNumber) { - Preconditions.checkNotNull(versionNumber, "Version number cannot be null"); + Objects.requireNonNull(versionNumber, "Version number cannot be null"); m_versionNumber = versionNumber; } @@ -29,7 +28,7 @@ public String toVcfLine() { } @Nonnull - public String getVersionNumber() { + public String versionNumber() { return m_versionNumber; } @@ -38,7 +37,7 @@ public boolean equals(@Nullable Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; VcfVersionMetadata that = (VcfVersionMetadata) o; - return Objects.equal(m_versionNumber, that.m_versionNumber); + return Objects.equals(m_versionNumber, that.m_versionNumber); } @Override diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/package-info.java new file mode 100644 index 0000000..a5bbc52 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/metadata/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.model.metadata; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/package-info.java new file mode 100644 index 0000000..0afae6c --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.model; diff --git 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedFormatProperty.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedFormatProperty.java similarity index 57% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedFormatProperty.java rename to vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedFormatProperty.java index 7040c06..e5b08ba 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedFormatProperty.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedFormatProperty.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.vcf.model.extra; +package org.pharmgkb.parsers.vcf.model.reserved; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -9,10 +9,18 @@ * A FORMAT field specified as reserved in the VCF specification. * @author Douglas Myers-Turnbull */ -public enum ReservedFormatProperty implements ReservedProperty { - - Genotype("GT", "Genotype, encoded as allele values separated by either / or |.", String.class, false, "1"), - Depth("DP", "Read depth at this position for this sample.", Long.class, false, "1"), +public enum VcfReservedFormatProperty implements VcfReservedProperty { + + Genotype( + "GT", + "Genotype, encoded as allele values separated by either / or |.", + String.class, false, "1" + ), + Depth( + "DP", + "Read depth at this position for this sample.", + Long.class, false, "1" + ), Filter("FT", "Sample genotype filter indicating if this genotype was called.", String.class, false), GenotypeLikelihoods( "GL", "Genotype likelihoods comprised of comma separated floating point log10-scaled likelihoods" @@ -56,16 +64,45 @@ public enum ReservedFormatProperty implements ReservedProperty { "as listed in the Alt field (typically used in association analyses)", Long.class, true ), - MappingQuality("MQ", "RMS mapping quality, similar to the version in the Info field.", Long.class, true), + MappingQuality( + "MQ", + "RMS 
mapping quality, similar to the version in the Info field.", + Long.class, true + ), // structural variants - CopyNumber("CN", "Copy number genotype for imprecise events", Long.class, false, "1"), - CopyNumberGenotypeQuality("CNQ", "Copy number genotype quality for imprecise events", BigDecimal.class, false, "1"), - CopyNumberLikelihood("CNL", "Copy number genotype likelihood for imprecise events", BigDecimal.class, true, "."), - PhredScoreForNovelty("NQ", "Phred style probability score that the variant is novel", BigDecimal.class, false, "1"), - HaplotypeId("HAP", "Unique haplotype identifier", String.class, false, "1"), - AncestralHaplotypeId("AHAP", "Unique identifier of ancestral haplotype", String.class, false, "1"), ; + CopyNumber( + "CN", + "Copy number genotype for imprecise events", + Long.class, false, "1" + ), + CopyNumberGenotypeQuality( + "CNQ", + "Copy number genotype quality for imprecise events", + BigDecimal.class, false, "1" + ), + CopyNumberLikelihood( + "CNL", + "Copy number genotype likelihood for imprecise events", + BigDecimal.class, true, "." 
+ ), + PhredScoreForNovelty( + "NQ", + "Phred style probability score that the variant is novel", + BigDecimal.class, false, "1" + ), + HaplotypeId( + "HAP", + "Unique haplotype identifier", + String.class, false, "1" + ), + AncestralHaplotypeId( + "AHAP", + "Unique identifier of ancestral haplotype", + String.class, false, "1" + ) + ; private final String m_id; @@ -75,41 +112,47 @@ public enum ReservedFormatProperty implements ReservedProperty { private final boolean m_isList; - private final Optional m_number; + @Nullable private final String m_number; - ReservedFormatProperty(@Nonnull String id, @Nonnull String description, @Nonnull Class type, boolean isList) { + VcfReservedFormatProperty(@Nonnull String id, @Nonnull String description, @Nonnull Class type, boolean isList) { this(id, description, type, isList, null); } - ReservedFormatProperty(@Nonnull String id, @Nonnull String description, @Nonnull Class type, boolean isList, @Nullable String number) { + VcfReservedFormatProperty( + @Nonnull String id, + @Nonnull String description, + @Nonnull Class type, + boolean isList, + @Nullable String number + ) { m_id = id; m_description = description; m_type = type; m_isList = isList; - m_number = Optional.ofNullable(number); + m_number = number; } @Override @Nonnull - public String getId() { + public String id() { return m_id; } @Override @Nonnull - public String getDescription() { + public String description() { return m_description; } @Override @Nonnull - public Class getType() { + public Class type() { return m_type; } @Nonnull - public Optional getNumber() { - return m_number; + public Optional number() { + return Optional.ofNullable(m_number); } @Override diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedInfoProperty.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedInfoProperty.java similarity index 92% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedInfoProperty.java rename to 
vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedInfoProperty.java index 7cf40bf..93458b6 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedInfoProperty.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedInfoProperty.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.vcf.model.extra; +package org.pharmgkb.parsers.vcf.model.reserved; import javax.annotation.Nonnull; import java.math.BigDecimal; @@ -7,7 +7,7 @@ * An INFO field specified as reserved in the VCF specification. * @author Douglas Myers-Turnbull */ -public enum ReservedInfoProperty implements ReservedProperty { +public enum VcfReservedInfoProperty implements VcfReservedProperty { // standard @@ -92,8 +92,10 @@ public enum ReservedInfoProperty implements ReservedProperty { private final boolean m_isList; - ReservedInfoProperty(@Nonnull String id, @Nonnull String description, @Nonnull Class type, boolean isList, - @Nonnull String number) { + VcfReservedInfoProperty( + @Nonnull String id, @Nonnull String description, @Nonnull Class type, boolean isList, + @Nonnull String number + ) { m_id = id; m_description = description; m_type = type; @@ -103,24 +105,24 @@ public enum ReservedInfoProperty implements ReservedProperty { @Override @Nonnull - public String getId() { + public String id() { return m_id; } @Override @Nonnull - public String getDescription() { + public String description() { return m_description; } @Override @Nonnull - public Class getType() { + public Class type() { return m_type; } @Nonnull - public String getNumber() { + public String number() { return m_number; } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedProperty.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedProperty.java similarity index 56% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedProperty.java rename to 
vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedProperty.java index 39b7feb..d12abc9 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedProperty.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedProperty.java @@ -1,4 +1,4 @@ -package org.pharmgkb.parsers.vcf.model.extra; +package org.pharmgkb.parsers.vcf.model.reserved; import javax.annotation.Nonnull; @@ -6,16 +6,16 @@ * A field specified as reserved in the VCF specification. * @author Douglas Myers-Turnbull */ -public interface ReservedProperty { +public interface VcfReservedProperty { @Nonnull - String getId(); + String id(); @Nonnull - String getDescription(); + String description(); @Nonnull - Class getType(); + Class type(); boolean isList(); } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedStructuralVariantCode.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedStructuralVariantCode.java similarity index 73% rename from vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedStructuralVariantCode.java rename to vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedStructuralVariantCode.java index c79e685..cdf782d 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/extra/ReservedStructuralVariantCode.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/VcfReservedStructuralVariantCode.java @@ -1,4 +1,6 @@ -package org.pharmgkb.parsers.vcf.model.extra; +package org.pharmgkb.parsers.vcf.model.reserved; + +import org.pharmgkb.parsers.vcf.model.extra.VcfAltStructuralVariant; import javax.annotation.Nonnull; import java.util.Arrays; @@ -8,10 +10,10 @@ /** * A reserved identifier for VCF ALT codes of structural variants. 
- * @see AltStructuralVariant + * @see VcfAltStructuralVariant * @author Douglas Myers-Turnbull */ -public enum ReservedStructuralVariantCode { +public enum VcfReservedStructuralVariantCode { Deletion("DEL"), Insertion("INS"), @@ -25,7 +27,7 @@ public enum ReservedStructuralVariantCode { private final List m_codes; @Nonnull - public static Optional fromId(@Nonnull String id) { + public static Optional fromId(@Nonnull String id) { return switch (id) { case "DEL" -> Optional.of(Deletion); case "INS" -> Optional.of(Insertion); @@ -39,7 +41,7 @@ public static Optional fromId(@Nonnull String id) }; } - ReservedStructuralVariantCode(@Nonnull String... codes) { + VcfReservedStructuralVariantCode(@Nonnull String... codes) { m_codes = Arrays.asList(codes); } @@ -47,12 +49,12 @@ public static Optional fromId(@Nonnull String id) * @return The code (e.g. CNV) */ @Nonnull - public String getId() { + public String id() { return String.join(":", m_codes); } @Nonnull - public List getCodes() { + public List codes() { return Collections.unmodifiableList(m_codes); } } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/package-info.java new file mode 100644 index 0000000..7793a7f --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/model/reserved/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf.model.reserved; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/package-info.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/package-info.java new file mode 100644 index 0000000..a86d9f5 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/package-info.java @@ -0,0 +1 @@ +package org.pharmgkb.parsers.vcf; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/PropertyMapBuilder.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/PropertyMapBuilder.java deleted file mode 100644 index 511af01..0000000 --- 
a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/PropertyMapBuilder.java +++ /dev/null @@ -1,44 +0,0 @@ -package org.pharmgkb.parsers.vcf.utils; - - -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableMap; -import org.pharmgkb.parsers.ObjectBuilder; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import javax.annotation.concurrent.NotThreadSafe; -import java.util.Map; -import java.util.Optional; - -/** - * A builder for string-to-string maps. - * @author Douglas Myers-Turnbull - */ -@NotThreadSafe -public class PropertyMapBuilder extends ImmutableMap.Builder implements ObjectBuilder> { - - public PropertyMapBuilder() {} - - public PropertyMapBuilder(@Nonnull Map map) { - map.forEach(this::put); - } - - @Nonnull - public PropertyMapBuilder put(@Nonnull String key, @Nonnull Optional value) { - Preconditions.checkNotNull(key, "Key cannot be null"); - Preconditions.checkNotNull(value, "Value cannot be null"); - value.ifPresent(s -> super.put(key, s)); - return this; - } - - @Nonnull - @Override - public PropertyMapBuilder put(@Nonnull String key, @Nullable String value) { - Preconditions.checkNotNull(key, "Key cannot be null"); - if (value != null) { - super.put(key, value); - } - return this; - } -} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfConversionUtils.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfConversionUtils.java deleted file mode 100644 index 3f18ad3..0000000 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfConversionUtils.java +++ /dev/null @@ -1,126 +0,0 @@ -package org.pharmgkb.parsers.vcf.utils; - -import org.pharmgkb.parsers.model.GeneralizedBigDecimal; -import org.pharmgkb.parsers.vcf.model.extra.ReservedProperty; -import org.pharmgkb.parsers.vcf.model.metadata.VcfFormatType; -import org.pharmgkb.parsers.vcf.model.metadata.VcfInfoType; - -import javax.annotation.Nonnull; -import java.util.ArrayList; -import java.util.List; -import 
java.util.Optional; - -/** - * Static methods for converting VCF strings to their expected types. - * @author Douglas Myers-Turnbull - */ -public class VcfConversionUtils { - - private VcfConversionUtils() {} - - /** - * Converts a String representation of a property into a more useful type. - * Specifically, can return: - *
<ul> - * <li>String</li> - * <li>Long</li> - * <li>GeneralizedBigDecimal</li> - * <li>The Boolean true (for flags)</li> - * <li>A List of any of the above types</li> - * </ul>
- */ - @Nonnull - public static Optional convertProperty(@Nonnull ReservedProperty key, @Nonnull Optional value) { - return convertProperty(key.getType(), value, key.isList()); - } - - /** - * @see #convertProperty(ReservedProperty, Optional) - */ - @SuppressWarnings("unchecked") - @Nonnull - public static Optional convertProperty(@Nonnull Class clas, @Nonnull Optional value, boolean isList) { - if (value.isEmpty()) { - return Optional.empty(); - } - if (!isList) { - try { - return Optional.of((T) convertElement(clas, value)); - } catch (ClassCastException e) { - throw new IllegalArgumentException("Wrong type specified", e); - } - } - List list = new ArrayList<>(64); - for (String part : value.get().split(",")) { - list.add(convertElement(clas, Optional.of(part))); - } - try { - return Optional.of((T) list); - } catch (ClassCastException e) { - throw new IllegalArgumentException("Wrong type specified", e); - } - } - - @Nonnull - public static Optional convertProperty(@Nonnull VcfFormatType type, @Nonnull Optional value) { - Class clas = switch (type) { - case Integer -> Long.class; - case Float -> GeneralizedBigDecimal.class; - case Character -> Character.class; - case String -> String.class; - }; - return convertProperty(clas, value, false); - } - - @Nonnull - public static Optional convertProperty(@Nonnull VcfInfoType type, @Nonnull Optional value) { - Class clas = switch (type) { - case Integer -> Long.class; - case Float -> GeneralizedBigDecimal.class; - case Character -> Character.class; - case String -> String.class; - case Flag -> Boolean.class; - }; - return convertProperty(clas, value, false); - } - - @Nonnull - private static Optional convertElement(@Nonnull Class clas, @Nonnull Optional value) { - if (value.isEmpty()) { - return Optional.empty(); - } - String val = value.get(); - if (clas == String.class) { - return Optional.of(value); - } else if (clas == Character.class) { - if (val.length() == 1) { - return Optional.of(value); - } else { - throw new 
IllegalArgumentException("Invalid character value '" + value + "'"); - } - } else if (clas == Boolean.class) { - if (val.equals("0") || val.equalsIgnoreCase("false")) { - return Optional.of(false); - } - if (val.equals("1") || val.equalsIgnoreCase("true")) { - return Optional.of(true); - } - throw new IllegalArgumentException("Invalid boolean value: '" + value + "'"); - - } else if (clas == GeneralizedBigDecimal.class) { - try { - return Optional.of(new GeneralizedBigDecimal(val)); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Expected float; got " + value); - } - } else if (clas == Long.class) { - try { - return Optional.of(Long.parseLong(val)); - } catch (NumberFormatException e) { - throw new IllegalArgumentException("Expected integer; got " + value); - } - } - throw new UnsupportedOperationException("Type " + clas + " unrecognized"); - } - -} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfEscapers.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfEscapers.java index 62fcde9..8202298 100644 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfEscapers.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfEscapers.java @@ -9,7 +9,7 @@ * Therefore, use this class only if you're implementing a new VCF parser. * @author Douglas Myers-Turnbull */ -public class VcfEscapers { +public final class VcfEscapers { private VcfEscapers() {} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfPatterns.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfPatterns.java index 89146b6..9af9c8b 100755 --- a/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfPatterns.java +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/utils/VcfPatterns.java @@ -6,7 +6,7 @@ * Contains static methods for regex in VCF. 
* @author Douglas Myers-Turnbull */ -public class VcfPatterns { +public final class VcfPatterns { private VcfPatterns() {} @@ -19,8 +19,8 @@ private VcfPatterns() {} private static final String sf_simpleAltPattern = "(?:" + // wrap the whole expression "(?:" + // allow nucleotides, symbolic IDs, or both - "(?:[AaCcGgTtNn]+)" + // nucleotides - "|(?:<.+>)" + // symbolic IDs (declared in ALT metadata) + "[AaCcGgTtNn]+" + // nucleotides + "|<.+>" + // symbolic IDs (declared in ALT metadata) ")+" + // allow things like C (apparently) "|\\*" + // indicates that the position doesn't exist due to an upstream deletion ")"; diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfInvalidProperty.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfInvalidProperty.java new file mode 100644 index 0000000..319625f --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfInvalidProperty.java @@ -0,0 +1,15 @@ +package org.pharmgkb.parsers.vcf.validation; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.Immutable; + +/** + * An aspect of a VCF position that is wrong because it contradicts the metadata. + */ +@Immutable +public record VcfInvalidProperty( + @Nonnull String chromosome, + long position, + @Nonnull String key, + @Nonnull VcfPropertyType source +) { } diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfPropertyType.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfPropertyType.java new file mode 100644 index 0000000..1f34126 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfPropertyType.java @@ -0,0 +1,11 @@ +package org.pharmgkb.parsers.vcf.validation; + +/** + * What property is wrong: INFO, FORMAT, FILTER, or SAMPLE. 
+ */ +public enum VcfPropertyType { + INFO, + FORMAT, + FILTER, + SAMPLE +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidationException.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidationException.java new file mode 100644 index 0000000..fbee554 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidationException.java @@ -0,0 +1,47 @@ +package org.pharmgkb.parsers.vcf.validation; + +import java.io.NotSerializableException; +import java.io.Serial; + +/** + * An exception caused by a {@link VcfInvalidProperty}. + */ +public class VcfValidationException extends RuntimeException { + + private final VcfInvalidProperty m_invalid; + + /** + * @return The invalid property that caused this exception + */ + public VcfInvalidProperty property() { + return m_invalid; + } + + /** + * Builds the message from the source, key, chromosome, and position of the invalid property. + * @param error The invalid property; it is dereferenced immediately, so null raises a NullPointerException + */ + public VcfValidationException(VcfInvalidProperty error) { + super("Bad " + + error.source() + + ": \"" + + error.key() + + "\" for position " + + error.chromosome() + + ":" + + error.position()); + m_invalid = error; + } + + /** Always throws, so instances cannot be deserialized. */ + @Serial + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + throw new NotSerializableException("org.pharmgkb.parsers.vcf.validation.VcfValidationException"); + } + + /** Always throws, so instances cannot be serialized. */ + @Serial + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + throw new NotSerializableException("org.pharmgkb.parsers.vcf.validation.VcfValidationException"); + } +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidator.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidator.java new file mode 100644 index 0000000..ba407e1 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidator.java @@ -0,0 +1,122 @@ +package org.pharmgkb.parsers.vcf.validation; + +import org.pharmgkb.parsers.ObjectBuilder; +import org.pharmgkb.parsers.vcf.model.VcfMetadataCollection; +import org.pharmgkb.parsers.vcf.model.VcfPosition; +import
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; +import javax.annotation.concurrent.NotThreadSafe; +import javax.annotation.concurrent.ThreadSafe; +import java.lang.invoke.MethodHandles; +import java.util.Objects; +import java.util.function.Consumer; + +/** + * Checks for errors arising from a contradiction between metadata and VCF positions. + * This class is implemented to {@link Consumer consume} {@link VcfPosition VcfPositions} and perform a specified action for each error found. + * The recommended use is with {@link java.util.stream.Stream#peek(Consumer)} before reading or before writing (but before both is likely unnecessary). + * @author Douglas Myers-Turnbull + */ +@ThreadSafe +public class VcfValidator implements VcfValidatorI { + + private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final Consumer<VcfInvalidProperty> m_action; + + private final VcfMetadataCollection m_metadata; + + protected VcfValidator( + @Nonnull Consumer<VcfInvalidProperty> action, + @Nonnull VcfMetadataCollection metadata + ) { + m_action = action; + m_metadata = metadata; + } + + @Override + public void accept(@Nonnull VcfPosition position) { /* hands one VcfInvalidProperty to the action per mismatch found */ + Objects.requireNonNull(position, "VcfPosition cannot be null"); + m_metadata.sample().keySet().stream() /* sample metadata keys that are missing from the header's sample names */ + .filter(k -> !m_metadata.header().sampleNames().contains(k)) + .map(s -> new VcfInvalidProperty(position.chromosome(), position.position(), s, VcfPropertyType.SAMPLE)) + .forEach(m_action); + m_metadata.header().sampleNames().stream() /* header sample names without sample metadata; NOTE(review): confirm this strictness is wanted */ + .filter(k -> !m_metadata.sample().containsKey(k)) + .map(s -> new VcfInvalidProperty(position.chromosome(), position.position(), s, VcfPropertyType.SAMPLE)) + .forEach(m_action); + position.filters().stream() /* filters of this position that the metadata does not declare */ + .filter(s -> !m_metadata.filter().containsKey(s)) + .map(s -> new VcfInvalidProperty(position.chromosome(), position.position(), s, VcfPropertyType.FILTER)) + .forEach(m_action); + position.format().stream() /* format keys of this position that the metadata does not declare */ + .filter(s -> 
!m_metadata.format().containsKey(s)) + .map(s -> new VcfInvalidProperty(position.chromosome(), position.position(), s, VcfPropertyType.FORMAT)) + .forEach(m_action); + position.info().entries().stream() /* INFO keys are checked against the INFO metadata, not the FORMAT metadata */ + .filter(e -> !m_metadata.info().containsKey(e.getKey())) + .map(e -> new VcfInvalidProperty(position.chromosome(), position.position(), e.getKey(), VcfPropertyType.INFO)) + .forEach(m_action); + } + + @Override + public String toString() { + return "VcfValidator{" + + "action=" + m_action + + ", metadata: " + m_metadata.lines().size() + " lines" + + '}'; + } + + @NotThreadSafe + public static class Builder implements ObjectBuilder<VcfValidatorI> { + + private final VcfMetadataCollection m_metadata; + + private Consumer<VcfInvalidProperty> m_action; + + /** + * The default action is to throw a {@link VcfValidationException} for the first invalid property. + */ + public Builder(@Nonnull VcfMetadataCollection metadata) { + Objects.requireNonNull(metadata, "Metadata cannot be null"); + m_action = error -> {throw new VcfValidationException(error);}; + m_metadata = metadata; + } + + /** + * Sets the {@link #setAction(Consumer) action} to logging a warning for each error. + */ + @Nonnull + public Builder warnOnly() { + m_action = error -> sf_logger.warn( + "Bad {}: \"{}\" for position {}:{}", + error.source(), + error.key(), + error.chromosome(), + error.position() + ); + return this; + } + + /** + * Replaces the action with a new one. 
+ */ + @Nonnull + public Builder setAction( + @Nonnull Consumer<VcfInvalidProperty> action + ) { + Objects.requireNonNull(action, "Action cannot be null"); + m_action = action; + return this; + } + + @Nonnull + @Override + public VcfValidatorI build() { + return new VcfValidator(m_action, m_metadata); + } + } + +} diff --git a/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidatorI.java b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidatorI.java new file mode 100644 index 0000000..ad40b85 --- /dev/null +++ b/vcf/src/main/java/org/pharmgkb/parsers/vcf/validation/VcfValidatorI.java @@ -0,0 +1,13 @@ +package org.pharmgkb.parsers.vcf.validation; + +import org.pharmgkb.parsers.vcf.model.VcfPosition; + +import javax.annotation.Nonnull; +import java.util.function.Consumer; + +public interface VcfValidatorI extends Consumer<VcfPosition> { + + @Override + void accept(@Nonnull VcfPosition position); + +} diff --git a/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfDataIntegrationTest.java b/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfDataIntegrationTest.java index 0d8cf16..15ee3e5 100644 --- a/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfDataIntegrationTest.java +++ b/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfDataIntegrationTest.java @@ -2,16 +2,15 @@ import org.junit.jupiter.api.Test; import org.pharmgkb.parsers.vcf.model.VcfPosition; -import org.pharmgkb.parsers.vcf.model.extra.ReservedFormatProperty; +import org.pharmgkb.parsers.vcf.model.reserved.VcfReservedFormatProperty; import java.io.File; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; import java.util.Optional; -import java.util.stream.Collectors; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * @author Douglas Myers-Turnbull @@ -23,26 +22,26 @@ public void testApply() throws Exception { Path input = new File("/Users/student/genome-sequence-io/vcf/src/test/resources/org/pharmgkb/parsers/vcf/example.vcf").toPath(); - 
VcfDataParser parser = new VcfDataParser(); - List<VcfPosition> positions = parser.parseAll(input).collect(Collectors.toList()); + VcfDataParserI parser = new VcfDataParser(); + List<VcfPosition> positions = parser.parseAll(input).toList(); List<String> expectedGenotypes = positions.stream() - .map(p -> p.getSamples().get(0)) - .filter(s -> s.containsKey(ReservedFormatProperty.Genotype)) - .map(s -> s.get(ReservedFormatProperty.Genotype).get()) - .collect(Collectors.toList()); + .map(p -> p.samples().get(0)) + .filter(s -> s.containsKey(VcfReservedFormatProperty.Genotype)) + .map(s -> s.get(VcfReservedFormatProperty.Genotype).get()) + .toList(); List<String> genotypes = positions.stream() .map(p -> p.getGenotype(0)) .filter(Optional::isPresent) .map(o -> o.get().toVcfString()) - .collect(Collectors.toList()); + .toList(); for (int i = 0; i < expectedGenotypes.size(); i++) { - assertEquals("Genotype " + i + " is wrong", expectedGenotypes.get(i), genotypes.get(i)); + assertEquals(expectedGenotypes.get(i), genotypes.get(i), "Genotype " + i + " is wrong"); /* JUnit 5 order: expected, actual, message */ } - List<String> expected = Files.lines(input).skip(22L).collect(Collectors.toList()); - List<String> actual = positions.stream().map(new VcfDataWriter()).collect(Collectors.toList()); + List<String> expected = Files.readAllLines(input).stream().skip(22L).toList(); /* readAllLines closes the file; the Files.lines stream was never closed */ + List<String> actual = positions.stream().map(new VcfDataWriter()).toList(); assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { @@ -51,4 +50,4 @@ public void testApply() throws Exception { } -} \ No newline at end of file +} diff --git a/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfValidatorTest.java b/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfValidatorTest.java index c289856..89f72f0 100644 --- a/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfValidatorTest.java +++ b/vcf/src/test/java/org/pharmgkb/parsers/vcf/VcfValidatorTest.java @@ -1,6 +1,7 @@ package org.pharmgkb.parsers.vcf; import org.junit.jupiter.api.Test; +import org.pharmgkb.parsers.vcf.validation.VcfValidator; /** * Tests {@link VcfValidator}. 
@@ -12,4 +13,4 @@ public class VcfValidatorTest { public void testAccept() throws Exception { // TODO } -} \ No newline at end of file +} diff --git a/vcf/src/test/java/org/pharmgkb/parsers/vcf/model/VcfPositionTest.java b/vcf/src/test/java/org/pharmgkb/parsers/vcf/model/VcfPositionTest.java index a45b90a..6bb72d0 100644 --- a/vcf/src/test/java/org/pharmgkb/parsers/vcf/model/VcfPositionTest.java +++ b/vcf/src/test/java/org/pharmgkb/parsers/vcf/model/VcfPositionTest.java @@ -7,7 +7,7 @@ import org.pharmgkb.parsers.vcf.model.allele.VcfAllele; import org.pharmgkb.parsers.vcf.model.allele.VcfBasesAllele; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.*; /** * Tests {@link VcfPosition}. @@ -23,15 +23,15 @@ public void test() { .addFilter("filter") .build(); - assertEquals(new Locus("chr1", -1, Strand.PLUS), position.getLocus()); - assertEquals("A", position.getRef().toVcfString()); + assertEquals(new Locus("chr1", -1, Strand.PLUS), position.locus()); + assertEquals("A", position.ref().toVcfString()); assertEquals(new ImmutableList.Builder() .add(new VcfBasesAllele("T")).build(), - position.getAlts()); + position.alts()); assertEquals(new ImmutableList.Builder() .add(new VcfBasesAllele("A")) .add(new VcfBasesAllele("T")).build(), - position.getAllAlleles()); - assertEquals(new ImmutableList.Builder().add("filter").build(), position.getFilters()); + position.allAlleles()); + assertEquals(new ImmutableList.Builder().add("filter").build(), position.filters()); } -} \ No newline at end of file +}