From 1e98c6d02cefefbaa1a15db0aea64ea7518025fa Mon Sep 17 00:00:00 2001
From: Josh Holland
 * This is an implementation of {@link HaplotypeCaller} using spark to distribute the computation.
* It is still in an early stage of development and does not yet support all the options that the non-spark version does.
- * Specifically it does not support the --dbsnp, --comp, and --bamOutput options.
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/ApplyBQSRSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/ApplyBQSRSpark.java index 202f2b474ad..6ef52516bcd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/ApplyBQSRSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/ApplyBQSRSpark.java @@ -42,7 +42,8 @@ * gatk ApplyBQSRSpark \ * -I gs://my-gcs-bucket/input.bam \ * -bqsr gs://my-gcs-bucket/recalibration.table \ - * -SQQ 10 -SQQ 20 -SQQ 30 -SQQ 40 \ + * --static-quantized-quals 10 --static-quantized-quals 20 \ + * --static-quantized-quals 30 --static-quantized-quals 40 \ * -O gs://my-gcs-bucket/output.bam \ * -- \ * --sparkRunner GCS \ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSFilterArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSFilterArgumentCollection.java index 287cb9f3602..ba427fbe17b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSFilterArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSFilterArgumentCollection.java @@ -291,10 +291,10 @@ public void doReadFilterArgumentWarnings(final GATKReadFilterPluginDescriptor pl for (final ReadFilter filter : readFilters) { if (filter.getClass().isAssignableFrom(AmbiguousBaseReadFilter.class)) { logger.warn("Detected the use of AmbiguousBaseReadFilter, which is applied before the PathSeq " + - "base masking steps. Did you mean to use --maxMaskedBases, which is applied after masking?"); + "base masking steps. Did you mean to use --max-masked-bases, which is applied after masking?"); } else if (filter.getClass().isAssignableFrom(ReadLengthReadFilter.class)) { logger.warn("Detected the use of ReadLengthReadFilter, which is applied before the PathSeq " + - "clipping steps. Did you mean to use --minClippedReadLength, which is applied after clipping?"); + "clipping steps. 
Did you mean to use --min-clipped-read-length, which is applied after clipping?"); } } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSScorer.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSScorer.java index bc1d4b2be79..4bdca05a5b1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSScorer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSScorer.java @@ -107,7 +107,7 @@ static JavaRDD* *> groupReadsIntoPairs(final JavaRDD p } else if (unpairedReads != null) { groupedReads = unpairedReads.map(Collections::singletonList); } else { - throw new UserException.BadInput("No reads were loaded. Ensure --pairedInput and/or --unpairedInput are set and valid."); + throw new UserException.BadInput("No reads were loaded. Ensure --paired-input and/or --unpaired-input are set and valid."); } return groupedReads; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSUtils.java index a009b8e4546..475864b36d2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSUtils.java @@ -67,7 +67,7 @@ public static int pathseqGetRecommendedNumReducers(final String inputPath, final /** * Returns a deep copy of the input header with an empty sequence dictionary, and logs warnings if the input may - * be aligned but --isHostAligned was not set to true (or vice versa). + * be aligned but --is-host-aligned was not set to true (or vice versa). 
*/ public static SAMFileHeader checkAndClearHeaderSequences(final SAMFileHeader inputHeader, final PSFilterArgumentCollection filterArgs, final Logger logger) { @@ -79,10 +79,10 @@ public static SAMFileHeader checkAndClearHeaderSequences(final SAMFileHeader inp final SAMFileHeader header = inputHeader.clone(); if (filterArgs.alignedInput && (header.getSequenceDictionary() == null || header.getSequenceDictionary().isEmpty())) { - logger.warn("--isHostAligned is true but the BAM header contains no sequences"); + logger.warn("--is-host-aligned is true but the BAM header contains no sequences"); } if (!filterArgs.alignedInput && header.getSequenceDictionary() != null && !header.getSequenceDictionary().isEmpty()) { - logger.warn("--isHostAligned is false but there are one or more sequences in the BAM header"); + logger.warn("--is-host-aligned is false but there are one or more sequences in the BAM header"); } //Clear header sequences diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBuildReferenceTaxonomy.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBuildReferenceTaxonomy.java index 88901683c08..6b0db42de00 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBuildReferenceTaxonomy.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBuildReferenceTaxonomy.java @@ -122,7 +122,7 @@ public class PathSeqBuildReferenceTaxonomy extends CommandLineProgram { public Object doWork() { if (refseqCatalogPath == null && genbankCatalogPath == null) { - throw new UserException.BadInput("At least one of --refseqCatalogPath or --genbankCatalogPath must be specified"); + throw new UserException.BadInput("At least one of --refseq-catalog or --genbank-catalog must be specified"); } logger.info("Parsing reference and files... 
(this may take a few minutes)"); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBwaSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBwaSpark.java index 1a8652fd741..7f6318322c6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBwaSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqBwaSpark.java @@ -212,7 +212,7 @@ private boolean alignBam(final String inputBamPath, final PSBwaAlignerSpark alig protected void runTool(final JavaSparkContext ctx) { if (!readArguments.getReadFiles().isEmpty()) { - throw new UserException.BadInput("Please use --pairedInput or --unpairedInput instead of --input"); + throw new UserException.BadInput("Please use --paired-input or --unpaired-input instead of --input"); } final ReadsSparkSource readsSource = new ReadsSparkSource(ctx, readArguments.getReadValidationStringency()); @@ -220,7 +220,7 @@ protected void runTool(final JavaSparkContext ctx) { boolean bPairedSuccess = alignBam(inputPaired, aligner, true, ctx, readsSource); boolean bUnpairedSuccess = alignBam(inputUnpaired, aligner, false, ctx, readsSource); if (!bPairedSuccess && !bUnpairedSuccess) { - throw new UserException.BadInput("No reads were loaded. Ensure --pairedInput and/or --unpairedInput are set and valid."); + throw new UserException.BadInput("No reads were loaded. Ensure --paired-input and/or --unpaired-input are set and valid."); } aligner.close(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqPipelineSpark.java index 109a988d730..a7d89111288 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqPipelineSpark.java @@ -93,7 +93,7 @@ * Local mode:
* *- * gatk PathSeqFilterSpark \ + * gatk PathSeqPipelineSpark \ * --input input_reads.bam \ * --kmer-file host_kmers.bfi \ * --filter-bwa-image host_reference.img \ @@ -112,7 +112,7 @@ ** *Spark cluster on Google Cloud DataProc with 6 16-core / 208GB memory worker nodes:
* *- * gatk PathSeqFilterSpark \ + * gatk PathSeqPipelineSpark \ * --input gs://my-gcs-bucket/input_reads.bam \ * --kmer-file hdfs://my-cluster-m:8020//host_kmers.bfi \ * --filter-bwa-image /references/host_reference.img \ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqScoreSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqScoreSpark.java index fd005628dde..0a87bbcad55 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqScoreSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PathSeqScoreSpark.java @@ -186,7 +186,7 @@ static SAMFileHeader joinBamHeaders(final SAMFileHeader pairedHeader, final SAMF protected void runTool(final JavaSparkContext ctx) { if (!readArguments.getReadFiles().isEmpty()) { - throw new UserException.BadInput("Please use --pairedInput or --unpairedInput instead of --input"); + throw new UserException.BadInput("Please use --paired-input or --unpaired-input instead of --input"); } final ReadsSparkSource readsSource = new ReadsSparkSource(ctx, readArguments.getReadValidationStringency()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java index 1d0a220f2de..151f2434222 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/CombineGVCFs.java @@ -93,7 +93,7 @@ public final class CombineGVCFs extends MultiVariantWalkerGroupedOnStart { * span across a given genomic position (e.g. when scatter-gathering jobs across a compute farm). The option below enables users to break bands at * pre-defined positions. For example, a value of 10,000 would mean that we would ensure that no bands span across chr1:10000, chr1:20000, etc. 
* - * Note that the --convertToBasePairResolution argument is just a special case of this argument with a value of 1. + * Note that the --convert-to-base-pair-resolution argument is just a special case of this argument with a value of 1. */ @Argument(fullName=BREAK_BANDS_LONG_NAME, doc = "If > 0, reference bands will be broken up at genomic positions that are multiples of this number", optional=true) protected int multipleAtWhichToBreakBands = 0; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java index 9a80e89bf91..6883aefab90 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/filters/VariantFiltration.java @@ -87,9 +87,9 @@ public final class VariantFiltration extends VariantWalker { /** * Any variant which overlaps entries from the provided mask file will be filtered. If the user wants logic to be reversed, - * i.e. filter variants that do not overlap with provided mask, then argument -filterNotInMask can be used. + * i.e. filter variants that do not overlap with provided mask, then argument --filter-not-in-mask can be used. * Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used - * (e.g. if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). + * (e.g. if masking against Hapmap, use --mask-name=hapmap for the normal masking and --mask-name=not_hapmap for the reverse masking). */ @Argument(fullName="mask", shortName="mask", doc="Input mask", optional=true) public FeatureInput* * Created by David Benjamin on 1/31/17. 
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/CalculateGenotypePosteriors.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/CalculateGenotypePosteriors.java index 9e414ff6f92..e1596ea57e8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/CalculateGenotypePosteriors.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/CalculateGenotypePosteriors.java @@ -75,8 +75,8 @@ * By default, priors will be applied to each variant separately, provided each variant features data from at least * 10 called samples (no-calls do not count). SNP sites in the input callset that have a SNP at the matching site in * the supporting VCF will have priors applied based on the AC from the supporting samples and the input callset - * unless the --ignoreInputSamples flag is used. If a site is not called in the supporting VCF, priors will be - * applied using the discovered AC from the input samples unless the --discoveredACpriorsOff flag is used. + * unless the --ignore-input-samples flag is used. If a site is not called in the supporting VCF, priors will be + * applied using the discovered AC from the input samples unless the --discovered-allele-count-priors-off flag is used. * For any non-SNP sites in the input callset, flat priors are applied. * * @@ -103,7 +103,7 @@ * -V input.vcf.gz \ * -O output.vcf.gz \ * -ped family.ped \ - * --skipPopulationPriors + * --skip-population-priors *mask; @@ -142,20 +142,20 @@ public final class VariantFiltration extends VariantWalker { public Integer maskExtension = 0; /** - * When using the -mask argument, the maskName will be annotated in the variant record. - * Note that when using the -filter-not-in-mask argument to reverse the masking logic, + * When using the --mask argument, the mask-name will be annotated in the variant record. 
+ * Note that when using the --filter-not-in-mask argument to reverse the masking logic, * it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used - * (e.g. if masking against Hapmap, use -mask-name=hapmap for the normal masking and -mask-name=not_hapmap for the reverse masking). + * (e.g. if masking against Hapmap, use --mask-name=hapmap for the normal masking and --mask-name=not_hapmap for the reverse masking). */ @Argument(fullName=MASK_NAME_LONG_NAME, doc="The text to put in the FILTER field if a 'mask' is provided and overlaps with a variant call", optional=true) public String maskName = "Mask"; /** - * By default, if the -mask argument is used, any variant falling in a mask will be filtered. + * By default, if the --mask argument is used, any variant falling in a mask will be filtered. * If this argument is used, logic is reversed, and variants falling outside a given mask will be filtered. * Use case is, for example, if we have an interval list or BED file with "good" sites. * Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used - * (e.g. if masking against Hapmap, use -mask-name=hapmap for the normal masking and -mask-name=not_hapmap for the reverse masking). + * (e.g. if masking against Hapmap, use --mask-name=hapmap for the normal masking and --mask-name=not_hapmap for the reverse masking). 
*/ @Argument(fullName=FILTER_NOT_IN_MASK_LONG_NAME, doc="Filter records NOT in given input mask.", optional=true) public boolean filterRecordsNotInMask = false; @@ -215,7 +215,7 @@ private static boolean invertLogic(final boolean logic, final boolean invert){ } /** - * Prepend inverse phrase to description if --invertFilterExpression + * Prepend inverse phrase to description if --invert-filter-expression * * @param description the description * @return the description with inverse prepended if --invert_filter_expression diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java index ccfcdf83576..544b73eaf6b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java @@ -162,7 +162,7 @@ public GenotypeCalculationArgumentCollection( final GenotypeCalculationArgumentC * f) If user-defined values add to more than one, an error will be produced. * * If user wants completely flat priors, then user should specify the same value (=1/(2*N+1)) 2*N times,e.g. - * -inputPrior 0.33 -inputPrior 0.33 + * --input-prior 0.33 --input-prior 0.33 * for the single-sample diploid case. 
*/ @Advanced diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java index 770ed3435fd..ac40837f485 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java @@ -103,7 +103,7 @@ public class HaplotypeCallerArgumentCollection extends AssemblyBasedCallerArgume * If set, certain "early exit" optimizations in HaplotypeCaller, which aim to save compute and time by skipping * calculations if an ActiveRegion is determined to contain no variants, will be disabled. This is most likely to be useful if * you're using the -bamout argument to examine the placement of reads following reassembly and are interested in seeing the mapping of - * reads in regions with no variations. Setting the -forceActive and -dontTrimActiveRegions flags may also be necessary. + * reads in regions with no variations. Setting the --force-active and --dont-trim-active-regions flags may also be necessary. 
*/ @Advanced @Argument(fullName = "disable-optimizations", doc="Don't skip calculations in ActiveRegions with no variants", diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index f7633f53ccc..1292fd6d5a1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -268,7 +268,7 @@ private void validateAndInitializeArgs() { Utils.validateArg(hcArgs.likelihoodArgs.BASE_QUALITY_SCORE_THRESHOLD >= QualityUtils.MIN_USABLE_Q_SCORE, "BASE_QUALITY_SCORE_THRESHOLD must be greater than or equal to " + QualityUtils.MIN_USABLE_Q_SCORE + " (QualityUtils.MIN_USABLE_Q_SCORE)"); if ( emitReferenceConfidence() && samplesList.numberOfSamples() != 1 ) { - throw new CommandLineException.BadArgumentValue("--emitRefConfidence", "Can only be used in single sample mode currently. Use the sample_name argument to run on a single sample out of a multi-sample BAM file."); + throw new CommandLineException.BadArgumentValue("--emit-ref-confidence", "Can only be used in single sample mode currently. 
Use the --sample-name argument to run on a single sample out of a multi-sample BAM file."); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java index 469eca6c4ef..f262a5741a7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java @@ -19,7 +19,7 @@ public final class ReadThreadingAssemblerArgumentCollection implements Serializa // ----------------------------------------------------------------------------------------------- /** - * Multiple kmer sizes can be specified, using e.g. `-kmerSize 10 -kmerSize 25`. + * Multiple kmer sizes can be specified, using e.g. `--kmer-size 10 --kmer-size 25`. 
*/ @Advanced @Argument(fullName="kmer-size", doc="Kmer size to use in the read threading assembler", optional = true) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/validation/AnnotateVcfWithExpectedAlleleFraction.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/validation/AnnotateVcfWithExpectedAlleleFraction.java index 6a892426394..97de4e07841 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/validation/AnnotateVcfWithExpectedAlleleFraction.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/validation/AnnotateVcfWithExpectedAlleleFraction.java @@ -43,7 +43,7 @@ * gatk --java-options "-Xmx4g" AnnotateVcfWithExpectedAlleleFraction \ * -V input.vcf \ * -O output.vcf \ - * -mixingFractions mixingFractions.table + * --mixing-fractions mixingFractions.table * Apply frequency and HWE-based priors to the genotypes of a family without including the family allele counts @@ -112,7 +112,7 @@ * gatk --java-options "-Xmx4g" CalculateGenotypePosteriors \ * -V input.vcf.gz \ * -O output.vcf.gz \ - * --ignoreInputSamples + * --ignore-input-samples *
* gatk CNNVariantTrain \ - * -tensor-type reference \ - * -input-tensors-dir my_tensor_folder \ - * -model-name my_1d_model + * --tensor-type reference \ + * --input-tensor-dir my_tensor_folder \ + * --model-name my_1d_model ** *
* gatk CNNVariantTrain \ - * -input-tensors-dir my_tensor_folder \ - * -tensor-type read-tensor \ - * -model-name my_2d_model + * --input-tensor-dir my_tensor_folder \ + * --tensor-type read-tensor \ + * --model-name my_2d_model ** */ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/CNNVariantWriteTensors.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/CNNVariantWriteTensors.java index b850e5e8cb1..fd1cd1fc390 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/CNNVariantWriteTensors.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/CNNVariantWriteTensors.java @@ -28,7 +28,7 @@ *