diff --git a/src/constructor.cpp b/src/constructor.cpp index f5f4653b921..0c36ceaaae5 100644 --- a/src/constructor.cpp +++ b/src/constructor.cpp @@ -696,7 +696,7 @@ namespace vg { // Name the variant and place it in the order that we'll // actually construct nodes in (see utility.hpp) - string variant_name = make_variant_id(*variant); + string variant_name = sha1_variant_name ? make_variant_id(*variant) : get_or_make_variant_id(*variant); if (variants_by_name.count(variant_name)) { // Some VCFs may include multiple variants at the same // position with the same ref and alt. We will only take the diff --git a/src/constructor.hpp b/src/constructor.hpp index ede856cf46a..9b682a42855 100644 --- a/src/constructor.hpp +++ b/src/constructor.hpp @@ -80,6 +80,7 @@ class Constructor : public Progressive, public NameMapper { // _alt_6079b4a76d0ddd6b4b44aeb14d738509e266961c_0 and // _alt_6079b4a76d0ddd6b4b44aeb14d738509e266961c_1? bool alt_paths = false; + bool sha1_variant_name = true; // Should we handle structural variants in the VCF file, // or at least the ones we know how to? diff --git a/src/subcommand/construct_main.cpp b/src/subcommand/construct_main.cpp index 153aa2a2cb3..53ea4a20fd8 100644 --- a/src/subcommand/construct_main.cpp +++ b/src/subcommand/construct_main.cpp @@ -25,7 +25,9 @@ void help_construct(char** argv) { << " -r, --reference FILE input FASTA reference (may repeat)" << endl << " -v, --vcf FILE input VCF (may repeat)" << endl << " -n, --rename V=F match contig V in the VCFs to contig F in the FASTAs (may repeat)" << endl - << " -a, --alt-paths save paths for alts of variants by variant ID" << endl + << " -a, --alt-paths save paths for alts of variants by SHA1 hash" << endl + << " -A, --alt-paths-plain save paths for alts of variants by variant ID if possible, otherwise SHA1" << endl + << " (IDs must be unique across all input VCFs)" << endl << " -R, --region REGION specify a VCF contig name or 1-based inclusive region (may repeat, if on different contigs)" << endl << " -C, --region-is-chrom don't attempt to parse the regions (use when the reference" << endl << " sequence name could be inadvertently parsed as a region)" << endl @@ -87,6 +89,7 @@ int main_construct(int argc, char** argv) { {"drop-msa-paths", no_argument, 0, 'd'}, {"rename", required_argument, 0, 'n'}, {"alt-paths", no_argument, 0, 'a'}, + {"alt-paths-plain", no_argument, 0, 'A'}, {"handle-sv", no_argument, 0, 'S'}, {"insertions", required_argument, 0, 'I'}, {"progress", no_argument, 0, 'p'}, @@ -103,7 +106,7 @@ int main_construct(int argc, char** argv) { }; int option_index = 0; - c = getopt_long (argc, argv, "v:r:n:ph?z:t:R:m:aCfl:SI:M:dF:iN", + c = getopt_long (argc, argv, "v:r:n:ph?z:t:R:m:aACfl:SI:M:dF:iN", long_options, &option_index); /* Detect the end of the options. */ @@ -170,6 +173,11 @@ int main_construct(int argc, char** argv) { constructor.alt_paths = true; break; + case 'A': + constructor.alt_paths = true; + constructor.sha1_variant_name = false; + break; + case 'p': show_progress = true; break;