Skip to content

Commit

Permalink
Add ability to list reference and haplotype paths
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Sep 15, 2023
1 parent 01ecaa3 commit 172a355
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
49 changes: 35 additions & 14 deletions src/subcommand/paths_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ void help_paths(char** argv) {
<< " -Q, --paths-by STR select the paths with the given name prefix" << endl
<< " -S, --sample STR select the haplotypes or reference paths for this sample" << endl
<< " -a, --variant-paths select the variant paths added by 'vg construct -a'" << endl
<< " -G, --generic-paths select the generic, non-reference, non-haplotype paths" << endl;
<< " -G, --generic-paths select the generic, non-reference, non-haplotype paths" << endl
<< " -R, --reference-paths select the reference paths" << endl
<< " -H, --haplotype-paths select the haplotype paths paths" << endl;
}

/// Chunk a path and emit it in Graph messages.
Expand Down Expand Up @@ -106,11 +108,8 @@ int main_paths(int argc, char** argv) {
string path_file;
bool select_alt_paths = false;
// What kinds of paths are we interested in?
unordered_set<PathSense> path_senses {
PathSense::REFERENCE,
PathSense::GENERIC,
PathSense::HAPLOTYPE
};
// Starts empty, but if the options put nothing in it we will add all senses.
unordered_set<PathSense> path_senses;
bool list_lengths = false;
bool list_metadata = false;
bool list_cyclicity = false;
Expand Down Expand Up @@ -143,6 +142,8 @@ int main_paths(int argc, char** argv) {
{"sample", required_argument, 0, 'S'},
{"variant-paths", no_argument, 0, 'a'},
{"generic-paths", no_argument, 0, 'G'},
{"reference-paths", no_argument, 0, 'R'},
{"haplotype-paths", no_argument, 0, 'H'},
{"coverage", no_argument, 0, 'c'},

// Hidden options for backward compatibility.
Expand All @@ -153,7 +154,7 @@ int main_paths(int argc, char** argv) {
};

int option_index = 0;
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:draGp:c",
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:draGRHp:c",
long_options, &option_index);

// Detect the end of the options.
Expand Down Expand Up @@ -239,8 +240,6 @@ int main_paths(int argc, char** argv) {

case 'S':
sample_name = optarg;
// We only care about things with references now.
path_senses = {PathSense::REFERENCE, PathSense::HAPLOTYPE};
selection_criteria++;
break;

Expand All @@ -250,9 +249,15 @@ int main_paths(int argc, char** argv) {
break;

case 'G':
// We only care about generic paths now.
path_senses = {PathSense::GENERIC};
selection_criteria++;
path_senses.insert(PathSense::GENERIC);
break;

case 'R':
path_senses.insert(PathSense::REFERENCE);
break;

case 'H':
path_senses.insert(PathSense::HAPLOTYPE);
break;

case 'c':
Expand Down Expand Up @@ -281,6 +286,22 @@ int main_paths(int argc, char** argv) {
}
}

if (path_senses.empty()) {
// No path senses were asked for explicitly.
// Fill in default ones.
path_senses = {
PathSense::REFERENCE,
PathSense::HAPLOTYPE
};
if (sample_name.empty()) {
// We can support paths with no sample.
path_senses.insert(PathSense::GENERIC);
}
} else {
// We asked for path senses specifically
selection_criteria++;
}

if (input_formats != 1 && input_formats != 2) {
std::cerr << "error: [vg paths] at least one input format (-x, -g) must be specified" << std::endl;
std::exit(EXIT_FAILURE);
Expand All @@ -303,7 +324,7 @@ int main_paths(int argc, char** argv) {
std::exit(EXIT_FAILURE);
}
if (selection_criteria > 1) {
std::cerr << "error: [vg paths] multiple selection criteria (-Q, -S, -a, -G, -p) cannot be used" << std::endl;
std::cerr << "error: [vg paths] multiple selection criteria (-Q, -S, -a, -G/-R/-H, -p) cannot be used" << std::endl;
std::exit(EXIT_FAILURE);
}
if (select_alt_paths && !gbwt_file.empty()) {
Expand Down Expand Up @@ -363,7 +384,7 @@ int main_paths(int argc, char** argv) {

string line;
while (getline(path_stream, line)) {
path_names.emplace(move(line));
path_names.emplace(std::move(line));
}
}

Expand Down
4 changes: 3 additions & 1 deletion test/t/11_vg_paths.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PATH=../bin:$PATH # for vg

export LC_ALL="C" # force a consistent sort order

plan tests 18
plan tests 20

vg construct -r small/x.fa -v small/x.vcf.gz -a > x.vg
vg construct -r small/x.fa -v small/x.vcf.gz > x2.vg
Expand All @@ -16,7 +16,9 @@ vg index -x x.xg -G x.gbwt -v small/x.vcf.gz x.vg
# List path/thread names from various input formats
is "$(vg paths --list -v x2.vg)" "x" "path listing works from vg"
is "$(vg paths --list -x x.xg)" "x" "path listing works from XG"
is "$(vg paths --list -x x.xg -G)" "x" "generic path listing works from XG"
is $(vg paths --list -g x.gbwt | wc -l) 2 "thread listing works from GBWT"
is $(vg paths --list -g x.gbwt -H | wc -l) 2 "haplotype thread listing works from GBWT"

# Select threads by name
is $(vg paths --list -Q "1#0#x#" -g x.gbwt | wc -l) 1 "thread selection by name prefix works correctly"
Expand Down

1 comment on commit 172a355

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch filter-paths. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 18323 seconds

Please sign in to comment.