Skip to content

Commit

Permalink
Merge pull request #4085 from vgteam/filter-paths
Browse files Browse the repository at this point in the history
Add ability to list reference and haplotype paths
  • Loading branch information
adamnovak committed Sep 18, 2023
2 parents 2f89fa0 + 172a355 commit 89eefd9
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
49 changes: 35 additions & 14 deletions src/subcommand/paths_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ void help_paths(char** argv) {
<< " -Q, --paths-by STR select the paths with the given name prefix" << endl
<< " -S, --sample STR select the haplotypes or reference paths for this sample" << endl
<< " -a, --variant-paths select the variant paths added by 'vg construct -a'" << endl
<< " -G, --generic-paths select the generic, non-reference, non-haplotype paths" << endl;
<< " -G, --generic-paths select the generic, non-reference, non-haplotype paths" << endl
<< " -R, --reference-paths select the reference paths" << endl
<< " -H, --haplotype-paths select the haplotype paths paths" << endl;
}

/// Chunk a path and emit it in Graph messages.
Expand Down Expand Up @@ -106,11 +108,8 @@ int main_paths(int argc, char** argv) {
string path_file;
bool select_alt_paths = false;
// What kinds of paths are we interested in?
unordered_set<PathSense> path_senses {
PathSense::REFERENCE,
PathSense::GENERIC,
PathSense::HAPLOTYPE
};
// Starts empty, but if the options put nothing in it we will add all senses.
unordered_set<PathSense> path_senses;
bool list_lengths = false;
bool list_metadata = false;
bool list_cyclicity = false;
Expand Down Expand Up @@ -143,6 +142,8 @@ int main_paths(int argc, char** argv) {
{"sample", required_argument, 0, 'S'},
{"variant-paths", no_argument, 0, 'a'},
{"generic-paths", no_argument, 0, 'G'},
{"reference-paths", no_argument, 0, 'R'},
{"haplotype-paths", no_argument, 0, 'H'},
{"coverage", no_argument, 0, 'c'},

// Hidden options for backward compatibility.
Expand All @@ -153,7 +154,7 @@ int main_paths(int argc, char** argv) {
};

int option_index = 0;
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:draGp:c",
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:draGRHp:c",
long_options, &option_index);

// Detect the end of the options.
Expand Down Expand Up @@ -239,8 +240,6 @@ int main_paths(int argc, char** argv) {

case 'S':
sample_name = optarg;
// We only care about things with references now.
path_senses = {PathSense::REFERENCE, PathSense::HAPLOTYPE};
selection_criteria++;
break;

Expand All @@ -250,9 +249,15 @@ int main_paths(int argc, char** argv) {
break;

case 'G':
// We only care about generic paths now.
path_senses = {PathSense::GENERIC};
selection_criteria++;
path_senses.insert(PathSense::GENERIC);
break;

case 'R':
path_senses.insert(PathSense::REFERENCE);
break;

case 'H':
path_senses.insert(PathSense::HAPLOTYPE);
break;

case 'c':
Expand Down Expand Up @@ -281,6 +286,22 @@ int main_paths(int argc, char** argv) {
}
}

if (path_senses.empty()) {
// No path senses were asked for explicitly.
// Fill in default ones.
path_senses = {
PathSense::REFERENCE,
PathSense::HAPLOTYPE
};
if (sample_name.empty()) {
// We can support paths with no sample.
path_senses.insert(PathSense::GENERIC);
}
} else {
// We asked for path senses specifically
selection_criteria++;
}

if (input_formats != 1 && input_formats != 2) {
std::cerr << "error: [vg paths] at least one input format (-x, -g) must be specified" << std::endl;
std::exit(EXIT_FAILURE);
Expand All @@ -303,7 +324,7 @@ int main_paths(int argc, char** argv) {
std::exit(EXIT_FAILURE);
}
if (selection_criteria > 1) {
std::cerr << "error: [vg paths] multiple selection criteria (-Q, -S, -a, -G, -p) cannot be used" << std::endl;
std::cerr << "error: [vg paths] multiple selection criteria (-Q, -S, -a, -G/-R/-H, -p) cannot be used" << std::endl;
std::exit(EXIT_FAILURE);
}
if (select_alt_paths && !gbwt_file.empty()) {
Expand Down Expand Up @@ -363,7 +384,7 @@ int main_paths(int argc, char** argv) {

string line;
while (getline(path_stream, line)) {
path_names.emplace(move(line));
path_names.emplace(std::move(line));
}
}

Expand Down
4 changes: 3 additions & 1 deletion test/t/11_vg_paths.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PATH=../bin:$PATH # for vg

export LC_ALL="C" # force a consistent sort order

plan tests 18
plan tests 20

vg construct -r small/x.fa -v small/x.vcf.gz -a > x.vg
vg construct -r small/x.fa -v small/x.vcf.gz > x2.vg
Expand All @@ -16,7 +16,9 @@ vg index -x x.xg -G x.gbwt -v small/x.vcf.gz x.vg
# List path/thread names from various input formats
is "$(vg paths --list -v x2.vg)" "x" "path listing works from vg"
is "$(vg paths --list -x x.xg)" "x" "path listing works from XG"
is "$(vg paths --list -x x.xg -G)" "x" "generic path listing works from XG"
is $(vg paths --list -g x.gbwt | wc -l) 2 "thread listing works from GBWT"
is $(vg paths --list -g x.gbwt -H | wc -l) 2 "haplotype thread listing works from GBWT"

# Select threads by name
is $(vg paths --list -Q "1#0#x#" -g x.gbwt | wc -l) 1 "thread selection by name prefix works correctly"
Expand Down

2 comments on commit 89eefd9

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17574 seconds

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch v1.51.0. View the full report here.

15 tests passed, 1 tests failed and 0 tests skipped in 17111 seconds

Failed tests:

  • test_sim_chr21_snp1kg_trained (2289 seconds)

Please sign in to comment.