Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to list reference and haplotype paths #4085

Merged
merged 1 commit into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 35 additions & 14 deletions src/subcommand/paths_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ void help_paths(char** argv) {
<< " -Q, --paths-by STR select the paths with the given name prefix" << endl
<< " -S, --sample STR select the haplotypes or reference paths for this sample" << endl
<< " -a, --variant-paths select the variant paths added by 'vg construct -a'" << endl
<< " -G, --generic-paths select the generic, non-reference, non-haplotype paths" << endl;
<< " -G, --generic-paths select the generic, non-reference, non-haplotype paths" << endl
<< " -R, --reference-paths select the reference paths" << endl
<< " -H, --haplotype-paths select the haplotype paths paths" << endl;
}

/// Chunk a path and emit it in Graph messages.
Expand Down Expand Up @@ -106,11 +108,8 @@ int main_paths(int argc, char** argv) {
string path_file;
bool select_alt_paths = false;
// What kinds of paths are we interested in?
unordered_set<PathSense> path_senses {
PathSense::REFERENCE,
PathSense::GENERIC,
PathSense::HAPLOTYPE
};
// Starts empty, but if the options put nothing in it we will add all senses.
unordered_set<PathSense> path_senses;
bool list_lengths = false;
bool list_metadata = false;
bool list_cyclicity = false;
Expand Down Expand Up @@ -143,6 +142,8 @@ int main_paths(int argc, char** argv) {
{"sample", required_argument, 0, 'S'},
{"variant-paths", no_argument, 0, 'a'},
{"generic-paths", no_argument, 0, 'G'},
{"reference-paths", no_argument, 0, 'R'},
{"haplotype-paths", no_argument, 0, 'H'},
{"coverage", no_argument, 0, 'c'},

// Hidden options for backward compatibility.
Expand All @@ -153,7 +154,7 @@ int main_paths(int argc, char** argv) {
};

int option_index = 0;
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:draGp:c",
c = getopt_long (argc, argv, "hLXv:x:g:Q:VEMCFAS:Tq:draGRHp:c",
long_options, &option_index);

// Detect the end of the options.
Expand Down Expand Up @@ -239,8 +240,6 @@ int main_paths(int argc, char** argv) {

case 'S':
sample_name = optarg;
// We only care about things with references now.
path_senses = {PathSense::REFERENCE, PathSense::HAPLOTYPE};
selection_criteria++;
break;

Expand All @@ -250,9 +249,15 @@ int main_paths(int argc, char** argv) {
break;

case 'G':
// We only care about generic paths now.
path_senses = {PathSense::GENERIC};
selection_criteria++;
path_senses.insert(PathSense::GENERIC);
break;

case 'R':
path_senses.insert(PathSense::REFERENCE);
break;

case 'H':
path_senses.insert(PathSense::HAPLOTYPE);
break;

case 'c':
Expand Down Expand Up @@ -281,6 +286,22 @@ int main_paths(int argc, char** argv) {
}
}

if (path_senses.empty()) {
// No path senses were asked for explicitly.
// Fill in default ones.
path_senses = {
PathSense::REFERENCE,
PathSense::HAPLOTYPE
};
if (sample_name.empty()) {
// We can support paths with no sample.
path_senses.insert(PathSense::GENERIC);
}
} else {
// We asked for path senses specifically
selection_criteria++;
}

if (input_formats != 1 && input_formats != 2) {
std::cerr << "error: [vg paths] at least one input format (-x, -g) must be specified" << std::endl;
std::exit(EXIT_FAILURE);
Expand All @@ -303,7 +324,7 @@ int main_paths(int argc, char** argv) {
std::exit(EXIT_FAILURE);
}
if (selection_criteria > 1) {
std::cerr << "error: [vg paths] multiple selection criteria (-Q, -S, -a, -G, -p) cannot be used" << std::endl;
std::cerr << "error: [vg paths] multiple selection criteria (-Q, -S, -a, -G/-R/-H, -p) cannot be used" << std::endl;
std::exit(EXIT_FAILURE);
}
if (select_alt_paths && !gbwt_file.empty()) {
Expand Down Expand Up @@ -363,7 +384,7 @@ int main_paths(int argc, char** argv) {

string line;
while (getline(path_stream, line)) {
path_names.emplace(move(line));
path_names.emplace(std::move(line));
}
}

Expand Down
4 changes: 3 additions & 1 deletion test/t/11_vg_paths.t
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PATH=../bin:$PATH # for vg

export LC_ALL="C" # force a consistent sort order

plan tests 18
plan tests 20

vg construct -r small/x.fa -v small/x.vcf.gz -a > x.vg
vg construct -r small/x.fa -v small/x.vcf.gz > x2.vg
Expand All @@ -16,7 +16,9 @@ vg index -x x.xg -G x.gbwt -v small/x.vcf.gz x.vg
# List path/thread names from various input formats
is "$(vg paths --list -v x2.vg)" "x" "path listing works from vg"
is "$(vg paths --list -x x.xg)" "x" "path listing works from XG"
is "$(vg paths --list -x x.xg -G)" "x" "generic path listing works from XG"
is $(vg paths --list -g x.gbwt | wc -l) 2 "thread listing works from GBWT"
is $(vg paths --list -g x.gbwt -H | wc -l) 2 "haplotype thread listing works from GBWT"

# Select threads by name
is $(vg paths --list -Q "1#0#x#" -g x.gbwt | wc -l) 1 "thread selection by name prefix works correctly"
Expand Down
Loading