-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
VS-1549 Add VAT to integration tests #9085
Changes from all commits
b7e44b1
db98831
d11cb2b
7b9cb64
ea3c082
de4a19b
6324fe5
ad8bbfc
bfbcb7c
9e991e0
fba4bd7
879599e
ebf544e
e87c588
05cdc13
747ca56
9a673f8
57d2e77
505e5f2
1e7f0f9
99dc3bd
8f56aa8
f9fd042
7a7256d
31210ec
26fc0f0
53385a7
7929086
bb95780
4efd1f6
b6cdfed
ecf4e2b
97acc25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ workflow GvsValidateVat { | |
String project_id | ||
String dataset_name | ||
String vat_table_name | ||
Boolean? is_small_callset | ||
String? cloud_sdk_docker | ||
String? variants_docker | ||
} | ||
|
@@ -25,20 +26,23 @@ workflow GvsValidateVat { | |
String effective_cloud_sdk_docker = select_first([cloud_sdk_docker, GetToolVersions.cloud_sdk_docker]) | ||
String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker]) | ||
|
||
call Utils.GetBQTableLastModifiedDatetime as SampleDateTime { | ||
input: | ||
project_id = project_id, | ||
fq_table = fq_vat_table, | ||
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
} | ||
# Defining is_small_callset allows us to run this WDL on a dataset that has not had samples loaded (for testing) | ||
if (!defined(is_small_callset)) { | ||
call Utils.GetBQTableLastModifiedDatetime as SampleDateTime { | ||
input: | ||
project_id = project_id, | ||
fq_table = fq_sample_table, | ||
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
} | ||
|
||
call Utils.GetNumSamplesLoaded { | ||
input: | ||
fq_sample_table = fq_sample_table, | ||
project_id = project_id, | ||
sample_table_timestamp = SampleDateTime.last_modified_timestamp, | ||
control_samples = false, | ||
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
call Utils.GetNumSamplesLoaded { | ||
input: | ||
fq_sample_table = fq_sample_table, | ||
project_id = project_id, | ||
sample_table_timestamp = SampleDateTime.last_modified_timestamp, | ||
control_samples = false, | ||
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
} | ||
} | ||
|
||
call Utils.GetBQTableLastModifiedDatetime as VatDateTime { | ||
|
@@ -152,8 +156,9 @@ workflow GvsValidateVat { | |
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
} | ||
|
||
# only check certain things if the callset is larger than 10,000 samples (a guess) | ||
Boolean callset_is_small = GetNumSamplesLoaded.num_samples < 10000 | ||
# Use the input boolean `is_small_callset` if it is defined; | |
# otherwise use the `GetNumSamplesLoaded` task to count the samples in the callset and set the flag if there are fewer than 10000. | |
Boolean callset_is_small = select_first([is_small_callset, select_first([GetNumSamplesLoaded.num_samples, 1]) < 10000]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does this do? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe a comment would be helpful, or am I just WDL-illiterate? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm adding a comment. Basically just checking if the `is_small_callset` input is defined and, if it is not, falling back to the sample count from `GetNumSamplesLoaded`. |
||
if (!callset_is_small) { | ||
call ClinvarSignificance { | ||
input: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ version 1.0 | |
|
||
import "GvsQuickstartVcfIntegration.wdl" as QuickstartVcfIntegration | ||
import "GvsQuickstartHailIntegration.wdl" as QuickstartHailIntegration | ||
import "GvsQuickstartVATIntegration.wdl" as QuickstartVATIntegration | ||
import "../GvsJointVariantCalling.wdl" as JointVariantCalling | ||
import "../GvsUtils.wdl" as Utils | ||
|
||
|
@@ -14,6 +15,8 @@ workflow GvsQuickstartIntegration { | |
Boolean run_exome_integration = true | ||
Boolean run_beta_integration = true | ||
Boolean run_bge_integration = true | ||
Boolean run_vat_integration = true | ||
Boolean run_vat_integration_test_from_vds = true # If false, will use sites-only VCF | ||
String sample_id_column_name = "sample_id" | ||
String vcf_files_column_name = "hg38_reblocked_gvcf" | ||
String vcf_index_files_column_name = "hg38_reblocked_gvcf_index" | ||
|
@@ -25,6 +28,7 @@ workflow GvsQuickstartIntegration { | |
String? cloud_sdk_docker | ||
String? cloud_sdk_slim_docker | ||
String? variants_docker | ||
String? variants_nirvana_docker | ||
String? gatk_docker | ||
String? hail_version | ||
Boolean chr20_X_Y_only = true | ||
|
@@ -36,6 +40,7 @@ workflow GvsQuickstartIntegration { | |
File full_exome_interval_list = "gs://gcp-public-data--broad-references/hg38/v0/bge_exome_calling_regions.v1.1.interval_list" | ||
String expected_subdir = if (!chr20_X_Y_only) then "all_chrs/" else "" | ||
File expected_output_prefix = "gs://gvs-internal-quickstart/integration/2024-10-29/" + expected_subdir | ||
File truth_data_prefix = "gs://gvs-internal-quickstart/integration/test_data/2025-01-17/" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you set this value in your method config during testing to avoid accidentally committing it like this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But I think this should be a test input. It's where the input VDS and sites-only VCFs are. If you change this to a new path (say, to update our VDS), you're going to (presumably) break the test. |
||
|
||
# WDL 1.0 trick to set a variable ('none') to be undefined. | ||
if (false) { | ||
|
@@ -53,6 +58,7 @@ workflow GvsQuickstartIntegration { | |
String effective_cloud_sdk_docker = select_first([cloud_sdk_docker, GetToolVersions.cloud_sdk_docker]) | ||
String effective_cloud_sdk_slim_docker = select_first([cloud_sdk_slim_docker, GetToolVersions.cloud_sdk_slim_docker]) | ||
String effective_variants_docker = select_first([variants_docker, GetToolVersions.variants_docker]) | ||
String effective_variants_nirvana_docker = select_first([variants_nirvana_docker, GetToolVersions.variants_nirvana_docker]) | ||
String effective_gatk_docker = select_first([gatk_docker, GetToolVersions.gatk_docker]) | ||
String effective_hail_version = select_first([hail_version, GetToolVersions.hail_version]) | ||
|
||
|
@@ -73,6 +79,10 @@ workflow GvsQuickstartIntegration { | |
} | ||
} | ||
|
||
String workspace_bucket = GetToolVersions.workspace_bucket | ||
String workspace_id = GetToolVersions.workspace_id | ||
String submission_id = GetToolVersions.submission_id | ||
gbggrant marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# Note for `GvsQuickstartIntegration` we use the git_branch_or_tag *input* and its corresponding git hash. This is not | ||
# necessarily the same as the branch name selected in Terra for the integration `GvsQuickstartIntegration` workflow, | ||
# though in practice likely they are the same. | ||
|
@@ -99,9 +109,9 @@ workflow GvsQuickstartIntegration { | |
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker, | ||
variants_docker = effective_variants_docker, | ||
gatk_docker = effective_gatk_docker, | ||
workspace_bucket = GetToolVersions.workspace_bucket, | ||
workspace_id = GetToolVersions.workspace_id, | ||
submission_id = GetToolVersions.submission_id, | ||
workspace_bucket = workspace_bucket, | ||
workspace_id = workspace_id, | ||
submission_id = submission_id, | ||
hail_version = effective_hail_version, | ||
maximum_alternate_alleles = maximum_alternate_alleles, | ||
ploidy_table_name = ploidy_table_name, | ||
|
@@ -139,9 +149,9 @@ workflow GvsQuickstartIntegration { | |
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker, | ||
variants_docker = effective_variants_docker, | ||
gatk_docker = effective_gatk_docker, | ||
workspace_bucket = GetToolVersions.workspace_bucket, | ||
workspace_id = GetToolVersions.workspace_id, | ||
submission_id = GetToolVersions.submission_id, | ||
workspace_bucket = workspace_bucket, | ||
workspace_id = workspace_id, | ||
submission_id = submission_id, | ||
maximum_alternate_alleles = maximum_alternate_alleles, | ||
} | ||
call QuickstartVcfIntegration.GvsQuickstartVcfIntegration as QuickstartVcfVQSRIntegration { | ||
|
@@ -166,9 +176,9 @@ workflow GvsQuickstartIntegration { | |
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker, | ||
variants_docker = effective_variants_docker, | ||
gatk_docker = effective_gatk_docker, | ||
workspace_bucket = GetToolVersions.workspace_bucket, | ||
workspace_id = GetToolVersions.workspace_id, | ||
submission_id = GetToolVersions.submission_id, | ||
workspace_bucket = workspace_bucket, | ||
workspace_id = workspace_id, | ||
submission_id = submission_id, | ||
maximum_alternate_alleles = maximum_alternate_alleles, | ||
} | ||
|
||
|
@@ -212,9 +222,9 @@ workflow GvsQuickstartIntegration { | |
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker, | ||
variants_docker = effective_variants_docker, | ||
gatk_docker = effective_gatk_docker, | ||
workspace_bucket = GetToolVersions.workspace_bucket, | ||
workspace_id = GetToolVersions.workspace_id, | ||
submission_id = GetToolVersions.submission_id, | ||
workspace_bucket = workspace_bucket, | ||
workspace_id = workspace_id, | ||
submission_id = submission_id, | ||
maximum_alternate_alleles = maximum_alternate_alleles, | ||
target_interval_list = target_interval_list, | ||
} | ||
|
@@ -251,9 +261,9 @@ workflow GvsQuickstartIntegration { | |
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker, | ||
variants_docker = effective_variants_docker, | ||
gatk_docker = effective_gatk_docker, | ||
workspace_bucket = GetToolVersions.workspace_bucket, | ||
workspace_id = GetToolVersions.workspace_id, | ||
submission_id = GetToolVersions.submission_id, | ||
workspace_bucket = workspace_bucket, | ||
workspace_id = workspace_id, | ||
submission_id = submission_id, | ||
maximum_alternate_alleles = maximum_alternate_alleles, | ||
target_interval_list = target_interval_list, | ||
} | ||
|
@@ -270,8 +280,6 @@ workflow GvsQuickstartIntegration { | |
if (run_beta_integration) { | ||
String project_id = "gvs-internal" | ||
|
||
String workspace_bucket = GetToolVersions.workspace_bucket | ||
String submission_id = GetToolVersions.submission_id | ||
String extract_output_gcs_dir = "~{workspace_bucket}/output_vcfs/by_submission_id/~{submission_id}/beta" | ||
Boolean collect_variant_calling_metrics = true | ||
|
||
|
@@ -298,9 +306,9 @@ workflow GvsQuickstartIntegration { | |
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
variants_docker = effective_variants_docker, | ||
gatk_docker = effective_gatk_docker, | ||
workspace_bucket = GetToolVersions.workspace_bucket, | ||
workspace_id = GetToolVersions.workspace_id, | ||
submission_id = GetToolVersions.submission_id, | ||
workspace_bucket = workspace_bucket, | ||
workspace_id = workspace_id, | ||
submission_id = submission_id, | ||
maximum_alternate_alleles = maximum_alternate_alleles, | ||
git_branch_or_tag = git_branch_or_tag, | ||
sample_id_column_name = sample_id_column_name, | ||
|
@@ -319,6 +327,28 @@ workflow GvsQuickstartIntegration { | |
} | ||
} | ||
|
||
if (run_vat_integration) { | ||
String extract_vat_output_gcs_dir = "~{workspace_bucket}/output_vat/by_submission_id/~{submission_id}/vat" | ||
|
||
call QuickstartVATIntegration.GvsQuickstartVATIntegration { | ||
input: | ||
git_branch_or_tag = git_branch_or_tag, | ||
git_hash = GetToolVersions.git_hash, | ||
use_default_dockers = use_default_dockers, | ||
truth_data_prefix = truth_data_prefix, | ||
expected_output_prefix = expected_output_prefix, | ||
dataset_suffix = "vat", | ||
output_path = extract_vat_output_gcs_dir, | ||
use_vds_as_input = run_vat_integration_test_from_vds, | ||
basic_docker = effective_basic_docker, | ||
cloud_sdk_docker = effective_cloud_sdk_docker, | ||
cloud_sdk_slim_docker = effective_cloud_sdk_slim_docker, | ||
variants_docker = effective_variants_docker, | ||
variants_nirvana_docker = effective_variants_nirvana_docker, | ||
gatk_docker = effective_gatk_docker, | ||
} | ||
} | ||
|
||
output { | ||
String recorded_git_hash = GetToolVersions.git_hash | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not "qualified"? What's the goal of this? Maybe we should just pass the name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I needed `vat_table_name` to be an output of the WDL and couldn't give an internal variable the same name, so I changed it to `effective_vat_table_name`, following the naming pattern used by a lot of the variables nearby.