Skip to content

Commit

Permalink
Merge pull request #56 from apriltuesday/updates-for-testing
Browse files Browse the repository at this point in the history
EVA-3659 - Updates for testing session
  • Loading branch information
apriltuesday authored Sep 17, 2024
2 parents 9a18cc1 + 4058a1e commit 5222976
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 26 deletions.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ The paths to the VCF files are provided in the Files section of the metadata and
This allows us to support different assemblies for each VCF file.
Please check the below sections `The metadata spreadsheet` and `The metadata JSON` for the format and options available in metadata files.

### The metadata spreadsheet
#### The metadata spreadsheet

The metadata template can be found within the etc folder at `eva_sub_cli/etc/EVA_Submission_template.xlsx`
It should be populated following the instructions provided within the template.

### The metadata JSON
#### The metadata JSON

The metadata can also be provided via a JSON file which should conform to the schema located at
`eva_sub_cli/etc/eva_schema.json`
Expand Down Expand Up @@ -114,3 +114,9 @@ or
eva-sub-cli.py --metadata_xlsx metadata_spreadsheet.xlsx --submission_dir submission_dir --tasks SUBMIT
```
Will only submit the data and not validate.

### Shallow validation

If you are working with large VCF files and find that validation takes a very long time, you can add the
argument `--shallow` to the command, which will validate only the first 10,000 lines in each VCF. Note that running
shallow validation will **not** be sufficient for actual submission.
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM python:3.10

ENV vcf_validator_version=0.9.4
ENV vcf_validator_version=0.9.7
ENV NXF_VER=22.10.6

WORKDIR /opt
Expand Down
39 changes: 20 additions & 19 deletions eva_sub_cli/executables/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,40 +35,42 @@ def validate_command_line_arguments(args, argparser):
print(f"'{args.submission_dir}' does not have write permissions or is not a directory.")
sys.exit(1)


def parse_args(cmd_line_args):
argparser = ArgumentParser(prog='eva-sub-cli', description='EVA Submission CLI - validate and submit data to EVA')
argparser = ArgumentParser(prog='eva-sub-cli',
description='EVA Submission CLI - validate and submit data to EVA. '
'For full details, please see https://github.com/EBIvariation/eva-sub-cli')
argparser.add_argument('--version', action='version', version=f'%(prog)s {eva_sub_cli.__version__}')
argparser.add_argument('--submission_dir', required=True, type=str,
help='Path to the directory where all processing will be done '
'and submission info is/will be stored')
help='Path to the directory where all processing is done and submission info is stored')
vcf_group = argparser.add_argument_group(
'Input VCF and assembly',
"Specify the VCF files and associated assembly with the following options. If you used different assemblies "
"for different VCF files then include these in the metadata file."
"for different VCF files, then you must include these in the metadata file rather than specifying them here."
)
vcf_group.add_argument('--vcf_files', nargs='+', help="One or several vcf files to validate")
vcf_group.add_argument('--vcf_files', nargs='+', help="One or more VCF files to validate")
vcf_group.add_argument('--reference_fasta',
help="The fasta file containing the reference genome from which the variants were derived")
help="The FASTA file containing the reference genome from which the variants were derived")

metadata_group = argparser.add_argument_group('Metadata', 'Specify the metadata in a spreadsheet or in a JSON file')
metadata_group = metadata_group.add_mutually_exclusive_group(required=True)
metadata_group.add_argument("--metadata_json",
help="Json file that describe the project, analysis, samples and files")
help="JSON file that describes the project, analysis, samples and files")
metadata_group.add_argument("--metadata_xlsx",
help="Excel spreadsheet that describe the project, analysis, samples and files")
help="Excel spreadsheet that describes the project, analysis, samples and files")
argparser.add_argument('--tasks', nargs='+', choices=[VALIDATE, SUBMIT], default=[SUBMIT], type=str.lower,
help='Select a task to perform. Selecting VALIDATE will run the validation regardless of the'
' outcome of previous runs. Selecting SUBMIT will run validate only if the validation'
' was not performed successfully before and then run the submission.')
help='Select a task to perform (default SUBMIT). VALIDATE will run the validation'
' regardless of the outcome of previous runs. SUBMIT will run validate only if'
' the validation was not performed successfully before and then run the submission.')
argparser.add_argument('--executor', choices=[DOCKER, NATIVE], default=NATIVE, type=str.lower,
help='Select an execution type for running validation (default native)')
credential_group = argparser.add_argument_group('Credential', 'Specify the Webin credential you want to use to '
'upload to the EVA')
credential_group.add_argument("--username", help="Username used for connecting to the ENA webin account")
credential_group.add_argument("--password", help="Password used for connecting to the ENA webin account")
help='Select the execution type for running validation (default native)')
credential_group = argparser.add_argument_group('Credentials', 'Specify the ENA Webin credentials you want to use '
'to submit to the EVA')
credential_group.add_argument("--username", help="Username for your ENA Webin account")
credential_group.add_argument("--password", help="Password for your ENA Webin account")
argparser.add_argument('--shallow', action='store_true', default=False,
help='Set the validation to be performed on the first 10000 records of the VCF. '
'Only applies if the number of record exceed 10000')
                                'Only applies if the number of records exceeds 10,000')
argparser.add_argument('--debug', action='store_true', default=False,
help='Set the script to output debug messages')
args = argparser.parse_args(cmd_line_args)
Expand All @@ -77,7 +79,6 @@ def parse_args(cmd_line_args):


def main():

args = parse_args(sys.argv[1:])

args.submission_dir = os.path.abspath(args.submission_dir)
Expand All @@ -96,4 +97,4 @@ def main():
except SubmissionNotFoundException as snfe:
print(f'{snfe}. Please contact EVA Helpdesk')
except SubmissionStatusException as sse:
print(f'{sse}. Please try again later. If the problem persists, please contact EVA Helpdesk')
print(f'{sse}. Please try again later. If the problem persists, please contact EVA Helpdesk')
2 changes: 1 addition & 1 deletion eva_sub_cli/validators/docker_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
logger = logging_config.get_logger(__name__)

container_image = 'ebivariation/eva-sub-cli'
container_tag = 'v0.0.1.dev17'
container_tag = 'v0.0.1'
container_validation_dir = '/opt/vcf_validation'
container_validation_output_dir = 'vcf_validation_output'

Expand Down
3 changes: 2 additions & 1 deletion eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,5 +478,6 @@ def create_reports(self):
file_path = os.path.join(self.output_dir, 'report.html')
with open(file_path, "w") as f:
f.write(report_html)
self.info(f'View the validation report in your browser: {file_path}')
self.info(f'Validation result: {"SUCCESS" if self.verify_ready_for_submission_to_eva() else "FAILURE"}')
self.info(f'View the full report in your browser: {file_path}')
return file_path
4 changes: 2 additions & 2 deletions tests/test_docker_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ def test_validate(self):

with open(assembly_check_log_file) as assembly_check_log_file:
assembly_check_logs = assembly_check_log_file.readlines()
self.assertEqual('[info] Number of matches: 247/247\n', assembly_check_logs[5])
self.assertEqual('[info] Percentage of matches: 100%\n', assembly_check_logs[6])
self.assertEqual('[info] Number of matches: 247/247\n', assembly_check_logs[4])
self.assertEqual('[info] Percentage of matches: 100%\n', assembly_check_logs[5])

# Assert Samples concordance
expected_checker = {
Expand Down

0 comments on commit 5222976

Please sign in to comment.