Skip to content

Commit

Permalink
Change the top level directory from config to not include the release
Browse files Browse the repository at this point in the history
Pass the assembly_release_folder to run_release_for_assembly.py
Python interpreter is now taken from the currently used interpreter
  • Loading branch information
tcezard committed Jun 6, 2024
1 parent cb229e3 commit 4b173c7
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@


def get_release_properties_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, species_release_folder):
release_species_inventory_table, release_version):
with get_metadata_connection_handle(profile, private_config_xml_file) as metadata_connection_handle:
release_inventory_info_for_assembly = get_release_inventory_info_for_assembly(taxonomy_id, assembly_accession,
release_species_inventory_table,
Expand All @@ -36,13 +36,13 @@ def get_release_properties_for_assembly(private_config_xml_file, profile, taxono

def create_release_properties_file_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version,
species_release_folder):
assembly_species_release_folder = os.path.join(species_release_folder, assembly_accession)
os.makedirs(assembly_species_release_folder, exist_ok=True)
output_file = "{0}/{1}_release.properties".format(assembly_species_release_folder, assembly_accession)
release_properties = get_release_properties_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version,
species_release_folder)
assembly_release_folder):
os.makedirs(assembly_release_folder, exist_ok=True)
output_file = "{0}/{1}_release.properties".format(assembly_release_folder, assembly_accession)
release_properties = get_release_properties_for_assembly(
private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version
)
properties_string = SpringPropertiesGenerator(profile, private_config_xml_file).get_release_properties(
temp_mongo_db=release_properties['mongo_accessioning_db'],
job_name='ACCESSION_RELEASE_JOB',
Expand All @@ -51,7 +51,7 @@ def create_release_properties_file_for_assembly(private_config_xml_file, profile
fasta=release_properties['fasta_path'],
assembly_report=release_properties['report_path'],
contig_naming='SEQUENCE_NAME',
output_folder=assembly_species_release_folder
output_folder=assembly_release_folder
)
open(output_file, "w").write(properties_string)
return output_file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ process initiate_release_status_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.initiate_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
"""
}

Expand All @@ -39,7 +39,7 @@ process copy_accessioning_collections_to_embassy {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.copy_accessioning_collections_to_embassy --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --dump-dir $params.dump_dir 1>> $params.log_file 2>&1
"""
}

Expand All @@ -56,7 +56,7 @@ process run_release_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.run_release_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --release-jar-path $params.jar.release_pipeline 1>> $params.log_file 2>&1
"""
}

Expand All @@ -73,7 +73,7 @@ process merge_dbsnp_eva_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.merge_dbsnp_eva_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -90,7 +90,7 @@ process sort_bgzip_index_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.sort_bgzip_index_release_files --bgzip-path $params.executable.bgzip --bcftools-path $params.executable.bcftools --vcf-sort-script-path $params.executable.sort_vcf_sorted_chromosomes --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -107,7 +107,7 @@ process validate_release_vcf_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.validate_release_vcf_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder --vcf-validator-path $params.executable.vcf_validator --assembly-checker-path $params.executable.vcf_assembly_checker 1>> $params.log_file 2>&1
"""
}

Expand All @@ -124,7 +124,7 @@ process analyze_vcf_validation_results {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.analyze_vcf_validation_results --species-release-folder $params.assembly_folder --assembly-accession $params.assembly 1>> $params.log_file 2>&1
"""
}

Expand All @@ -141,7 +141,7 @@ process count_rs_ids_in_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.count_rs_ids_in_release_files --count-ids-script-path $params.executable.count_ids_in_vcf --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -158,7 +158,7 @@ process validate_rs_release_files {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.validate_rs_release_files --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --release-version $params.release_version --species-release-folder $params.assembly_folder 1>> $params.log_file 2>&1
"""
}

Expand All @@ -175,7 +175,7 @@ process update_sequence_names_to_ena {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.update_sequence_names_to_ena --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --species-release-folder $params.assembly_folder --sequence-name-converter-path $params.executable.convert_vcf_file --bcftools-path $params.executable.bcftools 1>> $params.log_file 2>&1
"""
}

Expand All @@ -192,6 +192,6 @@ process update_release_status_for_assembly {
script:
"""
export PYTHONPATH=$params.python_path
$params.executable.python.interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
$params.executable.python_interpreter -m run_release_in_embassy.update_release_status_for_assembly --private-config-xml-file $params.maven.settings_file --profile $params.maven.environment --release-species-inventory-table eva_progress_tracker.clustering_release_tracker --taxonomy-id $params.taxonomy --assembly-accession $params.assembly --release-version $params.release_version 1>> $params.log_file 2>&1
"""
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, species_release_folder, release_jar_path,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory):
exit_code = -1
try:
Expand All @@ -38,7 +38,7 @@ def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, asse
release_properties_file = create_release_properties_file_for_assembly(private_config_xml_file, profile,
taxonomy_id, assembly_accession,
release_species_inventory_table,
release_version, species_release_folder)
release_version, assembly_release_folder)
release_command = 'java -Xmx{0}g -jar {1} --spring.config.location=file:{2} -Dspring.data.mongodb.port={3}'\
.format(memory, release_jar_path, release_properties_file, mongo_port)
run_command_with_output("Running release pipeline for assembly: " + assembly_accession, release_command)
Expand All @@ -60,15 +60,15 @@ def run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, asse
@click.option("--release-species-inventory-table", default="eva_progress_tracker.clustering_release_tracker",
required=False)
@click.option("--release-version", help="ex: 2", type=int, required=True)
@click.option("--species-release-folder", required=True)
@click.option("--assembly-release-folder", required=True)
@click.option("--release-jar-path", required=True)
@click.option("--memory", help="Memory in GB. ex: 8", default=8, type=int, required=False)
@click.command()
def main(private_config_xml_file, profile, taxonomy_id, assembly_accession, release_species_inventory_table,
release_version, species_release_folder, release_jar_path, memory):
release_version, assembly_release_folder, release_jar_path, memory):
logging_config.add_stdout_handler()
run_release_for_assembly(private_config_xml_file, profile, taxonomy_id, assembly_accession,
release_species_inventory_table, release_version, species_release_folder, release_jar_path,
release_species_inventory_table, release_version, assembly_release_folder, release_jar_path,
memory)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@


def get_nextflow_params(taxonomy_id, assembly_accession, release_version):
dump_dir = os.path.join(get_species_release_folder(taxonomy_id), 'dumps')
release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession)
dump_dir = os.path.join(get_species_release_folder(release_version, taxonomy_id), 'dumps')
release_dir = get_assembly_release_folder(release_version, taxonomy_id, assembly_accession)
config_param = os.path.join(release_dir, f'nextflow_params_{taxonomy_id}_{assembly_accession}.yaml')
os.makedirs(dump_dir, exist_ok=True)
# Add the same python interpreter as the one we're using to use with the python step scripts
cfg['executable']['python_interpreter'] = sys.executable
yaml_data = {
'assembly': assembly_accession,
'dump_dir': dump_dir,
Expand Down Expand Up @@ -62,20 +64,25 @@ def get_run_release_for_assembly_nextflow():
return os.path.join(curr_dir, 'run_release_for_assembly.nf')


def get_release_log_file_name(taxonomy_id, assembly_accession):
return f"{get_assembly_release_folder(taxonomy_id, assembly_accession)}/release_{taxonomy_id}_{assembly_accession}.log"
def get_release_log_file_name(release_version, taxonomy_id, assembly_accession):
    """Build the path of the release log file for one assembly.

    The log file is named ``release_<taxonomy_id>_<assembly_accession>.log`` and is
    placed in the folder returned by ``get_assembly_release_folder`` for the same
    release version, taxonomy and assembly.
    """
    assembly_folder = get_assembly_release_folder(release_version, taxonomy_id, assembly_accession)
    log_name = f"release_{taxonomy_id}_{assembly_accession}.log"
    return f"{assembly_folder}/{log_name}"

@lru_cache
def get_release_folder(release_version):
    """Return the top-level output folder for a release, creating it if missing.

    The folder is ``release_<release_version>`` under the directory that ``cfg``
    reports for ``release`` / ``release_output``. Results are cached by
    ``lru_cache``, keyed on ``release_version``.
    """
    release_output_root = cfg.query('release', 'release_output')
    release_folder = os.path.join(release_output_root, f'release_{release_version}')
    # exist_ok=True makes this safe when the folder is already present.
    os.makedirs(release_folder, exist_ok=True)
    return release_folder

@lru_cache
def get_species_release_folder(taxonomy_id):
folder = os.path.join(cfg.query('release', 'release_output'), get_release_folder_name(taxonomy_id))
def get_species_release_folder(release_version, taxonomy_id):
folder = os.path.join(get_release_folder(release_version), get_release_folder_name(taxonomy_id))
os.makedirs(folder, exist_ok=True)
return folder


@lru_cache
def get_assembly_release_folder(taxonomy_id, assembly_accession):
folder = os.path.join(get_species_release_folder(taxonomy_id), assembly_accession)
def get_assembly_release_folder(release_version, taxonomy_id, assembly_accession):
folder = os.path.join(get_species_release_folder(release_version, taxonomy_id), assembly_accession)
os.makedirs(folder, exist_ok=True)
return folder

Expand All @@ -93,7 +100,7 @@ def run_release_for_species(taxonomy_id, release_assemblies, release_version, re
for assembly_accession in release_assemblies:
nextflow_params = get_nextflow_params(taxonomy_id, assembly_accession, release_version)
workflow_file_path = get_run_release_for_assembly_nextflow()
release_dir = get_assembly_release_folder(taxonomy_id, assembly_accession)
release_dir = get_assembly_release_folder(release_version, taxonomy_id, assembly_accession)
nextflow_config = get_nextflow_config()
workflow_command = ' '.join((
f"cd {release_dir} &&",
Expand Down Expand Up @@ -128,7 +135,7 @@ def load_config(*args):
cfg.load_config_file(
*args,
os.environ.get('RELEASE_CONFIG'),
'~/.release_config.yml'
os.path.expanduser('~/.release_config.yml')
)


Expand Down

0 comments on commit 4b173c7

Please sign in to comment.