diff --git a/Jenkinsfile b/Jenkinsfile index ebb28fb2..3372b9e0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -31,13 +31,6 @@ spec: volumeMounts: - name: jenkins-docker-cfg mountPath: /kaniko/.docker - - name: crane - workingDir: /tmp/jenkins - image: gcr.io/go-containerregistry/crane:debug - imagePullPolicy: Always - command: - - /busybox/cat - tty: true volumes: - name: jenkins-docker-cfg projected: @@ -77,31 +70,31 @@ spec: steps { script { container(name: 'kaniko', shell: '/busybox/sh') { - kaniko.build("./Dockerfile", ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2", "$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"]) - } - } - } - post { - always { - archiveArtifacts artifacts: 'image.tar', onlyIfSuccessful: true - } - } - } - stage('Publish') { - steps { - script { - container(name: 'crane', shell: '/busybox/sh') { - def imageTagsPushAlways = ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2"] - def imageTagsPushForDevelopBranch = ["$IMAGE_NAME:$TAG3"] - def imageTagsPushForMasterBranch = ["$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"] - image.publish( - imageTagsPushAlways, - imageTagsPushForDevelopBranch, - imageTagsPushForMasterBranch - ) + kaniko.buildAndPush("./Dockerfile", ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2", "$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"]) } } } + // post { + // always { + // archiveArtifacts artifacts: 'image.tar', onlyIfSuccessful: true + // } + // } } + // stage('Publish') { + // steps { + // script { + // container(name: 'crane', shell: '/busybox/sh') { + // def imageTagsPushAlways = ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2"] + // def imageTagsPushForDevelopBranch = ["$IMAGE_NAME:$TAG3"] + // def imageTagsPushForMasterBranch = ["$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"] + // image.publish( + // imageTagsPushAlways, + // imageTagsPushForDevelopBranch, + // imageTagsPushForMasterBranch + // ) + // } + // } + // } + // } } } diff --git a/dags/_version.py b/dags/_version.py index 6f94c0ae..df012a40 100644 --- a/dags/_version.py +++ b/dags/_version.py @@ -1 +1 @@ 
-version = "0.10.0-dev" +version = "0.10.1-dev" diff --git a/dags/annotate.py b/dags/annotate.py index 40600daa..16da2474 100644 --- a/dags/annotate.py +++ b/dags/annotate.py @@ -7,7 +7,7 @@ from dug_helpers.dug_utils import DugUtil, get_topmed_files, get_dbgap_files,\ get_nida_files, get_sparc_files, get_anvil_files,\ get_cancer_data_commons_files, get_kids_first_files,\ - get_sprint_files + get_sprint_files, get_bacpac_files from roger.dag_util import default_args, create_python_task DAG_ID = 'annotate_dug' @@ -69,6 +69,10 @@ prepare_files = create_python_task(dag, "get_sprint_files", get_sprint_files) annotate_files = create_python_task(dag, "annotate_sprint_files", DugUtil.annotate_sprint_files) + elif data_set == "bacpac": + prepare_files = create_python_task(dag, "get_bacpac_files", get_bacpac_files) + annotate_files = create_python_task(dag, "annotate_bacpac_files", + DugUtil.annotate_bacpac_files) intro >> prepare_files prepare_files >> clear_annotation_items clear_annotation_items >> annotate_files diff --git a/dags/dug_helpers/dug_utils.py b/dags/dug_helpers/dug_utils.py index c3d5951f..8db0465c 100644 --- a/dags/dug_helpers/dug_utils.py +++ b/dags/dug_helpers/dug_utils.py @@ -668,6 +668,18 @@ def annotate_topmed_files(config=None, to_string=False, files=None): output_log = dug.log_stream.getvalue() if to_string else '' return output_log + @staticmethod + def annotate_bacpac_files(config=None, to_string=False, files=None): + with Dug(config, to_string=to_string) as dug: + if files is None: + files = Util.dug_bacpac_objects() + parser_name = "BACPAC" + log.info(files) + dug.annotate_files(parser_name=parser_name, + parsable_files=files) + output_log = dug.log_stream.getvalue() if to_string else '' + return output_log + @staticmethod def make_kg_tagged(config=None, to_string=False): with Dug(config, to_string=to_string) as dug: @@ -891,6 +903,8 @@ def get_cancer_data_commons_files(config: RogerConfig, to_string=False) -> List[ def get_sprint_files(config: 
RogerConfig, to_string=False) -> List[str]: return get_versioned_files(config, "sprint", "sprint", data_store=config.dug_inputs.data_source, unzip=True) +def get_bacpac_files(config: RogerConfig, to_string=False) -> List[str]: + return get_versioned_files(config, "bacpac", "bacpac", data_store=config.dug_inputs.data_source, unzip=True) def get_topmed_files(config: RogerConfig, to_string=False) -> List[str]: return get_versioned_files(config, "topmed", "topmed", data_store=config.dug_inputs.data_source, unzip=False) diff --git a/dags/metadata.yaml b/dags/metadata.yaml index af922dd3..f550be0e 100644 --- a/dags/metadata.yaml +++ b/dags/metadata.yaml @@ -146,4 +146,10 @@ dug_inputs: files: s3: - "sprint/v1.0/StanfordSPRINT_DataDictionary_2020-12-16.tar.gz" - format: sprint \ No newline at end of file + format: sprint + - name: bacpac + version: v1.0 + files: + s3: + - "heal-datasets/bacpac/bacpac_baseline_do_measures.tar.gz" + format: bacpac \ No newline at end of file diff --git a/dags/roger/core.py b/dags/roger/core.py index cf1aac0a..d46be45b 100644 --- a/dags/roger/core.py +++ b/dags/roger/core.py @@ -297,6 +297,11 @@ def dug_sprint_path(): """Anvil source files""" return Util.dug_input_files_path('sprint') + @staticmethod + def dug_bacpac_path(): + """BACPAC source files""" + return Util.dug_input_files_path('bacpac') + @staticmethod def dug_crdc_path(): """Anvil source files""" @@ -331,6 +336,13 @@ def dug_sprint_objects(): lambda file_name: file_name.endswith('.xml'), file_path) return sorted([str(f) for f in files]) + @staticmethod + def dug_bacpac_objects(): + file_path = Util.dug_bacpac_path() + files = Util.get_files_recursive( + lambda file_name: file_name.endswith('.xml'), file_path) + return sorted([str(f) for f in files]) + @staticmethod def dug_crdc_objects(): file_path = Util.dug_crdc_path() diff --git a/requirements.txt b/requirements.txt index 81398bcb..cab038ed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ 
redisgraph-bulk-loader==0.9.5 requests<2.24.0 pytest==6.2.2 PyYAML==5.3.1 -git+https://github.com/helxplatform/dug@v2.9.5 +git+https://github.com/helxplatform/dug@v2.9.6 elasticsearch==7.11.0 biolinkml>=1.5.10 orjson