Skip to content

Commit

Permalink
Bacpac parsing (#66)
Browse files Browse the repository at this point in the history
* bacpac parser

* point to bacpac dug

* fix typo

* Changed kaniko build to kaniko buildAndPush

* Removed items from Jenkinsfile to allow successful build of image

* Removed crane image from k8s manifest

* point to dug develop

* bump version

* adding bacpac dag tasks

* bump dug version

Co-authored-by: Yaphetkg <[email protected]>
Co-authored-by: Joshua Seals <[email protected]>
  • Loading branch information
3 people authored Jan 5, 2023
1 parent 7459128 commit 6fc7161
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 33 deletions.
51 changes: 22 additions & 29 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,6 @@ spec:
volumeMounts:
- name: jenkins-docker-cfg
mountPath: /kaniko/.docker
- name: crane
workingDir: /tmp/jenkins
image: gcr.io/go-containerregistry/crane:debug
imagePullPolicy: Always
command:
- /busybox/cat
tty: true
volumes:
- name: jenkins-docker-cfg
projected:
Expand Down Expand Up @@ -77,31 +70,31 @@ spec:
steps {
script {
container(name: 'kaniko', shell: '/busybox/sh') {
kaniko.build("./Dockerfile", ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2", "$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"])
}
}
}
post {
always {
archiveArtifacts artifacts: 'image.tar', onlyIfSuccessful: true
}
}
}
stage('Publish') {
steps {
script {
container(name: 'crane', shell: '/busybox/sh') {
def imageTagsPushAlways = ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2"]
def imageTagsPushForDevelopBranch = ["$IMAGE_NAME:$TAG3"]
def imageTagsPushForMasterBranch = ["$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"]
image.publish(
imageTagsPushAlways,
imageTagsPushForDevelopBranch,
imageTagsPushForMasterBranch
)
kaniko.buildAndPush("./Dockerfile", ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2", "$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"])
}
}
}
// post {
// always {
// archiveArtifacts artifacts: 'image.tar', onlyIfSuccessful: true
// }
// }
}
// stage('Publish') {
// steps {
// script {
// container(name: 'crane', shell: '/busybox/sh') {
// def imageTagsPushAlways = ["$IMAGE_NAME:$TAG1", "$IMAGE_NAME:$TAG2"]
// def imageTagsPushForDevelopBranch = ["$IMAGE_NAME:$TAG3"]
// def imageTagsPushForMasterBranch = ["$IMAGE_NAME:$TAG3", "$IMAGE_NAME:$TAG4"]
// image.publish(
// imageTagsPushAlways,
// imageTagsPushForDevelopBranch,
// imageTagsPushForMasterBranch
// )
// }
// }
// }
// }
}
}
2 changes: 1 addition & 1 deletion dags/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.10.0-dev"
version = "0.10.1-dev"
6 changes: 5 additions & 1 deletion dags/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dug_helpers.dug_utils import DugUtil, get_topmed_files, get_dbgap_files,\
get_nida_files, get_sparc_files, get_anvil_files,\
get_cancer_data_commons_files, get_kids_first_files,\
get_sprint_files
get_sprint_files, get_bacpac_files
from roger.dag_util import default_args, create_python_task

DAG_ID = 'annotate_dug'
Expand Down Expand Up @@ -69,6 +69,10 @@
prepare_files = create_python_task(dag, "get_sprint_files", get_sprint_files)
annotate_files = create_python_task(dag, "annotate_sprint_files",
DugUtil.annotate_sprint_files)
elif data_set == "bacpac":
prepare_files = create_python_task(dag, "get_bacpac_files", get_bacpac_files)
annotate_files = create_python_task(dag, "annotate_bacpac_files",
DugUtil.annotate_bacpac_files)
intro >> prepare_files
prepare_files >> clear_annotation_items
clear_annotation_items >> annotate_files
Expand Down
14 changes: 14 additions & 0 deletions dags/dug_helpers/dug_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,18 @@ def annotate_topmed_files(config=None, to_string=False, files=None):
output_log = dug.log_stream.getvalue() if to_string else ''
return output_log

@staticmethod
def annotate_bacpac_files(config=None, to_string=False, files=None):
    """Run Dug annotation over BACPAC files using the "BACPAC" parser.

    :param config: passed straight through to the ``Dug`` context manager.
    :param to_string: forwarded to ``Dug``; when truthy the contents of
        ``dug.log_stream`` are returned, otherwise an empty string.
    :param files: files to annotate; when ``None`` the list is taken from
        ``Util.dug_bacpac_objects()``.
    :returns: the captured log text, or ``''`` when ``to_string`` is falsy.
    """
    with Dug(config, to_string=to_string) as dug:
        # Fall back to the discovered BACPAC files only when the caller
        # gave no explicit list (an empty list is used as-is).
        bacpac_files = Util.dug_bacpac_objects() if files is None else files
        log.info(bacpac_files)
        dug.annotate_files(parser_name="BACPAC", parsable_files=bacpac_files)
        if to_string:
            captured = dug.log_stream.getvalue()
        else:
            captured = ''
    return captured

@staticmethod
def make_kg_tagged(config=None, to_string=False):
with Dug(config, to_string=to_string) as dug:
Expand Down Expand Up @@ -891,6 +903,8 @@ def get_cancer_data_commons_files(config: RogerConfig, to_string=False) -> List[
def get_sprint_files(config: RogerConfig, to_string=False) -> List[str]:
return get_versioned_files(config, "sprint", "sprint", data_store=config.dug_inputs.data_source, unzip=True)

def get_bacpac_files(config: RogerConfig, to_string=False) -> List[str]:
    """Fetch the versioned "bacpac" input files via ``get_versioned_files``.

    The data store is read from ``config.dug_inputs.data_source`` and the
    files are requested with ``unzip=True``. ``to_string`` is accepted but
    not used by this function.
    """
    source = config.dug_inputs.data_source
    return get_versioned_files(
        config,
        "bacpac",
        "bacpac",
        data_store=source,
        unzip=True,
    )

def get_topmed_files(config: RogerConfig, to_string=False) -> List[str]:
return get_versioned_files(config, "topmed", "topmed", data_store=config.dug_inputs.data_source, unzip=False)
Expand Down
8 changes: 7 additions & 1 deletion dags/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,10 @@ dug_inputs:
files:
s3:
- "sprint/v1.0/StanfordSPRINT_DataDictionary_2020-12-16.tar.gz"
format: sprint
format: sprint
- name: bacpac
version: v1.0
files:
s3:
- "heal-datasets/bacpac/bacpac_baseline_do_measures.tar.gz"
format: bacpac
12 changes: 12 additions & 0 deletions dags/roger/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,11 @@ def dug_sprint_path():
"""Anvil source files"""
return Util.dug_input_files_path('sprint')

@staticmethod
def dug_bacpac_path():
    """BACPAC source files: the result of ``Util.dug_input_files_path('bacpac')``."""
    bacpac_dir = Util.dug_input_files_path('bacpac')
    return bacpac_dir

@staticmethod
def dug_crdc_path():
"""Anvil source files"""
Expand Down Expand Up @@ -331,6 +336,13 @@ def dug_sprint_objects():
lambda file_name: file_name.endswith('.xml'), file_path)
return sorted([str(f) for f in files])

@staticmethod
def dug_bacpac_objects():
    """Return the sorted string paths of BACPAC ``.xml`` input files.

    Candidates are whatever ``Util.get_files_recursive`` yields for the
    path from ``Util.dug_bacpac_path()``, filtered to names ending in
    ``.xml``.
    """
    def _is_xml(file_name):
        return file_name.endswith('.xml')

    root = Util.dug_bacpac_path()
    matches = Util.get_files_recursive(_is_xml, root)
    return sorted(str(match) for match in matches)

@staticmethod
def dug_crdc_objects():
file_path = Util.dug_crdc_path()
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ redisgraph-bulk-loader==0.9.5
requests<2.24.0
pytest==6.2.2
PyYAML==5.3.1
git+https://github.com/helxplatform/[email protected].5
git+https://github.com/helxplatform/[email protected].6
elasticsearch==7.11.0
biolinkml>=1.5.10
orjson
Expand Down

0 comments on commit 6fc7161

Please sign in to comment.