-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add method to update pipeline image cache #63
base: develop
Are you sure you want to change the base?
Changes from all commits
dd80f90
9f7eec8
68aa2f8
54dcbb0
104c392
a54cd75
59d9631
0b7c2bb
7981b08
17cbe15
6e72057
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,10 @@ from urllib.parse import urljoin | |
from datetime import datetime | ||
import traceback | ||
import csv | ||
import git | ||
import subprocess | ||
import re | ||
import tempfile | ||
|
||
from apps.access import access_commands | ||
from apps.cmoch import cmoch_commands | ||
|
@@ -75,6 +79,7 @@ Usage: | |
beaglecli files patch <file_id> [--file-path=<file_path>] [--file-type=<file_type>] [--file-group=<file_group_id>] [--metadata=<metadata>]... [--size=<size>] | ||
beaglecli files list [--page-size=<page_size>] [--path=<path>]... [--metadata=<metadata>]... [--file-group=<file_group>]... [--file-name=<file_name>]... [--filename-regex=<filename_regex>] [--file-type=<file_type>]... [--all]... [--packaged]... [--force]... | ||
beaglecli files delete --file-id=<file_id>... | ||
beaglecli pipeline update-cache <cache_path> | ||
beaglecli sample create <sample-id> | ||
beaglecli sample list [--sample-id=<sample-id>] | ||
beaglecli sample redact <sample-id> [--value=<redact>] | ||
|
@@ -157,6 +162,11 @@ def files_commands(arguments, config): | |
return _patch_file(arguments, config) | ||
|
||
|
||
def pipeline_commands(arguments, config):
    """Route `beaglecli pipeline ...` subcommands to their handlers.

    Only `update-cache` is currently supported; any other subcommand
    falls through and returns None.
    """
    handlers = {'update-cache': _update_cache}
    for subcommand, handler in handlers.items():
        if arguments.get(subcommand):
            return handler(arguments, config)
|
||
|
||
def storage_commands(arguments, config): | ||
if arguments.get('list'): | ||
return _list_storage(arguments, config) | ||
|
@@ -221,6 +231,8 @@ def sample_commands(arguments, config): | |
def command(arguments, config): | ||
if arguments.get('files'): | ||
return files_commands(arguments, config) | ||
if arguments.get('pipeline'): | ||
return pipeline_commands(arguments, config) | ||
if arguments.get('storage'): | ||
return storage_commands(arguments, config) | ||
if arguments.get('file-types'): | ||
|
@@ -351,6 +363,33 @@ def _get_latest_runs(run_dict): | |
return run_list | ||
|
||
|
||
def _get_cwl_apps(page_size=1000000):
    """Fetch the set of CWL apps registered in Beagle.

    Queries the Beagle `pipelines` endpoint and collects one
    (github, version, entrypoint) tuple per pipeline whose
    `pipeline_type` is 0 (i.e. a CWL app); other pipeline types are
    skipped.

    Args:
        page_size: number of pipelines requested in a single page.
            The default is deliberately huge so one request returns
            the whole (short) pipeline list; it is a parameter rather
            than a magic number so it can be tuned if the endpoint
            ever misbehaves on very large values.

    Returns:
        set of (github_url, version, entrypoint) tuples.

    Exits the process with status 1 when the request fails or the
    response carries no "results" key.
    """
    url = urljoin(BEAGLE_ENDPOINT, API['pipelines'])
    params = {'page_size': page_size}
    # NOTE(review): `config` here is the module-level Config loaded at
    # startup — TODO confirm this is intended rather than a parameter.
    response = requests.get(
        url,
        headers={'Authorization': 'Bearer %s' % config.token,
                 'Content-Type': 'application/json'},
        params=params)
    if not response.ok:
        print("ERROR: Could not retrieve app list")
        exit(1)
    response_json = response.json()
    if "results" not in response_json:
        print("Error: beagle returned an empty response")
        exit(1)
    cwl_set = set()
    for single_pipeline in response_json["results"]:
        # pipeline_type 0 identifies CWL apps.
        if single_pipeline["pipeline_type"] == 0:
            cwl_set.add((single_pipeline["github"],
                         single_pipeline["version"],
                         single_pipeline["entrypoint"]))
    return cwl_set
|
||
|
||
def _get_apps_dict(): | ||
url = urljoin(BEAGLE_ENDPOINT, API['pipelines']) | ||
params = dict() | ||
|
@@ -595,9 +634,10 @@ def _list_files(arguments, config): | |
params['filename_regex'] = filename_regex | ||
params['file_type'] = file_type | ||
if all_pages: | ||
count_params = params | ||
count_params['count'] = True | ||
params['page_size'] = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={'Authorization': 'Bearer %s' % config.token}, params=count_params).json()['count'] | ||
count_params = params | ||
count_params['count'] = True | ||
params['page_size'] = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ | ||
'Authorization': 'Bearer %s' % config.token}, params=count_params).json()['count'] | ||
response = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ | ||
'Authorization': 'Bearer %s' % config.token}, params=params) | ||
response_json = json.dumps(response.json(), indent=4) | ||
|
@@ -712,6 +752,38 @@ def _create_sample(arguments, config): | |
response_json = json.dumps(response.json(), indent=4) | ||
return response_json | ||
|
||
# Pipeline | ||
|
||
|
||
def _update_cache(arguments, config):
    """Populate a singularity image cache for every registered CWL app.

    For each (repo, version, entrypoint) returned by `_get_cwl_apps()`,
    clones the pipeline repository (with submodules) at the tagged
    version into a temporary directory, then runs `docker_extract.py`
    to pull the pipeline's container images into <cache_path>.

    Args:
        arguments: docopt arguments dict; reads the `<cache_path>`
            positional.
        config: CLI config (unused here; kept for command-handler
            signature symmetry).

    Returns:
        None. Progress and per-pipeline failures are printed to stdout;
        a failure for one pipeline does not abort the rest.
    """
    cache_path = arguments.get('<cache_path>')
    cwl_apps = _get_cwl_apps()
    num_cwl_apps = len(cwl_apps)
    # Raw string so `\S` reaches the regex engine instead of being
    # treated as an (invalid) string escape.
    find_pipeline_folder = re.compile(r"Cloning into '(\S+)'")
    # enumerate from 1 so the first message reads "1 of N", not "0 of N".
    for current_cwl_app, (repo, version, cwl) in enumerate(cwl_apps, start=1):
        with tempfile.TemporaryDirectory() as tmp_dir:
            print("Working on cwl {} of {}".format(
                current_cwl_app, num_cwl_apps))
            try:
                clone_output = git.Git(tmp_dir).clone(
                    repo, "--branch", version, "--recurse-submodules")
            except git.exc.GitCommandError:
                print("Could not find repo {} version {}".format(repo, version))
                # Without this `continue`, the next line raised NameError
                # because the clone output was never bound.
                continue
            folder_match = find_pipeline_folder.match(clone_output)
            if folder_match is None:
                print("Could not determine clone folder for repo {} version {}".format(
                    repo, version))
                continue
            cwl_path = os.path.join(tmp_dir, folder_match.group(1), cwl)
            if not os.path.exists(cwl_path):
                print("Malformed pipeline: Could not find cwl: {} in {} version {}".format(
                    cwl, repo, version))
                continue
            # shell=True is required for `module load`; cache_path comes
            # from the operator and cwl_path from Beagle, not untrusted
            # end users.
            # TODO(review): singularity version is hard-coded — consider
            # making it a CLI parameter.
            cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format(
                cache_path, cwl_path)
            try:
                subprocess.check_output(cache_command, shell=True)
            except subprocess.CalledProcessError:
                print("Malformed CWL: CWL {} for repo {} version {} might not be valid".format(
                    cwl, repo, version))
|
||
# Update | ||
|
||
|
@@ -959,6 +1031,7 @@ def _redact_sample(arguments, config): | |
response_json = json.dumps(response.json(), indent=4) | ||
return response_json | ||
|
||
|
||
if __name__ == '__main__': | ||
config = Config.load() | ||
authenticate_command(config) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
docopt==0.6.2 | ||
requests==2.22.0 | ||
git+https://github.com/mskcc/cwl-utils.git | ||
GitPython==3.0.8 | ||
pandas |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wouldn't mind trying the feature out on an example though. Is there a good cache path I could test it out on?