diff --git a/beaglecli b/beaglecli index 8357061..8c52cdc 100755 --- a/beaglecli +++ b/beaglecli @@ -27,6 +27,10 @@ from urllib.parse import urljoin from datetime import datetime import traceback import csv +import git +import subprocess +import re +import tempfile from apps.access import access_commands from apps.cmoch import cmoch_commands @@ -75,6 +79,7 @@ Usage: beaglecli files patch [--file-path=] [--file-type=] [--file-group=] [--metadata=]... [--size=] beaglecli files list [--page-size=] [--path=]... [--metadata=]... [--file-group=]... [--file-name=]... [--filename-regex=] [--file-type=]... [--all]... [--packaged]... [--force]... beaglecli files delete --file-id=... + beaglecli pipeline update-cache beaglecli sample create beaglecli sample list [--sample-id=] beaglecli sample redact [--value=] @@ -157,6 +162,11 @@ def files_commands(arguments, config): return _patch_file(arguments, config) +def pipeline_commands(arguments, config): + if arguments.get('update-cache'): + return _update_cache(arguments, config) + + def storage_commands(arguments, config): if arguments.get('list'): return _list_storage(arguments, config) @@ -221,6 +231,8 @@ def sample_commands(arguments, config): def command(arguments, config): if arguments.get('files'): return files_commands(arguments, config) + if arguments.get('pipeline'): + return pipeline_commands(arguments, config) if arguments.get('storage'): return storage_commands(arguments, config) if arguments.get('file-types'): @@ -351,6 +363,33 @@ def _get_latest_runs(run_dict): return run_list +def _get_cwl_apps(): + url = urljoin(BEAGLE_ENDPOINT, API['pipelines']) + params = dict() + params['page_size'] = 1000000 + response = requests.get(url, headers={ + 'Authorization': 'Bearer %s' % config.token, 'Content-Type': 'application/json'}, params=params) + cwl_set = set() + if response.ok: + response_json = response.json() + if "results" in response_json: + result_list = response_json["results"] + for single_pipeline in result_list: + pipeline_type = single_pipeline["pipeline_type"] + if pipeline_type == 0: + github = single_pipeline["github"] + version = single_pipeline["version"] + entrypoint = single_pipeline["entrypoint"] + cwl_set.add((github, version, entrypoint)) + return cwl_set + else: + print("Error: beagle returned an empty response") + exit(1) + else: + print("ERROR: Could not retrieve app list") + exit(1) + + def _get_apps_dict(): url = urljoin(BEAGLE_ENDPOINT, API['pipelines']) params = dict() @@ -595,9 +634,10 @@ def _list_files(arguments, config): params['filename_regex'] = filename_regex params['file_type'] = file_type if all_pages: - count_params = params - count_params['count'] = True - params['page_size'] = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={'Authorization': 'Bearer %s' % config.token}, params=count_params).json()['count'] + count_params = params + count_params['count'] = True + params['page_size'] = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ + 'Authorization': 'Bearer %s' % config.token}, params=count_params).json()['count'] response = requests.get(urljoin(BEAGLE_ENDPOINT, API['files']), headers={ 'Authorization': 'Bearer %s' % config.token}, params=params) response_json = json.dumps(response.json(), indent=4) @@ -712,6 +752,38 @@ def _create_sample(arguments, config): response_json = json.dumps(response.json(), indent=4) return response_json +# Pipeline + + +def _update_cache(arguments, config): + cache_path = arguments.get('') + cwl_apps = _get_cwl_apps() + num_cwl_apps = len(cwl_apps) + current_cwl_app = 0 + find_pipeline_folder = re.compile("Cloning into '(\S+)'") + for repo, version, cwl in cwl_apps: + with tempfile.TemporaryDirectory() as tmpDir: + print("Working on cwl {} of {}".format( + current_cwl_app, num_cwl_apps)) + current_cwl_app += 1 + try: + git_repo = git.Git(tmpDir).clone( + repo, "--branch", version, "--recurse-submodules") + except git.exc.GitCommandError: + print("Could not find repo {} version {}".format(repo, version)) + pipeline_folder = find_pipeline_folder.match(git_repo).group(1) + cwl_path = os.path.join(tmpDir, pipeline_folder, cwl) + if os.path.exists(cwl_path): + cache_command = "module load singularity/3.7.1; docker_extract.py -s {} {}".format( + cache_path, cwl_path) + try: + subprocess.check_output(cache_command, shell=True) + except: + print("Malformed CWL: CWL {} for repo {} version {} might not be valid".format( + cwl, repo, version)) + else: + print("Malformed pipeline: Could not find cwl: {} in {} version {}".format( + cwl, repo, version)) # Update @@ -959,6 +1031,7 @@ def _redact_sample(arguments, config): response_json = json.dumps(response.json(), indent=4) return response_json + if __name__ == '__main__': config = Config.load() authenticate_command(config) diff --git a/requirements.txt b/requirements.txt index 6cf06d1..0ec9025 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ docopt==0.6.2 requests==2.22.0 +git+https://github.com/mskcc/cwl-utils.git +GitPython==3.0.8 pandas \ No newline at end of file