|
9 | 9 |
|
10 | 10 | # System modules
|
11 | 11 | from time import time
|
| 12 | +import os |
| 13 | +import sys |
| 14 | +import platform |
| 15 | +import pkg_resources |
| 16 | +import subprocess |
| 17 | +import json |
12 | 18 |
|
13 | 19 | # Local modules
|
14 | 20 | from CHAP.processor import Processor
|
15 | 21 |
|
class FoxdenMetaDataProcessor(Processor):
    """Processor that derives FOXDEN MetaData analysis records from
    the dataset identifiers (dids) found in its input data."""

    def process(self, data, suffix='analysis=CHAP', verbose=False):
        """Build one analysis record for every input record.

        :param data: Input data. Each item is indexed with `'data'`
            to obtain an iterable of records; every record must
            contain a `'did'` key.
        :type data: list[PipelineData]
        :param suffix: Text appended to each input did after a `/`
            separator, defaults to `'analysis=CHAP'`.
        :type suffix: str, optional
        :param verbose: Verbose output flag (not consulted by this
            method), defaults to `False`.
        :type verbose: bool, optional
        :raises Exception: If an input record has no `'did'` key.
        :return: Analysis records, one per input record, each of the
            form `{'did': '<did>/<suffix>', 'application': 'CHAP'}`.
        :rtype: list[dict]
        """
        started = time()
        self.logger.info(f'Executing "process" with data={data}')
        # Walk the records of all items as one flat, lazily evaluated
        # stream; the visiting order matches a nested item/record loop.
        incoming = (entry for item in data for entry in item['data'])
        analysis_records = []
        for entry in incoming:
            if 'did' not in entry:
                raise Exception('No did found in input data record')
            # The input record itself is left untouched; only its did
            # is carried over into the freshly built analysis record.
            analysis_records.append(
                {'did': entry['did'] + '/' + suffix, 'application': 'CHAP'})
        self.logger.info(
            f'Finished "process" in {time()-started:.3f} seconds\n')
        return analysis_records
40 | 51 |
|
class FoxdenProvenanceProcessor(Processor):
    """Processor that builds FOXDEN provenance records from the
    dataset identifiers (dids) found in its input data."""

    def process(self, data, suffix='analysis=CHAP', verbose=False):
        """Build one provenance record for every input record.

        Each output record is a shallow copy of the input record with
        the provenance fields added, so the caller's input records
        are not modified.

        :param data: Input data. Each item is indexed with `'data'`
            to obtain an iterable of records; every record must
            contain a `'did'` key.
        :type data: list[PipelineData]
        :param suffix: Text appended to each input did after a `/`
            separator to form the did of the new record, defaults to
            `'analysis=CHAP'`.
        :type suffix: str, optional
        :param verbose: Verbose output flag (not consulted by this
            method), defaults to `False`.
        :type verbose: bool, optional
        :raises Exception: If an input record has no `'did'` key.
        :return: Provenance records, one per input record.
        :rtype: list[dict]
        """
        t0 = time()
        self.logger.info(f'Executing "process" with data={data}')
        output = []
        for item in data:
            # Each item in the data list is a CHAP record
            # {'name': ..., 'data': ...}; the records live under 'data'.
            for rec in item['data']:
                if 'did' not in rec:
                    raise Exception('No did found in input data record')
                # Remember the incoming did before deriving the new
                # one: the input dataset is the parent of the record
                # produced here, so the two must not be identical.
                parent_did = rec['did']
                record = dict(rec)
                record['did'] = parent_did + '/' + suffix
                record['parent_did'] = parent_did
                record['scripts'] = [
                    {'name': 'CHAP', 'parent_script': None, 'order_idx': 1}]
                record['site'] = 'Cornell'
                record['osinfo'] = osinfo()
                record['environments'] = environments()
                record['input_files'] = inputFiles()
                record['output_files'] = outputFiles()
                record['processing'] = 'CHAP pipeline'
                output.append(record)
        self.logger.info(f'Finished "process" in {time()-t0:.3f} seconds\n')
        return output
64 | 86 |
|
def osinfo():
    """Describe the operating system of the current host.

    :return: Dictionary with the keys `name` (lower-cased
        `platform.system()` joined to `platform.release()` by a
        dash), `kernel` (`platform.version()`) and `version`
        (`platform.platform()`).
    :rtype: dict
    """
    system_release = "-".join((platform.system().lower(), platform.release()))
    return dict(
        name=system_release,
        kernel=platform.version(),
        version=platform.platform(),
    )
| 97 | + |
def _environment_record(name, details, os_name, packages):
    """Assemble one environment entry in the layout `environments` returns."""
    return {
        "name": name,
        "version": sys.version.split()[0],
        "details": details,
        "parent_environment": None,
        "os_name": os_name,
        "packages": packages,
    }


def _working_set_packages():
    """List name/version pairs of the distributions in the working set."""
    return [
        {"name": pkg.key, "version": pkg.version}
        for pkg in pkg_resources.working_set
    ]


def _conda_packages():
    """List name/version pairs reported by `conda list --json`.

    The query is best effort: an empty list is returned when conda
    cannot be run, exits with an error, or prints output that is not
    the expected JSON list of package dictionaries.
    """
    try:
        listing = subprocess.check_output(
            ["conda", "list", "--json"], text=True)
        return [
            {"name": pkg["name"], "version": pkg["version"]}
            for pkg in json.loads(listing)
        ]
    except (OSError, subprocess.SubprocessError,
            ValueError, KeyError, TypeError):
        return []


def environments():
    """Describe the Python environment the current process runs in.

    Exactly one environment is reported, chosen in this order:

    1. Conda, when `CONDA_PREFIX` is set; packages come from
       `conda list --json`.
    2. Virtualenv/venv, when `VIRTUAL_ENV` is set, `sys.real_prefix`
       exists (legacy virtualenv) or `sys.prefix` differs from
       `sys.base_prefix` (this also covers an interpreter started
       from a venv that was never activated).
    3. System-wide Python otherwise.

    :return: Single-element list holding a dictionary with the keys
        `name`, `version`, `details`, `parent_environment`, `os_name`
        and `packages`.
    :rtype: list[dict]
    """
    os_name = platform.system().lower() + "-" + platform.release()

    if os.getenv("CONDA_PREFIX"):
        return [_environment_record(
            os.getenv("CONDA_DEFAULT_ENV", "unknown-conda-env"),
            "Conda environment", os_name, _conda_packages())]

    venv_root = os.getenv("VIRTUAL_ENV")
    # Environment variables are only set by the activate script, so
    # also ask the interpreter itself whether it lives in a venv.
    inside_venv = (
        hasattr(sys, 'real_prefix')
        or sys.prefix != getattr(sys, 'base_prefix', sys.prefix))
    if venv_root or inside_venv:
        # Without VIRTUAL_ENV the venv directory is still known as
        # sys.prefix; normpath drops a trailing separator that would
        # otherwise make basename return an empty string.
        venv_name = os.path.basename(
            os.path.normpath(venv_root or sys.prefix)) or "unknown-venv"
        return [_environment_record(
            venv_name, "Virtualenv environment", os_name,
            _working_set_packages())]

    return [_environment_record(
        "system-python", "System-wide Python", os_name,
        _working_set_packages())]
| 162 | + |
def inputFiles():
    """Return the input file records reported to FOXDEN.

    The file paths are hard-coded in the function body; a new list
    is built on every call.

    :return: List of dictionaries, each with a single `name` key.
    :rtype: list[dict]
    """
    paths = ('/tmp/file1.png', '/tmp/file2.png')
    return [{'name': path} for path in paths]
| 168 | + |
def outputFiles():
    """Return the output file records reported to FOXDEN.

    The file path is hard-coded in the function body; a new list is
    built on every call.

    :return: List of dictionaries, each with a single `name` key.
    :rtype: list[dict]
    """
    paths = ('/tmp/file1.png',)
    return [{'name': path} for path in paths]
65 | 174 |
|
66 | 175 | if __name__ == '__main__':
|
67 | 176 | # Local modules
|
|
0 commit comments