From e8def23d36d91e6d16acd198396ce399615171b2 Mon Sep 17 00:00:00 2001 From: Egor Yashin <61899414+molotgor@users.noreply.github.com> Date: Mon, 8 Jul 2024 13:47:39 +0400 Subject: [PATCH] [TH2-5212] added endpoint to request file by path (#5) * added endpoint to request file by path * update description of request functions * update replacePathServerToLocal to raise Exception and remove unused endpoint * changed use data access from `ro` to `rw` * Corrected log configuration * added /files/all request and parsing for file path parameter * added `umask 0007` to `~/.bashrc` file --------- Co-authored-by: nikita.smirnov --- Dockerfile | 5 +- README.md | 36 +- local-run/with-jupyter-notebook/compose.yml | 10 +- .../json-stream-provider/log4py.conf | 22 + .../jupyter-notebook/Dockerfile | 5 +- .../user_data/.gitignore | 1 - log_configuratior.py | 34 ++ package_info.json | 2 +- server.py | 515 +++++++++++------- var/th2/config/log4py.conf | 38 -- 10 files changed, 418 insertions(+), 250 deletions(-) create mode 100644 local-run/with-jupyter-notebook/json-stream-provider/log4py.conf delete mode 100644 local-run/with-jupyter-notebook/user_data/.gitignore create mode 100644 log_configuratior.py delete mode 100644 var/th2/config/log4py.conf diff --git a/Dockerfile b/Dockerfile index 7aa1bfd..2729076 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,9 +9,7 @@ COPY . /app # groupadd --system - create a system account # useradd --system - create a system account # useradd --gid - name or ID of the primary group of the new account -# usermod --append - append the user to the supplemental GROUPS mentioned by the -G/--groups option without removing the user from other groups -# usermod --groups - new list of supplementary GROUPS -RUN groupadd --system json-stream && useradd --system --gid json-stream --uid 1000 json-stream && usermod --append --groups users json-stream +RUN useradd --system --gid users --uid 1000 json-stream ENV TH2_CFG_DIR="/app/var/th2/config/" ENV HOME="/home/json-stream" @@ -41,6 +39,7 @@ ENV PYTHONPATH="${PYTHONPATH}:${PYTHON_LOCAL_LIB_PATH}:${PYTHON_SHARED_LIB_PATH} ENV PIP_CONFIG_FILE="${HOME}/.pip/pip.conf" RUN mkdir -p "${PYTHON_SHARED_LIB_PATH}" +RUN echo 'umask 0007' >> "${HOME}/.bashrc" ENTRYPOINT ["python", "/app/server.py"] CMD ["/var/th2/config/custom.json"] \ No newline at end of file diff --git a/README.md b/README.md index 92e0b9c..81c419d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# th2-json-stream-provider (j-sp) (0.0.3) +# th2-json-stream-provider (j-sp) (0.0.5) This python server is made to launch Jupyter notebooks (*.ipynb) and get results from them. @@ -83,6 +83,20 @@ You can put required files for you jupyter notebooks into `local-run/with-jupyte Or you can mount own folder by changing value of `USER_DATA_DIR` environment variable in the `local-run/with-jupyter-notebook/.evn` file.
Or change the `local-run/with-jupyter-notebook/compose.yml` file. Please note you should mount the same dictionary by the same path to `jupyter_notebook` and `json_stream_provider` services. +#### provide permission for `local-run/with-jupyter-notebook/user_data` folder +`jupyter-notebook` and `json-stream-provider` use user from default linux `users` group. +It means that: +* `user_data` folder internal folder should have `rwx` permission for `users` group. +* files in `user_data` folder should have `rw` permission for `users` group. + +Perhaps you will need sudo permission for the next commands + +```shell +cd local-run/with-jupyter-notebook +chgrp -R users user_data/ +chmod -R g=u user_data/ +``` + #### start command ```shell cd local-run/with-jupyter-notebook @@ -105,6 +119,26 @@ docker compose build ## Release notes: +### 0.0.5 + +* added `umask 0007` to `~/.bashrc` file to provide rw file access for `users` group +* added `/file` request for loading content of single jsonl file +* removed ability to get any file from machine via `/file` REST APIs +* added sorting on requests `/files/notebooks` and `/files/results` +* added `/files/all` request to list all files in `/notebooks` and `/results/` directories +* added `convert_parameter` function for parsing parameter depending on it's type +* update local run with jupyter-notebook: + * updated th2-rpt-viewer: + * added option to change default view type of result group + * added display of #display-table field in Table view type + * added option to view last N results of Notebook + * added validation of Notebook's parameters + * added timestamp and file path parameter types + * fixed clearing of Notebook's parameters on run + * increased width of parameters' inputs + * updated compose: + * changed use data access from `ro` to `rw` + ### 0.0.4 * added `${HOME}/python/lib` into `PYTHONPATH` environment variable diff --git a/local-run/with-jupyter-notebook/compose.yml b/local-run/with-jupyter-notebook/compose.yml index 1c1f9d0..16c3c35 100644 --- a/local-run/with-jupyter-notebook/compose.yml +++ b/local-run/with-jupyter-notebook/compose.yml @@ -1,14 +1,14 @@ services: json_stream_provider: - image: ghcr.io/th2-net/th2-json-stream-provider-py:0.0.4-dev + image: ghcr.io/th2-net/th2-json-stream-provider-py:0.0.5-th2-5212-9837013984-c5168cb ports: - "8081:8080" volumes: - j_sp_volume:/home/json-stream/:rw - jupyter_volume:/home/jovyan/:rw - python_lib:/home/json-stream/python/lib:ro - - ${USER_DATA_DIR}:/home/jovyan/user-data/:ro - - ./json-stream-provider/custom.json:/var/th2/config/custom.json:ro + - ${USER_DATA_DIR}:/home/jovyan/user-data/:rw + - ./json-stream-provider:/var/th2/config:ro networks: - th2_network @@ -21,12 +21,12 @@ services: volumes: - jupyter_volume:/home/jovyan/:rw - python_lib:/home/jovyan/python/lib:rw - - ${USER_DATA_DIR}:/home/jovyan/user-data/:ro + - ${USER_DATA_DIR}:/home/jovyan/user-data/:rw networks: - th2_network th2_rpt_viewer: - image: ghcr.io/th2-net/th2-rpt-viewer:5.2.8-th2-5208-9697982356 + image: ghcr.io/th2-net/th2-rpt-viewer:5.2.8-th2-5212-9836473151 ports: - "8083:8080" networks: diff --git a/local-run/with-jupyter-notebook/json-stream-provider/log4py.conf b/local-run/with-jupyter-notebook/json-stream-provider/log4py.conf new file mode 100644 index 0000000..5648c4a --- /dev/null +++ b/local-run/with-jupyter-notebook/json-stream-provider/log4py.conf @@ -0,0 +1,22 @@ +[loggers] +keys=root + +[handlers] +keys=consoleHandler + +[formatters] +keys=formatter + +[logger_root] +level=INFO +handlers=consoleHandler + +[handler_consoleHandler] +class=StreamHandler +level=INFO +formatter=formatter +args=(sys.stdout,) + +[formatter_formatter] +format=%(asctime)s.%(msecs)03d - %(name)s - %(levelname)s - %(message)s +datefmt=%Y-%m-%d %H:%M:%S \ No newline at end of file diff --git a/local-run/with-jupyter-notebook/jupyter-notebook/Dockerfile b/local-run/with-jupyter-notebook/jupyter-notebook/Dockerfile index 592efcd..6ac4f50 100644 --- a/local-run/with-jupyter-notebook/jupyter-notebook/Dockerfile +++ b/local-run/with-jupyter-notebook/jupyter-notebook/Dockerfile @@ -3,7 +3,8 @@ FROM jupyter/datascience-notebook:python-3.9 RUN echo "c.ServerApp.base_url = '/jupyter/'" >> /etc/jupyter/jupyter_server_config.py ENV PYTHON_SHARED_LIB_PATH="${HOME}/python/lib" +ENV PYTHONPATH="${PYTHONPATH}:${PYTHON_SHARED_LIB_PATH}" +ENV PIP_TARGET="${PYTHON_SHARED_LIB_PATH}" RUN mkdir -p "${PYTHON_SHARED_LIB_PATH}" -ENV PYTHONPATH="${PYTHONPATH}:${PYTHON_SHARED_LIB_PATH}" -ENV PIP_TARGET="${PYTHON_SHARED_LIB_PATH}" \ No newline at end of file +RUN echo 'umask 0007' >> "${HOME}/.bashrc" \ No newline at end of file diff --git a/local-run/with-jupyter-notebook/user_data/.gitignore b/local-run/with-jupyter-notebook/user_data/.gitignore deleted file mode 100644 index f59ec20..0000000 --- a/local-run/with-jupyter-notebook/user_data/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/log_configuratior.py b/log_configuratior.py new file mode 100644 index 0000000..bdc0575 --- /dev/null +++ b/log_configuratior.py @@ -0,0 +1,34 @@ +import logging.config +import os + +log4py_file = '/var/th2/config/log4py.conf' +def configureLogging(): + if os.path.exists(log4py_file): + logging.config.fileConfig(log4py_file, disable_existing_loggers=False) + logging.getLogger(__name__).info(f'Logger is configured by {log4py_file} file') + else: + default_logging_config = { + 'version': 1, + 'formatters': { + 'default': { + 'format': '%(asctime)s.%(msecs)03d - %(name)s - %(levelname)s - %(message)s', + 'datefmt': '%Y-%m-%d %H:%M:%S' + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'default', + 'level': 'DEBUG', + 'stream': 'ext://sys.stdout' + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'DEBUG', + }, + } + logging.config.dictConfig(default_logging_config) + logging.getLogger(__name__).info('Logger is configured by default') + + diff --git a/package_info.json b/package_info.json index 2b76941..4135c93 100644 --- a/package_info.json +++ b/package_info.json @@ -1,4 +1,4 @@ { "package_name": "th2-json-stream-provider", - "package_version": "0.0.4" + "package_version": "0.0.5" } \ No newline at end of file diff --git a/server.py b/server.py index 5494f65..35e805a 100644 --- a/server.py +++ b/server.py @@ -12,73 +12,62 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio +import json +import logging.config import os -os.system('pip list') +from argparse import ArgumentParser +from datetime import datetime, timezone -import subprocess -import sys import papermill as pm -from aiohttp.web_request import Request from aiohttp import web -from aiojobs.aiohttp import setup, spawn +from aiohttp.web_request import Request from aiohttp_swagger import * -from glob import glob -import json -import datetime -import asyncio -from argparse import ArgumentParser -import logging - -serverStatus: str = 'idle' -notebooksDir: str = '/home/jupyter-notebook/' -resultsDir: str = '/home/jupyter-notebook/results/' -logDir: str = '/home/jupyter-notebook/logs/' -tasks: dict = {} -logger: logging.Logger - - -def notebooksReg(path): - return path + '/*.ipynb' - +from aiojobs.aiohttp import setup, spawn -def resultsReg(path): - return path + '/*.jsonl' +from log_configuratior import configureLogging +os.system('pip list') -def resultsLog(path): - return path + '/*.log.jsonl' +server_status: str = 'ok' +notebooks_dir: str = '/home/jupyter-notebook/' +results_dir: str = '/home/jupyter-notebook/results/' +log_dir: str = '/home/jupyter-notebook/logs/' +tasks: dict = {} +configureLogging() +logger: logging.Logger = logging.getLogger('j-sp') -def createDir(path: str): +def create_dir(path: str): if not os.path.exists(path): os.makedirs(path) -def readConf(path: str): - global notebooksDir - global resultsDir - global logDir +def read_config(path: str): + global notebooks_dir + global results_dir + global log_dir global logger try: file = open(path, "r") result = json.load(file) - notebooksDir = result.get('notebooks', notebooksDir) - logger.info('notebooksDir=%s', notebooksDir) - if notebooksDir: - createDir(notebooksDir) - resultsDir = result.get('results', resultsDir) - logger.info('resultsDir=%s',resultsDir) - if resultsDir: - createDir(resultsDir) - logDir = result.get('logs', logDir) - logger.info('logDir=%s', logDir) - if logDir: - createDir(logDir) + notebooks_dir = result.get('notebooks', notebooks_dir) + logger.info('notebooks_dir=%s', notebooks_dir) + if notebooks_dir: + create_dir(notebooks_dir) + results_dir = result.get('results', results_dir) + logger.info('results_dir=%s',results_dir) + if results_dir: + create_dir(results_dir) + log_dir = result.get('logs', log_dir) + logger.info('log_dir=%s', log_dir) + if log_dir: + create_dir(log_dir) except Exception as e: - print(e) + logger.error(f"Read '{path}' configuration failure", e) -async def reqStatus(req: Request): +async def req_status(req: Request): """ --- description: This end-point allow to test that service is up. @@ -90,94 +79,147 @@ async def reqStatus(req: Request): "200": description: successful operation. Return json with server status """ - global serverStatus - return web.json_response({'status': serverStatus}) + global server_status + return web.json_response({'status': server_status}) -def getDirs(path): +def get_dirs(path): return [f.path for f in os.scandir(path) if f.is_dir() and f.name[0] != '.'] +def get_files(path, type): + return [f.path for f in os.scandir(path) if f.is_file() and f.name.endswith(type) and f.name[0] != '.'] + -def replaceSlashes(path: str): +def replace_slashes(path: str): return path.replace('\\', '/') +def replace_local_to_server(path: str): + if path.startswith(notebooks_dir): + return replace_slashes(path).replace(notebooks_dir, './notebooks/', 1) + elif path.startswith(results_dir): + return replace_slashes(path).replace(results_dir, './results/', 1) + else: + return replace_slashes(path) + -async def reqFiles(req: Request): +def replace_server_to_local(path: str): + if path.startswith('./notebooks'): + return replace_slashes(path).replace('./notebooks/', notebooks_dir, 1) + elif path.startswith('./results'): + return replace_slashes(path).replace('./results/', results_dir, 1) + raise Exception("Path didn't start with notebooks or results folder") + + +async def req_notebooks(req: Request): """ --- - description: This end-point allows to get files that could be requested. + description: This end-point allows to get notebooks that could be requested. Query requires path to directory in which notebooks is searched. tags: - File operation produces: - application/json responses: "200": - description: successful operation. Return string array of available files. + description: successful operation. Return dictionary of available directories/files. + "404": + description: failed operation when queried directory doesn't exist or requested path didn't start with ./notebooks. """ - path = path = req.rel_url.query.get('path') - dirsNote = [] - dirsRes = [] - if path: - if os.path.isdir(path): - dirs = list(getDirs(path)) - files = glob(notebooksReg(path)) + glob(resultsReg(path)) + global logger + path_arg = req.rel_url.query.get('path', '') + logger.info('/files/notebooks?path={path}'.format(path=str(path_arg))) + if path_arg == '': + dirs = [] + if os.path.isdir(notebooks_dir): + dirs = list(map(replace_local_to_server, get_dirs(notebooks_dir))) + files = list(map(replace_local_to_server, get_files(notebooks_dir, '.ipynb'))) + + dirs.sort() + files.sort() + + return web.json_response({ + 'directories': dirs, + 'files': files + }) + + try: + path_converted = replace_server_to_local(path_arg) + except: + return web.HTTPNotFound(reason="Requested path didn't start with ./notebooks") + + if path_arg: + if os.path.isdir(path_converted): + dirs = list(map(replace_local_to_server, get_dirs(path_converted))) + files = list(map(replace_local_to_server, get_files(path_converted, '.ipynb'))) + + dirs.sort() + files.sort() + return web.json_response({ 'directories': dirs, 'files': files }) else: return web.HTTPNotFound() - - if os.path.isdir(notebooksDir): - dirsNote = getDirs(notebooksDir) - if os.path.isdir(resultsDir): - dirsRes = getDirs(resultsDir) - files = glob(notebooksReg(notebooksDir)) + glob(resultsReg(resultsDir)) + return web.json_response({ - 'directories': list({*dirsNote, *dirsRes}), - 'files': files + 'directories': [], + 'files': [] }) -def replacePathLocalToServer(path: str): - if path.startswith(notebooksDir): - return replaceSlashes(path).replace(notebooksDir, './notebooks/', 1) - elif path.startswith(resultsDir): - return replaceSlashes(path).replace(resultsDir, './results/', 1) - else: - return replaceSlashes(path) - - -def replacePathServerToLocal(path: str): - if path.startswith('./notebooks'): - return replaceSlashes(path).replace('./notebooks/', notebooksDir, 1) - elif path.startswith('./results'): - return replaceSlashes(path).replace('./results/', resultsDir, 1) - else: - return replaceSlashes(path) - - -async def reqNotebooks(req: Request): +async def req_jsons(req: Request): """ --- - description: This end-point allows to get notebooks that could be requested. + description: This end-point allows to get jsonls that could be requested. Query requires path to directory in which jsonls is searched. tags: - File operation produces: - application/json responses: "200": - description: successful operation. Return string array of available files. + description: successful operation. Return dictionary of available directories/files. + "404": + description: failed operation when queried directory doesn't exist or requested path didn't start with ./results or ./notebooks. """ global logger - path = req.rel_url.query.get('path') - logger.info('/files/notebooks?path={path}'.format(path=str(path))) - pathConverted = path and replacePathServerToLocal(path) - dirsNote = [] - if path: - if os.path.isdir(pathConverted): - dirs = list(map(replacePathLocalToServer, getDirs(pathConverted))) - files = list(map(replacePathLocalToServer, glob(notebooksReg(pathConverted)))) + path_arg = req.rel_url.query.get('path', '') + logger.info('/files/results?path={path}'.format(path=str(path_arg))) + + if path_arg == '': + dirs_res = [] + dirs_note = [] + if os.path.isdir(results_dir): + dirs_res = list(map(replace_local_to_server, get_dirs(results_dir))) + + if os.path.isdir(notebooks_dir): + dirs_note = list(map(replace_local_to_server, get_dirs(notebooks_dir))) + + files_res = list(map(replace_local_to_server, get_files(results_dir, '.jsonl'))) + files_note = list(map(replace_local_to_server, get_files(notebooks_dir, '.jsonl'))) + + dirs = list({*dirs_note, *dirs_res}) + files = list({*files_note, *files_res}) + + dirs.sort() + files.sort() + + return web.json_response({ + 'directories': dirs, + 'files': files + }) + + try: + path_converted = replace_server_to_local(path_arg) + except: + return web.HTTPNotFound(reason="Requested path didn't start with ./results or ./notebooks") + if path_arg: + if os.path.isdir(path_converted): + dirs = list(map(replace_local_to_server, get_dirs(path_converted))) + files = list(map(replace_local_to_server, get_files(path_converted, '.jsonl'))) + + dirs.sort() + files.sort() + return web.json_response({ 'directories': dirs, 'files': files @@ -185,37 +227,64 @@ async def reqNotebooks(req: Request): else: return web.HTTPNotFound() - if os.path.isdir(notebooksDir): - dirsNote = list(map(replacePathLocalToServer, getDirs(notebooksDir))) - files = list(map(replacePathLocalToServer, glob(notebooksReg(notebooksDir)))) return web.json_response({ - 'directories': dirsNote, - 'files': files + 'directories': [], + 'files': [] }) - -async def reqJsons(req: Request): +async def req_files(req: Request): """ --- - description: This end-point allows to get jsons that could be requested. + description: This end-point allows to get files and directories. Query requires path to directory in which files and directories is searched. tags: - File operation produces: - application/json responses: "200": - description: successful operation. Return string array of available files. + description: successful operation. Return dictionary of available directories/files. + "404": + description: failed operation when queried directory doesn't exist or requested path didn't start with ./results or ./notebooks. """ global logger - path = req.rel_url.query.get('path') - logger.info('/files/results?path={path}'.format(path=str(path))) - pathConverted = path and replacePathServerToLocal(path) - dirsNote = [] - dirsRes = [] - if path: - if os.path.isdir(pathConverted): - dirs = list(map(replacePathLocalToServer, getDirs(pathConverted))) - files = list(map(replacePathLocalToServer, glob(resultsReg(pathConverted)))) + path_arg = req.rel_url.query.get('path', '') + logger.info('/files/all?path={path}'.format(path=str(path_arg))) + + if path_arg == '': + dirs_res = [] + dirs_note = [] + if os.path.isdir(results_dir): + dirs_res = list(map(replace_local_to_server, get_dirs(results_dir))) + + if os.path.isdir(notebooks_dir): + dirs_note = list(map(replace_local_to_server, get_dirs(notebooks_dir))) + + files_res = list(map(replace_local_to_server, get_files(results_dir, ''))) + files_note = list(map(replace_local_to_server, get_files(notebooks_dir, ''))) + + dirs = list({*dirs_note, *dirs_res}) + files = list({*files_note, *files_res}) + + dirs.sort() + files.sort() + + return web.json_response({ + 'directories': dirs, + 'files': files + }) + + try: + path_converted = replace_server_to_local(path_arg) + except: + return web.HTTPNotFound(reason="Requested path didn't start with ./results or ./notebooks") + if path_arg: + if os.path.isdir(path_converted): + dirs = list(map(replace_local_to_server, get_dirs(path_converted))) + files = list(map(replace_local_to_server, get_files(path_converted, ''))) + + dirs.sort() + files.sort() + return web.json_response({ 'directories': dirs, 'files': files @@ -223,25 +292,16 @@ async def reqJsons(req: Request): else: return web.HTTPNotFound() - if os.path.isdir(resultsDir): - dirsRes = list(map(replacePathLocalToServer, getDirs(resultsDir))) - - if os.path.isdir(notebooksDir): - dirsNote = list(map(replacePathLocalToServer, getDirs(notebooksDir))) - - filesRes = list(map(replacePathLocalToServer, glob(resultsReg(resultsDir)))) - filesNote = list(map(replacePathLocalToServer, glob(resultsReg(notebooksDir)))) - return web.json_response({ - 'directories': list({*dirsNote, *dirsRes}), - 'files': list({*filesNote, *filesRes}) + 'directories': [], + 'files': [] }) -async def reqArguments(req: Request): +async def req_parameters(req: Request): """ --- - description: This end-point allows to get parameters for file in requested path. + description: This end-point allows to get parameters for notebook in requested path. Query requires path to notebook. tags: - File operation produces: @@ -250,49 +310,73 @@ async def reqArguments(req: Request): "200": description: successful operation. Return json of file's parameters. "404": - description: requested file doesn't exist. + description: failed operation when queried file doesn't exist or requested path didn't start with ./notebooks. """ global logger - path = req.rel_url.query['path'] + path = req.rel_url.query.get('path', '') logger.info('/files?path={path}'.format(path=str(path))) - pathConverted = path and replacePathServerToLocal(path) - if not path or not os.path.isfile(pathConverted): + try: + path_converted = replace_server_to_local(path) + except: + return web.HTTPNotFound(reason="Requested path didn't start with ./notebooks") + if not path or not os.path.isfile(path_converted): return web.HTTPNotFound() - params = pm.inspect_notebook(pathConverted) + params = pm.inspect_notebook(path_converted) return web.json_response(params) -async def launchNotebook(input, arguments, file_name, task_id): +async def launch_notebook(input_path, arguments, file_name, task_id): global tasks global logger - logger.info('launching notebook {input} with {arguments}'.format(input=input, arguments=arguments)) - logOut: str = (logDir + '/%s.log.ipynb' % file_name) if logDir and file_name else None + logger.info(f'launching notebook {input_path} with {arguments}') + start_execution = datetime.now() + log_out: str = (log_dir + '/%s.log.ipynb' % file_name) if log_dir and file_name else None try: - with pm.utils.chdir(input[:input.rfind('/')]): - input = input[input.rfind('/')+1:] - pm.execute_notebook(input, logOut, arguments) - logger.debug('successfully launched notebook {input}'.format(input=input)) + with pm.utils.chdir(input_path[:input_path.rfind('/')]): + input_path = input_path[input_path.rfind('/') + 1:] + pm.execute_notebook(input_path, log_out, arguments) + logger.debug(f'successfully launched notebook {input_path}') if tasks.get(task_id): tasks[task_id] = { 'status': 'success', 'result': arguments.get('output_path') } except Exception as error: - logger.info('failed to launch notebook {input}'.format(input=input)) - logger.debug(error) + logger.error(f'failed to launch notebook {input_path}', error) if tasks.get(task_id): tasks[task_id] = { 'status': 'failed', 'result': error } finally: - logger.info('ended launch notebook {input} with {arguments}'.format(input=input, arguments=arguments)) + spent_time = (datetime.now() - start_execution).total_seconds() + logger.info(f'ended launch notebook {input_path} with {arguments} spent_time {spent_time} sec') + + +def convert_parameter(parameter, notebook_path): + parameter_type = parameter.get('type') + parameter_value = parameter.get('value') + if (parameter_type == 'file path'): + try: + parameter_path = replace_server_to_local(parameter_value) + except: + raise Exception( + "Parameter {name} of type={type} with value={value} didn't start with ./notebooks or ./results" + .format(name=parameter.get('name'), type=parameter_type, value=parameter_value) + ) + + relative_path = os.path.relpath(parameter_path, notebook_path[:notebook_path.rfind('/')]) + + return relative_path + + else: + return parameter_value -async def reqLaunch(req: Request): +async def req_launch(req: Request): """ --- - description: This end-point allows to get file's parameters for requested path. + description: This end-point allows to start notebook. Query requires path to notebook. Body requred to be dictionary of parameters. tags: - Execution operation produces: @@ -301,101 +385,135 @@ async def reqLaunch(req: Request): "200": description: successful operation. Return json with path for resulting file. "400": - description: body with parameters not present. + description: failed operation. body with parameters not present. "404": - description: requested file doesn't exist. - "503": - description: server is currently busy. + description: failed operation. requested file doesn't exist or requested path didn't start with ./notebooks. + "500": + description: failed operation. directory for output doesn't exist. """ from uuid import uuid4 global tasks global logger - path = req.rel_url.query.get('path') - logger.info('/execute?path={path}'.format(path=str(path))) + path_arg = req.rel_url.query.get('path', '') + logger.info('/execute?path={path}'.format(path=str(path_arg))) if not req.can_read_body: - return web.HTTPBadRequest(reason='body with parameters not present') - path = req.rel_url.query.get('path') - pathConverted = path and replacePathServerToLocal(path) - if not path or not os.path.isfile(pathConverted): + return web.HTTPBadRequest(reason='Body with parameters not present') + try: + path_converted = replace_server_to_local(path_arg) + except: + return web.HTTPNotFound(reason="Requested path didn't start with ./notebooks") + if not path_arg or not os.path.isfile(path_converted): return web.HTTPNotFound() - if not os.path.exists(resultsDir): - return web.HTTPInternalServerError(reason='no output directory') - notebookName = pathConverted.split('/')[-1].split('.')[0]; - timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") - file_name = notebookName + '_' + timestamp - output_path = resultsDir + '/%s.jsonl' % str(file_name) - parameters = await req.json() + if not os.path.exists(results_dir): + return web.HTTPInternalServerError(reason='No output directory') + notebook_name = path_converted.split('/')[-1].split('.')[0] + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S-%f") + file_name = notebook_name + '_' + timestamp + output_path = results_dir + '/%s.jsonl' % str(file_name) + req_json = await req.json() + parameters = {} + for key, parameter in req_json.items(): + try: + parameters[key] = convert_parameter(parameter, path_converted) + except Exception as error: + return web.HTTPInternalServerError(reason=str(error)) parameters['output_path'] = output_path task_id = str(uuid4()) - job = spawn(req, launchNotebook(pathConverted, parameters, file_name, task_id)) + job = spawn(req, launch_notebook(path_converted, parameters, file_name, task_id)) tasks[task_id] = { 'status': 'in progress', 'job': job } asyncio.shield(job) - return web.json_response({'path': replacePathLocalToServer(output_path), 'task_id': task_id}) + return web.json_response({'task_id': task_id}) +async def req_file(req: Request): + """ + --- + description: This end-point allows to get file from requested path. Query requires path to file. + tags: + - File operation + produces: + - application/json + responses: + "200": + description: successful operation. Return file's json. + "404": + description: failed operation. requested file doesn't exist or requested path didn't start with ./results or ./notebooks. + """ + global tasks + global logger + path = req.rel_url.query.get('path', '') + logger.info('/file?path={path}'.format(path=str(path))) + path_converted = replace_server_to_local(path) + try: + path_converted = replace_server_to_local(path) + except: + return web.HTTPNotFound(reason="Requested path didn't start with ./results or ./notebooks") + if not path or not os.path.isfile(path_converted): + return web.HTTPNotFound() + file = open(path_converted, "r") + content = file.read() + file.close() + return web.json_response({'result': content}) + async def reqResult(req: Request): """ --- - description: This end-point allows to get result from requested path. + description: This end-point allows to get result from requested task. Query requires task id from which result is required. tags: - Execution operation produces: - application/json responses: "200": - description: successful operation. Return resulting file's json. + description: successful operation. Return different data depending on status: + 'in progress': return json with task's status + 'success': return json with result's content + 'error': return json with reason of failed run "400": - description: body with parameters not present. + description: failed operation. body with parameters not present. "404": - description: requested file doesn't exist. - "503": - description: server is currently busy. + description: failed operation. requested task doesn't exist or resulting file doesn't exist or status is unknown. """ global tasks global logger task_id = req.rel_url.query.get('id') logger.info('/result?id={task_id}'.format(task_id=str(task_id))) task = tasks.get(task_id, None) - if not task: - return web.HTTPNotFound() + if task is None: + return web.HTTPNotFound(reason="Requested task doesn't exist") status = task.get('status', None) if status == 'in progress': return web.json_response({'status': status}) elif status == 'success': - path = task.get('result','') - pathConverted = replacePathServerToLocal(path) - if not path or not os.path.isfile(pathConverted): - return web.HTTPNotFound() - file = open(pathConverted, "r") + path_param = task.get('result','') + if not path_param or not os.path.isfile(path_param): + return web.HTTPNotFound(reason="Resulting file doesn't exist") + file = open(path_param, "r") content = file.read() file.close() - return web.json_response({'status': status, 'result': content}) + return web.json_response({'status': status, 'result': content, 'path': replace_local_to_server(path_param) }) elif status == 'failed': error = task.get('result', Exception()) return web.json_response({'status': status, 'result': str(error)}) else: return web.HTTPNotFound() -async def reqStop(req: Request): +async def req_stop(req: Request): """ --- - description: This end-point allows to stop task by id. + description: This end-point allows to stop task. Query requires task id which will be stopped. tags: - Execution operation produces: - application/json responses: "200": - description: successful operation. Return resulting file's json. - "400": - description: body with parameters not present. - "404": - description: requested file doesn't exist. - "503": - description: server is currently busy. + description: successful operation. Return nothing. + "500": + description: failed operation. failed to stop process. """ global tasks global logger @@ -410,25 +528,24 @@ async def reqStop(req: Request): return web.HTTPOk() if __name__ == '__main__': - logging.basicConfig(filename='var/th2/config/log4py.conf', level=logging.DEBUG) - logger=logging.getLogger('th2_common') parser = ArgumentParser() parser.add_argument('config') - path = vars(parser.parse_args()).get('config') - if (path): - readConf(path) + cfg_path = vars(parser.parse_args()).get('config') + if (cfg_path): + read_config(cfg_path) app = web.Application() setup(app) - app.router.add_route('GET', "/status", reqStatus) - app.router.add_route('GET', "/files/all", reqFiles) - app.router.add_route('GET', "/files/notebooks", reqNotebooks) - app.router.add_route('GET', "/files/results", reqJsons) - app.router.add_route('GET', "/files", reqArguments) - app.router.add_route('POST', "/execute", reqLaunch) + app.router.add_route('GET', "/status", req_status) + app.router.add_route('GET', "/files/notebooks", req_notebooks) + app.router.add_route('GET', "/files/results", req_jsons) + app.router.add_route('GET', "/files/all", req_files) + app.router.add_route('GET', "/files", req_parameters) + app.router.add_route('GET', "/file", req_file) + app.router.add_route('POST', "/execute", req_launch) app.router.add_route('GET', "/result", reqResult) - app.router.add_route('POST', "/stop", reqStop) + app.router.add_route('POST', "/stop", req_stop) setup_swagger(app) logger.info('starting server') web.run_app(app) \ No newline at end of file diff --git a/var/th2/config/log4py.conf b/var/th2/config/log4py.conf deleted file mode 100644 index 1b2a653..0000000 --- a/var/th2/config/log4py.conf +++ /dev/null @@ -1,38 +0,0 @@ -[loggers] -keys=root,th2_common,aiopika - -[handlers] -keys=consoleHandler,fileHandler,metricsHandler - -[formatters] -keys=formatter - -[logger_root] -level=INFO -handlers=consoleHandler,fileHandler -propagate=0 - -[logger_th2_common] -level=INFO -qualname=th2_common -handlers=consoleHandler,fileHandler -propagate=0 - -[logger_aiopika] -level=WARNING -qualname=aio_pika -handlers=consoleHandler,fileHandler -propagate=0 - -[handler_consoleHandler] -class=StreamHandler -formatter=formatter -args=(sys.stdout,) - -[handler_fileHandler] -class=FileHandler -formatter=formatter -args=('../all.log',) - -[formatter_formatter] -format=%(asctime)s - %(name)s - %(levelname)s - %(message)s \ No newline at end of file