Skip to content

Commit

Permalink
Add restart (#6)
Browse files Browse the repository at this point in the history
* Adding restart upon error capability

* Finalize restart capability

* Bump version

* Change test settings to restart on error

* Fix json schema dir

* Bump version
  • Loading branch information
wvangeit authored Nov 1, 2024
1 parent 83be540 commit 2f9ac10
Show file tree
Hide file tree
Showing 29 changed files with 340 additions and 104 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.1.0
current_version = 0.2.1
commit = False
message = service version: {current_version} → {new_version}
tag = False
Expand Down
Empty file modified .github/workflows/test.yml
100644 → 100755
Empty file.
15 changes: 13 additions & 2 deletions .osparc/osparc-meta-dakota/metadata.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: DakotaService
description: "DakotaServiceService"
key: simcore/services/dynamic/osparc-meta-dakota
version: 0.1.0
integration-version: 0.1.0
version: 0.2.1
integration-version: 0.2.1
type: dynamic
authors:
- name: Werner Van Geit
Expand All @@ -22,6 +22,12 @@ inputs:
description:
Map feedback channel
type: data:*/*
input_2:
displayOrder: 2.0
label: Dakota service settings
description:
Dakota service settings file
type: data:*/*
outputs:
output_0:
displayOrder: 0.0
Expand All @@ -33,6 +39,11 @@ outputs:
label: Map commands
description: Map command channel
type: data:*/*
conf_json_schema:
displayOrder: 2.0
label: JSON schema
description: JSON schema of configuration file
type: data:*/*
boot-options:
boot_mode:
label: Boot mode
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ RUN pip3 install itis-dakota osparc_filecomms --upgrade
USER osparcuser

WORKDIR /home/osparcuser
RUN python3 -m venv venv
RUN . ./venv/bin/activate && pip3 install --upgrade -r /docker/requirements.txt

USER root

EXPOSE 8888

ENTRYPOINT [ "/bin/bash", "-c", "/docker/entrypoint.bash" ]
CMD [ "/bin/bash", "-c", "/docker/runner.bash "]
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SHELL = /bin/sh
MAKEFLAGS += -j3

export DOCKER_IMAGE_NAME ?= osparc-meta-dakota
export DOCKER_IMAGE_TAG ?= 0.1.0
export DOCKER_IMAGE_TAG ?= 0.2.1

export MASTER_AWS_REGISTRY ?= registry.osparc-master-zmt.click
export MASTER_REGISTRY ?= registry.osparc-master.speag.com
Expand All @@ -16,7 +16,7 @@ define _bumpversion
# upgrades as $(subst $(1),,$@) version, commits and tags
@docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \
-u $(shell id -u):$(shell id -g) \
itisfoundation/ci-service-integration-library:v1.0.1-dev-33 \
itisfoundation/ci-service-integration-library:latest \
sh -c "cd /${DOCKER_IMAGE_NAME} && bump2version --verbose --list --config-file $(1) $(subst $(2),,$@)"
endef

Expand All @@ -31,27 +31,29 @@ version-patch version-minor version-major: .bumpversion.cfg ## increases service
compose-spec: ## runs ooil to assemble the docker-compose.yml file
@docker run --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \
-u $(shell id -u):$(shell id -g) \
itisfoundation/ci-service-integration-library:v1.0.4 \
itisfoundation/ci-service-integration-library:latest \
sh -c "cd /${DOCKER_IMAGE_NAME} && ooil compose"

clean: clean-validation
rm -rf docker-compose.yml

.PHONY: build
build: clean compose-spec ## build docker image
chmod -R 755 docker_scripts
docker compose build

clean-validation:
rm -rf validation-tmp
cp -r validation validation-tmp
chmod -R 770 validation-tmp
chmod -R 775 validation-tmp

run-compose-local:
docker compose down
docker compose --file docker-compose-local.yml up

run-mock-mapservice:
pip install osparc-filecomms
sleep 5
MOCK_MAP_INPUT_PATH=validation-tmp/outputs/output_1 MOCK_MAP_OUTPUT_PATH=validation-tmp/inputs/input_1 python validation-client/mock_mapservice.py

run-validation-client:
Expand Down
Empty file modified README.md
100644 → 100755
Empty file.
3 changes: 1 addition & 2 deletions docker-compose-local.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
version: '3.7'
services:
osparc-meta-dakota:
image: simcore/services/dynamic/osparc-meta-dakota:0.1.0
image: simcore/services/dynamic/osparc-meta-dakota:0.2.1
ports:
- "8888:8888"
environment:
Expand Down
4 changes: 0 additions & 4 deletions docker_scripts/dakota.bash

This file was deleted.

115 changes: 66 additions & 49 deletions docker_scripts/dakota-start.py → docker_scripts/dakota_start.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import contextlib
import http.server
import logging
import multiprocessing
import os
import pathlib as pl
import shutil
import socketserver
import sys
import threading
import time
import uuid

Expand All @@ -24,53 +22,21 @@
f"{str(pl.Path(__file__).resolve().parent)}"
)

import tools.maps # NOQA


NOISE_MUS = [0.0, 0.0]
NOISE_SIGMAS = [5.0, 10.0]

POLLING_TIME = 0.1
HTTP_PORT = 8888


def main():
dakota_service = DakotaService()

http_dir_path = pl.Path(__file__).parent / "http"

class HTTPHandler(http.server.SimpleHTTPRequestHandler):
def __init__(self, *args, **kwargs):
super().__init__(
*args, **kwargs, directory=http_dir_path.resolve()
)

try:
logger.info(
f"Starting http server at port {HTTP_PORT} and serving path {http_dir_path}"
)
with socketserver.TCPServer(("", HTTP_PORT), HTTPHandler) as httpd:
httpd_thread = threading.Thread(target=httpd.serve_forever)
httpd_thread.start()
dakota_service.start()
httpd.shutdown()
except Exception as err: # pylint: disable=broad-except
logger.error(f"{err} . Stopping %s", exc_info=True)
import map.maps # NOQA


class DakotaService:
def __init__(self):
def __init__(self, settings):
self.settings = settings
self.uuid = uuid.uuid4()
self.caller_uuid = None
self.map_uuid = None

self.input_dir_path = pl.Path(os.environ["DY_SIDECAR_PATH_INPUTS"])
self.input0_dir_path = self.input_dir_path / "input_0"
self.input1_dir_path = self.input_dir_path / "input_1"
self.input0_dir_path = self.settings.input_path / "input_0"
self.input1_dir_path = self.settings.input_path / "input_1"

self.output_dir_path = pl.Path(os.environ["DY_SIDECAR_PATH_OUTPUTS"])
self.output0_dir_path = self.output_dir_path / "output_0"
self.output1_dir_path = self.output_dir_path / "output_1"
self.output0_dir_path = self.settings.output_path / "output_0"
self.output1_dir_path = self.settings.output_path / "output_1"

self.dakota_conf_path = self.input0_dir_path / "dakota.in"

Expand All @@ -95,15 +61,15 @@ def clean_output(self, dir_path):
item.unlink()

def start(self):
self.map_object = tools.maps.oSparcFileMap(
self.map_object = map.maps.oSparcFileMap(
self.map_reply_file_path.resolve(),
self.map_caller_file_path.resolve(),
)

self.caller_uuid = self.caller_handshaker.shake()

while not self.dakota_conf_path.exists():
time.sleep(POLLING_TIME)
time.sleep(self.settings.file_polling_interval)
dakota_conf = self.dakota_conf_path.read_text()

clear_directory(
Expand All @@ -117,7 +83,62 @@ def start(self):
dirs_exist_ok=True,
)

self.start_dakota(dakota_conf, self.output0_dir_path)
first_error_time = None

while True:
try:
process = multiprocessing.Process(
target=self.start_dakota,
args=(dakota_conf, self.output0_dir_path),
)
process.start()
process.join()
logging.info(f"PROCESS ended with exitcode {process.exitcode}")
if process.exitcode != 0:
raise RuntimeError("Dakota subprocess failed")
break
except RuntimeError as error:
if not self.settings.restart_on_error:
raise error
if first_error_time is None:
first_error_time = time.time()
if (
time.time() - first_error_time
>= self.settings.restart_on_error_max_time
):
logging.info(
"Received a RunTimeError from Dakota, "
"max retry time reached, raising error"
)
raise error
else:
logging.info(
f"Received a RunTimeError from Dakota ({error}), "
"retrying ..."
)
time.sleep(self.settings.restart_on_error_polling_interval)
max_wait_time = self.settings.restart_on_error_max_time - (
time.time() - first_error_time
)
logging.info(
f"Will wait for a maximum of {max_wait_time} "
"seconds for a change in dakota conf file..."
)
dakota_conf = self.wait_for_dakota_conf_change(
dakota_conf, max_wait_time
)
logging.info("Change in dakota conf file detected")
continue

def wait_for_dakota_conf_change(self, old_dakota_conf, max_wait_time):
new_dakota_conf = None
start_time = time.time()
while new_dakota_conf is None or new_dakota_conf == old_dakota_conf:
if time.time() - start_time > max_wait_time:
raise TimeoutError("Waiting too long for new dakota.conf")
new_dakota_conf = self.dakota_conf_path.read_text()
time.sleep(self.settings.file_polling_interval)
return new_dakota_conf

def model_callback(self, dak_inputs):
param_sets = [
Expand Down Expand Up @@ -184,7 +205,3 @@ def clear_directory(path):
os.unlink(item_path)
elif os.path.isdir(item_path):
shutil.rmtree(item_path)


if __name__ == "__main__":
main()
47 changes: 24 additions & 23 deletions docker_scripts/entrypoint.bash
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#!/bin/bash

set -euo pipefail

IFS=$'\n\t'
INFO="INFO: [$(basename "$0")] "

echo "$INFO" "Starting container for map ..."
echo "$INFO" "Starting container for dakotarunner ..."

HOST_USERID=$(stat -c %u "${DY_SIDECAR_PATH_INPUTS}")
HOST_GROUPID=$(stat -c %g "${DY_SIDECAR_PATH_INPUTS}")
Expand All @@ -13,28 +14,28 @@ CONTAINER_GROUPNAME=$(getent group | grep "${HOST_GROUPID}" | cut --delimiter=:
OSPARC_USER='osparcuser'

if [ "$HOST_USERID" -eq 0 ]; then
echo "Warning: Folder mounted owned by root user... adding $OSPARC_USER to root..."
addgroup "$OSPARC_USER" root
# echo "Warning: Folder mounted owned by root user... adding $OSPARC_USER to root..."
addgroup "$OSPARC_USER" root
else
echo "Folder mounted owned by user $HOST_USERID:$HOST_GROUPID-'$CONTAINER_GROUPNAME'..."
# take host's credentials in $OSPARC_USER
if [ -z "$CONTAINER_GROUPNAME" ]; then
echo "Creating new group my$OSPARC_USER"
CONTAINER_GROUPNAME=my$OSPARC_USER
addgroup --gid "$HOST_GROUPID" "$CONTAINER_GROUPNAME"
else
echo "group already exists"
fi

echo "adding $OSPARC_USER to group $CONTAINER_GROUPNAME..."
usermod --append --groups "$CONTAINER_GROUPNAME" "$OSPARC_USER"

echo "changing owner ship of state directory /home/${OSPARC_USER}/work/workspace"
chown --recursive "$OSPARC_USER" "/home/${OSPARC_USER}/work/workspace"
echo "changing owner ship of state directory ${DY_SIDECAR_PATH_INPUTS}"
chown --recursive "$OSPARC_USER" "${DY_SIDECAR_PATH_INPUTS}"
echo "changing owner ship of state directory ${DY_SIDECAR_PATH_OUTPUTS}"
chown --recursive "$OSPARC_USER" "${DY_SIDECAR_PATH_OUTPUTS}"
# echo "Folder mounted owned by user $HOST_USERID:$HOST_GROUPID-'$CONTAINER_GROUPNAME'..."
# take host's credentials in $OSPARC_USER
if [ -z "$CONTAINER_GROUPNAME" ]; then
# echo "Creating new group my$OSPARC_USER"
CONTAINER_GROUPNAME=my$OSPARC_USER
addgroup --gid "$HOST_GROUPID" "$CONTAINER_GROUPNAME"
else
echo "group already exists"
fi

# echo "adding $OSPARC_USER to group $CONTAINER_GROUPNAME..."
usermod --append --groups "$CONTAINER_GROUPNAME" "$OSPARC_USER"

# echo "changing owner ship of state directory /home/${OSPARC_USER}/work/workspace"
chown --recursive "$OSPARC_USER" "/home/${OSPARC_USER}/work/workspace"
# echo "changing owner ship of state directory ${DY_SIDECAR_PATH_INPUTS}"
chown --recursive "$OSPARC_USER" "${DY_SIDECAR_PATH_INPUTS}"
# echo "changing owner ship of state directory ${DY_SIDECAR_PATH_OUTPUTS}"
chown --recursive "$OSPARC_USER" "${DY_SIDECAR_PATH_OUTPUTS}"
fi

exec gosu "$OSPARC_USER" /docker/dakota.bash
exec gosu "$OSPARC_USER" /docker/main.bash
37 changes: 37 additions & 0 deletions docker_scripts/http/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import http.server
import logging
import pathlib as pl
import socketserver
import threading

logging.basicConfig(
level=logging.INFO, format="[%(filename)s:%(lineno)d] %(message)s"
)
logger = logging.getLogger(__name__)

HTTP_PORT = 8888


def main():
    """Serve the directory containing this script over HTTP on HTTP_PORT.

    Blocks inside the server loop until the server stops. Any exception
    raised while setting up or running the server is logged (with
    traceback) instead of being propagated.
    """
    http_dir_path = pl.Path(__file__).parent

    class HTTPHandler(http.server.SimpleHTTPRequestHandler):
        """Request handler that serves files from ``http_dir_path``."""

        def __init__(self, *args, **kwargs):
            super().__init__(
                *args, **kwargs, directory=http_dir_path.resolve()
            )

    try:
        logger.info(
            f"Starting http server at port {HTTP_PORT} and serving path {http_dir_path}"
        )
        with socketserver.TCPServer(("", HTTP_PORT), HTTPHandler) as httpd:
            # Run exactly one serve loop, in the calling thread. Starting a
            # second serve_forever() loop on the same server in a background
            # thread is redundant, since this call already blocks.
            httpd.serve_forever()
    except Exception as err:  # pylint: disable=broad-except
        # Pass the error as a logging argument so the placeholder is filled.
        logger.error("%s . Stopping", err, exc_info=True)


if __name__ == "__main__":
main()
9 changes: 9 additions & 0 deletions docker_scripts/main.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Start the static HTTP server in the background, then run the main
# Python program inside the user's virtual environment.
set -e

# Serve the /docker/http directory; the server keeps running in the background.
cd /docker/http
python server.py &

# Quote HOME so a path containing whitespace is not word-split.
cd "${HOME}"
source ./venv/bin/activate
python3 /docker/main.py
Loading

0 comments on commit 2f9ac10

Please sign in to comment.