From 9d2f479dd74cda7e3a58d31269454dbf5af7b64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Caud?= <82875602+frcaud@users.noreply.github.com> Date: Mon, 2 May 2022 15:35:44 +0200 Subject: [PATCH 01/20] [DOC] adding note for ami_image_name generic name (#574) * adding note for ami_image_name generic name * run CI --- doc/workers.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/workers.rst b/doc/workers.rst index 77571296..c2a48bcc 100644 --- a/doc/workers.rst +++ b/doc/workers.rst @@ -551,7 +551,11 @@ Create an event config.yml (see :ref:`deploy-ramp-event`) and update the console, 'Instances' tab on the left, under 'availability zone'. * ``ami_image_name``: name you gave to the image you prepared (see :ref:`prepare_instance`). This can be found in the EC2 console, under - 'Images' -> 'AMI' tab. + 'Images' -> 'AMI' tab. Note: you don't have to put the entire image name + and if you indicate the generic name you chose, it will automatically take + the latest version of the image created running the pipeline (e.g. + 'challenge-iris' will point to 'challenge-iris 2022-04-19T17-19-18.405Z' + if it's the latest one) * ``ami_user_name``: user name you used to ssh into your instance. Commonly 'ec2-user' or 'ubuntu'. * ``instance_type``: found in the EC2 console, 'Instances' tab, 'Description' From ccf2cf579785f615ba4aac2f663d3c96900a1497 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sun, 22 May 2022 11:15:44 +0200 Subject: [PATCH 02/20] FIX add hyperlink to log when submission failed (#576) * FIX add hyperlink to log when submission failed * debug * Update ramp-database/ramp_database/tools/tests/test_leaderboard.py * Update test_leaderboard.py * Update ramp-database/ramp_database/tools/tests/test_leaderboard.py * Update ramp-database/ramp_database/tools/tests/test_leaderboard.py --- ramp-database/ramp_database/tools/leaderboard.py | 4 ++-- ramp-database/ramp_database/tools/tests/test_leaderboard.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py index c22f21ad..f5f50eda 100644 --- a/ramp-database/ramp_database/tools/leaderboard.py +++ b/ramp-database/ramp_database/tools/leaderboard.py @@ -429,7 +429,7 @@ def get_leaderboard( "submission", "submitted at (UTC)", "state", - "wating list", + "waiting list", ] else: columns = ["team", "submission", "submitted at (UTC)", "error"] @@ -447,7 +447,7 @@ def get_leaderboard( pd.Timestamp(sub.submission_timestamp), ( sub.state_with_link - if leaderboard_type == "error" + if leaderboard_type == "failed" else sub.state ), ( diff --git a/ramp-database/ramp_database/tools/tests/test_leaderboard.py b/ramp-database/ramp_database/tools/tests/test_leaderboard.py index cb2e1117..535fb75b 100644 --- a/ramp-database/ramp_database/tools/tests/test_leaderboard.py +++ b/ramp-database/ramp_database/tools/tests/test_leaderboard.py @@ -1,3 +1,4 @@ +import re import shutil import pytest @@ -170,6 +171,8 @@ def test_get_leaderboard(session_toy_db): session_toy_db, "failed", "iris_test", "test_user" ) assert leaderboard_failed.count("") == 1 + # check that we have a link to the log of the failed submission + assert re.match(r".*.*", leaderboard_failed, flags=re.DOTALL) # the remaining submission should be successful leaderboard_public = get_leaderboard(session_toy_db, "public", "iris_test") From 8c3501efd452e7749caa8ab08808ca97a2307aec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Caud?= 
<82875602+frcaud@users.noreply.github.com> Date: Mon, 20 Jun 2022 12:05:30 +0200 Subject: [PATCH 03/20] remove json files in pkgs/cache as a workaround to permission error (#579) --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7f1c3f1e..f5f0d3bc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -53,6 +53,7 @@ jobs: - name: Create envs run: | conda install --yes mamba -n base -c conda-forge + rm -f /usr/share/miniconda/pkgs/cache/*.json # workaround for mamba-org/mamba#488 mamba create --yes -n testenv python=$PYTHON_VERSION mamba env update -n testenv -f environment.yml mamba env create -f ci_tools/environment_iris_kit.yml From 53636976c43133c67cebb3621b428f52e4a7aa2d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 13 Sep 2022 03:05:29 +0200 Subject: [PATCH 04/20] Working on the conda cpp runner --- ramp-database/ramp_database/tools/event.py | 10 +- ramp-engine/ramp_engine/__init__.py | 2 + ramp-engine/ramp_engine/cpp_runner.py | 193 +++++++++++++++++++++ 3 files changed, 200 insertions(+), 5 deletions(-) create mode 100644 ramp-engine/ramp_engine/cpp_runner.py diff --git a/ramp-database/ramp_database/tools/event.py b/ramp-database/ramp_database/tools/event.py index a2174cd1..90e21064 100644 --- a/ramp-database/ramp_database/tools/event.py +++ b/ramp-database/ramp_database/tools/event.py @@ -286,11 +286,11 @@ def add_event( session.add(event) session.commit() - X_train, y_train = event.problem.get_train_data() - cv = event.problem.module.get_cv(X_train, y_train) - for train_indices, test_indices in cv: - cv_fold = CVFold(event=event, train_is=train_indices, test_is=test_indices) - session.add(cv_fold) + # X_train, y_train = event.problem.get_train_data() + # cv = event.problem.module.get_cv(X_train, y_train) + # for train_indices, test_indices in cv: + # cv_fold = CVFold(event=event, train_is=train_indices, test_is=test_indices) + # session.add(cv_fold) score_types = event.problem.module.score_types for score_type in score_types: diff --git a/ramp-engine/ramp_engine/__init__.py b/ramp-engine/ramp_engine/__init__.py index c9895e78..fed821cc 100644 --- a/ramp-engine/ramp_engine/__init__.py +++ b/ramp-engine/ramp_engine/__init__.py @@ -1,6 +1,7 @@ from .aws import AWSWorker from .dispatcher import Dispatcher # noqa from .local import CondaEnvWorker +from .cpp_runner import CppCondaEnvWorker from .remote import DaskWorker from ._version import __version__ @@ -9,6 +10,7 @@ "conda": CondaEnvWorker, "aws": AWSWorker, "dask": DaskWorker, + "conda-cpp": CppCondaEnvWorker, } __all__ = [ diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py new file mode 100644 index 00000000..8dc236ad --- /dev/null +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -0,0 +1,193 @@ +import logging +import os +import shutil +from datetime import datetime +import subprocess + +from .base import _get_traceback +from .conda import _conda_info_envs, _get_conda_env_path +from .local import CondaEnvWorker + +logger = logging.getLogger("RAMP-WORKER") + + +class CppCondaEnvWorker(CondaEnvWorker): + """Local worker which uses conda environment to dispatch submission. + + Parameters + ---------- + config : dict + Configuration dictionary to set the worker. The following parameter + should be set: + + * 'conda_env': the name of the conda environment to use. If not + specified, the base environment will be used. 
+ * 'kit_dir': path to the directory of the RAMP kit; + * 'data_dir': path to the directory of the data; + * 'submissions_dir': path to the directory containing the + submissions; + * `logs_dir`: path to the directory where the log of the + submission will be stored; + * `predictions_dir`: path to the directory where the + predictions of the submission will be stored. + * 'timeout': timeout after a given number of seconds when + running the worker. If not provided, a default of 7200 + is used. + submission : str + Name of the RAMP submission to be handle by the worker. + + Attributes + ---------- + status : str + The status of the worker. It should be one of the following state: + + * 'initialized': the worker has been instanciated. + * 'setup': the worker has been set up. + * 'error': setup failed / training couldn't be started + * 'running': the worker is training the submission. + * 'finished': the worker finished to train the submission. + * 'collected': the results of the training have been collected. + """ + + def __init__(self, config, submission): + super().__init__(config=config, submission=submission) + + def setup(self): + """Set up the worker. + + The worker will find the path to the conda environment to use using + the configuration passed when instantiating the worker. + """ + # sanity check for the configuration variable + for required_param in ( + "kit_dir", + "data_dir", + "submissions_dir", + "logs_dir", + "predictions_dir", + ): + self._check_config_name(self.config, required_param) + # find the path to the conda environment + env_name = self.config.get("conda_env", "base") + conda_info = _conda_info_envs() + + self._python_bin_path = _get_conda_env_path(conda_info, env_name, self) + + super().setup() + + def teardown(self): + """Remove the predictions stores within the submission.""" + if self.status not in ("collected", "retry"): + raise ValueError("Collect the results before to kill the worker.") + output_training_dir = os.path.join( + self.config["kit_dir"], + "submissions", + self.submission, + "training_output", + ) + if os.path.exists(output_training_dir): + shutil.rmtree(output_training_dir) + super().teardown() + + def launch_submission(self): + """Launch the submission. + + Basically, it comes to run ``ramp_test_submission`` using the conda + environment given in the configuration. The submission is launched in + a subprocess to free to not lock the Python main process. + """ + if self.status == "running": + raise ValueError( + "Wait that the submission is processed before to " "launch a new one." 
+ ) + self._log_dir = os.path.join(self.config["logs_dir"], self.submission) + os.makedirs(self._log_dir, exist_ok=True) + self._log_file = open(os.path.join(self._log_dir, "log"), "wb+") + submission_dir = os.path.join( + self.config["submissions_dir"], + self.submission, + ) + output_dir = os.path.join(submission_dir, "training_output") + os.makedirs(output_dir, exist_ok=True) + bin_path = os.path.join(submission_dir, "main") + INCLUDE_DIR = os.path.join(self.config["data_dir"], "include", "cpp") + DATA_DIR = os.path.join(self.config["data_dir"], "data", "secret") + + subprocess.check_call( + [ + "gcc", + os.path.join(submission_dir, "main.cpp"), + f"-I{INCLUDE_DIR}", + "-lstdc++", + "-O3", + "-o", + bin_path, + ], + ) + + self._proc = subprocess.Popen( + [ + bin_path, + ], + stdout=open(os.path.join(output_dir, "case0.ans"), "wb+"), + stderr=self._log_file, + stdin=open(os.path.join(DATA_DIR, "case0.in"), "rb"), + ) + + self._start_date = datetime.utcnow() + self.status = "running" + + def collect_results(self): + """Collect the results after that the submission is completed. + + Be aware that calling ``collect_results()`` before that the submission + finished will lock the Python main process awaiting for the submission + to be processed. Use ``worker.status`` to know the status of the worker + beforehand. + """ + if self.status == "initialized": + raise ValueError( + "The worker has not been setup and no submission " + "was launched. Call the method setup() and " + "launch_submission() before to collect the " + "results." + ) + elif self.status == "setup": + raise ValueError( + "No submission was launched. Call the method " + "launch_submission() and then try again to " + "collect the results." + ) + if self.status in ["finished", "running", "timeout"]: + # communicate() will wait for the process to be completed + self._proc.communicate() + self._log_file.close() + with open(os.path.join(self._log_dir, "log"), "rb") as f: + log_output = f.read() + error_msg = _get_traceback(log_output.decode("utf-8")) + if self.status == "timeout": + error_msg += "\nWorker killed due to timeout after {}s.".format( + self.timeout + ) + if self.status == "timeout": + returncode = 124 + else: + returncode = self._proc.returncode + pred_dir = os.path.join(self.config["predictions_dir"], self.submission) + output_training_dir = os.path.join( + self.config["submissions_dir"], + self.submission, + "training_output", + ) + if os.path.exists(pred_dir): + shutil.rmtree(pred_dir) + if returncode: + if os.path.exists(output_training_dir): + shutil.rmtree(output_training_dir) + self.status = "collected" + return (returncode, error_msg) + # copy the predictions into the disk + # no need to create the directory, it will be handle by copytree + shutil.copytree(output_training_dir, pred_dir) + self.status = "collected" + return (returncode, error_msg) From 77a7aa5751a344834980dd9b02f6b3000589ee4a Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 13 Sep 2022 04:04:30 +0200 Subject: [PATCH 05/20] More hacks to make the cpp runner work --- .../ramp_database/tools/leaderboard.py | 213 +----------------- .../ramp_database/tools/submission.py | 19 +- ramp-engine/ramp_engine/cpp_runner.py | 20 ++ 3 files changed, 33 insertions(+), 219 deletions(-) diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py index 39f37bc6..d26beabd 100644 --- a/ramp-database/ramp_database/tools/leaderboard.py +++ b/ramp-database/ramp_database/tools/leaderboard.py @@ -46,78 +46,20 @@ 
def _compute_leaderboard( """ record_score = [] event = session.query(Event).filter_by(name=event_name).one() - map_score_precision = { - score_type.name: score_type.precision for score_type in event.score_types - } for sub in submissions: # take only max n bag - df_scores_bag = get_bagged_scores(session, sub.id) - highest_level = df_scores_bag.index.get_level_values("n_bag").max() - df_scores_bag = df_scores_bag.loc[(slice(None), highest_level), :] - df_scores_bag.index = df_scores_bag.index.droplevel("n_bag") - df_scores_bag = df_scores_bag.round(map_score_precision) - - df_scores = get_scores(session, sub.id) - df_scores = df_scores.round(map_score_precision) - - df_time = get_time(session, sub.id) - df_time = df_time.stack().to_frame() - df_time.index = df_time.index.set_names(["fold", "step"]) - df_time = df_time.rename(columns={0: "time"}) - df_time = df_time.sum(axis=0, level="step").T - - df_scores_mean = df_scores.groupby("step").mean() - df_scores_std = df_scores.groupby("step").std() - - # select only the validation and testing steps and rename them to - # public and private - map_renaming = {"valid": "public", "test": "private"} - df_scores_mean = ( - df_scores_mean.loc[list(map_renaming.keys())] - .rename(index=map_renaming) - .stack() + df = ( + get_bagged_scores(session, sub.id) + .reset_index(drop=True) + .max(axis=0) .to_frame() .T ) - df_scores_std = ( - df_scores_std.loc[list(map_renaming.keys())] - .rename(index=map_renaming) - .stack() - .to_frame() - .T - ) - df_scores_bag = df_scores_bag.rename(index=map_renaming).stack().to_frame().T - - df = pd.concat( - [df_scores_bag, df_scores_mean, df_scores_std], - axis=1, - keys=["bag", "mean", "std"], - ) - - df.columns = df.columns.set_names(["stat", "set", "score"]) - - # change the multi-index into a stacked index - df.columns = df.columns.map(lambda x: " ".join(x)) - - # add the aggregated time information - df_time.index = df.index - df_time = df_time.rename( - columns={ - "train": "train time [s]", - "valid": "validation time [s]", - "test": "test time [s]", - } - ) - df = pd.concat([df, df_time], axis=1) if leaderboard_type == "private": df["submission ID"] = sub.basename.replace("submission_", "") df["team"] = sub.team.name df["submission"] = sub.name_with_link if with_links else sub.name - df["contributivity"] = int(round(100 * sub.contributivity)) - df["historical contributivity"] = int( - round(100 * sub.historical_contributivity) - ) df["max RAM [MB]"] = get_submission_max_ram(session, sub.id) df["submitted at (UTC)"] = pd.Timestamp(sub.submission_timestamp) record_score.append(df) @@ -128,52 +70,7 @@ def _compute_leaderboard( # keep only second precision for the time stamp df["submitted at (UTC)"] = df["submitted at (UTC)"].astype("datetime64[s]") - # reordered the column - stats_order = ["bag", "mean", "std"] if leaderboard_type == "private" else ["bag"] - dataset_order = ( - ["public", "private"] if leaderboard_type == "private" else ["public"] - ) - score_order = [event.official_score_name] + [ - score_type.name - for score_type in event.score_types - if score_type.name != event.official_score_name - ] - score_list = [ - "{} {} {}".format(stat, dataset, score) - for dataset, score, stat in product(dataset_order, score_order, stats_order) - ] - # Only display train and validation time for the public leaderboard - time_list = ( - ["train time [s]", "validation time [s]", "test time [s]"] - if leaderboard_type == "private" - else ["train time [s]", "validation time [s]"] - ) - col_ordered = ( - ["team", 
"submission"] - + score_list - + ["contributivity", "historical contributivity"] - + time_list - + ["max RAM [MB]", "submitted at (UTC)"] - ) - if leaderboard_type == "private": - col_ordered = ["submission ID"] + col_ordered - df = df[col_ordered] - - # check if the contributivity columns are null - contrib_columns = ["contributivity", "historical contributivity"] - if (df[contrib_columns] == 0).all(axis=0).all(): - df = df.drop(columns=contrib_columns) - - df = df.sort_values( - "bag {} {}".format(leaderboard_type, event.official_score_name), - ascending=event.get_official_score_type(session).is_lower_the_better, - ) - - # rename the column name for the public leaderboard - if leaderboard_type == "public": - df = df.rename( - columns={key: value for key, value in zip(score_list, score_order)} - ) + df = df.sort_values(by="Total cost") return df @@ -206,105 +103,9 @@ def _compute_competition_leaderboard( session, submissions, "private", event_name, with_links=False ) - time_list = ( - ["train time [s]", "validation time [s]", "test time [s]"] - if leaderboard_type == "private" - else ["train time [s]", "validation time [s]"] - ) - - col_selected_private = ( - ["team", "submission"] - + ["bag private " + score_name, "bag public " + score_name] - + time_list - + ["submitted at (UTC)"] - ) - leaderboard_df = private_leaderboard[col_selected_private] - leaderboard_df = leaderboard_df.rename( - columns={ - "bag private " + score_name: "private " + score_name, - "bag public " + score_name: "public " + score_name, - } - ) - # select best submission for each team - best_df = ( - leaderboard_df.groupby("team").min() - if score_type.is_lower_the_better - else leaderboard_df.groupby("team").max() - ) - best_df = best_df[["public " + score_name]].reset_index() - best_df["best"] = True - - # merge to get a best indicator column then select best - leaderboard_df = pd.merge( - leaderboard_df, - best_df, - how="left", - left_on=["team", "public " + score_name], - right_on=["team", "public " + score_name], - ) - leaderboard_df = leaderboard_df.fillna(False) - leaderboard_df = leaderboard_df[leaderboard_df["best"]] - leaderboard_df = leaderboard_df.drop(columns="best") - - # dealing with ties: we need the lowest timestamp - best_df = leaderboard_df.groupby("team").min() - best_df = best_df[["submitted at (UTC)"]].reset_index() - best_df["best"] = True - leaderboard_df = pd.merge( - leaderboard_df, - best_df, - how="left", - left_on=["team", "submitted at (UTC)"], - right_on=["team", "submitted at (UTC)"], - ) - leaderboard_df = leaderboard_df.fillna(False) - leaderboard_df = leaderboard_df[leaderboard_df["best"]] - leaderboard_df = leaderboard_df.drop(columns="best") - - # sort by public score then by submission timestamp, compute rank - leaderboard_df = leaderboard_df.sort_values( - by=["public " + score_name, "submitted at (UTC)"], - ascending=[score_type.is_lower_the_better, True], - ) - leaderboard_df["public rank"] = np.arange(len(leaderboard_df)) + 1 - - # sort by private score then by submission timestamp, compute rank - leaderboard_df = leaderboard_df.sort_values( - by=["private " + score_name, "submitted at (UTC)"], - ascending=[score_type.is_lower_the_better, True], - ) - leaderboard_df["private rank"] = np.arange(len(leaderboard_df)) + 1 - - leaderboard_df["move"] = ( - leaderboard_df["public rank"] - leaderboard_df["private rank"] - ) - leaderboard_df["move"] = [ - "{:+d}".format(m) if m != 0 else "-" for m in leaderboard_df["move"] - ] - - col_selected = ( - [ - leaderboard_type + " rank", - 
"team", - "submission", - leaderboard_type + " " + score_name, - ] - + time_list - + ["submitted at (UTC)"] - ) - if leaderboard_type == "private": - col_selected.insert(1, "move") - - df = leaderboard_df[col_selected] - df = df.rename( - columns={ - leaderboard_type + " " + score_name: score_name, - leaderboard_type + " rank": "rank", - } - ) - df = df.sort_values(by="rank") - return df + best_df = private_leaderboard.groupby("team").min().reset_index() + return best_df def get_leaderboard_all_info(session, event_name): diff --git a/ramp-database/ramp_database/tools/submission.py b/ramp-database/ramp_database/tools/submission.py index eec4a568..ed2dc7c1 100644 --- a/ramp-database/ramp_database/tools/submission.py +++ b/ramp-database/ramp_database/tools/submission.py @@ -446,6 +446,7 @@ def get_time(session, submission_id): results["fold"].append(fold_id) for step in ("train", "valid", "test"): results[step].append(getattr(cv_fold, "{}_time".format(step))) + breakpoint() return pd.DataFrame(results).set_index("fold") @@ -717,21 +718,13 @@ def set_bagged_scores(session, submission_id, path_predictions): The path where the results files are located. """ submission = select_submission_by_id(session, submission_id) - df = pd.read_csv( - os.path.join(path_predictions, "bagged_scores.csv"), index_col=[0, 1] - ) - df_steps = df.index.get_level_values("step").unique().tolist() + with open(os.path.join(path_predictions, "score.txt")) as fh: + cost_value = float(fh.read().strip()) + for score in submission.scores: for step in ("valid", "test"): - highest_n_bag = df.index.get_level_values("n_bag").max() - if step in df_steps: - score_last_bag = df.loc[(step, highest_n_bag), score.score_name] - score_all_bags = df.loc[(step, slice(None)), score.score_name].tolist() - else: - score_last_bag = float(score.event_score_type.worst) - score_all_bags = None - setattr(score, "{}_score_cv_bag".format(step), score_last_bag) - setattr(score, "{}_score_cv_bags".format(step), score_all_bags) + setattr(score, "{}_score_cv_bag".format(step), cost_value) + setattr(score, "{}_score_cv_bags".format(step), [cost_value]) session.commit() diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index 8dc236ad..6662cb90 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -1,4 +1,5 @@ import logging +import sys import os import shutil from datetime import datetime @@ -186,6 +187,25 @@ def collect_results(self): shutil.rmtree(output_training_dir) self.status = "collected" return (returncode, error_msg) + + # scoring with the judger for now using a custom scoring function + sys.path.append( + os.path.join(self.config["data_dir"], "output_validators", "judger") + ) + from data import OutputData + + output_data = OutputData.from_file( + os.path.join(output_training_dir, "case0.ans") + ) + # Just some fake score for now + score = ( + output_data.deviceNum + + sum(output_data.regionIndexs) + + output_data.stepNum + ) + with open(os.path.join(output_training_dir, "score.txt"), "w") as fh: + fh.write(str(score)) + # copy the predictions into the disk # no need to create the directory, it will be handle by copytree shutil.copytree(output_training_dir, pred_dir) From b3e95f86dba0d00bf621e9d96aea0b585cf0dc2b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 14 Sep 2022 16:53:36 +0000 Subject: [PATCH 06/20] Bump python version used in CI --- .github/workflows/main.yml | 25 +++++-------------- .../ramp_engine/tests/test_conda_worker.py | 1 + 2 files 
changed, 7 insertions(+), 19 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fcedb2e0..b9a34414 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,18 +13,8 @@ on: jobs: main: - name: test-py-${{ matrix.python }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-18.04] - python: [3.7, 3.8, 3.9] - include: - - os: ubuntu-18.04 - python: 3.8 - # the following has no effect with manual trigger - # where the ramp-workflow is specified anyway - ramp_workflow_version: master + name: test-py-3.10 + runs-on: ubuntu-latest services: @@ -47,7 +37,7 @@ jobs: with: update-conda: true activate-conda: false - python-version: ${{ matrix.python }} + python-version: 3.10 conda-channels: anaconda - name: Create envs @@ -56,12 +46,12 @@ jobs: conda env update -n testenv -f environment.yml conda env create -f ci_tools/environment_iris_kit.yml env: - PYTHON_VERSION: ${{ matrix.python }} + PYTHON_VERSION: 3.10 - name: Install ramp-board run: | source activate testenv - if [ "$PYTHON_VERSION" == "3.8" ]; then + if [ "$PYTHON_VERSION" == "3.10" ]; then python -m pip install "dask==2021.4.1" "distributed==2021.4.1" fi if [ "${{ matrix.ramp_workflow_version }}" == "master" ]; then @@ -121,13 +111,10 @@ jobs: - uses: actions/setup-python@v2 name: Install Python with: - python-version: '3.7' + python-version: '3.10' - name: Install dependencies run: pip install flake8 black==21.8b0 - - name: Run flake8 - run: flake8 ramp-* - - name: Run black run: black --check . diff --git a/ramp-engine/ramp_engine/tests/test_conda_worker.py b/ramp-engine/ramp_engine/tests/test_conda_worker.py index 6d55787a..f5471cbf 100644 --- a/ramp-engine/ramp_engine/tests/test_conda_worker.py +++ b/ramp-engine/ramp_engine/tests/test_conda_worker.py @@ -8,6 +8,7 @@ from ramp_engine.local import CondaEnvWorker from ramp_engine.remote import DaskWorker from ramp_engine.conda import _conda_info_envs +from ramp ALL_WORKERS = [CondaEnvWorker, DaskWorker] From 376a2eb7e427a111d1cec7d1980f77868fea919a Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 14 Sep 2022 22:32:44 +0000 Subject: [PATCH 07/20] A more through implementation of the cpp_runner --- ramp-engine/ramp_engine/cpp_runner.py | 135 ++++++++++++++++++-------- 1 file changed, 94 insertions(+), 41 deletions(-) diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index 6662cb90..0addb281 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -3,6 +3,7 @@ import os import shutil from datetime import datetime +import time import subprocess from .base import _get_traceback @@ -12,6 +13,10 @@ logger = logging.getLogger("RAMP-WORKER") +COMPILATION_ERROR = 1220 +RUNTIME_ERROR = 1221 +SCORING_ERROR = 1222 + class CppCondaEnvWorker(CondaEnvWorker): """Local worker which uses conda environment to dispatch submission. @@ -101,6 +106,7 @@ def launch_submission(self): raise ValueError( "Wait that the submission is processed before to " "launch a new one." 
) + self._log_dir = os.path.join(self.config["logs_dir"], self.submission) os.makedirs(self._log_dir, exist_ok=True) self._log_file = open(os.path.join(self._log_dir, "log"), "wb+") @@ -114,29 +120,86 @@ def launch_submission(self): INCLUDE_DIR = os.path.join(self.config["data_dir"], "include", "cpp") DATA_DIR = os.path.join(self.config["data_dir"], "data", "secret") - subprocess.check_call( - [ - "gcc", - os.path.join(submission_dir, "main.cpp"), - f"-I{INCLUDE_DIR}", - "-lstdc++", - "-O3", - "-o", - bin_path, - ], - ) + self.status = "finished" + try: + subprocess.check_call( + [ + "gcc", + os.path.join(submission_dir, "main.cpp"), + f"-I{INCLUDE_DIR}", + "-lstdc++", + "-O3", + "-o", + bin_path, + ], + stderr=self._log_file, + stdout=self._log_file, + ) + except subprocess.CalledProcessError as err: - self._proc = subprocess.Popen( - [ - bin_path, - ], - stdout=open(os.path.join(output_dir, "case0.ans"), "wb+"), - stderr=self._log_file, - stdin=open(os.path.join(DATA_DIR, "case0.in"), "rb"), - ) + self._return_code = COMPILATION_ERROR + return + + # Compilation passed, clean up the log + shutil.copy(os.path.join(self._log_dir, "log"), os.path.join(self._log_dir, "compilation-log")) + self._log_file.truncate(0) + + # Run compiled code in batches + batch_size = 4 + for n_batch in range(3): + t0 = time.perf_counter() + procs = [] + for sub_idx in range(batch_size): + idx = batch_size*n_batch + sub_idx + # We have 9 test cases in total + if idx > 9: + continue + procs.append(subprocess.Popen( + [bin_path], + stdout=open(os.path.join(output_dir, f"case{idx}.ans"), "wb+"), + stderr=self._log_file, + stdin=open(os.path.join(DATA_DIR, f"case{idx}.in"), "rb"), + )) + for p in procs: + # Time remaining for this batch (evaluated in parallel) + dt = max(t0 + self.timeout - time.perf_counter(), 0) + if dt == 0: + self.status = "timeout" + self._return_code = 124 + return + try: + p.communicate(timeout=dt) + self._return_code = max(p.returncode, 0) + except subprocess.TimeoutExpired: + self.status = "timeout" + self._return_code = 124 + return + + if self._return_code > 0: + return + + # Running the model passed, clean up the log + shutil.copy(os.path.join(self._log_dir, "log"), os.path.join(self._log_dir, "run-log")) + self._log_file.truncate(0) - self._start_date = datetime.utcnow() - self.status = "running" + # Score the solution + judger_path = os.path.join(self.config["data_dir"], "output_validators", "judger", "__init__.py") + try: + subprocess.check_call( + [ + os.path.join(self._python_bin_path, 'python'), + judger_path, + DATA_DIR, + output_dir, + output_dir, + ], + stderr=self._log_file, + stdout=self._log_file, + ) + except subprocess.CalledProcessError as err: + self._return_code = SCORING_ERROR + return + def collect_results(self): """Collect the results after that the submission is completed. @@ -160,9 +223,6 @@ def collect_results(self): "collect the results." 
) if self.status in ["finished", "running", "timeout"]: - # communicate() will wait for the process to be completed - self._proc.communicate() - self._log_file.close() with open(os.path.join(self._log_dir, "log"), "rb") as f: log_output = f.read() error_msg = _get_traceback(log_output.decode("utf-8")) @@ -173,7 +233,7 @@ def collect_results(self): if self.status == "timeout": returncode = 124 else: - returncode = self._proc.returncode + returncode = self._return_code pred_dir = os.path.join(self.config["predictions_dir"], self.submission) output_training_dir = os.path.join( self.config["submissions_dir"], @@ -188,26 +248,19 @@ def collect_results(self): self.status = "collected" return (returncode, error_msg) - # scoring with the judger for now using a custom scoring function - sys.path.append( - os.path.join(self.config["data_dir"], "output_validators", "judger") - ) - from data import OutputData - - output_data = OutputData.from_file( - os.path.join(output_training_dir, "case0.ans") - ) # Just some fake score for now - score = ( - output_data.deviceNum - + sum(output_data.regionIndexs) - + output_data.stepNum - ) - with open(os.path.join(output_training_dir, "score.txt"), "w") as fh: - fh.write(str(score)) # copy the predictions into the disk # no need to create the directory, it will be handle by copytree shutil.copytree(output_training_dir, pred_dir) self.status = "collected" return (returncode, error_msg) + + def check_timeout(self): + """We use a different timeout mechanism""" + return None + + def _is_submission_finished(): + """The parallelism happens at the level of test cases""" + return True + From cffa585891d6e8fc378f2ab75d1384de63e30826 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 20 Sep 2022 00:21:01 +0000 Subject: [PATCH 08/20] More updates to the cpp runner --- ramp-database/ramp_database/tools/leaderboard.py | 3 ++- ramp-engine/ramp_engine/base.py | 5 +++++ ramp-engine/ramp_engine/cpp_runner.py | 10 ++++++++++ ramp-frontend/ramp_frontend/templates/sandbox.html | 3 ++- ramp-frontend/ramp_frontend/wsgi.py | 14 ++++++++++++++ 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py index bd033dae..53c2c54b 100644 --- a/ramp-database/ramp_database/tools/leaderboard.py +++ b/ramp-database/ramp_database/tools/leaderboard.py @@ -60,7 +60,6 @@ def _compute_leaderboard( df["submission ID"] = sub.basename.replace("submission_", "") df["team"] = sub.team.name df["submission"] = sub.name_with_link if with_links else sub.name - df["max RAM [MB]"] = get_submission_max_ram(session, sub.id) df["submitted at (UTC)"] = pd.Timestamp(sub.submission_timestamp) record_score.append(df) @@ -69,6 +68,7 @@ def _compute_leaderboard( # keep only second precision for the time stamp df["submitted at (UTC)"] = df["submitted at (UTC)"].astype("datetime64[s]") + df.columns.name = None df = df.sort_values(by="Total cost") return df @@ -105,6 +105,7 @@ def _compute_competition_leaderboard( # select best submission for each team best_df = private_leaderboard.groupby("team").min().reset_index() + best_df.insert(0, 'rank', np.arange(1, best_df.shape[0]+1, dtype=np.int)) return best_df diff --git a/ramp-engine/ramp_engine/base.py b/ramp-engine/ramp_engine/base.py index 63cc66b5..b5f3d6a7 100644 --- a/ramp-engine/ramp_engine/base.py +++ b/ramp-engine/ramp_engine/base.py @@ -2,6 +2,7 @@ from abc import ABCMeta, abstractmethod from datetime import datetime import subprocess +import re logger = 
logging.getLogger("RAMP-WORKER") @@ -191,4 +192,8 @@ def _get_traceback(content): cut_exception_text = content.find("Traceback") if cut_exception_text > 0: content = content[cut_exception_text:] + else: + content = content[-10000:] + # strip paths + content = re.sub("/[^\s]+/", '', content) return content diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index 0addb281..fccfd1f2 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -17,6 +17,16 @@ RUNTIME_ERROR = 1221 SCORING_ERROR = 1222 + +def get_conda_cmd(options: list[str], memory="10m") -> list[str]: + cmd = ['docker', 'run', '-it', '--rm', '-v', + "/home/ubuntu/miniforge3/:/home/ubuntu/miniforge3/:ro", "-v", + "/etc/passwd:/etc/passwd:ro", "-v", "/etc/group:/etc/group:ro"] + options + [ + + "-m", memory, "ubuntu:kinetic-20220830"] + + + class CppCondaEnvWorker(CondaEnvWorker): """Local worker which uses conda environment to dispatch submission. diff --git a/ramp-frontend/ramp_frontend/templates/sandbox.html b/ramp-frontend/ramp_frontend/templates/sandbox.html index 995582ff..c81f854d 100644 --- a/ramp-frontend/ramp_frontend/templates/sandbox.html +++ b/ramp-frontend/ramp_frontend/templates/sandbox.html @@ -206,7 +206,8 @@
- Note that once you make a submission, all participants of your team will be locked to the current team.
+ Note that once you make a submission, all participants of your team will be locked to the current team.<br>
+ By default, the C++ solution is run. To run the Python solution, `submission.cpp` should be an empty file.
diff --git a/ramp-frontend/ramp_frontend/wsgi.py b/ramp-frontend/ramp_frontend/wsgi.py index 5cd0f6b0..2c84d5fb 100644 --- a/ramp-frontend/ramp_frontend/wsgi.py +++ b/ramp-frontend/ramp_frontend/wsgi.py @@ -1,5 +1,6 @@ from ramp_utils import generate_flask_config from ramp_utils import read_config +import os from ramp_frontend import create_app @@ -18,7 +19,20 @@ def make_app(config_file): app : Flask The Flask app created. """ + try: + import sentry_sdk + if "SENTRY_DSN" in os.environ: + sentry_sdk.init( + dsn=os.environ['SENTRY_DSN'], + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for performance monitoring. + # We recommend adjusting this value in production. + traces_sample_rate=0.5 + ) + except ImportError: + pass config = read_config(config_file) flask_config = generate_flask_config(config) app = create_app(flask_config) + return app From dd5a192eb7a2877e1b2321fdf1d04655910cfafa Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 20 Sep 2022 02:07:07 +0000 Subject: [PATCH 09/20] Python + docker --- ramp-engine/ramp_engine/cpp_runner.py | 171 +++++++++++++++++++------- 1 file changed, 124 insertions(+), 47 deletions(-) diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index fccfd1f2..cb47d4c7 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -5,6 +5,7 @@ from datetime import datetime import time import subprocess +from pathlib import Path from .base import _get_traceback from .conda import _conda_info_envs, _get_conda_env_path @@ -18,13 +19,28 @@ SCORING_ERROR = 1222 -def get_conda_cmd(options: list[str], memory="10m") -> list[str]: - cmd = ['docker', 'run', '-it', '--rm', '-v', - "/home/ubuntu/miniforge3/:/home/ubuntu/miniforge3/:ro", "-v", - "/etc/passwd:/etc/passwd:ro", "-v", "/etc/group:/etc/group:ro"] + options + [ - - "-m", memory, "ubuntu:kinetic-20220830"] - +def get_conda_cmd(cmd: list[str], options: list[str] = None, memory="10m") -> list[str]: + + if options is None: + options = [] + cmd_full = ( + [ + "docker", + "run", + "-i", + "--rm", + "-v", + "/home/ubuntu/miniforge3/:/home/ubuntu/miniforge3/:ro", + "-v", + "/etc/passwd:/etc/passwd:ro", + "-v", + "/etc/group:/etc/group:ro", + ] + + options + + ["-m", memory, "ubuntu:kinetic-20220830"] + + cmd + ) + return cmd_full class CppCondaEnvWorker(CondaEnvWorker): @@ -105,6 +121,18 @@ def teardown(self): shutil.rmtree(output_training_dir) super().teardown() + def is_cpp_submission(self) -> bool: + """Return True if submission is C++, False if it's a Python one""" + + submission_dir = Path(self.config["submissions_dir"]) / self.submission + + if (submission_dir / "solution.cpp").exists() and (len( + (submission_dir / "solution.cpp").read_text().strip()) > 10 + ): + return True + else: + return False + def launch_submission(self): """Launch the submission. 
@@ -126,50 +154,97 @@ def launch_submission(self): ) output_dir = os.path.join(submission_dir, "training_output") os.makedirs(output_dir, exist_ok=True) - bin_path = os.path.join(submission_dir, "main") - INCLUDE_DIR = os.path.join(self.config["data_dir"], "include", "cpp") + INCLUDE_DIR = Path( + self.config["data_dir"], "..", "..", "smartfactoryinstruments-starting-kit" + ) DATA_DIR = os.path.join(self.config["data_dir"], "data", "secret") self.status = "finished" - try: - subprocess.check_call( - [ - "gcc", - os.path.join(submission_dir, "main.cpp"), - f"-I{INCLUDE_DIR}", - "-lstdc++", - "-O3", - "-o", - bin_path, - ], - stderr=self._log_file, - stdout=self._log_file, - ) - except subprocess.CalledProcessError as err: - self._return_code = COMPILATION_ERROR - return + is_cpp = self.is_cpp_submission() + if is_cpp: + bin_path = os.path.join(submission_dir, "main") + Path(submission_dir, "solution.py").unlink(missing_ok=True) + + try: + subprocess.check_call( + [ + "gcc", + os.path.join(submission_dir, "solution.cpp"), + f"-I{INCLUDE_DIR / 'CPP'}", + "-lstdc++", + "-O3", + "-o", + bin_path, + ], + stderr=self._log_file, + stdout=self._log_file, + ) + except subprocess.CalledProcessError as err: - # Compilation passed, clean up the log - shutil.copy(os.path.join(self._log_dir, "log"), os.path.join(self._log_dir, "compilation-log")) - self._log_file.truncate(0) + self._return_code = COMPILATION_ERROR + return - # Run compiled code in batches + # Compilation passed, clean up the log + shutil.copy( + os.path.join(self._log_dir, "log"), + os.path.join(self._log_dir, "compilation-log"), + ) + self._log_file.truncate(0) + else: + Path(submission_dir, "solution.cpp").unlink(missing_ok=True) + bin_path = os.path.join(submission_dir, "solution.py") + shutil.copy(INCLUDE_DIR / "python/data.py", submission_dir) + + # Run solution in batches batch_size = 4 for n_batch in range(3): t0 = time.perf_counter() procs = [] for sub_idx in range(batch_size): - idx = batch_size*n_batch + sub_idx + idx = batch_size * n_batch + sub_idx # We have 9 test cases in total if idx > 9: continue - procs.append(subprocess.Popen( - [bin_path], - stdout=open(os.path.join(output_dir, f"case{idx}.ans"), "wb+"), - stderr=self._log_file, - stdin=open(os.path.join(DATA_DIR, f"case{idx}.in"), "rb"), - )) + if is_cpp: + p = subprocess.Popen( + get_conda_cmd( + [str(bin_path)], + options=["-v", f"{submission_dir}:{submission_dir}:ro"], + ), + stdout=open(os.path.join(output_dir, f"case{idx}.ans"), "wb+"), + stderr=self._log_file, + stdin=open(os.path.join(DATA_DIR, f"case{idx}.in"), "rb"), + ) + else: + python_runner = ( + Path(self.config["data_dir"]) + / "../scripts/ramp_python_runner.py" + ).resolve() + p = subprocess.Popen( + get_conda_cmd( + [ + os.path.join(self._python_bin_path, "python"), + str(python_runner), + str(bin_path), + os.path.join(DATA_DIR, f"case{idx}.in"), + os.path.join(output_dir, f"case{idx}.ans"), + ], + options=[ + "-v", + f"{submission_dir}:{submission_dir}:ro", + "-v", + f"{python_runner.parent}:{python_runner.parent}:ro", + "-v", + f"{DATA_DIR}:{DATA_DIR}:ro", + "-v", + f"{output_dir}:{output_dir}", + ], + ), + stderr=self._log_file, + ) + + procs.append(p) for p in procs: # Time remaining for this batch (evaluated in parallel) dt = max(t0 + self.timeout - time.perf_counter(), 0) @@ -187,21 +262,25 @@ def launch_submission(self): if self._return_code > 0: return - + # Running the model passed, clean up the log - shutil.copy(os.path.join(self._log_dir, "log"), os.path.join(self._log_dir, "run-log")) + 
shutil.copy( + os.path.join(self._log_dir, "log"), os.path.join(self._log_dir, "run-log") + ) self._log_file.truncate(0) # Score the solution - judger_path = os.path.join(self.config["data_dir"], "output_validators", "judger", "__init__.py") + judger_path = os.path.join( + self.config["data_dir"], "output_validators", "judger", "__init__.py" + ) try: subprocess.check_call( [ - os.path.join(self._python_bin_path, 'python'), - judger_path, - DATA_DIR, - output_dir, - output_dir, + os.path.join(self._python_bin_path, "python"), + judger_path, + DATA_DIR, + output_dir, + output_dir, ], stderr=self._log_file, stdout=self._log_file, @@ -209,7 +288,6 @@ def launch_submission(self): except subprocess.CalledProcessError as err: self._return_code = SCORING_ERROR return - def collect_results(self): """Collect the results after that the submission is completed. @@ -273,4 +351,3 @@ def check_timeout(self): def _is_submission_finished(): """The parallelism happens at the level of test cases""" return True - From fe10b7fcd682b2344b8fe5796512ff0a1661d732 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 20 Sep 2022 08:20:34 +0000 Subject: [PATCH 10/20] More improvements --- ramp-engine/ramp_engine/base.py | 8 +++++++- ramp-engine/ramp_engine/cpp_runner.py | 16 +++++++++------- .../ramp_frontend/templates/sandbox.html | 2 +- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ramp-engine/ramp_engine/base.py b/ramp-engine/ramp_engine/base.py index b5f3d6a7..6da837f5 100644 --- a/ramp-engine/ramp_engine/base.py +++ b/ramp-engine/ramp_engine/base.py @@ -193,7 +193,13 @@ def _get_traceback(content): if cut_exception_text > 0: content = content[cut_exception_text:] else: - content = content[-10000:] + content = content[-3000:] # strip paths content = re.sub("/[^\s]+/", '', content) + if content: + # Take only the last 2 lines + content = "\n".join(content.splitlines()[-3:]) + # If data is suspiciosly long truncate it + if len(content) > 150: + content = content[-150:] return content diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index cb47d4c7..f4d39640 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -29,6 +29,8 @@ def get_conda_cmd(cmd: list[str], options: list[str] = None, memory="10m") -> li "run", "-i", "--rm", + "--network", + "none", "-v", "/home/ubuntu/miniforge3/:/home/ubuntu/miniforge3/:ro", "-v", @@ -126,8 +128,8 @@ def is_cpp_submission(self) -> bool: submission_dir = Path(self.config["submissions_dir"]) / self.submission - if (submission_dir / "solution.cpp").exists() and (len( - (submission_dir / "solution.cpp").read_text().strip()) > 10 + if (submission_dir / "main.cpp").exists() and ( + len((submission_dir / "main.cpp").read_text().strip()) > 10 ): return True else: @@ -164,16 +166,16 @@ def launch_submission(self): is_cpp = self.is_cpp_submission() if is_cpp: bin_path = os.path.join(submission_dir, "main") - Path(submission_dir, "solution.py").unlink(missing_ok=True) try: subprocess.check_call( [ "gcc", - os.path.join(submission_dir, "solution.cpp"), + os.path.join(submission_dir, "main.cpp"), f"-I{INCLUDE_DIR / 'CPP'}", "-lstdc++", "-O3", + "-w", "-o", bin_path, ], @@ -192,7 +194,6 @@ def launch_submission(self): ) self._log_file.truncate(0) else: - Path(submission_dir, "solution.cpp").unlink(missing_ok=True) bin_path = os.path.join(submission_dir, "solution.py") shutil.copy(INCLUDE_DIR / "python/data.py", submission_dir) @@ -260,8 +261,9 @@ def launch_submission(self): 
self._return_code = 124 return - if self._return_code > 0: - return + + if self._return_code > 0: + return # Running the model passed, clean up the log shutil.copy( diff --git a/ramp-frontend/ramp_frontend/templates/sandbox.html b/ramp-frontend/ramp_frontend/templates/sandbox.html index c81f854d..17a9e9bf 100644 --- a/ramp-frontend/ramp_frontend/templates/sandbox.html +++ b/ramp-frontend/ramp_frontend/templates/sandbox.html @@ -207,7 +207,7 @@
Note that once you make a submission, all participants of your team will be locked to the current team.
- By default, the C++ solution is run. To run the Python solution, `submission.cpp` should be an empty file.<br>
+ By default, the C++ solution (main.cpp) is run. To run the Python solution (solution.py), make `main.cpp` an empty file.<br>
From ff1517e24499758911889abfeb9542b9655ac27c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 20 Sep 2022 09:42:13 +0000 Subject: [PATCH 11/20] Update slack URL --- ramp-engine/ramp_engine/cpp_runner.py | 2 +- ramp-frontend/ramp_frontend/views/admin.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index f4d39640..0c209aff 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -19,7 +19,7 @@ SCORING_ERROR = 1222 -def get_conda_cmd(cmd: list[str], options: list[str] = None, memory="10m") -> list[str]: +def get_conda_cmd(cmd: list[str], options: list[str] = None, memory="512m") -> list[str]: if options is None: options = [] diff --git a/ramp-frontend/ramp_frontend/views/admin.py b/ramp-frontend/ramp_frontend/views/admin.py index 863d5ca8..f6c14d77 100644 --- a/ramp-frontend/ramp_frontend/views/admin.py +++ b/ramp-frontend/ramp_frontend/views/admin.py @@ -83,7 +83,7 @@ def approve_users(): f"Dear {user.firstname},\n\n" f"Your xianti.fr account has been approved. You will now be able to sign-up for any RAMP Data Challenge, subject to eligibility, once it is open.\n\n" f"Also please join the Huawei RAMP Slack\n" - f"(https://join.slack.com/t/huaweiramp/shared_invite/zt-qbf4vy9s-0NS4~V898h40x8cI2KHEfQ)\n" + f"(https://join.slack.com/t/layoutsmartfactory/shared_invite/zt-1ge14ywqa-yVCSyQSxAO~f~6A0G0_HtA)\n" f"where all event related announcements will be made. For example, if you encounter any difficulties with the process or the platform, you can also ask questions there.\n\n" f"Best regards,\n" f"The Huawei - RAMP team" From d3501aa51b1dd9fe3ecf2fdedc5a978d4a57f8e7 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 20 Sep 2022 15:30:46 +0000 Subject: [PATCH 12/20] Handle invalid salt in the password --- ramp-database/ramp_database/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ramp-database/ramp_database/utils.py b/ramp-database/ramp_database/utils.py index 82940410..48c75a83 100644 --- a/ramp-database/ramp_database/utils.py +++ b/ramp-database/ramp_database/utils.py @@ -106,4 +106,8 @@ def check_password(password, hashed_password): is_same_password : bool Return True if the two passwords are identical. """ - return bcrypt.checkpw(_encode_string(password), _encode_string(hashed_password)) + try: + return bcrypt.checkpw(_encode_string(password), _encode_string(hashed_password)) + except ValueError: + # Some manually created password don't have an invalid salt, ignore it. + return False From f59d2fd15136c8d13031c99b3f1a1bf2040fad10 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 21 Sep 2022 05:51:02 +0000 Subject: [PATCH 13/20] Update email messages --- ramp-frontend/ramp_frontend/templates/update_profile.html | 5 ++++- ramp-frontend/ramp_frontend/views/admin.py | 2 +- ramp-frontend/ramp_frontend/views/visualization.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ramp-frontend/ramp_frontend/templates/update_profile.html b/ramp-frontend/ramp_frontend/templates/update_profile.html index 3a301964..2bd8ca74 100644 --- a/ramp-frontend/ramp_frontend/templates/update_profile.html +++ b/ramp-frontend/ramp_frontend/templates/update_profile.html @@ -70,10 +70,11 @@ [{{ error }}] {% endfor %}
+ {% if config['SIGN_UP_ASK_SOCIAL_MEDIA'] %}
diff --git a/ramp-frontend/ramp_frontend/views/admin.py b/ramp-frontend/ramp_frontend/views/admin.py index f6c14d77..86ad7f0f 100644 --- a/ramp-frontend/ramp_frontend/views/admin.py +++ b/ramp-frontend/ramp_frontend/views/admin.py @@ -115,7 +115,7 @@ def approve_users(): f"You can now proceed to your sandbox for this event and make " f"submissions.\n\n" f"Please note that by signing up to this event, you accept the Challenge Rules " - f"(https://xianti.fr/june-2021-challenge#rules).\n\n" + f".\n\n" f"See you on the RAMP website!\n" f"The Huawei - RAMP team" ) diff --git a/ramp-frontend/ramp_frontend/views/visualization.py b/ramp-frontend/ramp_frontend/views/visualization.py index 4e6196b5..29e6a9a5 100644 --- a/ramp-frontend/ramp_frontend/views/visualization.py +++ b/ramp-frontend/ramp_frontend/views/visualization.py @@ -43,7 +43,7 @@ def color_gradient(rgb, factor_array): colors = rgb2gray(colors) colors = gray2rgb( 255 - np.array([color * factor for color, factor in zip(colors, factor_array)]) - )[:, :, 0] + )[:, 0] return colors From 09d8573a6d90c3d2e041e01011706720767ab8e8 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Sep 2022 23:24:52 +0000 Subject: [PATCH 14/20] Fix competition leaderboard order --- ramp-database/ramp_database/tools/leaderboard.py | 4 +++- ramp-frontend/ramp_frontend/views/auth.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py index 53c2c54b..82dcd0b1 100644 --- a/ramp-database/ramp_database/tools/leaderboard.py +++ b/ramp-database/ramp_database/tools/leaderboard.py @@ -70,7 +70,7 @@ def _compute_leaderboard( df["submitted at (UTC)"] = df["submitted at (UTC)"].astype("datetime64[s]") df.columns.name = None - df = df.sort_values(by="Total cost") + df = df.sort_values(by="submitted at (UTC)", ascending=False) return df @@ -105,7 +105,9 @@ def _compute_competition_leaderboard( # select best submission for each team best_df = private_leaderboard.groupby("team").min().reset_index() + best_df = best_df.sort_values(by="Total cost") best_df.insert(0, 'rank', np.arange(1, best_df.shape[0]+1, dtype=np.int)) + return best_df diff --git a/ramp-frontend/ramp_frontend/views/auth.py b/ramp-frontend/ramp_frontend/views/auth.py index ac7a7dde..56679d5e 100644 --- a/ramp-frontend/ramp_frontend/views/auth.py +++ b/ramp-frontend/ramp_frontend/views/auth.py @@ -162,7 +162,7 @@ def sign_up(): f" {recover_url} \n\n" f"Please note that your signup request will be approved " f"after you send your proof of student status and expected " - f"date of graduation to Jiao Li (li.jiao@huawei.com). \n\n" + f"date of graduation to (contact@xianti.fr). \n\n" f"Please also note that by signing up, you accept the Terms of " f"Use [1], the Privacy Notice [2], and the Cookies Policy [3].\n\n" f" [1] https://xianti.fr/terms-of-usage \n" @@ -353,7 +353,7 @@ def user_confirm_email(token): return redirect(url_for("auth.login")) elif user.access_level == "asked": flash( - "Your email address has already been confirmed. Please send your proof of student status to Jiao Li if you haven't yet done so. If you have, we will approve your sign-up request as soon as possible.", + "Your email address has already been confirmed. Please send your proof of student status to contact@xianti.fr if you haven't yet done so. 
If you have, we will approve your sign-up request as soon as possible.", category="error", ) return redirect(url_for("general.index")) @@ -371,6 +371,6 @@ def user_confirm_email(token): body += "of this user: {}".format(url_approve) send_mail_with_context(admin.email, subject, body) flash( - "Please send your proof of student status to Jiao Li if you haven't yet done so. If you have, we will approve your sign-up request as soon as possible." + "Please send your proof of student status to contact@xianti.fr if you haven't yet done so. If you have, we will approve your sign-up request as soon as possible." ) return redirect(url_for("auth.login")) From 83d9e9568eee100a97dc70a4743a67893b865436 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sun, 2 Oct 2022 21:06:43 +0000 Subject: [PATCH 15/20] Improve timeout handling --- ramp-engine/ramp_engine/cpp_runner.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index 0c209aff..5544667d 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -163,6 +163,8 @@ def launch_submission(self): self.status = "finished" + self._return_code = 0 + is_cpp = self.is_cpp_submission() if is_cpp: bin_path = os.path.join(submission_dir, "main") @@ -210,7 +212,8 @@ def launch_submission(self): if is_cpp: p = subprocess.Popen( get_conda_cmd( - [str(bin_path)], + # Make sure the process is killed as we cannot kill it from outside + ["timeout", "22", str(bin_path)], options=["-v", f"{submission_dir}:{submission_dir}:ro"], ), stdout=open(os.path.join(output_dir, f"case{idx}.ans"), "wb+"), @@ -225,6 +228,9 @@ def launch_submission(self): p = subprocess.Popen( get_conda_cmd( [ + # Make sure the process is killed as we cannot kill it from outside + "timeout", + "22", os.path.join(self._python_bin_path, "python"), str(python_runner), str(bin_path), @@ -255,8 +261,10 @@ def launch_submission(self): return try: p.communicate(timeout=dt) - self._return_code = max(p.returncode, 0) + self._return_code = max(p.returncode, self._return_code) except subprocess.TimeoutExpired: + for p in procs: + p.kill() self.status = "timeout" self._return_code = 124 return @@ -330,18 +338,13 @@ def collect_results(self): self.submission, "training_output", ) - if os.path.exists(pred_dir): - shutil.rmtree(pred_dir) if returncode: - if os.path.exists(output_training_dir): - shutil.rmtree(output_training_dir) - self.status = "collected" - return (returncode, error_msg) - - # Just some fake score for now + if returncode == 139: + error_msg = "Segmentation fault (core dumped)" # copy the predictions into the disk - # no need to create the directory, it will be handle by copytree + # no need to create the directory, it will be handled by copytree + shutil.rmtree(pred_dir, ignore_errors=True) shutil.copytree(output_training_dir, pred_dir) self.status = "collected" return (returncode, error_msg) From 56203294d57f3e30be1de779fe92dd4fe88b1d59 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 6 Oct 2022 16:43:44 +0000 Subject: [PATCH 16/20] Improve competition leaderboard --- ramp-database/ramp_database/tools/leaderboard.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ramp-database/ramp_database/tools/leaderboard.py b/ramp-database/ramp_database/tools/leaderboard.py index 82dcd0b1..a2f82435 100644 --- a/ramp-database/ramp_database/tools/leaderboard.py +++ b/ramp-database/ramp_database/tools/leaderboard.py @@ 
-103,8 +103,14 @@ def _compute_competition_leaderboard( session, submissions, "private", event_name, with_links=False ) + def _select_best_submission(df): + df = df.sort_values('Total cost') + # Take lowest score + del df['team'] + return df.iloc[0] + # select best submission for each team - best_df = private_leaderboard.groupby("team").min().reset_index() + best_df = private_leaderboard.groupby("team").apply(_select_best_submission).reset_index() best_df = best_df.sort_values(by="Total cost") best_df.insert(0, 'rank', np.arange(1, best_df.shape[0]+1, dtype=np.int)) From 2e151c1f9276f13b8fdf14b19a6a624940c17106 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 19 Oct 2022 23:28:44 +0000 Subject: [PATCH 17/20] More fixes --- ramp-engine/ramp_engine/cpp_runner.py | 35 ++++++++++++++------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index 5544667d..b414185f 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -156,10 +156,8 @@ def launch_submission(self): ) output_dir = os.path.join(submission_dir, "training_output") os.makedirs(output_dir, exist_ok=True) - INCLUDE_DIR = Path( - self.config["data_dir"], "..", "..", "smartfactoryinstruments-starting-kit" - ) - DATA_DIR = os.path.join(self.config["data_dir"], "data", "secret") + INCLUDE_DIR = Path(self.config["data_dir"]).resolve() + DATA_DIR = os.path.join(self.config["data_dir"], "Judger") self.status = "finished" @@ -174,7 +172,7 @@ def launch_submission(self): [ "gcc", os.path.join(submission_dir, "main.cpp"), - f"-I{INCLUDE_DIR / 'CPP'}", + f"-I{INCLUDE_DIR / 'include' / 'cpp'}", "-lstdc++", "-O3", "-w", @@ -197,17 +195,20 @@ def launch_submission(self): self._log_file.truncate(0) else: bin_path = os.path.join(submission_dir, "solution.py") - shutil.copy(INCLUDE_DIR / "python/data.py", submission_dir) + shutil.copy(INCLUDE_DIR / "Judger/_data.py", Path(submission_dir) / 'data.py') + shutil.copy(INCLUDE_DIR / "Judger/config.py", submission_dir) + shutil.copy(INCLUDE_DIR / "Judger/reader.py", submission_dir) # Run solution in batches - batch_size = 4 - for n_batch in range(3): + batch_size = 5 + (Path(output_dir) / "output").mkdir(exist_ok=True) + for n_batch in range(4): t0 = time.perf_counter() procs = [] for sub_idx in range(batch_size): idx = batch_size * n_batch + sub_idx # We have 9 test cases in total - if idx > 9: + if idx > 19: continue if is_cpp: p = subprocess.Popen( @@ -216,14 +217,14 @@ def launch_submission(self): ["timeout", "22", str(bin_path)], options=["-v", f"{submission_dir}:{submission_dir}:ro"], ), - stdout=open(os.path.join(output_dir, f"case{idx}.ans"), "wb+"), + stdout=open(os.path.join(output_dir, f"output/case{idx}.out"), "wb+"), stderr=self._log_file, - stdin=open(os.path.join(DATA_DIR, f"case{idx}.in"), "rb"), + stdin=open(os.path.join(DATA_DIR, f"input/case{idx}.in"), "rb"), ) else: python_runner = ( Path(self.config["data_dir"]) - / "../scripts/ramp_python_runner.py" + / "scripts/ramp_python_runner.py" ).resolve() p = subprocess.Popen( get_conda_cmd( @@ -234,8 +235,8 @@ def launch_submission(self): os.path.join(self._python_bin_path, "python"), str(python_runner), str(bin_path), - os.path.join(DATA_DIR, f"case{idx}.in"), - os.path.join(output_dir, f"case{idx}.ans"), + os.path.join(DATA_DIR, f"input/case{idx}.in"), + os.path.join(output_dir, f"output/case{idx}.out"), ], options=[ "-v", @@ -281,15 +282,15 @@ def launch_submission(self): # Score the solution 
judger_path = os.path.join( - self.config["data_dir"], "output_validators", "judger", "__init__.py" + self.config["data_dir"], "Judger/judge_ramp.py" ) try: subprocess.check_call( [ os.path.join(self._python_bin_path, "python"), judger_path, - DATA_DIR, - output_dir, + os.path.join(DATA_DIR, "input"), + os.path.join(output_dir, "output"), output_dir, ], stderr=self._log_file, From 00cee5842d3d100bb3f9c1df1ddff708fd3ed9c7 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 19 Nov 2022 12:22:16 +0000 Subject: [PATCH 18/20] Updates to the event template --- ramp-frontend/ramp_frontend/templates/event.html | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ramp-frontend/ramp_frontend/templates/event.html b/ramp-frontend/ramp_frontend/templates/event.html index 7700de76..79a43f7b 100644 --- a/ramp-frontend/ramp_frontend/templates/event.html +++ b/ramp-frontend/ramp_frontend/templates/event.html @@ -52,12 +52,16 @@ Description + +
@@ -99,4 +106,4 @@ parseInt(i) + "px"; } -{% endblock %} {% endblock %} \ No newline at end of file +{% endblock %} {% endblock %} From 5af1e73741fed2f8eb40e59d1002194e5639704b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 14 Sep 2023 01:29:39 +0200 Subject: [PATCH 19/20] Fixes for the 2023 event --- ramp-engine/ramp_engine/cpp_runner.py | 12 ++++++------ ramp-frontend/ramp_frontend/templates/event.html | 2 +- ramp-frontend/ramp_frontend/templates/sandbox.html | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ramp-engine/ramp_engine/cpp_runner.py b/ramp-engine/ramp_engine/cpp_runner.py index b414185f..c9ca8722 100644 --- a/ramp-engine/ramp_engine/cpp_runner.py +++ b/ramp-engine/ramp_engine/cpp_runner.py @@ -128,8 +128,8 @@ def is_cpp_submission(self) -> bool: submission_dir = Path(self.config["submissions_dir"]) / self.submission - if (submission_dir / "main.cpp").exists() and ( - len((submission_dir / "main.cpp").read_text().strip()) > 10 + if (submission_dir / "solution.cpp").exists() and ( + len((submission_dir / "solution.cpp").read_text().strip()) > 10 ): return True else: @@ -157,7 +157,7 @@ def launch_submission(self): output_dir = os.path.join(submission_dir, "training_output") os.makedirs(output_dir, exist_ok=True) INCLUDE_DIR = Path(self.config["data_dir"]).resolve() - DATA_DIR = os.path.join(self.config["data_dir"], "Judger") + DATA_DIR = Path(self.config["data_dir"]).resolve() self.status = "finished" @@ -171,8 +171,8 @@ def launch_submission(self): subprocess.check_call( [ "gcc", - os.path.join(submission_dir, "main.cpp"), - f"-I{INCLUDE_DIR / 'include' / 'cpp'}", + os.path.join(submission_dir, "solution.cpp"), + #f"-I{INCLUDE_DIR / 'include' / 'cpp'}", "-lstdc++", "-O3", "-w", @@ -208,7 +208,7 @@ def launch_submission(self): for sub_idx in range(batch_size): idx = batch_size * n_batch + sub_idx # We have 9 test cases in total - if idx > 19: + if idx > 9: continue if is_cpp: p = subprocess.Popen( diff --git a/ramp-frontend/ramp_frontend/templates/event.html b/ramp-frontend/ramp_frontend/templates/event.html index 79a43f7b..31668ba4 100644 --- a/ramp-frontend/ramp_frontend/templates/event.html +++ b/ramp-frontend/ramp_frontend/templates/event.html @@ -84,7 +84,7 @@
- Please refer to the PDF for the round 2 problem description.
+ Please refer to the PDF for the problem description.
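
The timeout handling introduced in PATCH 07 and refined in PATCH 15 runs each batch of test cases against a single shared wall-clock budget rather than a per-process timeout: all cases in a batch start together, and the time already spent waiting on earlier processes is deducted from what the remaining ones are allowed. A minimal standalone Python sketch of that pattern follows; it is independent of the RAMP worker code, and the command lines and the exit code 124 (the convention used by the `timeout` command and by these patches) are illustrative only.

import subprocess
import time


def run_batch(cmds, budget_s):
    # Start every test case in the batch at once; they share one deadline.
    t0 = time.perf_counter()
    procs = [subprocess.Popen(cmd) for cmd in cmds]
    worst_rc = 0
    for proc in procs:
        # Budget remaining for the whole batch, not for this process alone.
        remaining = t0 + budget_s - time.perf_counter()
        if remaining <= 0:
            for p in procs:
                p.kill()
            return 124  # batch ran out of its shared budget
        try:
            proc.wait(timeout=remaining)
            worst_rc = max(worst_rc, proc.returncode)
        except subprocess.TimeoutExpired:
            for p in procs:
                p.kill()
            return 124
    return worst_rc


# Example (POSIX): three one-second sleeps easily fit a two-second budget.
if __name__ == "__main__":
    print(run_batch([["sleep", "1"]] * 3, budget_s=2.0))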