Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
cf7902c
Replace print statements with logging.debug for improved debugging an…
j-irion Jun 19, 2025
047e5f5
Update .gitignore to exclude .DS_Store and .idea directories
j-irion Jun 19, 2025
f41c133
Add command-line argument parsing and refactor main function for clarity
j-irion Jun 19, 2025
dda1140
Add docstrings to classes and methods for improved documentation and …
j-irion Jun 25, 2025
c34e61c
Refactor write_result_to_csv function to remove unused parameters and…
j-irion Jun 25, 2025
7b10822
Refactor logging statements to improve clarity and remove unnecessary…
j-irion Jun 25, 2025
af9c89c
Refactor predictors to ensure input data is consistently shaped as co…
j-irion Jun 25, 2025
215cfca
Refactor LinearPredictor to use SGDRegressor and improve model update…
j-irion Jul 2, 2025
e4869c9
Refactor NeuralNetworkPredictor to support incremental model updates …
j-irion Jul 2, 2025
a53636d
Refactor KMeansPredictor to support incremental updates and improve d…
j-irion Jul 2, 2025
901e5dd
Refactor RandomForestPredictor to support incremental updates and imp…
j-irion Jul 2, 2025
faaa8ae
Refactor KNNRegressor to support incremental updates and improve hype…
j-irion Jul 2, 2025
53c4ba2
Refactor sizing_tasks to replace duplicate neural network model updat…
j-irion Jul 2, 2025
19dc251
Refactor LinearRegressionPredictor and NeuralNetworkPredictor to supp…
j-irion Jul 9, 2025
4a1fd2a
Refactor LinearRegressionPredictor to use mini-batch data for scaling…
j-irion Jul 9, 2025
5d5b5e8
Refactor predictors to support batch size and retrain interval parame…
j-irion Jul 16, 2025
5030fe1
Refactor NeuralNetworkPredictor to support online grid search and inc…
j-irion Jul 16, 2025
3bb49c7
Refactor predictors to implement online grid search for hyperparamete…
j-irion Jul 24, 2025
e30209f
Add experiment script for hyperparameter tuning and logging
j-irion Aug 25, 2025
cb54e93
Refactor predictors to remove batch size and retrain interval paramet…
j-irion Aug 27, 2025
fe2f3f2
Refactor predictors to streamline data handling and remove batch size…
j-irion Sep 17, 2025
25f1052
Refactor experiment script to remove batch size and retrain interval …
j-irion Sep 17, 2025
9e6daa4
Update README to reflect changes in command-line parameters for onlin…
j-irion Oct 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
results/*
!results/*.gitkeep
.DS_Store
.idea
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@

### Run Sizey

1. Create a Python virtual environment and install the dependencies
2. Run `python3 main.py filename alpha softmax error_metric seed`
1. Create a Python virtual environment and install the dependencies
2. Run `python3 main.py filename alpha softmax error_metric seed [--use_online_grid]`

Where:

- `filename` describes the workflow from the data folder. For instance `./data/trace_methylseq.csv`
- `alpha` sets the alpha you want to execute Sizey with. It has to be between 0.0 and 1.0
- `interpolation` actives the interpolation strategy. It is either False or True. If set to False, the Argmax strategy is used.
- `softmax` toggles the softmax ensemble strategy. Set to `True` to use it, otherwise `False` for the argmax strategy.
- `error_metric` defines the error metric used for model evaluation. Currently, it is either `smoothed_mape` or `neg_mean_squared_error`, whereas `smoothed_mape` should be used; other error metrics are experimental and may change the impact on the RAQ score.
- `seed` defines the seed for splitting up the initial data in training and test data and also defines the order of online task input.
- `--use_online_grid` (optional) toggles the online grid search.

Here is an example command: `python3 main.py ./data/trace_methylseq.csv 0.0 True smoothed_mape 1996`

Expand Down
90 changes: 90 additions & 0 deletions approach/abstract_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,33 @@


class PredictionModel(metaclass=ABCMeta):
"""
PredictionModel is an abstract base class for implementing machine learning models for regression tasks.

This class provides a structure for defining methods related to model training, prediction, and updating.
It includes attributes for storing training data, scalers, and error metrics, and requires subclasses to
implement specific methods for model functionality.

Attributes:
workflow_name (str): Name of the workflow this model is associated with.
task_name (str): Name of the task this model is associated with.
err_metr (str): Error metric used for model evaluation (e.g., 'smoothed_mape').
regressor (Optional[object]): The machine learning model used for predictions.
train_X_scaler (Optional[object]): Scaler for normalizing training features.
train_y_scaler (Optional[object]): Scaler for normalizing training labels.
X_train_full (Optional[np.ndarray]): Historical training features.
y_train_full (Optional[np.ndarray]): Historical training labels.
model_error (Optional[float]): Error score of the model.
"""

def __init__(self, workflow_name: str, task_name: str, err_metr: str):
"""
Initializes the PredictionModel with workflow name, task name, and error metric.

:param workflow_name: Name of the workflow this model is associated with.
:param task_name: Name of the task this model is associated with.
:param err_metr: Error metric to be used for model evaluation (e.g., 'smoothed_mape').
"""
self.workflow_name = workflow_name
self.task_name = task_name
self.err_metr = err_metr
Expand All @@ -18,30 +43,95 @@ def __init__(self, workflow_name: str, task_name: str, err_metr: str):
self.y_train_full = None
self.model_error = None

def _ensure_column_vector(self, array_like: Union[pd.Series, np.ndarray, list]) -> np.ndarray:
"""
Converts the input array-like object into a numpy array shaped as a column vector.

This method ensures that the input, whether it is a pandas Series, a 1-D numpy array, or a list,
is transformed into a two-dimensional numpy array with a single column. This format is required
by scikit-learn for certain operations.

:param array_like: Input data to be converted. Can be a pandas Series, numpy array, or list.
:return: A numpy array reshaped into a column vector (2-D array with shape (n, 1)).
"""
arr = np.asarray(array_like)
if arr.ndim == 1:
arr = arr.reshape(-1, 1)
return arr

def initial_model_training(self, X_train, y_train) -> None:
    """
    Fits the model on the initial batch of training data.

    Subclasses must override this method with their concrete training logic.

    :param X_train: Training features.
    :param y_train: Training labels.
    :raises NotImplementedError: Always, unless overridden by a subclass.
    """
    # Fixed copy-pasted message: this is the training stub, not the prediction stub.
    raise NotImplementedError('Model training method has not been implemented.')

def predict_task(self, task_features: pd.Series) -> np.ndarray:
    """
    Predicts the output for a single task from its feature vector.

    Subclasses must override this method with their concrete inference logic.

    :param task_features: Features of the task to predict.
    :raises NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError('Model prediction method has not been implemented.')

def predict_tasks(self, taskDataframe: pd.DataFrame) -> float:
    """
    Predicts the output for a batch of tasks at once.

    Subclasses must override this method with their concrete batch-inference logic.

    :param taskDataframe: DataFrame containing the features of multiple tasks.
    :raises NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError('Predicting multiple tasks has not been implemented.')

def update_model(self, X_train: pd.Series, y_train: float) -> None:
    """
    Incorporates a new observation into the trained model.

    Subclasses must override this method with their concrete update logic.

    :param X_train: New training features.
    :param y_train: New training labels.
    :raises NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError('Model update method has not been implemented.')



class PredictionMethod(metaclass=ABCMeta):
    """
    Abstract interface for a complete prediction strategy.

    A PredictionMethod wraps one or more underlying models and defines how
    predictions are made, how the models are updated with new observations,
    and how underpredictions are compensated.
    """

    def predict(self, X_test, y_test, user_estimate):
        """
        Predicts the output for the given test data and user estimate.

        :param X_test: Test features.
        :param y_test: Test labels.
        :param user_estimate: User's estimate for the task.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError('Model prediction method has not been implemented.')

    def update_model(self, X_train: pd.Series, y_train: float):
        """
        Updates the underlying model(s) with new training data.

        :param X_train: New training features.
        :param y_train: New training labels.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        # Fixed copy-pasted message: this is the update stub, not the prediction stub.
        raise NotImplementedError('Model update method has not been implemented.')

    def handle_underprediction(self, input_size: float, predicted: float, user_estimate: float, retry_number: int, actual_memory: float):
        """
        Handles underprediction scenarios by adjusting the predicted value.

        :param input_size: Size of the input task.
        :param predicted: Predicted value from the model.
        :param user_estimate: User's estimate for the task.
        :param retry_number: Number of retries attempted.
        :param actual_memory: Actual memory used for the task.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        # Fixed copy-pasted message so failures point at the right stub.
        raise NotImplementedError('Underprediction handling method has not been implemented.')

    def get_number_subModels(self) -> dict[str, int]:
        """
        Returns the number of sub-models used in the prediction method.

        :return: Dictionary with model names as keys and their counts as values.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        # Fixed copy-pasted message so failures point at the right stub.
        raise NotImplementedError('Sub-model counting method has not been implemented.')
17 changes: 17 additions & 0 deletions approach/experiment_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,28 @@


class ERROR_STRATEGY(Enum):
    """
    Enumerates the strategies for handling errors in the experiment.

    Members:
        DOUBLE: Double the error value.
        MAX_EVER_OBSERVED: Use the maximum error value ever observed.
    """
    DOUBLE = 1
    MAX_EVER_OBSERVED = 2


class OFFSET_STRATEGY(Enum):
"""
Defines the strategy for handling offsets in the experiment.

Attributes:
STD (int): Strategy to use the standard deviation.
MED_UNDER (int): Strategy to use the median of a subset of data.
MED_ALL (int): Strategy to use the median of all data.
STDUNDER (int): Strategy to use a modified standard deviation.
DYNAMIC (int): Strategy to dynamically adjust the offset.
"""
STD = 1
MED_UNDER = 2
MED_ALL = 5
Expand Down
Loading