RTIInternational · mgdenno · Dec 30, 2024 · Dec 18, 2024 · Dec 18, 2024 · samlamont
diff --git a/README.md b/README.md
@@ -37,8 +37,8 @@ python -m teehr.utils.install_spark_jars
 ```
 Use Docker
 ```bash
-$ docker build -t teehr:v0.4.5 .
-$ docker run -it --rm --volume $HOME:$HOME -p 8888:8888 teehr:v0.4.5 jupyter lab --ip 0.0.0.0 $HOME
+$ docker build -t teehr:v0.4.6 .
+$ docker run -it --rm --volume $HOME:$HOME -p 8888:8888 teehr:v0.4.6 jupyter lab --ip 0.0.0.0 $HOME
 ```
 
 ## Examples

diff --git a/docs/sphinx/changelog/index.rst b/docs/sphinx/changelog/index.rst
@@ -2,6 +2,26 @@ Release Notes
 =============
 
 
+0.4.6 - 2024-12-17
+--------------------
+
+Added
+^^^^^
+* Adds `add_missing_columns` to the `_validate` method in the `BaseTable` class
+  to allow for adding missing columns to the schema.
+
+  - When upgrading from 0.4.4 or earlier, you may need to run the following to add
+    the missing columns to the secondary_timeseries if you have existing datasets::
+
+      sdf = ev.secondary_timeseries.to_sdf()
+      validated_sdf = ev.secondary_timeseries._validate(sdf, add_missing_columns=True)
+      ev.secondary_timeseries._write_spark_df(validated_sdf)
+
+Changed
+^^^^^^^
+* None
+
+
 0.4.5 - 2024-12-09
 --------------------
 

diff --git a/docs/sphinx/getting_started/index.rst b/docs/sphinx/getting_started/index.rst
@@ -37,8 +37,8 @@ Or, if you do not want to install TEEHR in your own virtual environment, you can
 
 .. code-block:: bash
 
-   docker build -t teehr:v0.4.5 .
-   docker run -it --rm --volume $HOME:$HOME -p 8888:8888 teehr:v0.4.5 jupyter lab --ip 0.0.0.0 $HOME
+   docker build -t teehr:v0.4.6 .
+   docker run -it --rm --volume $HOME:$HOME -p 8888:8888 teehr:v0.4.6 jupyter lab --ip 0.0.0.0 $HOME
 
 Project Objectives
 ------------------

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "teehr"
-version = "0.4.5"
+version = "0.4.6"
 description = "Tools for Exploratory Evaluation in Hydrologic Research"
 authors = [
     "RTI International",

diff --git a/src/teehr/__init__.py b/src/teehr/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.4.5"
+__version__ = "0.4.6"
 
 from teehr.evaluation.evaluation import Evaluation  # noqa
 from teehr.models.metrics.metric_models import Metrics  # noqa

diff --git a/src/teehr/evaluation/tables/base_table.py b/src/teehr/evaluation/tables/base_table.py
@@ -10,6 +10,7 @@
 from teehr.utils.utils import to_path_or_s3path, path_to_spark
 from teehr.models.filters import FilterBaseModel
 import logging
+from pyspark.sql.functions import lit, col
 
 logger = logging.getLogger(__name__)
 
@@ -145,7 +146,12 @@ def _get_schema(self, type: str = "pyspark"):
 
         return self.schema_func()
 
-    def _validate(self, df: ps.DataFrame, strict: bool = True) -> ps.DataFrame:
+    def _validate(
+        self,
+        df: ps.DataFrame,
+        strict: bool = True,
+        add_missing_columns: bool = False
+    ) -> ps.DataFrame:
         """Validate a DataFrame against the table schema.
 
         Parameters
@@ -156,13 +162,25 @@ def _validate(self, df: ps.DataFrame, strict: bool = True) -> ps.DataFrame:
             If True, any extra columns will be dropped before validation.
             If False, will be validated as-is.
             The default is True.
+
+        Returns
+        -------
+        validated_df : ps.DataFrame
+            The validated DataFrame.
         """
         schema = self._get_schema()
 
         logger.info(f"Validating DataFrame with {schema.columns}.")
 
+        schema_cols = schema.columns.keys()
+
+        # Add missing columns
+        if add_missing_columns:
+            for col_name in schema_cols:
+                if col_name not in df.columns:
+                    df = df.withColumn(col_name, lit(None))
+
         if strict:
-            schema_cols = schema.columns.keys()
             df = df.select(*schema_cols)
 
         validated_df = schema.validate(df)

diff --git a/version.txt b/version.txt
@@ -1 +1 @@
-0.4.5
+0.4.6