From 34a7884ce5cc25a7c6608fcb5fc37643795373a1 Mon Sep 17 00:00:00 2001
From: Bernhard Ryeng
Date: Thu, 23 May 2024 14:58:38 +0200
Subject: [PATCH 1/4] Remove team param from snapshot directories.

---
 src/ssb_timeseries/io.py      | 18 +++++++-----------
 tests/test_dataset_sharing.py | 28 +++++++++++++---------------
 2 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/src/ssb_timeseries/io.py b/src/ssb_timeseries/io.py
index d42dd28..7c0243b 100644
--- a/src/ssb_timeseries/io.py
+++ b/src/ssb_timeseries/io.py
@@ -201,7 +201,7 @@ def read_metadata(self) -> dict:
 
     def write_metadata(self, meta: dict) -> None:
         """Write tags to the metadata file."""
-        os.makedirs(self.metadata_dir, exist_ok=True)
+        # no longer necessary: os.makedirs(self.metadata_dir, exist_ok=True)
         try:
             fs.write_json(self.metadata_fullpath, meta)
             ts_logger.info(
@@ -241,7 +241,6 @@ def last_version(self, directory: str, pattern: str = "*.parquet") -> str:
         files = fs.ls(directory, pattern=pattern)
         number_of_files = len(files)
 
-        # TODO: mypy --> error: Item "None" of "Match[str] | None" has no attribute "group" [union-attr]
         vs = sorted([int(re.search("(_v)(\d+)(.parquet)", f).group(2)) for f in files])
         ts_logger.debug(
             f"DATASET {self.set_name}: io.last_version regex identified versions {vs} in {directory}."
@@ -306,17 +305,12 @@ def iso_no_colon(dt: datetime) -> str:
         )
         return out
 
-    def sharing_directory(self, bucket: str, team: str = "") -> PathStr:
+    def sharing_directory(self, bucket: str) -> PathStr:
         """Get name of sharing directory based on dataset parameters and configuration.
 
         Creates the directory if it does not exist.
         """
-        fix_test_cases_before_taking_this_approach = False
-        if team and fix_test_cases_before_taking_this_approach:
-            # allowing this breaks tests! --> TODO: adapt test cases
-            directory = os.path.join(bucket, team, self.set_name)
-        else:
-            directory = os.path.join(bucket, self.set_name)
+        directory = os.path.join(bucket, self.set_name)
 
         ts_logger.warning(f"DATASET.IO.SHARING_DIRECTORY: {directory}")
         fs.mkdir(directory)
@@ -364,13 +358,15 @@ def snapshot(
         ts_logger.warning(f"Sharing configs: {sharing}")
         for s in sharing:
             ts_logger.debug(f"Sharing: {s}")
+            if "team" not in s.keys():
+                s["team"] = "no team specified"
             fs.cp(
                 data_publish_path,
-                self.sharing_directory(bucket=s["path"], team=s["team"]),
+                self.sharing_directory(bucket=s["path"]),
             )
             fs.cp(
                 meta_publish_path,
-                self.sharing_directory(bucket=s["path"], team=s["team"]),
+                self.sharing_directory(bucket=s["path"]),
             )
             ts_logger.warning(
                 f"DATASET {self.set_name}: sharing with {s['team']}, snapshot copied to {s['path']}."
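A minimal sketch of the sharing behaviour after this change, for reference: entries in the sharing config may now omit the "team" key, and the snapshot is copied into a directory derived from the bucket and the set name only. The helper names, paths and set name below are made up for illustration; only the defaulting of the missing "team" key mirrors the diff above.

import os
import shutil


def sharing_directory(bucket: str, set_name: str) -> str:
    # After this patch the sharing directory is bucket/set_name; the team is
    # no longer part of the path.
    directory = os.path.join(bucket, set_name)
    os.makedirs(directory, exist_ok=True)
    return directory


def share_snapshot(snapshot_file: str, set_name: str, sharing: list[dict]) -> None:
    for s in sharing:
        if "team" not in s:
            s["team"] = "no team specified"  # default is only used for the log line below
        target = sharing_directory(s["path"], set_name)
        shutil.copy(snapshot_file, target)
        print(f"sharing with {s['team']}, snapshot copied to {target}")


# Hypothetical usage:
# share_snapshot("snapshot_v1.parquet", "sample_set", [{"path": "/tmp/shared/s123"}])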
diff --git a/tests/test_dataset_sharing.py b/tests/test_dataset_sharing.py
index 3ef663f..cc1bce4 100644
--- a/tests/test_dataset_sharing.py
+++ b/tests/test_dataset_sharing.py
@@ -1,7 +1,5 @@
 import logging
 
-import pytest
-
 from ssb_timeseries import fs
 from ssb_timeseries.dataset import Dataset
 from ssb_timeseries.dates import date_utc
@@ -18,7 +16,6 @@
 PRODUCT = "sample-data-product"
 
 
-@pytest.mark.skipif(False, reason="Don't skip.")
 @log_start_stop
 def test_snapshot_simple_set_has_higher_snapshot_file_count_after(caplog):
     caplog.set_level(logging.DEBUG)
@@ -37,15 +34,16 @@ def test_snapshot_simple_set_has_higher_snapshot_file_count_after(caplog):
     stage_path = x.io.snapshot_directory(
         product=x.product, process_stage=x.process_stage
     )
-    path_123 = x.io.dir(BUCKET, x.product, "shared", "s123")
-    path_234 = x.io.dir(BUCKET, x.product, "shared", "s234")
+    shared_base_path = x.io.dir(BUCKET, x.product, "shared", "all")
+    path_123 = shared_base_path
+    path_234 = shared_base_path
     x.sharing = [
         {
-            "team": "s123",
+            # should work even if the target team is not specified
             "path": path_123,
         },
         {
-            "team": "s234",
+            "team": "",
             "path": path_234,
         },
     ]
@@ -80,7 +78,7 @@ def log(path, before, after):
 
 
 @log_start_stop
-def test_snapshot_estimate_has_higher_file_count_after(caplog):
+def test_snapshot_estimate_specified_has_higher_file_count_after(caplog):
     caplog.set_level(logging.DEBUG)
 
     x = Dataset(
@@ -97,22 +95,22 @@ def test_snapshot_estimate_has_higher_file_count_after(caplog):
     stage_path = x.io.snapshot_directory(
         product=x.product, process_stage=x.process_stage
     )
-    path_123 = x.io.dir(BUCKET, x.product, "shared", "s123")
-    path_234 = x.io.dir(BUCKET, x.product, "shared", "s234")
+    shared_base_path = x.io.dir(BUCKET, x.product, "shared")
+    team_path_123 = x.io.dir(shared_base_path, "s123")
+    team_path_234 = x.io.dir(shared_base_path, "s234")
     x.sharing = [
         {
             "team": "s123",
-            "path": path_123,
+            "path": team_path_123,
         },
         {
             "team": "s234",
-            "path": path_234,
+            "path": team_path_234,
         },
     ]
 
-    path_123 = x.io.dir(path_123, x.name)
-    path_234 = x.io.dir(path_234, x.name)
-
+    path_123 = x.io.dir(team_path_123, x.name)
+    path_234 = x.io.dir(team_path_234, x.name)
     x.save()
     ts_logger.debug(f"SNAPSHOT conf.bucket {BUCKET}")
     ts_logger.debug(f"SNAPSHOT to {path_123}")

From 819f90a6c5369bdfc3b524a70ac650f48b197a00 Mon Sep 17 00:00:00 2001
From: Bernhard Ryeng
Date: Tue, 28 May 2024 02:04:14 +0200
Subject: [PATCH 2/4] Use @dataclass to simplify configs. Reset configs after tests.
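A hedged sketch of how the dataclass-based Config introduced by this patch is meant to be used, going only by the fields and methods visible in the diff that follows; the paths are made up:

from ssb_timeseries.config import Config

cfg = Config(
    bucket="/home/user",
    timeseries_root="/home/user/series_data",
    log_file="/home/user/logs/timeseries.log",
)
print(cfg["bucket"])  # dict-style lookup via __getitem__
print(cfg.to_json())  # JSON string built from dataclasses.asdict
cfg.save("/home/user/timeseries_config.json")  # writes the JSON file

Per the diff, save() also exports TIMESERIES_CONFIG so that subsequent runs pick up the saved file.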
---
 src/ssb_timeseries/config.py     | 231 ++++++++++---------------------
 src/ssb_timeseries/fs.py         |  14 +-
 src/ssb_timeseries/properties.py |   3 -
 tests/conftest.py                |  83 +++++++----
 tests/test_config.py             | 133 +++++++++++-------
 tests/test_fs.py                 |   8 ++
 6 files changed, 227 insertions(+), 245 deletions(-)

diff --git a/src/ssb_timeseries/config.py b/src/ssb_timeseries/config.py
index 89751b6..54eb554 100644
--- a/src/ssb_timeseries/config.py
+++ b/src/ssb_timeseries/config.py
@@ -1,6 +1,7 @@
 import json
 import os
 import sys
+from dataclasses import asdict
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -9,7 +10,7 @@
 from ssb_timeseries import fs
 from ssb_timeseries.types import PathStr
 
-# mypy: disable-error-code="assignment, arg-type"
+# mypy: disable-error-code="assignment, arg-type, override,call-arg,has-type"
 
 
 GCS = "gs://ssb-prod-dapla-felles-data-delt/poc-tidsserier"
@@ -17,26 +18,41 @@
 HOME = str(Path.home())
 LOGFILE = "timeseries.log"
 
-DEFAULT_BUCKET = HOME
-DEFAULT_TIMESERIES_LOCATION = os.path.join(HOME, "series_data")
-DEFAULT_CONFIG_LOCATION = os.path.join(HOME, "timeseries_config.json")
-DEFAULT_LOG_FILE_LOCATION: str = os.path.join(HOME, "logs", LOGFILE)
-CONFIGURATION_FILE: str = os.getenv("TIMESERIES_CONFIG", DEFAULT_CONFIG_LOCATION)
+DEFAULTS = {
+    "configuration_file": os.path.join(HOME, "timeseries_config.json"),
+    "timeseries_root": os.path.join(HOME, "series_data"),
+    "log_file": os.path.join(HOME, "logs", LOGFILE),
+    "bucket": HOME,
+}
+CONFIGURATION_FILE: str = os.getenv("TIMESERIES_CONFIG", DEFAULTS["configuration_file"])
 
 
-@dataclass(slots=True)
-class Cfg:
+@dataclass(slots=False)
+class Config:
     """Configuration class."""
 
     configuration_file: str = CONFIGURATION_FILE
-    repository: str = DEFAULT_TIMESERIES_LOCATION
-    log_file: str = DEFAULT_LOG_FILE_LOCATION
-    bucket: str = DEFAULT_BUCKET
-    product: str = ""
+    timeseries_root: str = DEFAULTS["timeseries_root"]
+    log_file: str = DEFAULTS["log_file"]
+    bucket: str = DEFAULTS["bucket"]
+
+    def __getitem__(self, item: str) -> str:
+        """Get the value of a configuration."""
+        d = asdict(self)
+        return str(d[item])
 
-    def __str__(self) -> str:
+    def __eq__(self, other: Self) -> bool:
+        """Equality test."""
+        return asdict(self) == other.__dict__()
+
+    def to_json(self, original_implementation: bool = False) -> str:
         """Return timeseries configurations as JSON string."""
-        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
+        if original_implementation:
+            return json.dumps(
+                self, default=lambda o: o.__dict__(), sort_keys=True, indent=4
+            )
+        else:
+            return json.dumps(asdict(self), sort_keys=True, indent=4)
 
     def save(self, path: PathStr = CONFIGURATION_FILE) -> None:
         """Saves configurations to JSON file and set environment variable TIMESERIES_CONFIG to the location of the file.
@@ -44,7 +60,7 @@ def save(self, path: PathStr = CONFIGURATION_FILE) -> None:
         Args:
             path (PathStr): Full path of the JSON file to save to. Defaults to the value of the environment variable TIMESERIES_CONFIG.
         """
-        fs.write_json(content=str(self), path=path)
+        fs.write_json(content=self.to_json(), path=path)
         if HOME == JOVYAN:
             # For some reason `os.environ["TIMESERIES_CONFIG"] = path` does not work:
             cmd = f"export TIMESERIES_CONFIG={CONFIGURATION_FILE}"
@@ -56,131 +72,24 @@ def save(self, path: PathStr = CONFIGURATION_FILE) -> None:
     @classmethod
     def load(cls, path: PathStr) -> Self:
         """Read the properties from a JSON file into a Config object."""
-        if path:
+        if fs.exists(path):
             json_file = json.loads(fs.read_json(path))
             return cls(
                 configuration_file=str(path),
                 bucket=json_file.get("bucket"),
-                repository=json_file.get("timeseries_root"),
+                timeseries_root=json_file.get("timeseries_root"),
                 product=json_file.get("product"),
                 log_file=json_file.get("log_file"),
             )
         else:
-            raise ValueError("cfg_from_file was called with an empty or invalid path.")
-
-
-class Config:
-    """Timeseries configurations: bucket, product, timeseries_root, log_file."""
-
-    def __init__(self, configuration_file: str = "", **kwargs: str) -> None:
-        """Create or retrieve configurations.
-
-        If called with no parameters, Config attempts to read from the file specified by the environment variable TIMSERIES_CONFIG. If that does not succeed, applies defaults.
-
-        Args:
-            configuration_file (str): Tries to read this before falling back to environment variable. Defaults to "".
-            kwargs (str): Configuration options:
-
-        Kwargs:
-            - bucket - The "production bucket" location. Sharing and snapshots typically go in the sub directories hee, depending on configs.
-            - product - Optional sub directory for "production bucket".
-            - timeseries_root - Series data are stored in tree underneath. Defaults to '$HOME/series_data/'
-            - log_file - Exactly that. Defaults to '$HOME/series_data/'
-        """
-        if fs.exists(configuration_file):
-            # self = Cfg.load(configuration_file)  # NOSONAR # TODO: switch to Cfg class to simplify code
-            self.configuration_file = configuration_file
-            os.environ["TIMESERIES_CONFIG"] = configuration_file
-        elif configuration_file:
-            if fs.exists(CONFIGURATION_FILE):
-                self.load(CONFIGURATION_FILE)
-                self.save(configuration_file)
-            else:
-                self.__set_default_config()
-
-        elif fs.exists(CONFIGURATION_FILE):
-            self.load(CONFIGURATION_FILE)
-            self.configuration_file = CONFIGURATION_FILE
-
-        if kwargs:
-            log_file = kwargs.get("log_file", "")
-            if log_file:
-                self.log_file = log_file
-            elif not self.log_file:
-                self.log_file = DEFAULT_LOG_FILE_LOCATION
-
-            timeseries_root = kwargs.get("timeseries_root", "")
-            if timeseries_root:
-                self.timeseries_root = timeseries_root
-            elif not self.timeseries_root:
-                self.timeseries_root = DEFAULT_TIMESERIES_LOCATION
-
-            bucket = kwargs.get("bucket", "")
-            if bucket:
-                self.bucket = bucket
-            elif not self.bucket:
-                self.bucket = DEFAULT_BUCKET
-
-            product = kwargs.get("product", "")
-            if product:
-                self.product = product
-
-        if not hasattr(self, "log_file"):
-            self.__set_default_config()
-
-        self.save()
-
-    @property
-    def file_system_type(self) -> str:
-        """Returns 'gcs' if Config.timeseries_root is on Google Cloud Storage, otherwise 'local'."""
-        if self.timeseries_root.startswith("gs://"):
-            return "gcs"
-        else:
-            return "local"
-
-    def to_json(self) -> str:
-        """Return timeseries configurations as JSON string."""
-        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
-
-    def __str__(self) -> str:
-        """Human readable string representation of configuration object: JSON string."""
-        return self.to_json()
-
-    def load(self, path: PathStr) -> None:
-        """Read the properties from a JSON file into a Config object."""
-        if path:
-            read_from_file = json.loads(fs.read_json(path))
-
-            self.bucket = read_from_file.get("bucket")
-            self.timeseries_root = read_from_file.get("timeseries_root")
-            self.product = read_from_file.get("product", "")
-            self.log_file = read_from_file.get("log_file", "")
-        else:
-            raise ValueError("Config.load() was called with an empty path.")
-
-    def save(self, path: PathStr = CONFIGURATION_FILE) -> None:
-        """Saves configurations to JSON file and set environment variable TIMESERIES_CONFIG to the location of the file.
-
-        Args:
-            path (PathStr): Full path of the JSON file to save to. Defaults to the value of the environment variable TIMESERIES_CONFIG.
-        """
-        fs.write_json(content=self.to_json(), path=path)
-        if HOME == JOVYAN:
-            # For some reason `os.environ["TIMESERIES_CONFIG"] = path` does not work:
-            cmd = f"export TIMESERIES_CONFIG={CONFIGURATION_FILE}"
-            os.system(cmd)
-            # os.system(f"echo '{cmd}' >> ~/.bashrc")
-        else:
-            os.environ["TIMESERIES_CONFIG"] = path
+            raise FileNotFoundError(
+                "Cfg.load() was called with an empty or invalid path."
+            )
 
-    def __set_default_config(self) -> None:
-        self.bucket = DEFAULT_BUCKET
-        self.configuration_file = DEFAULT_CONFIG_LOCATION
-        self.log_file = DEFAULT_LOG_FILE_LOCATION
-        self.product = ""
-        self.timeseries_root = DEFAULT_TIMESERIES_LOCATION
-        fs.touch(self.log_file)
+    def __dict__(self) -> dict[str, str]:
+        """Return timeseries configurations as dict."""
+        return asdict(self)
 
 
 CONFIG = Config(configuration_file=CONFIGURATION_FILE)
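Before the final hunk, a rough sketch of the dispatch idea that the reworked main() below implements: a recognised preset name fills in the configuration values directly, and anything else must be a path to an existing configuration file. This is illustrative only; the helper, preset values and bucket below are invented, not the module's code.

import os


def resolve(identifier: str) -> dict:
    """Illustrative only: named preset -> explicit values, otherwise expect a config file."""
    match identifier:
        case "home":
            return {"bucket": os.path.expanduser("~")}
        case "gcs":
            return {"bucket": "gs://example-bucket"}  # placeholder, not the real bucket
        case _ if os.path.isfile(identifier):
            return {"configuration_file": identifier}
        case _:
            raise ValueError(f"Unrecognised named configuration preset '{identifier}'.")


print(resolve("home"))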
@@ -205,54 +114,58 @@ def main(*args: str | PathStr) -> None:
         ValueError: If args is not 'home' | 'gcs' | 'jovyan'.
     """
 
-    TIMESERIES_CONFIG = os.getenv("TIMESERIES_CONFIG", DEFAULT_CONFIG_LOCATION)
-    if not TIMESERIES_CONFIG:
-        print(
-            "Environvent variable TIMESERIES_CONFIG is empty. Using default: {DEFAULT_CONFIG_LOCATION}."
-        )
-        os.environ["TIMESERIES_CONFIG"] = DEFAULT_CONFIG_LOCATION
-        TIMESERIES_CONFIG = DEFAULT_CONFIG_LOCATION
-
     if args:
-        named_config = args[0]
+        config_identifier: PathStr = args[0]
     else:
-        named_config = sys.argv[1]
+        config_identifier = sys.argv[1]
 
     print(
-        f"Update configuration file TIMESERIES_CONFIG: {TIMESERIES_CONFIG}, with named presets: '{named_config}'."
+        f"Update configuration file TIMESERIES_CONFIG: {CONFIGURATION_FILE}, with named presets: '{config_identifier}'."
     )
-    match named_config:
+    match config_identifier:
         case "home":
+            identifier_is_named_option = True
             bucket = HOME
-            timeseries_root = os.path.join(HOME, "series_data")
-            log_file = DEFAULT_LOG_FILE_LOCATION
+            timeseries_root = fs.path(HOME, "series_data")
+            log_file = DEFAULTS["log_file"]
         case "gcs":
+            identifier_is_named_option = True
             bucket = GCS
-            timeseries_root = os.path.join(GCS, "series_data")
-            log_file = os.path.join(HOME, "logs", LOGFILE)
+            timeseries_root = fs.path(GCS, "series_data")
+            log_file = fs.path(HOME, "logs", LOGFILE)
         case "jovyan":
+            identifier_is_named_option = True
             bucket = JOVYAN
-            timeseries_root = os.path.join(JOVYAN, "series_data")
-            log_file = os.path.join(JOVYAN, "logs", LOGFILE)
+            timeseries_root = fs.path(JOVYAN, "series_data")
+            log_file = fs.path(JOVYAN, "logs", LOGFILE)
         case _:
-            raise ValueError(
-                f"Unrecognised named configuration preset '{named_config}'."
-            )
+            identifier_is_named_option = False
+            identifier_is_existing_file = fs.exists(config_identifier)
+            bucket = None
+
+    if identifier_is_named_option:
+        cfg = Config(
+            configuration_file=CONFIGURATION_FILE,
+            bucket=bucket,
+            timeseries_root=timeseries_root,
+            log_file=log_file,
+        )
+    elif identifier_is_existing_file:
+        cfg = Config(configuration_file=config_identifier)
+    else:
+        raise ValueError(
+            f"Unrecognised named configuration preset '{config_identifier}'."
+        )
 
-    cfg = Config(
-        configuration_file=TIMESERIES_CONFIG,
-        bucket=bucket,
-        timeseries_root=timeseries_root,
-        log_file=log_file,
-    )
-    cfg.save(TIMESERIES_CONFIG)
+    cfg.save(CONFIGURATION_FILE)
     print(cfg)
     print(os.getenv("TIMESERIES_CONFIG"))
 
 
 if __name__ == "__main__":
-    # Execute when called directly, ie not via import statements.
+    """Execute when called directly, ie not via import statements."""
     # ??? `poetry run timeseries-config