From e2ac8e7e558dd0bf6699bd31afd24357d20a0f6d Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Mon, 18 Dec 2023 14:57:17 -0800 Subject: [PATCH 01/19] Start of sim archiving --- rubin_scheduler/sim_archive/sim_archive.py | 230 +++++++++++++++++++++ tests/sim_archive/test_sim_archive.py | 59 ++++++ 2 files changed, 289 insertions(+) create mode 100644 rubin_scheduler/sim_archive/sim_archive.py create mode 100644 tests/sim_archive/test_sim_archive.py diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py new file mode 100644 index 00000000..f6cbccf2 --- /dev/null +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -0,0 +1,230 @@ +import datetime +import hashlib +import json +import os +import shutil +import socket +import sys +from numbers import Number +from pathlib import Path +from tempfile import TemporaryDirectory +from contextlib import redirect_stdout + +import lsst.resources +import numpy as np +import yaml +from astropy.time import Time + +from conda.exceptions import EnvironmentLocationNotFound +from conda.gateways.disk.test import is_conda_environment +from conda.cli.main_list import print_packages + +import rubin_scheduler +from rubin_scheduler.scheduler.utils import SchemaConverter +from rubin_scheduler.utils import Site + +SITE = None + + +def make_sim_archive_dir( + observations, reward_df=None, obs_rewards=None, in_files={}, sim_runner_kwargs={}, data_path=None +): + """Create or fill a local simulation archive directory. + + Parameters + ---------- + observations : `numpy.recarray` + The observations data, in the "obs" format as accepted and created by + `rubin_scheduler.scheduler.utils.SchemaConverter`. + reward_df : `pandas.DataFrame`, optional + The reward data, by default None. + obs_rewards : `pandas.DataFrame`, optional + The observation rewards data, by default None. + in_files : `dict`, optional + Additional input files to be included in the archive, by default {}. + sim_runner_kwargs : `dict`, optional + Additional simulation runner keyword arguments, by default {}. + data_path : `str` or `pathlib.Path`, optional + The path to the simulation archive directory, by default None. + + Returns + ------- + data_dir : `pathlib.Path` or `tempfile.TemporaryDirectory` + The temporary directory containing the simulation archive. 
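+        If ``data_path`` was provided, that same path is returned as a
+        `pathlib.Path` instead. When a `TemporaryDirectory` is returned, the
+        caller must keep a reference to it, or its contents will be cleaned
+        up automatically (see the note in the function body below).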
+ """ + if data_path is None: + data_dir = TemporaryDirectory() + data_path = Path(data_dir.name) + else: + data_dir = None + + if not isinstance(data_path, Path): + data_path = Path(data_path) + + files = {} + + # Save the observations + files["observations"] = {"name": "opsim.db"} + opsim_output_fname = data_path.joinpath(files["observations"]["name"]) + SchemaConverter().obs2opsim(observations, filename=opsim_output_fname) + + # Save the rewards + if reward_df is not None and obs_rewards is not None: + files["rewards"] = {"name": "rewards.h5"} + rewards_fname = data_path.joinpath(files["rewards"]["name"]) + if reward_df is not None: + reward_df.to_hdf(rewards_fname, "reward_df") + if obs_rewards is not None: + obs_rewards.to_hdf(rewards_fname, "obs_rewards") + + # Save basic statistics + files["statistics"] = {"name": "obs_stats.txt"} + stats_fname = data_path.joinpath(files["statistics"]["name"]) + with open(stats_fname, "w") as stats_io: + print(SchemaConverter().obs2opsim(observations).describe().T.to_csv(sep="\t"), file=stats_io) + + + # Save the conda environment + conda_prefix = Path(sys.executable).parent.parent.as_posix() + if not is_conda_environment(conda_prefix): + raise EnvironmentLocationNotFound(conda_prefix) + + environment_fname = data_path.joinpath('environment.txt').as_posix() + + # Python equivilest of conda list --export -p $conda_prefix > $environment_fname + with open(environment_fname, 'w') as environment_io: + with redirect_stdout(environment_io): + print_packages(conda_prefix, format='export') + + # Save pypi packages + pypi_fname = data_path.joinpath('pypi.json').as_posix() + + pip_json_output = os.popen('pip list --format json') + pip_list = json.loads(pip_json_output.read()) + + with open(pypi_fname, 'w') as pypi_io: + print(json.dumps(pip_list, indent=4), file=pypi_io) + + # Add supplied files + for file_type, fname in in_files.items(): + files[file_type] = {"name": Path(fname).name} + try: + shutil.copyfile(fname, data_path.joinpath(files[file_type]["name"])) + except shutil.SameFileError: + pass + + # Add file hashes + for file_type in files: + fname = data_path.joinpath(files[file_type]["name"]) + with open(fname, "rb") as file_io: + content = file_io.read() + + files[file_type]["md5"] = hashlib.md5(content).hexdigest() + + # Metadata + # To use a different site, a user can set the global variable SITE. 
+    site = Site(name="LSST") if SITE is None else SITE
+
+    def evening_local_date(mjd, longitude=site.longitude):
+        evening_local_mjd = np.floor(mjd + longitude / 360 - 0.5).astype(int)
+        evening_local_iso = Time(evening_local_mjd, format="mjd").iso[:10]
+        return evening_local_iso
+
+    opsim_metadata = {}
+    opsim_metadata["scheduler_version"] = rubin_scheduler.__version__
+    opsim_metadata["host"] = socket.getfqdn()
+    opsim_metadata["username"] = os.environ["USER"]
+
+    simulation_dates = {}
+    if "mjd_start" in sim_runner_kwargs:
+        simulation_dates["start"] = evening_local_date(sim_runner_kwargs["mjd_start"])
+
+        if "survey_length" in sim_runner_kwargs:
+            simulation_dates["end"] = evening_local_date(
+                sim_runner_kwargs["mjd_start"] + sim_runner_kwargs["survey_length"]
+            )
+    else:
+        simulation_dates["start"] = evening_local_date(observations["mjd"].min())
+        simulation_dates["end"] = evening_local_date(observations["mjd"].max())
+
+    if len(sim_runner_kwargs) > 0:
+        opsim_metadata["sim_runner_kwargs"] = {}
+        for key, value in sim_runner_kwargs.items():
+            match value:
+                case bool() | Number() | str():
+                    opsim_metadata["sim_runner_kwargs"][key] = value
+                case _:
+                    opsim_metadata["sim_runner_kwargs"][key] = str(value)
+
+    opsim_metadata["simulated_dates"] = simulation_dates
+    opsim_metadata["files"] = files
+
+    sim_metadata_fname = data_path.joinpath("sim_metadata.yaml")
+    with open(sim_metadata_fname, "w") as sim_metadata_io:
+        print(yaml.dump(opsim_metadata, indent=4), file=sim_metadata_io)
+
+    files["metadata"] = {"name": sim_metadata_fname}
+
+    if data_dir is not None:
+        # If we created a temporary directory and do not return it, it
+        # will get automatically cleaned up, losing our work.
+        # So, return it.
+        return data_dir
+
+    return data_path
+
+
+def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-prenight/opsim/"):
+    """Transfer the contents of an archive directory to a resource.
+
+    Parameters:
+    ----------
+    archive_dir : `str`
+        The path to the archive directory containing the files to be transferred.
+    archive_base_uri : `str`, optional
+        The base URI to which the archive files will be transferred. Default is "s3://rubin-scheduler-prenight/opsim/".
+
+    Returns:
+    -------
+    resource_rpath : `lsst.resources.ResourcePath`
+        The destination resource.
+    """
+
+    metadata_fname = Path(archive_dir).joinpath("sim_metadata.yaml")
+    with open(metadata_fname, "r") as metadata_io:
+        sim_metadata = yaml.safe_load(metadata_io)
+
+    insert_date = datetime.datetime.utcnow().date().isoformat()
+    insert_date_rpath = lsst.resources.ResourcePath(archive_base_uri).join(insert_date, forceDirectory=True)
+    if not insert_date_rpath.exists():
+        insert_date_rpath.mkdir()
+
+    # Number the sims in the insert date dir by
+    # looking for all the integer directories, and choosing the next one.
+    found_ids = []
+    for base_dir, found_dirs, found_files in insert_date_rpath.walk():
+        if base_dir == insert_date_rpath:
+            for found_dir in found_dirs:
+                try:
+                    found_ids.append(int(found_dir[:-1]))
+                except ValueError:
+                    pass
+
+    new_id = max(found_ids) + 1 if len(found_ids) > 0 else 1
+    resource_rpath = insert_date_rpath.join(f"{new_id}", forceDirectory=True)
+    resource_rpath.mkdir()
+
+    # Include the metadata file itself.
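+    # Listing it in "files" lets the copy loop below transfer
+    # sim_metadata.yaml to the destination along with the data files.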
+ sim_metadata["files"]["metadata"] = {"name": "sim_metadata.yaml"} + + for file_info in sim_metadata["files"].values(): + source_fname = Path(archive_dir).joinpath(file_info["name"]) + with open(source_fname, "rb") as source_io: + content = source_io.read() + + destination_rpath = resource_rpath.join(file_info["name"]) + destination_rpath.write(content) + + print(f"Copied {source_fname} to {destination_rpath}") + + return resource_rpath diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py new file mode 100644 index 00000000..06d592da --- /dev/null +++ b/tests/sim_archive/test_sim_archive.py @@ -0,0 +1,59 @@ +import lzma +import pickle +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory + +from rubin_scheduler.scheduler import sim_runner +from rubin_scheduler.scheduler.example import example_scheduler +from rubin_scheduler.scheduler.model_observatory import ModelObservatory +from rubin_scheduler.sim_archive.sim_archive import make_sim_archive_dir, transfer_archive_dir +from rubin_scheduler.utils import survey_start_mjd + +TEST_SIM_DATE = "2025-01-01" + + +class TestSimArchive(unittest.TestCase): + def test_sim_archive(self): + # Begin by running a short simulation + mjd_start = survey_start_mjd() + survey_length = 1 # days + scheduler = example_scheduler(mjd_start=mjd_start) + scheduler.keep_rewards = True + observatory = ModelObservatory(mjd_start=mjd_start) + + # Record the state of the scheduler at the start of the sim. + data_dir = TemporaryDirectory() + data_path = Path(data_dir.name) + + scheduler_fname = data_path.joinpath("scheduler.pickle.xz") + with lzma.open(scheduler_fname, "wb", format=lzma.FORMAT_XZ) as pio: + pickle.dump(scheduler, pio) + + files_to_archive = {"scheduler": scheduler_fname} + + # Run the simulation + sim_runner_kwargs = { + "mjd_start": mjd_start, + "survey_length": survey_length, + "record_rewards": True, + } + + observatory, scheduler, observations, reward_df, obs_rewards = sim_runner( + observatory, scheduler, **sim_runner_kwargs + ) + + # Make the scratch sim archive + make_sim_archive_dir( + observations, + reward_df=reward_df, + obs_rewards=obs_rewards, + in_files=files_to_archive, + sim_runner_kwargs=sim_runner_kwargs, + data_path=data_path, + ) + + # Move the scratch sim archive to a test resource + test_resource_dir = TemporaryDirectory() + test_resource_uri = "file://" + test_resource_dir.name + transfer_archive_dir(data_dir.name, test_resource_uri) From 503ff550b9d9da11ec2110f5cf6fc4e3c5f3f0e6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 19 Dec 2023 11:30:22 -0800 Subject: [PATCH 02/19] Add opsim archive validation function --- rubin_scheduler/sim_archive/sim_archive.py | 52 ++++++++++++++++------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index f6cbccf2..54b5ee21 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -5,19 +5,18 @@ import shutil import socket import sys +from contextlib import redirect_stdout from numbers import Number from pathlib import Path from tempfile import TemporaryDirectory -from contextlib import redirect_stdout -import lsst.resources import numpy as np import yaml from astropy.time import Time - +from conda.cli.main_list import print_packages from conda.exceptions import EnvironmentLocationNotFound from conda.gateways.disk.test import is_conda_environment -from 
conda.cli.main_list import print_packages +from lsst.resources import ResourcePath import rubin_scheduler from rubin_scheduler.scheduler.utils import SchemaConverter @@ -83,26 +82,25 @@ def make_sim_archive_dir( with open(stats_fname, "w") as stats_io: print(SchemaConverter().obs2opsim(observations).describe().T.to_csv(sep="\t"), file=stats_io) - # Save the conda environment conda_prefix = Path(sys.executable).parent.parent.as_posix() if not is_conda_environment(conda_prefix): raise EnvironmentLocationNotFound(conda_prefix) - environment_fname = data_path.joinpath('environment.txt').as_posix() + environment_fname = data_path.joinpath("environment.txt").as_posix() # Python equivilest of conda list --export -p $conda_prefix > $environment_fname - with open(environment_fname, 'w') as environment_io: + with open(environment_fname, "w") as environment_io: with redirect_stdout(environment_io): - print_packages(conda_prefix, format='export') + print_packages(conda_prefix, format="export") # Save pypi packages - pypi_fname = data_path.joinpath('pypi.json').as_posix() + pypi_fname = data_path.joinpath("pypi.json").as_posix() - pip_json_output = os.popen('pip list --format json') + pip_json_output = os.popen("pip list --format json") pip_list = json.loads(pip_json_output.read()) - with open(pypi_fname, 'w') as pypi_io: + with open(pypi_fname, "w") as pypi_io: print(json.dumps(pip_list, indent=4), file=pypi_io) # Add supplied files @@ -186,7 +184,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre Returns: ------- - resource_rpath : `lsst.resources.ResourcePath` + resource_rpath : `ResourcePath` The destination resource. """ @@ -195,7 +193,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre sim_metadata = yaml.safe_load(metadata_io) insert_date = datetime.datetime.utcnow().date().isoformat() - insert_date_rpath = lsst.resources.ResourcePath(archive_base_uri).join(insert_date, forceDirectory=True) + insert_date_rpath = ResourcePath(archive_base_uri).join(insert_date, forceDirectory=True) if not insert_date_rpath.exists(): insert_date_rpath.mkdir() @@ -228,3 +226,31 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre print(f"Copied {source_fname} to {destination_rpath}") return resource_rpath + + +def check_opsim_archive_resource(archive_uri): + """Check the contents of an opsim archive resource. + + Parameters: + ---------- + archive_uri : `str` + The URI of the archive resource to be checked. + + Returns: + ------- + validity: `dict` + A dictionary of files checked, and their validity. 
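+        For example (illustrative): ``{"opsim.db": True, "rewards.h5": True}``.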
+ """ + metadata_path = ResourcePath(archive_uri).join("sim_metadata.yaml") + with metadata_path.open(mode="r") as metadata_io: + sim_metadata = yaml.safe_load(metadata_io) + + results = {} + + for file_info in sim_metadata["files"].values(): + resource_path = ResourcePath(archive_uri).join(file_info["name"]) + content = resource_path.read() + + results[file_info["name"]] = file_info["md5"] == hashlib.md5(content).hexdigest() + + return results From 7736e46c3001fedaed22df3fb648e4fb0395cb39 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 19 Dec 2023 11:30:49 -0800 Subject: [PATCH 03/19] Add validation and contents to sim archive tests --- tests/sim_archive/test_sim_archive.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 06d592da..4eec5a33 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -7,11 +7,13 @@ from rubin_scheduler.scheduler import sim_runner from rubin_scheduler.scheduler.example import example_scheduler from rubin_scheduler.scheduler.model_observatory import ModelObservatory -from rubin_scheduler.sim_archive.sim_archive import make_sim_archive_dir, transfer_archive_dir +from rubin_scheduler.sim_archive.sim_archive import ( + check_opsim_archive_resource, + make_sim_archive_dir, + transfer_archive_dir, +) from rubin_scheduler.utils import survey_start_mjd -TEST_SIM_DATE = "2025-01-01" - class TestSimArchive(unittest.TestCase): def test_sim_archive(self): @@ -56,4 +58,12 @@ def test_sim_archive(self): # Move the scratch sim archive to a test resource test_resource_dir = TemporaryDirectory() test_resource_uri = "file://" + test_resource_dir.name - transfer_archive_dir(data_dir.name, test_resource_uri) + sim_archive_uri = transfer_archive_dir(data_dir.name, test_resource_uri) + + # Check the saved archive + archive_check = check_opsim_archive_resource(sim_archive_uri) + self.assertEqual( + archive_check.keys(), set(["opsim.db", "rewards.h5", "scheduler.pickle.xz", "obs_stats.txt"]) + ) + for value in archive_check.values(): + self.assertTrue(value) From 9b56db7cb0e170b70880141b10267c29b9836b52 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 19 Dec 2023 12:27:03 -0800 Subject: [PATCH 04/19] include environment files in opsim archive --- rubin_scheduler/sim_archive/sim_archive.py | 21 ++++++++++++--------- tests/sim_archive/test_sim_archive.py | 12 +++++++++++- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 54b5ee21..1751980f 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -14,7 +14,6 @@ import yaml from astropy.time import Time from conda.cli.main_list import print_packages -from conda.exceptions import EnvironmentLocationNotFound from conda.gateways.disk.test import is_conda_environment from lsst.resources import ResourcePath @@ -84,18 +83,20 @@ def make_sim_archive_dir( # Save the conda environment conda_prefix = Path(sys.executable).parent.parent.as_posix() - if not is_conda_environment(conda_prefix): - raise EnvironmentLocationNotFound(conda_prefix) + if is_conda_environment(conda_prefix): + conda_base_fname = 'environment.txt' + environment_fname = data_path.joinpath(conda_base_fname).as_posix() - environment_fname = data_path.joinpath("environment.txt").as_posix() + # Python equivilest of 
conda list --export -p $conda_prefix > $environment_fname + with open(environment_fname, "w") as environment_io: + with redirect_stdout(environment_io): + print_packages(conda_prefix, format="export") - # Python equivilest of conda list --export -p $conda_prefix > $environment_fname - with open(environment_fname, "w") as environment_io: - with redirect_stdout(environment_io): - print_packages(conda_prefix, format="export") + files['environment'] = {'name': conda_base_fname} # Save pypi packages - pypi_fname = data_path.joinpath("pypi.json").as_posix() + pypi_base_fname = 'pypi.json' + pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() pip_json_output = os.popen("pip list --format json") pip_list = json.loads(pip_json_output.read()) @@ -103,6 +104,8 @@ def make_sim_archive_dir( with open(pypi_fname, "w") as pypi_io: print(json.dumps(pip_list, indent=4), file=pypi_io) + files['pypi'] = {'name': pypi_base_fname} + # Add supplied files for file_type, fname in in_files.items(): files[file_type] = {"name": Path(fname).name} diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 4eec5a33..0db95ace 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -63,7 +63,17 @@ def test_sim_archive(self): # Check the saved archive archive_check = check_opsim_archive_resource(sim_archive_uri) self.assertEqual( - archive_check.keys(), set(["opsim.db", "rewards.h5", "scheduler.pickle.xz", "obs_stats.txt"]) + archive_check.keys(), + set( + [ + "opsim.db", + "rewards.h5", + "scheduler.pickle.xz", + "obs_stats.txt", + "environment.txt", + "pypi.json", + ] + ), ) for value in archive_check.values(): self.assertTrue(value) From 0e95d4f252dccd4565eee9b8119fa9d5736ae17a Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 08:14:18 -0800 Subject: [PATCH 05/19] Add tags and label to sim metadata --- rubin_scheduler/sim_archive/sim_archive.py | 30 ++++++++++++++++++---- tests/sim_archive/test_sim_archive.py | 2 ++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 1751980f..d04d0b56 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -25,7 +25,14 @@ def make_sim_archive_dir( - observations, reward_df=None, obs_rewards=None, in_files={}, sim_runner_kwargs={}, data_path=None + observations, + reward_df=None, + obs_rewards=None, + in_files={}, + sim_runner_kwargs={}, + tags=[], + label=None, + data_path=None, ): """Create or fill a local simulation archive directory. @@ -42,6 +49,10 @@ def make_sim_archive_dir( Additional input files to be included in the archive, by default {}. sim_runner_kwargs : `dict`, optional Additional simulation runner keyword arguments, by default {}. + tags : `list` [`str`], optional + A list of tags/keywords to be included in the metadata, by default []. + label : `str`, optional + A label to be included in the metadata, by default None. data_path : `str` or `pathlib.Path`, optional The path to the simulation archive directory, by default None. 
@@ -84,7 +95,7 @@ def make_sim_archive_dir( # Save the conda environment conda_prefix = Path(sys.executable).parent.parent.as_posix() if is_conda_environment(conda_prefix): - conda_base_fname = 'environment.txt' + conda_base_fname = "environment.txt" environment_fname = data_path.joinpath(conda_base_fname).as_posix() # Python equivilest of conda list --export -p $conda_prefix > $environment_fname @@ -92,10 +103,10 @@ def make_sim_archive_dir( with redirect_stdout(environment_io): print_packages(conda_prefix, format="export") - files['environment'] = {'name': conda_base_fname} + files["environment"] = {"name": conda_base_fname} # Save pypi packages - pypi_base_fname = 'pypi.json' + pypi_base_fname = "pypi.json" pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() pip_json_output = os.popen("pip list --format json") @@ -104,7 +115,7 @@ def make_sim_archive_dir( with open(pypi_fname, "w") as pypi_io: print(json.dumps(pip_list, indent=4), file=pypi_io) - files['pypi'] = {'name': pypi_base_fname} + files["pypi"] = {"name": pypi_base_fname} # Add supplied files for file_type, fname in in_files.items(): @@ -160,6 +171,15 @@ def evening_local_date(mjd, longitude=site.longitude): opsim_metadata["simulated_dates"] = simulation_dates opsim_metadata["files"] = files + if len(tags) > 0: + for tag in tags: + assert isinstance(tag, str), "Tags must be strings." + opsim_metadata["tags"] = tags + + if label is not None: + assert isinstance(label, str), "The sim label must be a string." + opsim_metadata["label"] = label + sim_metadata_fname = data_path.joinpath("sim_metadata.yaml") with open(sim_metadata_fname, "w") as sim_metadata_io: print(yaml.dump(opsim_metadata, indent=4), file=sim_metadata_io) diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 0db95ace..83dbcc74 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -52,6 +52,8 @@ def test_sim_archive(self): obs_rewards=obs_rewards, in_files=files_to_archive, sim_runner_kwargs=sim_runner_kwargs, + tags=["test"], + label="test", data_path=data_path, ) From 0983f8de2289b6aca4ed890ce9348c1dabaeb5e9 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 09:53:01 -0800 Subject: [PATCH 06/19] add func for querying sim archive metadata --- rubin_scheduler/sim_archive/sim_archive.py | 65 ++++++++++++++++++++++ tests/sim_archive/test_sim_archive.py | 7 +++ 2 files changed, 72 insertions(+) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index d04d0b56..7867480e 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -277,3 +277,68 @@ def check_opsim_archive_resource(archive_uri): results[file_info["name"]] = file_info["md5"] == hashlib.md5(content).hexdigest() return results + + +def _build_archived_sim_label(base_uri, metadata_resource, metadata): + label_base = metadata_resource.dirname().geturl().removeprefix(base_uri).rstrip("/").lstrip("/") + + # If a label is supplied by the metadata, use it + if "label" in metadata: + label = f"{label_base} {metadata['label']}" + return label + + try: + sim_dates = metadata["simulated_dates"] + start_date = sim_dates["start"] + end_date = sim_dates["end"] + label = f"{label_base} of {start_date}" + if end_date != start_date: + label = f"{label} to {end_date}" + except KeyError: + label = label_base + + if "scheduler_version" in metadata: + label = f"{label} with 
{metadata['scheduler_version']}" + + return label + + +def read_archived_sim_metadata(base_uri, latest=None, num_nights=5): + """Read metadata for a time range of archived opsim output. + + Parameters: + ---------- + base_uri : `str` + The base URI of the archive resource to be checked. + latest : `str`, optional + The date of the latest simulation whose metadata should be loaded. + This is the date on which the simulations was added to the archive, + not necessarily the date on which the simulation was run, or any + of the dates simulated. + Default is today. + num_nights : `int` + The number of nights of the date window to load. + + Returns: + ------- + sim_metadata: `dict` + A dictionary of metadata for simulations in the date range. + """ + latest_mjd = int(Time.now().mjd if latest is None else Time(latest).mjd) + earliest_mjd = int(latest_mjd - num_nights) + + all_metadata = {} + for mjd in range(earliest_mjd, latest_mjd + 1): + iso_date = Time(mjd, format="mjd").iso[:10] + date_resource = ResourcePath(base_uri).join(iso_date, forceDirectory=True) + if date_resource.exists(): + for base_dir, found_dirs, found_files in date_resource.walk(file_filter=r".*sim_metadata.yaml"): + for found_file in found_files: + found_resource = ResourcePath(base_dir).join(found_file) + these_metadata = yaml.safe_load(found_resource.read().decode("utf-8")) + these_metadata["label"] = _build_archived_sim_label( + base_uri, found_resource, these_metadata + ) + all_metadata[str(found_resource.dirname())] = these_metadata + + return all_metadata diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 83dbcc74..29b63263 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -10,6 +10,7 @@ from rubin_scheduler.sim_archive.sim_archive import ( check_opsim_archive_resource, make_sim_archive_dir, + read_archived_sim_metadata, transfer_archive_dir, ) from rubin_scheduler.utils import survey_start_mjd @@ -79,3 +80,9 @@ def test_sim_archive(self): ) for value in archive_check.values(): self.assertTrue(value) + + # Read back the metadata + archive_metadata = read_archived_sim_metadata(test_resource_uri) + base = sim_archive_uri.dirname().geturl().removeprefix(test_resource_uri).rstrip("/").lstrip("/") + expected_label = f"{base} test" + self.assertEqual(archive_metadata[sim_archive_uri.geturl()]["label"], expected_label) From 8ce01c706a80b270b38519fcbbeebc1d25320d1b Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 12:14:44 -0800 Subject: [PATCH 07/19] replace printing with logging --- rubin_scheduler/sim_archive/sim_archive.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 7867480e..2b6d41cc 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -1,6 +1,7 @@ import datetime import hashlib import json +import logging import os import shutil import socket @@ -22,6 +23,7 @@ from rubin_scheduler.utils import Site SITE = None +LOGGER = logging.getLogger(__name__) def make_sim_archive_dir( @@ -246,7 +248,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre destination_rpath = resource_rpath.join(file_info["name"]) destination_rpath.write(content) - print(f"Copied {source_fname} to {destination_rpath}") + LOGGER.info(f"Copied {source_fname} to {destination_rpath}") return resource_rpath From 
0525e94fc9368e3f9d0321d3d5c961445d069f44 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 12:20:37 -0800 Subject: [PATCH 08/19] follow module conventions --- rubin_scheduler/sim_archive/__init__.py | 1 + rubin_scheduler/sim_archive/sim_archive.py | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 rubin_scheduler/sim_archive/__init__.py diff --git a/rubin_scheduler/sim_archive/__init__.py b/rubin_scheduler/sim_archive/__init__.py new file mode 100644 index 00000000..ee2aa6d1 --- /dev/null +++ b/rubin_scheduler/sim_archive/__init__.py @@ -0,0 +1 @@ +from .sim_archive import * diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 2b6d41cc..21ed783a 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -1,3 +1,13 @@ +"""Tools for maintaining an archive of opsim output and metadata. +""" + +__all__ = [ + "make_sim_archive_dir", + "transfer_archive_dir", + "check_opsim_archive_resource", + "read_archived_sim_metadata", +] + import datetime import hashlib import json From 18dd89933458a271fa55c604c33f8eecc7dda3ec Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 2 Jan 2024 12:14:23 -0800 Subject: [PATCH 09/19] include lsst.resources to support sim_archive --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e473fccd..4a415bba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "requests", "shapely", "tqdm", + "lsst.resources", ] [project.optional-dependencies] From ef07272c1aa11cad692772e676a5444d4a4b63f0 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 2 Jan 2024 16:01:15 -0800 Subject: [PATCH 10/19] initial cli for sim_archive --- pyproject.toml | 1 + rubin_scheduler/sim_archive/sim_archive.py | 125 +++++++++++++++++---- tests/sim_archive/test_sim_archive.py | 28 +++++ 3 files changed, 134 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4a415bba..a232b82b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dev = [ [project.scripts] scheduler_download_data = "rubin_scheduler.data.scheduler_download_data:scheduler_download_data" rs_download_sky = "rubin_scheduler.data.rs_download_sky:rs_download_sky" +archive_sim = "rubin_scheduler.sim_archive:make_sim_archive_cli" [tool.setuptools.dynamic] diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 21ed783a..29d51af1 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -6,8 +6,10 @@ "transfer_archive_dir", "check_opsim_archive_resource", "read_archived_sim_metadata", + "make_sim_archive_cli", ] +import argparse import datetime import hashlib import json @@ -22,6 +24,7 @@ from tempfile import TemporaryDirectory import numpy as np +import pandas as pd import yaml from astropy.time import Time from conda.cli.main_list import print_packages @@ -45,6 +48,7 @@ def make_sim_archive_dir( tags=[], label=None, data_path=None, + capture_env=True, ): """Create or fill a local simulation archive directory. @@ -67,6 +71,9 @@ def make_sim_archive_dir( A label to be included in the metadata, by default None. data_path : `str` or `pathlib.Path`, optional The path to the simulation archive directory, by default None. + capture_env : `bool` + Use the current environment as the sim environment. + Defaults to True. 
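+        Set this to False when archiving a simulation that was produced
+        elsewhere, so the current environment is not recorded as if it
+        had produced the simulation.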
Returns ------- @@ -104,30 +111,31 @@ def make_sim_archive_dir( with open(stats_fname, "w") as stats_io: print(SchemaConverter().obs2opsim(observations).describe().T.to_csv(sep="\t"), file=stats_io) - # Save the conda environment - conda_prefix = Path(sys.executable).parent.parent.as_posix() - if is_conda_environment(conda_prefix): - conda_base_fname = "environment.txt" - environment_fname = data_path.joinpath(conda_base_fname).as_posix() + if capture_env: + # Save the conda environment + conda_prefix = Path(sys.executable).parent.parent.as_posix() + if is_conda_environment(conda_prefix): + conda_base_fname = "environment.txt" + environment_fname = data_path.joinpath(conda_base_fname).as_posix() - # Python equivilest of conda list --export -p $conda_prefix > $environment_fname - with open(environment_fname, "w") as environment_io: - with redirect_stdout(environment_io): - print_packages(conda_prefix, format="export") + # Python equivilest of conda list --export -p $conda_prefix > $environment_fname + with open(environment_fname, "w") as environment_io: + with redirect_stdout(environment_io): + print_packages(conda_prefix, format="export") - files["environment"] = {"name": conda_base_fname} + files["environment"] = {"name": conda_base_fname} - # Save pypi packages - pypi_base_fname = "pypi.json" - pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() + # Save pypi packages + pypi_base_fname = "pypi.json" + pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() - pip_json_output = os.popen("pip list --format json") - pip_list = json.loads(pip_json_output.read()) + pip_json_output = os.popen("pip list --format json") + pip_list = json.loads(pip_json_output.read()) - with open(pypi_fname, "w") as pypi_io: - print(json.dumps(pip_list, indent=4), file=pypi_io) + with open(pypi_fname, "w") as pypi_io: + print(json.dumps(pip_list, indent=4), file=pypi_io) - files["pypi"] = {"name": pypi_base_fname} + files["pypi"] = {"name": pypi_base_fname} # Add supplied files for file_type, fname in in_files.items(): @@ -155,8 +163,10 @@ def evening_local_date(mjd, longitude=site.longitude): return evening_local_iso opsim_metadata = {} - opsim_metadata["scheduler_version"] = rubin_scheduler.__version__ - opsim_metadata["host"] = socket.getfqdn() + if capture_env: + opsim_metadata["scheduler_version"] = rubin_scheduler.__version__ + opsim_metadata["host"] = socket.getfqdn() + opsim_metadata["username"] = os.environ["USER"] simulation_dates = {} @@ -354,3 +364,78 @@ def read_archived_sim_metadata(base_uri, latest=None, num_nights=5): all_metadata[str(found_resource.dirname())] = these_metadata return all_metadata + + +def make_sim_archive_cli(*args): + parser = argparse.ArgumentParser(description="Add files to sim archive") + parser.add_argument( + "label", + type=str, + help="A label for the simulation.", + ) + parser.add_argument( + "opsim", + type=str, + help="File name of opsim database.", + ) + parser.add_argument("--rewards", type=str, default=None, help="A rewards HDF5 file.") + parser.add_argument( + "--scheduler_version", + type=str, + help="The version of the scheduler run.", + ) + parser.add_argument("--scheduler", type=str, default=None, help="A snapshot of the scheduler.") + parser.add_argument("--script", type=str, default=None, help="The script run to create the simulation.") + parser.add_argument( + "--notebook", type=str, default=None, help="The notebook run to create the simulation." 
+ ) + parser.add_argument( + "--current_env", + action="store_true", + help="Record the current environment as the simulation environment.", + ) + parser.add_argument( + "--archive_base_uri", + type=str, + default="s3://rubin-scheduler-prenight/opsim/", + help="Base URI for the archive", + ) + parser.add_argument("--tags", type=str, default=[], nargs="*", help="The tags on the simulation.") + arg_values = parser.parse_args() if len(args) == 0 else parser.parse_args(args) + + observations = SchemaConverter().opsim2obs(arg_values.opsim) + + if arg_values.rewards is not None: + try: + reward_df = pd.read_hdf(arg_values.rewards, "reward_df") + except KeyError: + reward_df = None + + try: + obs_rewards = pd.read_hdf(arg_values.rewards, "obs_rewards") + except KeyError: + obs_rewards = None + + filename_args = ["scheduler", "script", "notebook"] + in_files = {} + for filename_arg in filename_args: + try: + filename = getattr(arg_values, filename_arg) + if filename is not None: + in_files[filename] = filename + except AttributeError: + pass + + data_path = make_sim_archive_dir( + observations, + reward_df, + obs_rewards, + in_files, + tags=arg_values.tags, + label=arg_values.label, + capture_env=arg_values.current_env, + ) + + sim_archive_uri = transfer_archive_dir(data_path.name, arg_values.archive_base_uri) + + return sim_archive_uri diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 29b63263..0ecbf8ad 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -1,14 +1,18 @@ import lzma import pickle import unittest +import urllib from pathlib import Path from tempfile import TemporaryDirectory +import lsst.resources + from rubin_scheduler.scheduler import sim_runner from rubin_scheduler.scheduler.example import example_scheduler from rubin_scheduler.scheduler.model_observatory import ModelObservatory from rubin_scheduler.sim_archive.sim_archive import ( check_opsim_archive_resource, + make_sim_archive_cli, make_sim_archive_dir, read_archived_sim_metadata, transfer_archive_dir, @@ -86,3 +90,27 @@ def test_sim_archive(self): base = sim_archive_uri.dirname().geturl().removeprefix(test_resource_uri).rstrip("/").lstrip("/") expected_label = f"{base} test" self.assertEqual(archive_metadata[sim_archive_uri.geturl()]["label"], expected_label) + + def test_cli(self): + test_resource_path = lsst.resources.ResourcePath("resource://schedview/data/") + with test_resource_path.join("sample_opsim.db").as_local() as local_rp: + opsim = urllib.parse.urlparse(local_rp.geturl()).path + + with test_resource_path.join("sample_rewards.h5").as_local() as local_rp: + rewards = urllib.parse.urlparse(local_rp.geturl()).path + + with test_resource_path.join("sample_scheduler.pickle.xz").as_local() as local_rp: + scheduler = urllib.parse.urlparse(local_rp.geturl()).path + + with TemporaryDirectory() as test_archive_dir: + test_archive_uri = f"file://{test_archive_dir}/" + make_sim_archive_cli( + "Test", + opsim, + "--rewards", + rewards, + "--scheduler", + scheduler, + "--archive_base_uri", + test_archive_uri, + ) From 6d8271a172903295abfdc8772b30f3c9fec56c03 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 3 Jan 2024 10:46:38 -0800 Subject: [PATCH 11/19] include conda package dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 52734b5a..9dd8eae9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ h5py requests shapely tqdm 
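+# conda is imported by rubin_scheduler.sim_archive to record the environment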
+conda From bdfd4fb4d9ac966215422c67f5fa2fdf58de0668 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 3 Jan 2024 15:09:12 -0800 Subject: [PATCH 12/19] Support getting sunset from local date --- rubin_scheduler/site_models/almanac.py | 90 ++++++++++++++++++++++++-- tests/site_models/test_almanac.py | 15 +++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/rubin_scheduler/site_models/almanac.py b/rubin_scheduler/site_models/almanac.py index cce6fa7a..f92610a9 100644 --- a/rubin_scheduler/site_models/almanac.py +++ b/rubin_scheduler/site_models/almanac.py @@ -1,5 +1,6 @@ __all__ = ("Almanac",) +import datetime import os import numpy as np @@ -80,18 +81,97 @@ def get_planet_positions(self, mjd): result[pn + "_RA"] = temp_result return result - def get_sunset_info(self, mjd): + def get_sunset_info(self, mjd=None, evening_date=None, longitude=None): + """Returns a numpy array with mjds for various events + (sunset, moonrise, sun at -12 degrees alt, etc.). + + Parameters + ---------- + mjd : `float` + A UTC MJD that occurs during the desired night. + Defaults to None. + evening_date : `str` or `datetime.date` + The local date of the evening of the night whose index is desired, + in ISO8601 format (YYYY-MM-DD). Defaults to None. + longitude : `float` or `astropy.coordinates.angles.core.Angle` + If a float, then the value is interpreted as being in radians. + Defaults to None. + + Returns + ------- + sunset_info : `numpy.void` + A numpy object with dtype([ + ('night', '<i8'), + ('sunset', '<f8'), + ('sun_n12_setting', '<f8'), + ('sun_n18_setting', '<f8'), + ('sun_n18_rising', '<f8'), + ('sun_n12_rising', '<f8'), + ('sunrise', '<f8'), + ('moonrise', '<f8'), + ('moonset', '<f8') + ]) """ - Returns a numpy array with mjds for various events (sunset, moonrise, sun at -12 degrees alt, etc.). - Also the integer night number. - """ - indx = np.searchsorted(self.sunsets["sunset"], mjd, side="right") - 1 + if mjd is not None and evening_date is not None: + raise ValueError("At most one of mjd and evening_date can be set") + + # Default to now + if mjd is None and evening_date is None: + mjd = 40587 + datetime.datetime.now().timestamp() / (24 * 60 * 60) + + if mjd is not None: + indx = np.searchsorted(self.sunsets["sunset"], mjd, side="right") - 1 + elif evening_date is not None: + if longitude is None: + raise ValueError("If evening_date is set, longitude is needed as well") + indx = self.index_for_local_evening(evening_date, longitude) + return self.sunsets[indx] def mjd_indx(self, mjd): indx = np.searchsorted(self.sunsets["sunset"], mjd, side="right") - 1 return indx + def index_for_local_evening(self, evening_date, longitude): + """The index of the night with sunset at a given local date. + + Parameters + ---------- + evening_date : `str` or `datetime.date` + The local date of the evening of the night whose index is desired, + in ISO8601 format (YYYY-MM-DD). + longitude : `float` or `astropy.coordinates.angles.core.Angle` + If a float, then the value is interpreted as being in radians. + + Returns + ------- + night_index : `int` + The index of the requested night. 
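+
+        For example (an illustrative sketch, using an approximate Rubin
+        longitude, as in the unit tests)::
+
+            alm = Almanac()
+            indx = alm.index_for_local_evening("2024-03-15", np.radians(-70.75))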
+ """ + try: + longitude = longitude.radian + except AttributeError: + pass + + if isinstance(evening_date, str): + evening_date = datetime.date.fromisoformat(evening_date) + + evening_datetime = datetime.datetime(evening_date.year, evening_date.month, evening_date.day) + evening_mjd = np.floor(evening_datetime.timestamp() / (24 * 60 * 60) + 40587) + + # Depending on the time of year, the UTC date rollover might not + # always be on the same side of local sunset. Shift by the longitude + # to make sure the rollover is always near midnight, far from sunset. + matching_nights = np.argwhere( + np.floor(self.sunsets["sunset"] + longitude / (2 * np.pi)) == evening_mjd + ) + if len(matching_nights) < 1: + raise ValueError(f"Requested night {evening_date} outside of almanac date range") + + night_index = matching_nights.item() + + return night_index + def get_sun_moon_positions(self, mjd): """ All angles in Radians. moonPhase between 0 and 100. diff --git a/tests/site_models/test_almanac.py b/tests/site_models/test_almanac.py index dbf3e1f5..00283ee2 100644 --- a/tests/site_models/test_almanac.py +++ b/tests/site_models/test_almanac.py @@ -1,5 +1,7 @@ import unittest +import numpy as np +import pandas as pd from rubin_scheduler.site_models import Almanac @@ -15,6 +17,19 @@ def test_alm(self): moon = alma.get_sun_moon_positions(mjd) indx = alma.mjd_indx(mjd) + def test_index_for_local_evening(self): + almanac = Almanac() + longitude = np.radians(-70.75) + test_dates = ["2024-12-22", "2024-03-15", "2024-06-22"] + for test_date in test_dates: + night_index = almanac.index_for_local_evening(test_date, longitude) + sunset_mjd = almanac.sunsets[night_index]["sunset"] + sunset_timestamp = (sunset_mjd - 40587) * 24 * 60 * 60 + sunset_local_iso = ( + pd.Timestamp(sunset_timestamp, tz="UTC", unit="s").tz_convert("Chile/Continental").isoformat() + ) + self.assertTrue(sunset_local_iso.startswith(test_date)) + if __name__ == "__main__": unittest.main() From f7bdb65dc4c78ea56f2eac8b29a62cdc4fbf6d5e Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 3 Jan 2024 15:09:54 -0800 Subject: [PATCH 13/19] driver to run sim and archive results in one command --- rubin_scheduler/sim_archive/sim_archive.py | 73 ++++++++++++++++++---- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 29d51af1..29695afa 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -7,6 +7,7 @@ "check_opsim_archive_resource", "read_archived_sim_metadata", "make_sim_archive_cli", + "drive_sim", ] import argparse @@ -14,12 +15,14 @@ import hashlib import json import logging +import lzma +import pickle import os import shutil import socket import sys from contextlib import redirect_stdout -from numbers import Number +from numbers import Number, Real, Integral from pathlib import Path from tempfile import TemporaryDirectory @@ -33,6 +36,7 @@ import rubin_scheduler from rubin_scheduler.scheduler.utils import SchemaConverter +from rubin_scheduler.scheduler import sim_runner from rubin_scheduler.utils import Site SITE = None @@ -171,22 +175,28 @@ def evening_local_date(mjd, longitude=site.longitude): simulation_dates = {} if "mjd_start" in sim_runner_kwargs: - simulation_dates["start"] = evening_local_date(sim_runner_kwargs["mjd_start"]) + simulation_dates["first"] = evening_local_date(sim_runner_kwargs["mjd_start"]) if "survey_length" in sim_runner_kwargs: - 
simulation_dates["end"] = evening_local_date( + simulation_dates["last"] = evening_local_date( sim_runner_kwargs["mjd_start"] + sim_runner_kwargs["survey_length"] ) else: - simulation_dates["start"] = evening_local_date(observations["mjd"].min()) - simulation_dates["end"] = evening_local_date(observations["mjd"].max()) + simulation_dates["first"] = evening_local_date(observations["mjd"].min()) + simulation_dates["last"] = evening_local_date(observations["mjd"].max()) if len(sim_runner_kwargs) > 0: opsim_metadata["sim_runner_kwargs"] = {} for key, value in sim_runner_kwargs.items(): + # Cast numpy number types to ints, floats, and reals to avoid + # confusing the yaml module. match value: - case bool() | Number() | str(): + case bool(): opsim_metadata["sim_runner_kwargs"][key] = value + case Integral(): + opsim_metadata["sim_runner_kwargs"][key] = int(value) + case Number(): + opsim_metadata["sim_runner_kwargs"][key] = float(value) case _: opsim_metadata["sim_runner_kwargs"][key] = str(value) @@ -311,11 +321,11 @@ def _build_archived_sim_label(base_uri, metadata_resource, metadata): try: sim_dates = metadata["simulated_dates"] - start_date = sim_dates["start"] - end_date = sim_dates["end"] - label = f"{label_base} of {start_date}" - if end_date != start_date: - label = f"{label} to {end_date}" + first_date = sim_dates["first"] + last_date = sim_dates["last"] + label = f"{label_base} of {first_date}" + if last_date != first_date: + label = f"{label} through {last_date}" except KeyError: label = label_base @@ -439,3 +449,44 @@ def make_sim_archive_cli(*args): sim_archive_uri = transfer_archive_dir(data_path.name, arg_values.archive_base_uri) return sim_archive_uri + + +def drive_sim(label, observatory, scheduler, archive_uri=None, tags=[], script=None, notebook=None, **kwargs): + in_files = {} + if script is not None: + in_files["script"] = script + + if notebook is not None: + in_files["notebook"] = notebook + + with TemporaryDirectory() as local_data_dir: + # We want to store the state of the scheduler at the start of the sim, + # so we need to save it now before we run the simulation. 
+        scheduler_path = Path(local_data_dir).joinpath("scheduler.pickle.xz")
+        with lzma.open(scheduler_path, "wb", format=lzma.FORMAT_XZ) as pio:
+            pickle.dump(scheduler, pio)
+        in_files["scheduler"] = scheduler_path.name
+
+        sim_results = sim_runner(observatory, scheduler, **kwargs)
+
+        observations = sim_results[2]
+        reward_df = sim_results[3] if scheduler.keep_rewards else None
+        obs_rewards = sim_results[4] if scheduler.keep_rewards else None
+
+        data_dir = make_sim_archive_dir(
+            observations,
+            reward_df=reward_df,
+            obs_rewards=obs_rewards,
+            in_files=in_files,
+            sim_runner_kwargs=kwargs,
+            tags=tags,
+            label=label,
+            capture_env=True,
+        )
+
+        if archive_uri is not None:
+            resource_path = transfer_archive_dir(data_dir.name, archive_uri)
+        else:
+            resource_path = ResourcePath(data_dir.name, forceDirectory=True)
+
+    return resource_path

From 7a50d30333e2a1fe74c2c4f9b68b1ea977a1abd6 Mon Sep 17 00:00:00 2001
From: Eric Neilsen <neilsen@fnal.gov>
Date: Thu, 4 Jan 2024 08:33:58 -0800
Subject: [PATCH 14/19] match api of drive_sim to sim_runner

---
 rubin_scheduler/sim_archive/sim_archive.py | 67 +++++++++++++++++++++-
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py
index 29695afa..9b927b74 100644
--- a/rubin_scheduler/sim_archive/sim_archive.py
+++ b/rubin_scheduler/sim_archive/sim_archive.py
@@ -451,7 +451,69 @@ def make_sim_archive_cli(*args):
     return sim_archive_uri
 
 
-def drive_sim(label, observatory, scheduler, archive_uri=None, tags=[], script=None, notebook=None, **kwargs):
+def drive_sim(
+    observatory, scheduler, archive_uri=None, label=None, tags=[], script=None, notebook=None, **kwargs
+):
+    """Run a simulation and archive the results.
+
+    Parameters
+    ----------
+    observatory : `ModelObservatory`
+        The model for the observatory.
+    scheduler : `CoreScheduler`
+        The scheduler to use.
+    archive_uri : `str`, optional
+        The root URI of the archive resource into which the results should
+        be stored. Defaults to None.
+    label : `str`, optional
+        The label for the simulation in the archive. Defaults to None.
+    tags : `list` of `str`, optional
+        The tags for the simulation in the archive. Defaults to an empty list.
+    script : `str`, optional
+        The filename of the script producing this simulation.
+        Defaults to None.
+    notebook : `str`, optional
+        The filename of the notebook producing the simulation.
+        Defaults to None.
+
+    Returns
+    -------
+    observatory : `ModelObservatory`
+        The model for the observatory.
+    scheduler : `CoreScheduler`
+        The scheduler used.
+    observations : `numpy.recarray`
+        The observations produced.
+    reward_df : `pandas.DataFrame`, optional
+        The table of rewards. Present if `record_rewards`
+        or `scheduler.keep_rewards` is True.
+    obs_rewards : `pandas.Series`, optional
+        The mapping of entries in reward_df to observations. Present if
+        `record_rewards` or `scheduler.keep_rewards` is True.
+    resource_path : `ResourcePath`, optional
+        The resource path to the archive of the simulation. Present if
+        `archive_uri` was set.
+
+    Notes
+    -----
+    Additional parameters not described above will be passed into
+    `sim_runner`.
+
+    If the `archive_uri` parameter is not supplied, `sim_runner` is run
+    directly, so that `drive_sim` can act as a drop-in replacement of
+    `sim_runner`.
+
+    In a jupyter notebook, the notebook can be saved for the notebook parameter
+    using `%notebook $notebook_fname` (where `notebook_fname` is a variable
+    holding the filename for the notebook) in the cell prior to calling
+    `drive_sim`.
+    """
+    if 'record_rewards' in kwargs:
+        if kwargs['record_rewards'] and not scheduler.keep_rewards:
+            raise ValueError("To keep rewards, scheduler.keep_rewards must be True")
+    else:
+        kwargs['record_rewards'] = scheduler.keep_rewards
+
     in_files = {}
     if script is not None:
         in_files["script"] = script
@@ -489,4 +551,5 @@ def drive_sim(label, observatory, scheduler, archive_uri=None, tags=[], script=N
     else:
         resource_path = ResourcePath(data_dir.name, forceDirectory=True)
 
-    return resource_path
+    results = sim_results + (resource_path,)
+    return results

From 05c0ae7c98684191045f7027ca9d06cdde7c73e2 Mon Sep 17 00:00:00 2001
From: Eric Neilsen <neilsen@fnal.gov>
Date: Thu, 4 Jan 2024 12:44:02 -0800
Subject: [PATCH 15/19] improve help for make_sim_archive

---
 rubin_scheduler/sim_archive/sim_archive.py | 26 ++++++++++++++++------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py
index 9b927b74..d69a9d6c 100644
--- a/rubin_scheduler/sim_archive/sim_archive.py
+++ b/rubin_scheduler/sim_archive/sim_archive.py
@@ -392,12 +392,24 @@ def make_sim_archive_cli(*args):
     parser.add_argument(
         "--scheduler_version",
         type=str,
-        help="The version of the scheduler run.",
+        default=None,
+        help="The version of the scheduler that produced the opsim database.",
     )
-    parser.add_argument("--scheduler", type=str, default=None, help="A snapshot of the scheduler.")
-    parser.add_argument("--script", type=str, default=None, help="The script run to create the simulation.")
     parser.add_argument(
-        "--notebook", type=str, default=None, help="The notebook run to create the simulation."
+        "--scheduler",
+        type=str,
+        default=None,
+        help="A snapshot of the scheduler used to produce the database, at the start of the simulation.",
+    )
+    parser.add_argument(
+        "--script", type=str, default=None, help="The file name of the script run to create the simulation."
+    )
+    parser.add_argument(
+        "--notebook",
+        type=str,
+        default=None,
+        help="""The file name of the notebook run to create the simulation.
+    This can be produced using the %notebook magic.""",
     )
     parser.add_argument(
         "--current_env",
@@ -508,11 +520,11 @@ def drive_sim(
     holding the filename for the notebook) in the cell prior to calling
     `drive_sim`.
""" - if 'record_rewards' in kwargs: - if kwargs['record_rewards'] and not scheduler.keep_rewards: + if "record_rewards" in kwargs: + if kwargs["record_rewards"] and not scheduler.keep_rewards: raise ValueError("To keep rewards, scheduler.keep_rewards must be True") else: - kwargs['record_rewards'] = scheduler.keep_rewards + kwargs["record_rewards"] = scheduler.keep_rewards in_files = {} if script is not None: From bf9afedc5365d6b7164d569cc659817250e911d6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Fri, 5 Jan 2024 09:03:53 -0800 Subject: [PATCH 16/19] be robust wrt absent rewards in archive --- rubin_scheduler/sim_archive/sim_archive.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index d69a9d6c..ec1b9d44 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -22,7 +22,7 @@ import socket import sys from contextlib import redirect_stdout -from numbers import Number, Real, Integral +from numbers import Number, Integral from pathlib import Path from tempfile import TemporaryDirectory @@ -404,12 +404,14 @@ def make_sim_archive_cli(*args): parser.add_argument( "--script", type=str, default=None, help="The file name of the script run to create the simulation." ) + + notebook_help = "The file name of the notebook run to create the simulation." + notebook_help = notebook_help + " This can be produced using the %%notebook magic." parser.add_argument( "--notebook", type=str, default=None, - help="""The file name of the notebook run to create the simulation. - This can be produced using the %notebook magic.""", + help=notebook_help, ) parser.add_argument( "--current_env", @@ -437,6 +439,9 @@ def make_sim_archive_cli(*args): obs_rewards = pd.read_hdf(arg_values.rewards, "obs_rewards") except KeyError: obs_rewards = None + else: + reward_df = None + obs_rewards = None filename_args = ["scheduler", "script", "notebook"] in_files = {} From 554b4cc421029bfcedacc2a80ed3264516f11f59 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Fri, 5 Jan 2024 09:46:56 -0800 Subject: [PATCH 17/19] set nside in DDFs so roi works correctly --- rubin_scheduler/scheduler/example/example_scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rubin_scheduler/scheduler/example/example_scheduler.py b/rubin_scheduler/scheduler/example/example_scheduler.py index 0f638e28..9d988d08 100644 --- a/rubin_scheduler/scheduler/example/example_scheduler.py +++ b/rubin_scheduler/scheduler/example/example_scheduler.py @@ -1087,10 +1087,10 @@ def ddf_surveys(detailers=None, season_unobs_frac=0.2, euclid_detailers=None, ns euclid_obs = np.where((obs_array["note"] == "DD:EDFS_b") | (obs_array["note"] == "DD:EDFS_a"))[0] all_other = np.where((obs_array["note"] != "DD:EDFS_b") & (obs_array["note"] != "DD:EDFS_a"))[0] - survey1 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], detailers=detailers) + survey1 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], nside=nside, detailers=detailers) survey1.set_script(obs_array[all_other]) - survey2 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], detailers=euclid_detailers) + survey2 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], nside=nside, detailers=euclid_detailers) survey2.set_script(obs_array[euclid_obs]) return [survey1, survey2] From 13b1f3256d5b6f10369685b2a5809c4391182dc6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: 
Fri, 5 Jan 2024 11:24:11 -0800 Subject: [PATCH 18/19] install lsst.resources in github actions --- .github/workflows/build_docs.yaml | 1 + .github/workflows/test_and_build.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml index bd4313ef..31088c69 100644 --- a/.github/workflows/build_docs.yaml +++ b/.github/workflows/build_docs.yaml @@ -30,6 +30,7 @@ jobs: run: | mamba install --quiet --file=requirements.txt mamba install --quiet pip + pip install lsst.resources pip install "documenteer[guide]" - name: install rubin_scheduler diff --git a/.github/workflows/test_and_build.yaml b/.github/workflows/test_and_build.yaml index 2ccbb5da..c920482b 100644 --- a/.github/workflows/test_and_build.yaml +++ b/.github/workflows/test_and_build.yaml @@ -39,6 +39,7 @@ jobs: run: | mamba install --quiet --file=requirements.txt mamba install --quiet --file=test-requirements.txt + pip install lsst.resources - name: install rubin_scheduler shell: bash -l {0} From b05caeac308d46a379e2eeef4c57afa6ef731776 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Fri, 5 Jan 2024 12:07:02 -0800 Subject: [PATCH 19/19] skip test that requires schedview if it is not installed. --- tests/sim_archive/test_sim_archive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 0ecbf8ad..94fbfee6 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -1,3 +1,4 @@ +import importlib import lzma import pickle import unittest @@ -91,6 +92,7 @@ def test_sim_archive(self): expected_label = f"{base} test" self.assertEqual(archive_metadata[sim_archive_uri.geturl()]["label"], expected_label) + @unittest.skipIf(importlib.util.find_spec("schedview") is None, "No schedview") def test_cli(self): test_resource_path = lsst.resources.ResourcePath("resource://schedview/data/") with test_resource_path.join("sample_opsim.db").as_local() as local_rp:
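
Taken together, these patches support a workflow like the following minimal
sketch (illustrative only: the label and tags are placeholders, and the
trailing keyword arguments are passed through to `sim_runner`, as in the
unit tests above):

    from rubin_scheduler.scheduler.example import example_scheduler
    from rubin_scheduler.scheduler.model_observatory import ModelObservatory
    from rubin_scheduler.sim_archive import drive_sim
    from rubin_scheduler.utils import survey_start_mjd

    mjd_start = survey_start_mjd()
    scheduler = example_scheduler(mjd_start=mjd_start)
    scheduler.keep_rewards = True
    observatory = ModelObservatory(mjd_start=mjd_start)

    # Runs sim_runner, snapshots the scheduler and environment, and
    # transfers everything to the archive; returns the sim_runner results
    # plus the destination ResourcePath.
    results = drive_sim(
        observatory,
        scheduler,
        archive_uri="file:///tmp/sim_archive/",  # hypothetical destination
        label="one night test",
        tags=["test"],
        mjd_start=mjd_start,
        survey_length=1,  # days
        record_rewards=True,
    )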