From e2ac8e7e558dd0bf6699bd31afd24357d20a0f6d Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Mon, 18 Dec 2023 14:57:17 -0800 Subject: [PATCH 01/19] Start of sim archiving --- rubin_scheduler/sim_archive/sim_archive.py | 230 +++++++++++++++++++++ tests/sim_archive/test_sim_archive.py | 59 ++++++ 2 files changed, 289 insertions(+) create mode 100644 rubin_scheduler/sim_archive/sim_archive.py create mode 100644 tests/sim_archive/test_sim_archive.py diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py new file mode 100644 index 00000000..f6cbccf2 --- /dev/null +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -0,0 +1,230 @@ +import datetime +import hashlib +import json +import os +import shutil +import socket +import sys +from numbers import Number +from pathlib import Path +from tempfile import TemporaryDirectory +from contextlib import redirect_stdout + +import lsst.resources +import numpy as np +import yaml +from astropy.time import Time + +from conda.exceptions import EnvironmentLocationNotFound +from conda.gateways.disk.test import is_conda_environment +from conda.cli.main_list import print_packages + +import rubin_scheduler +from rubin_scheduler.scheduler.utils import SchemaConverter +from rubin_scheduler.utils import Site + +SITE = None + + +def make_sim_archive_dir( + observations, reward_df=None, obs_rewards=None, in_files={}, sim_runner_kwargs={}, data_path=None +): + """Create or fill a local simulation archive directory. + + Parameters + ---------- + observations : `numpy.recarray` + The observations data, in the "obs" format as accepted and created by + `rubin_scheduler.scheduler.utils.SchemaConverter`. + reward_df : `pandas.DataFrame`, optional + The reward data, by default None. + obs_rewards : `pandas.DataFrame`, optional + The observation rewards data, by default None. + in_files : `dict`, optional + Additional input files to be included in the archive, by default {}. + sim_runner_kwargs : `dict`, optional + Additional simulation runner keyword arguments, by default {}. + data_path : `str` or `pathlib.Path`, optional + The path to the simulation archive directory, by default None. + + Returns + ------- + data_dir : `pathlib.Path` or `tempfile.TemporaryDirectory` + The temporary directory containing the simulation archive. 
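+        If ``data_path`` was provided, that same path is returned as a
+        `pathlib.Path` instead. When a `TemporaryDirectory` is returned, the
+        caller must keep a reference to it, or its contents will be cleaned
+        up automatically (see the note in the function body below).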
+ """ + if data_path is None: + data_dir = TemporaryDirectory() + data_path = Path(data_dir.name) + else: + data_dir = None + + if not isinstance(data_path, Path): + data_path = Path(data_path) + + files = {} + + # Save the observations + files["observations"] = {"name": "opsim.db"} + opsim_output_fname = data_path.joinpath(files["observations"]["name"]) + SchemaConverter().obs2opsim(observations, filename=opsim_output_fname) + + # Save the rewards + if reward_df is not None and obs_rewards is not None: + files["rewards"] = {"name": "rewards.h5"} + rewards_fname = data_path.joinpath(files["rewards"]["name"]) + if reward_df is not None: + reward_df.to_hdf(rewards_fname, "reward_df") + if obs_rewards is not None: + obs_rewards.to_hdf(rewards_fname, "obs_rewards") + + # Save basic statistics + files["statistics"] = {"name": "obs_stats.txt"} + stats_fname = data_path.joinpath(files["statistics"]["name"]) + with open(stats_fname, "w") as stats_io: + print(SchemaConverter().obs2opsim(observations).describe().T.to_csv(sep="\t"), file=stats_io) + + + # Save the conda environment + conda_prefix = Path(sys.executable).parent.parent.as_posix() + if not is_conda_environment(conda_prefix): + raise EnvironmentLocationNotFound(conda_prefix) + + environment_fname = data_path.joinpath('environment.txt').as_posix() + + # Python equivilest of conda list --export -p $conda_prefix > $environment_fname + with open(environment_fname, 'w') as environment_io: + with redirect_stdout(environment_io): + print_packages(conda_prefix, format='export') + + # Save pypi packages + pypi_fname = data_path.joinpath('pypi.json').as_posix() + + pip_json_output = os.popen('pip list --format json') + pip_list = json.loads(pip_json_output.read()) + + with open(pypi_fname, 'w') as pypi_io: + print(json.dumps(pip_list, indent=4), file=pypi_io) + + # Add supplied files + for file_type, fname in in_files.items(): + files[file_type] = {"name": Path(fname).name} + try: + shutil.copyfile(fname, data_path.joinpath(files[file_type]["name"])) + except shutil.SameFileError: + pass + + # Add file hashes + for file_type in files: + fname = data_path.joinpath(files[file_type]["name"]) + with open(fname, "rb") as file_io: + content = file_io.read() + + files[file_type]["md5"] = hashlib.md5(content).hexdigest() + + # Metadata + # To use a different site, a user can set the global variable SITE. 
+    site = Site(name="LSST") if SITE is None else SITE
+
+    def evening_local_date(mjd, longitude=site.longitude):
+        evening_local_mjd = np.floor(mjd + longitude / 360 - 0.5).astype(int)
+        evening_local_iso = Time(evening_local_mjd, format="mjd").iso[:10]
+        return evening_local_iso
+
+    opsim_metadata = {}
+    opsim_metadata["scheduler_version"] = rubin_scheduler.__version__
+    opsim_metadata["host"] = socket.getfqdn()
+    opsim_metadata["username"] = os.environ["USER"]
+
+    simulation_dates = {}
+    if "mjd_start" in sim_runner_kwargs:
+        simulation_dates["start"] = evening_local_date(sim_runner_kwargs["mjd_start"])
+
+        if "survey_length" in sim_runner_kwargs:
+            simulation_dates["end"] = evening_local_date(
+                sim_runner_kwargs["mjd_start"] + sim_runner_kwargs["survey_length"]
+            )
+    else:
+        simulation_dates["start"] = evening_local_date(observations["mjd"].min())
+        simulation_dates["end"] = evening_local_date(observations["mjd"].max())
+
+    if len(sim_runner_kwargs) > 0:
+        opsim_metadata["sim_runner_kwargs"] = {}
+        for key, value in sim_runner_kwargs.items():
+            match value:
+                case bool() | Number() | str():
+                    opsim_metadata["sim_runner_kwargs"][key] = value
+                case _:
+                    opsim_metadata["sim_runner_kwargs"][key] = str(value)
+
+    opsim_metadata["simulated_dates"] = simulation_dates
+    opsim_metadata["files"] = files
+
+    sim_metadata_fname = data_path.joinpath("sim_metadata.yaml")
+    with open(sim_metadata_fname, "w") as sim_metadata_io:
+        print(yaml.dump(opsim_metadata, indent=4), file=sim_metadata_io)
+
+    files["metadata"] = {"name": sim_metadata_fname}
+
+    if data_dir is not None:
+        # If we created a temporary directory and do not return it, it
+        # will get automatically cleaned up, losing our work.
+        # So, return it.
+        return data_dir
+
+    return data_path
+
+
+def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-prenight/opsim/"):
+    """Transfer the contents of an archive directory to a resource.
+
+    Parameters:
+    ----------
+    archive_dir : `str`
+        The path to the archive directory containing the files to be transferred.
+    archive_base_uri : `str`, optional
+        The base URI to which the archive files will be transferred. Default is "s3://rubin-scheduler-prenight/opsim/".
+
+    Returns:
+    -------
+    resource_rpath : `lsst.resources.ResourcePath`
+        The destination resource.
+    """
+
+    metadata_fname = Path(archive_dir).joinpath("sim_metadata.yaml")
+    with open(metadata_fname, "r") as metadata_io:
+        sim_metadata = yaml.safe_load(metadata_io)
+
+    insert_date = datetime.datetime.utcnow().date().isoformat()
+    insert_date_rpath = lsst.resources.ResourcePath(archive_base_uri).join(insert_date, forceDirectory=True)
+    if not insert_date_rpath.exists():
+        insert_date_rpath.mkdir()
+
+    # Number the sims in the insert date dir by
+    # looking for all the integer directories, and choosing the next one.
+    found_ids = []
+    for base_dir, found_dirs, found_files in insert_date_rpath.walk():
+        if base_dir == insert_date_rpath:
+            for found_dir in found_dirs:
+                try:
+                    found_ids.append(int(found_dir[:-1]))
+                except ValueError:
+                    pass
+
+    new_id = max(found_ids) + 1 if len(found_ids) > 0 else 1
+    resource_rpath = insert_date_rpath.join(f"{new_id}", forceDirectory=True)
+    resource_rpath.mkdir()
+
+    # Include the metadata file itself.
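+    # Listing it in "files" lets the copy loop below transfer
+    # sim_metadata.yaml to the destination along with the data files.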
+ sim_metadata["files"]["metadata"] = {"name": "sim_metadata.yaml"} + + for file_info in sim_metadata["files"].values(): + source_fname = Path(archive_dir).joinpath(file_info["name"]) + with open(source_fname, "rb") as source_io: + content = source_io.read() + + destination_rpath = resource_rpath.join(file_info["name"]) + destination_rpath.write(content) + + print(f"Copied {source_fname} to {destination_rpath}") + + return resource_rpath diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py new file mode 100644 index 00000000..06d592da --- /dev/null +++ b/tests/sim_archive/test_sim_archive.py @@ -0,0 +1,59 @@ +import lzma +import pickle +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory + +from rubin_scheduler.scheduler import sim_runner +from rubin_scheduler.scheduler.example import example_scheduler +from rubin_scheduler.scheduler.model_observatory import ModelObservatory +from rubin_scheduler.sim_archive.sim_archive import make_sim_archive_dir, transfer_archive_dir +from rubin_scheduler.utils import survey_start_mjd + +TEST_SIM_DATE = "2025-01-01" + + +class TestSimArchive(unittest.TestCase): + def test_sim_archive(self): + # Begin by running a short simulation + mjd_start = survey_start_mjd() + survey_length = 1 # days + scheduler = example_scheduler(mjd_start=mjd_start) + scheduler.keep_rewards = True + observatory = ModelObservatory(mjd_start=mjd_start) + + # Record the state of the scheduler at the start of the sim. + data_dir = TemporaryDirectory() + data_path = Path(data_dir.name) + + scheduler_fname = data_path.joinpath("scheduler.pickle.xz") + with lzma.open(scheduler_fname, "wb", format=lzma.FORMAT_XZ) as pio: + pickle.dump(scheduler, pio) + + files_to_archive = {"scheduler": scheduler_fname} + + # Run the simulation + sim_runner_kwargs = { + "mjd_start": mjd_start, + "survey_length": survey_length, + "record_rewards": True, + } + + observatory, scheduler, observations, reward_df, obs_rewards = sim_runner( + observatory, scheduler, **sim_runner_kwargs + ) + + # Make the scratch sim archive + make_sim_archive_dir( + observations, + reward_df=reward_df, + obs_rewards=obs_rewards, + in_files=files_to_archive, + sim_runner_kwargs=sim_runner_kwargs, + data_path=data_path, + ) + + # Move the scratch sim archive to a test resource + test_resource_dir = TemporaryDirectory() + test_resource_uri = "file://" + test_resource_dir.name + transfer_archive_dir(data_dir.name, test_resource_uri) From 503ff550b9d9da11ec2110f5cf6fc4e3c5f3f0e6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 19 Dec 2023 11:30:22 -0800 Subject: [PATCH 02/19] Add opsim archive validation function --- rubin_scheduler/sim_archive/sim_archive.py | 52 ++++++++++++++++------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index f6cbccf2..54b5ee21 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -5,19 +5,18 @@ import shutil import socket import sys +from contextlib import redirect_stdout from numbers import Number from pathlib import Path from tempfile import TemporaryDirectory -from contextlib import redirect_stdout -import lsst.resources import numpy as np import yaml from astropy.time import Time - +from conda.cli.main_list import print_packages from conda.exceptions import EnvironmentLocationNotFound from conda.gateways.disk.test import is_conda_environment -from 
conda.cli.main_list import print_packages +from lsst.resources import ResourcePath import rubin_scheduler from rubin_scheduler.scheduler.utils import SchemaConverter @@ -83,26 +82,25 @@ def make_sim_archive_dir( with open(stats_fname, "w") as stats_io: print(SchemaConverter().obs2opsim(observations).describe().T.to_csv(sep="\t"), file=stats_io) - # Save the conda environment conda_prefix = Path(sys.executable).parent.parent.as_posix() if not is_conda_environment(conda_prefix): raise EnvironmentLocationNotFound(conda_prefix) - environment_fname = data_path.joinpath('environment.txt').as_posix() + environment_fname = data_path.joinpath("environment.txt").as_posix() # Python equivilest of conda list --export -p $conda_prefix > $environment_fname - with open(environment_fname, 'w') as environment_io: + with open(environment_fname, "w") as environment_io: with redirect_stdout(environment_io): - print_packages(conda_prefix, format='export') + print_packages(conda_prefix, format="export") # Save pypi packages - pypi_fname = data_path.joinpath('pypi.json').as_posix() + pypi_fname = data_path.joinpath("pypi.json").as_posix() - pip_json_output = os.popen('pip list --format json') + pip_json_output = os.popen("pip list --format json") pip_list = json.loads(pip_json_output.read()) - with open(pypi_fname, 'w') as pypi_io: + with open(pypi_fname, "w") as pypi_io: print(json.dumps(pip_list, indent=4), file=pypi_io) # Add supplied files @@ -186,7 +184,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre Returns: ------- - resource_rpath : `lsst.resources.ResourcePath` + resource_rpath : `ResourcePath` The destination resource. """ @@ -195,7 +193,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre sim_metadata = yaml.safe_load(metadata_io) insert_date = datetime.datetime.utcnow().date().isoformat() - insert_date_rpath = lsst.resources.ResourcePath(archive_base_uri).join(insert_date, forceDirectory=True) + insert_date_rpath = ResourcePath(archive_base_uri).join(insert_date, forceDirectory=True) if not insert_date_rpath.exists(): insert_date_rpath.mkdir() @@ -228,3 +226,31 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre print(f"Copied {source_fname} to {destination_rpath}") return resource_rpath + + +def check_opsim_archive_resource(archive_uri): + """Check the contents of an opsim archive resource. + + Parameters: + ---------- + archive_uri : `str` + The URI of the archive resource to be checked. + + Returns: + ------- + validity: `dict` + A dictionary of files checked, and their validity. 
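+        For example (illustrative): ``{"opsim.db": True, "rewards.h5": True}``.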
+ """ + metadata_path = ResourcePath(archive_uri).join("sim_metadata.yaml") + with metadata_path.open(mode="r") as metadata_io: + sim_metadata = yaml.safe_load(metadata_io) + + results = {} + + for file_info in sim_metadata["files"].values(): + resource_path = ResourcePath(archive_uri).join(file_info["name"]) + content = resource_path.read() + + results[file_info["name"]] = file_info["md5"] == hashlib.md5(content).hexdigest() + + return results From 7736e46c3001fedaed22df3fb648e4fb0395cb39 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 19 Dec 2023 11:30:49 -0800 Subject: [PATCH 03/19] Add validation and contents to sim archive tests --- tests/sim_archive/test_sim_archive.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 06d592da..4eec5a33 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -7,11 +7,13 @@ from rubin_scheduler.scheduler import sim_runner from rubin_scheduler.scheduler.example import example_scheduler from rubin_scheduler.scheduler.model_observatory import ModelObservatory -from rubin_scheduler.sim_archive.sim_archive import make_sim_archive_dir, transfer_archive_dir +from rubin_scheduler.sim_archive.sim_archive import ( + check_opsim_archive_resource, + make_sim_archive_dir, + transfer_archive_dir, +) from rubin_scheduler.utils import survey_start_mjd -TEST_SIM_DATE = "2025-01-01" - class TestSimArchive(unittest.TestCase): def test_sim_archive(self): @@ -56,4 +58,12 @@ def test_sim_archive(self): # Move the scratch sim archive to a test resource test_resource_dir = TemporaryDirectory() test_resource_uri = "file://" + test_resource_dir.name - transfer_archive_dir(data_dir.name, test_resource_uri) + sim_archive_uri = transfer_archive_dir(data_dir.name, test_resource_uri) + + # Check the saved archive + archive_check = check_opsim_archive_resource(sim_archive_uri) + self.assertEqual( + archive_check.keys(), set(["opsim.db", "rewards.h5", "scheduler.pickle.xz", "obs_stats.txt"]) + ) + for value in archive_check.values(): + self.assertTrue(value) From 9b56db7cb0e170b70880141b10267c29b9836b52 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 19 Dec 2023 12:27:03 -0800 Subject: [PATCH 04/19] include environment files in opsim archive --- rubin_scheduler/sim_archive/sim_archive.py | 21 ++++++++++++--------- tests/sim_archive/test_sim_archive.py | 12 +++++++++++- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 54b5ee21..1751980f 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -14,7 +14,6 @@ import yaml from astropy.time import Time from conda.cli.main_list import print_packages -from conda.exceptions import EnvironmentLocationNotFound from conda.gateways.disk.test import is_conda_environment from lsst.resources import ResourcePath @@ -84,18 +83,20 @@ def make_sim_archive_dir( # Save the conda environment conda_prefix = Path(sys.executable).parent.parent.as_posix() - if not is_conda_environment(conda_prefix): - raise EnvironmentLocationNotFound(conda_prefix) + if is_conda_environment(conda_prefix): + conda_base_fname = 'environment.txt' + environment_fname = data_path.joinpath(conda_base_fname).as_posix() - environment_fname = data_path.joinpath("environment.txt").as_posix() + # Python equivilest of 
conda list --export -p $conda_prefix > $environment_fname + with open(environment_fname, "w") as environment_io: + with redirect_stdout(environment_io): + print_packages(conda_prefix, format="export") - # Python equivilest of conda list --export -p $conda_prefix > $environment_fname - with open(environment_fname, "w") as environment_io: - with redirect_stdout(environment_io): - print_packages(conda_prefix, format="export") + files['environment'] = {'name': conda_base_fname} # Save pypi packages - pypi_fname = data_path.joinpath("pypi.json").as_posix() + pypi_base_fname = 'pypi.json' + pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() pip_json_output = os.popen("pip list --format json") pip_list = json.loads(pip_json_output.read()) @@ -103,6 +104,8 @@ def make_sim_archive_dir( with open(pypi_fname, "w") as pypi_io: print(json.dumps(pip_list, indent=4), file=pypi_io) + files['pypi'] = {'name': pypi_base_fname} + # Add supplied files for file_type, fname in in_files.items(): files[file_type] = {"name": Path(fname).name} diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 4eec5a33..0db95ace 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -63,7 +63,17 @@ def test_sim_archive(self): # Check the saved archive archive_check = check_opsim_archive_resource(sim_archive_uri) self.assertEqual( - archive_check.keys(), set(["opsim.db", "rewards.h5", "scheduler.pickle.xz", "obs_stats.txt"]) + archive_check.keys(), + set( + [ + "opsim.db", + "rewards.h5", + "scheduler.pickle.xz", + "obs_stats.txt", + "environment.txt", + "pypi.json", + ] + ), ) for value in archive_check.values(): self.assertTrue(value) From 0e95d4f252dccd4565eee9b8119fa9d5736ae17a Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 08:14:18 -0800 Subject: [PATCH 05/19] Add tags and label to sim metadata --- rubin_scheduler/sim_archive/sim_archive.py | 30 ++++++++++++++++++---- tests/sim_archive/test_sim_archive.py | 2 ++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 1751980f..d04d0b56 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -25,7 +25,14 @@ def make_sim_archive_dir( - observations, reward_df=None, obs_rewards=None, in_files={}, sim_runner_kwargs={}, data_path=None + observations, + reward_df=None, + obs_rewards=None, + in_files={}, + sim_runner_kwargs={}, + tags=[], + label=None, + data_path=None, ): """Create or fill a local simulation archive directory. @@ -42,6 +49,10 @@ def make_sim_archive_dir( Additional input files to be included in the archive, by default {}. sim_runner_kwargs : `dict`, optional Additional simulation runner keyword arguments, by default {}. + tags : `list` [`str`], optional + A list of tags/keywords to be included in the metadata, by default []. + label : `str`, optional + A label to be included in the metadata, by default None. data_path : `str` or `pathlib.Path`, optional The path to the simulation archive directory, by default None. 
@@ -84,7 +95,7 @@ def make_sim_archive_dir( # Save the conda environment conda_prefix = Path(sys.executable).parent.parent.as_posix() if is_conda_environment(conda_prefix): - conda_base_fname = 'environment.txt' + conda_base_fname = "environment.txt" environment_fname = data_path.joinpath(conda_base_fname).as_posix() # Python equivilest of conda list --export -p $conda_prefix > $environment_fname @@ -92,10 +103,10 @@ def make_sim_archive_dir( with redirect_stdout(environment_io): print_packages(conda_prefix, format="export") - files['environment'] = {'name': conda_base_fname} + files["environment"] = {"name": conda_base_fname} # Save pypi packages - pypi_base_fname = 'pypi.json' + pypi_base_fname = "pypi.json" pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() pip_json_output = os.popen("pip list --format json") @@ -104,7 +115,7 @@ def make_sim_archive_dir( with open(pypi_fname, "w") as pypi_io: print(json.dumps(pip_list, indent=4), file=pypi_io) - files['pypi'] = {'name': pypi_base_fname} + files["pypi"] = {"name": pypi_base_fname} # Add supplied files for file_type, fname in in_files.items(): @@ -160,6 +171,15 @@ def evening_local_date(mjd, longitude=site.longitude): opsim_metadata["simulated_dates"] = simulation_dates opsim_metadata["files"] = files + if len(tags) > 0: + for tag in tags: + assert isinstance(tag, str), "Tags must be strings." + opsim_metadata["tags"] = tags + + if label is not None: + assert isinstance(label, str), "The sim label must be a string." + opsim_metadata["label"] = label + sim_metadata_fname = data_path.joinpath("sim_metadata.yaml") with open(sim_metadata_fname, "w") as sim_metadata_io: print(yaml.dump(opsim_metadata, indent=4), file=sim_metadata_io) diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 0db95ace..83dbcc74 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -52,6 +52,8 @@ def test_sim_archive(self): obs_rewards=obs_rewards, in_files=files_to_archive, sim_runner_kwargs=sim_runner_kwargs, + tags=["test"], + label="test", data_path=data_path, ) From 0983f8de2289b6aca4ed890ce9348c1dabaeb5e9 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 09:53:01 -0800 Subject: [PATCH 06/19] add func for querying sim archive metadata --- rubin_scheduler/sim_archive/sim_archive.py | 65 ++++++++++++++++++++++ tests/sim_archive/test_sim_archive.py | 7 +++ 2 files changed, 72 insertions(+) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index d04d0b56..7867480e 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -277,3 +277,68 @@ def check_opsim_archive_resource(archive_uri): results[file_info["name"]] = file_info["md5"] == hashlib.md5(content).hexdigest() return results + + +def _build_archived_sim_label(base_uri, metadata_resource, metadata): + label_base = metadata_resource.dirname().geturl().removeprefix(base_uri).rstrip("/").lstrip("/") + + # If a label is supplied by the metadata, use it + if "label" in metadata: + label = f"{label_base} {metadata['label']}" + return label + + try: + sim_dates = metadata["simulated_dates"] + start_date = sim_dates["start"] + end_date = sim_dates["end"] + label = f"{label_base} of {start_date}" + if end_date != start_date: + label = f"{label} to {end_date}" + except KeyError: + label = label_base + + if "scheduler_version" in metadata: + label = f"{label} with 
{metadata['scheduler_version']}" + + return label + + +def read_archived_sim_metadata(base_uri, latest=None, num_nights=5): + """Read metadata for a time range of archived opsim output. + + Parameters: + ---------- + base_uri : `str` + The base URI of the archive resource to be checked. + latest : `str`, optional + The date of the latest simulation whose metadata should be loaded. + This is the date on which the simulations was added to the archive, + not necessarily the date on which the simulation was run, or any + of the dates simulated. + Default is today. + num_nights : `int` + The number of nights of the date window to load. + + Returns: + ------- + sim_metadata: `dict` + A dictionary of metadata for simulations in the date range. + """ + latest_mjd = int(Time.now().mjd if latest is None else Time(latest).mjd) + earliest_mjd = int(latest_mjd - num_nights) + + all_metadata = {} + for mjd in range(earliest_mjd, latest_mjd + 1): + iso_date = Time(mjd, format="mjd").iso[:10] + date_resource = ResourcePath(base_uri).join(iso_date, forceDirectory=True) + if date_resource.exists(): + for base_dir, found_dirs, found_files in date_resource.walk(file_filter=r".*sim_metadata.yaml"): + for found_file in found_files: + found_resource = ResourcePath(base_dir).join(found_file) + these_metadata = yaml.safe_load(found_resource.read().decode("utf-8")) + these_metadata["label"] = _build_archived_sim_label( + base_uri, found_resource, these_metadata + ) + all_metadata[str(found_resource.dirname())] = these_metadata + + return all_metadata diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 83dbcc74..29b63263 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -10,6 +10,7 @@ from rubin_scheduler.sim_archive.sim_archive import ( check_opsim_archive_resource, make_sim_archive_dir, + read_archived_sim_metadata, transfer_archive_dir, ) from rubin_scheduler.utils import survey_start_mjd @@ -79,3 +80,9 @@ def test_sim_archive(self): ) for value in archive_check.values(): self.assertTrue(value) + + # Read back the metadata + archive_metadata = read_archived_sim_metadata(test_resource_uri) + base = sim_archive_uri.dirname().geturl().removeprefix(test_resource_uri).rstrip("/").lstrip("/") + expected_label = f"{base} test" + self.assertEqual(archive_metadata[sim_archive_uri.geturl()]["label"], expected_label) From 8ce01c706a80b270b38519fcbbeebc1d25320d1b Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 12:14:44 -0800 Subject: [PATCH 07/19] replace printing with logging --- rubin_scheduler/sim_archive/sim_archive.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 7867480e..2b6d41cc 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -1,6 +1,7 @@ import datetime import hashlib import json +import logging import os import shutil import socket @@ -22,6 +23,7 @@ from rubin_scheduler.utils import Site SITE = None +LOGGER = logging.getLogger(__name__) def make_sim_archive_dir( @@ -246,7 +248,7 @@ def transfer_archive_dir(archive_dir, archive_base_uri="s3://rubin-scheduler-pre destination_rpath = resource_rpath.join(file_info["name"]) destination_rpath.write(content) - print(f"Copied {source_fname} to {destination_rpath}") + LOGGER.info(f"Copied {source_fname} to {destination_rpath}") return resource_rpath From 
0525e94fc9368e3f9d0321d3d5c961445d069f44 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 20 Dec 2023 12:20:37 -0800 Subject: [PATCH 08/19] follow module conventions --- rubin_scheduler/sim_archive/__init__.py | 1 + rubin_scheduler/sim_archive/sim_archive.py | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 rubin_scheduler/sim_archive/__init__.py diff --git a/rubin_scheduler/sim_archive/__init__.py b/rubin_scheduler/sim_archive/__init__.py new file mode 100644 index 00000000..ee2aa6d1 --- /dev/null +++ b/rubin_scheduler/sim_archive/__init__.py @@ -0,0 +1 @@ +from .sim_archive import * diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 2b6d41cc..21ed783a 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -1,3 +1,13 @@ +"""Tools for maintaining an archive of opsim output and metadata. +""" + +__all__ = [ + "make_sim_archive_dir", + "transfer_archive_dir", + "check_opsim_archive_resource", + "read_archived_sim_metadata", +] + import datetime import hashlib import json From 18dd89933458a271fa55c604c33f8eecc7dda3ec Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 2 Jan 2024 12:14:23 -0800 Subject: [PATCH 09/19] include lsst.resources to support sim_archive --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e473fccd..4a415bba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,7 @@ dependencies = [ "requests", "shapely", "tqdm", + "lsst.resources", ] [project.optional-dependencies] From ef07272c1aa11cad692772e676a5444d4a4b63f0 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Tue, 2 Jan 2024 16:01:15 -0800 Subject: [PATCH 10/19] initial cli for sim_archive --- pyproject.toml | 1 + rubin_scheduler/sim_archive/sim_archive.py | 125 +++++++++++++++++---- tests/sim_archive/test_sim_archive.py | 28 +++++ 3 files changed, 134 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4a415bba..a232b82b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dev = [ [project.scripts] scheduler_download_data = "rubin_scheduler.data.scheduler_download_data:scheduler_download_data" rs_download_sky = "rubin_scheduler.data.rs_download_sky:rs_download_sky" +archive_sim = "rubin_scheduler.sim_archive:make_sim_archive_cli" [tool.setuptools.dynamic] diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 21ed783a..29d51af1 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -6,8 +6,10 @@ "transfer_archive_dir", "check_opsim_archive_resource", "read_archived_sim_metadata", + "make_sim_archive_cli", ] +import argparse import datetime import hashlib import json @@ -22,6 +24,7 @@ from tempfile import TemporaryDirectory import numpy as np +import pandas as pd import yaml from astropy.time import Time from conda.cli.main_list import print_packages @@ -45,6 +48,7 @@ def make_sim_archive_dir( tags=[], label=None, data_path=None, + capture_env=True, ): """Create or fill a local simulation archive directory. @@ -67,6 +71,9 @@ def make_sim_archive_dir( A label to be included in the metadata, by default None. data_path : `str` or `pathlib.Path`, optional The path to the simulation archive directory, by default None. + capture_env : `bool` + Use the current environment as the sim environment. + Defaults to True. 
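+        Set this to False when archiving a simulation that was produced
+        elsewhere, so the current environment is not recorded as if it
+        had produced the simulation.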
Returns ------- @@ -104,30 +111,31 @@ def make_sim_archive_dir( with open(stats_fname, "w") as stats_io: print(SchemaConverter().obs2opsim(observations).describe().T.to_csv(sep="\t"), file=stats_io) - # Save the conda environment - conda_prefix = Path(sys.executable).parent.parent.as_posix() - if is_conda_environment(conda_prefix): - conda_base_fname = "environment.txt" - environment_fname = data_path.joinpath(conda_base_fname).as_posix() + if capture_env: + # Save the conda environment + conda_prefix = Path(sys.executable).parent.parent.as_posix() + if is_conda_environment(conda_prefix): + conda_base_fname = "environment.txt" + environment_fname = data_path.joinpath(conda_base_fname).as_posix() - # Python equivilest of conda list --export -p $conda_prefix > $environment_fname - with open(environment_fname, "w") as environment_io: - with redirect_stdout(environment_io): - print_packages(conda_prefix, format="export") + # Python equivilest of conda list --export -p $conda_prefix > $environment_fname + with open(environment_fname, "w") as environment_io: + with redirect_stdout(environment_io): + print_packages(conda_prefix, format="export") - files["environment"] = {"name": conda_base_fname} + files["environment"] = {"name": conda_base_fname} - # Save pypi packages - pypi_base_fname = "pypi.json" - pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() + # Save pypi packages + pypi_base_fname = "pypi.json" + pypi_fname = data_path.joinpath(pypi_base_fname).as_posix() - pip_json_output = os.popen("pip list --format json") - pip_list = json.loads(pip_json_output.read()) + pip_json_output = os.popen("pip list --format json") + pip_list = json.loads(pip_json_output.read()) - with open(pypi_fname, "w") as pypi_io: - print(json.dumps(pip_list, indent=4), file=pypi_io) + with open(pypi_fname, "w") as pypi_io: + print(json.dumps(pip_list, indent=4), file=pypi_io) - files["pypi"] = {"name": pypi_base_fname} + files["pypi"] = {"name": pypi_base_fname} # Add supplied files for file_type, fname in in_files.items(): @@ -155,8 +163,10 @@ def evening_local_date(mjd, longitude=site.longitude): return evening_local_iso opsim_metadata = {} - opsim_metadata["scheduler_version"] = rubin_scheduler.__version__ - opsim_metadata["host"] = socket.getfqdn() + if capture_env: + opsim_metadata["scheduler_version"] = rubin_scheduler.__version__ + opsim_metadata["host"] = socket.getfqdn() + opsim_metadata["username"] = os.environ["USER"] simulation_dates = {} @@ -354,3 +364,78 @@ def read_archived_sim_metadata(base_uri, latest=None, num_nights=5): all_metadata[str(found_resource.dirname())] = these_metadata return all_metadata + + +def make_sim_archive_cli(*args): + parser = argparse.ArgumentParser(description="Add files to sim archive") + parser.add_argument( + "label", + type=str, + help="A label for the simulation.", + ) + parser.add_argument( + "opsim", + type=str, + help="File name of opsim database.", + ) + parser.add_argument("--rewards", type=str, default=None, help="A rewards HDF5 file.") + parser.add_argument( + "--scheduler_version", + type=str, + help="The version of the scheduler run.", + ) + parser.add_argument("--scheduler", type=str, default=None, help="A snapshot of the scheduler.") + parser.add_argument("--script", type=str, default=None, help="The script run to create the simulation.") + parser.add_argument( + "--notebook", type=str, default=None, help="The notebook run to create the simulation." 
+ ) + parser.add_argument( + "--current_env", + action="store_true", + help="Record the current environment as the simulation environment.", + ) + parser.add_argument( + "--archive_base_uri", + type=str, + default="s3://rubin-scheduler-prenight/opsim/", + help="Base URI for the archive", + ) + parser.add_argument("--tags", type=str, default=[], nargs="*", help="The tags on the simulation.") + arg_values = parser.parse_args() if len(args) == 0 else parser.parse_args(args) + + observations = SchemaConverter().opsim2obs(arg_values.opsim) + + if arg_values.rewards is not None: + try: + reward_df = pd.read_hdf(arg_values.rewards, "reward_df") + except KeyError: + reward_df = None + + try: + obs_rewards = pd.read_hdf(arg_values.rewards, "obs_rewards") + except KeyError: + obs_rewards = None + + filename_args = ["scheduler", "script", "notebook"] + in_files = {} + for filename_arg in filename_args: + try: + filename = getattr(arg_values, filename_arg) + if filename is not None: + in_files[filename] = filename + except AttributeError: + pass + + data_path = make_sim_archive_dir( + observations, + reward_df, + obs_rewards, + in_files, + tags=arg_values.tags, + label=arg_values.label, + capture_env=arg_values.current_env, + ) + + sim_archive_uri = transfer_archive_dir(data_path.name, arg_values.archive_base_uri) + + return sim_archive_uri diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 29b63263..0ecbf8ad 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -1,14 +1,18 @@ import lzma import pickle import unittest +import urllib from pathlib import Path from tempfile import TemporaryDirectory +import lsst.resources + from rubin_scheduler.scheduler import sim_runner from rubin_scheduler.scheduler.example import example_scheduler from rubin_scheduler.scheduler.model_observatory import ModelObservatory from rubin_scheduler.sim_archive.sim_archive import ( check_opsim_archive_resource, + make_sim_archive_cli, make_sim_archive_dir, read_archived_sim_metadata, transfer_archive_dir, @@ -86,3 +90,27 @@ def test_sim_archive(self): base = sim_archive_uri.dirname().geturl().removeprefix(test_resource_uri).rstrip("/").lstrip("/") expected_label = f"{base} test" self.assertEqual(archive_metadata[sim_archive_uri.geturl()]["label"], expected_label) + + def test_cli(self): + test_resource_path = lsst.resources.ResourcePath("resource://schedview/data/") + with test_resource_path.join("sample_opsim.db").as_local() as local_rp: + opsim = urllib.parse.urlparse(local_rp.geturl()).path + + with test_resource_path.join("sample_rewards.h5").as_local() as local_rp: + rewards = urllib.parse.urlparse(local_rp.geturl()).path + + with test_resource_path.join("sample_scheduler.pickle.xz").as_local() as local_rp: + scheduler = urllib.parse.urlparse(local_rp.geturl()).path + + with TemporaryDirectory() as test_archive_dir: + test_archive_uri = f"file://{test_archive_dir}/" + make_sim_archive_cli( + "Test", + opsim, + "--rewards", + rewards, + "--scheduler", + scheduler, + "--archive_base_uri", + test_archive_uri, + ) From 6d8271a172903295abfdc8772b30f3c9fec56c03 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 3 Jan 2024 10:46:38 -0800 Subject: [PATCH 11/19] include conda package dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 52734b5a..9dd8eae9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ h5py requests shapely tqdm 
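+# conda is imported by rubin_scheduler.sim_archive to record the environment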
+conda From bdfd4fb4d9ac966215422c67f5fa2fdf58de0668 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 3 Jan 2024 15:09:12 -0800 Subject: [PATCH 12/19] Support getting sunset from local date --- rubin_scheduler/site_models/almanac.py | 90 ++++++++++++++++++++++++-- tests/site_models/test_almanac.py | 15 +++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/rubin_scheduler/site_models/almanac.py b/rubin_scheduler/site_models/almanac.py index cce6fa7a..f92610a9 100644 --- a/rubin_scheduler/site_models/almanac.py +++ b/rubin_scheduler/site_models/almanac.py @@ -1,5 +1,6 @@ __all__ = ("Almanac",) +import datetime import os import numpy as np @@ -80,18 +81,97 @@ def get_planet_positions(self, mjd): result[pn + "_RA"] = temp_result return result - def get_sunset_info(self, mjd): + def get_sunset_info(self, mjd=None, evening_date=None, longitude=None): + """Returns a numpy array with mjds for various events + (sunset, moonrise, sun at -12 degrees alt, etc.). + + Parameters + ---------- + mjd : `float` + A UTC MJD that occurs during the desired night. + Defaults to None. + evening_date : `str` or `datetime.date` + The local date of the evening of the night whose index is desired, + in ISO8601 format (YYYY-MM-DD). Defaults to None. + longitude : `float` or `astropy.coordinates.angles.core.Angle` + If a float, then the value is interpreted as being in radians. + Defaults to None. + + Returns + ------- + sunset_info : `numpy.void` + A numpy object with dtype([ + ('night', '<i8'), + ('sunset', '<f8'), + ('sun_n12_setting', '<f8'), + ('sun_n18_setting', '<f8'), + ('sun_n18_rising', '<f8'), + ('sun_n12_rising', '<f8'), + ('sunrise', '<f8'), + ('moonrise', '<f8'), + ('moonset', '<f8') + ]) """ - Returns a numpy array with mjds for various events (sunset, moonrise, sun at -12 degrees alt, etc.). - Also the integer night number. - """ - indx = np.searchsorted(self.sunsets["sunset"], mjd, side="right") - 1 + if mjd is not None and evening_date is not None: + raise ValueError("At most one of mjd and evening_date can be set") + + # Default to now + if mjd is None and evening_date is None: + mjd = 40587 + datetime.datetime.now().timestamp() / (24 * 60 * 60) + + if mjd is not None: + indx = np.searchsorted(self.sunsets["sunset"], mjd, side="right") - 1 + elif evening_date is not None: + if longitude is None: + raise ValueError("If evening_date is set, longitude is needed as well") + indx = self.index_for_local_evening(evening_date, longitude) + return self.sunsets[indx] def mjd_indx(self, mjd): indx = np.searchsorted(self.sunsets["sunset"], mjd, side="right") - 1 return indx + def index_for_local_evening(self, evening_date, longitude): + """The index of the night with sunset at a given local date. + + Parameters + ---------- + evening_date : `str` or `datetime.date` + The local date of the evening of the night whose index is desired, + in ISO8601 format (YYYY-MM-DD). + longitude : `float` or `astropy.coordinates.angles.core.Angle` + If a float, then the value is interpreted as being in radians. + + Returns + ------- + night_index : `int` + The index of the requested night. 
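+
+        For example (an illustrative sketch, using an approximate Rubin
+        longitude, as in the unit tests)::
+
+            alm = Almanac()
+            indx = alm.index_for_local_evening("2024-03-15", np.radians(-70.75))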
+ """ + try: + longitude = longitude.radian + except AttributeError: + pass + + if isinstance(evening_date, str): + evening_date = datetime.date.fromisoformat(evening_date) + + evening_datetime = datetime.datetime(evening_date.year, evening_date.month, evening_date.day) + evening_mjd = np.floor(evening_datetime.timestamp() / (24 * 60 * 60) + 40587) + + # Depending on the time of year, the UTC date rollover might not + # always be on the same side of local sunset. Shift by the longitude + # to make sure the rollover is always near midnight, far from sunset. + matching_nights = np.argwhere( + np.floor(self.sunsets["sunset"] + longitude / (2 * np.pi)) == evening_mjd + ) + if len(matching_nights) < 1: + raise ValueError(f"Requested night {evening_date} outside of almanac date range") + + night_index = matching_nights.item() + + return night_index + def get_sun_moon_positions(self, mjd): """ All angles in Radians. moonPhase between 0 and 100. diff --git a/tests/site_models/test_almanac.py b/tests/site_models/test_almanac.py index dbf3e1f5..00283ee2 100644 --- a/tests/site_models/test_almanac.py +++ b/tests/site_models/test_almanac.py @@ -1,5 +1,7 @@ import unittest +import numpy as np +import pandas as pd from rubin_scheduler.site_models import Almanac @@ -15,6 +17,19 @@ def test_alm(self): moon = alma.get_sun_moon_positions(mjd) indx = alma.mjd_indx(mjd) + def test_index_for_local_evening(self): + almanac = Almanac() + longitude = np.radians(-70.75) + test_dates = ["2024-12-22", "2024-03-15", "2024-06-22"] + for test_date in test_dates: + night_index = almanac.index_for_local_evening(test_date, longitude) + sunset_mjd = almanac.sunsets[night_index]["sunset"] + sunset_timestamp = (sunset_mjd - 40587) * 24 * 60 * 60 + sunset_local_iso = ( + pd.Timestamp(sunset_timestamp, tz="UTC", unit="s").tz_convert("Chile/Continental").isoformat() + ) + self.assertTrue(sunset_local_iso.startswith(test_date)) + if __name__ == "__main__": unittest.main() From f7bdb65dc4c78ea56f2eac8b29a62cdc4fbf6d5e Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Wed, 3 Jan 2024 15:09:54 -0800 Subject: [PATCH 13/19] driver to run sim and archive results in one command --- rubin_scheduler/sim_archive/sim_archive.py | 73 ++++++++++++++++++---- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index 29d51af1..29695afa 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -7,6 +7,7 @@ "check_opsim_archive_resource", "read_archived_sim_metadata", "make_sim_archive_cli", + "drive_sim", ] import argparse @@ -14,12 +15,14 @@ import hashlib import json import logging +import lzma +import pickle import os import shutil import socket import sys from contextlib import redirect_stdout -from numbers import Number +from numbers import Number, Real, Integral from pathlib import Path from tempfile import TemporaryDirectory @@ -33,6 +36,7 @@ import rubin_scheduler from rubin_scheduler.scheduler.utils import SchemaConverter +from rubin_scheduler.scheduler import sim_runner from rubin_scheduler.utils import Site SITE = None @@ -171,22 +175,28 @@ def evening_local_date(mjd, longitude=site.longitude): simulation_dates = {} if "mjd_start" in sim_runner_kwargs: - simulation_dates["start"] = evening_local_date(sim_runner_kwargs["mjd_start"]) + simulation_dates["first"] = evening_local_date(sim_runner_kwargs["mjd_start"]) if "survey_length" in sim_runner_kwargs: - 
simulation_dates["end"] = evening_local_date( + simulation_dates["last"] = evening_local_date( sim_runner_kwargs["mjd_start"] + sim_runner_kwargs["survey_length"] ) else: - simulation_dates["start"] = evening_local_date(observations["mjd"].min()) - simulation_dates["end"] = evening_local_date(observations["mjd"].max()) + simulation_dates["first"] = evening_local_date(observations["mjd"].min()) + simulation_dates["last"] = evening_local_date(observations["mjd"].max()) if len(sim_runner_kwargs) > 0: opsim_metadata["sim_runner_kwargs"] = {} for key, value in sim_runner_kwargs.items(): + # Cast numpy number types to ints, floats, and reals to avoid + # confusing the yaml module. match value: - case bool() | Number() | str(): + case bool(): opsim_metadata["sim_runner_kwargs"][key] = value + case Integral(): + opsim_metadata["sim_runner_kwargs"][key] = int(value) + case Number(): + opsim_metadata["sim_runner_kwargs"][key] = float(value) case _: opsim_metadata["sim_runner_kwargs"][key] = str(value) @@ -311,11 +321,11 @@ def _build_archived_sim_label(base_uri, metadata_resource, metadata): try: sim_dates = metadata["simulated_dates"] - start_date = sim_dates["start"] - end_date = sim_dates["end"] - label = f"{label_base} of {start_date}" - if end_date != start_date: - label = f"{label} to {end_date}" + first_date = sim_dates["first"] + last_date = sim_dates["last"] + label = f"{label_base} of {first_date}" + if last_date != first_date: + label = f"{label} through {last_date}" except KeyError: label = label_base @@ -439,3 +449,44 @@ def make_sim_archive_cli(*args): sim_archive_uri = transfer_archive_dir(data_path.name, arg_values.archive_base_uri) return sim_archive_uri + + +def drive_sim(label, observatory, scheduler, archive_uri=None, tags=[], script=None, notebook=None, **kwargs): + in_files = {} + if script is not None: + in_files["script"] = script + + if notebook is not None: + in_files["notebook"] = notebook + + with TemporaryDirectory() as local_data_dir: + # We want to store the state of the scheduler at the start of the sim, + # so we need to save it now before we run the simulation. 
+        scheduler_path = Path(local_data_dir).joinpath("scheduler.pickle.xz")
+        with lzma.open(scheduler_path, "wb", format=lzma.FORMAT_XZ) as pio:
+            pickle.dump(scheduler, pio)
+        in_files["scheduler"] = scheduler_path.name
+
+        sim_results = sim_runner(observatory, scheduler, **kwargs)
+
+        observations = sim_results[2]
+        reward_df = sim_results[3] if scheduler.keep_rewards else None
+        obs_rewards = sim_results[4] if scheduler.keep_rewards else None
+
+        data_dir = make_sim_archive_dir(
+            observations,
+            reward_df=reward_df,
+            obs_rewards=obs_rewards,
+            in_files=in_files,
+            sim_runner_kwargs=kwargs,
+            tags=tags,
+            label=label,
+            capture_env=True,
+        )
+
+        if archive_uri is not None:
+            resource_path = transfer_archive_dir(data_dir.name, archive_uri)
+        else:
+            resource_path = ResourcePath(data_dir.name, forceDirectory=True)
+
+    return resource_path

From 7a50d30333e2a1fe74c2c4f9b68b1ea977a1abd6 Mon Sep 17 00:00:00 2001
From: Eric Neilsen <neilsen@fnal.gov>
Date: Thu, 4 Jan 2024 08:33:58 -0800
Subject: [PATCH 14/19] match api of drive_sim to sim_runner

---
 rubin_scheduler/sim_archive/sim_archive.py | 67 +++++++++++++++++++++-
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py
index 29695afa..9b927b74 100644
--- a/rubin_scheduler/sim_archive/sim_archive.py
+++ b/rubin_scheduler/sim_archive/sim_archive.py
@@ -451,7 +451,69 @@ def make_sim_archive_cli(*args):
     return sim_archive_uri
 
 
-def drive_sim(label, observatory, scheduler, archive_uri=None, tags=[], script=None, notebook=None, **kwargs):
+def drive_sim(
+    observatory, scheduler, archive_uri=None, label=None, tags=[], script=None, notebook=None, **kwargs
+):
+    """Run a simulation and archive the results.
+
+    Parameters
+    ----------
+    observatory : `ModelObservatory`
+        The model for the observatory.
+    scheduler : `CoreScheduler`
+        The scheduler to use.
+    archive_uri : `str`, optional
+        The root URI of the archive resource into which the results should
+        be stored. Defaults to None.
+    label : `str`, optional
+        The label for the simulation in the archive. Defaults to None.
+    tags : `list` of `str`, optional
+        The tags for the simulation in the archive. Defaults to an empty list.
+    script : `str`, optional
+        The filename of the script producing this simulation.
+        Defaults to None.
+    notebook : `str`, optional
+        The filename of the notebook producing the simulation.
+        Defaults to None.
+
+    Returns
+    -------
+    observatory : `ModelObservatory`
+        The model for the observatory.
+    scheduler : `CoreScheduler`
+        The scheduler used.
+    observations : `numpy.recarray`
+        The observations produced.
+    reward_df : `pandas.DataFrame`, optional
+        The table of rewards. Present if `record_rewards`
+        or `scheduler.keep_rewards` is True.
+    obs_rewards : `pandas.Series`, optional
+        The mapping of entries in reward_df to observations. Present if
+        `record_rewards` or `scheduler.keep_rewards` is True.
+    resource_path : `ResourcePath`, optional
+        The resource path to the archive of the simulation. Present if
+        `archive_uri` was set.
+
+    Notes
+    -----
+    Additional parameters not described above will be passed into
+    `sim_runner`.
+
+    If the `archive_uri` parameter is not supplied, `sim_runner` is run
+    directly, so that `drive_sim` can act as a drop-in replacement of
+    `sim_runner`.
+
+    In a jupyter notebook, the notebook can be saved for the notebook parameter
+    using `%notebook $notebook_fname` (where `notebook_fname` is a variable
+    holding the filename for the notebook) in the cell prior to calling
+    `drive_sim`.
+    """
+    if 'record_rewards' in kwargs:
+        if kwargs['record_rewards'] and not scheduler.keep_rewards:
+            raise ValueError("To keep rewards, scheduler.keep_rewards must be True")
+    else:
+        kwargs['record_rewards'] = scheduler.keep_rewards
+
     in_files = {}
     if script is not None:
         in_files["script"] = script
@@ -489,4 +551,5 @@ def drive_sim(label, observatory, scheduler, archive_uri=None, tags=[], script=N
     else:
         resource_path = ResourcePath(data_dir.name, forceDirectory=True)
 
-    return resource_path
+    results = sim_results + (resource_path,)
+    return results

From 05c0ae7c98684191045f7027ca9d06cdde7c73e2 Mon Sep 17 00:00:00 2001
From: Eric Neilsen <neilsen@fnal.gov>
Date: Thu, 4 Jan 2024 12:44:02 -0800
Subject: [PATCH 15/19] improve help for make_sim_archive

---
 rubin_scheduler/sim_archive/sim_archive.py | 26 ++++++++++++++++------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py
index 9b927b74..d69a9d6c 100644
--- a/rubin_scheduler/sim_archive/sim_archive.py
+++ b/rubin_scheduler/sim_archive/sim_archive.py
@@ -392,12 +392,24 @@ def make_sim_archive_cli(*args):
     parser.add_argument(
         "--scheduler_version",
         type=str,
-        help="The version of the scheduler run.",
+        default=None,
+        help="The version of the scheduler that produced the opsim database.",
     )
-    parser.add_argument("--scheduler", type=str, default=None, help="A snapshot of the scheduler.")
-    parser.add_argument("--script", type=str, default=None, help="The script run to create the simulation.")
     parser.add_argument(
-        "--notebook", type=str, default=None, help="The notebook run to create the simulation."
+        "--scheduler",
+        type=str,
+        default=None,
+        help="A snapshot of the scheduler used to produce the database, at the start of the simulation.",
+    )
+    parser.add_argument(
+        "--script", type=str, default=None, help="The file name of the script run to create the simulation."
+    )
+    parser.add_argument(
+        "--notebook",
+        type=str,
+        default=None,
+        help="""The file name of the notebook run to create the simulation.
+    This can be produced using the %notebook magic.""",
     )
     parser.add_argument(
         "--current_env",
@@ -508,11 +520,11 @@ def drive_sim(
     holding the filename for the notebook) in the cell prior to calling
     `drive_sim`.
""" - if 'record_rewards' in kwargs: - if kwargs['record_rewards'] and not scheduler.keep_rewards: + if "record_rewards" in kwargs: + if kwargs["record_rewards"] and not scheduler.keep_rewards: raise ValueError("To keep rewards, scheduler.keep_rewards must be True") else: - kwargs['record_rewards'] = scheduler.keep_rewards + kwargs["record_rewards"] = scheduler.keep_rewards in_files = {} if script is not None: From bf9afedc5365d6b7164d569cc659817250e911d6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Fri, 5 Jan 2024 09:03:53 -0800 Subject: [PATCH 16/19] be robust wrt absent rewards in archive --- rubin_scheduler/sim_archive/sim_archive.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rubin_scheduler/sim_archive/sim_archive.py b/rubin_scheduler/sim_archive/sim_archive.py index d69a9d6c..ec1b9d44 100644 --- a/rubin_scheduler/sim_archive/sim_archive.py +++ b/rubin_scheduler/sim_archive/sim_archive.py @@ -22,7 +22,7 @@ import socket import sys from contextlib import redirect_stdout -from numbers import Number, Real, Integral +from numbers import Number, Integral from pathlib import Path from tempfile import TemporaryDirectory @@ -404,12 +404,14 @@ def make_sim_archive_cli(*args): parser.add_argument( "--script", type=str, default=None, help="The file name of the script run to create the simulation." ) + + notebook_help = "The file name of the notebook run to create the simulation." + notebook_help = notebook_help + " This can be produced using the %%notebook magic." parser.add_argument( "--notebook", type=str, default=None, - help="""The file name of the notebook run to create the simulation. - This can be produced using the %notebook magic.""", + help=notebook_help, ) parser.add_argument( "--current_env", @@ -437,6 +439,9 @@ def make_sim_archive_cli(*args): obs_rewards = pd.read_hdf(arg_values.rewards, "obs_rewards") except KeyError: obs_rewards = None + else: + reward_df = None + obs_rewards = None filename_args = ["scheduler", "script", "notebook"] in_files = {} From 554b4cc421029bfcedacc2a80ed3264516f11f59 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Fri, 5 Jan 2024 09:46:56 -0800 Subject: [PATCH 17/19] set nside in DDFs so roi works correctly --- rubin_scheduler/scheduler/example/example_scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rubin_scheduler/scheduler/example/example_scheduler.py b/rubin_scheduler/scheduler/example/example_scheduler.py index 0f638e28..9d988d08 100644 --- a/rubin_scheduler/scheduler/example/example_scheduler.py +++ b/rubin_scheduler/scheduler/example/example_scheduler.py @@ -1087,10 +1087,10 @@ def ddf_surveys(detailers=None, season_unobs_frac=0.2, euclid_detailers=None, ns euclid_obs = np.where((obs_array["note"] == "DD:EDFS_b") | (obs_array["note"] == "DD:EDFS_a"))[0] all_other = np.where((obs_array["note"] != "DD:EDFS_b") & (obs_array["note"] != "DD:EDFS_a"))[0] - survey1 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], detailers=detailers) + survey1 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], nside=nside, detailers=detailers) survey1.set_script(obs_array[all_other]) - survey2 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], detailers=euclid_detailers) + survey2 = ScriptedSurvey([bf.AvoidDirectWind(nside=nside)], nside=nside, detailers=euclid_detailers) survey2.set_script(obs_array[euclid_obs]) return [survey1, survey2] From 13b1f3256d5b6f10369685b2a5809c4391182dc6 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: 
Fri, 5 Jan 2024 11:24:11 -0800 Subject: [PATCH 18/19] install lsst.resources in github actions --- .github/workflows/build_docs.yaml | 1 + .github/workflows/test_and_build.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml index bd4313ef..31088c69 100644 --- a/.github/workflows/build_docs.yaml +++ b/.github/workflows/build_docs.yaml @@ -30,6 +30,7 @@ jobs: run: | mamba install --quiet --file=requirements.txt mamba install --quiet pip + pip install lsst.resources pip install "documenteer[guide]" - name: install rubin_scheduler diff --git a/.github/workflows/test_and_build.yaml b/.github/workflows/test_and_build.yaml index 2ccbb5da..c920482b 100644 --- a/.github/workflows/test_and_build.yaml +++ b/.github/workflows/test_and_build.yaml @@ -39,6 +39,7 @@ jobs: run: | mamba install --quiet --file=requirements.txt mamba install --quiet --file=test-requirements.txt + pip install lsst.resources - name: install rubin_scheduler shell: bash -l {0} From b05caeac308d46a379e2eeef4c57afa6ef731776 Mon Sep 17 00:00:00 2001 From: Eric Neilsen <neilsen@fnal.gov> Date: Fri, 5 Jan 2024 12:07:02 -0800 Subject: [PATCH 19/19] skip test that requires schedview if it is not installed. --- tests/sim_archive/test_sim_archive.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/sim_archive/test_sim_archive.py b/tests/sim_archive/test_sim_archive.py index 0ecbf8ad..94fbfee6 100644 --- a/tests/sim_archive/test_sim_archive.py +++ b/tests/sim_archive/test_sim_archive.py @@ -1,3 +1,4 @@ +import importlib import lzma import pickle import unittest @@ -91,6 +92,7 @@ def test_sim_archive(self): expected_label = f"{base} test" self.assertEqual(archive_metadata[sim_archive_uri.geturl()]["label"], expected_label) + @unittest.skipIf(importlib.util.find_spec("schedview") is None, "No schedview") def test_cli(self): test_resource_path = lsst.resources.ResourcePath("resource://schedview/data/") with test_resource_path.join("sample_opsim.db").as_local() as local_rp:
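
Taken together, these patches support a workflow like the following minimal
sketch (illustrative only: the label and tags are placeholders, and the
trailing keyword arguments are passed through to `sim_runner`, as in the
unit tests above):

    from rubin_scheduler.scheduler.example import example_scheduler
    from rubin_scheduler.scheduler.model_observatory import ModelObservatory
    from rubin_scheduler.sim_archive import drive_sim
    from rubin_scheduler.utils import survey_start_mjd

    mjd_start = survey_start_mjd()
    scheduler = example_scheduler(mjd_start=mjd_start)
    scheduler.keep_rewards = True
    observatory = ModelObservatory(mjd_start=mjd_start)

    # Runs sim_runner, snapshots the scheduler and environment, and
    # transfers everything to the archive; returns the sim_runner results
    # plus the destination ResourcePath.
    results = drive_sim(
        observatory,
        scheduler,
        archive_uri="file:///tmp/sim_archive/",  # hypothetical destination
        label="one night test",
        tags=["test"],
        mjd_start=mjd_start,
        survey_length=1,  # days
        record_rewards=True,
    )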