From a2729625dea6261d551f9d8dfd83a8d13bc221a0 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Tue, 9 Mar 2021 17:21:43 -0800 Subject: [PATCH 001/108] refactor: use pandas for local scenario list operations --- powersimdata/data_access/scenario_list.py | 54 +++++++++-------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 070f13734..164b2fb89 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -1,3 +1,4 @@ +import os import posixpath from collections import OrderedDict @@ -99,20 +100,8 @@ def generate_scenario_id(self): :return: (*str*) -- new scenario id. """ - print("--> Generating scenario id") - command = "(flock -x 200; \ - id=$(awk -F',' 'END{print $1+1}' %s); \ - echo $id, >> %s; \ - echo $id) 200>%s" % ( - self._server_path, - self._server_path, - posixpath.join(server_setup.DATA_ROOT_DIR, "scenario.lockfile"), - ) - - err_message = "Failed to generate id for new scenario" - command_output = self._execute_and_check_err(command, err_message) - scenario_id = command_output[0].splitlines()[0] - return scenario_id + table = self.get_table(self._SCENARIO_LIST) + return str(table.index.max() + 1) def get_scenario(self, descriptor): """Get information for a scenario based on id or name @@ -148,34 +137,35 @@ def err_message(text): .to_dict("records", into=OrderedDict)[0] ) + def _save_file(self, table): + """Save to local directory + + :param pandas.DataFrame table: the scenario list data frame + """ + table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._SCENARIO_LIST)) + def add_entry(self, scenario_info): """Adds scenario to the scenario list file on server. :param collections.OrderedDict scenario_info: entry to add to scenario list. 
""" - print("--> Adding entry in %s on server" % self._SCENARIO_LIST) - entry = ",".join(scenario_info.values()) - options = "-F, -v INPLACE_SUFFIX=.bak -i inplace" - # AWK parses the file line-by-line. When the entry of the first column is - # equal to the scenario identification number, the entire line is replaced - # by the scenaario information. - program = "'{if($1==%s) $0=\"%s\"};1'" % ( - scenario_info["id"], - entry, - ) - command = "awk %s %s %s" % (options, program, self._server_path) + table = self.get_table(self._SCENARIO_LIST) + table.reset_index() + table.append(scenario_info) + self._save_file(table) - err_message = "Failed to add entry in %s on server" % self._SCENARIO_LIST - _ = self._execute_and_check_err(command, err_message) + print("--> Adding entry in %s on server" % self._SCENARIO_LIST) + self.data_access.move_to(self._SCENARIO_LIST) def delete_entry(self, scenario_info): """Deletes entry in scenario list. :param collections.OrderedDict scenario_info: entry to delete from scenario list. 
""" - print("--> Deleting entry in %s on server" % self._SCENARIO_LIST) - entry = ",".join(scenario_info.values()) - command = "sed -i.bak '/%s/d' %s" % (entry, self._server_path) + table = self.get_table(self._SCENARIO_LIST) + scenario_id = int(scenario_info["id"]) + table.drop(scenario_id) + self._save_file(table) - err_message = "Failed to delete entry in %s on server" % self._SCENARIO_LIST - _ = self._execute_and_check_err(command, err_message) + print("--> Deleting entry in %s on server" % self._SCENARIO_LIST) + self.data_access.move_to(self._SCENARIO_LIST) From 1914d519674a35d113bec620376e4af3ef2a0664 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Tue, 9 Mar 2021 18:02:18 -0800 Subject: [PATCH 002/108] refactor: use pandas for local execute list operations --- powersimdata/data_access/execute_list.py | 56 ++++++++++--------- powersimdata/data_access/scenario_list.py | 6 +- .../data_access/tests/test_execute_table.py | 5 +- powersimdata/scenario/execute.py | 18 +++--- powersimdata/scenario/move.py | 3 +- 5 files changed, 47 insertions(+), 41 deletions(-) diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index 95723cb4e..83b134e95 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -1,3 +1,4 @@ +import os import posixpath from powersimdata.data_access.csv_store import CsvStore @@ -50,15 +51,15 @@ def add_entry(self, scenario_info): ), ) - def update_execute_list(self, status, scenario_info): + def set_status(self, scenario_id, status): """Updates status of scenario in execute list + :param int scenario_id: the scenario id :param str status: execution status. 
- :param collections.OrderedDict scenario_info: entry to update """ self.cur.execute( "UPDATE execute_list SET status = %s WHERE id = %s", - (status, scenario_info["id"]), + (status, scenario_id), ) def delete_entry(self, scenario_info): @@ -85,6 +86,13 @@ def __init__(self, ssh_client): server_setup.DATA_ROOT_DIR, self._EXECUTE_LIST ) + def _save_file(self, table): + """Save to local directory + + :param pandas.DataFrame table: the execute list data frame + """ + table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._EXECUTE_LIST)) + def get_execute_table(self): """Returns execute table from server if possible, otherwise read local copy. Updates the local copy upon successful server connection. @@ -107,39 +115,35 @@ def get_status(self, scenario_id): raise Exception(f"Scenario not found in execute list, id = {scenario_id}") def add_entry(self, scenario_info): - """Adds scenario to the execute list file on server. + """Add entry to execute list :param collections.OrderedDict scenario_info: entry to add """ - print("--> Adding entry in execute table on server") - entry = "%s,created" % scenario_info["id"] - command = "echo %s >> %s" % (entry, self._server_path) - err_message = "Failed to update %s on server" % self._EXECUTE_LIST - _ = self._execute_and_check_err(command, err_message) + scenario_id = int(scenario_info["id"]) + return self.set_status(scenario_id, "created") - def update_execute_list(self, status, scenario_info): - """Updates status in execute list file on server. + def set_status(self, scenario_id, status): + """Set the scenario status - :param str status: execution status. - :param collections.OrderedDict scenario_info: entry to update + :param int scenario_id: the scenario id + :param str status: the new status """ - print("--> Updating status in execute table on server") - options = "-F, -v OFS=',' -v INPLACE_SUFFIX=.bak -i inplace" - # AWK parses the file line-by-line. 
When the entry of the first column is equal - # to the scenario identification number, the second column is replaced by the - # status parameter. - program = "'{if($1==%s) $2=\"%s\"};1'" % (scenario_info["id"], status) - command = "awk %s %s %s" % (options, program, self._server_path) - err_message = "Failed to update %s on server" % self._EXECUTE_LIST - _ = self._execute_and_check_err(command, err_message) + table = self.get_execute_table() + table.loc[scenario_id, "status"] = status + self._save_file(table) + + print(f"--> Setting status={status} in execute table on server") + self.data_access.move_to(self._EXECUTE_LIST) def delete_entry(self, scenario_info): """Deletes entry from execute list on server. :param collections.OrderedDict scenario_info: entry to delete """ + table = self.get_execute_table() + scenario_id = int(scenario_info["id"]) + table.drop(scenario_id) + self._save_file(table) + print("--> Deleting entry in execute table on server") - entry = "^%s,extracted" % scenario_info["id"] - command = "sed -i.bak '/%s/d' %s" % (entry, self._server_path) - err_message = "Failed to delete entry in %s on server" % self._EXECUTE_LIST - _ = self._execute_and_check_err(command, err_message) + self.data_access.move_to(self._EXECUTE_LIST) diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 164b2fb89..1783ba496 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -100,7 +100,7 @@ def generate_scenario_id(self): :return: (*str*) -- new scenario id. """ - table = self.get_table(self._SCENARIO_LIST) + table = self.get_scenario_table() return str(table.index.max() + 1) def get_scenario(self, descriptor): @@ -149,7 +149,7 @@ def add_entry(self, scenario_info): :param collections.OrderedDict scenario_info: entry to add to scenario list. 
""" - table = self.get_table(self._SCENARIO_LIST) + table = self.get_scenario_table() table.reset_index() table.append(scenario_info) self._save_file(table) @@ -162,7 +162,7 @@ def delete_entry(self, scenario_info): :param collections.OrderedDict scenario_info: entry to delete from scenario list. """ - table = self.get_table(self._SCENARIO_LIST) + table = self.get_scenario_table() scenario_id = int(scenario_info["id"]) table.drop(scenario_id) self._save_file(table) diff --git a/powersimdata/data_access/tests/test_execute_table.py b/powersimdata/data_access/tests/test_execute_table.py index b511c01f9..a38e8d92e 100644 --- a/powersimdata/data_access/tests/test_execute_table.py +++ b/powersimdata/data_access/tests/test_execute_table.py @@ -73,8 +73,9 @@ def test_add_entry(store): def test_update_entry(store): info = _get_test_row() store.add_entry(info) - store.update_execute_list("testing", info) - status = store.get_status(info["id"]) + sid = info["id"] + store.set_status(sid, "testing") + status = store.get_status(sid) assert status.loc[0, "status"] == "testing" diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index 323b8c374..3e49c02e2 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -27,6 +27,8 @@ def __init__(self, scenario): """Constructor.""" self._scenario_info = scenario.info self._scenario_status = scenario.status + self._scenario_id = int(self._scenario_info["id"]) + super().__init__(scenario) print( @@ -68,13 +70,13 @@ def get_grid(self): def _update_scenario_status(self): """Updates scenario status.""" - scenario_id = self._scenario_info["id"] - self._scenario_status = self._execute_list_manager.get_status(scenario_id) + self._scenario_status = self._execute_list_manager.get_status(self._scenario_id) def _update_scenario_info(self): """Updates scenario information.""" - scenario_id = self._scenario_info["id"] - self._scenario_info = 
self._scenario_list_manager.get_scenario(scenario_id) + self._scenario_info = self._scenario_list_manager.get_scenario( + self._scenario_id + ) def _run_script(self, script, extra_args=None): """Returns running process @@ -152,9 +154,7 @@ def prepare_simulation_input(self, profiles_as=None): si.prepare_mpc_file() - self._execute_list_manager.update_execute_list( - "prepared", self._scenario_info - ) + self._execute_list_manager.set_status(self._scenario_id, "prepared") else: print("---------------------------") print("SCENARIO CANNOT BE PREPARED") @@ -211,7 +211,7 @@ def _launch_in_container(self, threads, solver): None, which translates to gurobi :return: (*requests.Response*) -- the http response object """ - scenario_id = self._scenario_info["id"] + scenario_id = self._scenario_id url = f"http://{server_setup.SERVER_ADDRESS}:5000/launch/{scenario_id}" resp = requests.post(url, params={"threads": threads, "solver": solver}) if resp.status_code != 200: @@ -274,7 +274,7 @@ def check_progress(self): if mode != DeploymentMode.Container: raise NotImplementedError("Operation only supported for container mode") - scenario_id = self._scenario_info["id"] + scenario_id = self._scenario_id url = f"http://{server_setup.SERVER_ADDRESS}:5000/status/{scenario_id}" resp = requests.get(url) return resp.json() diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index b9e6606f6..53df4c3d1 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -39,7 +39,8 @@ def move_scenario(self, target="disk"): backup.move_output_data() backup.move_temporary_folder() - self._execute_list_manager.update_execute_list("moved", self._scenario_info) + sid = int(self._scenario_info["id"]) + self._execute_list_manager.set_status(sid, "moved") # Delete attributes self._clean() From df4dda01f89fb5dd0af5bfe996693eebbe5fa48a Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 10 Mar 2021 12:59:17 -0800 Subject: [PATCH 
003/108] fix: bugs found while testing --- powersimdata/data_access/data_access.py | 12 ++++++++---- powersimdata/data_access/execute_list.py | 8 ++++---- powersimdata/data_access/scenario_list.py | 16 +++++++++++----- powersimdata/scenario/execute.py | 16 ++++++++++------ 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 16c5e8e9c..2c474984f 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -116,19 +116,22 @@ def copy_from(self, file_name, from_dir=None): """ pass - def move_to(self, file_name, to_dir, change_name_to=None): + def move_to(self, file_name, to_dir=None, change_name_to=None, force=False): """Copy a file from userspace to data store. :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. """ + if to_dir is None: # already symlinked via dockerfile + return self._check_filename(file_name) src = posixpath.join(server_setup.LOCAL_DIR, file_name) file_name = file_name if change_name_to is None else change_name_to dest = posixpath.join(self.root, to_dir, file_name) print(f"--> Moving file {src} to {dest}") - self._check_file_exists(dest, should_exist=False) + if not force: + self._check_file_exists(dest, should_exist=False) self.copy(src, dest) self.remove(src) @@ -235,7 +238,7 @@ def copy_from(self, file_name, from_dir=None): sftp.get(from_path, to_path, callback=cbk) bar.close() - def move_to(self, file_name, to_dir=None, change_name_to=None): + def move_to(self, file_name, to_dir=None, change_name_to=None, force=False): """Copy a file from userspace to data store. :param str file_name: file name to copy. 
@@ -255,7 +258,8 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): to_dir = "" if to_dir is None else to_dir to_path = posixpath.join(self.root, to_dir, file_name) self.makedir(to_dir) - self._check_file_exists(to_path, should_exist=False) + if not force: + self._check_file_exists(to_path, should_exist=False) with self.ssh.open_sftp() as sftp: print(f"Transferring {from_path} to server") diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index 83b134e95..004793217 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -125,15 +125,15 @@ def add_entry(self, scenario_info): def set_status(self, scenario_id, status): """Set the scenario status - :param int scenario_id: the scenario id + :param int/str scenario_id: the scenario id :param str status: the new status """ table = self.get_execute_table() - table.loc[scenario_id, "status"] = status + table.loc[int(scenario_id), "status"] = status self._save_file(table) print(f"--> Setting status={status} in execute table on server") - self.data_access.move_to(self._EXECUTE_LIST) + self.data_access.move_to(self._EXECUTE_LIST, force=True) def delete_entry(self, scenario_info): """Deletes entry from execute list on server. 
@@ -146,4 +146,4 @@ def delete_entry(self, scenario_info): self._save_file(table) print("--> Deleting entry in execute table on server") - self.data_access.move_to(self._EXECUTE_LIST) + self.data_access.move_to(self._EXECUTE_LIST, force=True) diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 1783ba496..71080e04b 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -2,6 +2,8 @@ import posixpath from collections import OrderedDict +import pandas as pd + from powersimdata.data_access.csv_store import CsvStore from powersimdata.data_access.sql_store import SqlStore, to_data_frame from powersimdata.utility import server_setup @@ -101,7 +103,9 @@ def generate_scenario_id(self): :return: (*str*) -- new scenario id. """ table = self.get_scenario_table() - return str(table.index.max() + 1) + max_value = table.index.max() + result = 1 if pd.isna(max_value) else max_value + 1 + return str(result) def get_scenario(self, descriptor): """Get information for a scenario based on id or name @@ -150,12 +154,14 @@ def add_entry(self, scenario_info): :param collections.OrderedDict scenario_info: entry to add to scenario list. """ table = self.get_scenario_table() - table.reset_index() - table.append(scenario_info) + table.reset_index(inplace=True) + entry = pd.DataFrame({k: [v] for k, v in scenario_info.items()}) + table = table.append(entry) + table.set_index("id", inplace=True) self._save_file(table) print("--> Adding entry in %s on server" % self._SCENARIO_LIST) - self.data_access.move_to(self._SCENARIO_LIST) + self.data_access.move_to(self._SCENARIO_LIST, force=True) def delete_entry(self, scenario_info): """Deletes entry in scenario list. 
@@ -168,4 +174,4 @@ def delete_entry(self, scenario_info): self._save_file(table) print("--> Deleting entry in %s on server" % self._SCENARIO_LIST) - self.data_access.move_to(self._SCENARIO_LIST) + self.data_access.move_to(self._SCENARIO_LIST, force=True) diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index 3e49c02e2..b0a8c2d43 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -27,7 +27,6 @@ def __init__(self, scenario): """Constructor.""" self._scenario_info = scenario.info self._scenario_status = scenario.status - self._scenario_id = int(self._scenario_info["id"]) super().__init__(scenario) @@ -40,6 +39,9 @@ def __init__(self, scenario): self._set_ct_and_grid() + def _scenario_id(self): + return self._scenario_info["id"] + def _set_ct_and_grid(self): """Sets change table and grid.""" base_grid = Grid( @@ -70,12 +72,14 @@ def get_grid(self): def _update_scenario_status(self): """Updates scenario status.""" - self._scenario_status = self._execute_list_manager.get_status(self._scenario_id) + self._scenario_status = self._execute_list_manager.get_status( + self._scenario_id() + ) def _update_scenario_info(self): """Updates scenario information.""" self._scenario_info = self._scenario_list_manager.get_scenario( - self._scenario_id + self._scenario_id() ) def _run_script(self, script, extra_args=None): @@ -154,7 +158,7 @@ def prepare_simulation_input(self, profiles_as=None): si.prepare_mpc_file() - self._execute_list_manager.set_status(self._scenario_id, "prepared") + self._execute_list_manager.set_status(self._scenario_id(), "prepared") else: print("---------------------------") print("SCENARIO CANNOT BE PREPARED") @@ -211,7 +215,7 @@ def _launch_in_container(self, threads, solver): None, which translates to gurobi :return: (*requests.Response*) -- the http response object """ - scenario_id = self._scenario_id + scenario_id = self._scenario_id() url = 
f"http://{server_setup.SERVER_ADDRESS}:5000/launch/{scenario_id}" resp = requests.post(url, params={"threads": threads, "solver": solver}) if resp.status_code != 200: @@ -274,7 +278,7 @@ def check_progress(self): if mode != DeploymentMode.Container: raise NotImplementedError("Operation only supported for container mode") - scenario_id = self._scenario_id + scenario_id = self._scenario_id() url = f"http://{server_setup.SERVER_ADDRESS}:5000/status/{scenario_id}" resp = requests.get(url) return resp.json() From 58c177f5d313784366ae0ea6ef5073a8d23c9501 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 10 Mar 2021 13:34:22 -0800 Subject: [PATCH 004/108] fix: enable module import for test purposes --- powersimdata/utility/templates/__init__.py | 0 setup.py | 1 + 2 files changed, 1 insertion(+) create mode 100644 powersimdata/utility/templates/__init__.py diff --git a/powersimdata/utility/templates/__init__.py b/powersimdata/utility/templates/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/setup.py b/setup.py index 68aab9ab4..083865c04 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ "network/*/data/*.csv", "design/investment/data/*.csv", "design/investment/data/*/*", + "utility/templates/*.csv", ] }, zip_safe=False, From 32ce0d5a14b23af40659d6a9f26c3ba047367f3f Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 10 Mar 2021 13:59:31 -0800 Subject: [PATCH 005/108] chore: remove old code, reuse save method, misc cleanup --- powersimdata/data_access/csv_store.py | 22 +++++---------------- powersimdata/data_access/execute_list.py | 20 +++++-------------- powersimdata/data_access/scenario_list.py | 24 +++++++---------------- powersimdata/scenario/execute.py | 1 - powersimdata/scenario/move.py | 2 +- 5 files changed, 18 insertions(+), 51 deletions(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index f2131f3de..05125b544 100644 --- 
a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import pandas as pd @@ -47,22 +48,9 @@ def _parse_csv(self, file_object): table.fillna("", inplace=True) return table.astype(str) - def _execute_and_check_err(self, command, err_message): - """Executes command and checks for error. + def _save_file(self, table): + """Save to local directory - :param str command: command to execute over ssh. - :param str err_message: error message to be raised. - :raises IOError: if command is not successfully executed. - :return: (*list*) -- list of command output. + :param pandas.DataFrame table: the data frame to save """ - stdin, stdout, stderr = self.data_access.execute_command(command) - command_output = stdout.readlines() - command_error = stderr.readlines() - if len(command_error) != 0: - command_error = [ - i.replace("\t", " ").replace("\n", "") for i in command_error - ] - for ce in command_error: - print(ce) - raise IOError(err_message) - return command_output + table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index 004793217..60073b965 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -1,4 +1,3 @@ -import os import posixpath from powersimdata.data_access.csv_store import CsvStore @@ -77,21 +76,12 @@ class ExecuteListManager(CsvStore): :param paramiko.client.SSHClient ssh_client: session with an SSH server. 
""" - _EXECUTE_LIST = "ExecuteList.csv" + _FILE_NAME = "ExecuteList.csv" def __init__(self, ssh_client): """Constructor""" super().__init__(ssh_client) - self._server_path = posixpath.join( - server_setup.DATA_ROOT_DIR, self._EXECUTE_LIST - ) - - def _save_file(self, table): - """Save to local directory - - :param pandas.DataFrame table: the execute list data frame - """ - table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._EXECUTE_LIST)) + self._server_path = posixpath.join(server_setup.DATA_ROOT_DIR, self._FILE_NAME) def get_execute_table(self): """Returns execute table from server if possible, otherwise read local @@ -99,7 +89,7 @@ def get_execute_table(self): :return: (*pandas.DataFrame*) -- execute list as a data frame. """ - return self.get_table(self._EXECUTE_LIST) + return self.get_table(self._FILE_NAME) def get_status(self, scenario_id): """Return the status for the scenario @@ -133,7 +123,7 @@ def set_status(self, scenario_id, status): self._save_file(table) print(f"--> Setting status={status} in execute table on server") - self.data_access.move_to(self._EXECUTE_LIST, force=True) + self.data_access.move_to(self._FILE_NAME, force=True) def delete_entry(self, scenario_info): """Deletes entry from execute list on server. @@ -146,4 +136,4 @@ def delete_entry(self, scenario_info): self._save_file(table) print("--> Deleting entry in execute table on server") - self.data_access.move_to(self._EXECUTE_LIST, force=True) + self.data_access.move_to(self._FILE_NAME, force=True) diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 71080e04b..16af72b00 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -1,4 +1,3 @@ -import os import posixpath from collections import OrderedDict @@ -80,14 +79,12 @@ class ScenarioListManager(CsvStore): :param paramiko.client.SSHClient ssh_client: session with an SSH server. 
""" - _SCENARIO_LIST = "ScenarioList.csv" + _FILE_NAME = "ScenarioList.csv" def __init__(self, ssh_client): """Constructor""" super().__init__(ssh_client) - self._server_path = posixpath.join( - server_setup.DATA_ROOT_DIR, self._SCENARIO_LIST - ) + self._server_path = posixpath.join(server_setup.DATA_ROOT_DIR, self._FILE_NAME) def get_scenario_table(self): """Returns scenario table from server if possible, otherwise read local @@ -95,7 +92,7 @@ def get_scenario_table(self): :return: (*pandas.DataFrame*) -- scenario list as a data frame. """ - return self.get_table(self._SCENARIO_LIST) + return self.get_table(self._FILE_NAME) def generate_scenario_id(self): """Generates scenario id. @@ -141,13 +138,6 @@ def err_message(text): .to_dict("records", into=OrderedDict)[0] ) - def _save_file(self, table): - """Save to local directory - - :param pandas.DataFrame table: the scenario list data frame - """ - table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._SCENARIO_LIST)) - def add_entry(self, scenario_info): """Adds scenario to the scenario list file on server. @@ -160,8 +150,8 @@ def add_entry(self, scenario_info): table.set_index("id", inplace=True) self._save_file(table) - print("--> Adding entry in %s on server" % self._SCENARIO_LIST) - self.data_access.move_to(self._SCENARIO_LIST, force=True) + print("--> Adding entry in %s on server" % self._FILE_NAME) + self.data_access.move_to(self._FILE_NAME, force=True) def delete_entry(self, scenario_info): """Deletes entry in scenario list. 
@@ -173,5 +163,5 @@ def delete_entry(self, scenario_info): table.drop(scenario_id) self._save_file(table) - print("--> Deleting entry in %s on server" % self._SCENARIO_LIST) - self.data_access.move_to(self._SCENARIO_LIST, force=True) + print("--> Deleting entry in %s on server" % self._FILE_NAME) + self.data_access.move_to(self._FILE_NAME, force=True) diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index b0a8c2d43..4e29622c8 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -27,7 +27,6 @@ def __init__(self, scenario): """Constructor.""" self._scenario_info = scenario.info self._scenario_status = scenario.status - super().__init__(scenario) print( diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index 53df4c3d1..f93d6afc4 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -39,7 +39,7 @@ def move_scenario(self, target="disk"): backup.move_output_data() backup.move_temporary_folder() - sid = int(self._scenario_info["id"]) + sid = self._scenario_info["id"] self._execute_list_manager.set_status(sid, "moved") # Delete attributes From 9bbb91946c213005d7ddeebe844b0e7ccf1c22b9 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 10 Mar 2021 17:38:33 -0800 Subject: [PATCH 006/108] feat: synchronize uploads for scenario and execute list --- powersimdata/data_access/csv_store.py | 5 ++- powersimdata/data_access/data_access.py | 53 +++++++++++++++++++---- powersimdata/data_access/execute_list.py | 6 +-- powersimdata/data_access/scenario_list.py | 6 +-- 4 files changed, 52 insertions(+), 18 deletions(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index 05125b544..fb464aa45 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -48,9 +48,10 @@ def _parse_csv(self, file_object): table.fillna("", inplace=True) return table.astype(str) - 
def _save_file(self, table): - """Save to local directory + def commit(self, table): + """Save to local directory and upload if needed :param pandas.DataFrame table: the data frame to save """ table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) + self.data_access.push(self._FILE_NAME) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 2c474984f..e357743d7 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -97,6 +97,13 @@ def execute_command_async(self, command): """ raise NotImplementedError + def push(self, file_name): + """Push the file from local to remote root folder, ensuring integrity + + :param str file_name: the file name, located at the local root + """ + raise NotImplementedError + def close(self): """Perform any necessary cleanup for the object.""" pass @@ -116,22 +123,26 @@ def copy_from(self, file_name, from_dir=None): """ pass - def move_to(self, file_name, to_dir=None, change_name_to=None, force=False): + def push(self, file_name): + """Nothing to be done due to symlink + + :param str file_name: the file name, located at the local root + """ + pass + + def move_to(self, file_name, to_dir, change_name_to=None): """Copy a file from userspace to data store. :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. 
""" - if to_dir is None: # already symlinked via dockerfile - return self._check_filename(file_name) src = posixpath.join(server_setup.LOCAL_DIR, file_name) file_name = file_name if change_name_to is None else change_name_to dest = posixpath.join(self.root, to_dir, file_name) print(f"--> Moving file {src} to {dest}") - if not force: - self._check_file_exists(dest, should_exist=False) + self._check_file_exists(dest, should_exist=False) self.copy(src, dest) self.remove(src) @@ -238,7 +249,7 @@ def copy_from(self, file_name, from_dir=None): sftp.get(from_path, to_path, callback=cbk) bar.close() - def move_to(self, file_name, to_dir=None, change_name_to=None, force=False): + def move_to(self, file_name, to_dir=None, change_name_to=None): """Copy a file from userspace to data store. :param str file_name: file name to copy. @@ -258,8 +269,7 @@ def move_to(self, file_name, to_dir=None, change_name_to=None, force=False): to_dir = "" if to_dir is None else to_dir to_path = posixpath.join(self.root, to_dir, file_name) self.makedir(to_dir) - if not force: - self._check_file_exists(to_path, should_exist=False) + self._check_file_exists(to_path, should_exist=False) with self.ssh.open_sftp() as sftp: print(f"Transferring {from_path} to server") @@ -288,6 +298,33 @@ def execute_command_async(self, command): process = Popen(full_command) return process + def push(self, file_name): + """Push file_name to remote root + + :param str file_name: the file name, located at the local root + :raises IOError: if command generated stderr + """ + backup = f"{file_name}.bak" + self.move_to(file_name, change_name_to=backup) + + values = { + "original": posixpath.join(self.root, file_name), + "updated": posixpath.join(self.root, backup), + "lockfile": posixpath.join(self.root, "scenario.lockfile"), + } + + template = "(flock -x 200; \ + conflicts=$(comm -23 {original} {updated} | wc -l); \ + if [ $conflicts -eq 0 ]; then mv {updated} {original} -b; \ + else echo CONFLICT_ERROR 1>&2; fi \ + 
200>{lockfile}" + + command = template.format(**values) + _, _, stderr = self.execute_command(command) + + if len(stderr.readlines()) > 0: + raise IOError("Failed to push file - most likely a conflict was detected.") + def close(self): """Close the connection that was opened when the object was created.""" self.ssh.close() diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index 60073b965..994f72dd8 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -120,10 +120,9 @@ def set_status(self, scenario_id, status): """ table = self.get_execute_table() table.loc[int(scenario_id), "status"] = status - self._save_file(table) print(f"--> Setting status={status} in execute table on server") - self.data_access.move_to(self._FILE_NAME, force=True) + self.commit(table) def delete_entry(self, scenario_info): """Deletes entry from execute list on server. @@ -133,7 +132,6 @@ def delete_entry(self, scenario_info): table = self.get_execute_table() scenario_id = int(scenario_info["id"]) table.drop(scenario_id) - self._save_file(table) print("--> Deleting entry in execute table on server") - self.data_access.move_to(self._FILE_NAME, force=True) + self.commit(table) diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 16af72b00..4811065cf 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -148,10 +148,9 @@ def add_entry(self, scenario_info): entry = pd.DataFrame({k: [v] for k, v in scenario_info.items()}) table = table.append(entry) table.set_index("id", inplace=True) - self._save_file(table) print("--> Adding entry in %s on server" % self._FILE_NAME) - self.data_access.move_to(self._FILE_NAME, force=True) + self.commit(table) def delete_entry(self, scenario_info): """Deletes entry in scenario list. 
@@ -161,7 +160,6 @@ def delete_entry(self, scenario_info): table = self.get_scenario_table() scenario_id = int(scenario_info["id"]) table.drop(scenario_id) - self._save_file(table) print("--> Deleting entry in %s on server" % self._FILE_NAME) - self.data_access.move_to(self._FILE_NAME, force=True) + self.commit(table) From 384afe3a0b54c632670a6d3f05570be983300cc4 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 10 Mar 2021 18:39:18 -0800 Subject: [PATCH 007/108] fix: typo and delete logic --- powersimdata/data_access/data_access.py | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index e357743d7..339713391 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -22,12 +22,13 @@ def copy_from(self, file_name, from_dir): """ raise NotImplementedError - def move_to(self, file_name, to_dir, change_name_to=None): + def move_to(self, file_name, to_dir, change_name_to=None, preserve=False): """Copy a file from userspace to data store. :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. + :param bool preserve: whether to keep the local copy """ raise NotImplementedError @@ -130,12 +131,13 @@ def push(self, file_name): """ pass - def move_to(self, file_name, to_dir, change_name_to=None): + def move_to(self, file_name, to_dir, change_name_to=None, preserve=False): """Copy a file from userspace to data store. :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. 
+ :param bool preserve: whether to keep the local copy """ self._check_filename(file_name) src = posixpath.join(server_setup.LOCAL_DIR, file_name) @@ -144,7 +146,9 @@ def move_to(self, file_name, to_dir, change_name_to=None): print(f"--> Moving file {src} to {dest}") self._check_file_exists(dest, should_exist=False) self.copy(src, dest) - self.remove(src) + if not preserve: + print("--> Deleting original copy") + self.remove(src) def execute_command(self, command): """Execute a command locally at the data access. @@ -249,12 +253,13 @@ def copy_from(self, file_name, from_dir=None): sftp.get(from_path, to_path, callback=cbk) bar.close() - def move_to(self, file_name, to_dir=None, change_name_to=None): + def move_to(self, file_name, to_dir=None, change_name_to=None, preserve=False): """Copy a file from userspace to data store. :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. + :param bool preserve: whether to keep the local copy :raises FileNotFoundError: if specified file does not exist """ self._check_filename(file_name) @@ -275,8 +280,9 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): print(f"Transferring {from_path} to server") sftp.put(from_path, to_path) - print(f"--> Deleting {from_path} on local machine") - os.remove(from_path) + if not preserve: + print(f"--> Deleting {from_path} on local machine") + os.remove(from_path) def execute_command(self, command): """Execute a command locally at the data access. 
@@ -305,7 +311,7 @@ def push(self, file_name): :raises IOError: if command generated stderr """ backup = f"{file_name}.bak" - self.move_to(file_name, change_name_to=backup) + self.move_to(file_name, change_name_to=backup, preserve=True) values = { "original": posixpath.join(self.root, file_name), @@ -316,13 +322,16 @@ def push(self, file_name): template = "(flock -x 200; \ conflicts=$(comm -23 {original} {updated} | wc -l); \ if [ $conflicts -eq 0 ]; then mv {updated} {original} -b; \ - else echo CONFLICT_ERROR 1>&2; fi \ + else echo CONFLICT_ERROR 1>&2; fi) \ 200>{lockfile}" command = template.format(**values) _, _, stderr = self.execute_command(command) - if len(stderr.readlines()) > 0: + errors = stderr.readlines() + if len(errors) > 0: + for e in errors: + print(e) raise IOError("Failed to push file - most likely a conflict was detected.") def close(self): From 7d1976bda03edf618d273ea818df4f422c776deb Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Thu, 11 Mar 2021 15:22:53 -0800 Subject: [PATCH 008/108] fix: use hash to verify changes instead of diff --- powersimdata/data_access/csv_store.py | 25 ++++++++++-- powersimdata/data_access/data_access.py | 47 ++++++++++++++++++++--- powersimdata/data_access/execute_list.py | 12 ++++-- powersimdata/data_access/scenario_list.py | 12 ++++-- powersimdata/scenario/execute.py | 2 +- 5 files changed, 81 insertions(+), 17 deletions(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index fb464aa45..461a0da7c 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -1,3 +1,4 @@ +import functools import os from pathlib import Path @@ -6,6 +7,22 @@ from powersimdata.utility import server_setup +def verify_hash(func): + """Utility function which verifies the sha1sum of the file before writing + it on the server. Operates on methods that return an updated scenario or + execute list. 
+ """ + + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + checksum = self.data_access.checksum(self._FILE_NAME) + table = func(self, *args, **kwargs) + self.commit(table, checksum) + return table + + return wrapper + + class CsvStore: """Base class for common functionality used to manage scenario and execute list stored as csv files on the server @@ -17,12 +34,13 @@ def __init__(self, data_access): """Constructor""" self.data_access = data_access - def get_table(self, filename): + def get_table(self): """Read the given file from the server, falling back to local copy if unable to connect. :return: (*pandas.DataFrame*) -- the specified table as a data frame. """ + filename = self._FILE_NAME local_path = Path(server_setup.LOCAL_DIR, filename) try: @@ -48,10 +66,11 @@ def _parse_csv(self, file_object): table.fillna("", inplace=True) return table.astype(str) - def commit(self, table): + def commit(self, table, checksum): """Save to local directory and upload if needed :param pandas.DataFrame table: the data frame to save + :param str checksum: the checksum prior to download """ table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) - self.data_access.push(self._FILE_NAME) + self.data_access.push(self._FILE_NAME, checksum) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 339713391..929b8b4f5 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -98,10 +98,20 @@ def execute_command_async(self, command): """ raise NotImplementedError - def push(self, file_name): + def checksum(self, relative_path): + """Return the checksum of the file path, and write the content if the + server is remote + + :param str relative_path: path relative to root + :return: (*str*) -- the checksum of the file + """ + raise NotImplementedError + + def push(self, file_name, checksum): """Push the file from local to remote root folder, ensuring integrity :param str 
file_name: the file name, located at the local root + :param str checksum: the checksum prior to download """ raise NotImplementedError @@ -124,13 +134,23 @@ def copy_from(self, file_name, from_dir=None): """ pass - def push(self, file_name): + def push(self, file_name, checksum): """Nothing to be done due to symlink :param str file_name: the file name, located at the local root + :param str checksum: the checksum prior to download """ pass + def checksum(self, relative_path): + """Return dummy value since this is only required for remote + environment + + :param str relative_path: path relative to root + :return: (*str*) -- the checksum of the file + """ + return "dummy_value" + def move_to(self, file_name, to_dir, change_name_to=None, preserve=False): """Copy a file from userspace to data store. @@ -304,10 +324,25 @@ def execute_command_async(self, command): process = Popen(full_command) return process - def push(self, file_name): + def checksum(self, relative_path): + """Return the checksum of the file path (using sha1sum) + + :param str relative_path: path relative to root + :return: (*str*) -- the checksum of the file + """ + full_path = posixpath.join(self.root, relative_path) + self._check_file_exists(full_path) + + command = f"sha1sum {full_path}" + _, stdout, _ = self.execute_command(command) + lines = stdout.readlines() + return lines[0].strip() + + def push(self, file_name, checksum): """Push file_name to remote root :param str file_name: the file name, located at the local root + :param str checksum: the checksum prior to download :raises IOError: if command generated stderr """ backup = f"{file_name}.bak" @@ -317,11 +352,13 @@ def push(self, file_name): "original": posixpath.join(self.root, file_name), "updated": posixpath.join(self.root, backup), "lockfile": posixpath.join(self.root, "scenario.lockfile"), + "checksum": checksum, } template = "(flock -x 200; \ - conflicts=$(comm -23 {original} {updated} | wc -l); \ - if [ $conflicts -eq 0 ]; then mv 
{updated} {original} -b; \ + prev='{checksum}'; \ + curr=$(sha1sum {original}); \ + if [[ $prev == $curr ]]; then mv {updated} {original} -b; \ else echo CONFLICT_ERROR 1>&2; fi) \ 200>{lockfile}" diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index 994f72dd8..91f053bc2 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -1,6 +1,6 @@ import posixpath -from powersimdata.data_access.csv_store import CsvStore +from powersimdata.data_access.csv_store import CsvStore, verify_hash from powersimdata.data_access.sql_store import SqlStore, to_data_frame from powersimdata.utility import server_setup @@ -89,7 +89,7 @@ def get_execute_table(self): :return: (*pandas.DataFrame*) -- execute list as a data frame. """ - return self.get_table(self._FILE_NAME) + return self.get_table() def get_status(self, scenario_id): """Return the status for the scenario @@ -112,26 +112,30 @@ def add_entry(self, scenario_info): scenario_id = int(scenario_info["id"]) return self.set_status(scenario_id, "created") + @verify_hash def set_status(self, scenario_id, status): """Set the scenario status :param int/str scenario_id: the scenario id :param str status: the new status + :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_execute_table() table.loc[int(scenario_id), "status"] = status print(f"--> Setting status={status} in execute table on server") - self.commit(table) + return table + @verify_hash def delete_entry(self, scenario_info): """Deletes entry from execute list on server. 
:param collections.OrderedDict scenario_info: entry to delete + :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_execute_table() scenario_id = int(scenario_info["id"]) table.drop(scenario_id) print("--> Deleting entry in execute table on server") - self.commit(table) + return table diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 4811065cf..514884183 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -3,7 +3,7 @@ import pandas as pd -from powersimdata.data_access.csv_store import CsvStore +from powersimdata.data_access.csv_store import CsvStore, verify_hash from powersimdata.data_access.sql_store import SqlStore, to_data_frame from powersimdata.utility import server_setup @@ -92,7 +92,7 @@ def get_scenario_table(self): :return: (*pandas.DataFrame*) -- scenario list as a data frame. """ - return self.get_table(self._FILE_NAME) + return self.get_table() def generate_scenario_id(self): """Generates scenario id. @@ -138,10 +138,12 @@ def err_message(text): .to_dict("records", into=OrderedDict)[0] ) + @verify_hash def add_entry(self, scenario_info): """Adds scenario to the scenario list file on server. :param collections.OrderedDict scenario_info: entry to add to scenario list. + :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_scenario_table() table.reset_index(inplace=True) @@ -150,16 +152,18 @@ def add_entry(self, scenario_info): table.set_index("id", inplace=True) print("--> Adding entry in %s on server" % self._FILE_NAME) - self.commit(table) + return table + @verify_hash def delete_entry(self, scenario_info): """Deletes entry in scenario list. :param collections.OrderedDict scenario_info: entry to delete from scenario list. 
+ :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_scenario_table() scenario_id = int(scenario_info["id"]) table.drop(scenario_id) print("--> Deleting entry in %s on server" % self._FILE_NAME) - self.commit(table) + return table diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index 4e29622c8..473a2f444 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -196,7 +196,7 @@ def _launch_on_server(self, threads=None, solver=None, extract_data=True): extra_args.append("--threads " + str(threads)) if solver: - extra_args.append("--solver", solver) + extra_args.append("--solver " + solver) if not isinstance(extract_data, bool): raise TypeError("extract_data must be a boolean: 'True' or 'False'") From c8db291e6249164854b5c4e4995411e4a825dd36 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 12 Mar 2021 13:57:51 -0800 Subject: [PATCH 009/108] test: add tests for execute list manager --- powersimdata/data_access/execute_list.py | 2 +- .../data_access/tests/test_execute_csv.py | 73 ++++++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index 91f053bc2..e33a01523 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -135,7 +135,7 @@ def delete_entry(self, scenario_info): """ table = self.get_execute_table() scenario_id = int(scenario_info["id"]) - table.drop(scenario_id) + table.drop(scenario_id, inplace=True) print("--> Deleting entry in execute table on server") return table diff --git a/powersimdata/data_access/tests/test_execute_csv.py b/powersimdata/data_access/tests/test_execute_csv.py index 9f8809116..b0f032d4b 100644 --- a/powersimdata/data_access/tests/test_execute_csv.py +++ b/powersimdata/data_access/tests/test_execute_csv.py @@ -1,10 +1,16 @@ +import os +import shutil +from 
collections import OrderedDict + import pandas as pd import pytest from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal -from powersimdata.data_access.data_access import SSHDataAccess +import powersimdata.utility.templates as templates +from powersimdata.data_access.data_access import LocalDataAccess, SSHDataAccess from powersimdata.data_access.execute_list import ExecuteListManager +from powersimdata.utility import server_setup @pytest.fixture @@ -40,3 +46,68 @@ def test_get_execute_file_from_server_header(execute_table): header = ["status"] assert_array_equal(execute_table.columns, header) assert "id" == execute_table.index.name + + +def clone_template(): + orig = os.path.join(templates.__path__[0], "ExecuteList.csv") + backup = os.path.join(server_setup.LOCAL_DIR, "ExecuteList.csv.test") + shutil.copy(orig, backup) + + +def mock_row(): + return OrderedDict( + [ + ("id", "1"), + ("state", "create"), + ("interconnect", "Western"), + ] + ) + + +@pytest.fixture +def manager(): + clone_template() + data_access = LocalDataAccess() + manager = ExecuteListManager(data_access) + manager._FILE_NAME = "ExecuteList.csv.test" + return manager + + +def test_blank_csv_append(manager): + manager.add_entry(mock_row()) + table = manager.get_execute_table() + assert table.shape == (1, 1) + status = manager.get_status(1) + assert status == "created" + + +def test_set_status(manager): + manager.add_entry(mock_row()) + asdf = "asdf" + result = manager.set_status(1, asdf) + assert result.loc[1, "status"] == asdf + + foo = "foo" + result = manager.set_status("1", foo) + assert result.loc[1, "status"] == foo + + +def test_get_status(manager): + manager.add_entry(mock_row()) + status = manager.get_status(1) + assert status == "created" + + status = manager.get_status("1") + assert status == "created" + + +def test_delete(manager): + manager.add_entry(mock_row()) + table = manager.get_execute_table() + assert table.shape == (1, 1) + + table = 
manager.delete_entry(mock_row()) + assert table.shape == (0, 1) + + table = manager.get_execute_table() + assert table.shape == (0, 1) From d285547b082aad0294d4433a9ae44e0ef5018f43 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 12 Mar 2021 14:22:31 -0800 Subject: [PATCH 010/108] test: add tests for scenario list manager and simplify delete signature --- powersimdata/data_access/execute_list.py | 13 ++-- powersimdata/data_access/scenario_list.py | 13 ++-- .../data_access/tests/test_execute_csv.py | 6 +- .../data_access/tests/test_scenario_csv.py | 75 ++++++++++++++++++- 4 files changed, 89 insertions(+), 18 deletions(-) diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index e33a01523..b21f4700c 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -61,13 +61,13 @@ def set_status(self, scenario_id, status): (status, scenario_id), ) - def delete_entry(self, scenario_info): + def delete_entry(self, scenario_id): """Deletes entry from execute list. - :param collections.OrderedDict scenario_info: entry to delete + :param int/str scenario_id: the id of the scenario """ sql = self.delete("id") - self.cur.execute(sql, (scenario_info["id"],)) + self.cur.execute(sql, (scenario_id,)) class ExecuteListManager(CsvStore): @@ -127,15 +127,14 @@ def set_status(self, scenario_id, status): return table @verify_hash - def delete_entry(self, scenario_info): + def delete_entry(self, scenario_id): """Deletes entry from execute list on server. 
- :param collections.OrderedDict scenario_info: entry to delete + :param int/str scenario_id: the id of the scenario :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_execute_table() - scenario_id = int(scenario_info["id"]) - table.drop(scenario_id, inplace=True) + table.drop(int(scenario_id), inplace=True) print("--> Deleting entry in execute table on server") return table diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 514884183..2285f81d4 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -64,13 +64,13 @@ def add_entry(self, scenario_info): sql = self.insert(subset=scenario_info.keys()) self.cur.execute(sql, tuple(scenario_info.values())) - def delete_entry(self, scenario_info): + def delete_entry(self, scenario_id): """Deletes entry in scenario list. - :param collections.OrderedDict scenario_info: entry to delete from scenario list. + :param int/str scenario_id: the id of the scenario """ sql = self.delete("id") - self.cur.execute(sql, (scenario_info["id"],)) + self.cur.execute(sql, (scenario_id,)) class ScenarioListManager(CsvStore): @@ -155,15 +155,14 @@ def add_entry(self, scenario_info): return table @verify_hash - def delete_entry(self, scenario_info): + def delete_entry(self, scenario_id): """Deletes entry in scenario list. - :param collections.OrderedDict scenario_info: entry to delete from scenario list. 
+ :param int/str scenario_id: the id of the scenario :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_scenario_table() - scenario_id = int(scenario_info["id"]) - table.drop(scenario_id) + table.drop(int(scenario_id), inplace=True) print("--> Deleting entry in %s on server" % self._FILE_NAME) return table diff --git a/powersimdata/data_access/tests/test_execute_csv.py b/powersimdata/data_access/tests/test_execute_csv.py index b0f032d4b..93bdca858 100644 --- a/powersimdata/data_access/tests/test_execute_csv.py +++ b/powersimdata/data_access/tests/test_execute_csv.py @@ -50,8 +50,8 @@ def test_get_execute_file_from_server_header(execute_table): def clone_template(): orig = os.path.join(templates.__path__[0], "ExecuteList.csv") - backup = os.path.join(server_setup.LOCAL_DIR, "ExecuteList.csv.test") - shutil.copy(orig, backup) + dest = os.path.join(server_setup.LOCAL_DIR, "ExecuteList.csv.test") + shutil.copy(orig, dest) def mock_row(): @@ -106,7 +106,7 @@ def test_delete(manager): table = manager.get_execute_table() assert table.shape == (1, 1) - table = manager.delete_entry(mock_row()) + table = manager.delete_entry(1) assert table.shape == (0, 1) table = manager.get_execute_table() diff --git a/powersimdata/data_access/tests/test_scenario_csv.py b/powersimdata/data_access/tests/test_scenario_csv.py index 98a331ab3..6dbc3dde0 100644 --- a/powersimdata/data_access/tests/test_scenario_csv.py +++ b/powersimdata/data_access/tests/test_scenario_csv.py @@ -1,10 +1,16 @@ +import os +import shutil +from collections import OrderedDict + import pandas as pd import pytest from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal -from powersimdata.data_access.data_access import SSHDataAccess +import powersimdata.utility.templates as templates +from powersimdata.data_access.data_access import LocalDataAccess, SSHDataAccess from powersimdata.data_access.scenario_list import ScenarioListManager +from powersimdata.utility 
import server_setup @pytest.fixture @@ -57,3 +63,70 @@ def test_get_scenario_file_local(scenario_table): scm = ScenarioListManager(None) from_local = scm.get_scenario_table() assert_frame_equal(from_local, scenario_table) + + +def clone_template(): + orig = os.path.join(templates.__path__[0], "ScenarioList.csv") + dest = os.path.join(server_setup.LOCAL_DIR, "ScenarioList.csv.test") + shutil.copy(orig, dest) + + +@pytest.fixture +def manager(): + clone_template() + data_access = LocalDataAccess() + manager = ScenarioListManager(data_access) + manager._FILE_NAME = "ScenarioList.csv.test" + yield manager + data_access.close() + + +def mock_row(sid=1): + return OrderedDict( + [ + ("id", str(sid)), + ("plan", "test"), + ("name", "dummy"), + ("state", "create"), + ("grid_model", ""), + ("interconnect", "Western"), + ("base_demand", ""), + ("base_hydro", ""), + ("base_solar", ""), + ("base_wind", ""), + ("change_table", ""), + ("start_date", ""), + ("end_date", ""), + ("interval", ""), + ("engine", ""), + ] + ) + + +def test_generate_id(manager): + new_id = manager.generate_scenario_id() + assert new_id == "1" + + +def test_blank_csv_append(manager): + manager.add_entry(mock_row(1)) + table = manager.add_entry(mock_row(2)) + assert table.shape == (2, 16) + + +def test_get_scenario(manager): + manager.add_entry(mock_row(1)) + manager.add_entry(mock_row(2)) + manager.add_entry(mock_row(3)) + entry = manager.get_scenario(2) + assert entry["id"] == "2" + entry = manager.get_scenario("2") + assert entry["id"] == "2" + + +def test_delete_entry(manager): + manager.add_entry(mock_row(1)) + manager.add_entry(mock_row(2)) + manager.add_entry(mock_row(3)) + table = manager.delete_entry(2) + assert table.shape == (2, 16) From 8c7400eb0346f20a8e8057b0f9f8619220ff96ab Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 12 Mar 2021 14:44:14 -0800 Subject: [PATCH 011/108] test: fix db tests --- powersimdata/data_access/tests/test_execute_table.py | 5 
+++-- powersimdata/data_access/tests/test_scenario_table.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/powersimdata/data_access/tests/test_execute_table.py b/powersimdata/data_access/tests/test_execute_table.py index a38e8d92e..b1969694c 100644 --- a/powersimdata/data_access/tests/test_execute_table.py +++ b/powersimdata/data_access/tests/test_execute_table.py @@ -83,7 +83,8 @@ def test_update_entry(store): @pytest.mark.db def test_delete_entry(store): info = _get_test_row() + sid = info["id"] store.add_entry(info) - store.delete_entry(info) - status = store.get_status(info["id"]) + store.delete_entry(sid) + status = store.get_status(sid) assert status.shape == (0, 0) diff --git a/powersimdata/data_access/tests/test_scenario_table.py b/powersimdata/data_access/tests/test_scenario_table.py index 1b6392b73..3b3ad0167 100644 --- a/powersimdata/data_access/tests/test_scenario_table.py +++ b/powersimdata/data_access/tests/test_scenario_table.py @@ -95,7 +95,8 @@ def test_add_entry_missing_required_raises(): @pytest.mark.db def test_delete_entry(store): info = _get_test_row() + sid = info["id"] store.add_entry(info) - store.delete_entry(info) - entry = store.get_scenario_by_id(info["id"]) + store.delete_entry(sid) + entry = store.get_scenario_by_id(sid) assert entry.shape == (0, 0) From 1f35f1c6d8694a3abb78150b4a18217138da6de2 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 12 Mar 2021 16:26:49 -0800 Subject: [PATCH 012/108] test: create path if needed --- powersimdata/data_access/tests/test_execute_csv.py | 1 + powersimdata/data_access/tests/test_scenario_csv.py | 1 + powersimdata/utility/server_setup.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/powersimdata/data_access/tests/test_execute_csv.py b/powersimdata/data_access/tests/test_execute_csv.py index 93bdca858..4e5eaf0e8 100644 --- a/powersimdata/data_access/tests/test_execute_csv.py +++ 
b/powersimdata/data_access/tests/test_execute_csv.py @@ -51,6 +51,7 @@ def test_get_execute_file_from_server_header(execute_table): def clone_template(): orig = os.path.join(templates.__path__[0], "ExecuteList.csv") dest = os.path.join(server_setup.LOCAL_DIR, "ExecuteList.csv.test") + os.makedirs(server_setup.LOCAL_DIR, exist_ok=True) shutil.copy(orig, dest) diff --git a/powersimdata/data_access/tests/test_scenario_csv.py b/powersimdata/data_access/tests/test_scenario_csv.py index 6dbc3dde0..5f833973b 100644 --- a/powersimdata/data_access/tests/test_scenario_csv.py +++ b/powersimdata/data_access/tests/test_scenario_csv.py @@ -68,6 +68,7 @@ def test_get_scenario_file_local(scenario_table): def clone_template(): orig = os.path.join(templates.__path__[0], "ScenarioList.csv") dest = os.path.join(server_setup.LOCAL_DIR, "ScenarioList.csv.test") + os.makedirs(server_setup.LOCAL_DIR, exist_ok=True) shutil.copy(orig, dest) diff --git a/powersimdata/utility/server_setup.py b/powersimdata/utility/server_setup.py index af5f5f258..aeea66cfb 100644 --- a/powersimdata/utility/server_setup.py +++ b/powersimdata/utility/server_setup.py @@ -10,7 +10,7 @@ BASE_PROFILE_DIR = "raw" INPUT_DIR = "data/input" OUTPUT_DIR = "data/output" -LOCAL_DIR = os.path.join(str(Path.home()), "ScenarioData", "") +LOCAL_DIR = os.path.join(Path.home(), "ScenarioData", "") MODEL_DIR = "/home/bes/pcm" From 3fef005823f9f138e68c786ee59ffa0e575a2ae7 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 12 Mar 2021 16:57:53 -0800 Subject: [PATCH 013/108] chore: cleanup temp csv used in tests --- .../data_access/tests/test_execute_csv.py | 20 ++++++++++--------- .../data_access/tests/test_scenario_csv.py | 5 +++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/powersimdata/data_access/tests/test_execute_csv.py b/powersimdata/data_access/tests/test_execute_csv.py index 4e5eaf0e8..1766699a5 100644 --- a/powersimdata/data_access/tests/test_execute_csv.py +++ 
b/powersimdata/data_access/tests/test_execute_csv.py @@ -53,6 +53,17 @@ def clone_template(): dest = os.path.join(server_setup.LOCAL_DIR, "ExecuteList.csv.test") os.makedirs(server_setup.LOCAL_DIR, exist_ok=True) shutil.copy(orig, dest) + return dest + + +@pytest.fixture +def manager(): + test_csv = clone_template() + data_access = LocalDataAccess() + manager = ExecuteListManager(data_access) + manager._FILE_NAME = "ExecuteList.csv.test" + yield manager + os.remove(test_csv) def mock_row(): @@ -65,15 +76,6 @@ def mock_row(): ) -@pytest.fixture -def manager(): - clone_template() - data_access = LocalDataAccess() - manager = ExecuteListManager(data_access) - manager._FILE_NAME = "ExecuteList.csv.test" - return manager - - def test_blank_csv_append(manager): manager.add_entry(mock_row()) table = manager.get_execute_table() diff --git a/powersimdata/data_access/tests/test_scenario_csv.py b/powersimdata/data_access/tests/test_scenario_csv.py index 5f833973b..813c2c835 100644 --- a/powersimdata/data_access/tests/test_scenario_csv.py +++ b/powersimdata/data_access/tests/test_scenario_csv.py @@ -70,16 +70,17 @@ def clone_template(): dest = os.path.join(server_setup.LOCAL_DIR, "ScenarioList.csv.test") os.makedirs(server_setup.LOCAL_DIR, exist_ok=True) shutil.copy(orig, dest) + return dest @pytest.fixture def manager(): - clone_template() + test_csv = clone_template() data_access = LocalDataAccess() manager = ScenarioListManager(data_access) manager._FILE_NAME = "ScenarioList.csv.test" yield manager - data_access.close() + os.remove(test_csv) def mock_row(sid=1): From 0275a563dee9c3165a4fe97a3047066c525b41b8 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 12 Mar 2021 18:25:33 -0800 Subject: [PATCH 014/108] chore: update usages of delete method --- powersimdata/scenario/delete.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index 
85daf1b5e..85ea11f17 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -30,23 +30,22 @@ def delete_scenario(self): """Deletes scenario on server.""" # Delete entry in scenario list - self._scenario_list_manager.delete_entry(self._scenario_info) - self._execute_list_manager.delete_entry(self._scenario_info) + scenario_id = self._scenario_info["id"] + self._scenario_list_manager.delete_entry(scenario_id) + self._execute_list_manager.delete_entry(scenario_id) + + wildcard = f"{scenario_id}_*" # Delete links to base profiles on server print("--> Deleting scenario input data on server") - target = posixpath.join( - self.path_config.input_dir(), "%s_*" % (self._scenario_info["id"]) - ) + target = posixpath.join(self.path_config.input_dir(), wildcard) _, _, stderr = self._data_access.remove(target, recursive=False, force=True) if len(stderr.readlines()) != 0: raise IOError("Failed to delete scenario input data on server") # Delete output profiles print("--> Deleting scenario output data on server") - target = posixpath.join( - self.path_config.output_dir(), "%s_*" % (self._scenario_info["id"]) - ) + target = posixpath.join(self.path_config.output_dir(), wildcard) _, _, stderr = self._data_access.remove(target, recursive=False, force=True) if len(stderr.readlines()) != 0: raise IOError("Failed to delete scenario output data on server") @@ -54,7 +53,7 @@ def delete_scenario(self): # Delete temporary folder enclosing simulation inputs print("--> Deleting temporary folder on server") tmp_dir = posixpath.join( - self.path_config.execute_dir(), "scenario_%s" % (self._scenario_info["id"]) + self.path_config.execute_dir(), f"scenario_{scenario_id}" ) _, _, stderr = self._data_access.remove(tmp_dir, recursive=True, force=True) if len(stderr.readlines()) != 0: @@ -63,9 +62,7 @@ def delete_scenario(self): # Delete local files print("--> Deleting input and output data on local machine") local_file = glob.glob( - os.path.join( - 
server_setup.LOCAL_DIR, "data", "**", self._scenario_info["id"] + "_*" - ) + os.path.join(server_setup.LOCAL_DIR, "data", "**", wildcard) ) for f in local_file: os.remove(f) From e60960803064cf0d447fb88f3e38adccecae923a Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 17 Mar 2021 14:36:34 -0700 Subject: [PATCH 015/108] chore: simplify import --- powersimdata/data_access/tests/test_execute_csv.py | 3 +-- powersimdata/data_access/tests/test_scenario_csv.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/powersimdata/data_access/tests/test_execute_csv.py b/powersimdata/data_access/tests/test_execute_csv.py index 1766699a5..de598bf85 100644 --- a/powersimdata/data_access/tests/test_execute_csv.py +++ b/powersimdata/data_access/tests/test_execute_csv.py @@ -7,10 +7,9 @@ from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal -import powersimdata.utility.templates as templates from powersimdata.data_access.data_access import LocalDataAccess, SSHDataAccess from powersimdata.data_access.execute_list import ExecuteListManager -from powersimdata.utility import server_setup +from powersimdata.utility import server_setup, templates @pytest.fixture diff --git a/powersimdata/data_access/tests/test_scenario_csv.py b/powersimdata/data_access/tests/test_scenario_csv.py index 813c2c835..74d02a225 100644 --- a/powersimdata/data_access/tests/test_scenario_csv.py +++ b/powersimdata/data_access/tests/test_scenario_csv.py @@ -7,10 +7,9 @@ from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal -import powersimdata.utility.templates as templates from powersimdata.data_access.data_access import LocalDataAccess, SSHDataAccess from powersimdata.data_access.scenario_list import ScenarioListManager -from powersimdata.utility import server_setup +from powersimdata.utility import server_setup, templates @pytest.fixture From 1d2dda060b7ea488c8da2779f2316b98d06284ad Mon 
Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 17 Mar 2021 15:53:15 -0700 Subject: [PATCH 016/108] chore: use unique suffix for temp copy --- powersimdata/data_access/data_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 929b8b4f5..a2fc0c02a 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -345,7 +345,7 @@ def push(self, file_name, checksum): :param str checksum: the checksum prior to download :raises IOError: if command generated stderr """ - backup = f"{file_name}.bak" + backup = f"{file_name}.temp" self.move_to(file_name, change_name_to=backup, preserve=True) values = { From ba1aea63e83f6159203f3036bfd9006ce8c07149 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 17:15:44 -0700 Subject: [PATCH 017/108] test: add test of getting bus demand for scenario in create state --- powersimdata/scenario/tests/test_create.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 powersimdata/scenario/tests/test_create.py diff --git a/powersimdata/scenario/tests/test_create.py b/powersimdata/scenario/tests/test_create.py new file mode 100644 index 000000000..b9aabd4e4 --- /dev/null +++ b/powersimdata/scenario/tests/test_create.py @@ -0,0 +1,11 @@ +import pytest + +from powersimdata.scenario.scenario import Scenario + + +@pytest.mark.ssh +def test_get_bus_demand(): + scenario = Scenario("") + scenario.state.set_builder(interconnect="Texas") + scenario.state.builder.set_base_profile("demand", "vJan2021") + scenario.state.get_bus_demand() From 711e9fbfa1ad4b9e49572589c1680e6d3b5713cc Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 17:17:02 -0700 Subject: [PATCH 018/108] fix: update scenario info before calling get_bus_demand --- 
powersimdata/scenario/create.py | 1 + 1 file changed, 1 insertion(+) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 8a5a70ec3..6008aca6b 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -149,6 +149,7 @@ def get_bus_demand(self): :return: (*pandas.DataFrame*) -- data frame of demand (hour, bus). """ + self._update_scenario_info() grid = self.get_grid() return get_bus_demand(self._scenario_info, grid) From bcefd12918ff87bd4356b267b423ae38b123c402 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 17:20:25 -0700 Subject: [PATCH 019/108] fix: filter demand to zones in grid --- powersimdata/input/input_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 2fc9d176d..94007d0c5 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -144,8 +144,8 @@ def get_bus_demand(scenario_info, grid): :param powersimdata.input.grid.Grid grid: grid to construct bus demand for. :return: (*pandas.DataFrame*) -- data frame of demand. 
""" - demand = InputData().get_data(scenario_info, "demand") bus = grid.bus + demand = InputData().get_data(scenario_info, "demand")[bus.zone_id.unique()] bus["zone_Pd"] = bus.groupby("zone_id")["Pd"].transform("sum") bus["zone_share"] = bus["Pd"] / bus["zone_Pd"] zone_bus_shares = pd.DataFrame( From 42dd4a5e2aa83bc31e33793434d6c5e45d207162 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Tue, 16 Mar 2021 16:19:49 -0700 Subject: [PATCH 020/108] feat: get profile versions from blob storage --- powersimdata/input/input_data.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 94007d0c5..65b2a40b7 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -2,6 +2,7 @@ import posixpath import pandas as pd +import requests from powersimdata.data_access.context import Context from powersimdata.utility import server_setup @@ -11,6 +12,8 @@ profile_kind = {"demand", "hydro", "solar", "wind"} +BLOB_STORAGE = "https://bescienceswebsite.blob.core.windows.net/profiles" + class InputData(object): """Load input data. @@ -85,7 +88,7 @@ def get_data(self, scenario_info, field_name): return data def get_profile_version(self, grid_model, kind): - """Returns available raw profile either from server or local directory. + """Returns available raw profile from blob storage :param str grid_model: grid model. :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. 
@@ -96,20 +99,12 @@ def get_profile_version(self, grid_model, kind): if kind not in profile_kind: raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - query = posixpath.join( - server_setup.DATA_ROOT_DIR, - server_setup.BASE_PROFILE_DIR, - grid_model, - kind + "_*", - ) - stdin, stdout, stderr = self.data_access.execute_command("ls " + query) - if len(stderr.readlines()) != 0: + resp = requests.get(f"{BLOB_STORAGE}/{grid_model}/version.json") + versions = resp.json() + if kind not in versions: print("No %s profiles available." % kind) - version = [] else: - filename = [os.path.basename(line.rstrip()) for line in stdout.readlines()] - version = [f[f.rfind("_") + 1 : -4] for f in filename] - return version + return versions[kind] def _read_data(filepath): From 6e019fed2f620eb92379b09efd2503a9068485e7 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Tue, 16 Mar 2021 17:46:09 -0700 Subject: [PATCH 021/108] feat: download profiles from blob storage --- powersimdata/input/input_data.py | 93 +++++++++++++++++++++----------- powersimdata/scenario/create.py | 2 +- 2 files changed, 64 insertions(+), 31 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 65b2a40b7..d788f08cd 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -1,5 +1,5 @@ import os -import posixpath +import shutil import pandas as pd import requests @@ -15,6 +15,60 @@ BLOB_STORAGE = "https://bescienceswebsite.blob.core.windows.net/profiles" +_file_extension = { + **{"ct": "pkl", "grid": "mat"}, + **{k: "csv" for k in profile_kind}, +} + + +class InputHelper: + def __init__(self, data_access): + self.data_access = data_access + + @staticmethod + def get_file_components(scenario_info, field_name): + ext = _file_extension[field_name] + file_name = scenario_info["id"] + "_" + field_name + "." 
+ ext + from_dir = server_setup.INPUT_DIR + return file_name, from_dir + + def download_file(self, file_name, from_dir): + self.data_access.copy_from(file_name, from_dir) + + +class ProfileHelper: + @staticmethod + def get_file_components(scenario_info, field_name): + ext = _file_extension[field_name] + version = scenario_info["base_" + field_name] + file_name = field_name + "_" + version + "." + ext + from_dir = scenario_info["grid_model"] + return file_name, from_dir + + @staticmethod + def download_file(file_name, from_dir): + url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" + dest = os.path.join(server_setup.LOCAL_DIR, file_name) + with requests.get(url, stream=True) as r: + with open(dest, "wb") as f: + shutil.copyfileobj(r.raw, f) + + return dest + + +def _check_field(field_name): + """Checks field name. + + :param str field_name: *'demand'*, *'hydro'*, *'solar'*, *'wind'*, + *'ct'* or *'grid'*. + :raises ValueError: if not *'demand'*, *'hydro'*, *'solar'*, *'wind'* + *'ct'* or *'grid'* + """ + possible = list(_file_extension.keys()) + if field_name not in possible: + raise ValueError("Only %s data can be loaded" % " | ".join(possible)) + + class InputData(object): """Load input data. @@ -25,25 +79,8 @@ def __init__(self, data_loc=None): """Constructor.""" os.makedirs(server_setup.LOCAL_DIR, exist_ok=True) - self.file_extension = { - **{"ct": "pkl", "grid": "mat"}, - **{k: "csv" for k in profile_kind}, - } - self.data_access = Context.get_data_access(data_loc) - def _check_field(self, field_name): - """Checks field name. - - :param str field_name: *'demand'*, *'hydro'*, *'solar'*, *'wind'*, - *'ct'* or *'grid'*. - :raises ValueError: if not *'demand'*, *'hydro'*, *'solar'*, *'wind'* - *'ct'* or *'grid'* - """ - possible = list(self.file_extension.keys()) - if field_name not in possible: - raise ValueError("Only %s data can be loaded" % " | ".join(possible)) - def get_data(self, scenario_info, field_name): """Returns data either from server or local directory. 
@@ -55,20 +92,15 @@ def get_data(self, scenario_info, field_name): dictionary, or the path to a matfile enclosing the grid data. :raises FileNotFoundError: if file not found on local machine. """ - self._check_field(field_name) - + _check_field(field_name) print("--> Loading %s" % field_name) - ext = self.file_extension[field_name] if field_name in profile_kind: - version = scenario_info["base_" + field_name] - file_name = field_name + "_" + version + "." + ext - from_dir = posixpath.join( - server_setup.BASE_PROFILE_DIR, scenario_info["grid_model"] - ) + helper = ProfileHelper else: - file_name = scenario_info["id"] + "_" + field_name + "." + ext - from_dir = server_setup.INPUT_DIR + helper = InputHelper(self.data_access) + + file_name, from_dir = helper.get_file_components(scenario_info, field_name) filepath = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) key = cache_key(filepath) @@ -82,12 +114,13 @@ def get_data(self, scenario_info, field_name): "%s not found in %s on local machine" % (file_name, server_setup.LOCAL_DIR) ) - self.data_access.copy_from(file_name, from_dir) + helper.download_file(file_name, from_dir) data = _read_data(filepath) _cache.put(key, data) return data - def get_profile_version(self, grid_model, kind): + @staticmethod + def get_profile_version(grid_model, kind): """Returns available raw profile from blob storage :param str grid_model: grid model. diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 6008aca6b..e8d5b99ed 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -334,7 +334,7 @@ def get_base_profile(self, kind): :param str kind: one of *'demand'*, *'hydro'*, *'solar'*, *'wind'*. :return: (*list*) -- available version for selected profile kind. """ - return InputData().get_profile_version(self.grid_model, kind) + return InputData.get_profile_version(self.grid_model, kind) def set_base_profile(self, kind, version): """Sets demand profile. 
From aa8c034bed4756c606cf4f738eed371bfdec619d Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 17 Mar 2021 13:08:46 -0700 Subject: [PATCH 022/108] fix: use consistent paths --- powersimdata/input/input_data.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index d788f08cd..e16dd9db6 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -42,17 +42,21 @@ def get_file_components(scenario_info, field_name): ext = _file_extension[field_name] version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + "." + ext - from_dir = scenario_info["grid_model"] + grid_model = scenario_info["grid_model"] + from_dir = f"{server_setup.BASE_PROFILE_DIR}/{grid_model}" return file_name, from_dir @staticmethod def download_file(file_name, from_dir): + print(f"--> Downloading {file_name} from blob storage.") url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" - dest = os.path.join(server_setup.LOCAL_DIR, file_name) + dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + os.makedirs(os.path.dirname(dest), exist_ok=True) with requests.get(url, stream=True) as r: with open(dest, "wb") as f: shutil.copyfileobj(r.raw, f) + print("--> Done!") return dest @@ -132,7 +136,7 @@ def get_profile_version(grid_model, kind): if kind not in profile_kind: raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - resp = requests.get(f"{BLOB_STORAGE}/{grid_model}/version.json") + resp = requests.get(f"{BLOB_STORAGE}/raw/{grid_model}/version.json") versions = resp.json() if kind not in versions: print("No %s profiles available." 
% kind) From 0c3ea4d22de85f4fd016c3d99b38a31c76d3213a Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 17 Mar 2021 13:49:15 -0700 Subject: [PATCH 023/108] feat: progress bar for download --- powersimdata/input/input_data.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index e16dd9db6..04e757264 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -1,8 +1,8 @@ import os -import shutil import pandas as pd import requests +from tqdm.auto import tqdm from powersimdata.data_access.context import Context from powersimdata.utility import server_setup @@ -52,9 +52,19 @@ def download_file(file_name, from_dir): url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) - with requests.get(url, stream=True) as r: - with open(dest, "wb") as f: - shutil.copyfileobj(r.raw, f) + resp = requests.get(url, stream=True) + content_length = int(resp.headers.get("content-length", 0)) + with open(dest, "wb") as f: + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + total=content_length, + ) as pbar: + for chunk in resp.iter_content(chunk_size=4096): + f.write(chunk) + pbar.update(len(chunk)) print("--> Done!") return dest From 55ad3a35f759a1759780e614ba4ca44469c407a2 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Thu, 18 Mar 2021 11:53:30 -0700 Subject: [PATCH 024/108] refactor: use top level version list to simplify paths --- powersimdata/input/input_data.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 04e757264..b37ffa5a3 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -12,8 +12,6 @@ profile_kind 
= {"demand", "hydro", "solar", "wind"} -BLOB_STORAGE = "https://bescienceswebsite.blob.core.windows.net/profiles" - _file_extension = { **{"ct": "pkl", "grid": "mat"}, @@ -21,6 +19,9 @@ } +BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles" + + class InputHelper: def __init__(self, data_access): self.data_access = data_access @@ -49,7 +50,7 @@ def get_file_components(scenario_info, field_name): @staticmethod def download_file(file_name, from_dir): print(f"--> Downloading {file_name} from blob storage.") - url = f"{BLOB_STORAGE}/{from_dir}/{file_name}" + url = f"{BASE_URL}/{from_dir}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) resp = requests.get(url, stream=True) @@ -146,12 +147,11 @@ def get_profile_version(grid_model, kind): if kind not in profile_kind: raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - resp = requests.get(f"{BLOB_STORAGE}/raw/{grid_model}/version.json") - versions = resp.json() - if kind not in versions: - print("No %s profiles available." % kind) - else: - return versions[kind] + resp = requests.get(f"{BASE_URL}/version.json") + version = resp.json() + if grid_model in version and kind in version[grid_model]: + return version[grid_model][kind] + print("No %s profiles available." 
% kind) def _read_data(filepath): From 32491bd5486f4bf87d41e98b26b7f2333ce63265 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 19 Mar 2021 11:07:52 -0700 Subject: [PATCH 025/108] chore: remove base profile dir and fix pandas warning --- powersimdata/input/input_data.py | 2 +- powersimdata/input/transform_profile.py | 2 +- powersimdata/scenario/move.py | 17 ----------------- powersimdata/utility/server_setup.py | 4 ---- 4 files changed, 2 insertions(+), 23 deletions(-) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index b37ffa5a3..062c09a3b 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -44,7 +44,7 @@ def get_file_components(scenario_info, field_name): version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + "." + ext grid_model = scenario_info["grid_model"] - from_dir = f"{server_setup.BASE_PROFILE_DIR}/{grid_model}" + from_dir = f"raw/{grid_model}" return file_name, from_dir @staticmethod diff --git a/powersimdata/input/transform_profile.py b/powersimdata/input/transform_profile.py index 69cbf4603..1e7826d89 100644 --- a/powersimdata/input/transform_profile.py +++ b/powersimdata/input/transform_profile.py @@ -103,7 +103,7 @@ def _get_demand_profile(self): :return: (*pandas.DataFrame*) -- data frame of demand. 
""" zone_id = sorted(self.grid.bus.zone_id.unique()) - demand = self._input_data.get_data(self.scenario_info, "demand")[zone_id] + demand = self._input_data.get_data(self.scenario_info, "demand").loc[:, zone_id] if bool(self.ct) and "demand" in list(self.ct.keys()): for key, value in self.ct["demand"]["zone_id"].items(): print( diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index f93d6afc4..2a3743ef2 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -35,7 +35,6 @@ def move_scenario(self, target="disk"): backup = BackUpDisk(self._data_access, self._scenario_info) backup.move_input_data() - backup.copy_base_profile() backup.move_output_data() backup.move_temporary_folder() @@ -76,22 +75,6 @@ def move_input_data(self): self._data_access.copy(source, target, update=True) self._data_access.remove(source, recursive=True, force=True) - def copy_base_profile(self): - """Copies base profile""" - print("--> Copying base profiles to backup disk") - for kind in ["demand", "hydro", "solar", "wind"]: - src = posixpath.join( - self.server_config.base_profile_dir(), - self._scenario_info["grid_model"], - kind + "_" + self._scenario_info["base_" + kind] + ".csv", - ) - dest = posixpath.join( - self.backup_config.base_profile_dir(), self._scenario_info["grid_model"] - ) - _, stdout, stderr = self._data_access.copy(src, dest, update=True) - print(stdout.readlines()) - print(stderr.readlines()) - def move_output_data(self): """Moves output data""" print("--> Moving scenario output data to backup disk") diff --git a/powersimdata/utility/server_setup.py b/powersimdata/utility/server_setup.py index aeea66cfb..07fdcb759 100644 --- a/powersimdata/utility/server_setup.py +++ b/powersimdata/utility/server_setup.py @@ -7,7 +7,6 @@ BACKUP_DATA_ROOT_DIR = "/mnt/RE-Storage/v2" DATA_ROOT_DIR = "/mnt/bes/pcm" EXECUTE_DIR = "tmp" -BASE_PROFILE_DIR = "raw" INPUT_DIR = "data/input" OUTPUT_DIR = "data/output" LOCAL_DIR = 
os.path.join(Path.home(), "ScenarioData", "") @@ -36,9 +35,6 @@ def _join(self, rel_path): def execute_dir(self): return self._join(EXECUTE_DIR) - def base_profile_dir(self): - return self._join(BASE_PROFILE_DIR) - def input_dir(self): return self._join(INPUT_DIR) From 1f9291f6ad89d6c56c5d6f56f50575c397cc164a Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 19 Mar 2021 17:04:37 -0700 Subject: [PATCH 026/108] feat: support custom profiles through local version.json --- powersimdata/data_access/data_access.py | 25 ++++++++ powersimdata/data_access/profile_helper.py | 72 ++++++++++++++++++++++ powersimdata/input/input_data.py | 56 ++--------------- powersimdata/scenario/create.py | 2 +- 4 files changed, 102 insertions(+), 53 deletions(-) create mode 100644 powersimdata/data_access/profile_helper.py diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index a2fc0c02a..3fc82a4a5 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -1,3 +1,4 @@ +import json import operator import os import posixpath @@ -7,6 +8,7 @@ import paramiko from tqdm import tqdm +from powersimdata.data_access.profile_helper import ProfileHelper from powersimdata.utility import server_setup from powersimdata.utility.helpers import CommandBuilder @@ -115,6 +117,9 @@ def push(self, file_name, checksum): """ raise NotImplementedError + def get_profile_version(self, grid_model, kind): + return ProfileHelper.get_profile_version(grid_model, kind) + def close(self): """Perform any necessary cleanup for the object.""" pass @@ -191,6 +196,26 @@ def wrap(s): ) return wrap(None), wrap(proc.stdout), wrap(proc.stderr) + def get_profile_version(self, grid_model, kind): + """Returns available raw profile from blob storage or local disk + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. 
+ """ + blob_versions = super().get_profile_version(grid_model, kind) + version_file = os.path.join(server_setup.LOCAL_DIR, "version.json") + if not os.path.exists(version_file): + return blob_versions + with open(version_file) as f: + version = json.load(f) + return list( + set( + blob_versions + + ProfileHelper.parse_version(grid_model, kind, version) + ) + ) + class SSHDataAccess(DataAccess): """Interface to a remote data store, accessed via SSH.""" diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py new file mode 100644 index 000000000..aa53fe302 --- /dev/null +++ b/powersimdata/data_access/profile_helper.py @@ -0,0 +1,72 @@ +import os + +import requests +from tqdm.auto import tqdm + +from powersimdata.utility import server_setup + + +class ProfileHelper: + BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles" + + @staticmethod + def get_file_components(scenario_info, field_name): + version = scenario_info["base_" + field_name] + file_name = field_name + "_" + version + ".csv" + grid_model = scenario_info["grid_model"] + from_dir = f"raw/{grid_model}" + return file_name, from_dir + + @staticmethod + def download_file(file_name, from_dir): + print(f"--> Downloading {file_name} from blob storage.") + url = f"{ProfileHelper.BASE_URL}/{from_dir}/{file_name}" + dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + os.makedirs(os.path.dirname(dest), exist_ok=True) + resp = requests.get(url, stream=True) + content_length = int(resp.headers.get("content-length", 0)) + with open(dest, "wb") as f: + with tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + miniters=1, + total=content_length, + ) as pbar: + for chunk in resp.iter_content(chunk_size=4096): + f.write(chunk) + pbar.update(len(chunk)) + + print("--> Done!") + return dest + + @staticmethod + def parse_version(grid_model, kind, version): + """Parse available versions from the given spec + + :param str grid_model: grid model. 
+ :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :param dict version: json response + :return: (*list*) -- available profile version. + :raises ValueError: if kind not one of *'demand'*, *'hydro'*, *'solar'* or + *'wind'*. + """ + profile_kind = {"demand", "hydro", "solar", "wind"} + if kind not in profile_kind: + raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) + + if grid_model in version and kind in version[grid_model]: + return version[grid_model][kind] + print("No %s profiles available." % kind) + + @staticmethod + def get_profile_version(grid_model, kind): + """Returns available raw profile from blob storage + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. + """ + + resp = requests.get(f"{ProfileHelper.BASE_URL}/version.json") + return ProfileHelper.parse_version(grid_model, kind, resp.json()) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 062c09a3b..c6a42aa06 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -1,10 +1,9 @@ import os import pandas as pd -import requests -from tqdm.auto import tqdm from powersimdata.data_access.context import Context +from powersimdata.data_access.profile_helper import ProfileHelper from powersimdata.utility import server_setup from powersimdata.utility.helpers import MemoryCache, cache_key @@ -19,9 +18,6 @@ } -BASE_URL = "https://bescienceswebsite.blob.core.windows.net/profiles" - - class InputHelper: def __init__(self, data_access): self.data_access = data_access @@ -37,40 +33,6 @@ def download_file(self, file_name, from_dir): self.data_access.copy_from(file_name, from_dir) -class ProfileHelper: - @staticmethod - def get_file_components(scenario_info, field_name): - ext = _file_extension[field_name] - version = scenario_info["base_" + field_name] - file_name = field_name + "_" + version + "." 
+ ext - grid_model = scenario_info["grid_model"] - from_dir = f"raw/{grid_model}" - return file_name, from_dir - - @staticmethod - def download_file(file_name, from_dir): - print(f"--> Downloading {file_name} from blob storage.") - url = f"{BASE_URL}/{from_dir}/{file_name}" - dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) - os.makedirs(os.path.dirname(dest), exist_ok=True) - resp = requests.get(url, stream=True) - content_length = int(resp.headers.get("content-length", 0)) - with open(dest, "wb") as f: - with tqdm( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - total=content_length, - ) as pbar: - for chunk in resp.iter_content(chunk_size=4096): - f.write(chunk) - pbar.update(len(chunk)) - - print("--> Done!") - return dest - - def _check_field(field_name): """Checks field name. @@ -134,24 +96,14 @@ def get_data(self, scenario_info, field_name): _cache.put(key, data) return data - @staticmethod - def get_profile_version(grid_model, kind): - """Returns available raw profile from blob storage + def get_profile_version(self, grid_model, kind): + """Returns available raw profile from blob storage or local disk :param str grid_model: grid model. :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. :return: (*list*) -- available profile version. - :raises ValueError: if kind not one of *'demand'*, *'hydro'*, *'solar'* or - *'wind'*. """ - if kind not in profile_kind: - raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - - resp = requests.get(f"{BASE_URL}/version.json") - version = resp.json() - if grid_model in version and kind in version[grid_model]: - return version[grid_model][kind] - print("No %s profiles available." 
% kind) + return self.data_access.get_profile_version(grid_model, kind) def _read_data(filepath): diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index e8d5b99ed..6008aca6b 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -334,7 +334,7 @@ def get_base_profile(self, kind): :param str kind: one of *'demand'*, *'hydro'*, *'solar'*, *'wind'*. :return: (*list*) -- available version for selected profile kind. """ - return InputData.get_profile_version(self.grid_model, kind) + return InputData().get_profile_version(self.grid_model, kind) def set_base_profile(self, kind, version): """Sets demand profile. From d73908985237b267c8a8f85d264a3bb6d566113c Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 19 Mar 2021 17:34:53 -0700 Subject: [PATCH 027/108] chore: remove redundant validation --- powersimdata/data_access/profile_helper.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index aa53fe302..d66191386 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -48,13 +48,7 @@ def parse_version(grid_model, kind, version): :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. :param dict version: json response :return: (*list*) -- available profile version. - :raises ValueError: if kind not one of *'demand'*, *'hydro'*, *'solar'* or - *'wind'*. """ - profile_kind = {"demand", "hydro", "solar", "wind"} - if kind not in profile_kind: - raise ValueError("kind must be one of %s" % " | ".join(profile_kind)) - if grid_model in version and kind in version[grid_model]: return version[grid_model][kind] print("No %s profiles available." 
% kind) From 505f53dd040a5743dd1cbcff307ce0cb766dac12 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 22 Mar 2021 12:17:33 -0700 Subject: [PATCH 028/108] test: add unit tests and move some logic around --- powersimdata/data_access/data_access.py | 16 +++---------- powersimdata/data_access/profile_helper.py | 20 +++++++++++++++- .../data_access/tests/test_profile_helper.py | 24 +++++++++++++++++++ powersimdata/input/tests/test_input_data.py | 20 ++++++++++++++++ 4 files changed, 66 insertions(+), 14 deletions(-) create mode 100644 powersimdata/data_access/tests/test_profile_helper.py create mode 100644 powersimdata/input/tests/test_input_data.py diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 3fc82a4a5..9ff6bec80 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -1,4 +1,3 @@ -import json import operator import os import posixpath @@ -203,18 +202,9 @@ def get_profile_version(self, grid_model, kind): :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. :return: (*list*) -- available profile version. 
""" - blob_versions = super().get_profile_version(grid_model, kind) - version_file = os.path.join(server_setup.LOCAL_DIR, "version.json") - if not os.path.exists(version_file): - return blob_versions - with open(version_file) as f: - version = json.load(f) - return list( - set( - blob_versions - + ProfileHelper.parse_version(grid_model, kind, version) - ) - ) + blob_version = super().get_profile_version(grid_model, kind) + local_version = ProfileHelper.get_profile_version_local(grid_model, kind) + return list(set(blob_version + local_version)) class SSHDataAccess(DataAccess): diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index d66191386..d3f689db0 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -1,3 +1,4 @@ +import json import os import requests @@ -46,12 +47,13 @@ def parse_version(grid_model, kind, version): :param str grid_model: grid model. :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. - :param dict version: json response + :param dict version: version information per grid model :return: (*list*) -- available profile version. """ if grid_model in version and kind in version[grid_model]: return version[grid_model][kind] print("No %s profiles available." % kind) + return [] @staticmethod def get_profile_version(grid_model, kind): @@ -64,3 +66,19 @@ def get_profile_version(grid_model, kind): resp = requests.get(f"{ProfileHelper.BASE_URL}/version.json") return ProfileHelper.parse_version(grid_model, kind, resp.json()) + + @staticmethod + def get_profile_version_local(grid_model, kind): + """Returns available raw profile from local file + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. 
+ """ + + version_file = os.path.join(server_setup.LOCAL_DIR, "version.json") + if not os.path.exists(version_file): + return [] + with open(version_file) as f: + version = json.load(f) + return ProfileHelper.parse_version(grid_model, kind, version) diff --git a/powersimdata/data_access/tests/test_profile_helper.py b/powersimdata/data_access/tests/test_profile_helper.py new file mode 100644 index 000000000..03423a525 --- /dev/null +++ b/powersimdata/data_access/tests/test_profile_helper.py @@ -0,0 +1,24 @@ +from powersimdata.data_access.profile_helper import ProfileHelper + + +def test_parse_version_default(): + assert [] == ProfileHelper.parse_version("usa_tamu", "solar", {}) + + +def test_parse_version_missing_key(): + version = {"solar": ["v123"]} + assert [] == ProfileHelper.parse_version("usa_tamu", "solar", version) + + +def test_parse_version(): + expected = ["v123", "v456"] + version = {"usa_tamu": {"solar": expected}} + assert expected == ProfileHelper.parse_version("usa_tamu", "solar", version) + assert [] == ProfileHelper.parse_version("usa_tamu", "hydro", version) + + +def test_get_file_components(): + s_info = {"base_wind": "v8", "grid_model": "europe"} + file_name, from_dir = ProfileHelper.get_file_components(s_info, "wind") + assert "wind_v8.csv" == file_name + assert "raw/europe" == from_dir diff --git a/powersimdata/input/tests/test_input_data.py b/powersimdata/input/tests/test_input_data.py new file mode 100644 index 000000000..0e37b3b20 --- /dev/null +++ b/powersimdata/input/tests/test_input_data.py @@ -0,0 +1,20 @@ +import pytest + +from powersimdata.input.input_data import InputHelper, _check_field + + +def test_get_file_components(): + s_info = {"id": "123"} + ct_file, _ = InputHelper.get_file_components(s_info, "ct") + grid_file, from_dir = InputHelper.get_file_components(s_info, "grid") + assert "123_ct.pkl" == ct_file + assert "123_grid.mat" == grid_file + assert "data/input" == from_dir + + +def test_check_field(): + _check_field("demand") 
+ _check_field("hydro") + with pytest.raises(ValueError): + _check_field("foo") + _check_field("coal") From 27123e56023162c6acf589690c215f4004c19585 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 22 Mar 2021 14:28:01 -0700 Subject: [PATCH 029/108] chore: more specific method name --- powersimdata/data_access/data_access.py | 2 +- powersimdata/data_access/profile_helper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 9ff6bec80..4b5d8b15a 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -117,7 +117,7 @@ def push(self, file_name, checksum): raise NotImplementedError def get_profile_version(self, grid_model, kind): - return ProfileHelper.get_profile_version(grid_model, kind) + return ProfileHelper.get_profile_version_cloud(grid_model, kind) def close(self): """Perform any necessary cleanup for the object.""" diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index d3f689db0..808fc96ca 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -56,7 +56,7 @@ def parse_version(grid_model, kind, version): return [] @staticmethod - def get_profile_version(grid_model, kind): + def get_profile_version_cloud(grid_model, kind): """Returns available raw profile from blob storage :param str grid_model: grid model. 
From 41cce2429ed21716ec8e7e0aa768a4c617a4ffa8 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 22 Mar 2021 15:53:02 -0700 Subject: [PATCH 030/108] docs: add missing docstrings --- powersimdata/data_access/data_access.py | 6 ++++++ powersimdata/data_access/profile_helper.py | 13 +++++++++++++ powersimdata/input/input_data.py | 11 +++++++++++ 3 files changed, 30 insertions(+) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 4b5d8b15a..879dc5af5 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -117,6 +117,12 @@ def push(self, file_name, checksum): raise NotImplementedError def get_profile_version(self, grid_model, kind): + """Returns available raw profile from blob storage + + :param str grid_model: grid model. + :param str kind: *'demand'*, *'hydro'*, *'solar'* or *'wind'*. + :return: (*list*) -- available profile version. + """ return ProfileHelper.get_profile_version_cloud(grid_model, kind) def close(self): diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index 808fc96ca..47ed30f08 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -12,6 +12,13 @@ class ProfileHelper: @staticmethod def get_file_components(scenario_info, field_name): + """Get the file name and relative path for the given profile and + scenario. 
+ + :param dict scenario_info: a ScenarioInfo instance + :param str field_name: the kind of profile + :return: (*tuple*) -- file name and path + """ version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + ".csv" grid_model = scenario_info["grid_model"] @@ -20,6 +27,12 @@ def get_file_components(scenario_info, field_name): @staticmethod def download_file(file_name, from_dir): + """Download the profile from blob storage at the given path + + :param str file_name: profile csv + :param str from_dir: the path relative to the blob container + :return: (*str*) -- path to downloaded file + """ print(f"--> Downloading {file_name} from blob storage.") url = f"{ProfileHelper.BASE_URL}/{from_dir}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index c6a42aa06..c16d0876a 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -24,12 +24,23 @@ def __init__(self, data_access): @staticmethod def get_file_components(scenario_info, field_name): + """Get the file name and relative path for either ct or grid + + :param dict scenario_info: a ScenarioInfo instance + :param str field_name: the input file type + :return: (*tuple*) -- file name and path + """ ext = _file_extension[field_name] file_name = scenario_info["id"] + "_" + field_name + "." 
+ ext from_dir = server_setup.INPUT_DIR return file_name, from_dir def download_file(self, file_name, from_dir): + """Download the file if using server, otherwise no-op + + :param str file_name: either grid or ct file name + :param str from_dir: the path relative to the root dir + """ self.data_access.copy_from(file_name, from_dir) From 4d5abcc80f9c4fa86e23ce0f6587b4dac9488c18 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 22 Mar 2021 17:47:37 -0700 Subject: [PATCH 031/108] fix: create local path correctly --- powersimdata/data_access/profile_helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index 47ed30f08..6b6d14567 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -22,7 +22,7 @@ def get_file_components(scenario_info, field_name): version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + ".csv" grid_model = scenario_info["grid_model"] - from_dir = f"raw/{grid_model}" + from_dir = os.path.join("raw", grid_model) return file_name, from_dir @staticmethod @@ -34,7 +34,8 @@ def download_file(file_name, from_dir): :return: (*str*) -- path to downloaded file """ print(f"--> Downloading {file_name} from blob storage.") - url = f"{ProfileHelper.BASE_URL}/{from_dir}/{file_name}" + url_path = "/".join(os.path.split(from_dir)) + url = f"{ProfileHelper.BASE_URL}/{url_path}/{file_name}" dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) resp = requests.get(url, stream=True) From 130ef65c1a54a1809ce314981f0d0d7eed19782d Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 11:02:29 -0700 Subject: [PATCH 032/108] feat: import Scenario and Grid objects to main namespace --- powersimdata/__init__.py | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/powersimdata/__init__.py b/powersimdata/__init__.py index e69de29bb..54efb616e 100644 --- a/powersimdata/__init__.py +++ b/powersimdata/__init__.py @@ -0,0 +1,2 @@ +from powersimdata.input.grid import Grid # noqa: F401 +from powersimdata.scenario.scenario import Scenario # noqa: F401 From 3f3b848f071e1f64a8e6e39de916c6d3505bdc60 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 11:06:15 -0700 Subject: [PATCH 033/108] feat: allow empty Scenario() init --- powersimdata/scenario/scenario.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 82de20f2c..791f019aa 100644 --- a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -13,14 +13,15 @@ class Scenario(object): """Handles scenario. - :param int/str descriptor: scenario name or index. + :param int/str descriptor: scenario name or index. If None, default to a Scenario + in Create state. 
""" - def __init__(self, descriptor): + def __init__(self, descriptor=None): """Constructor.""" if isinstance(descriptor, int): descriptor = str(descriptor) - if not isinstance(descriptor, str): + if descriptor is not None and not isinstance(descriptor, str): raise TypeError("Descriptor must be a string or int (for a Scenario ID)") self.data_access = Context.get_data_access() From c4ec43d66ac755862f9885e11c18aca04d0c82f4 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 11:22:49 -0700 Subject: [PATCH 034/108] feat: add exported_methods to State and child classes --- powersimdata/scenario/analyze.py | 21 +++++++++++++++++++++ powersimdata/scenario/create.py | 12 ++++++++++++ powersimdata/scenario/delete.py | 4 ++++ powersimdata/scenario/execute.py | 10 ++++++++++ powersimdata/scenario/move.py | 4 ++++ powersimdata/scenario/state.py | 6 +++--- 6 files changed, 54 insertions(+), 3 deletions(-) diff --git a/powersimdata/scenario/analyze.py b/powersimdata/scenario/analyze.py index 43b4854a1..aa88c21e7 100644 --- a/powersimdata/scenario/analyze.py +++ b/powersimdata/scenario/analyze.py @@ -20,6 +20,27 @@ class Analyze(State): name = "analyze" allowed = [] + exported_methods = { + "get_averaged_cong", + "get_bus_demand", + "get_congl", + "get_congu", + "get_ct", + "get_demand", + "get_hydro", + "get_grid", + "get_dcline_pf", + "get_lmp", + "get_load_shed", + "get_pf", + "get_pg", + "get_solar", + "get_storage_e", + "get_storage_pg", + "get_wind", + "print_infeasibilities", + "print_scenario_info", + } def __init__(self, scenario): """Constructor.""" diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 6008aca6b..eb17312f6 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -24,6 +24,18 @@ class Create(State): name = "create" allowed = [] + exported_methods = { + "create_scenario", + "get_ct", + "get_grid", + "get_demand", + "get_bus_demand", + 
"get_hydro", + "get_solar", + "get_wind", + "print_scenario_info", + "set_builder", + } def __init__(self, scenario): """Constructor.""" diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index 85ea11f17..d00cd9981 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -11,6 +11,10 @@ class Delete(State): name = "delete" allowed = [] + exported_methods = { + "delete_scenario", + "print_scenario_info", + } def print_scenario_info(self): """Prints scenario information. diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index 473a2f444..f358bf1c7 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -22,6 +22,16 @@ class Execute(State): name = "execute" allowed = [] + exported_methods = { + "check_progress", + "extract_simulation_output", + "get_ct", + "get_grid", + "launch_simulation", + "prepare_simulation_input", + "print_scenario_info", + "print_scenario_status", + } def __init__(self, scenario): """Constructor.""" diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index 2a3743ef2..657d8d827 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -12,6 +12,10 @@ class Move(State): name = "move" allowed = [] + exported_methods = { + "move_scenario", + "print_scenario_info", + } def print_scenario_info(self): """Prints scenario information.""" diff --git a/powersimdata/scenario/state.py b/powersimdata/scenario/state.py index 46fde4945..cc818f4b4 100644 --- a/powersimdata/scenario/state.py +++ b/powersimdata/scenario/state.py @@ -34,7 +34,7 @@ def switch(self, state): print("State switching: %s --> %s" % (self, state.name)) self._leave() self.__class__ = state - self._enter() + self._enter(state) else: raise Exception( "State switching: %s --> %s not permitted" % (self, state.name) @@ -55,6 +55,6 @@ def _leave(self): del self.grid del self.ct - def _enter(self): + def _enter(self, state): 
"""Initializes when entering state.""" - pass + self.exported_methods = state.exported_methods From 21cc0ed8c02ece2bc9f308fce3df7ec9f79f19ec Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 13:23:25 -0700 Subject: [PATCH 035/108] feat: add check in Scenario for state methods --- powersimdata/scenario/scenario.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 791f019aa..1ae75ff3e 100644 --- a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -42,6 +42,18 @@ def __init__(self, descriptor=None): except AttributeError: return + def __getattr__(self, name): + if name in self.state.exported_methods: + return getattr(self.state, name) + else: + raise AttributeError( + f"Scenario object in {self.state.name} state " + f"has no attribute {name}" + ) + + def __dir__(self): + return sorted(super().__dir__() + list(self.state.exported_methods)) + def _set_info(self, descriptor): """Sets scenario information. 
From c4222924637d94d030015d32b94899acbd42269c Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 16:10:43 -0700 Subject: [PATCH 036/108] feat: add access to _Builder attributes from Create --- powersimdata/scenario/create.py | 49 +++++++++++++++++++++++-------- powersimdata/scenario/scenario.py | 2 ++ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index eb17312f6..597b9c056 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -15,6 +15,19 @@ from powersimdata.scenario.state import State from powersimdata.utility import server_setup +default_exported_methods = ( + "create_scenario", + "get_ct", + "get_grid", + "get_demand", + "get_bus_demand", + "get_hydro", + "get_solar", + "get_wind", + "print_scenario_info", + "set_builder", +) + class Create(State): """Scenario is in a state of being created. @@ -24,18 +37,6 @@ class Create(State): name = "create" allowed = [] - exported_methods = { - "create_scenario", - "get_ct", - "get_grid", - "get_demand", - "get_bus_demand", - "get_hydro", - "get_solar", - "get_wind", - "print_scenario_info", - "set_builder", - } def __init__(self, scenario): """Constructor.""" @@ -61,8 +62,24 @@ def __init__(self, scenario): ("engine", ""), ] ) + self.exported_methods = set(default_exported_methods) super().__init__(scenario) + def __getattr__(self, name): + if self.builder is not None: + if name in self.builder.exported_methods: + return getattr(self.builder, name) + else: + raise AttributeError(f"Create object has no attribute {name}") + else: + raise AttributeError( + f"Create object without a builder set has no attribute {name}. " + "Did you forget to run set_builder?" 
+ ) + + def _custom_getattr_error_message(self, name): + return self.__getattr__(name) + def _update_scenario_info(self): """Updates scenario information.""" if self.builder is not None: @@ -245,6 +262,7 @@ def set_builder(self, grid_model="usa_tamu", interconnect="USA"): self.builder = _Builder( grid_model, interconnect, self._scenario_list_manager.get_scenario_table() ) + self.exported_methods |= _Builder.exported_methods print("--> Summary") print("# Existing study") @@ -282,6 +300,13 @@ class _Builder(object): solar = "" wind = "" engine = "REISE.jl" + exported_methods = { + "set_name", + "set_time", + "set_base_profile", + "set_engine", + "get_grid", + } def __init__(self, grid_model, interconnect, table): """Constructor.""" diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 1ae75ff3e..44b947bf7 100644 --- a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -45,6 +45,8 @@ def __init__(self, descriptor=None): def __getattr__(self, name): if name in self.state.exported_methods: return getattr(self.state, name) + elif hasattr(self.state, "_custom_getattr_error_message"): + return self.state._custom_getattr_error_message(name) else: raise AttributeError( f"Scenario object in {self.state.name} state " From 7c21da0f880277a7f012e401719b90cc50a57379 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 18 Mar 2021 16:45:05 -0700 Subject: [PATCH 037/108] refactor: move builder-requiring methods to Builder class --- powersimdata/scenario/create.py | 144 ++++++++++++++------------------ 1 file changed, 62 insertions(+), 82 deletions(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 597b9c056..5bb75f391 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -17,13 +17,7 @@ default_exported_methods = ( "create_scenario", - "get_ct", - "get_grid", - "get_demand", "get_bus_demand", - "get_hydro", - 
"get_solar", - "get_wind", "print_scenario_info", "set_builder", ) @@ -120,59 +114,6 @@ def _upload_change_table(self): file_name = self._scenario_info["id"] + "_ct.pkl" self._data_access.move_to(file_name, server_setup.INPUT_DIR) - def get_ct(self): - """Returns change table. - - :return: (*dict*) -- change table. - :raises Exception: if :attr:`builder` has not been assigned yet through - meth:`set_builder`. - """ - if self.builder is not None: - return copy.deepcopy(self.builder.change_table.ct) - else: - raise Exception("change table not set") - - def get_grid(self): - """Returns the Grid object. - - :return: (*powersimdata.input.grid.Grid*) -- a Grid object. - :raises Exception: if :attr:`builder` has not been assigned yet through - meth:`set_builder`. - """ - if self.builder is not None: - return self.builder.get_grid() - else: - raise Exception("grid not set") - - def get_profile(self, kind): - """Returns demand, hydro, solar or wind profile. - - :param str kind: either *'demand'*, *'hydro'*, *'solar'*, *'wind'*. - :return: (*pandas.DataFrame*) -- profile. - :raises Exception: if :attr:`builder` has not been assigned yet through - meth:`set_builder` or if :meth:`_Builder.set_base_profile` has not been - called yet. - """ - if getattr(self.builder, kind): - profile = TransformProfile( - { - "grid_model": getattr(self.builder, "grid_model"), - "base_%s" % kind: getattr(self.builder, kind), - }, - self.get_grid(), - self.get_ct(), - ) - return profile.get_profile(kind) - else: - raise Exception("%s profile version not set" % kind) - - def get_demand(self): - """Returns demand profile. - - :return: (*pandas.DataFrame*) -- data frame of demand (hour, zone id). - """ - return self.get_profile("demand") - def get_bus_demand(self): """Returns demand profiles, by bus. @@ -182,27 +123,6 @@ def get_bus_demand(self): grid = self.get_grid() return get_bus_demand(self._scenario_info, grid) - def get_hydro(self): - """Returns hydro profile. 
- - :return: (*pandas.DataFrame*) -- data frame of hydro power output (hour, plant). - """ - return self.get_profile("hydro") - - def get_solar(self): - """Returns solar profile. - - :return: (*pandas.DataFrame*) -- data frame of solar power output (hour, plant). - """ - return self.get_profile("solar") - - def get_wind(self): - """Returns wind profile. - - :return: (*pandas.DataFrame*) -- data frame of wind power output (hour, plant). - """ - return self.get_profile("wind") - def create_scenario(self): """Creates scenario.""" self._update_scenario_info() @@ -301,11 +221,17 @@ class _Builder(object): wind = "" engine = "REISE.jl" exported_methods = { - "set_name", - "set_time", "set_base_profile", "set_engine", + "set_name", + "set_time", + "get_ct", "get_grid", + "get_demand", + "get_hydro", + "get_solar", + "get_wind", + "change_table", } def __init__(self, grid_model, interconnect, table): @@ -320,6 +246,60 @@ def __init__(self, grid_model, interconnect, table): self.existing = table[table.interconnect == self.interconnect] + def get_ct(self): + """Returns change table. + + :return: (*dict*) -- change table. + """ + return copy.deepcopy(self.change_table.ct) + + def get_profile(self, kind): + """Returns demand, hydro, solar or wind profile. + + :param str kind: either *'demand'*, *'hydro'*, *'solar'*, *'wind'*. + :return: (*pandas.DataFrame*) -- profile. + """ + if getattr(self, kind): + profile = TransformProfile( + { + "grid_model": self.grid_model, + "base_%s" % kind: getattr(self, kind), + }, + self.get_grid(), + self.get_ct(), + ) + return profile.get_profile(kind) + else: + raise Exception("%s profile version not set" % kind) + + def get_demand(self): + """Returns demand profile. + + :return: (*pandas.DataFrame*) -- data frame of demand (hour, zone id). + """ + return self.get_profile("demand") + + def get_hydro(self): + """Returns hydro profile. + + :return: (*pandas.DataFrame*) -- data frame of hydro power output (hour, plant). 
+ """ + return self.get_profile("hydro") + + def get_solar(self): + """Returns solar profile. + + :return: (*pandas.DataFrame*) -- data frame of solar power output (hour, plant). + """ + return self.get_profile("solar") + + def get_wind(self): + """Returns wind profile. + + :return: (*pandas.DataFrame*) -- data frame of wind power output (hour, plant). + """ + return self.get_profile("wind") + def set_name(self, plan_name, scenario_name): """Sets scenario name. From 9e414e1a9bee77094883434a65bc73393edc6c6b Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 19 Mar 2021 10:02:41 -0700 Subject: [PATCH 038/108] feat: add set_grid as alias to set_builder --- powersimdata/scenario/create.py | 13 +++++++++++-- powersimdata/scenario/tests/test_create.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 5bb75f391..74e8d9e8b 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -1,5 +1,6 @@ import copy import pickle +import warnings from collections import OrderedDict import numpy as np @@ -20,6 +21,7 @@ "get_bus_demand", "print_scenario_info", "set_builder", + "set_grid", ) @@ -173,8 +175,15 @@ def print_scenario_info(self): for key, val in self._scenario_info.items(): print("%s: %s" % (key, val)) - def set_builder(self, grid_model="usa_tamu", interconnect="USA"): - """Sets builder. + def set_builder(self, *args, **kwargs): + """Alias to set_grid.""" + warnings.warn( + "set_builder is deprecated, use set_grid instead", DeprecationWarning + ) + self.set_grid(*args, **kwargs) + + def set_grid(self, grid_model="usa_tamu", interconnect="USA"): + """Sets grid builder. :param str grid_model: name of grid model. Default is *'usa_tamu'*. :param str/list interconnect: name of interconnect(s). Default is *'USA'*. 
diff --git a/powersimdata/scenario/tests/test_create.py b/powersimdata/scenario/tests/test_create.py index b9aabd4e4..fbb4a8bf7 100644 --- a/powersimdata/scenario/tests/test_create.py +++ b/powersimdata/scenario/tests/test_create.py @@ -6,6 +6,6 @@ @pytest.mark.ssh def test_get_bus_demand(): scenario = Scenario("") - scenario.state.set_builder(interconnect="Texas") + scenario.state.set_grid(interconnect="Texas") scenario.state.builder.set_base_profile("demand", "vJan2021") scenario.state.get_bus_demand() From 8e14901d7aab7997421737ebdfe8e96cf8661f0c Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 19 Mar 2021 11:26:27 -0700 Subject: [PATCH 039/108] refactor: change AttributeError messages for clarity --- powersimdata/scenario/create.py | 17 +++++------------ powersimdata/scenario/scenario.py | 4 ++-- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 74e8d9e8b..1cceeed29 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -62,19 +62,12 @@ def __init__(self, scenario): super().__init__(scenario) def __getattr__(self, name): - if self.builder is not None: - if name in self.builder.exported_methods: - return getattr(self.builder, name) - else: - raise AttributeError(f"Create object has no attribute {name}") + if self.builder is not None and name in self.builder.exported_methods: + return getattr(self.builder, name) + if self.builder is None and name in _Builder.exported_methods: + raise AttributeError(f"Call set_grid first to access {name} attribute") else: - raise AttributeError( - f"Create object without a builder set has no attribute {name}. " - "Did you forget to run set_builder?" 
- ) - - def _custom_getattr_error_message(self, name): - return self.__getattr__(name) + raise AttributeError(f"Create object has no attribute {name}") def _update_scenario_info(self): """Updates scenario information.""" diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 44b947bf7..b271a2c0e 100644 --- a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -45,8 +45,8 @@ def __init__(self, descriptor=None): def __getattr__(self, name): if name in self.state.exported_methods: return getattr(self.state, name) - elif hasattr(self.state, "_custom_getattr_error_message"): - return self.state._custom_getattr_error_message(name) + elif hasattr(self.state, "__getattr__"): + return self.state.__getattr__(name) else: raise AttributeError( f"Scenario object in {self.state.name} state " From 79db71fdddf985f3fb097ae2fb82b7fa37b7f739 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 19 Mar 2021 11:34:53 -0700 Subject: [PATCH 040/108] fix: add __setattr__ check for __getattr__ attributes --- powersimdata/scenario/create.py | 7 +++++++ powersimdata/scenario/scenario.py | 25 ++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 1cceeed29..a88ead170 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -69,6 +69,13 @@ def __getattr__(self, name): else: raise AttributeError(f"Create object has no attribute {name}") + def __setattr__(self, name, value): + if name in _Builder.exported_methods: + raise AttributeError( + f"{name} is exported from Create.builder, edit it there if necessary" + ) + super().__setattr__(name, value) + def _update_scenario_info(self): """Updates scenario information.""" if self.builder is not None: diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index b271a2c0e..6bb07db1a 100644 --- 
a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -4,7 +4,7 @@ from powersimdata.data_access.execute_list import ExecuteListManager from powersimdata.data_access.scenario_list import ScenarioListManager from powersimdata.scenario.analyze import Analyze -from powersimdata.scenario.create import Create +from powersimdata.scenario.create import Create, _Builder from powersimdata.scenario.execute import Execute pd.set_option("display.max_colwidth", None) @@ -17,6 +17,15 @@ class Scenario(object): in Create state. """ + _setattr_allowlist = { + "data_access", + "state", + "status", + "info", + "_scenario_list_manager", + "_execute_list_manager", + } + def __init__(self, descriptor=None): """Constructor.""" if isinstance(descriptor, int): @@ -53,6 +62,20 @@ def __getattr__(self, name): f"has no attribute {name}" ) + def __setattr__(self, name, value): + if name in self._setattr_allowlist: + super().__setattr__(name, value) + elif isinstance(self.state, Create) and name in _Builder.exported_methods: + raise AttributeError( + f"{name} is exported from Scenario.state.builder, " + "edit it there if necessary" + ) + elif name in self.state.exported_methods: + raise AttributeError( + f"{name} is exported from Scenario.state, edit it there if necessary" + ) + super().__setattr__(name, value) + def __dir__(self): return sorted(super().__dir__() + list(self.state.exported_methods)) From dded92824a2305a85b055d3af8f869e886a0d182 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Mon, 22 Mar 2021 08:06:27 -0700 Subject: [PATCH 041/108] refactor: move default exported methods back to class --- powersimdata/scenario/create.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index a88ead170..924aa267f 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -16,14 +16,6 @@ from 
powersimdata.scenario.state import State from powersimdata.utility import server_setup -default_exported_methods = ( - "create_scenario", - "get_bus_demand", - "print_scenario_info", - "set_builder", - "set_grid", -) - class Create(State): """Scenario is in a state of being created. @@ -33,6 +25,13 @@ class Create(State): name = "create" allowed = [] + default_exported_methods = ( + "create_scenario", + "get_bus_demand", + "print_scenario_info", + "set_builder", + "set_grid", + ) def __init__(self, scenario): """Constructor.""" @@ -58,7 +57,7 @@ def __init__(self, scenario): ("engine", ""), ] ) - self.exported_methods = set(default_exported_methods) + self.exported_methods = set(self.default_exported_methods) super().__init__(scenario) def __getattr__(self, name): From 34d703799464665418714a8aa6f66c947d10eec7 Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Thu, 25 Mar 2021 19:21:25 -0700 Subject: [PATCH 042/108] fix: upload storage matfile to server (#427) --- powersimdata/scenario/execute.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index f358bf1c7..5036fb729 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -362,6 +362,10 @@ def prepare_mpc_file(self): self._data_access.move_to( file_name, self.REL_TMP_DIR, change_name_to="case.mat" ) + if len(self.grid.storage["gen"]) > 0: + self._data_access.move_to( + storage_file_name, self.REL_TMP_DIR, change_name_to="case_storage.mat" + ) def prepare_profile(self, kind, profile_as=None): """Prepares profile for simulation. 
From 2577fe578fe0d3e7a2ee3f7183489abad5d4a2e8 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Wed, 24 Mar 2021 15:38:20 -0700 Subject: [PATCH 043/108] ci: add pep8-naming check to flake8 and ignore a bunch of warnings --- powersimdata/design/generation/cost_curves.py | 12 ++--- .../generation/tests/test_cost_curves.py | 50 +++++++++++++++---- powersimdata/design/investment/const.py | 2 +- .../design/investment/create_mapping_files.py | 4 +- .../design/investment/investment_costs.py | 15 ++++-- .../design/transmission/tests/test_upgrade.py | 34 ++++++------- powersimdata/input/tests/test_change_table.py | 8 +-- powersimdata/utility/distance.py | 2 +- tox.ini | 1 + 9 files changed, 83 insertions(+), 45 deletions(-) diff --git a/powersimdata/design/generation/cost_curves.py b/powersimdata/design/generation/cost_curves.py index 4afb73674..69207dcef 100644 --- a/powersimdata/design/generation/cost_curves.py +++ b/powersimdata/design/generation/cost_curves.py @@ -250,8 +250,8 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot= df = df.reset_index(drop=True) # Determine the points that comprise the supply curve - P = [] - F = [] + P = [] # noqa: N806 + F = [] # noqa: N806 p_diff_sum = 0 for i in df.index: P.append(p_diff_sum) @@ -298,7 +298,7 @@ def lower_bound_index(x, l): return i - 1 -def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): +def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): # noqa: N803 """Runs a test that is similar to the Kolmogorov-Smirnov test. This function takes two supply curves as inputs and returns the greatest difference in price between the two supply curves. 
This function requires that the supply curves offer the same @@ -329,11 +329,11 @@ def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): ) # Create a list that has every capacity value in which either supply curve steps up - P_all = list(set(P1) | set(P2)) + P_all = list(set(P1) | set(P2)) # noqa: N806 P_all.sort() # For each capacity value, associate the two corresponding price values - F_all = [] + F_all = [] # noqa: N806 for i in range(len(P_all)): # Determine the correpsonding price from the first supply curve if P_all[i] == P1[-1]: @@ -351,7 +351,7 @@ def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): F_all.append([f1, f2]) # Determine the price differences for each capacity value - F_diff = [abs(F_all[i][0] - F_all[i][1]) for i in range(len(F_all))] + F_diff = [abs(F_all[i][0] - F_all[i][1]) for i in range(len(F_all))] # noqa: N806 # Determine the maximum price difference max_diff = max(F_diff) diff --git a/powersimdata/design/generation/tests/test_cost_curves.py b/powersimdata/design/generation/tests/test_cost_curves.py index 2050ef142..ceea40c59 100644 --- a/powersimdata/design/generation/tests/test_cost_curves.py +++ b/powersimdata/design/generation/tests/test_cost_curves.py @@ -176,17 +176,32 @@ def test_get_supply_data(): def test_build_supply_curve_1seg(): - Ptest, Ftest = build_supply_curve(grid, 1, "Colorado", "ng", "loadzone", plot=False) - Pexp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200] - Fexp = [25.10, 25.10, 30.40, 30.40, 30.40, 30.40, 31.25, 31.25, 40.00, 40.00] + Ptest, Ftest = build_supply_curve( # noqa: N806 + grid, 1, "Colorado", "ng", "loadzone", plot=False + ) + Pexp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200] # noqa: N806 + Fexp = [ # noqa: N806 + 25.10, + 25.10, + 30.40, + 30.40, + 30.40, + 30.40, + 31.25, + 31.25, + 40.00, + 40.00, + ] assert all([Ptest[i] == Pexp[i] for i in range(len(Ptest))]) assert all([Ftest[i] == Fexp[i] for i in range(len(Ptest))]) def test_build_supply_curve_2seg(): - Ptest, Ftest = 
build_supply_curve(grid, 2, "Utah", "coal", "loadzone", plot=False) - Pexp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170] - Fexp = [ + Ptest, Ftest = build_supply_curve( # noqa: N806 + grid, 2, "Utah", "coal", "loadzone", plot=False + ) + Pexp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170] # noqa: N806 + Fexp = [ # noqa: N806 30.100, 30.100, 30.300, @@ -205,9 +220,26 @@ def test_build_supply_curve_2seg(): def test_ks_test(): - P1, F1 = build_supply_curve(grid, 1, "Washington", "coal", "loadzone", plot=False) - P2 = [0, 15, 15, 40, 40, 75, 75, 130, 130, 190, 190, 225, 225, max(P1)] - F2 = [ + P1, F1 = build_supply_curve( # noqa: N806 + grid, 1, "Washington", "coal", "loadzone", plot=False + ) + P2 = [ # noqa: N806 + 0, + 15, + 15, + 40, + 40, + 75, + 75, + 130, + 130, + 190, + 190, + 225, + 225, + max(P1), + ] + F2 = [ # noqa: N806 23.00, 23.00, 27.00, diff --git a/powersimdata/design/investment/const.py b/powersimdata/design/investment/const.py index b878597b7..dde79e23f 100644 --- a/powersimdata/design/investment/const.py +++ b/powersimdata/design/investment/const.py @@ -21,7 +21,7 @@ } # 2020 USD, from MISO cost estimations -hvdc_terminal_cost_per_MW = 135e3 +hvdc_terminal_cost_per_MW = 135e3 # noqa: N816 ac_line_cost = { "kV": [229, 230, 230, 230, 345, 345, 345, 345, 500, 765], diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py index 50f634ae9..3673e87cf 100644 --- a/powersimdata/design/investment/create_mapping_files.py +++ b/powersimdata/design/investment/create_mapping_files.py @@ -223,7 +223,7 @@ def write_poly_shapefile(): """ fiona = _check_import("fiona") shapely_geometry = _check_import("shapely.geometry") - Polygon = shapely_geometry.Polygon + polygon = shapely_geometry.Polygon mapping = shapely_geometry.mapping outpath = const.reeds_wind_shapefile_path @@ -258,7 +258,7 @@ def write_poly_shapefile(): for j in poly_df.index: ls += [(poly_df.loc[j, "long"], 
poly_df.loc[j, "lat"])] - poly = Polygon(ls) + poly = polygon(ls) c.write( { "geometry": mapping(poly), diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 3c18fcf90..e1cab962b 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -82,20 +82,25 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): a message has already been printed that this lookup was not found. :return: (*float*) -- regional multiplier. """ - max_kV = bus.loc[[x.from_bus_id, x.to_bus_id], "baseKV"].max() + max_kV = bus.loc[[x.from_bus_id, x.to_bus_id], "baseKV"].max() # noqa: N806 region = bus_reg.loc[x.from_bus_id, "name_abbr"] region_mults = ac_reg_mult.loc[ac_reg_mult.name_abbr == region] - mult_lookup_kV = region_mults.loc[(region_mults.kV - max_kV).abs().idxmin()].kV - region_kV_mults = region_mults[region_mults.kV == mult_lookup_kV] - region_kV_mults = region_kV_mults.loc[~region_kV_mults.mult.isnull()] + mult_lookup_kV = region_mults.loc[ # noqa: N806 + (region_mults.kV - max_kV).abs().idxmin() + ].kV + region_kV_mults = region_mults[region_mults.kV == mult_lookup_kV] # noqa: N806 + region_kV_mults = region_kV_mults.loc[ # noqa: N806 + ~region_kV_mults.mult.isnull() + ] if len(region_kV_mults) == 0: mult = 1 if (mult_lookup_kV, region) not in xfmr_lookup_alerted: print(f"No multiplier for voltage {mult_lookup_kV} in {region}") xfmr_lookup_alerted.add((mult_lookup_kV, region)) else: - mult_lookup_MW = region_kV_mults.loc[ + + mult_lookup_MW = region_kV_mults.loc[ # noqa: N806 (region_kV_mults.MW - x.rateA).abs().idxmin(), "MW" ] mult = ( diff --git a/powersimdata/design/transmission/tests/test_upgrade.py b/powersimdata/design/transmission/tests/test_upgrade.py index ea2c9c58e..1620a2b43 100644 --- a/powersimdata/design/transmission/tests/test_upgrade.py +++ b/powersimdata/design/transmission/tests/test_upgrade.py @@ -139,43 
+139,43 @@ def setUp(self): self.grid.id2zone = {201: "Wahington", 202: "Oregon"} self.grid.zone2id = {"Washington": 201, "Oregon": 202} - def test_internal_W(self): + def test_internal_W(self): # noqa: N802 branch_idxs = get_branches_by_area(self.grid, {"Washington"}, method="internal") assert branch_idxs == {106, 107, 108} - def test_internal_E(self): + def test_internal_E(self): # noqa: N802 branch_idxs = get_branches_by_area(self.grid, ["Oregon"], method="internal") assert branch_idxs == {102, 103, 104} - def test_internal_EW(self): + def test_internal_EW(self): # noqa: N802 branch_idxs = get_branches_by_area( self.grid, ("Washington", "Oregon"), "internal" ) assert branch_idxs == {102, 103, 104, 106, 107, 108} - def test_bridging_W(self): + def test_bridging_W(self): # noqa: N802 branch_idxs = get_branches_by_area(self.grid, ["Washington"], method="bridging") assert branch_idxs == {101, 105} - def test_bridging_E(self): + def test_bridging_E(self): # noqa: N802 branch_idxs = get_branches_by_area(self.grid, {"Oregon"}, method="bridging") assert branch_idxs == {101, 105} - def test_bridging_EW(self): + def test_bridging_EW(self): # noqa: N802 branch_idxs = get_branches_by_area( self.grid, ("Washington", "Oregon"), "bridging" ) assert branch_idxs == {101, 105} - def test_either_W(self): + def test_either_W(self): # noqa: N802 branch_idxs = get_branches_by_area(self.grid, ("Washington",), method="either") assert branch_idxs == {101, 105, 106, 107, 108} - def test_either_E(self): + def test_either_E(self): # noqa: N802 branch_idxs = get_branches_by_area(self.grid, ("Oregon",), method="either") assert branch_idxs == {101, 102, 103, 104, 105} - def test_either_EW(self): + def test_either_EW(self): # noqa: N802 branch_idxs = get_branches_by_area( self.grid, ("Oregon", "Washington"), "either" ) @@ -267,21 +267,21 @@ def test_identify_mesh_branch_upgrades_quantile90(self): # These tests use the 'MW' ranking: [102, 101, 103] # This happens because 101 is very small, 102 
is small (compared to 103) - def test_identify_mesh_MW_n_3(self): + def test_identify_mesh_MW_n_3(self): # noqa: N802 expected_return = {101, 102, 103} branches = _identify_mesh_branch_upgrades( self.mock_scenario, upgrade_n=3, method="MW" ) self.assertEqual(branches, expected_return) - def test_identify_mesh_MW_n_2(self): + def test_identify_mesh_MW_n_2(self): # noqa: N802 expected_return = {101, 102} branches = _identify_mesh_branch_upgrades( self.mock_scenario, upgrade_n=2, method="MW" ) self.assertEqual(branches, expected_return) - def test_identify_mesh_MW_n_2_allow_list(self): + def test_identify_mesh_MW_n_2_allow_list(self): # noqa: N802 expected_return = {102, 103} allow_list = {102, 103, 104} branches = _identify_mesh_branch_upgrades( @@ -289,7 +289,7 @@ def test_identify_mesh_MW_n_2_allow_list(self): ) self.assertEqual(branches, expected_return) - def test_identify_mesh_MW_n_2_deny_list(self): + def test_identify_mesh_MW_n_2_deny_list(self): # noqa: N802 expected_return = {101, 103} deny_list = [102, 105] branches = _identify_mesh_branch_upgrades( @@ -297,7 +297,7 @@ def test_identify_mesh_MW_n_2_deny_list(self): ) self.assertEqual(branches, expected_return) - def test_identify_mesh_MW_n_1(self): + def test_identify_mesh_MW_n_1(self): # noqa: N802 expected_return = {102} branches = _identify_mesh_branch_upgrades( self.mock_scenario, upgrade_n=1, method="MW" @@ -306,21 +306,21 @@ def test_identify_mesh_MW_n_1(self): # These tests use the 'MWmiles' ranking: [101, 102, 103] # This happens because 101 is zero-distance, 102 is short (compared to 103) - def test_identify_mesh_MWmiles_n_3(self): + def test_identify_mesh_MWmiles_n_3(self): # noqa: N802 expected_return = {101, 102, 103} branches = _identify_mesh_branch_upgrades( self.mock_scenario, upgrade_n=3, method="MWmiles" ) self.assertEqual(branches, expected_return) - def test_identify_mesh_MWmiles_n_2(self): + def test_identify_mesh_MWmiles_n_2(self): # noqa: N802 expected_return = {101, 102} branches = 
_identify_mesh_branch_upgrades( self.mock_scenario, upgrade_n=2, method="MWmiles" ) self.assertEqual(branches, expected_return) - def test_identify_mesh_MWmiles_n_1(self): + def test_identify_mesh_MWmiles_n_1(self): # noqa: N802 expected_return = {101} branches = _identify_mesh_branch_upgrades( self.mock_scenario, upgrade_n=1, method="MWmiles" diff --git a/powersimdata/input/tests/test_change_table.py b/powersimdata/input/tests/test_change_table.py index 777cff772..be3c1b8be 100644 --- a/powersimdata/input/tests/test_change_table.py +++ b/powersimdata/input/tests/test_change_table.py @@ -102,13 +102,13 @@ def test_add_dcline_in_different_interconnect(ct): assert ct.ct == expected -def test_add_dcline_Pmin_and_Pmax_success(ct): +def test_add_dcline_Pmin_and_Pmax_success(ct): # noqa: N802 new_dcline = [{"Pmax": 2000, "Pmin": 0, "from_bus_id": 200, "to_bus_id": 2000}] ct.add_dcline(new_dcline) assert ct.ct == {"new_dcline": new_dcline} -def test_add_dcline_Pmin_gt_Pmax(ct): +def test_add_dcline_Pmin_gt_Pmax(ct): # noqa: N802 new_dcline = [{"Pmax": 2000, "Pmin": 3000, "from_bus_id": 200, "to_bus_id": 2000}] with pytest.raises(ValueError) as excinfo: ct.add_dcline(new_dcline) @@ -116,7 +116,7 @@ def test_add_dcline_Pmin_gt_Pmax(ct): assert ct.ct == {} -def test_add_dcline_Pmin_and_Pmax_and_capacity(ct): +def test_add_dcline_Pmin_and_Pmax_and_capacity(ct): # noqa: N802 new_dcline = [ {"Pmax": 200, "Pmin": -200, "capacity": 10, "from_bus_id": 1, "to_bus_id": 2} ] @@ -146,7 +146,7 @@ def test_add_branch_zero_distance_between_buses(ct): assert ct.ct == {} -def test_add_branch_Pmin_and_Pmax(ct): +def test_add_branch_Pmin_and_Pmax(ct): # noqa: N802 new_dcline = [{"Pmax": 2000, "Pmin": 0, "from_bus_id": 200, "to_bus_id": 2000}] with pytest.raises(ValueError) as excinfo: ct.add_branch(new_dcline) diff --git a/powersimdata/utility/distance.py b/powersimdata/utility/distance.py index 3761228fa..2a5295882 100644 --- a/powersimdata/utility/distance.py +++ 
b/powersimdata/utility/distance.py @@ -9,7 +9,7 @@ def haversine(point1, point2): :return: (*float*) -- distance in miles. """ - _AVG_EARTH_RADIUS_MILES = 3958.7613 + _AVG_EARTH_RADIUS_MILES = 3958.7613 # noqa: N806 # unpack latitude/longitude lat1, lng1 = point1 diff --git a/tox.ini b/tox.ini index 27d644276..cf914c7f2 100644 --- a/tox.ini +++ b/tox.ini @@ -11,6 +11,7 @@ deps = {format,checkformatting}: black {format,checkformatting}: isort flake8: flake8 + flake8: pep8-naming commands = pytest: pipenv sync --dev ci: pytest -m 'not ssh' From 9980b91149ab4f221fed9ddbbc6dc7dd3268f80b Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Thu, 25 Mar 2021 13:10:31 -0700 Subject: [PATCH 044/108] chore: rename to fix warnings, noqa Polygon --- .../design/investment/create_mapping_files.py | 4 ++-- .../design/transmission/tests/test_upgrade.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py index 3673e87cf..08d8b2167 100644 --- a/powersimdata/design/investment/create_mapping_files.py +++ b/powersimdata/design/investment/create_mapping_files.py @@ -223,7 +223,7 @@ def write_poly_shapefile(): """ fiona = _check_import("fiona") shapely_geometry = _check_import("shapely.geometry") - polygon = shapely_geometry.Polygon + Polygon = shapely_geometry.Polygon # noqa: N806 mapping = shapely_geometry.mapping outpath = const.reeds_wind_shapefile_path @@ -258,7 +258,7 @@ def write_poly_shapefile(): for j in poly_df.index: ls += [(poly_df.loc[j, "long"], poly_df.loc[j, "lat"])] - poly = polygon(ls) + poly = Polygon(ls) c.write( { "geometry": mapping(poly), diff --git a/powersimdata/design/transmission/tests/test_upgrade.py b/powersimdata/design/transmission/tests/test_upgrade.py index 1620a2b43..882325425 100644 --- a/powersimdata/design/transmission/tests/test_upgrade.py +++ 
b/powersimdata/design/transmission/tests/test_upgrade.py @@ -139,43 +139,43 @@ def setUp(self): self.grid.id2zone = {201: "Wahington", 202: "Oregon"} self.grid.zone2id = {"Washington": 201, "Oregon": 202} - def test_internal_W(self): # noqa: N802 + def test_internal_washington(self): branch_idxs = get_branches_by_area(self.grid, {"Washington"}, method="internal") assert branch_idxs == {106, 107, 108} - def test_internal_E(self): # noqa: N802 + def test_internal_oregon(self): branch_idxs = get_branches_by_area(self.grid, ["Oregon"], method="internal") assert branch_idxs == {102, 103, 104} - def test_internal_EW(self): # noqa: N802 + def test_internal_multi_state(self): branch_idxs = get_branches_by_area( self.grid, ("Washington", "Oregon"), "internal" ) assert branch_idxs == {102, 103, 104, 106, 107, 108} - def test_bridging_W(self): # noqa: N802 + def test_bridging_washington(self): branch_idxs = get_branches_by_area(self.grid, ["Washington"], method="bridging") assert branch_idxs == {101, 105} - def test_bridging_E(self): # noqa: N802 + def test_bridging_oregon(self): branch_idxs = get_branches_by_area(self.grid, {"Oregon"}, method="bridging") assert branch_idxs == {101, 105} - def test_bridging_EW(self): # noqa: N802 + def test_bridging_multi_state(self): branch_idxs = get_branches_by_area( self.grid, ("Washington", "Oregon"), "bridging" ) assert branch_idxs == {101, 105} - def test_either_W(self): # noqa: N802 + def test_either_washington(self): branch_idxs = get_branches_by_area(self.grid, ("Washington",), method="either") assert branch_idxs == {101, 105, 106, 107, 108} - def test_either_E(self): # noqa: N802 + def test_either_oregon(self): branch_idxs = get_branches_by_area(self.grid, ("Oregon",), method="either") assert branch_idxs == {101, 102, 103, 104, 105} - def test_either_EW(self): # noqa: N802 + def test_either_multi_state(self): branch_idxs = get_branches_by_area( self.grid, ("Oregon", "Washington"), "either" ) From 
9f2a3be819a6b7d7c9cb474913d341349309c781 Mon Sep 17 00:00:00 2001 From: Lane Smith <lane.smith@breakthroughenergy.org> Date: Thu, 25 Mar 2021 19:10:53 -0700 Subject: [PATCH 045/108] refactor: make cost curve visualization variable names more descriptive --- powersimdata/design/generation/cost_curves.py | 277 ++++++++++-------- .../generation/tests/test_cost_curves.py | 38 +-- 2 files changed, 174 insertions(+), 141 deletions(-) diff --git a/powersimdata/design/generation/cost_curves.py b/powersimdata/design/generation/cost_curves.py index 69207dcef..f1511f269 100644 --- a/powersimdata/design/generation/cost_curves.py +++ b/powersimdata/design/generation/cost_curves.py @@ -33,9 +33,9 @@ def linearize_gencost(input_grid, num_segments=1): raise ValueError("gencost currently limited to quadratic") # Access the quadratic cost curve information - old_a = gencost_before.c2 - old_b = gencost_before.c1 - old_c = gencost_before.c0 + quad_term = gencost_before.c2 + lin_term = gencost_before.c1 + const_term = gencost_before.c0 # Convert dispatchable generators to piecewise segments dispatchable_gens = plant.Pmin != plant.Pmax @@ -51,12 +51,18 @@ def linearize_gencost(input_grid, num_segments=1): gencost_after.loc[dispatchable_gens, "n"] = num_segments + 1 power_step = (plant.Pmax - plant.Pmin) / num_segments for i in range(num_segments + 1): - x_label = "p" + str(i + 1) - y_label = "f" + str(i + 1) - x_data = plant.Pmin + power_step * i - y_data = old_a * x_data ** 2 + old_b * x_data + old_c - gencost_after.loc[dispatchable_gens, x_label] = x_data[dispatchable_gens] - gencost_after.loc[dispatchable_gens, y_label] = y_data[dispatchable_gens] + capacity_label = "p" + str(i + 1) + price_label = "f" + str(i + 1) + capacity_data = plant.Pmin + power_step * i + price_data = ( + quad_term * capacity_data ** 2 + lin_term * capacity_data + const_term + ) + gencost_after.loc[dispatchable_gens, capacity_label] = capacity_data[ + dispatchable_gens + ] + 
gencost_after.loc[dispatchable_gens, price_label] = price_data[ + dispatchable_gens + ] else: grid.gencost["after"] = gencost_before.copy() @@ -70,9 +76,9 @@ def linearize_gencost(input_grid, num_segments=1): nondispatchable_gens, "n" ] power = plant.Pmax - y_data = old_a * power ** 2 + old_b * power + old_c + price_data = quad_term * power ** 2 + lin_term * power + const_term gencost_after.loc[nondispatchable_gens, ["c2", "c1"]] = 0 - gencost_after.loc[nondispatchable_gens, "c0"] = y_data[nondispatchable_gens] + gencost_after.loc[nondispatchable_gens, "c0"] = price_data[nondispatchable_gens] gencost_after["interconnect"] = gencost_before["interconnect"] @@ -139,12 +145,13 @@ def get_supply_data(grid, num_segments=1, save=None): return supply_df -def check_supply_data(data, num_segments=1): +def check_supply_data(supply_data, num_segments=1): """Checks to make sure that the input supply data is a DataFrame and has the correct columns. This is especially needed for checking instances where the input supply data is not the DataFrame returned from get_supply_data(). - :param pandas.DataFrame data: DataFrame containing the supply curve information. + :param pandas.DataFrame supply_data: DataFrame containing the supply curve + information. :param int num_segments: The number of segments into which the piecewise linear cost curve will be split. :raises TypeError: if the input supply data is not a pandas.DataFrame. 
@@ -153,8 +160,8 @@ def check_supply_data(data, num_segments=1): """ # Check that the data is input as a DataFrame - if not isinstance(data, pd.DataFrame): - raise TypeError("Supply data must be input as a DataFrame.") + if not isinstance(supply_data, pd.DataFrame): + raise TypeError("supply_data must be input as a DataFrame.") # Mandatory columns to be contained in the DataFrame mand_cols = { @@ -174,7 +181,7 @@ def check_supply_data(data, num_segments=1): mand_cols.update(["p_diff" + str(i), "slope" + str(i)]) # Make sure all of the mandatory columns are contained in the input DataFrame - miss_cols = mand_cols - set(data.columns) + miss_cols = mand_cols - set(supply_data.columns) if len(miss_cols) > 0: raise ValueError(f'Missing columns: {", ".join(miss_cols)}') @@ -211,60 +218,62 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot= ) # Obtain the desired generator cost and plant information data - data = get_supply_data(grid, num_segments) + supply_data = get_supply_data(grid, num_segments) # Check the input supply data - check_supply_data(data, num_segments) + check_supply_data(supply_data, num_segments) # Check to make sure the generator type is valid - if gen_type not in data["type"].unique(): + if gen_type not in supply_data["type"].unique(): raise ValueError(f"{gen_type} is not a valid generation type.") # Identify the load zones that correspond to the specified area and area_type returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type) # Trim the DataFrame to only be of the desired area and generation type - data = data.loc[data.zone_name.isin(returned_zones)] - data = data.loc[data["type"] == gen_type] + supply_data = supply_data.loc[supply_data.zone_name.isin(returned_zones)] + supply_data = supply_data.loc[supply_data["type"] == gen_type] # Remove generators that have no capacity (e.g., Maine coal generators) - if data["slope1"].isnull().values.any(): - data.dropna(subset=["slope1"], inplace=True) + if 
supply_data["slope1"].isnull().values.any(): + supply_data.dropna(subset=["slope1"], inplace=True) # Check if the area contains generators of the specified type - if data.empty: + if supply_data.empty: return [], [] # Combine the p_diff and slope information for each cost segment - df_cols = [] + supply_df_cols = [] for i in range(num_segments): - df_cols.append(data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))]) - df_cols[i].rename( + supply_df_cols.append( + supply_data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))] + ) + supply_df_cols[i].rename( columns={"p_diff" + str(i + 1): "p_diff", "slope" + str(i + 1): "slope"}, inplace=True, ) - df = pd.concat(df_cols, axis=0) + supply_df = pd.concat(supply_df_cols, axis=0) # Sort the trimmed DataFrame by slope - df = df.sort_values(by="slope") - df = df.reset_index(drop=True) + supply_df = supply_df.sort_values(by="slope") + supply_df = supply_df.reset_index(drop=True) # Determine the points that comprise the supply curve - P = [] # noqa: N806 - F = [] # noqa: N806 - p_diff_sum = 0 - for i in df.index: - P.append(p_diff_sum) - F.append(df["slope"][i]) - P.append(df["p_diff"][i] + p_diff_sum) - F.append(df["slope"][i]) - p_diff_sum += df["p_diff"][i] + capacity_data = [] # noqa: N806 + price_data = [] # noqa: N806 + capacity_diff_sum = 0 + for i in supply_df.index: + capacity_data.append(capacity_diff_sum) + price_data.append(supply_df["slope"][i]) + capacity_data.append(supply_df["p_diff"][i] + capacity_diff_sum) + price_data.append(supply_df["slope"][i]) + capacity_diff_sum += supply_df["p_diff"][i] # Plot the curve if plot: plt = _check_import("matplotlib.pyplot") plt.figure(figsize=[20, 10]) - plt.plot(P, F) + plt.plot(capacity_data, price_data) plt.title(f"Supply curve for {gen_type} generators in {area}", fontsize=20) plt.xlabel("Capacity (MW)", fontsize=20) plt.ylabel("Price ($/MW)", fontsize=20) @@ -273,41 +282,49 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot= 
plt.show() # Return the capacity and bid amounts - return P, F + return capacity_data, price_data -def lower_bound_index(x, l): +def lower_bound_index(desired_capacity, capacity_data): """Determines the index of the lower capacity value that defines a price segment. Useful for accessing the prices associated with capacity values that aren't explicitly stated in the capacity lists that are generated by the build_supply_curve() function. Needed for ks_test(). - :param float/int x: Capacity value for which you want to determine the index of the - lowest capacity value in a price segment. - :param list l: List of capacity values used to generate a supply curve. + :param float/int desired_capacity: Capacity value for which you want to determine + the index of the lowest capacity value in a price segment. + :param list capacity_data: List of capacity values used to generate a supply curve. :return: (*int*) -- Index of a price segment's capacity lower bound. """ # Check that the list is not empty and that the capacity falls within the list range - if not l or l[0] > x: + if not capacity_data or capacity_data[0] > desired_capacity: return None - # Get the index of the value that is immediately less than the provided capacity - for i, j in enumerate(l): - if j > x: + # Get the index of the capacity that is immediately less than the desired capacity + for i, j in enumerate(capacity_data): + if j > desired_capacity: return i - 1 -def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): # noqa: N803 +def ks_test( + capacity_data1, + price_data1, + capacity_data2, + price_data2, + area=None, + gen_type=None, + plot=True, +): # noqa: N803 """Runs a test that is similar to the Kolmogorov-Smirnov test. This function takes two supply curves as inputs and returns the greatest difference in price between the two supply curves. This function requires that the supply curves offer the same amount of capacity. - :param list P1: List of capacity values for the first supply curve. 
- :param list F1: List of price values for the first supply curve. - :param list P2: List of capacity values for the second supply curve. - :param list F2: List of price values for the second supply curve. + :param list capacity_data1: List of capacity values for the first supply curve. + :param list price_data1: List of price values for the first supply curve. + :param list capacity_data2: List of capacity values for the second supply curve. + :param list price_data2: List of price values for the second supply curve. :param str area: Either the load zone, state name, state abbreviation, or interconnect. Defaults to None because it's not essential. :param str gen_type: Generation type. Defaults to None because it's not essential. @@ -319,49 +336,55 @@ def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): # noqa: N803 """ # Check that input capacities and prices are provided as lists - if not all(isinstance(i, list) for i in [P1, F1, P2, F2]): - raise TypeError("P1, F1, P2, and F2 must be input as lists.") + if not all( + isinstance(i, list) + for i in [capacity_data1, price_data1, capacity_data2, price_data2] + ): + raise TypeError("Supply curve data must be input as lists.") # Check that the supply curves offer the same amount of capacity - if max(P1) != max(P2): + if max(capacity_data1) != max(capacity_data2): raise ValueError( "The two supply curves do not offer the same amount of capacity (MW)." 
) # Create a list that has every capacity value in which either supply curve steps up - P_all = list(set(P1) | set(P2)) # noqa: N806 - P_all.sort() + capacity_data_all = list(set(capacity_data1) | set(capacity_data2)) # noqa: N806 + capacity_data_all.sort() # For each capacity value, associate the two corresponding price values - F_all = [] # noqa: N806 - for i in range(len(P_all)): + price_data_all = [] # noqa: N806 + for i in range(len(capacity_data_all)): # Determine the correpsonding price from the first supply curve - if P_all[i] == P1[-1]: - f1 = F1[-1] + if capacity_data_all[i] == capacity_data1[-1]: + f1 = price_data1[-1] else: - f1 = F1[lower_bound_index(P_all[i], P1)] + f1 = price_data1[lower_bound_index(capacity_data_all[i], capacity_data1)] # Determine the correpsonding price from the second supply curve - if P_all[i] == P2[-1]: - f2 = F2[-1] + if capacity_data_all[i] == capacity_data2[-1]: + f2 = price_data2[-1] else: - f2 = F2[lower_bound_index(P_all[i], P2)] + f2 = price_data2[lower_bound_index(capacity_data_all[i], capacity_data2)] # Pair the two price values - F_all.append([f1, f2]) + price_data_all.append([f1, f2]) # Determine the price differences for each capacity value - F_diff = [abs(F_all[i][0] - F_all[i][1]) for i in range(len(F_all))] # noqa: N806 + price_data_diff = [ + abs(price_data_all[i][0] - price_data_all[i][1]) + for i in range(len(price_data_all)) + ] # noqa: N806 # Determine the maximum price difference - max_diff = max(F_diff) + max_diff = max(price_data_diff) # Plot the two supply curves overlaid if plot: plt = _check_import("matplotlib.pyplot") plt.figure(figsize=[20, 10]) - plt.plot(P1, F1) - plt.plot(P2, F2) + plt.plot(capacity_data1, price_data1) + plt.plot(capacity_data2, price_data2) if None in {area, gen_type}: plt.title("Supply Curve Comparison", fontsize=20) else: @@ -379,7 +402,7 @@ def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True): # noqa: N803 return max_diff -def plot_c1_vs_c2( +def 
plot_linear_vs_quadratic_terms( grid, area, gen_type, @@ -389,7 +412,8 @@ def plot_c1_vs_c2( num_sd=3, alpha=0.1, ): - """Compares the c1 and c2 parameters from the quadratic generator cost curves. + """Compares the linear (c1) and quadratic (c2) parameters from the quadratic + generator cost curves. :param powersimdata.input.grid.Grid grid: Grid object. :param str area: Either the load zone, state name, state abbreviation, or @@ -398,15 +422,16 @@ def plot_c1_vs_c2( :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*, *'interconnect'*. Defaults to None, which allows :func:`powersimdata.network.model.area_to_loadzone` to infer the type. - :param bool plot: If True, the c1 vs. c2 plot is shown. If False, the plot is not - shown. - :param bool zoom: If True, filters out c2 outliers to enable better visualization. - If False, there is no filtering. - :param float/int num_sd: The number of standard deviations used to filter out c2 - outliers. + :param bool plot: If True, the linear term vs. quadratic term plot is shown. If + False, the plot is not shown. + :param bool zoom: If True, filters out quadratic term outliers to enable better + visualization. If False, there is no filtering. + :param float/int num_sd: The number of standard deviations used to filter out + quadratic term outliers. :param float alpha: The alpha blending value for the scatter plot; takes values between 0 (transparent) and 1 (opaque). - :return: (*None*) -- The c1 vs. c2 plot is displayed according to the user. + :return: (*None*) -- The linear term vs. quadratic term plot is displayed according + to the user. :raises TypeError: if a powersimdata.input.grid.Grid object is not input. :raises ValueError: if the specified area or generator type is not applicable. 
""" @@ -425,7 +450,7 @@ def plot_c1_vs_c2( plant_df = grid.plant # Create a new DataFrame with the desired columns - data = pd.concat( + supply_data = pd.concat( [ plant_df[["type", "interconnect", "zone_name", "Pmin", "Pmax"]], gencost_df[ @@ -438,37 +463,37 @@ def plot_c1_vs_c2( ) # Check to make sure the generator type is valid - if gen_type not in data["type"].unique(): + if gen_type not in supply_data["type"].unique(): raise ValueError(f"{gen_type} is not a valid generation type.") # Identify the load zones that correspond to the specified area and area_type returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type) # Trim the DataFrame to only be of the desired area and generation type - data = data.loc[data.zone_name.isin(returned_zones)] - data = data.loc[data["type"] == gen_type] + supply_data = supply_data.loc[supply_data.zone_name.isin(returned_zones)] + supply_data = supply_data.loc[supply_data["type"] == gen_type] # Remove generators that have no capacity (e.g., Maine coal generators) - data = data[data["Pmin"] != data["Pmax"]] + supply_data = supply_data[supply_data["Pmin"] != supply_data["Pmax"]] # Check if the area contains generators of the specified type - if data.empty: + if supply_data.empty: return # Filters out large c2 outlier values so the overall trend can be better visualized zoom_name = "" if zoom: # Drop values outside a specified number of standard deviations of c2 - sd_c2 = np.std(data["c2"]) - mean_c2 = np.mean(data["c2"]) - cutoff = mean_c2 + num_sd * sd_c2 - if len(data[data["c2"] > cutoff]) > 0: + quad_term_sd = np.std(supply_data["c2"]) + quad_term_mean = np.mean(supply_data["c2"]) + cutoff = quad_term_mean + num_sd * quad_term_sd + if len(supply_data[supply_data["c2"] > cutoff]) > 0: zoom = True - data = data[data["c2"] <= cutoff] - max_ylim = np.max(data["c2"] + 0.01) - min_ylim = np.min(data["c2"] - 0.01) - max_xlim = np.max(data["c1"] + 1) - min_xlim = np.min(data["c1"] - 1) + supply_data = 
supply_data[supply_data["c2"] <= cutoff] + max_ylim = np.max(supply_data["c2"] + 0.01) + min_ylim = np.min(supply_data["c2"] - 0.01) + max_xlim = np.max(supply_data["c1"] + 1) + min_xlim = np.min(supply_data["c1"] - 1) zoom_name = "(zoomed)" else: zoom = False @@ -478,22 +503,24 @@ def plot_c1_vs_c2( fig, ax = plt.subplots() fig.set_size_inches(20, 10) plt.scatter( - data["c1"], - data["c2"], - s=np.sqrt(data["Pmax"]) * 10, + supply_data["c1"], + supply_data["c2"], + s=np.sqrt(supply_data["Pmax"]) * 10, alpha=alpha, - c=data["Pmax"], + c=supply_data["Pmax"], cmap="plasma", ) plt.grid() plt.title( - f"c1 vs. c2 for {gen_type} generators in {area} {zoom_name}", fontsize=20 + f"Linear term vs. Quadratic term for {gen_type} generator cost curves in " + + f"{area} {zoom_name}", + fontsize=20, ) if zoom: plt.ylim([min_ylim, max_ylim]) plt.xlim([min_xlim, max_xlim]) - plt.xlabel("c1", fontsize=20) - plt.ylabel("c2", fontsize=20) + plt.xlabel("Linear Term", fontsize=20) + plt.ylabel("Quadratic Term", fontsize=20) plt.xticks(fontsize=20) plt.yticks(fontsize=20) cbar = plt.colorbar() @@ -537,51 +564,55 @@ def plot_capacity_vs_price( ) # Obtain the desired generator cost and plant information data - data = get_supply_data(grid, num_segments) + supply_data = get_supply_data(grid, num_segments) # Check the input supply data - check_supply_data(data, num_segments) + check_supply_data(supply_data, num_segments) # Check to make sure the generator type is valid - if gen_type not in data["type"].unique(): + if gen_type not in supply_data["type"].unique(): raise ValueError(f"{gen_type} is not a valid generation type.") # Identify the load zones that correspond to the specified area and area_type returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type) # Trim the DataFrame to only be of the desired area and generation type - data = data.loc[data.zone_name.isin(returned_zones)] - data = data.loc[data["type"] == gen_type] + supply_data = 
supply_data.loc[supply_data.zone_name.isin(returned_zones)] + supply_data = supply_data.loc[supply_data["type"] == gen_type] # Remove generators that have no capacity (e.g., Maine coal generators) - if data["slope1"].isnull().values.any(): - data.dropna(subset=["slope1"], inplace=True) + if supply_data["slope1"].isnull().values.any(): + supply_data.dropna(subset=["slope1"], inplace=True) # Check if the area contains generators of the specified type - if data.empty: + if supply_data.empty: return # Combine the p_diff and slope information for each cost segment - df_cols = [] + supply_df_cols = [] for i in range(num_segments): - df_cols.append(data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))]) - df_cols[i].rename( + supply_df_cols.append( + supply_data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))] + ) + supply_df_cols[i].rename( columns={"p_diff" + str(i + 1): "p_diff", "slope" + str(i + 1): "slope"}, inplace=True, ) - df = pd.concat(df_cols, axis=0) - df = df.reset_index(drop=True) + supply_df = pd.concat(supply_df_cols, axis=0) + supply_df = supply_df.reset_index(drop=True) - # Determine the average - total_cap = df["p_diff"].sum() - if total_cap == 0: - data_avg = 0 + # Determine the average price + total_capacity = supply_df["p_diff"].sum() + if total_capacity == 0: + average_price = 0 else: - data_avg = (df["slope"] * df["p_diff"]).sum() / total_cap + average_price = ( + supply_df["slope"] * supply_df["p_diff"] + ).sum() / total_capacity # Plot the comparison if plot: - ax = df.plot.scatter( + ax = supply_df.plot.scatter( x="p_diff", y="slope", s=50, figsize=[20, 10], grid=True, fontsize=20 ) plt.title( @@ -589,5 +620,5 @@ def plot_capacity_vs_price( ) plt.xlabel("Segment Capacity (MW)", fontsize=20) plt.ylabel("Segment Price ($/MW)", fontsize=20) - ax.plot(df["p_diff"], [data_avg] * len(df.index), c="red") + ax.plot(supply_df["p_diff"], [average_price] * len(supply_df.index), c="red") plt.show() diff --git 
a/powersimdata/design/generation/tests/test_cost_curves.py b/powersimdata/design/generation/tests/test_cost_curves.py index ceea40c59..38dfec72f 100644 --- a/powersimdata/design/generation/tests/test_cost_curves.py +++ b/powersimdata/design/generation/tests/test_cost_curves.py @@ -176,11 +176,11 @@ def test_get_supply_data(): def test_build_supply_curve_1seg(): - Ptest, Ftest = build_supply_curve( # noqa: N806 + capacity_test, price_test = build_supply_curve( # noqa: N806 grid, 1, "Colorado", "ng", "loadzone", plot=False ) - Pexp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200] # noqa: N806 - Fexp = [ # noqa: N806 + capacity_exp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200] # noqa: N806 + price_exp = [ # noqa: N806 25.10, 25.10, 30.40, @@ -192,16 +192,16 @@ def test_build_supply_curve_1seg(): 40.00, 40.00, ] - assert all([Ptest[i] == Pexp[i] for i in range(len(Ptest))]) - assert all([Ftest[i] == Fexp[i] for i in range(len(Ptest))]) + assert all([capacity_test[i] == capacity_exp[i] for i in range(len(capacity_test))]) + assert all([price_test[i] == price_exp[i] for i in range(len(capacity_test))]) def test_build_supply_curve_2seg(): - Ptest, Ftest = build_supply_curve( # noqa: N806 + capacity_test, price_test = build_supply_curve( # noqa: N806 grid, 2, "Utah", "coal", "loadzone", plot=False ) - Pexp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170] # noqa: N806 - Fexp = [ # noqa: N806 + capacity_exp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170] # noqa: N806 + price_exp = [ # noqa: N806 30.100, 30.100, 30.300, @@ -215,15 +215,15 @@ def test_build_supply_curve_2seg(): 42.500, 42.500, ] - assert all([Ptest[i] == Pexp[i] for i in range(len(Ptest))]) - assert all([Ftest[i] == Fexp[i] for i in range(len(Ptest))]) + assert all([capacity_test[i] == capacity_exp[i] for i in range(len(capacity_test))]) + assert all([price_test[i] == price_exp[i] for i in range(len(capacity_test))]) def test_ks_test(): - P1, F1 = build_supply_curve( # noqa: N806 + capacity_data1, 
price_data1 = build_supply_curve( # noqa: N806 grid, 1, "Washington", "coal", "loadzone", plot=False ) - P2 = [ # noqa: N806 + capacity_data2 = [ # noqa: N806 0, 15, 15, @@ -237,9 +237,9 @@ def test_ks_test(): 190, 225, 225, - max(P1), + max(capacity_data1), ] - F2 = [ # noqa: N806 + price_data2 = [ # noqa: N806 23.00, 23.00, 27.00, @@ -255,14 +255,16 @@ def test_ks_test(): 38.00, 38.00, ] - test_diff = ks_test(P1, F1, P2, F2, plot=False) + test_diff = ks_test( + capacity_data1, price_data1, capacity_data2, price_data2, plot=False + ) exp_diff = 4.5 assert test_diff == exp_diff def test_lower_bound_index(): - x = 10 - l = [0, 5, 5, 9, 9, 12, 12, 18] - ind_test = lower_bound_index(x, l) + desired_capacity = 10 + capacity_data = [0, 5, 5, 9, 9, 12, 12, 18] + ind_test = lower_bound_index(desired_capacity, capacity_data) ind_exp = 4 assert ind_test == ind_exp From 9f1598eb4bbdf05d6d58881b6d4a99b22231b68e Mon Sep 17 00:00:00 2001 From: Lane Smith <lane.smith@breakthroughenergy.org> Date: Thu, 25 Mar 2021 20:02:39 -0700 Subject: [PATCH 046/108] docs: update README to reflect cost curve visualization refactor --- README.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index afb608e34..ec7bf9abc 100644 --- a/README.md +++ b/README.md @@ -418,51 +418,51 @@ where `scenario` is a `Scenario` instance. #### I. Accessing and Saving Relevant Supply Information Analyzing generator supply and cost curves requires the proper generator cost and plant information to be accessed from a Grid object. This data can be accessed using the following: ```python -supply_df = powersimdata.design.generation.cost_curves.get_supply_data(grid, num_segments) -``` -where `grid` is a `Grid` object and `num_segments` is the number of linearized cost curve segments into which the provided quadratic cost curve should be split. 
+from powersimdata.design.generation.cost_curves import get_supply_data -The above returns a data frame that contains information about each generator's fuel type, quadratic cost curve, and linearized cost curve, as well as the interconnect and load zone to which the generator belongs. The above function can store the data frame as a CSV file if `save` is passed a valid file path and file name string in `get_supply_data`; by default, `save=None`. The `get_supply_data` function is used within many of the following supply and cost curve visualization and analysis functions. +supply_df = get_supply_data(grid, num_segments, save) +``` +where `grid` is a `Grid` object, `num_segments` is the number of linearized cost curve segments into which the provided quadratic cost curve should be split, and `save` is a string representing the desired file path and file name to which the resulting data will be saved. `save` defaults to `None`. `get_supply_data` returns a DataFrame that contains information about each generator's fuel type, quadratic cost curve, and linearized cost curve, as well as the interconnect and load zone to which the generator belongs. `get_supply_data` is used within many of the following supply and cost curve visualization and analysis functions. #### II. Visualizing Generator Supply Curves To obtain the supply curve for a particular fuel type and area, the following is used: ```python -P, F = powersimdata.design.generation.cost_curves.build_supply_curve(grid, num_segments, area, type) -``` -where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; and `type` is a string describing an appropriate fuel type. +from powersimdata.design.generation.cost_curves import build_supply_curve -By default, the above function plots the created supply curve (plotting can be suppressed by including `plot=False` in `build_supply_curve`). 
`P` and `F`, the supply curve capacity and bid quantities, respectively, are also returned. This function also allows for the area type (e.g., load zone, state, and interconnect are different area types) to be specified. By default, the area type is inferred, though there are instances where specifying the area type can be useful (e.g., Texas can refer to both a state and an interconnect, though they are not the same thing). To specify the area type, `area_type` must be passed a valid area type string in `build_supply_curve`. +P, F = build_supply_curve(grid, num_segments, area, gen_type, area_type, plot) +``` +where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; and `plot` is a boolean that indicates whether or not the plot is shown. `area_type` defaults to `None`, which allows the area type to be inferred; there are instances where specifying the area type can be useful (e.g., Texas can refer to both a state and an interconnect, though they are not the same thing). `plot` defaults to `True`. `build_supply_curve` returns `P` and `F`, the supply curve capacity and price quantities, respectively. #### III. Comparing Supply Curves When updating generator cost curve information, it can be useful to see the corresponding effect on the supply curve for a particular area and fuel type pair. Instead of only performing a visual inspection between the original and new supply curves, the maximum price difference between the two supply curves can be calculated. This metric, which is similar to the Kolmogorov-Smirnov test, serves as a goodness-of-fit test between the two supply curves, where a lower score is desired. 
This metric can be calculated as follows: ```python -max_diff = powersimdata.design.generation.cost_curves.ks_test(P1, F1, P2, F2) -``` -where `P1` and `P2` are lists containing supply curve capacity data and `F1` and `F2` are lists containing corresponding supply curve price data. These lists can be created using `build_supply_curve` or can be created manually. +from powersimdata.design.generation.cost_curves import ks_test -It should be noted that the two supply curves must offer the same amount of capacity (i.e., `max(P1) = max(P2)`). By default, the above function plots the two supply curves overlaid on a single plot (plotting can be suppressed by including `plot=False` in `ks_test()`). +max_diff = ks_test(P1, F1, P2, F2, area, gen_type, plot) +``` +where `P1` and `P2` are lists containing supply curve capacity data; `F1` and `F2` are lists containing corresponding supply curve price data; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; and `plot` is a boolean that indicates whether or not the plot is shown. The pairs of supply curve data, (`P1`, `F1`) and (`P2`, `F2`), can be created using `build_supply_curve` or can be created manually. It should be noted that the two supply curves must offer the same amount of capacity (i.e., `max(P1) = max(P2)`). `area` and `gen_type` both default to `None`. `plot` defaults to `True`. `ks_test` returns `max_diff`, which is the maximum price difference between the two supply curves. #### IV. Comparing Cost Curve Parameters -When designing generator cost curves, it can be instructive to visually compare the quadratic cost curve parameters for generators in a particular area and fuel type pair. 
The `c1` and `c2` parameters for a given area and fuel type can be compared in a plot using the following: +When designing generator cost curves, it can be instructive to visually compare the quadratic cost curve parameters for generators in a particular area and fuel type pair. The linear terms (`c1`) and quadratic terms (`c2`) for a given area and fuel type can be compared in a plot using the following: ```python -powersimdata.design.generation.cost_curves.plot_c1_vs_c2(grid, area, type) -``` -where `grid` is a `Grid` object; `area` is a string describing an appropriate load zone, interconnect, or state; and `type` is a string describing an appropriate fuel type. +from powersimdata.design.generation.cost_curves import plot_linear_vs_quadratic_terms -This function features a zoom capability (enabled by including `zoom=True` in `plot_c1_vs_c2`) that filters out `c2` outliers to enable better visualization. `c2` outliers outside of a specified number of standard deviations (the default is `num_sd=3`) are filtered out. The desired number of standard deviations can be changed by defining `num_sd` in `plot_c1_vs_c2`. Similar to `build_supply_curve`, this function also provides users with the ability to specify a particular area type. +plot_linear_vs_quadratic_terms(grid, area, gen_type, area_type, plot, zoom, num_sd, alpha) +``` +where `grid` is a `Grid` object; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; `plot` is a boolean that indicates whether or not the plot is shown; `zoom` is a boolean that indicates whether or not the zoom capability that filters out quadratic term outliers for better visualization is enabled; `num_sd` is the number of standard deviations outside of which quadratic terms are filtered; and `alpha` is the alpha blending parameter for the scatter plot. 
`area_type` defaults to `None`, which allows the area type to be inferred. `plot` defaults to `True`. `zoom` defaults to `False`. `num_sd` defaults to `3`. `alpha`, which can take values between `0` and `1`, defaults to `0.1`. #### V. Comparing Generators by Capacity and Price When designing generator cost curves, it can be useful to visually compare the capacity and price parameters for each generator in a specified area and fuel type pair. The generator capacity and price parameters for a given area and fuel type can be compared in a plot using the following: ```python -powersimdata.design.generation.cost_curves.plot_capacity_vs_price(grid, num_segments, area, type) -``` -where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; and `type` is a string describing an appropriate fuel type. +from powersimdata.design.generation.cost_curves import plot_capacity_vs_price -Similar to `build_supply_curve` and `plot_c1_vs_c2`, this function also provides users with the ability to specify a particular area type. +plot_capacity_vs_price(grid, num_segments, area, gen_type, area_type, plot) +``` +where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; and `plot` is a boolean that indicates whether or not the plot is shown. `area_type` defaults to `None`, which allows the area type to be inferred. `plot` defaults to `True`. 
[PreREISE]: https://github.com/Breakthrough-Energy/PreREISE From 5bb27f8fa49b6ebd6481f0dda6b881e44fdcbf02 Mon Sep 17 00:00:00 2001 From: Lane Smith <lane.smith@breakthroughenergy.org> Date: Fri, 26 Mar 2021 11:08:09 -0700 Subject: [PATCH 047/108] chore: remove noqa comments following cost curve refactor --- powersimdata/design/generation/cost_curves.py | 12 ++++++------ .../generation/tests/test_cost_curves.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/powersimdata/design/generation/cost_curves.py b/powersimdata/design/generation/cost_curves.py index f1511f269..9ff257017 100644 --- a/powersimdata/design/generation/cost_curves.py +++ b/powersimdata/design/generation/cost_curves.py @@ -259,8 +259,8 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot= supply_df = supply_df.reset_index(drop=True) # Determine the points that comprise the supply curve - capacity_data = [] # noqa: N806 - price_data = [] # noqa: N806 + capacity_data = [] + price_data = [] capacity_diff_sum = 0 for i in supply_df.index: capacity_data.append(capacity_diff_sum) @@ -315,7 +315,7 @@ def ks_test( area=None, gen_type=None, plot=True, -): # noqa: N803 +): """Runs a test that is similar to the Kolmogorov-Smirnov test. This function takes two supply curves as inputs and returns the greatest difference in price between the two supply curves. 
This function requires that the supply curves offer the same @@ -349,11 +349,11 @@ def ks_test( ) # Create a list that has every capacity value in which either supply curve steps up - capacity_data_all = list(set(capacity_data1) | set(capacity_data2)) # noqa: N806 + capacity_data_all = list(set(capacity_data1) | set(capacity_data2)) capacity_data_all.sort() # For each capacity value, associate the two corresponding price values - price_data_all = [] # noqa: N806 + price_data_all = [] for i in range(len(capacity_data_all)): # Determine the correpsonding price from the first supply curve if capacity_data_all[i] == capacity_data1[-1]: @@ -374,7 +374,7 @@ def ks_test( price_data_diff = [ abs(price_data_all[i][0] - price_data_all[i][1]) for i in range(len(price_data_all)) - ] # noqa: N806 + ] # Determine the maximum price difference max_diff = max(price_data_diff) diff --git a/powersimdata/design/generation/tests/test_cost_curves.py b/powersimdata/design/generation/tests/test_cost_curves.py index 38dfec72f..5ae2347bd 100644 --- a/powersimdata/design/generation/tests/test_cost_curves.py +++ b/powersimdata/design/generation/tests/test_cost_curves.py @@ -176,11 +176,11 @@ def test_get_supply_data(): def test_build_supply_curve_1seg(): - capacity_test, price_test = build_supply_curve( # noqa: N806 + capacity_test, price_test = build_supply_curve( grid, 1, "Colorado", "ng", "loadzone", plot=False ) - capacity_exp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200] # noqa: N806 - price_exp = [ # noqa: N806 + capacity_exp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200] + price_exp = [ 25.10, 25.10, 30.40, @@ -197,11 +197,11 @@ def test_build_supply_curve_1seg(): def test_build_supply_curve_2seg(): - capacity_test, price_test = build_supply_curve( # noqa: N806 + capacity_test, price_test = build_supply_curve( grid, 2, "Utah", "coal", "loadzone", plot=False ) - capacity_exp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170] # noqa: N806 - price_exp = [ # noqa: N806 + capacity_exp = [0, 
10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170] + price_exp = [ 30.100, 30.100, 30.300, @@ -220,10 +220,10 @@ def test_build_supply_curve_2seg(): def test_ks_test(): - capacity_data1, price_data1 = build_supply_curve( # noqa: N806 + capacity_data1, price_data1 = build_supply_curve( grid, 1, "Washington", "coal", "loadzone", plot=False ) - capacity_data2 = [ # noqa: N806 + capacity_data2 = [ 0, 15, 15, @@ -239,7 +239,7 @@ def test_ks_test(): 225, max(capacity_data1), ] - price_data2 = [ # noqa: N806 + price_data2 = [ 23.00, 23.00, 27.00, From 12c9db7596a256acf9dc4f7aef2994466d78ffc6 Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Fri, 26 Mar 2021 15:21:22 -0700 Subject: [PATCH 048/108] docs: fix and improve docstrings (#424) --- .../design/investment/create_mapping_files.py | 105 ++++++------- .../design/investment/investment_costs.py | 138 ++++++++---------- powersimdata/scenario/analyze.py | 8 +- powersimdata/scenario/create.py | 4 +- 4 files changed, 109 insertions(+), 146 deletions(-) diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py index 08d8b2167..9c0857ea4 100644 --- a/powersimdata/design/investment/create_mapping_files.py +++ b/powersimdata/design/investment/create_mapping_files.py @@ -8,25 +8,23 @@ def sjoin_nearest(left_df, right_df, search_dist=0.06): - """ - Perform a spatial join between two input layers. - If a geometry in left_df falls outside (all) geometries in right_df, the data from - nearest Polygon will be used as a result. - To make queries faster, change "search_dist." + """Perform a spatial join between two input layers. + :param geopandas.GeoDataFrame left_df: A dataframe of Points. - :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons - :param float/int search_dist: parameter (specified in map units) is used to limit - the search area for geometries around source points. Smaller -> faster runtime. 
- :return: (*geopandas.GeoDataFrame*) -- A dataframe of Points mapped to each polygon - in right_df. + :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons. + :param float/int search_dist: radius (in map units) around point to detect polygons. + :return: (*geopandas.GeoDataFrame*) -- data frame of Points mapped to each Polygon. + + .. note:: data from nearest Polygon/Multipolygon will be used as a result if a + Point falls outside all available Polygon/Multipolygons. """ def _find_nearest(series, polygons, search_dist): - """Given a row with a bus id and a Point, find the closest polygon. + """Find the closest polygon. :param pandas.Series series: point to map. :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select from. - :param float search_dist: radius around point to detect polygons in. + :param float search_dist: radius around point to detect polygons. """ geom = series[left_df.geometry.name] # Get geometries within search distance @@ -83,17 +81,16 @@ def _find_nearest(series, polygons, search_dist): def points_to_polys(df, name, shpfile, search_dist=0.04): - """Given a dataframe which includes 'lat' and 'lon' columns, and a shapefile of - Polygons/Multipolygon regions, map df.index to closest regions. - - :param pandas.DataFrame df: includes an index, and 'lat' and 'lon' columns. - :param str name: what to name the id (bus, plant, substation, etc) - :param str shpfile: name of shapefile containing a collection Polygon/Multipolygon - shapes with region IDs. - :param float/int search_dist: distance to search from point for nearest polygon. - :raises ValueError: if some points are dropped because too far away from polys. - :return: (*geopandas.GeoDataFrame*) -- - columns: index id, (point) geometry, [region, other properties of region] + """Map node to closest region. + + :param pandas.DataFrame df: data frame with node id as index and *'lat'* and + *'lon'* as columns. 
+ :param str name: name of node, e.g., bus, plant, substation, etc. + :param str shpfile: shapefile enclosing Polygon/Multipolygon with region id. + :param float/int search_dist: radius around point to detect polygons. + :raises ValueError: if some points are dropped because too far away from polygons. + :return: (*geopandas.GeoDataFrame*) -- columns: id name, (point) geometry, + region and properties of region. """ gpd = _check_import("geopandas") polys = gpd.read_file(shpfile) @@ -123,7 +120,7 @@ def points_to_polys(df, name, shpfile, search_dist=0.04): err_msg = ( "Some points dropped because could not be mapped to regions. " "Check your lat/lon values to be sure it's in the US. " - f"Or increase search_dist if close. Problem ids: {dropped}" + f"Or increase search_dist. ids dropped: {dropped}" ) raise ValueError(err_msg) @@ -131,14 +128,11 @@ def points_to_polys(df, name, shpfile, search_dist=0.04): def bus_to_reeds_reg(df): - """Given a dataframe of buses, return a dataframe of bus_id's with associated - ReEDS regions (wind resource regions (rs) and BA regions (rb)). - Used to map regional generation investment cost multipliers. - region_map.csv is from: "/bokehpivot/in/reeds2/region_map.csv". - rs/rs.shp is created with :py:func:`write_poly_shapefile`. - - :param pandas.DataFrame df: grid bus dataframe. - :return: (*pandas.DataFrame*) -- bus_id map. columns: bus_id, rs, rb + """Map bus to ReEDS regions. + + :param pandas.DataFrame df: bus data frame. + :return: (*pandas.DataFrame*) -- index: bus id, columns rs (wind resource region) + and rb (BA region). """ pts_poly = points_to_polys( df, "bus", const.reeds_wind_shapefile_path, search_dist=2 @@ -156,18 +150,15 @@ def bus_to_reeds_reg(df): def bus_to_neem_reg(df): - """Given a dataframe of buses, return a dataframe of bus_id's with associated - NEEM region, lat, and lon of bus. - Used to map regional transmission investment cost multipliers. 
- Shapefile used to map is 'data/NEEM/NEEMregions.shp' which is pulled from Energy - Zones `Mapping tool <http://ezmt.anl.gov>`_. This map is overly detailed, so I - simplified the shapes using 1 km distance (Douglas-Peucker) method in QGIS. - - :param pandas.DataFrame df: grid.bus instance. - :return: (*pandas.DataFrame*) -- bus_id map. - columns: bus_id, lat, lon, name_abbr (NEEM region) - - Note: mapping may take a while, especially for many points. + """Map bus to NEEM regions. + + :param pandas.DataFrame df: bus data frame. + :return: (*pandas.DataFrame*) -- index: bus id, columns: lat, lon, name_abbr + (NEEM region) + + .. note:: the shapefile used for mapping is pulled from the Energy Zones `Mapping + tool <http://ezmt.anl.gov>`_. This map is overly detailed, so the shapes are + simplified using 1 km distance (Douglas-Peucker) method in QGIS. """ pts_poly = points_to_polys(df, "bus", const.neem_shapefile_path, search_dist=1) @@ -184,11 +175,7 @@ def bus_to_neem_reg(df): def write_bus_neem_map(): - """ - Maps the bus locations from the base USA grid to NEEM regions. - Writes out csv with bus numbers, associated NEEM region, and lat/lon of bus - (to check if consistent with bus location in _calculate_ac_inv_costs). - """ + """Write bus location to NEEM region mapping to file""" base_grid = Grid(["USA"]) df_pts_bus = bus_to_neem_reg(base_grid.bus) df_pts_bus.sort_index(inplace=True) @@ -197,10 +184,7 @@ def write_bus_neem_map(): def write_bus_reeds_map(): - """ - Maps the bus locations from the base USA grid to ReEDS regions. - Writes out csv with bus numbers, associated ReEDS regions, and distances. - """ + """Write bus location to ReEDS region mapping to file.""" base_grid = Grid(["USA"]) df_pts_bus = bus_to_reeds_reg(base_grid.bus) df_pts_bus.sort_index(inplace=True) @@ -209,17 +193,10 @@ def write_bus_reeds_map(): def write_poly_shapefile(): - """ - Converts a ReEDS csv-format file to a shapefile. Shouldn't need to run again - unless new source data. 
- Right now, hard-coded read ReEDS wind resource regions (labelled rs). - gis_rs.csv is from ReEDS open-source: "/bokehpivot/in/gis_rs.csv" - hierarchy.csv is from: "/bokehpivot/in/reeds2/hierarchy.csv" - writes out the shapefile in "rs/rs.shp" - - Note: These ReEDS wind resource region shapes are approximate. Thus, there are - probably some mistakes, but this is currently only used for mapping plant - regional multipliers, which are approximate anyway, so it should be fine. + """Convert ReEDS wind resource csv-format file to a shapefile. + + .. note:: *gis_rs.csv* is from ReEDS open-source: */bokehpivot/in/gis_rs.csv*, + *hierarchy.csv* is from: */bokehpivot/in/reeds2/hierarchy.csv*. """ fiona = _check_import("fiona") shapely_geometry = _check_import("shapely.geometry") diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index e1cab962b..04a8044d0 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -14,16 +14,12 @@ def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): - """Given a Scenario object, calculate the total cost of building that scenario's - upgrades of lines and transformers. - Currently uses NEEM regions to find regional multipliers. - Currently ignores financials, but all values are in 2010 $-year. - Need to test that there aren't any na values in regional multipliers - (some empty parts of table) + """Calculate cost of upgrading AC lines and/or transformers in a scenario. + NEEM regions are used to find regional multipliers. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param boolean sum_results: if True, sum dataframe for each category. - :return: (*dict*) -- Total costs (line costs, transformer costs) (in $2010). + :param bool sum_results: sum data frame for each branch type. + :return: (*dict*) -- cost of upgrading branches in $2010. 
""" base_grid = Grid(scenario.info["interconnect"].split("_")) @@ -44,25 +40,24 @@ def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): def _calculate_ac_inv_costs(grid_new, sum_results=True): - """Given a grid, calculate the total cost of building that grid's - lines and transformers. - This function is separate from calculate_ac_inv_costs() for testing purposes. - Currently counts Transformer and TransformerWinding as transformers. - Currently uses NEEM regions to find regional multipliers. + """Calculate cost of upgrading AC lines and/or transformers. NEEM regions are + used to find regional multipliers. Note that a transformer winding is considered + as a transformer. :param powersimdata.input.grid.Grid grid_new: grid instance. - :param boolean sum_results: if True, sum dataframe for each category. - :return: (*dict*) -- Total costs (line costs, transformer costs). + :param bool sum_results: sum data frame for each branch type. + :return: (*dict*) -- cost of upgrading branches in $2010. """ def select_mw(x, cost_df): - """Given a single branch, determine the closest kV/MW combination and return - the corresponding cost $/MW-mi. - - :param pandas.core.series.Series x: data for a single branch - :param pandas.core.frame.DataFrame cost_df: DataFrame with kV, MW, cost columns - :return: (*pandas.core.series.Series*) -- series of ['MW', 'costMWmi'] to be - assigned to given branch + """Determine the closest kV/MW combination for a single branch and return + the corresponding cost (in $/MW-mi). + + :param pandas.Series x: data for a single branch + :param pandas.DataFrame cost_df: data frame with *'kV'*, *'MW'*, *'costMWmi'* + as columns + :return: (*pandas.Series*) -- series of [*'MW'*, *'costMWmi'*] to be assigned + to branch. 
""" # select corresponding cost table of selected kV @@ -75,9 +70,9 @@ def select_mw(x, cost_df): def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): """Determine the regional multiplier based on kV and power (closest). - :param pandas.core.series.Series x: data for a single transformer. - :param pandas.core.frame.DataFrame bus_reg: data frame with bus regions - :param pandas.core.frame.DataFrame ac_reg_mult: data frame with regional mults. + :param pandas.Series x: data for a single transformer. + :param pandas.DataFrame bus_reg: data frame with bus regions. + :param pandas.DataFrame ac_reg_mult: data frame with regional multipliers. :param set xfmr_lookup_alerted: set of (voltage, region) tuples for which a message has already been printed that this lookup was not found. :return: (*float*) -- regional multiplier. @@ -140,8 +135,8 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1) # check that all buses included in this file and lat/long values match, - # otherwise re-run mapping script on mis-matching buses. - # these buses are missing in region file + # otherwise re-run mapping script on mis-matching buses. These buses are missing + # in region file bus_fix_index = bus[~bus.index.isin(bus_reg.index)].index bus_mask = bus[~bus.index.isin(bus_fix_index)] bus_mask = bus_mask.merge(bus_reg, how="left", on="bus_id") @@ -213,12 +208,11 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): def calculate_dc_inv_costs(scenario, sum_results=True): - """Given a Scenario object, calculate the total cost of that grid's dc line - investment. Currently ignores financials, but all values are in 2015 $-year. + """Calculate cost of upgrading HVDC lines in a scenario. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param boolean sum_results: if True, sum Series to return float. 
- :return: (*pandas.Series/float*) -- [Summed] dc line costs. + :param bool sum_results: sum series to return total cost. + :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015. """ base_grid = Grid(scenario.info["interconnect"].split("_")) grid = scenario.state.get_grid() @@ -235,22 +229,20 @@ def calculate_dc_inv_costs(scenario, sum_results=True): def _calculate_dc_inv_costs(grid_new, sum_results=True): - """Given a grid, calculate the total cost of that grid's dc line investment. - This function is separate from calculate_dc_inv_costs() for testing purposes. + """Calculate cost of upgrading HVDC lines. :param powersimdata.input.grid.Grid grid_new: grid instance. - :param boolean sum_results: if True, sum Series to return float. - :return: (*pandas.Series/float*) -- [Summed] dc line costs. + :param bool sum_results: sum series to return total cost. + :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015. """ def _calculate_single_line_cost(line, bus): - """Given a series representing a DC line upgrade/addition, and a dataframe of - bus locations, calculate this line's upgrade cost. + """Calculate cost of upgrading a single HVDC line. - :param pandas.Series line: DC line series featuring: - {"from_bus_id", "to_bus_id", "Pmax"}. - :param pandas.Dataframe bus: Bus data frame featuring {"lat", "lon"}. - :return: (*float*) -- DC line upgrade cost (in $2015). + :param pandas.Series line: HVDC line series featuring *'from_bus_id'*', + *'to_bus_id'* and *'Pmax'*. + :param pandas.Dataframe bus: bus data frame featuring *'lat'*, *'lon'*. + :return: (*float*) -- HVDC line upgrade cost in $2015. """ # Calculate distance from_lat = bus.loc[line.from_bus_id, "lat"] @@ -280,20 +272,19 @@ def _calculate_single_line_cost(line, bus): def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True): - """Given a Scenario object, calculate the total cost of building that scenario's - upgrades of generation. 
- Currently only uses one (arbutrary) sub-technology. Drops the rest of the costs. - Will want to fix for wind/solar (based on resource supply curves). - Currently uses ReEDS regions to find regional multipliers. + """Calculate cost of upgrading generators in a scenario. ReEDS regions are used to + find regional multipliers. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param int/str year: year of builds. - :param str cost_case: the ATB cost case of data: - 'Moderate': mid cost case, - 'Conservative': generally higher costs, - 'Advanced': generally lower costs - :return: (*pandas.DataFrame*) -- Total generation investment cost summed by + :param int/str year: building year. + :param str cost_case: ATB cost case of data. *'Moderate'*: mid cost case, + *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs + :return: (*pandas.DataFrame*) -- total generation investment cost summed by technology. + + .. todo:: it currently uses one (arbitrary) sub-technology. The rest of the costs + are dropped. Wind and solar will need to be fixed based on the resource supply + curves. """ base_grid = Grid(scenario.info["interconnect"].split("_")) @@ -322,38 +313,33 @@ def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True): def _calculate_gen_inv_costs(grid_new, year, cost_case, sum_results=True): - """Given a grid, calculate the total cost of building that generation investment. - Computes total capital cost as CAPEX_total = - CAPEX ($/MW) * Pmax (MW) * reg_cap_cost_mult (regional cost multiplier) - This function is separate from calculate_gen_inv_costs() for testing purposes. - Currently only uses one (arbutrary) sub-technology. Drops the rest of the costs. - Will want to fix for wind/solar (based on resource supply curves). - Currently uses ReEDS regions to find regional multipliers. + """Calculate cost of upgrading generators. ReEDS regions are used to find + regional multipliers. 
:param powersimdata.input.grid.Grid grid_new: grid instance. - :param int/str year: year of builds (used in financials). - :param str cost_case: the ATB cost case of data: - 'Moderate': mid cost case - 'Conservative': generally higher costs - 'Advanced': generally lower costs + :param int/str year: year of builds. + :param str cost_case: ATB cost case of data. *'Moderate'*: mid cost case + *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs. :raises ValueError: if year not 2020 - 2050, or cost case not an allowed option. - :raises TypeError: if year gets the wrong type, or if cost_case is not str. - :return: (*pandas.Series*) -- Total generation investment cost, - summed by technology. + :raises TypeError: if year not int/str or cost_case not str. + :return: (*pandas.Series*) -- total generation investment cost, summed by + technology. + + .. note:: the function computes the total capital cost as: + CAPEX_total = CAPEX ($/MW) * Pmax (MW) * regional multiplier """ def load_cost(year, cost_case): - """ - Load in base costs from NREL's 2020 ATB for generation technologies (CAPEX). - Can be adapted in the future for FOM, VOM, & CAPEX. - This data is pulled from the ATB xlsx file Summary pages (saved as csv's). - Therefore, currently uses default financials, but will want to create custom - financial functions in the future. + """Load in base costs from NREL's 2020 ATB for generation technologies (CAPEX). :param int/str year: year of cost projections. - :param str cost_case: the ATB cost case of data - (see :py:func:`write_poly_shapefile` for details). - :return: (*pandas.DataFrame*) -- Cost by technology/subtype (in $2018). + :param str cost_case: ATB cost case of data (see + :return: (*pandas.DataFrame*) -- cost by technology/subtype in $2018. + + .. todo:: it can be adapted in the future for FOM, VOM, & CAPEX. This data is + pulled from the ATB xlsx file summary pages. 
Therefore, it currently uses + default financials, but will want to create custom financial functions in + the future. """ cost = pd.read_csv(const.gen_inv_cost_path) cost = cost.dropna(axis=0, how="all") diff --git a/powersimdata/scenario/analyze.py b/powersimdata/scenario/analyze.py index aa88c21e7..950896bf8 100644 --- a/powersimdata/scenario/analyze.py +++ b/powersimdata/scenario/analyze.py @@ -88,10 +88,10 @@ def print_scenario_info(self): print("%s: %s" % (key, val)) def _parse_infeasibilities(self): - """Parses infeasibilities. When the optimizer cannot find a solution in - a time interval, the remedy is to decrease demand by some amount - until a solution is found. The purpose of this function is to get - the interval number(s) and the associated decrease(s). + """Parses infeasibilities. When the optimizer cannot find a solution in a time + interval, the remedy is to decrease demand by some amount until a solution is + found. The purpose of this function is to get the interval number(s) and the + associated decrease(s). :return: (*dict*) -- keys are the interval number and the values are the decrease in percent (%) applied to the original demand profile. diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 924aa267f..19d8573fb 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -175,7 +175,7 @@ def print_scenario_info(self): print("%s: %s" % (key, val)) def set_builder(self, *args, **kwargs): - """Alias to set_grid.""" + """Alias to :func:`~powersimdata.scenario.create.Create.set_grid`""" warnings.warn( "set_builder is deprecated, use set_grid instead", DeprecationWarning ) @@ -327,7 +327,7 @@ def set_name(self, plan_name, scenario_name): def set_time(self, start_date, end_date, interval): """Sets scenario start and end dates as well as the interval that will - be used to split the date range. + be used to split the date range. :param str start_date: start date. 
:param str end_date: start date. From 934c62a20691a4d75932471759fc974ed5937b9b Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Wed, 31 Mar 2021 14:45:43 -0700 Subject: [PATCH 049/108] doc: fix docstring (#435) --- powersimdata/input/case_mat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powersimdata/input/case_mat.py b/powersimdata/input/case_mat.py index 9409f8b43..0d264d728 100644 --- a/powersimdata/input/case_mat.py +++ b/powersimdata/input/case_mat.py @@ -9,7 +9,7 @@ def export_case_mat(grid, filepath, storage_filepath=None): :param powersimdata.input.grid.Grid grid: Grid instance. :param str filepath: path where main grid file will be saved. - :param str filepath: path where storage data file will be saved (if present). + :param str storage_filepath: path where storage data file will be saved, if present. """ grid = copy.deepcopy(grid) From f00bb406ff26c3581322842724c2fe0646a37c06 Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Thu, 1 Apr 2021 08:14:57 -0700 Subject: [PATCH 050/108] docs: improve docstring of Grid class (#437) --- powersimdata/input/grid.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/powersimdata/input/grid.py b/powersimdata/input/grid.py index dedf87680..008d3b373 100644 --- a/powersimdata/input/grid.py +++ b/powersimdata/input/grid.py @@ -14,7 +14,9 @@ class Grid(object): """Grid - :param str/list interconnect: interconnect name(s). + :param str/list interconnect: geographical region covered. Either *'USA'*, one of + the three interconnections, i.e., *'Eastern'*, *'Western'* or *'Texas'* or a + combination of two interconnections. :param str source: model used to build the network. :param str engine: engine used to run scenario, if using ScenarioGrid. :raises TypeError: if source and engine are not both strings. 
From c614883eb3998310318425b6f1a987abe3d407a6 Mon Sep 17 00:00:00 2001 From: jon-hagg <66005238+jon-hagg@users.noreply.github.com> Date: Fri, 2 Apr 2021 10:26:28 -0700 Subject: [PATCH 051/108] ci: build and push docker image (#431) --- .github/workflows/docker-build.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/docker-build.yml diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 000000000..5e9f231a0 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,25 @@ +name: Publish docker image + +on: + push: + branches: + - 'develop' + +jobs: + push_to_registry: + name: Push Docker image to GitHub Packages + runs-on: ubuntu-latest + steps: + - name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.CR_PAT }} + + - name: Build and push + uses: docker/build-push-action@v2 + with: + push: true + tags: | + ghcr.io/breakthrough-energy/powersimdata:latest From eb217bc7dbf42b0540059e8b86687d462f2ae73b Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Tue, 30 Mar 2021 10:47:20 -0700 Subject: [PATCH 052/108] docs: write grid usage --- docs/grid.rst | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 docs/grid.rst diff --git a/docs/grid.rst b/docs/grid.rst new file mode 100644 index 000000000..54d739ba8 --- /dev/null +++ b/docs/grid.rst @@ -0,0 +1,51 @@ + +Grid Object +----------- +A ``Grid`` object contains data representing an electric power system. 
An object has various attributes that are listed below: + +- ``data_loc`` (``str``) gives the path to the data used to create a ``Grid`` object +- ``model_immutables`` (``object``) contains static data specific to the power system +- ``zone2id`` and ``id2zone`` (``dict``) map load zone name (id) to load zone id + (name) +- ``interconnect`` (``str``) indicates the geographical region covered +- ``bus`` (``pandas.DataFrame``) encloses the characteristics of the buses +- ``sub`` (``pandas.DataFrame``) encloses the characteristics of the substations +- ``bus2sub`` (``pandas.DataFrame``) maps buses to substations +- ``plant`` (``pandas.DataFrame``) encloses the characteristics of the plants +- ``branch`` (``pandas.DataFrame``) encloses the characteristics of the AC lines, + transformers and transformer windings +- ``gencost`` (``dict``) encloses the generation cost curves +- ``dcline`` (``pandas.DataFrame``) encloses the characteristics of the HVDC lines +- ``storage`` (``dict``) encloses information related to storage units + +Only the U.S. Test system presented `here <https://arxiv.org/pdf/2002.06155.pdf>`_ is +available at this time. Thus, a ``Grid`` object can represent in addition to the full +continental U.S., one of the three interconnections -- Eastern, Western or Texas-- or +a combination of two interconnections. + +A ``Grid`` object can be created as follows: + +- U.S. grid + + .. code-block:: python + + from powersimdata.input.grid import Grid + usa = Grid("USA") + +- Western interconnection + + .. code-block:: python + + from powersimdata.input.grid import Grid + western = Grid("Western") + +- combination of two interconnections + + .. code-block:: python + + from powersimdata.input.grid import Grid + eastern_western = Grid(["Eastern", "Western"]) + texas_western = Grid(["Texas", "Western"]) + +A ``Grid`` object can be transformed, i.e., generators/lines can be scaled or added. +This is achieved in the scenario framework. 
From 87dc10293946aee7bfe27d308ef6a7d50b317cca Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Tue, 30 Mar 2021 10:47:43 -0700 Subject: [PATCH 053/108] docs: write scenario framework usage --- docs/scenario.rst | 321 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 docs/scenario.rst diff --git a/docs/scenario.rst b/docs/scenario.rst new file mode 100644 index 000000000..a776e91ea --- /dev/null +++ b/docs/scenario.rst @@ -0,0 +1,321 @@ +Scenario Framework +------------------ +A scenario is defined by the following objects: + +- a **power grid**, an interconnected network delivering electricity from producers + to load buses and consisting of: + + - thermal (coal, natural gas, etc.) and renewable generators (wind turbines, etc.) + that produce electrical power + - substations that change voltage levels (from high to low, or the reverse) + - transmission lines that carry power from one place to the other (between two + substations, between a substation and load bus, between a generator bus and a + substation, etc.) - Both, high voltage AC and DC lines are used in our model + - generator cost curve that specifies the cost as a function of power generated ($/ + MWh) - These are determined by fuel cost and generator efficiency + +- **time series** for renewable generators and demand - These profiles are calculated + in the `PreREISE <https://github.com/Breakthrough-Energy/PreREISE>`_ package + + - profile for the renewable generators consists of hourly power output + - load profile gives the hourly demand (MW) in various load zones, which are + geographic entities such as a state or a portion of a state + +- a **change table** used to alter the grid and profiles. 
To illustrate:
+
+  - generators and transmission lines (AC and DC) capacity can be scaled up and down
+  - storage units, generators and transmission lines can be added
+
+- some **simulation parameters** such as the start and end date along with the duration
+  of the intervals
+
+The ``Scenario`` class handles the following tasks:
+
+- Build a scenario (**create** state)
+- Launch the scenario and extract the output data (**execute** state)
+- Retrieve the output data (**analyze** state)
+- Delete a scenario (**delete** state)
+- Move a scenario to a backup disk (**move** state)
+
+When a ``Scenario`` class is instantiated, its state is set either to **create**,
+**execute** or **analyze**. The initial state of the ``Scenario`` object is set in the
+constructor of the class. The ``Scenario`` class can be instantiated as follows:
+
+- no parameter will instantiate the `Scenario` class in the **create** state and a new
+  scenario can then be built
+- a valid scenario identification number (``str`` or ``int``) or name (``str``) - Then:
+
+  - if the scenario has been run and its output data have been extracted, it will be
+    in the **analyze** state
+  - If the scenario has only been created or run but not extracted, it will be in the
+    **execute** state
+
+Note that instantiating a ``Scenario`` object with a string that doesn't match any
+existing scenarios identification number or name will result in a printout of the list
+of existing scenarios and their information.
+
+
+Creating a Scenario
++++++++++++++++++++
+A scenario can be created using a few lines of code. This is illustrated below:
+
+.. 
code-block:: python + + from powersimdata.scenario.scenario import Scenario + + scenario = Scenario() + # print name of Scenario object state + print(scenario.state.name) + + # Start building a scenario + scenario.set_grid(grid_model="usa_tamu", interconnect="Western") + + # set plan and scenario names + scenario.set_name("test", "dummy") + # set start date, end date and interval + scenario.set_time("2016-08-01 00:00:00", "2016-08-31 23:00:00", "24H") + # set demand profile version + scenario.set_base_profile("demand", "vJan2021") + # set hydro profile version + scenario.set_base_profile("hydro", "vJan2021") + # set solar profile version + scenario.set_base_profile("solar", "vJan2021") + # set wind profile version + scenario.set_base_profile("wind", "vJan2021") + + # scale capacity of solar plants in WA and AZ by 5 and 2.5, respectively + scenario.change_table.scale_plant_capacity( + "solar", zone_name={"Washington": 5, "Arizona": 2.5}) + # scale capacity of wind farms in OR and MT by 1.5 and 2, respectively + scenario.change_table.scale_plant_capacity( + "wind", zone_name={"Oregon": 1.5, "Montana Western": 2}) + # scale capacity of branches in NV and WY by 2 + scenario.change_table.scale_branch_capacity( + zone_name={"Nevada": 2, "Wyoming": 2}) + + # add AC lines in NM and CO + scenario.change_table.add_branch( + [{"capacity": 200, "from_bus_id": 2053002, "to_bus_id": 2053303}, + {"capacity": 150, "from_bus_id": 2060002, "to_bus_id": 2060046}]) + + # add DC line between CO and CA (Bay Area) + scenario.change_table.add_dcline( + [{"capacity": 2000, "from_bus_id": 2060771, "to_bus_id": 2021598}]) + + # add a solar plant in NV, a coal plant in ID and a natural gas plant in OR + scenario.change_table.add_plant( + [{"type": "solar", "bus_id": 2030454, "Pmax": 75}, + {"type": "coal", "bus_id": 2074334, "Pmin": 25, "Pmax": 750, "c0": 1800, "c1": 30, "c2": 0.0025}, + {"type": "ng", "bus_id": 2090018, "Pmax": 75, "c0": 900, "c1": 30, "c2": 0.0015}]) + + # add a new bus, 
and a new one-way DC line connected to this bus
+    scenario.change_table.add_bus(
+        [{"lat": 48, "lon": -125, "zone_id": 201, "baseKV": 138}])
+    scenario.state.builder.change_table.add_dcline(
+        [{"from_bus_id": 2090023, "to_bus_id": 2090024, "Pmin": 0, "Pmax": 200}])
+
+    # get grid used in scenario
+    grid = scenario.get_grid()
+    # get change table used to alter the base grid.
+    ct = scenario.get_ct()
+
+It can be convenient to clear the change table when creating a scenario. Let's say for
+instance that a wrong scaling factor has been applied or a generator has been attached
+to the wrong bus. To do so, the ``clear`` method of the ``ChangeTable`` class can be
+used.
+
+There are also a couple of more advanced methods which can selectively scale branches
+based on the topology of the existing grid, or based on power flow results from a
+previous scenario. These can be called as:
+
+.. code-block:: python
+
+    scenario.state.builder.change_table.scale_renewable_stubs()
+
+or
+
+.. code-block:: python
+
+    scenario.state.builder.change_table.scale_congested_mesh_branches(ref_scenario)
+
+where ``ref_scenario`` is a ``Scenario`` object in **analyze** state.
+
+The final step is to run the ``create_scenario`` method:
+
+.. code-block:: python
+
+    # review information
+    scenario.print_scenario_info()
+    # create scenario
+    scenario.create_scenario()
+    # print name of Scenario object state
+    print(scenario.state.name)
+    # print status of scenario
+    scenario.print_scenario_status()
+
+Once the scenario is successfully created, a scenario id is printed on screen and the
+state of the `Scenario` object is switched to **execute**.
+
+
+Running the Scenario and Extracting Output Data
++++++++++++++++++++++++++++++++++++++++++++++++
+It is possible to execute the scenario immediately after it has been created. One
+can also create a new `Scenario` object. This is the option we follow here. 
+ +The **execute** state accomplishes the three following tasks: + +- Prepare simulation inputs: the scaled profiles and the MAT-file enclosing all the + information related to the electrical grid +- Launch the simulation +- Extract output data - This operation is performed once the simulation has finished + running. + +.. code-block:: python + + from powersimdata.scenario.scenario import Scenario + + scenario = Scenario("dummy") + # print scenario information + scenario.print_scenario_info() + + # prepare simulation inputs + scenario.prepare_simulation_input() + + # launch simulation + process_run = scenario.launch_simulation() + + # Get simulation status + scenario.print_scenario_status() + +Note that the status of the simulation can be accessed using the +``print_scenario_status`` method. + +As an optional parameter, the number of threads used to run the simulation can be +specified using for example: + +.. code-block:: python + + process_run = scenario.state.launch_simulation(threads=8) + +Extracting data from the simulation engine outputs can be a memory intensive process. If +there are resource constraints where the engine resides, it is possible to pause the +data from being extracted using an optional parameter and then manually extracting the +data at a suitable time: + +.. code-block:: python + + process_run = scenario.launch_simulation(extract_data=False) + # Extract data + process_extract = scenario.extract_simulation_output() + +Note that you will need to create a new ``Scenario`` object via the scenario id/name to +access the output data. + + +Retrieving Scenario Output Data ++++++++++++++++++++++++++++++++ +When the ``Scenario`` object is in the **analyze** state, the user can access various +scenario information and data. The following code snippet lists the methods implemented +to do so: + +.. 
code-block:: python + + from powersimdata.scenario.scenario import Scenario + + scenario = Scenario(600) + # print name of Scenario object state + print(scenario.state.name) + + # print scenario information + scenario.print_scenario_info() + + # get change table + ct = scenario.get_ct() + # get grid + grid = scenario.get_grid() + + # get demand profile + demand = scenario.get_demand() + # get hydro profile + hydro = scenario.get_hydro() + # get solar profile + solar = scenario.get_solar() + # get wind profile + wind = scenario.get_wind() + + # get generation profile for generators + pg = scenario.get_pg() + # get generation profile for storage units (if present in scenario) + pg_storage = scenario.get_storage_pg() + # get energy state of charge of storage units (if present in scenario) + e_storage = scenario.get_storage_e() + # get power flow profile for AC lines + pf_ac = scenario.get_pf() + # get power flow profile for DC lines + pf_dc = scenario.get_dcline_pf() + # get locational marginal price profile for each bus + lmp = scenario.get_lmp() + # get congestion (upper power flow limit) profile for AC lines + congu = scenario.get_congu() + # get congestion (lower power flow limit) profile for AC lines + congl = scenario.get_congl() + # get time averaged congestion (lower and power flow limits) for AC lines + avg_cong = scenario.get_averaged_cong() + # get load shed profile for each load bus + load_shed = scenario.get_load_shed() + +If generators or AC/DC lines have been scaled or added to the grid, and/or if the demand +in one or multiple load zones has been scaled for this scenario then the change table +will enclose these changes and the retrieved grid and profiles will be modified +accordingly. Note that the analysis of the scenario using the output data is done in the +`PostREISE <https://github.com/Breakthrough-Energy/PostREISE>`_ package. + + +Deleting a Scenario ++++++++++++++++++++ +A scenario can be deleted. 
All the input and output files as well as any entries in
+monitoring files will be removed. The **delete** state is only accessible from the
+**analyze** state.
+
+.. code-block:: python
+
+    from powersimdata.scenario.scenario import Scenario
+    from powersimdata.scenario.delete import Delete
+
+    scenario = Scenario("dummy")
+    # print name of Scenario object state
+    print(scenario.state.name)
+    # print list of accessible states
+    print(scenario.state.allowed)
+
+    # switch state
+    scenario.change(Delete)
+    # print name of Scenario object state
+    print(scenario.state.name)
+
+    # delete scenario
+    scenario.delete_scenario()
+
+
+Moving a Scenario to Backup Disk
+++++++++++++++++++++++++++++++++
+A scenario can be moved to a backup disk. The **move** state is only accessible from the **analyze** state. The functionality is illustrated below:
+
+.. code-block:: python
+
+    from powersimdata.scenario.scenario import Scenario
+    from powersimdata.scenario.move import Move
+
+    scenario = Scenario("dummy")
+    # print name of Scenario object state
+    print(scenario.state.name)
+    # print list of accessible states
+    print(scenario.state.allowed)
+
+    # switch state
+    scenario.change(Move)
+    # print name of Scenario object state
+    print(scenario.state.name)
+
+    # move scenario
+    scenario.move_scenario()

From e4d7e03692d0a1c1ef79212a24aaebe774e21acf Mon Sep 17 00:00:00 2001
From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org>
Date: Tue, 30 Mar 2021 10:48:13 -0700
Subject: [PATCH 054/108] docs: write capacity planning framework usage

---
 docs/capacity_planning.rst | 123 +++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 docs/capacity_planning.rst

diff --git a/docs/capacity_planning.rst b/docs/capacity_planning.rst
new file mode 100644
index 000000000..1df99ea0d
--- /dev/null
+++ b/docs/capacity_planning.rst
@@ -0,0 +1,123 @@
+Capacity Planning Framework
+---------------------------
+The capacity planning framework is intended to estimate the 
amount of new capacity that
+will be required to meet future clean energy goals.
+
+
+Required Inputs
++++++++++++++++
+At minimum, this framework requires a *reference* ``Scenario`` object--used to specify
+the current capacities and capacity factors of resources which *count* towards
+state-level clean energy goals (this ``Scenario`` object must be in **analyze**
+state)--and a list of target areas (comprised of one or more zones) and their target
+clean energy penetrations. A strategy must also be specified, either ``independent``
+(each area meets its own goal) or ``collaborative`` (all areas with non-zero goals work
+together to meet a shared goal, resembling REC trading).
+
+The list of targets may be specified in either a CSV file or a data frame, as long as
+the required columns are present: ``region_name`` and ``ce_target_fraction``. Optional
+columns are: ``allowed_resources`` (defaulting to solar & wind),
+``external_ce_addl_historical_amount`` (clean energy not modeled in our grid, defaulting
+to 0), and ``solar_percentage`` (how much of the new capacity will be solar, defaulting
+to the current solar:wind ratio). This input only applies to the *independent* strategy;
+a shared-goal new solar fraction for *collaborative* planning is specified in the
+function call to ``calculate_clean_capacity_scaling``.
+
+
+Optional Inputs
++++++++++++++++
+Since increasing penetration of renewable capacity is often associated with increased
+curtailment, an expectation of this new curtailment can be passed as the
+``addl_curtailment`` parameter. For the *collaborative* method, this must be passed as a
+dictionary of ``{resource_name: value}`` pairs, for the *independent* method this must
+be passed as a data frame or as a two-layer nested dictionary which can be interpreted
+as a data frame. For either method, additional curtailment must be a value between 0 and
+1, representing a percentage, not percentage points. 
For example, if the previous +capacity factor was 30%, and additional curtailment of 10% is specified, the expected +new capacity factor will be 27%, not 20%. + +Another ``Scenario`` object can be passed as ``next_scenario`` to specify the magnitude +of future demand (relevant for energy goals which are expressed as a fraction of total +consumption); this `Scenario` object may be any state, as long as +``Scenario.get_demand()`` can be called successfully, i.e., if the ``Scenario`` object +is in **create** state, an interconnection must be defined. This allows calculation of +new capacity for a scenario which is being designed, using the demand scaling present in +the change table. + +Finally, for the *collaborative* method, a ``solar_fraction`` may be defined, which +determines scenario-wide how much of the new capacity should be solar (the remainder +will be wind). + + +Example Capacity Planning Function Calls +++++++++++++++++++++++++++++++++++++++++ +Basic independent call, using the demand from the reference scenario to approximate the +future demand: + +.. code-block:: python + + from powersimdata.design.generation.clean_capacity_scaling import calculate_clean_capacity_scaling + from powersimdata.scenario.scenario import Scenario + + ref_scenario = Scenario(403) + targets_and_new_capacities_df = calculate_clean_capacity_scaling( + ref_scenario, + method="independent", + targets_filename="eastern_2030_clean_energy_targets.csv" + ) + + +Complex collaborative call, using all optional parameters: + +.. 
code-block:: python + + from powersimdata.design.generation.clean_capacity_scaling import calculate_clean_capacity_scaling + from powersimdata.scenario.scenario import Scenario + + ref_scenario = Scenario(403) + # Start building a new scenario, to plan capacity for greater demand + new_scenario = Scenario() + new_scenario.set_grid("Eastern") + zone_demand_scaling = {"Massachusetts": 1.1, "New York City": 1.2} + new_scenario.change_table.scale_demand(zone_name=zone_demand_scaling) + # Define additional expected curtailment + addl_curtailment = {"solar": 0.1, "wind": 0.15} + + targets_and_new_capacities_df = calculate_clean_capacity_scaling( + ref_scenario, + method="collaborative", + targets_filename="eastern_2030_clean_energy_targets.csv", + addl_curtailment=addl_curtailment, + next_scenario=new_scenario, + solar_fraction=0.55 + ) + + +Creating a Change Table from Capacity Planning Results +++++++++++++++++++++++++++++++++++++++++++++++++++++++ +The capacity planning framework returns a data frame of capacities by resource type and +target area, but the scenario creation process ultimately requires scaling factors by +resource type and zone or plant id. A function ``create_change_table`` exists to perform +this conversion process. Using a reference scenario, a set of scaling factors by +resource type, zone, and plant id is calculated. When applied to a base ``Grid`` object, +these scaling factors will result in capacities that are nearly identical to the +reference scenario on a per-plant basis (subject to rounding), with the exception of +solar and wind generators, which will be scaled up to meet clean energy goals. + +.. 
code-block:: python + + from powersimdata.design.generation.clean_capacity_scaling import create_change_table + + change_table = create_change_table(targets_and_new_capacities_df, ref_scenario) + # The change table method only accepts zone names, not zone IDs, so we have to translate + id2zone = new_scenario.state.get_grid().id2zone + # Plants can only be scaled one resource at a time, so we need to loop through + for resource in change_table: + new_scenario.change_table.scale_plant_capacity( + resource=resource, + zone_name={ + id2zone[id]: value + for id, value in change_table[resource]["zone_name"].items() + }, + plant_id=change_table[resource]["zone_name"] + ) From e15a216de52e3b3341d042c601b2fda9067f91c0 Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Tue, 30 Mar 2021 10:48:55 -0700 Subject: [PATCH 055/108] docs: write analysis of scenario design usage --- docs/scenario_design.rst | 150 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 docs/scenario_design.rst diff --git a/docs/scenario_design.rst b/docs/scenario_design.rst new file mode 100644 index 000000000..812b463e4 --- /dev/null +++ b/docs/scenario_design.rst @@ -0,0 +1,150 @@ +Analyzing Scenario Designs +-------------------------- +Analysis of Transmission Upgrades ++++++++++++++++++++++++++++++++++ +Cumulative Upgrade Quantity +########################### +Using the change table of a scenario, the number of upgrades lines/transformers and +their cumulative upgraded capacity (for transformers) and cumulative upgraded +megawatt-miles (for lines) can be calculated with: + +.. code-block:: python + + powersimdata.design.transmission.mwmiles.calculate_mw_miles(scenario) + + +where ``scenario`` is a ``Scenario`` instance. + + +Classify Upgrades +################# +The upgraded branches can also be classified into either interstate or intrastate +branches by calling: + +.. 
code-block:: python + + powersimdata.design.transmission.statelines.classify_interstate_intrastate(scenario) + +where ``scenario`` is a ``Scenario`` instance. + + +Analysis of Generation Upgrades ++++++++++++++++++++++++++++++++ +Accessing and Saving Relevant Supply Information +################################################ +Analyzing generator supply and cost curves requires the proper generator cost and plant +information to be accessed from a ``Grid`` object. This data can be accessed using the +following: + +.. code-block:: python + + from powersimdata.design.generation.cost_curves import get_supply_data + + supply_df = get_supply_data(grid, num_segments, save) + +where ``grid`` is a ``Grid`` object, ``num_segments`` is the number of linearized cost +curve segments into which the provided quadratic cost curve should be split, and +``save`` is a string representing the desired file path and file name to which the +resulting data will be saved. ``save`` defaults to ``None``. ``get_supply_data()`` +returns a ``pandas.DataFrame`` that contains information about each generator's fuel +type, quadratic cost curve, and linearized cost curve, as well as the interconnect and +load zone to which the generator belongs. ``get_supply_data()`` is used within many of +the following supply and cost curve visualization and analysis functions. + + +Visualizing Generator Supply Curves +################################### +To obtain the supply curve for a particular fuel type and area, the following is used: + +.. 
code-block:: python + + from powersimdata.design.generation.cost_curves import build_supply_curve + + P, F = build_supply_curve(grid, num_segments, area, gen_type, area_type, plot) + +where ``grid`` is a ``Grid`` object; ``num_segments`` is the number of linearized cost +curve segments to create; ``area`` is a string describing an appropriate load zone, +interconnect, or state; ``gen_type`` is a string describing an appropriate fuel type; +``area_type`` is a string describing the type of region that is being considered; and +``plot`` is a boolean that indicates whether or not the plot is shown. ``area_type`` +defaults to ``None``, which allows the area type to be inferred; there are instances +where specifying the area type can be useful (e.g., Texas can refer to both a state and +an interconnect, though they are not the same thing). ``plot`` defaults to ``True``. +``build_supply_curve()`` returns ``P`` and ``F``, the supply curve capacity and price +quantities, respectively. + + +Comparing Supply Curves +####################### +When updating generator cost curve information, it can be useful to see the +corresponding effect on the supply curve for a particular area and fuel type pair. +Instead of only performing a visual inspection between the original and new supply +curves, the maximum price difference between the two supply curves can be calculated. +This metric, which is similar to the Kolmogorov-Smirnov test, serves as a +goodness-of-fit test between the two supply curves, where a lower score is desired. This +metric can be calculated as follows: + +.. 
code-block:: python + + from powersimdata.design.generation.cost_curves import ks_test + + max_diff = ks_test(P1, F1, P2, F2, area, gen_type, plot) + +where ``P1`` and ``P2`` are lists containing supply curve capacity data; ``F1`` and +``F2`` are lists containing corresponding supply curve price data; ``area`` is a string +describing an appropriate load zone, interconnect, or state; ``gen_type`` is a string +describing an appropriate fuel type; and ``plot`` is a boolean that indicates whether or +not the plot is shown. The pairs of supply curve data, (``P1``, ``F1``) and (``P2``, +``F2``), can be created using ``build_supply_curve()`` or can be created manually. It +should be noted that the two supply curves must offer the same amount of capacity (i.e., +``max(P1) = max(P2)``). ``area`` and ``gen_type`` both default to ``None``. ``plot`` +defaults to ``True``. ``ks_test()`` returns ``max_diff``, which is the maximum price +difference between the two supply curves. + + +Comparing Cost Curve Parameters +############################### +When designing generator cost curves, it can be instructive to visually compare the +quadratic cost curve parameters for generators in a particular area and fuel type pair. +The linear terms (``c1``) and quadratic terms (``c2``) for a given area and fuel type +can be compared in a plot using the following: + +.. 
code-block:: python + + from powersimdata.design.generation.cost_curves import plot_linear_vs_quadratic_terms + + plot_linear_vs_quadratic_terms(grid, area, gen_type, area_type, plot, zoom, num_sd, alpha) + +where ``grid`` is a ``Grid`` object; ``area`` is a string describing an appropriate load +zone, interconnect, or state; ``gen_type`` is a string describing an appropriate fuel +type; ``area_type`` is a string describing the type of region that is being considered; +``plot`` is a boolean that indicates whether or not the plot is shown; ``zoom`` is a +boolean that indicates whether or not the zoom capability that filters out quadratic +term outliers for better visualization is enabled; ``num_sd`` is the number of standard +deviations outside of which quadratic terms are filtered; and ``alpha`` is the alpha +blending parameter for the scatter plot. ``area_type`` defaults to ``None``, which +allows the area type to be inferred. ``plot`` defaults to ``True``. ``zoom`` defaults to +``False``. ``num_sd`` defaults to 3. ``alpha``, which can take values between 0 and +1, defaults to 0.1. + + +Comparing Generators by Capacity and Price +########################################## +When designing generator cost curves, it can be useful to visually compare the capacity +and price parameters for each generator in a specified area and fuel type pair. The +generator capacity and price parameters for a given area and fuel type can be compared +in a plot using the following: + +.. 
code-block:: python + + from powersimdata.design.generation.cost_curves import plot_capacity_vs_price + + plot_capacity_vs_price(grid, num_segments, area, gen_type, area_type, plot) + +where ``grid`` is a ``Grid`` object; ``num_segments`` is the number of linearized cost +curve segments to create; ``area`` is a string describing an appropriate load zone, +interconnect, or state; ``gen_type`` is a string describing an appropriate fuel type; +``area_type`` is a string describing the type of region that is being considered; and +``plot`` is a boolean that indicates whether or not the plot is shown. ``area_type`` +defaults to ``None``, which allows the area type to be inferred. ``plot`` defaults to +``True``. From 51bf37536b70fef76672a5775d70e711ec8854ff Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Tue, 30 Mar 2021 10:49:11 -0700 Subject: [PATCH 056/108] docs: create index --- docs/index.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 docs/index.rst diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..79c276d49 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,17 @@ +PowerSimData +============ +This tutorial is designed to help users to use our software to carry power flow study in +the U.S. electrical grid. PowerSimData is an open source package written in Python that +is available on `GitHub <https://github.com/Breakthrough-Energy/PowerSimData>`_. + +.. include:: + grid.rst + +.. include:: + scenario.rst + +.. include:: + capacity_planning.rst + +.. 
include:: + scenario_design.rst From ab0281843362d39121716ae49bedcb452d6aabd8 Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Tue, 30 Mar 2021 16:58:18 -0700 Subject: [PATCH 057/108] docs: rewrite README --- README.md | 488 +++++------------------------------------------------- 1 file changed, 46 insertions(+), 442 deletions(-) diff --git a/README.md b/README.md index ec7bf9abc..5c62be1b9 100644 --- a/README.md +++ b/README.md @@ -1,470 +1,74 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ![Tests](https://github.com/Breakthrough-Energy/PowerSimData/workflows/Pytest/badge.svg) - +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Documentation](https://github.com/Breakthrough-Energy/docs/actions/workflows/publish.yml/badge.svg)](https://breakthrough-energy.github.io/docs/) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4538590.svg)](https://doi.org/10.5281/zenodo.4538590) # PowerSimData -This package has been written in order to carry out power flow study in the U.S. electrical grid. This framework allows the user to easily build extensive scenarios. - -PowerSimData is part of a set of packages representing Breakthrough Energy's power system model. More information regarding the installation of the model as well as the contribution guide can be found [here](https://breakthrough-energy.github.io/docs/). - - -## 1. Setup/Install -Here are the instructions to install the **PowerSimData** package. We strongly recommend that you pick one of the following options. - - -### A. Using pipenv -If not already done, install `pipenv` (see their [webpage](https://pipenv.pypa.io/en/latest/)) and run: -```bash -pipenv sync -pipenv shell -``` -in the root folder of the package. The first command will create a virtual environment and install the dependencies. The second command will activate the environment. 
- - -### B. Using the ***requirements.txt*** file -First create an environment using `venv` (more details [here](https://docs.python.org/3/library/venv.html)). Note that `venv` is included in the Python standard library and requires no additional installation. Then, activate your environment and run: -```bash -pip install -r requirements.txt -``` -in the root folder of the package. - - -### C. Path -Whatever method you choose, if you wish to access the modules located in **PowerSimData** from anywhere on your machine, do: -```bash -pip install . -``` -in the root folder of your package or alternatively, setup the `PYTHONPATH` global variable to include the folder into which you have cloned the repository. - - -## 2. Scenario Framework -A scenario is defined by the following objects: -* **The power grid**, an interconnected network delivering electricity from producers to load buses and consisting of: - - Thermal (coal, natural gas, etc.) and renewable generators (wind turbines, etc.) that produce electrical power - - Substations that change voltage levels (from high to low, or the reverse) - - Transmission lines that carry power from one place to the other (between two substations, between a substation and load bus, between a generator bus and a substation, etc.) 
- Both, high voltage AC and DC lines are used in our model - - Generator cost curve that specifies the cost as a function of power generated ($/MWh) - These are determined by fuel cost and generator efficiency -* **Time series** for renewable generators and demand - These profiles are calculated in the [PreREISE] package and the list of profiles generated can be consulted through the following links: [demand](https://github.com/Breakthrough-Energy/PreREISE/tree/develop/prereise/gather/demanddata), [hydro](https://github.com/Breakthrough-Energy/PreREISE/tree/develop/prereise/gather/hydrodata), [solar](https://github.com/Breakthrough-Energy/PreREISE/tree/develop/prereise/gather/solardata) and [wind](https://github.com/Breakthrough-Energy/PreREISE/tree/develop/prereise/gather/winddata). - - Profile for the renewable generators consists of hourly power output - - Load profile gives the hourly demand (MW) in various load zones, which are geographic entities such as a state or a portion of a state -* **Change table** used to alter the grid and profiles. To illustrate: - - Generators and transmission lines (AC and DC) capacity can be scaled up and down - - Storage units, generators and transmission lines can be added -* **Simulation parameters** such as the start and end date along with the duration of the intervals - The simulation engine can also be selected in the building phase of the scenario - -The `Scenario` class handles the following tasks: -* Build a scenario (**create** state) -* Launch the scenario and extract the output data (**execute** state) -* Retrieve the output data (**analyze** state) -* Delete a scenario (**delete** state) -* Move a scenario to a backup disk (**move** state) - -When a `Scenario` class is instantiated, its state is set either to **create**, **execute** or **analyze**. The initial state of the `Scenario` object is set in the constructor of the class. 
Only one argument is required to create a `Scenario` object: -* An empty string instantiates the `Scenario` class in the **create** state. A scenario can then be built -* If a valid scenario identification number (`str` or `int`) or name (`str`) is provided: - - If the scenario has been ran and its output data have been extracted, the state will be set to **analyze** - - If the scenario has only been created or ran but not extracted the state will be then set to **execute** - -Note that instantiating a `Scenario` object with a string that doesn't match any existing scenarios identification number or name will result in a printout of the list of existing scenarios and their information. - - -### A. Retrieving Scenario Output Data -When the `Scenario` object is in the **analyze** state, the user can access various scenario information and data. The following code snippet lists the methods implemented to do so. -```python -from powersimdata.scenario.scenario import Scenario - -scenario = Scenario(600) -# print name of Scenario object state -print(scenario.state.name) - -# print scenario information -scenario.state.print_scenario_info() - -# get change table -ct = scenario.state.get_ct() -# get grid -grid = scenario.state.get_grid() +**PowerSimData** is part of a Python software ecosystem developed by [Breakthrough +Energy Sciences](https://science.breakthroughenergy.org/) to carry out power flow study +in the U.S. electrical grid. 
-# get demand profile -demand = scenario.state.get_demand() -# get hydro profile -hydro = scenario.state.get_hydro() -# get solar profile -solar = scenario.state.get_solar() -# get wind profile -wind = scenario.state.get_wind() -# get generation profile for generators -pg = scenario.state.get_pg() -# get generation profile for storage units (if present in scenario) -pg_storage = scenario.state.get_storage_pg() -# get energy state of charge of storage units (if present in scenario) -e_storage = scenario.state.get_storage_e() -# get power flow profile for AC lines -pf_ac = scenario.state.get_pf() -# get power flow profile for DC lines -pf_dc = scenario.state.get_dcline_pf() -# get locational marginal price profile for each bus -lmp = scenario.state.get_lmp() -# get congestion (upper power flow limit) profile for AC lines -congu = scenario.state.get_congu() -# get congestion (lower power flow limit) profile for AC lines -congl = scenario.state.get_congl() -# get time averaged congestion (lower and power flow limits) for AC lines -avg_cong = scenario.state.get_averaged_cong() -# get load shed profile for each load bus -load_shed = scenario.state.get_load_shed() -``` -If generators or AC/DC lines have been scaled or added to the grid, and/or if the demand in one or multiple load zones has been scaled for this scenario then the change table will enclose these changes and the retrieved grid and profiles will be modified accordingly. Note that the analysis of the scenario using the output data is done in the [PostREISE] package. - - -### B. Creating a Scenario -A scenario can be created using few lines of code. 
This is illustrated below: -```python -from powersimdata.scenario.scenario import Scenario - -scenario = Scenario('') -# print name of Scenario object state -print(scenario.state.name) - -# Start building a scenario -scenario.state.set_builder(grid_model="usa_tamu", interconnect="Western") - -# set plan and scenario names -scenario.state.builder.set_name("test", "dummy") -# set start date, end date and interval -scenario.state.builder.set_time("2016-08-01 00:00:00", - "2016-08-31 23:00:00", - "24H") -# set demand profile version -scenario.state.builder.set_base_profile("demand", "vJan2021") -# set hydro profile version -scenario.state.builder.set_base_profile("hydro", "vJan2021") -# set solar profile version -scenario.state.builder.set_base_profile("solar", "vJan2021") -# set wind profile version -scenario.state.builder.set_base_profile("wind", "vJan2021") - -# scale capacity of solar plants in WA and AZ by 5 and 2.5, respectively -scenario.state.builder.change_table.scale_plant_capacity( - "solar", zone_name={"Washington": 5, "Arizona": 2.5}) -# scale capacity of wind farms in OR and MT by 1.5 and 2, respectively -scenario.state.builder.change_table.scale_plant_capacity( - "wind", zone_name={"Oregon": 1.5, "Montana Western": 2}) -# scale capacity of branches in NV and WY by 2 -scenario.state.builder.change_table.scale_branch_capacity( - zone_name={"Nevada": 2, "Wyoming": 2}) - -# add AC lines in NM and CO -scenario.state.builder.change_table.add_branch( - [{"capacity": 200, "from_bus_id": 2053002, "to_bus_id": 2053303}, - {"capacity": 150, "from_bus_id": 2060002, "to_bus_id": 2060046}]) - -# add DC line between CO and CA (Bay Area) -scenario.state.builder.change_table.add_dcline( - [{"capacity": 2000, "from_bus_id": 2060771, "to_bus_id": 2021598}]) - -# add a solar plant in NV, a coal plant in ID and a natural gas plant in OR -scenario.state.builder.change_table.add_plant( - [{"type": "solar", "bus_id": 2030454, "Pmax": 75}, - {"type": "coal", "bus_id": 2074334, 
"Pmin": 25, "Pmax": 750, "c0": 1800, "c1": 30, "c2": 0.0025}, - {"type": "ng", "bus_id": 2090018, "Pmax": 75, "c0": 900, "c1": 30, "c2": 0.0015}]) - -# add a new bus, and a new one-way DC line connected to this bus -scenario.state.builder.change_table.add_bus( - [{"lat": 48, "lon": -125, "zone_id": 201, "baseKV": 138}]) -scenario.state.builder.change_table.add_dcline( - [{"from_bus_id": 2090023, "to_bus_id": 2090024, "Pmin": 0, "Pmax": 200}]) - -# get grid used in scenario -grid = scenario.state.get_grid() -# get change table used to alter the base grid. -ct = scenario.state.get_ct() -``` -It can be convenient to clear the change table when creating a scenario. Let's say for instance that a wrong scaling factor has been applied or a generator has been attached to the wrong bus. To do so, the `clear` method of the `ChangeTable` class can be used. +## Main Features +Here are a few things that **PowerSimData** can do: +* Provide a flexible modeling tool to create complex scenarios +* Perform investment cost studies +* Run power flow study using interface to external simulation engine +* Manage data throughout the lifecycle of a simulation -There are also a couple of more advanced methods which can selectively scale branches based on the topology of the existing grid, or based on power flow results from a previous scenario. These can be called as: -```python -scenario.state.builder.change_table.scale_renewable_stubs() -``` -or -```python -scenario.state.builder.change_table.scale_congested_mesh_branches(ref_scenario) -``` -where `ref_scenario` is a `Scenario` object in **analyze** state. 
- -The final step is to run the `create_scenario` method: -```python -# review information -scenario.state.print_scenario_info() -# create scenario -scenario.state.create_scenario() -# print name of Scenario object state -print(scenario.state.name) -# print status of scenario -scenario.state.print_scenario_status() -``` -Once the scenario is successfully created, a scenario id is printed on screen and the state of the `Scenario` object is switched to **execute**. printed on screen. - - -### C. Running the Scenario and Extracting Output Data -It is possible to execute the scenario immediately right after it has been created. One can also create a new `Scenario` object. This is the option we follow here. - -The **execute** state accomplishes the three following tasks: -* It prepares the simulation inputs: the scaled profiles and the MAT-file enclosing all the information related to the electrical grid -* It launches the simulation -* It extracts the output data - This operation is performed once the simulation has finished running. +A detailed tutorial can be found on our [docs]. -```python -from powersimdata.scenario.scenario import Scenario -scenario = Scenario("dummy") -# print scenario information -scenario.print_scenario_info() +## Where to get it +For now, only the source code is available. Clone or Fork the code here on GitHub. -# prepare simulation inputs -scenario.state.prepare_simulation_input() - -# launch simulation -process_run = scenario.state.launch_simulation() - -# Get simulation status -scenario.state.print_scenario_status() -``` -Note that the status of the simulation can be accessed using the `print_scenario_status` method. - -As an optional parameter, the number of threads used to run the simulation can be specified using for example: -```python -process_run = scenario.state.launch_simulation(threads=8) -``` -Extracting data from the simulation engine outputs can be a memory intensive process. 
If there are resource constraints where the engine resides, it is possible to pause the data from being extracted using an optional parameter and then manually extracting the data at a suitable time: -```python -process_run = scenario.state.launch_simulation(extract_data=False) -# Extract data -process_extract = scenario.state.extract_simulation_output() -``` +## Dependencies +**PowerSimData** relies on several Python packages all available on +[PyPi](https://pypi.org/). The list can be found in the ***requirements.txt*** or +***Pipfile*** files both located at the root of this package. -### D. Deleting a Scenario -A scenario can be deleted. All the input and output files as well as any entries in monitoring files will be removed. The **delete** state is only accessible from the **analyze** state. -```python -from powersimdata.scenario.scenario import Scenario -from powersimdata.scenario.delete import Delete -scenario = Scenario("dummy") -# print name of Scenario object state -print(scenario.state.name) -# print list of accessible states -print(scenario.state.allowed) +## Installation +To take full advantage of our software, we recommend that you clone/fork +**[plug](https://github.com/Breakthrough-Energy/plug)** and follow the information +therein to get our containerized framework up and running. A client/server installation +is also possible and outlined in our [Installation +Guide](https://breakthrough-energy.github.io/docs/user/installation_guide.html). Either +way, you will need a powerful solver, e.g. Gurobi, to run complex scenarios. -# switch state -scenario.change(Delete) -# print name of Scenario object state -print(scenario.state.name) - -# delete scenario -scenario.state.delete_scenario() -``` - - -### E. Moving a Scenario to Backup disk -A scenario can be move to a backup disk. The **move** state is only accessible from the **analyze** state. 
The functionality is illustrated below: -```python -from powersimdata.scenario.scenario import Scenario -from powersimdata.scenario.move import Move - -scenario = Scenario("dummy") -# print name of Scenario object state -print(scenario.state.name) -# print list of accessible states -print(scenario.state.allowed) - -# switch state -scenario.change(Move) -# print name of Scenario object state -print(scenario.state.name) - -# move scenario -scenario.state.move_scenario() -``` - - -## 3. U.S. Electric Grid and Interconnection -A `Grid` object encapsulates all the information related to the synthetic network used in this project for a single interconnection (**Eastern**, **Texas** or **Western**), a combination of two interconnections (**Eastern** and **Texas** for example) or the full U.S. electric grid (**USA**). Only one argument is required to instantiate the `Grid` class, a `list` of interconnections (as `str`) in any order and a `str` for single interconnection or **USA**. -```python -from powersimdata.input.grid import Grid -western_texas = Grid(["Western", "Texas"]) -``` -The object has various attributes. These are listed below and a short description is given: -* **zone2id (id2zone)**: `dict` -- load zone name (load zone id) to load zone id (load zone name). -* **interconnect**: `str` -- interconnection name. -* **bus**: `pandas.DataFrame` -- bus id as index and bus characteristics as columns. -* **sub**: `pandas.DataFrame` -- substation id as index and substation information as columns. -* **bus2sub**: `pandas.DataFrame` -- bus id as index and substation id as column. -* **plant**: `pandas.DataFrame` -- plant id as index and plant characteristics as columns. -* **branch**: `pandas.DataFrame` -- branch id as index and branch characteristics as columns. -* **gencost**: `dict` -- has two keys: `before` and `after`. Values are `pandas.DataFrame` with plant id as index and generator cost curve information as columns. 
The `before` key points to the original set of cost curves (polynomials) whereas the `after` key gives the ones that has been used in the simulation (linearized or piece-wise linearized version). -* **dcline**: `pandas.DataFrame` -- DC line id as index and DC line characteristics as columns. -```python -from powersimdata.input.grid import Grid -usa = Grid("USA") -usa.plant.head() -# get all wind farm in the U.S. electrical grid -wind_farm = usa.plant.groupby("type").get_group("wind") -# get DC lines in the grid -dcline = usa.dcline -``` -The synthetic U.S. network used in our simulation framework can be found at the following url: <https://electricgrids.engr.tamu.edu>. Our team has altered the original network in many ways to make it more realistic. These have been achieved by comparing our simulation results with historical generation level. Our data along with their description can be found on [zenodo]. - - -## 4. Capacity Planning Framework -The capacity planning framework is intended to estimate the amount of new capacity that will be required to meet future clean energy goals. - - -### A. Required Inputs -At minimum, this framework requires a *reference* `Scenario` object--used to specify the current capacities and capacity factors of resources which *count* towards state-level clean energy goals (this `Scenario` object must be in **analyze** state)--and a list of target areas (comprised of one or more zones) and their target clean energy penetrations. A strategy must also be specified, either `independent` (each area meets it own goal) or `collaborative` (all areas with non-zero goals work together to meet a shared goal, resembling REC trading). - -The list of targets may be specified in either a CSV file or a data frame, as long as the required columns are present: `region_name` and `ce_target_fraction`. 
Optional columns are: `allowed_resources` (defaulting to solar & wind), `external_ce_addl_historical_amount` (clean energy not modeled in our grid, defaulting to 0), and `solar_percentage` (how much of the new capacity will be solar, defaulting to the current solar:wind ratio. This input only applies to the *independent* strategy, a shared-goal new solar fraction for *collaborative* planning is specified in the function call to `calculate_clean_capacity_scaling`. - - -### B. Optional Inputs -Since increasing penetration of renewable capacity is often associated with increased curtailment, an expectation of this new curtailment can be passed as the `addl_curtailment` parameter. For the *collaborative* method, this must be passed as a dictionary of `{resource_name: value}` pairs, for the *independent* method this must be passed as a data frame or as a two-layer nested dictionary which can be interpreted as a data frame. For either method, additional curtailment must be a value between 0 and 1, representing a percentage, not percentage points. For example, if the previous capacity factor was 30%, and additional curtailment of 10% is specified, the expected new capacity factor will be 27%, not 20%. - -Another `Scenario` object can be passed as `next_scenario` to specify the magnitude of future demand (relevant for energy goals which are expressed as a fraction of total consumption); this `Scenario` object may be any state, as long as `Scenario.state.get_demand()` can be called successfully, i.e. if the `Scenario` object is in **create** state, an interconnection must be defined. This allows calculation of new capacity for a scenario which is being designed, using the demand scaling present in the change table. - -Finally, for the *collaborative* method, a `solar_fraction` may be defined, which determines scenario-wide how much of the new capacity should be solar (the remainder will be wind). - - -### C. 
Example Capacity Planning Function Calls -Basic independent call, using the demand from the reference scenario to approximate the future demand: -```python -from powersimdata.design.generation.clean_capacity_scaling import calculate_clean_capacity_scaling -from powersimdata.scenario.scenario import Scenario - -ref_scenario = Scenario(403) -targets_and_new_capacities_df = calculate_clean_capacity_scaling( - ref_scenario, - method="independent", - targets_filename="eastern_2030_clean_energy_targets.csv" -) -``` - -Complex collaborative call, using all optional parameters: -```python -from powersimdata.design.generation.clean_capacity_scaling import calculate_clean_capacity_scaling -from powersimdata.scenario.scenario import Scenario - -ref_scenario = Scenario(403) -# Start building a new scenario, to plan capacity for greater demand -new_scenario = Scenario("") -new_scenario.state.set_builder(["Eastern"]) -zone_demand_scaling = {"Massachusetts": 1.1, "New York City": 1.2} -new_scenario.state.builder.change_table.scale_demand(zone_name=zone_demand_scaling) -# Define additional expected curtailment -addl_curtailment = {"solar": 0.1, "wind": 0.15} - -targets_and_new_capacities_df = calculate_clean_capacity_scaling( - ref_scenario, - method="collaborative", - targets_filename="eastern_2030_clean_energy_targets.csv", - addl_curtailment=addl_curtailment, - next_scenario=new_scenario, - solar_fraction=0.55 -) -``` - - -### D. Creating a Change Table from Capacity Planning Results -The capacity planning framework returns a data frame of capacities by resource type and target area, but the Scenario creation process ultimately requires scaling factors by resource type and zone or plant_id. A function `create_change_table` exists to perform this conversion process. Using a reference scenario, a set of scaling factors by resource type, zone, and plant_id is calculated. 
When applied to a base `Grid` object, these scaling factors will result in capacities that are nearly identical to the reference scenario on a per-plant basis (subject to rounding), with the exception of solar and wind generators, which will be scaled up to meet clean energy goals. -```python -from powersimdata.design.generation.clean_capacity_scaling import create_change_table - -change_table = create_change_table(targets_and_new_capacities_df, ref_scenario) -# The change table method only accepts zone names, not zone IDs, so we have to translate -id2zone = new_scenario.state.get_grid().id2zone -# Plants can only be scaled one resource at a time, so we need to loop through -for resource in change_table: - new_scenario.state.builder.change_table.scale_plant_capacity( - resource=resource, - zone_name={ - id2zone[id]: value - for id, value in change_table[resource]["zone_name"].items() - }, - plant_id=change_table[resource]["zone_name"] - ) -``` - - -## 5. Analyzing Scenario Designs -### A. Analysis of Transmission Upgrades -#### I. Cumulative Upgrade Quantity -Using the change table of a scenario, the number of upgrades lines/transformers and their cumulative upgraded capacity (for transformers) and cumulative upgraded megawatt-miles (for lines) can be calculated with: -```python -powersimdata.design.transmission.mwmiles.calculate_mw_miles(scenario) -``` -where `scenario` is a `Scenario` instance. - - -#### II. Classify Upgrades -The upgraded branches can also be classified into either interstate or intrastate branches by calling: -```python -powersimdata.design.transmission.statelines.classify_interstate_intrastate(scenario) -``` -where `scenario` is a `Scenario` instance. - - -### B. Analysis of Generation Upgrades -#### I. Accessing and Saving Relevant Supply Information -Analyzing generator supply and cost curves requires the proper generator cost and plant information to be accessed from a Grid object. 
This data can be accessed using the following: -```python -from powersimdata.design.generation.cost_curves import get_supply_data - -supply_df = get_supply_data(grid, num_segments, save) +Only a limited set of features are available when solely installing **PowerSimData**. If you choose this option, we recommend that you use `pipenv`: +```sh +pipenv sync +pipenv shell ``` -where `grid` is a `Grid` object, `num_segments` is the number of linearized cost curve segments into which the provided quadratic cost curve should be split, and `save` is a string representing the desired file path and file name to which the resulting data will be saved. `save` defaults to `None`. `get_supply_data` returns a DataFrame that contains information about each generator's fuel type, quadratic cost curve, and linearized cost curve, as well as the interconnect and load zone to which the generator belongs. `get_supply_data` is used within many of the following supply and cost curve visualization and analysis functions. - - -#### II. Visualizing Generator Supply Curves -To obtain the supply curve for a particular fuel type and area, the following is used: -```python -from powersimdata.design.generation.cost_curves import build_supply_curve - -P, F = build_supply_curve(grid, num_segments, area, gen_type, area_type, plot) +since the dependencies will be installed in an isolated environment. It is of course +possible to install the dependencies using the requirements file: +```sh +pip install -r requirements.txt ``` -where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; and `plot` is a boolean that indicates whether or not the plot is shown. 
`area_type` defaults to `None`, which allows the area type to be inferred; there are instances where specifying the area type can be useful (e.g., Texas can refer to both a state and an interconnect, though they are not the same thing). `plot` defaults to `True`. `build_supply_curve` returns `P` and `F`, the supply curve capacity and price quantities, respectively. -#### III. Comparing Supply Curves -When updating generator cost curve information, it can be useful to see the corresponding effect on the supply curve for a particular area and fuel type pair. Instead of only performing a visual inspection between the original and new supply curves, the maximum price difference between the two supply curves can be calculated. This metric, which is similar to the Kolmogorov-Smirnov test, serves as a goodness-of-fit test between the two supply curves, where a lower score is desired. This metric can be calculated as follows: -```python -from powersimdata.design.generation.cost_curves import ks_test +## License +[MIT](LICENSE) -max_diff = ks_test(P1, F1, P2, F2, area, gen_type, plot) -``` -where `P1` and `P2` are lists containing supply curve capacity data; `F1` and `F2` are lists containing corresponding supply curve price data; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; and `plot` is a boolean that indicates whether or not the plot is shown. The pairs of supply curve data, (`P1`, `F1`) and (`P2`, `F2`), can be created using `build_supply_curve` or can be created manually. It should be noted that the two supply curves must offer the same amount of capacity (i.e., `max(P1) = max(P2)`). `area` and `gen_type` both default to `None`. `plot` defaults to `True`. `ks_test` returns `max_diff`, which is the maximum price difference between the two supply curves. +## Documentation +The official documentation can be found [here][docs]. -#### IV. 
Comparing Cost Curve Parameters -When designing generator cost curves, it can be instructive to visually compare the quadratic cost curve parameters for generators in a particular area and fuel type pair. The linear terms (`c1`) and quadratic terms (`c2`) for a given area and fuel type can be compared in a plot using the following: -```python -from powersimdata.design.generation.cost_curves import plot_linear_vs_quadratic_terms -plot_linear_vs_quadratic_terms(grid, area, gen_type, area_type, plot, zoom, num_sd, alpha) -``` -where `grid` is a `Grid` object; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; `plot` is a boolean that indicates whether or not the plot is shown; `zoom` is a boolean that indicates whether or not the zoom capability that filters out quadratic term outliers for better visualization is enabled; `num_sd` is the number of standard deviations outside of which quadratic terms are filtered; and `alpha` is the alpha blending parameter for the scatter plot. `area_type` defaults to `None`, which allows the area type to be inferred. `plot` defaults to `True`. `zoom` defaults to `False`. `num_sd` defaults to `3`. `alpha`, which can take values between `0` and `1`, defaults to `0.1`. +## Communication Channels +[Sign up](https://science.breakthroughenergy.org/#get-updates) to our email list and +our Slack workspace to get in touch with us. -#### V. Comparing Generators by Capacity and Price -When designing generator cost curves, it can be useful to visually compare the capacity and price parameters for each generator in a specified area and fuel type pair. 
The generator capacity and price parameters for a given area and fuel type can be compared in a plot using the following: -```python -from powersimdata.design.generation.cost_curves import plot_capacity_vs_price +## Contributing +All contributions (bug report, documentation, feature development, etc.) are welcome. An +overview on how to contribute to this project can be found in our [Contribution +Guide](https://breakthrough-energy.github.io/docs/dev/contribution_guide.html). -plot_capacity_vs_price(grid, num_segments, area, gen_type, area_type, plot) -``` -where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; and `plot` is a boolean that indicates whether or not the plot is shown. `area_type` defaults to `None`, which allows the area type to be inferred. `plot` defaults to `True`. 
-[PreREISE]: https://github.com/Breakthrough-Energy/PreREISE -[PostREISE]: https://github.com/Breakthrough-Energy/PostREISE -[zenodo]: https://zenodo.org/record/3530898 +[docs]: https://breakthrough-energy.github.io/docs/index.html From d2986c5f0224c00814db3f9a1bd6b3d88040ece9 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 11:11:48 -0700 Subject: [PATCH 058/108] fix: copy_from write to temp file --- powersimdata/data_access/data_access.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 879dc5af5..89977bf30 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -1,6 +1,8 @@ import operator import os import posixpath +import shutil +import tempfile import time from subprocess import PIPE, Popen @@ -285,13 +287,15 @@ def copy_from(self, file_name, from_dir=None): os.makedirs(to_dir, exist_ok=True) from_path = posixpath.join(self.root, from_dir, file_name) + to_path = os.path.join(to_dir, file_name) self._check_file_exists(from_path, should_exist=True) with self.ssh.open_sftp() as sftp: print(f"Transferring {file_name} from server") cbk, bar = progress_bar(ascii=True, unit="b", unit_scale=True) - to_path = os.path.join(to_dir, file_name) - sftp.get(from_path, to_path, callback=cbk) + _, tmp_path = tempfile.mkstemp() + sftp.get(from_path, tmp_path, callback=cbk) + shutil.move(tmp_path, to_path) bar.close() def move_to(self, file_name, to_dir=None, change_name_to=None, preserve=False): From c67d3485c9434e567188db34412e7a1b4b5d7233 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 12:37:50 -0700 Subject: [PATCH 059/108] fix: upload from temp file --- powersimdata/data_access/csv_store.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/csv_store.py 
b/powersimdata/data_access/csv_store.py index 461a0da7c..3245a0caf 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -1,5 +1,7 @@ import functools import os +import shutil +import tempfile from pathlib import Path import pandas as pd @@ -72,5 +74,7 @@ def commit(self, table, checksum): :param pandas.DataFrame table: the data frame to save :param str checksum: the checksum prior to download """ - table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) - self.data_access.push(self._FILE_NAME, checksum) + _, tmp_path = tempfile.mkstemp() + table.to_csv(tmp_path) + self.data_access.push(tmp_path, checksum) + shutil.move(tmp_path, os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) From 745f0115170b8a76bc699c089365e82db3348747 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 15:07:50 -0700 Subject: [PATCH 060/108] fix: move id generation to atomic scope --- powersimdata/data_access/scenario_list.py | 7 +++-- .../data_access/tests/test_scenario_csv.py | 27 ++++++++----------- powersimdata/scenario/create.py | 8 ------ 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 2285f81d4..9a8a92ce0 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -94,12 +94,12 @@ def get_scenario_table(self): """ return self.get_table() - def generate_scenario_id(self): + def _generate_scenario_id(self, table): """Generates scenario id. + :param pandas.DataFrame table: the current scenario list :return: (*str*) -- new scenario id. 
""" - table = self.get_scenario_table() max_value = table.index.max() result = 1 if pd.isna(max_value) else max_value + 1 return str(result) @@ -146,6 +146,9 @@ def add_entry(self, scenario_info): :return: (*pandas.DataFrame*) -- the updated data frame """ table = self.get_scenario_table() + scenario_id = self._generate_scenario_id(table) + scenario_info["id"] = scenario_id + scenario_info.move_to_end("id", last=False) table.reset_index(inplace=True) entry = pd.DataFrame({k: [v] for k, v in scenario_info.items()}) table = table.append(entry) diff --git a/powersimdata/data_access/tests/test_scenario_csv.py b/powersimdata/data_access/tests/test_scenario_csv.py index 74d02a225..232eba5ee 100644 --- a/powersimdata/data_access/tests/test_scenario_csv.py +++ b/powersimdata/data_access/tests/test_scenario_csv.py @@ -82,10 +82,9 @@ def manager(): os.remove(test_csv) -def mock_row(sid=1): +def mock_row(): return OrderedDict( [ - ("id", str(sid)), ("plan", "test"), ("name", "dummy"), ("state", "create"), @@ -104,21 +103,17 @@ def mock_row(sid=1): ) -def test_generate_id(manager): - new_id = manager.generate_scenario_id() - assert new_id == "1" - - def test_blank_csv_append(manager): - manager.add_entry(mock_row(1)) - table = manager.add_entry(mock_row(2)) - assert table.shape == (2, 16) + entry = mock_row() + table = manager.add_entry(entry) + assert entry["id"] == "1" + assert table.shape == (1, 16) def test_get_scenario(manager): - manager.add_entry(mock_row(1)) - manager.add_entry(mock_row(2)) - manager.add_entry(mock_row(3)) + manager.add_entry(mock_row()) + manager.add_entry(mock_row()) + manager.add_entry(mock_row()) entry = manager.get_scenario(2) assert entry["id"] == "2" entry = manager.get_scenario("2") @@ -126,8 +121,8 @@ def test_get_scenario(manager): def test_delete_entry(manager): - manager.add_entry(mock_row(1)) - manager.add_entry(mock_row(2)) - manager.add_entry(mock_row(3)) + manager.add_entry(mock_row()) + manager.add_entry(mock_row()) + 
manager.add_entry(mock_row()) table = manager.delete_entry(2) assert table.shape == (2, 16) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 19d8573fb..c87d03091 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -93,12 +93,6 @@ def _update_scenario_info(self): else: self._scenario_info["change_table"] = "No" - def _generate_and_set_scenario_id(self): - """Generates scenario id.""" - scenario_id = self._scenario_list_manager.generate_scenario_id() - self._scenario_info["id"] = scenario_id - self._scenario_info.move_to_end("id", last=False) - def _add_entry_in_execute_list(self): """Adds scenario to the execute list file on server and update status information. @@ -144,8 +138,6 @@ def create_scenario(self): % (self._scenario_info["plan"], self._scenario_info["name"]) ) - # Generate scenario id - self._generate_and_set_scenario_id() # Add missing information self._scenario_info["state"] = "execute" self._scenario_info["runtime"] = "" From 4cfd9ee66ebb30c764e9d7c3d4072fb0ea00f902 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 15:35:50 -0700 Subject: [PATCH 061/108] chore: remove unused code --- powersimdata/data_access/execute_list.py | 13 +------------ powersimdata/data_access/scenario_list.py | 12 +----------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/powersimdata/data_access/execute_list.py b/powersimdata/data_access/execute_list.py index b21f4700c..cdf42b44e 100644 --- a/powersimdata/data_access/execute_list.py +++ b/powersimdata/data_access/execute_list.py @@ -1,8 +1,5 @@ -import posixpath - from powersimdata.data_access.csv_store import CsvStore, verify_hash from powersimdata.data_access.sql_store import SqlStore, to_data_frame -from powersimdata.utility import server_setup class ExecuteTable(SqlStore): @@ -71,18 +68,10 @@ def delete_entry(self, scenario_id): class ExecuteListManager(CsvStore): - """Storage abstraction for 
execute list using a csv file on the server. - - :param paramiko.client.SSHClient ssh_client: session with an SSH server. - """ + """Storage abstraction for execute list using a csv file.""" _FILE_NAME = "ExecuteList.csv" - def __init__(self, ssh_client): - """Constructor""" - super().__init__(ssh_client) - self._server_path = posixpath.join(server_setup.DATA_ROOT_DIR, self._FILE_NAME) - def get_execute_table(self): """Returns execute table from server if possible, otherwise read local copy. Updates the local copy upon successful server connection. diff --git a/powersimdata/data_access/scenario_list.py b/powersimdata/data_access/scenario_list.py index 9a8a92ce0..da65f8515 100644 --- a/powersimdata/data_access/scenario_list.py +++ b/powersimdata/data_access/scenario_list.py @@ -1,11 +1,9 @@ -import posixpath from collections import OrderedDict import pandas as pd from powersimdata.data_access.csv_store import CsvStore, verify_hash from powersimdata.data_access.sql_store import SqlStore, to_data_frame -from powersimdata.utility import server_setup class ScenarioTable(SqlStore): @@ -74,18 +72,10 @@ def delete_entry(self, scenario_id): class ScenarioListManager(CsvStore): - """Storage abstraction for scenario list using a csv file on the server. - - :param paramiko.client.SSHClient ssh_client: session with an SSH server. - """ + """Storage abstraction for scenario list using a csv file.""" _FILE_NAME = "ScenarioList.csv" - def __init__(self, ssh_client): - """Constructor""" - super().__init__(ssh_client) - self._server_path = posixpath.join(server_setup.DATA_ROOT_DIR, self._FILE_NAME) - def get_scenario_table(self): """Returns scenario table from server if possible, otherwise read local copy. Updates the local copy upon successful server connection. 
From 4856ca6670c1aaeb27e5a054a3e6595345e52d39 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 17:10:19 -0700 Subject: [PATCH 062/108] fix: temp path handling for upload --- powersimdata/data_access/csv_store.py | 8 ++------ powersimdata/data_access/data_access.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index 3245a0caf..461a0da7c 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -1,7 +1,5 @@ import functools import os -import shutil -import tempfile from pathlib import Path import pandas as pd @@ -74,7 +72,5 @@ def commit(self, table, checksum): :param pandas.DataFrame table: the data frame to save :param str checksum: the checksum prior to download """ - _, tmp_path = tempfile.mkstemp() - table.to_csv(tmp_path) - self.data_access.push(tmp_path, checksum) - shutil.move(tmp_path, os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) + table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) + self.data_access.push(self._FILE_NAME, checksum) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 89977bf30..f742057a8 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -2,9 +2,9 @@ import os import posixpath import shutil -import tempfile import time from subprocess import PIPE, Popen +from tempfile import mkstemp import paramiko from tqdm import tqdm @@ -293,12 +293,12 @@ def copy_from(self, file_name, from_dir=None): with self.ssh.open_sftp() as sftp: print(f"Transferring {file_name} from server") cbk, bar = progress_bar(ascii=True, unit="b", unit_scale=True) - _, tmp_path = tempfile.mkstemp() + _, tmp_path = mkstemp() sftp.get(from_path, tmp_path, callback=cbk) shutil.move(tmp_path, to_path) bar.close() - def move_to(self, file_name, 
to_dir=None, change_name_to=None, preserve=False): + def move_to(self, file_name, to_dir=None, change_name_to=None): """Copy a file from userspace to data store. :param str file_name: file name to copy. @@ -325,9 +325,8 @@ def move_to(self, file_name, to_dir=None, change_name_to=None, preserve=False): print(f"Transferring {from_path} to server") sftp.put(from_path, to_path) - if not preserve: - print(f"--> Deleting {from_path} on local machine") - os.remove(from_path) + print(f"--> Deleting {from_path} on local machine") + os.remove(from_path) def execute_command(self, command): """Execute a command locally at the data access. @@ -371,7 +370,10 @@ def push(self, file_name, checksum): :raises IOError: if command generated stderr """ backup = f"{file_name}.temp" - self.move_to(file_name, change_name_to=backup, preserve=True) + _, tmp_path = mkstemp(dir=self.local_root) + shutil.copy(posixpath.join(self.local_root, file_name), tmp_path) + temp_name = os.path.basename(tmp_path) + self.move_to(temp_name, change_name_to=backup) values = { "original": posixpath.join(self.root, file_name), From 7fc49650880cff14fee65578fa521f52d08ea18d Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 17:30:53 -0700 Subject: [PATCH 063/108] chore: keep signatures consistent --- powersimdata/data_access/data_access.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index f742057a8..509c60fd7 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -25,13 +25,12 @@ def copy_from(self, file_name, from_dir): """ raise NotImplementedError - def move_to(self, file_name, to_dir, change_name_to=None, preserve=False): + def move_to(self, file_name, to_dir, change_name_to=None): """Copy a file from userspace to data store. :param str file_name: file name to copy. 
:param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. - :param bool preserve: whether to keep the local copy """ raise NotImplementedError @@ -163,13 +162,12 @@ def checksum(self, relative_path): """ return "dummy_value" - def move_to(self, file_name, to_dir, change_name_to=None, preserve=False): + def move_to(self, file_name, to_dir, change_name_to=None): """Copy a file from userspace to data store. :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. - :param bool preserve: whether to keep the local copy """ self._check_filename(file_name) src = posixpath.join(server_setup.LOCAL_DIR, file_name) @@ -178,9 +176,8 @@ def move_to(self, file_name, to_dir, change_name_to=None, preserve=False): print(f"--> Moving file {src} to {dest}") self._check_file_exists(dest, should_exist=False) self.copy(src, dest) - if not preserve: - print("--> Deleting original copy") - self.remove(src) + print("--> Deleting original copy") + self.remove(src) def execute_command(self, command): """Execute a command locally at the data access. @@ -304,7 +301,6 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): :param str file_name: file name to copy. :param str to_dir: data store directory to copy file to. :param str change_name_to: new name for file when copied to data store. 
- :param bool preserve: whether to keep the local copy :raises FileNotFoundError: if specified file does not exist """ self._check_filename(file_name) From 60e5eefbbdfed35dfb73f3e00a175099310a9859 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 26 Mar 2021 17:35:16 -0700 Subject: [PATCH 064/108] fix: wrong path join --- powersimdata/data_access/data_access.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 509c60fd7..db3abf9c7 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -367,7 +367,7 @@ def push(self, file_name, checksum): """ backup = f"{file_name}.temp" _, tmp_path = mkstemp(dir=self.local_root) - shutil.copy(posixpath.join(self.local_root, file_name), tmp_path) + shutil.copy(os.path.join(self.local_root, file_name), tmp_path) temp_name = os.path.basename(tmp_path) self.move_to(temp_name, change_name_to=backup) From abd7bf9bff608d6f3ded34f2b4559fa5735b16e7 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 29 Mar 2021 14:18:29 -0700 Subject: [PATCH 065/108] refactor: use temp file to push changes, remove redundant print --- powersimdata/data_access/csv_store.py | 9 +++++++-- powersimdata/data_access/data_access.py | 21 ++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index 461a0da7c..e8834bd25 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -1,6 +1,8 @@ import functools import os +import shutil from pathlib import Path +from tempfile import mkstemp import pandas as pd @@ -72,5 +74,8 @@ def commit(self, table, checksum): :param pandas.DataFrame table: the data frame to save :param str checksum: the checksum prior to download """ - 
table.to_csv(os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) - self.data_access.push(self._FILE_NAME, checksum) + _, tmp_path = mkstemp(dir=server_setup.LOCAL_DIR) + table.to_csv(tmp_path) + shutil.copy(tmp_path, os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) + tmp_name = os.path.basename(tmp_path) + self.data_access.push(tmp_name, checksum, change_name_to=self._FILE_NAME) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index db3abf9c7..721f70ca1 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -109,11 +109,12 @@ def checksum(self, relative_path): """ raise NotImplementedError - def push(self, file_name, checksum): + def push(self, file_name, checksum, change_name_to=None): """Push the file from local to remote root folder, ensuring integrity :param str file_name: the file name, located at the local root :param str checksum: the checksum prior to download + :param str change_name_to: new name for file when copied to data store. """ raise NotImplementedError @@ -145,11 +146,12 @@ def copy_from(self, file_name, from_dir=None): """ pass - def push(self, file_name, checksum): + def push(self, file_name, checksum, change_name_to=None): """Nothing to be done due to symlink :param str file_name: the file name, located at the local root :param str checksum: the checksum prior to download + :param str change_name_to: new name for file when copied to data store. 
""" pass @@ -176,7 +178,6 @@ def move_to(self, file_name, to_dir, change_name_to=None): print(f"--> Moving file {src} to {dest}") self._check_file_exists(dest, should_exist=False) self.copy(src, dest) - print("--> Deleting original copy") self.remove(src) def execute_command(self, command): @@ -321,7 +322,6 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): print(f"Transferring {from_path} to server") sftp.put(from_path, to_path) - print(f"--> Deleting {from_path} on local machine") os.remove(from_path) def execute_command(self, command): @@ -358,21 +358,20 @@ def checksum(self, relative_path): lines = stdout.readlines() return lines[0].strip() - def push(self, file_name, checksum): + def push(self, file_name, checksum, change_name_to=None): """Push file_name to remote root :param str file_name: the file name, located at the local root :param str checksum: the checksum prior to download + :param str change_name_to: new name for file when copied to data store. :raises IOError: if command generated stderr """ - backup = f"{file_name}.temp" - _, tmp_path = mkstemp(dir=self.local_root) - shutil.copy(os.path.join(self.local_root, file_name), tmp_path) - temp_name = os.path.basename(tmp_path) - self.move_to(temp_name, change_name_to=backup) + new_name = file_name if change_name_to is None else change_name_to + backup = f"{new_name}.temp" + self.move_to(file_name, change_name_to=backup) values = { - "original": posixpath.join(self.root, file_name), + "original": posixpath.join(self.root, new_name), "updated": posixpath.join(self.root, backup), "lockfile": posixpath.join(self.root, "scenario.lockfile"), "checksum": checksum, From 3ced6fec27db44745f23565c8d9f4030956514ef Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 29 Mar 2021 14:42:31 -0700 Subject: [PATCH 066/108] refactor: combine methods and clarify docs --- powersimdata/data_access/data_access.py | 2 +- powersimdata/scenario/create.py | 18 +++++------------- 2 
files changed, 6 insertions(+), 14 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 721f70ca1..e7b162dc3 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -359,7 +359,7 @@ def checksum(self, relative_path): return lines[0].strip() def push(self, file_name, checksum, change_name_to=None): - """Push file_name to remote root + """Push file to server and verify the checksum matches a prior value :param str file_name: the file name, located at the local root :param str checksum: the checksum prior to download diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index c87d03091..3e452cfd3 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -93,15 +93,6 @@ def _update_scenario_info(self): else: self._scenario_info["change_table"] = "No" - def _add_entry_in_execute_list(self): - """Adds scenario to the execute list file on server and update status - information. 
- - """ - self._execute_list_manager.add_entry(self._scenario_info) - self._scenario_status = "created" - self.allowed.append("execute") - def _upload_change_table(self): """Uploads change table to server.""" print("--> Writing change table on local machine") @@ -144,13 +135,14 @@ def create_scenario(self): self._scenario_info["infeasibilities"] = "" self.grid = self.builder.get_grid() self.ct = self.builder.change_table.ct - # Add scenario to scenario list file on server + # Add to scenario list and set the id in scenario_info self._scenario_list_manager.add_entry(self._scenario_info) - # Upload change table to server + if bool(self.builder.change_table.ct): self._upload_change_table() - # Add scenario to execute list file on server - self._add_entry_in_execute_list() + self._execute_list_manager.add_entry(self._scenario_info) + self._scenario_status = "created" + self.allowed.append("execute") print( "SCENARIO SUCCESSFULLY CREATED WITH ID #%s" % self._scenario_info["id"] From 13a84aef9da40448e74af15f0df0c1071c3eb2ca Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 29 Mar 2021 14:59:01 -0700 Subject: [PATCH 067/108] chore: minor print statement updates --- powersimdata/data_access/data_access.py | 2 +- powersimdata/data_access/profile_helper.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index e7b162dc3..e364fde6e 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -319,7 +319,7 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): self._check_file_exists(to_path, should_exist=False) with self.ssh.open_sftp() as sftp: - print(f"Transferring {from_path} to server") + print(f"Transferring {file_name} to server") sftp.put(from_path, to_path) os.remove(from_path) diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index 
6b6d14567..eb4a6fabc 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -52,7 +52,6 @@ def download_file(file_name, from_dir): f.write(chunk) pbar.update(len(chunk)) - print("--> Done!") return dest @staticmethod From 438aa0ccf89459b626f5ec2fbcf759e7fd968cbb Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Mon, 5 Apr 2021 12:13:20 -0700 Subject: [PATCH 068/108] fix: use os-specific path join for testing (#441) --- powersimdata/data_access/tests/test_profile_helper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/powersimdata/data_access/tests/test_profile_helper.py b/powersimdata/data_access/tests/test_profile_helper.py index 03423a525..df90e2036 100644 --- a/powersimdata/data_access/tests/test_profile_helper.py +++ b/powersimdata/data_access/tests/test_profile_helper.py @@ -1,3 +1,5 @@ +import os + from powersimdata.data_access.profile_helper import ProfileHelper @@ -21,4 +23,4 @@ def test_get_file_components(): s_info = {"base_wind": "v8", "grid_model": "europe"} file_name, from_dir = ProfileHelper.get_file_components(s_info, "wind") assert "wind_v8.csv" == file_name - assert "raw/europe" == from_dir + assert os.path.join("raw", "europe") == from_dir From 5a1dbbf18ab3d02068398490a27f6bf37671416a Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Mon, 5 Apr 2021 14:07:46 -0700 Subject: [PATCH 069/108] Correct bug for Scenario init with bad descriptor (#440) * test: add test for Scenario init with bad descriptor * fix: correct bug for Scenario init with bad descriptor --- powersimdata/scenario/scenario.py | 8 ++++++-- powersimdata/scenario/tests/test_scenario.py | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 powersimdata/scenario/tests/test_scenario.py diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 6bb07db1a..3181a1491 100644 --- 
a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -85,8 +85,12 @@ def _set_info(self, descriptor): :param str descriptor: scenario descriptor. """ info = self._scenario_list_manager.get_scenario(descriptor) - if info is not None: - self.info = info + if info is None: + raise ValueError( + f"{descriptor} not found in Scenario List. " + "See available scenarios with Scenario().get_scenario_table()" + ) + self.info = info def _set_status(self): """Sets execution status of scenario.""" diff --git a/powersimdata/scenario/tests/test_scenario.py b/powersimdata/scenario/tests/test_scenario.py new file mode 100644 index 000000000..393b590ee --- /dev/null +++ b/powersimdata/scenario/tests/test_scenario.py @@ -0,0 +1,10 @@ +import pytest + +from powersimdata.scenario.scenario import Scenario + + +@pytest.mark.ssh +def test_bad_scenario_name(): + # This test will fail if we do add a scenario with this name + with pytest.raises(ValueError): + Scenario("this_scenario_does_not_exist") From 4e929824fc2bb1ced537601ebee1b01a973d8a95 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Apr 2021 14:09:16 -0700 Subject: [PATCH 070/108] chore(deps): bump urllib3 from 1.26.3 to 1.26.4 (#442) Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.26.3 to 1.26.4. 
- [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.26.3...1.26.4) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Pipfile.lock | 133 +++++++++++++++++++++++---------------------------- 1 file changed, 61 insertions(+), 72 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index da29acba1..aad2b6521 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -19,7 +19,6 @@ "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==20.3.0" }, "bcrypt": { @@ -32,7 +31,6 @@ "sha256:cd1ea2ff3038509ea95f687256c46b79f5fc382ad0aa3664d200047546d511d1", "sha256:cdcdcb3972027f83fe24a48b1e90ea4b584d35f1cc279d76de6fc4b13376239d" ], - "markers": "python_version >= '3.6'", "version": "==3.2.0" }, "certifi": { @@ -89,28 +87,30 @@ "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==4.0.0" }, "cryptography": { "hashes": [ - "sha256:2d32223e5b0ee02943f32b19245b61a62db83a882f0e76cc564e1cec60d48f87", - "sha256:57ad77d32917bc55299b16d3b996ffa42a1c73c6cfa829b14043c561288d2799", - "sha256:5ecf2bcb34d17415e89b546dbb44e73080f747e504273e4d4987630493cded1b", - "sha256:66b57a9ca4b3221d51b237094b0303843b914b7d5afd4349970bb26518e350b0", - "sha256:93cfe5b7ff006de13e1e89830810ecbd014791b042cbe5eec253be11ac2b28f3", - "sha256:df186fcbf86dc1ce56305becb8434e4b6b7504bc724b71ad7a3239e0c9d14ef2", - "sha256:fec7fb46b10da10d9e1d078d1ff8ed9e05ae14f431fdbd11145edd0550b9a964" - ], - "markers": 
"python_version >= '3.6'", - "version": "==3.4.6" + "sha256:0f1212a66329c80d68aeeb39b8a16d54ef57071bf22ff4e521657b27372e327d", + "sha256:1e056c28420c072c5e3cb36e2b23ee55e260cb04eee08f702e0edfec3fb51959", + "sha256:240f5c21aef0b73f40bb9f78d2caff73186700bf1bc6b94285699aff98cc16c6", + "sha256:26965837447f9c82f1855e0bc8bc4fb910240b6e0d16a664bb722df3b5b06873", + "sha256:37340614f8a5d2fb9aeea67fd159bfe4f5f4ed535b1090ce8ec428b2f15a11f2", + "sha256:3d10de8116d25649631977cb37da6cbdd2d6fa0e0281d014a5b7d337255ca713", + "sha256:3d8427734c781ea5f1b41d6589c293089704d4759e34597dce91014ac125aad1", + "sha256:7ec5d3b029f5fa2b179325908b9cd93db28ab7b85bb6c1db56b10e0b54235177", + "sha256:8e56e16617872b0957d1c9742a3f94b43533447fd78321514abbe7db216aa250", + "sha256:de4e5f7f68220d92b7637fc99847475b59154b7a1b3868fb7385337af54ac9ca", + "sha256:eb8cc2afe8b05acbd84a43905832ec78e7b3873fb124ca190f574dca7389a87d", + "sha256:ee77aa129f481be46f8d92a1a7db57269a2f23052d5f2433b4621bb457081cc9" + ], + "version": "==3.4.7" }, "idna": { "hashes": [ "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.10" }, "more-itertools": { @@ -118,7 +118,6 @@ "sha256:5652a9ac72209ed7df8d9c15daf4e1aa0e3d2ccd3c87f8265a0673cd9cbc9ced", "sha256:c5d6da9ca3ff65220c3bfd2a8db06d698f05d4d2b9be57e1deb2be5a45019713" ], - "markers": "python_version >= '3.5'", "version": "==8.7.0" }, "numpy": { @@ -156,7 +155,6 @@ "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==20.9" }, "pandas": { @@ -196,7 +194,6 @@ "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", 
"sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.13.1" }, "psycopg2": { @@ -225,7 +222,6 @@ "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.10.0" }, "pycparser": { @@ -233,7 +229,6 @@ "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.20" }, "pynacl": { @@ -257,7 +252,6 @@ "sha256:ea6841bc3a76fa4942ce00f3bda7d436fda21e2d91602b9e21b7ca9ecab8f3ff", "sha256:f8851ab9041756003119368c1e6cd0b9c631f46d686b3904b18c0139f4419f80" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.4.0" }, "pyparsing": { @@ -265,7 +259,6 @@ "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.4.7" }, "pytest": { @@ -281,7 +274,6 @@ "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.1" }, "pytz": { @@ -329,7 +321,6 @@ "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.15.0" }, "tqdm": { @@ -342,11 +333,11 @@ 
}, "urllib3": { "hashes": [ - "sha256:1b465e494e3e0d8939b50680403e3aedaa2bc434b7d5af64dfd3c958d7f5ae80", - "sha256:de3eedaad74a2683334e282005cd8d7f22f4d55fa690a2a1020a416cb0a47e73" + "sha256:2f4da4594db7e1e110a944bb1b551fdf4e6c136ad42e4234131391e21eb5b0df", + "sha256:e7b021f7241115872f92f43c6508082facffbd1c048e3c6e2bb9c2a157e28937" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", - "version": "==1.26.3" + "index": "pypi", + "version": "==1.26.4" }, "wcwidth": { "hashes": [ @@ -376,7 +367,6 @@ "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==7.1.2" }, "mypy-extensions": { @@ -395,56 +385,55 @@ }, "regex": { "hashes": [ - "sha256:02951b7dacb123d8ea6da44fe45ddd084aa6777d4b2454fa0da61d569c6fa538", - "sha256:0d08e71e70c0237883d0bef12cad5145b84c3705e9c6a588b2a9c7080e5af2a4", - "sha256:1862a9d9194fae76a7aaf0150d5f2a8ec1da89e8b55890b1786b8f88a0f619dc", - "sha256:1ab79fcb02b930de09c76d024d279686ec5d532eb814fd0ed1e0051eb8bd2daa", - "sha256:1fa7ee9c2a0e30405e21031d07d7ba8617bc590d391adfc2b7f1e8b99f46f444", - "sha256:262c6825b309e6485ec2493ffc7e62a13cf13fb2a8b6d212f72bd53ad34118f1", - "sha256:2a11a3e90bd9901d70a5b31d7dd85114755a581a5da3fc996abfefa48aee78af", - "sha256:2c99e97d388cd0a8d30f7c514d67887d8021541b875baf09791a3baad48bb4f8", - "sha256:3128e30d83f2e70b0bed9b2a34e92707d0877e460b402faca908c6667092ada9", - "sha256:38c8fd190db64f513fe4e1baa59fed086ae71fa45083b6936b52d34df8f86a88", - "sha256:3bddc701bdd1efa0d5264d2649588cbfda549b2899dc8d50417e47a82e1387ba", - "sha256:4902e6aa086cbb224241adbc2f06235927d5cdacffb2425c73e6570e8d862364", - "sha256:49cae022fa13f09be91b2c880e58e14b6da5d10639ed45ca69b85faf039f7a4e", - "sha256:56e01daca75eae420bce184edd8bb341c8eebb19dd3bce7266332258f9fb9dd7", - 
"sha256:5862975b45d451b6db51c2e654990c1820523a5b07100fc6903e9c86575202a0", - "sha256:6a8ce43923c518c24a2579fda49f093f1397dad5d18346211e46f134fc624e31", - "sha256:6c54ce4b5d61a7129bad5c5dc279e222afd00e721bf92f9ef09e4fae28755683", - "sha256:6e4b08c6f8daca7d8f07c8d24e4331ae7953333dbd09c648ed6ebd24db5a10ee", - "sha256:717881211f46de3ab130b58ec0908267961fadc06e44f974466d1887f865bd5b", - "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884", - "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c", - "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e", - "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562", - "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85", - "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c", - "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6", - "sha256:a15f64ae3a027b64496a71ab1f722355e570c3fac5ba2801cafce846bf5af01d", - "sha256:a3d748383762e56337c39ab35c6ed4deb88df5326f97a38946ddd19028ecce6b", - "sha256:a63f1a07932c9686d2d416fb295ec2c01ab246e89b4d58e5fa468089cab44b70", - "sha256:b2b1a5ddae3677d89b686e5c625fc5547c6e492bd755b520de5332773a8af06b", - "sha256:b2f4007bff007c96a173e24dcda236e5e83bde4358a557f9ccf5e014439eae4b", - "sha256:baf378ba6151f6e272824b86a774326f692bc2ef4cc5ce8d5bc76e38c813a55f", - "sha256:bafb01b4688833e099d79e7efd23f99172f501a15c44f21ea2118681473fdba0", - "sha256:bba349276b126947b014e50ab3316c027cac1495992f10e5682dc677b3dfa0c5", - "sha256:c084582d4215593f2f1d28b65d2a2f3aceff8342aa85afd7be23a9cad74a0de5", - "sha256:d1ebb090a426db66dd80df8ca85adc4abfcbad8a7c2e9a5ec7513ede522e0a8f", - "sha256:d2d8ce12b7c12c87e41123997ebaf1a5767a5be3ec545f64675388970f415e2e", - "sha256:e32f5f3d1b1c663af7f9c4c1e72e6ffe9a78c03a31e149259f531e0fed826512", - "sha256:e3faaf10a0d1e8e23a9b51d1900b72e1635c2d5b0e1bea1c18022486a8e2e52d", - "sha256:f7d29a6fc4760300f86ae329e3b6ca28ea9c20823df123a2ea8693e967b29917", 
- "sha256:f8f295db00ef5f8bae530fc39af0b40486ca6068733fb860b42115052206466f" - ], - "version": "==2020.11.13" + "sha256:01afaf2ec48e196ba91b37451aa353cb7eda77efe518e481707e0515025f0cd5", + "sha256:11d773d75fa650cd36f68d7ca936e3c7afaae41b863b8c387a22aaa78d3c5c79", + "sha256:18c071c3eb09c30a264879f0d310d37fe5d3a3111662438889ae2eb6fc570c31", + "sha256:1e1c20e29358165242928c2de1482fb2cf4ea54a6a6dea2bd7a0e0d8ee321500", + "sha256:281d2fd05555079448537fe108d79eb031b403dac622621c78944c235f3fcf11", + "sha256:314d66636c494ed9c148a42731b3834496cc9a2c4251b1661e40936814542b14", + "sha256:32e65442138b7b76dd8173ffa2cf67356b7bc1768851dded39a7a13bf9223da3", + "sha256:339456e7d8c06dd36a22e451d58ef72cef293112b559010db3d054d5560ef439", + "sha256:3916d08be28a1149fb97f7728fca1f7c15d309a9f9682d89d79db75d5e52091c", + "sha256:3a9cd17e6e5c7eb328517969e0cb0c3d31fd329298dd0c04af99ebf42e904f82", + "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711", + "sha256:4c46e22a0933dd783467cf32b3516299fb98cfebd895817d685130cc50cd1093", + "sha256:4c557a7b470908b1712fe27fb1ef20772b78079808c87d20a90d051660b1d69a", + "sha256:52ba3d3f9b942c49d7e4bc105bb28551c44065f139a65062ab7912bef10c9afb", + "sha256:563085e55b0d4fb8f746f6a335893bda5c2cef43b2f0258fe1020ab1dd874df8", + "sha256:598585c9f0af8374c28edd609eb291b5726d7cbce16be6a8b95aa074d252ee17", + "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000", + "sha256:67bdb9702427ceddc6ef3dc382455e90f785af4c13d495f9626861763ee13f9d", + "sha256:6d1b01031dedf2503631d0903cb563743f397ccaf6607a5e3b19a3d76fc10480", + "sha256:741a9647fcf2e45f3a1cf0e24f5e17febf3efe8d4ba1281dcc3aa0459ef424dc", + "sha256:7c2a1af393fcc09e898beba5dd59196edaa3116191cc7257f9224beaed3e1aa0", + "sha256:7d9884d86dd4dd489e981d94a65cd30d6f07203d90e98f6f657f05170f6324c9", + "sha256:90f11ff637fe8798933fb29f5ae1148c978cccb0452005bf4c69e13db951e765", + "sha256:919859aa909429fb5aa9cf8807f6045592c85ef56fdd30a9a3747e513db2536e", + 
"sha256:96fcd1888ab4d03adfc9303a7b3c0bd78c5412b2bfbe76db5b56d9eae004907a", + "sha256:97f29f57d5b84e73fbaf99ab3e26134e6687348e95ef6b48cfd2c06807005a07", + "sha256:980d7be47c84979d9136328d882f67ec5e50008681d94ecc8afa8a65ed1f4a6f", + "sha256:a91aa8619b23b79bcbeb37abe286f2f408d2f2d6f29a17237afda55bb54e7aac", + "sha256:ade17eb5d643b7fead300a1641e9f45401c98eee23763e9ed66a43f92f20b4a7", + "sha256:b9c3db21af35e3b3c05764461b262d6f05bbca08a71a7849fd79d47ba7bc33ed", + "sha256:bd28bc2e3a772acbb07787c6308e00d9626ff89e3bfcdebe87fa5afbfdedf968", + "sha256:bf5824bfac591ddb2c1f0a5f4ab72da28994548c708d2191e3b87dd207eb3ad7", + "sha256:c0502c0fadef0d23b128605d69b58edb2c681c25d44574fc673b0e52dce71ee2", + "sha256:c38c71df845e2aabb7fb0b920d11a1b5ac8526005e533a8920aea97efb8ec6a4", + "sha256:ce15b6d103daff8e9fee13cf7f0add05245a05d866e73926c358e871221eae87", + "sha256:d3029c340cfbb3ac0a71798100ccc13b97dddf373a4ae56b6a72cf70dfd53bc8", + "sha256:e512d8ef5ad7b898cdb2d8ee1cb09a8339e4f8be706d27eaa180c2f177248a10", + "sha256:e8e5b509d5c2ff12f8418006d5a90e9436766133b564db0abaec92fd27fcee29", + "sha256:ee54ff27bf0afaf4c3b3a62bcd016c12c3fdb4ec4f413391a90bd38bc3624605", + "sha256:fa4537fb4a98fe8fde99626e4681cc644bdcf2a795038533f9f711513a862ae6", + "sha256:fd45ff9293d9274c5008a2054ecef86a9bfe819a67c7be1afb65e69b405b3042" + ], + "version": "==2021.4.4" }, "toml": { "hashes": [ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.10.2" }, "typed-ast": { From d2e1dc574487d9d3b60c00a1c31ec360e954954f Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Wed, 7 Apr 2021 10:18:52 -0700 Subject: [PATCH 071/108] fix: add deepcopy on the way into MemoryCache (#444) --- powersimdata/utility/helpers.py | 2 +- powersimdata/utility/tests/test_helpers.py | 11 +++++++++++ 2 files 
changed, 12 insertions(+), 1 deletion(-) diff --git a/powersimdata/utility/helpers.py b/powersimdata/utility/helpers.py index 9aaec9228..a56534cb9 100644 --- a/powersimdata/utility/helpers.py +++ b/powersimdata/utility/helpers.py @@ -63,7 +63,7 @@ def put(self, key, obj): :param tuple key: a tuple used to lookup the cached value :param Any obj: the object to cache """ - self._cache[key] = obj + self._cache[key] = copy.deepcopy(obj) def get(self, key): """Retrieve the value associated with key if it exists. diff --git a/powersimdata/utility/tests/test_helpers.py b/powersimdata/utility/tests/test_helpers.py index 7b75438c1..d6cea1552 100644 --- a/powersimdata/utility/tests/test_helpers.py +++ b/powersimdata/utility/tests/test_helpers.py @@ -68,6 +68,17 @@ def test_mem_cache_get_returns_copy(): assert id(cache.get(key)) != id(obj) +def test_mem_cache_put_version_never_changes(): + cache = MemoryCache() + key = cache_key("foo", 4) + obj = {"key1": "value1"} + cache.put(key, obj) + obj["key2"] = "value2" + assert "key1" in cache.get(key) + assert "key2" not in cache.get(key) + assert "key2" in obj + + def test_copy_command(): expected = r"\cp -p source dest" command = CommandBuilder.copy("source", "dest") From 74579a19eba274c52809c20bbf2510a896e7a6ba Mon Sep 17 00:00:00 2001 From: jon-hagg <66005238+jon-hagg@users.noreply.github.com> Date: Fri, 9 Apr 2021 14:00:47 -0700 Subject: [PATCH 072/108] fix: cleanup temp file and fix PermissionError when file is in use (#446) --- powersimdata/data_access/csv_store.py | 2 ++ powersimdata/data_access/data_access.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index e8834bd25..1219ce52c 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -79,3 +79,5 @@ def commit(self, table, checksum): shutil.copy(tmp_path, os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) tmp_name = 
os.path.basename(tmp_path) self.data_access.push(tmp_name, checksum, change_name_to=self._FILE_NAME) + if os.path.exists(tmp_path): # only required if data_access is LocalDataAccess + os.remove(tmp_path) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index e364fde6e..9a38e93c3 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -293,8 +293,9 @@ def copy_from(self, file_name, from_dir=None): cbk, bar = progress_bar(ascii=True, unit="b", unit_scale=True) _, tmp_path = mkstemp() sftp.get(from_path, tmp_path, callback=cbk) - shutil.move(tmp_path, to_path) bar.close() + # wait for file handle to be available + shutil.move(tmp_path, to_path) def move_to(self, file_name, to_dir=None, change_name_to=None): """Copy a file from userspace to data store. From 53aea2a5c373151479c250135433f86d9f677204 Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Tue, 13 Apr 2021 11:04:58 -0700 Subject: [PATCH 073/108] fix: close temp file before trying to move/delete (#449) --- powersimdata/data_access/csv_store.py | 3 ++- powersimdata/data_access/data_access.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/csv_store.py b/powersimdata/data_access/csv_store.py index 1219ce52c..62e753911 100644 --- a/powersimdata/data_access/csv_store.py +++ b/powersimdata/data_access/csv_store.py @@ -74,9 +74,10 @@ def commit(self, table, checksum): :param pandas.DataFrame table: the data frame to save :param str checksum: the checksum prior to download """ - _, tmp_path = mkstemp(dir=server_setup.LOCAL_DIR) + tmp_file, tmp_path = mkstemp(dir=server_setup.LOCAL_DIR) table.to_csv(tmp_path) shutil.copy(tmp_path, os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME)) + os.close(tmp_file) tmp_name = os.path.basename(tmp_path) self.data_access.push(tmp_name, checksum, change_name_to=self._FILE_NAME) if os.path.exists(tmp_path): # 
only required if data_access is LocalDataAccess diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 9a38e93c3..6f3807ee5 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -291,9 +291,10 @@ def copy_from(self, file_name, from_dir=None): with self.ssh.open_sftp() as sftp: print(f"Transferring {file_name} from server") cbk, bar = progress_bar(ascii=True, unit="b", unit_scale=True) - _, tmp_path = mkstemp() + tmp_file, tmp_path = mkstemp() sftp.get(from_path, tmp_path, callback=cbk) bar.close() + os.close(tmp_file) # wait for file handle to be available shutil.move(tmp_path, to_path) From 974f7fa170ad222c283467f45ce06f527cb1cf12 Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Wed, 14 Apr 2021 16:48:55 -0700 Subject: [PATCH 074/108] doc: update ChangeTable docstrings (#451) --- powersimdata/input/change_table.py | 47 +++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/powersimdata/input/change_table.py b/powersimdata/input/change_table.py index db1f48322..bd2d69204 100644 --- a/powersimdata/input/change_table.py +++ b/powersimdata/input/change_table.py @@ -91,8 +91,11 @@ class ChangeTable(object): the and the scaling factor for the increase/decrease in capacity of the line as value. * *'storage'*: - value is a dictionary. The latter has *'bus_id'* as keys and the - capacity of storage (in MW) to add as value. + value is a list. Each entry in this list is a dictionary enclosing all the + information needed to add a new storage device to the grid. The keys in the + dictionary are: *'bus_id'*, *'capacity'*, "duration", "min_stor", "max_stor", + "energy_value", "InEff", "OutEff", "LossFactor", "terminal_min", + and "terminal_max". See the :meth:`add_storage_capacity` method for details. * *'new_dcline'*: value is a list. 
Each entry in this list is a dictionary enclosing all the information needed to add a new dcline to the grid. The @@ -467,7 +470,23 @@ def add_storage_capacity(self, info): :param list info: each entry is a dictionary. The dictionary gathers the information needed to create a new storage device. - :raises TypeError: if info is not a list. + Required keys: "bus_id", "capacity". + "capacity" denotes the symmetric input and output power limits (MW). + Optional keys: "duration", "min_stor", "max_stor", "energy_value", "InEff", + "OutEff", "LossFactor", "terminal_min", "terminal_max". + "duration" denotes the energy to power ratio (hours). + "min_stor" denotes the minimum energy limit (unitless), e.g. 0.05 = 5%. + "max_stor" denotes the maximum energy limit (unitless), e.g. 0.95 = 95%. + "energy_value" denotes the value of stored energy at interval end ($/MWh). + "InEff" denotes the input efficiency (unitless), e.g. 0.95 = 95%. + "OutEff" denotes the output efficiency (unitless), e.g. 0.95 = 95%. + "LossFactor" denotes the per-hour relative losses, + e.g. 0.01 means that 1% of the current state of charge is lost per hour). + "terminal_min" denotes the minimum state of charge at interval end, + e.g. 0.5 means that the storage must end the interval with at least 50%. + "terminal_max" denotes the maximum state of charge at interval end, + e.g. 0.9 means that the storage must end the interval with no more than 90%. + :raises TypeError: if ``info`` is not a list. :raises ValueError: if any of the new storages to be added have bad values. """ if not isinstance(info, list): @@ -520,7 +539,14 @@ def add_dcline(self, info): :param list info: each entry is a dictionary. The dictionary gathers the information needed to create a new dcline. - :raises TypeError: if info is not a list. + Required keys: "from_bus_id", "to_bus_id". + Optional keys: "capacity", "Pmax", "Pmin". + "capacity" denotes a bidirectional power limit (MW). 
+ "Pmax" denotes a limit on power flowing from 'from' end to 'to' end. + "Pmin" denotes a limit on power flowing from 'from' end to 'to' end. + Either "capacity" XOR ("Pmax" and "Pmin") must be provided. + `capacity: 200` is equivalent to `Pmax: 200, Pmin: -200`. + :raises TypeError: if ``info`` is not a list. """ if not isinstance(info, list): raise TypeError("Argument enclosing new HVDC line(s) must be a list") @@ -531,7 +557,8 @@ def add_branch(self, info): :param list info: each entry is a dictionary. The dictionary gathers the information needed to create a new branch. - :raises TypeError: if info is not a list. + Required keys: "from_bus_id", "to_bus_id", "capacity". + :raises TypeError: if ``info`` is not a list. """ if not isinstance(info, list): raise TypeError("Argument enclosing new AC line(s) must be a list") @@ -646,7 +673,13 @@ def add_plant(self, info): :param list info: each entry is a dictionary. The dictionary gathers the information needed to create a new generator. - :raises TypeError: if info is not a list. + Required keys: "bus_id", "Pmax", "type". + Optional keys: "c0", "c1", "c2", "Pmin". + "c0", "c1", and "c2" are the coefficients for the cost curve, representing + the fixed cost ($/hour), linear cost ($/MWh), + and quadratic cost ($/:math:`\rm{MW}^2 \rm{h}`). + These are optional for hydro, solar, and wind, and required for other types. + :raises TypeError: if ``info`` is not a list. :raises ValueError: if any of the new plants to be added have bad values. """ if not isinstance(info, list): @@ -701,7 +734,7 @@ def add_bus(self, info): the information needed to create a new bus. Required keys: "lat", "lon", ["zone_id" XOR "zone_name"]. Optional key: "Pd", "baseKV". - :raises TypeError: if info is not a list. + :raises TypeError: if ``info`` is not a list. :raises ValueError: if any new bus doesn't have appropriate keys/values. 
""" if not isinstance(info, list): From 36b5c0716aaf3d324f277aa4ccc5d363e9b1de00 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 11:23:01 -0700 Subject: [PATCH 075/108] test: enable input of StorageData to MockGrid objects --- powersimdata/tests/mock_grid.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/powersimdata/tests/mock_grid.py b/powersimdata/tests/mock_grid.py index f7f7af84a..1d56a7066 100644 --- a/powersimdata/tests/mock_grid.py +++ b/powersimdata/tests/mock_grid.py @@ -14,7 +14,7 @@ } gencost_names = {"gencost_before": "before", "gencost_after": "after"} -storage_names = {"storage_gen": "gen"} +storage_names = {"storage_gen": "gen", "storage_StorageData": "StorageData"} acceptable_keys = ( set(indices.keys()) | set(gencost_names.keys()) | set(storage_names.keys()) ) @@ -151,6 +151,22 @@ storage_columns = { # The first 21 columns of plant are all that's necessary "gen": plant_columns[:21], + "StorageData": [ + "UnitIdx", + "InitialStorage", + "InitialStorageLowerBound", + "InitialStorageUpperBound", + "InitialStorageCost", + "TerminalStoragePrice", + "MinStorageLevel", + "MaxStorageLevel", + "OutEff", + "InEff", + "LossFactor", + "rho", + "ExpectedTerminalStorageMax", + "ExpectedTerminalStorageMin", + ], } From 70b744bfa04f5b06c0e32b69ca18dcc77e2e0efd Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 08:48:11 -0700 Subject: [PATCH 076/108] test: add test for non-summed generation investment costs --- .../investment/tests/test_investment_costs.py | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index 223768e04..e0b90e52e 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ 
b/powersimdata/design/investment/tests/test_investment_costs.py @@ -39,7 +39,7 @@ ] mock_plant = { - "plant_id": ["A", "B", "C", "D", "E", "F", "G", "H"], + "plant_id": [3, 5, 6, 7, 8, 9, 10, 11], "bus_id": [2010228, 2010228, 2021106, 2010319, 2010319, 2010319, 2010320, 2021106], "type": ["solar", "coal", "wind", "solar", "solar", "ng", "wind", "nuclear"], "Pmax": [15, 30, 10, 12, 8, 20, 15, 1000], @@ -63,6 +63,7 @@ "bus_id": [2010228, 2021106], "type": ["storage"] * 2, } +mock_storage_data = {"UnitIdx": [12, 13]} grid_attrs = { "plant": mock_plant, @@ -70,6 +71,7 @@ "branch": mock_branch, "dcline": mock_dcline, "storage_gen": mock_storage_gen, + "storage_StorageData": mock_storage_data, } @@ -129,3 +131,29 @@ def test_calculate_gen_inv_costs_2030(mock_grid): assert gen_inv_cost.keys() == expected_gen_inv_cost.keys() for k in gen_inv_cost.keys(): assert gen_inv_cost[k] == pytest.approx(expected_gen_inv_cost[k]) + + +def test_calculate_gen_inv_costs_not_summed(mock_grid): + gen_inv_cost = _calculate_gen_inv_costs( + mock_grid, 2025, "Advanced", sum_results=False + ) + expected_gen_inv_cost = { + # for each: capacity (kW) * regional multiplier * base technology cost + 3: 15e3 * 1.01701 * 1013.912846, + 5: 30e3 * 1.05221 * 4099.115851, + 6: 10e3 * 1.16979 * 1301.120135, + 7: 12e3 * 1.01701 * 1013.912846, + 8: 8e3 * 1.01701 * 1013.912846, + 9: 20e3 * 1.050755 * 1008.001936, + 10: 15e3 * 1.04348 * 1301.120135, + 11: 1000e3 * 1.07252 * 6928.866991, + 12: 100e3 * 1.012360 * 779, + 13: 200e3 * 1.043730 * 779, + } + inflation = calculate_inflation(2018) + expected_gen_inv_cost = {k: v * inflation for k, v in expected_gen_inv_cost.items()} + assert set(gen_inv_cost.index) == set(expected_gen_inv_cost.keys()) + for k in gen_inv_cost.index: + assert gen_inv_cost.loc[k, "CAPEX_total"] == pytest.approx( + expected_gen_inv_cost[k] + ) From 405c91a6674544ab2dcfc3e5338f5b8560e481e8 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 
2021 12:09:55 -0700 Subject: [PATCH 077/108] test: add test for non-summed ac investment cost --- .../investment/tests/test_investment_costs.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index e0b90e52e..c80c7a726 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ b/powersimdata/design/investment/tests/test_investment_costs.py @@ -98,6 +98,29 @@ def test_calculate_ac_inv_costs(mock_grid): assert ac_cost[k] == pytest.approx(expected_ac_cost[k]) +def test_calculate_ac_inv_costs_not_summed(mock_grid): + inflation_2010 = calculate_inflation(2010) + inflation_2020 = calculate_inflation(2020) + expected_ac_cost = { + # ((reg_mult1 + reg_mult2) / 2) * sum(basecost * rateA * miles) + "line_cost": { + 10: 0, # This branch would normally be dropped by calculate_ac_inv_costs + 11: ((1 + 2.25) / 2) * 3666.67 * 10 * 679.179925842 * inflation_2010, + 12: ((1 + 2.25) / 2) * 1500 * 1100 * 680.986501516 * inflation_2010, + }, + # for each: rateA * basecost * regional multiplier + "transformer_cost": { + 13: (30 * 7670 * 1) * inflation_2020, + 14: (40 * 8880 * 2.25) * inflation_2020, + }, + } + ac_cost = _calculate_ac_inv_costs(mock_grid, sum_results=False) + for branch_type, upgrade_costs in expected_ac_cost.items(): + assert set(upgrade_costs.keys()) == set(ac_cost[branch_type].index) + for branch, cost in upgrade_costs.items(): + assert cost == pytest.approx(ac_cost[branch_type].loc[branch, "Cost"]) + + def test_calculate_dc_inv_costs(mock_grid): expected_dc_cost = ( # lines From efc255ae8a2bf360c2b3763c615c98a0f182c3ad Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 12:31:51 -0700 Subject: [PATCH 078/108] test: add test for Line with no regional multipliers --- .../investment/tests/test_investment_costs.py | 31 ++++++++++++------- 1 file 
changed, 19 insertions(+), 12 deletions(-) diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index c80c7a726..9c384a142 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ b/powersimdata/design/investment/tests/test_investment_costs.py @@ -10,20 +10,21 @@ # bus_id is the index mock_bus = { - "bus_id": [2010228, 2021106, 2010319, 2010320], - "lat": [47.6146, 37.7849, 47.6408, 47.6408], - "lon": [-122.326, -122.407, -122.339, -122.339], - "baseKV": [100, 346, 230, 800], + "bus_id": [2010228, 2021106, 2010319, 2010320, 29409, 30778], + "lat": [47.6146, 37.7849, 47.6408, 47.6408, 30.7252, 30.5581], + "lon": [-122.326, -122.407, -122.339, -122.339, -88.2648, -88.5396], + "baseKV": [100, 346, 230, 800, 345, 345], } # branch 10-12 from Seattle (s3, p1, NWPP Coal) to San Francisco (s25, p9, NP15) (~679 miles) # branch 13-14 are transformers (0 miles) +# branch 15 is in Southern Company territory (testing for no multiplier in data) mock_branch = { - "branch_id": [10, 11, 12, 13, 14], - "rateA": [0, 10, 1100, 30, 40], - "from_bus_id": [2010228, 2010228, 2010319, 2010319, 2021106], - "to_bus_id": [2021106, 2021106, 2021106, 2010320, 2021106], - "branch_device_type": 3 * ["Line"] + 2 * ["Transformer"], + "branch_id": [10, 11, 12, 13, 14, 15], + "rateA": [0, 10, 1100, 30, 40, 50], + "from_bus_id": [2010228, 2010228, 2010319, 2010319, 2021106, 29409], + "to_bus_id": [2021106, 2021106, 2021106, 2010320, 2021106, 30778], + "branch_device_type": 3 * ["Line"] + 2 * ["Transformer"] + ["Line"], } mock_branch["from_lat"] = [ mock_bus["lat"][mock_bus["bus_id"].index(bus)] for bus in mock_branch["from_bus_id"] @@ -84,9 +85,14 @@ def test_calculate_ac_inv_costs(mock_grid): expected_ac_cost = { # ((reg_mult1 + reg_mult2) / 2) * sum(basecost * rateA * miles) "line_cost": ( - ((1 + 2.25) / 2) - * (3666.67 * 10 * 679.179925842 + 1500 * 1100 * 680.986501516) - * 
calculate_inflation(2010) + calculate_inflation(2010) + * ( + ( + ((1 + 2.25) / 2) + * (3666.67 * 10 * 679.179925842 + 1500 * 1100 * 680.986501516) + ) + + ((1 + 1) / 2) * 2333.33 * 50 * 20.003889808 + ) ), # for each: rateA * basecost * regional multiplier "transformer_cost": ((30 * 7670 * 1) + (40 * 8880 * 2.25)) @@ -107,6 +113,7 @@ def test_calculate_ac_inv_costs_not_summed(mock_grid): 10: 0, # This branch would normally be dropped by calculate_ac_inv_costs 11: ((1 + 2.25) / 2) * 3666.67 * 10 * 679.179925842 * inflation_2010, 12: ((1 + 2.25) / 2) * 1500 * 1100 * 680.986501516 * inflation_2010, + 15: ((1 + 1) / 2) * 2333.33 * 50 * 20.003889808 * inflation_2010, }, # for each: rateA * basecost * regional multiplier "transformer_cost": { From 61bcbcc3832751b0a07017c9f07008ce1eda710b Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 13:20:38 -0700 Subject: [PATCH 079/108] test: add test for non-summed DC lines --- .../investment/tests/test_investment_costs.py | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index 9c384a142..842561459 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ b/powersimdata/design/investment/tests/test_investment_costs.py @@ -53,10 +53,10 @@ ] mock_dcline = { - "dcline_id": [5], - "Pmax": [10], - "from_bus_id": [2010228], - "to_bus_id": [2021106], + "dcline_id": [5, 14], + "Pmax": [10, 200], + "from_bus_id": [2010228, 29409], + "to_bus_id": [2021106, 30778], } mock_storage_gen = { @@ -131,14 +131,32 @@ def test_calculate_ac_inv_costs_not_summed(mock_grid): def test_calculate_dc_inv_costs(mock_grid): expected_dc_cost = ( # lines - 10 * 679.1799258421203 * 457.1428571 * calculate_inflation(2015) + calculate_inflation(2015) + * 457.1428571 + * (10 * 679.1799258421203 + 200 * 20.003889808) # terminals - 
+ 135e3 * 10 * 2 * calculate_inflation(2020) + + 135e3 * (10 + 200) * 2 * calculate_inflation(2020) ) dc_cost = _calculate_dc_inv_costs(mock_grid) assert dc_cost == pytest.approx(expected_dc_cost) +def test_calculate_dc_inv_costs_not_summed(mock_grid): + expected_dc_cost = { + 5: ( + 457.1428571 * 10 * 679.1799258421203 * calculate_inflation(2015) + + 135e3 * 10 * 2 * calculate_inflation(2020) + ), + 14: ( + 457.1428571 * 200 * 20.003889808 * calculate_inflation(2015) + + 135e3 * 200 * 2 * calculate_inflation(2020) + ), + } + dc_cost = _calculate_dc_inv_costs(mock_grid, sum_results=False) + for dcline_id, expected_cost in expected_dc_cost.items(): + assert expected_cost == pytest.approx(dc_cost.loc[dcline_id]) + + def test_calculate_gen_inv_costs_2030(mock_grid): gen_inv_cost = _calculate_gen_inv_costs(mock_grid, 2030, "Moderate").to_dict() expected_gen_inv_cost = { From aa96f2524a254fc5b1010b795942ea0704a738dd Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 16 Apr 2021 13:33:57 -0700 Subject: [PATCH 080/108] test: add test for upgrading lines only --- .../investment/tests/test_investment_costs.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index 842561459..e6902dbd7 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ b/powersimdata/design/investment/tests/test_investment_costs.py @@ -1,3 +1,5 @@ +import copy + import pytest from powersimdata.design.investment.inflation import calculate_inflation @@ -104,6 +106,30 @@ def test_calculate_ac_inv_costs(mock_grid): assert ac_cost[k] == pytest.approx(expected_ac_cost[k]) +def test_calculate_ac_inv_costs_lines_only(mock_grid): + expected_ac_cost = { + # ((reg_mult1 + reg_mult2) / 2) * sum(basecost * rateA * miles) + "line_cost": ( + calculate_inflation(2010) + * ( + ( + ((1 + 2.25) / 2) + * (3666.67 * 
10 * 679.179925842 + 1500 * 1100 * 680.986501516) + ) + + ((1 + 1) / 2) * 2333.33 * 50 * 20.003889808 + ) + ), + # for each: rateA * basecost * regional multiplier + "transformer_cost": 0, + } + this_grid = copy.deepcopy(mock_grid) + this_grid.branch = this_grid.branch.query("branch_device_type == 'Line'") + ac_cost = _calculate_ac_inv_costs(this_grid) + assert ac_cost.keys() == expected_ac_cost.keys() + for k in ac_cost.keys(): + assert ac_cost[k] == pytest.approx(expected_ac_cost[k]) + + def test_calculate_ac_inv_costs_not_summed(mock_grid): inflation_2010 = calculate_inflation(2010) inflation_2020 = calculate_inflation(2020) From 83f61a99dcf85940cd89fc040f23fa3eadb15ccb Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 16 Apr 2021 13:40:39 -0700 Subject: [PATCH 081/108] test: add test for upgrading transformers only --- .../investment/tests/test_investment_costs.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index e6902dbd7..a7f34512b 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ b/powersimdata/design/investment/tests/test_investment_costs.py @@ -130,6 +130,22 @@ def test_calculate_ac_inv_costs_lines_only(mock_grid): assert ac_cost[k] == pytest.approx(expected_ac_cost[k]) +def test_calculate_ac_inv_costs_transformers_only(mock_grid): + expected_ac_cost = { + # ((reg_mult1 + reg_mult2) / 2) * sum(basecost * rateA * miles) + "line_cost": 0, + # for each: rateA * basecost * regional multiplier + "transformer_cost": ((30 * 7670 * 1) + (40 * 8880 * 2.25)) + * calculate_inflation(2020), + } + this_grid = copy.deepcopy(mock_grid) + this_grid.branch = this_grid.branch.query("branch_device_type == 'Transformer'") + ac_cost = _calculate_ac_inv_costs(this_grid) + assert ac_cost.keys() == expected_ac_cost.keys() + for k in ac_cost.keys(): + assert 
ac_cost[k] == pytest.approx(expected_ac_cost[k]) + + def test_calculate_ac_inv_costs_not_summed(mock_grid): inflation_2010 = calculate_inflation(2010) inflation_2020 = calculate_inflation(2020) From 02aa3a4458d72c63552e226a99f06a597727c9de Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Mon, 12 Apr 2021 15:24:40 -0700 Subject: [PATCH 082/108] refactor: add index-preserving data frame merge function --- .../design/investment/investment_costs.py | 45 +++++++++++++++---- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 04a8044d0..deaeef7a2 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -13,6 +13,17 @@ from powersimdata.utility.distance import haversine +def merge_keep_index(df1, df2, **kwargs): + """Execute a pandas DataFrame merge, preserving the index of the first dataframe. + + :param pandas.DataFrame df1: first data frame, to call pandas merge from. + :param pandas.DataFrame df2: second data frame, argument to pandas merge. + :param \\*\\*kwargs: arbitrary keyword arguments passed to pandas merge call. + :return: (*pandas.DataFrame*) -- df1 merged with df2 with indices preserved. + """ + return df1.reset_index().merge(df2, **kwargs).set_index(df1.index.names) + + def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): """Calculate cost of upgrading AC lines and/or transformers in a scenario. NEEM regions are used to find regional multipliers. 
@@ -139,7 +150,7 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): # in region file bus_fix_index = bus[~bus.index.isin(bus_reg.index)].index bus_mask = bus[~bus.index.isin(bus_fix_index)] - bus_mask = bus_mask.merge(bus_reg, how="left", on="bus_id") + bus_mask = merge_keep_index(bus_mask, bus_reg, how="left", on="bus_id") # these buses have incorrect lat/lon values in the region mapping file. # re-running the region mapping script on those buses only. bus_fix_index2 = bus_mask[ @@ -160,12 +171,20 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): id_vars=["kV", "MW"], var_name="name_abbr", value_name="mult" ) - lines = lines.merge(bus_reg, left_on="to_bus_id", right_on="bus_id", how="inner") - lines = lines.merge(ac_reg_mult, on=["name_abbr", "kV", "MW"], how="left") + lines = merge_keep_index( + lines, bus_reg, left_on="to_bus_id", right_on="bus_id", how="inner" + ) + lines = merge_keep_index( + lines, ac_reg_mult, on=["name_abbr", "kV", "MW"], how="left" + ) lines.rename(columns={"name_abbr": "reg_to", "mult": "mult_to"}, inplace=True) - lines = lines.merge(bus_reg, left_on="from_bus_id", right_on="bus_id", how="inner") - lines = lines.merge(ac_reg_mult, on=["name_abbr", "kV", "MW"], how="left") + lines = merge_keep_index( + lines, bus_reg, left_on="from_bus_id", right_on="bus_id", how="inner" + ) + lines = merge_keep_index( + lines, ac_reg_mult, on=["name_abbr", "kV", "MW"], how="left" + ) lines.rename(columns={"name_abbr": "reg_from", "mult": "mult_from"}, inplace=True) # take average between 2 buses' region multipliers @@ -402,7 +421,9 @@ def load_cost(year, cost_case): gen_costs.replace(const.gen_inv_cost_translation, inplace=True) gen_costs.drop(["Key", "FinancialCase", "CRPYears"], axis=1, inplace=True) # ATB technology costs merge - plants = plants.merge(gen_costs, right_on="Technology", left_on="type", how="left") + plants = merge_keep_index( + plants, gen_costs, right_on="Technology", 
left_on="type", how="left" + ) # REGIONAL COST MULTIPLIER @@ -417,7 +438,9 @@ def load_cost(year, cost_case): except FileNotFoundError: bus_reg = bus_to_reeds_reg(grid_new.bus.loc[plant_buses]) bus_reg.sort_index().to_csv(const.bus_reeds_regions_path) - plants = plants.merge(bus_reg, left_on="bus_id", right_index=True, how="left") + plants = merge_keep_index( + plants, bus_reg, left_on="bus_id", right_index=True, how="left" + ) # Determine one region 'r' for each plant, based on one of two mappings plants.loc[:, "r"] = "" @@ -432,8 +455,12 @@ def load_cost(year, cost_case): # merge regional multipliers with plants region_multiplier = pd.read_csv(const.regional_multiplier_path) region_multiplier.replace(const.regional_multiplier_gen_translation, inplace=True) - plants = plants.merge( - region_multiplier, left_on=["r", "Technology"], right_on=["r", "i"], how="left" + plants = merge_keep_index( + plants, + region_multiplier, + left_on=["r", "Technology"], + right_on=["r", "i"], + how="left", ) # multiply all together to get summed CAPEX ($) From d8c27970d4c68a5235f074f1913f511dd5b09679 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Mon, 12 Apr 2021 16:05:20 -0700 Subject: [PATCH 083/108] refactor: add append_keep_index_name function --- .../design/investment/investment_costs.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index deaeef7a2..5425f024a 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -24,6 +24,21 @@ def merge_keep_index(df1, df2, **kwargs): return df1.reset_index().merge(df2, **kwargs).set_index(df1.index.names) +def append_keep_index_name(df1, other, *args, **kwargs): + """Execute a pandas DataFrame append, preserving the index name of the dataframe. 
+ + :param pandas.DataFrame df1: first data frame, to call pandas append from. + :param pandas.DataFrame/pandas.Series/list other: first argument to pandas append method. + :param \\*args: arbitrary positional arguments passed to pandas append call. + :param \\*\\*kwargs: arbitrary keyword arguments passed to pandas append call. + :return: (*pandas.DataFrame*) -- df1 appended with other with index name preserved. + """ + original_index_name = df1.index.name + new_df = df1.append(other, *args, **kwargs) + new_df.index.name = original_index_name + return new_df + + def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): """Calculate cost of upgrading AC lines and/or transformers in a scenario. NEEM regions are used to find regional multipliers. @@ -404,7 +419,7 @@ def load_cost(year, cost_case): else: raise TypeError("cost_case must be str.") - plants = grid_new.plant.append(grid_new.storage["gen"]) + plants = append_keep_index_name(grid_new.plant, grid_new.storage["gen"]) plants = plants[ ~plants.type.isin(["dfo", "other"]) ] # drop these technologies, no cost data From 48b06283591e90f05afe7e7c0fcaaba4ffe8aea4 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Mon, 12 Apr 2021 17:12:59 -0700 Subject: [PATCH 084/108] refactor: generalize transformer regional multiplier lookup to branches --- .../design/investment/investment_costs.py | 41 +++++++------------ 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 5425f024a..5e23c185c 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -93,19 +93,22 @@ def select_mw(x, cost_df): # find closest MW & corresponding cost return tmp.iloc[np.argmin(np.abs(tmp["MW"] - x.rateA))][["MW", "costMWmi"]] - def 
get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): """Determine the regional multiplier based on kV and power (closest). - :param pandas.Series x: data for a single transformer. + :param pandas.Series x: data for a single branch. :param pandas.DataFrame bus_reg: data frame with bus regions. :param pandas.DataFrame ac_reg_mult: data frame with regional multipliers. - :param set xfmr_lookup_alerted: set of (voltage, region) tuples for which + :param set branch_lookup_alerted: set of (voltage, region) tuples for which a message has already been printed that this lookup was not found. :return: (*float*) -- regional multiplier. """ + # Select the highest voltage for transformers (branch end voltages should match) max_kV = bus.loc[[x.from_bus_id, x.to_bus_id], "baseKV"].max() # noqa: N806 - region = bus_reg.loc[x.from_bus_id, "name_abbr"] - region_mults = ac_reg_mult.loc[ac_reg_mult.name_abbr == region] + # Average the multipliers for branches (transformer regions should match) + regions = tuple(bus_reg.loc[[x.from_bus_id, x.to_bus_id], "name_abbr"]) + region_mults = ac_reg_mult.loc[ac_reg_mult.name_abbr.isin(regions)] + region_mults = region_mults.groupby(["kV", "MW"]).mean().reset_index() mult_lookup_kV = region_mults.loc[ # noqa: N806 (region_mults.kV - max_kV).abs().idxmin() @@ -116,11 +119,10 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): ] if len(region_kV_mults) == 0: mult = 1 - if (mult_lookup_kV, region) not in xfmr_lookup_alerted: - print(f"No multiplier for voltage {mult_lookup_kV} in {region}") - xfmr_lookup_alerted.add((mult_lookup_kV, region)) + if (mult_lookup_kV, regions) not in branch_lookup_alerted: + print(f"No multiplier for voltage {mult_lookup_kV} in {regions}") + branch_lookup_alerted.add((mult_lookup_kV, regions)) else: - mult_lookup_MW = region_kV_mults.loc[ # noqa: N806 (region_kV_mults.MW - x.rateA).abs().idxmin(), "MW" ] @@ -186,24 +188,9 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, 
xfmr_lookup_alerted=set()): id_vars=["kV", "MW"], var_name="name_abbr", value_name="mult" ) - lines = merge_keep_index( - lines, bus_reg, left_on="to_bus_id", right_on="bus_id", how="inner" - ) - lines = merge_keep_index( - lines, ac_reg_mult, on=["name_abbr", "kV", "MW"], how="left" - ) - lines.rename(columns={"name_abbr": "reg_to", "mult": "mult_to"}, inplace=True) - - lines = merge_keep_index( - lines, bus_reg, left_on="from_bus_id", right_on="bus_id", how="inner" - ) - lines = merge_keep_index( - lines, ac_reg_mult, on=["name_abbr", "kV", "MW"], how="left" + lines["mult"] = lines.apply( + lambda x: get_branch_mult(x, bus_reg, ac_reg_mult), axis=1 ) - lines.rename(columns={"name_abbr": "reg_from", "mult": "mult_from"}, inplace=True) - - # take average between 2 buses' region multipliers - lines.loc[:, "mult"] = (lines["mult_to"] + lines["mult_from"]) / 2.0 # calculate MWmi lines.loc[:, "lengthMi"] = lines.apply( @@ -223,7 +210,7 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): axis=1, ) transformers["mult"] = transformers.apply( - lambda x: get_transformer_mult(x, bus_reg, ac_reg_mult), axis=1 + lambda x: get_branch_mult(x, bus_reg, ac_reg_mult), axis=1 ) transformers["Cost"] = ( From 8708b7325a6afe1fd8c071a23184d76a40f9eff1 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 09:06:04 -0700 Subject: [PATCH 085/108] fix: ensure that storage indices are properly set in gen inv cost calculation --- powersimdata/design/investment/investment_costs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 5e23c185c..5ca34e2e8 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -406,7 +406,10 @@ def load_cost(year, cost_case): else: raise TypeError("cost_case must be str.") - plants = 
append_keep_index_name(grid_new.plant, grid_new.storage["gen"]) + storage_plants = grid_new.storage["gen"].set_index( + grid_new.storage["StorageData"].UnitIdx.astype(int) + ) + plants = append_keep_index_name(grid_new.plant, storage_plants) plants = plants[ ~plants.type.isin(["dfo", "other"]) ] # drop these technologies, no cost data From 7e52e590728554fe0f58851f488e9be3db0df337 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 11:45:13 -0700 Subject: [PATCH 086/108] fix: use Moderate cost case data for Nuclear, add warning --- powersimdata/design/investment/investment_costs.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 5ca34e2e8..3cc018b17 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -1,4 +1,5 @@ import copy as cp +import warnings import numpy as np import pandas as pd @@ -388,6 +389,17 @@ def load_cost(year, cost_case): cost.rename(columns={"value": "CAPEX"}, inplace=True) # select scenario of interest + if cost_case != "Moderate": + # The 2020 ATB only has "Moderate" for nuclear, so we need to make do.
+ warnings.warn( + f"No cost data available for Nuclear for {cost_case} cost case, " + "using Moderate cost case data instead" + ) + new_nuclear = cost.query( + "Technology == 'Nuclear' and CostCase == 'Moderate'" + ).copy() + new_nuclear.CostCase = cost_case + cost = cost.append(new_nuclear, ignore_index=True) cost = cost[cost["CostCase"] == cost_case] cost.drop(["CostCase"], axis=1, inplace=True) From 5b7e1fda24ccaefc5785210fc06155f291250135 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 13 Apr 2021 13:08:51 -0700 Subject: [PATCH 087/108] fix: ensure that high-priced underground line costs are only used when appropriate --- .../design/investment/investment_costs.py | 52 +++++++++++-------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 3cc018b17..f059634de 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -86,13 +86,20 @@ def select_mw(x, cost_df): :return: (*pandas.Series*) -- series of [*'MW'*, *'costMWmi'*] to be assigned to branch. 
""" - + underground_regions = ("NEISO", "NYISO J-K") + filtered_cost_df = cost_df.copy() + # Unless we are entirely within an underground region, drop this cost class + if not (x.from_region == x.to_region and x.from_region in underground_regions): + filtered_cost_df = filtered_cost_df.query("kV != 345 or MW != 500") # select corresponding cost table of selected kV - tmp = cost_df[cost_df["kV"] == x.kV] + filtered_cost_df = filtered_cost_df[filtered_cost_df["kV"] == x.kV] # get rid of NaN values in this kV table - tmp = tmp[~tmp["MW"].isna()] + filtered_cost_df = filtered_cost_df[~filtered_cost_df["MW"].isna()] # find closest MW & corresponding cost - return tmp.iloc[np.argmin(np.abs(tmp["MW"] - x.rateA))][["MW", "costMWmi"]] + filtered_cost_df = filtered_cost_df.iloc[ + np.argmin(np.abs(filtered_cost_df["MW"] - x.rateA)) + ] + return filtered_cost_df.loc[["MW", "costMWmi"]] def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): """Determine the regional multiplier based on kV and power (closest). 
@@ -107,7 +114,7 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): # Select the highest voltage for transformers (branch end voltages should match) max_kV = bus.loc[[x.from_bus_id, x.to_bus_id], "baseKV"].max() # noqa: N806 # Average the multipliers for branches (transformer regions should match) - regions = tuple(bus_reg.loc[[x.from_bus_id, x.to_bus_id], "name_abbr"]) + regions = (x.from_region, x.to_region) region_mults = ac_reg_mult.loc[ac_reg_mult.name_abbr.isin(regions)] region_mults = region_mults.groupby(["kV", "MW"]).mean().reset_index() @@ -145,27 +152,10 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): # Mirror across diagonal xfmr_cost += xfmr_cost.to_numpy().T - np.diag(np.diag(xfmr_cost.to_numpy())) - # map line kV - bus = grid_new.bus - branch = grid_new.branch - branch.loc[:, "kV"] = branch.apply( - lambda x: bus.loc[x.from_bus_id, "baseKV"], axis=1 - ) - - # separate transformers and lines - t_mask = branch["branch_device_type"].isin(["Transformer", "TransformerWinding"]) - transformers = branch[t_mask].copy() - lines = branch[~t_mask].copy() - # Find closest kV rating - lines.loc[:, "kV"] = lines.apply( - lambda x: ac_cost.loc[(ac_cost["kV"] - x.kV).abs().idxmin(), "kV"], - axis=1, - ) - lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1) - # check that all buses included in this file and lat/long values match, # otherwise re-run mapping script on mis-matching buses. 
These buses are missing # in region file + bus = grid_new.bus bus_fix_index = bus[~bus.index.isin(bus_reg.index)].index bus_mask = bus[~bus.index.isin(bus_fix_index)] bus_mask = merge_keep_index(bus_mask, bus_reg, how="left", on="bus_id") @@ -184,6 +174,22 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): bus_reg.drop(["lat", "lon"], axis=1, inplace=True) + # Add extra information to branch data frame + branch = grid_new.branch + branch.loc[:, "kV"] = bus.loc[branch.from_bus_id, "baseKV"].tolist() + branch.loc[:, "from_region"] = bus_reg.loc[branch.from_bus_id, "name_abbr"].tolist() + branch.loc[:, "to_region"] = bus_reg.loc[branch.to_bus_id, "name_abbr"].tolist() + # separate transformers and lines + t_mask = branch["branch_device_type"].isin(["Transformer", "TransformerWinding"]) + transformers = branch[t_mask].copy() + lines = branch[~t_mask].copy() + # Find closest kV rating + lines.loc[:, "kV"] = lines.apply( + lambda x: ac_cost.loc[(ac_cost["kV"] - x.kV).abs().idxmin(), "kV"], + axis=1, + ) + lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1) + # map region multipliers onto lines ac_reg_mult = ac_reg_mult.melt( id_vars=["kV", "MW"], var_name="name_abbr", value_name="mult" From 22ebe240b8f2770315a4949636632191ac284fe3 Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 16 Apr 2021 08:08:46 -0700 Subject: [PATCH 088/108] fix: add if/else to handle all-branch upgrades --- .../design/investment/investment_costs.py | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index f059634de..c27bb0787 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -209,16 +209,25 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): lines.loc[:, "Cost"] = 
lines["MWmi"] * lines["costMWmi"] * lines["mult"] # calculate transformer costs - transformers["per_MW_cost"] = transformers.apply( - lambda x: xfmr_cost.iloc[ - xfmr_cost.index.get_loc(bus.loc[x.from_bus_id, "baseKV"], method="nearest"), - xfmr_cost.columns.get_loc(bus.loc[x.to_bus_id, "baseKV"], method="nearest"), - ], - axis=1, - ) - transformers["mult"] = transformers.apply( - lambda x: get_branch_mult(x, bus_reg, ac_reg_mult), axis=1 - ) + if len(transformers) > 0: + transformers["per_MW_cost"] = transformers.apply( + lambda x: xfmr_cost.iloc[ + xfmr_cost.index.get_loc( + bus.loc[x.from_bus_id, "baseKV"], method="nearest" + ), + xfmr_cost.columns.get_loc( + bus.loc[x.to_bus_id, "baseKV"], method="nearest" + ), + ], + axis=1, + ) + transformers["mult"] = transformers.apply( + lambda x: get_branch_mult(x, bus_reg, ac_reg_mult), axis=1 + ) + else: + # Properly handle case with no transformers, where apply returns wrong dims + transformers["per_MW_cost"] = [] + transformers["mult"] = [] transformers["Cost"] = ( transformers["rateA"] * transformers["per_MW_cost"] * transformers["mult"] From dfca9a7d7c8d5d4f9fca49f098848225239aa4aa Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 16 Apr 2021 13:46:08 -0700 Subject: [PATCH 089/108] fix: add if/else to handle all transformer upgrades --- .../design/investment/investment_costs.py | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index c27bb0787..4ff1aa612 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -142,6 +142,9 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): # import data ac_cost = pd.DataFrame(const.ac_line_cost) ac_reg_mult = pd.read_csv(const.ac_reg_mult_path) + ac_reg_mult = ac_reg_mult.melt( + id_vars=["kV", "MW"], 
var_name="name_abbr", value_name="mult" + ) try: bus_reg = pd.read_csv(const.bus_neem_regions_path, index_col="bus_id") except FileNotFoundError: @@ -183,26 +186,25 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): t_mask = branch["branch_device_type"].isin(["Transformer", "TransformerWinding"]) transformers = branch[t_mask].copy() lines = branch[~t_mask].copy() - # Find closest kV rating - lines.loc[:, "kV"] = lines.apply( - lambda x: ac_cost.loc[(ac_cost["kV"] - x.kV).abs().idxmin(), "kV"], - axis=1, - ) - lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1) - - # map region multipliers onto lines - ac_reg_mult = ac_reg_mult.melt( - id_vars=["kV", "MW"], var_name="name_abbr", value_name="mult" - ) + if len(lines) > 0: + # Find closest kV rating + lines.loc[:, "kV"] = lines.apply( + lambda x: ac_cost.loc[(ac_cost["kV"] - x.kV).abs().idxmin(), "kV"], + axis=1, + ) + lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1) - lines["mult"] = lines.apply( - lambda x: get_branch_mult(x, bus_reg, ac_reg_mult), axis=1 - ) + lines["mult"] = lines.apply( + lambda x: get_branch_mult(x, bus_reg, ac_reg_mult), axis=1 + ) - # calculate MWmi - lines.loc[:, "lengthMi"] = lines.apply( - lambda x: haversine((x.from_lat, x.from_lon), (x.to_lat, x.to_lon)), axis=1 - ) + # calculate MWmi + lines.loc[:, "lengthMi"] = lines.apply( + lambda x: haversine((x.from_lat, x.from_lon), (x.to_lat, x.to_lon)), axis=1 + ) + else: + new_columns = ["kV", "MW", "costMWmi", "mult", "lengthMi"] + lines = lines.reindex(columns=[*lines.columns.tolist(), *new_columns]) lines.loc[:, "MWmi"] = lines["lengthMi"] * lines["rateA"] # calculate cost of each line From 0c5ee94f04e8f4a74563b59a73148e5dbb9e85eb Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Fri, 16 Apr 2021 16:36:58 -0700 Subject: [PATCH 090/108] fix: correct malformed math in add_plant docstring --- 
powersimdata/input/change_table.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/powersimdata/input/change_table.py b/powersimdata/input/change_table.py index bd2d69204..ddc2cdd86 100644 --- a/powersimdata/input/change_table.py +++ b/powersimdata/input/change_table.py @@ -676,8 +676,7 @@ def add_plant(self, info): Required keys: "bus_id", "Pmax", "type". Optional keys: "c0", "c1", "c2", "Pmin". "c0", "c1", and "c2" are the coefficients for the cost curve, representing - the fixed cost ($/hour), linear cost ($/MWh), - and quadratic cost ($/:math:`\rm{MW}^2 \rm{h}`). + the fixed cost ($/hour), linear cost ($/MWh), and quadratic cost ($/MW^2·h). These are optional for hydro, solar, and wind, and required for other types. :raises TypeError: if ``info`` is not a list. :raises ValueError: if any of the new plants to be added have bad values. From 57bddfa9c3413d47c0b0c89a198cc7e2cf474743 Mon Sep 17 00:00:00 2001 From: jon-hagg <66005238+jon-hagg@users.noreply.github.com> Date: Wed, 21 Apr 2021 15:22:57 -0700 Subject: [PATCH 091/108] feat: change makedir implementation to be os agnostic (#456) * feat: change makedir implementation to be os agnostic * chore: remove unused attribute --- powersimdata/data_access/data_access.py | 22 ++++++++++++++++++++-- powersimdata/scenario/execute.py | 12 ++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 6f3807ee5..ad8093bdb 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -83,8 +83,7 @@ def makedir(self, relative_path): :param str relative_path: the path, without filename, relative to root """ - full_path = posixpath.join(self.root, relative_path) - return self.execute_command(f"mkdir -p {full_path}") + raise NotImplementedError def execute_command(self, command): """Execute a command locally at the data access. 
@@ -180,6 +179,14 @@ def move_to(self, file_name, to_dir, change_name_to=None): self.copy(src, dest) self.remove(src) + def makedir(self, relative_path): + """Create paths relative to the instance root + + :param str relative_path: the path, without filename, relative to root + """ + target = os.path.join(self.root, relative_path) + os.makedirs(target, exist_ok=True) + def execute_command(self, command): """Execute a command locally at the data access. @@ -395,6 +402,17 @@ def push(self, file_name, checksum, change_name_to=None): print(e) raise IOError("Failed to push file - most likely a conflict was detected.") + def makedir(self, relative_path): + """Create paths relative to the instance root + + :param str relative_path: the path, without filename, relative to root + :raises IOError: if command generated stderr + """ + full_path = posixpath.join(self.root, relative_path) + _, _, stderr = self.execute_command(f"mkdir -p {full_path}") + if len(stderr.readlines()) != 0: + raise IOError("Failed to create %s on server" % full_path) + def close(self): """Close the connection that was opened when the object was created.""" self.ssh.close() diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index 5036fb729..fffc1c381 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -334,22 +334,14 @@ def __init__(self, data_access, scenario_info, grid, ct): self.server_config = server_setup.PathConfig(server_setup.DATA_ROOT_DIR) self.scenario_folder = "scenario_%s" % scenario_info["id"] - self.TMP_DIR = posixpath.join( - self.server_config.execute_dir(), self.scenario_folder - ) self.REL_TMP_DIR = posixpath.join( server_setup.EXECUTE_DIR, self.scenario_folder ) def create_folder(self): - """Creates folder on server that will enclose simulation inputs. - - :raises IOError: if folder cannot be created. 
- """ + """Creates folder on server that will enclose simulation inputs.""" print("--> Creating temporary folder on server for simulation inputs") - _, _, stderr = self._data_access.makedir(self.TMP_DIR) - if len(stderr.readlines()) != 0: - raise IOError("Failed to create %s on server" % self.TMP_DIR) + self._data_access.makedir(self.REL_TMP_DIR) def prepare_mpc_file(self): """Creates MATPOWER case file.""" From 7e13d82036192989efc1f154c46b32d069e2ac38 Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Thu, 22 Apr 2021 09:01:09 -0700 Subject: [PATCH 092/108] feat: add 'cost' method to upgrade prioritization (#452) --- powersimdata/design/transmission/upgrade.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/powersimdata/design/transmission/upgrade.py b/powersimdata/design/transmission/upgrade.py index 410d28191..eee8c6cbb 100644 --- a/powersimdata/design/transmission/upgrade.py +++ b/powersimdata/design/transmission/upgrade.py @@ -1,5 +1,6 @@ import pandas as pd +from powersimdata.design.investment.investment_costs import _calculate_ac_inv_costs from powersimdata.input.grid import Grid from powersimdata.network.model import area_to_loadzone from powersimdata.utility.distance import haversine @@ -262,14 +263,13 @@ def _identify_mesh_branch_upgrades( zero_length_value = 1 # miles # Validate method input - allowed_methods = ("branches", "MW", "MWmiles") + allowed_methods = ("branches", "MW", "MWmiles", "cost") if method not in allowed_methods: allowed_list = ", ".join(allowed_methods) raise ValueError(f"method must be one of: {allowed_list}") # Get raw congestion dual values, add them - rss = ref_scenario.state - ref_cong_abs = rss.get_congu() + rss.get_congl() + ref_cong_abs = ref_scenario.state.get_congu() + ref_scenario.state.get_congl() all_branches = set(ref_cong_abs.columns.tolist()) # Create validated composite allow list composite_allow_list = _construct_composite_allow_list( @@ 
-277,12 +277,12 @@ def _identify_mesh_branch_upgrades( ) # Parse 2-D array to vector of quantile values + ref_cong_abs = ref_cong_abs.filter(items=composite_allow_list) quantile_cong_abs = ref_cong_abs.quantile(quantile) # Filter out insignificant values significance_bitmask = quantile_cong_abs > cong_significance_cutoff quantile_cong_abs = quantile_cong_abs.where(significance_bitmask).dropna() # Filter based on composite allow list - quantile_cong_abs = quantile_cong_abs.filter(items=composite_allow_list) congested_indices = list(quantile_cong_abs.index) # Ensure that we have enough congested branches to upgrade @@ -293,6 +293,15 @@ def _identify_mesh_branch_upgrades( raise ValueError(err_msg) # Calculate selected metric for congested branches + if method == "cost": + # Calculate costs for an upgrade dataframe containing only composite_allow_list + base_grid = Grid( + ref_scenario.info["interconnect"], ref_scenario.info["grid_model"] + ) + base_grid.branch = base_grid.branch.filter(items=congested_indices, axis=0) + upgrade_costs = _calculate_ac_inv_costs(base_grid, sum_results=False) + # Merge the individual line/transformer data into a single Series + merged_upgrade_costs = pd.concat([v.Cost for v in upgrade_costs.values()]) if method in ("MW", "MWmiles"): ref_grid = ref_scenario.state.get_grid() branch_ratings = ref_grid.branch.loc[congested_indices, "rateA"] @@ -315,6 +324,8 @@ def _identify_mesh_branch_upgrades( # Replace zero-length branches by designated default, don't divide by 0 branch_lengths = branch_lengths.replace(0, value=zero_length_value) branch_metric = quantile_cong_abs / (branch_ratings * branch_lengths) + elif method == "cost": + branch_metric = quantile_cong_abs / merged_upgrade_costs else: # By process of elimination, all that's left is method 'branches' branch_metric = quantile_cong_abs From 800008c5b52ac8b5017b5134f2bc7f868b4a7488 Mon Sep 17 00:00:00 2001 From: jon-hagg <66005238+jon-hagg@users.noreply.github.com> Date: Thu, 22 Apr 2021 
11:37:26 -0700 Subject: [PATCH 093/108] feat: os agnostic file existence check (#459) --- powersimdata/data_access/data_access.py | 33 ++++++++++++++++++++++--- powersimdata/utility/helpers.py | 8 ------ 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index ad8093bdb..c51ffb502 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -56,6 +56,14 @@ def remove(self, target, recursive=False, force=False): command = CommandBuilder.remove(target, recursive, force) return self.execute_command(command) + def _exists(self, filepath): + """Return whether the file exists + + :param str filepath: the path to the file + :return: (*bool*) -- whether the file exists + """ + raise NotImplementedError + def _check_file_exists(self, filepath, should_exist=True): """Check that file exists (or not) at the given path @@ -63,11 +71,11 @@ def _check_file_exists(self, filepath, should_exist=True): :param bool should_exist: whether the file is expected to exist :raises OSError: if the expected condition is not met """ - _, _, stderr = self.execute_command(CommandBuilder.list(filepath)) + result = self._exists(filepath) compare = operator.ne if should_exist else operator.eq - if compare(len(stderr.readlines()), 0): + if compare(result, True): msg = "not found" if should_exist else "already exists" - raise OSError(f"{filepath} {msg} on server") + raise OSError(f"{filepath} {msg} on {self.description}") def _check_filename(self, filename): """Check that filename is only the name part @@ -136,6 +144,7 @@ class LocalDataAccess(DataAccess): def __init__(self, root=None): self.root = root if root else server_setup.DATA_ROOT_DIR + self.description = "local machine" def copy_from(self, file_name, from_dir=None): """Copy a file from data store to userspace. 
@@ -219,6 +228,14 @@ def get_profile_version(self, grid_model, kind): local_version = ProfileHelper.get_profile_version_local(grid_model, kind) return list(set(blob_version + local_version)) + def _exists(self, filepath): + """Return whether the file exists + + :param str filepath: the path to the file + :return: (*bool*) -- whether the file exists + """ + return os.path.exists(filepath) + class SSHDataAccess(DataAccess): """Interface to a remote data store, accessed via SSH.""" @@ -231,6 +248,7 @@ def __init__(self, root=None): self._retry_after = 5 self.root = server_setup.DATA_ROOT_DIR if root is None else root self.local_root = server_setup.LOCAL_DIR + self.description = "server" @property def ssh(self): @@ -413,6 +431,15 @@ def makedir(self, relative_path): if len(stderr.readlines()) != 0: raise IOError("Failed to create %s on server" % full_path) + def _exists(self, filepath): + """Return whether the file exists + + :param str filepath: the path to the file + :return: (*bool*) -- whether the file exists + """ + _, _, stderr = self.execute_command(f"ls {filepath}") + return len(stderr.readlines()) == 0 + def close(self): """Close the connection that was opened when the object was created.""" self.ssh.close() diff --git a/powersimdata/utility/helpers.py b/powersimdata/utility/helpers.py index a56534cb9..dc1752f59 100644 --- a/powersimdata/utility/helpers.py +++ b/powersimdata/utility/helpers.py @@ -39,14 +39,6 @@ def remove(target, recursive=False, force=False): return f"rm {flags} {target}" return f"rm {target}" - @staticmethod - def list(path): - """Builds an ls command - - :param str path: the path argument - """ - return f"ls {path}" - class MemoryCache: """Wrapper around a dict object that exposes a cache interface. 
Users should From 68005c18fb05ba6a4edc74af4698917b6fd705f0 Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Sat, 24 Apr 2021 07:00:15 -0700 Subject: [PATCH 094/108] fix: enable AC investment costs to be calculated for branches connected to new buses (#462) --- .../design/investment/investment_costs.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index 4ff1aa612..e56e0b9da 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -159,21 +159,24 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): # otherwise re-run mapping script on mis-matching buses. These buses are missing # in region file bus = grid_new.bus - bus_fix_index = bus[~bus.index.isin(bus_reg.index)].index - bus_mask = bus[~bus.index.isin(bus_fix_index)] - bus_mask = merge_keep_index(bus_mask, bus_reg, how="left", on="bus_id") + mapped_buses = bus.query("index in @bus_reg.index") + missing_bus_indices = set(bus.index) - set(bus_reg.index) + mapped_buses = merge_keep_index(mapped_buses, bus_reg, how="left", on="bus_id") # these buses have incorrect lat/lon values in the region mapping file. # re-running the region mapping script on those buses only. 
- bus_fix_index2 = bus_mask[ - ~np.isclose(bus_mask.lat_x, bus_mask.lat_y) - | ~np.isclose(bus_mask.lon_x, bus_mask.lon_y) + misaligned_bus_indices = mapped_buses[ + ~np.isclose(mapped_buses.lat_x, mapped_buses.lat_y) + | ~np.isclose(mapped_buses.lon_x, mapped_buses.lon_y) ].index - bus_fix_index_all = bus_fix_index.tolist() + bus_fix_index2.tolist() + all_buses_to_fix = set(missing_bus_indices) | set(misaligned_bus_indices) # fix the identified buses, if necessary - if len(bus_fix_index_all) > 0: - bus_fix = bus_to_neem_reg(bus[bus.index.isin(bus_fix_index_all)]) + if len(all_buses_to_fix) > 0: + bus_fix = bus_to_neem_reg(bus.query("index in @all_buses_to_fix")) fix_cols = ["name_abbr", "lat", "lon"] - bus_reg.loc[bus_reg.index.isin(bus_fix.index), fix_cols] = bus_fix[fix_cols] + corrected_bus_mappings = bus_fix.loc[misaligned_bus_indices, fix_cols] + new_bus_mappings = bus_fix.loc[missing_bus_indices, fix_cols] + bus_reg.loc[misaligned_bus_indices, fix_cols] = corrected_bus_mappings + bus_reg = append_keep_index_name(bus_reg, new_bus_mappings) bus_reg.drop(["lat", "lon"], axis=1, inplace=True) From 4d88dd2eef712761eda012ceecdd834760e679a8 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Thu, 22 Apr 2021 15:51:30 -0700 Subject: [PATCH 095/108] feat: os agnostic rm functionality --- powersimdata/data_access/data_access.py | 31 ++++++++++++++++--- ...t_transfer_data.py => test_data_access.py} | 0 powersimdata/scenario/delete.py | 12 ++----- powersimdata/scenario/move.py | 15 ++++----- powersimdata/utility/helpers.py | 12 +++---- powersimdata/utility/tests/test_helpers.py | 8 ++--- 6 files changed, 44 insertions(+), 34 deletions(-) rename powersimdata/data_access/tests/{test_transfer_data.py => test_data_access.py} (100%) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index c51ffb502..2f1b2b283 100644 --- a/powersimdata/data_access/data_access.py +++ 
b/powersimdata/data_access/data_access.py @@ -1,3 +1,4 @@ +import glob import operator import os import posixpath @@ -46,15 +47,13 @@ def copy(self, src, dest, recursive=False, update=False): command = CommandBuilder.copy(src, dest, recursive, update) return self.execute_command(command) - def remove(self, target, recursive=False, force=False): + def remove(self, target, recursive=False): """Wrapper around rm command :param str target: path to remove :param bool recursive: delete directories recursively - :param bool force: remove without confirmation """ - command = CommandBuilder.remove(target, recursive, force) - return self.execute_command(command) + raise NotImplementedError def _exists(self, filepath): """Return whether the file exists @@ -196,6 +195,18 @@ def makedir(self, relative_path): target = os.path.join(self.root, relative_path) os.makedirs(target, exist_ok=True) + def remove(self, target, recursive=False): + """Remove target using rm semantics + + :param str target: path to remove + :param bool recursive: delete directories recursively + """ + if recursive: + target = os.path.join(target, "**") + files = glob.glob(target, recursive=recursive) + for f in files: + os.remove(f) + def execute_command(self, command): """Execute a command locally at the data access. 
@@ -431,6 +442,18 @@ def makedir(self, relative_path): if len(stderr.readlines()) != 0: raise IOError("Failed to create %s on server" % full_path) + def remove(self, target, recursive=False): + """Run rm command on server + + :param str target: path to remove + :param bool recursive: delete directories recursively + :raises IOError: if command generated stderr + """ + command = CommandBuilder.remove(target, recursive) + _, _, stderr = self.execute_command(command) + if len(stderr.readlines()) != 0: + raise IOError(f"Failed to delete target={target} on server") + def _exists(self, filepath): """Return whether the file exists diff --git a/powersimdata/data_access/tests/test_transfer_data.py b/powersimdata/data_access/tests/test_data_access.py similarity index 100% rename from powersimdata/data_access/tests/test_transfer_data.py rename to powersimdata/data_access/tests/test_data_access.py diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index d00cd9981..d77020a60 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -43,25 +43,19 @@ def delete_scenario(self): # Delete links to base profiles on server print("--> Deleting scenario input data on server") target = posixpath.join(self.path_config.input_dir(), wildcard) - _, _, stderr = self._data_access.remove(target, recursive=False, force=True) - if len(stderr.readlines()) != 0: - raise IOError("Failed to delete scenario input data on server") + self._data_access.remove(target, recursive=False) # Delete output profiles print("--> Deleting scenario output data on server") target = posixpath.join(self.path_config.output_dir(), wildcard) - _, _, stderr = self._data_access.remove(target, recursive=False, force=True) - if len(stderr.readlines()) != 0: - raise IOError("Failed to delete scenario output data on server") + self._data_access.remove(target, recursive=False) # Delete temporary folder enclosing simulation inputs print("--> Deleting temporary folder on 
server") tmp_dir = posixpath.join( self.path_config.execute_dir(), f"scenario_{scenario_id}" ) - _, _, stderr = self._data_access.remove(tmp_dir, recursive=True, force=True) - if len(stderr.readlines()) != 0: - raise IOError("Failed to delete temporary folder on server") + self._data_access.remove(tmp_dir, recursive=True) # Delete local files print("--> Deleting input and output data on local machine") diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index 657d8d827..344ff9dd7 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -67,36 +67,37 @@ def __init__(self, data_access, scenario_info): self._scenario_info = scenario_info self.backup_config = server_setup.PathConfig(server_setup.BACKUP_DATA_ROOT_DIR) self.server_config = server_setup.PathConfig(server_setup.DATA_ROOT_DIR) + self.scenario_id = self._scenario_info["id"] + self.wildcard = f"{self.scenario_id}_*" def move_input_data(self): """Moves input data.""" print("--> Moving scenario input data to backup disk") source = posixpath.join( self.server_config.input_dir(), - self._scenario_info["id"] + "_*", + self.wildcard, ) target = self.backup_config.input_dir() self._data_access.copy(source, target, update=True) - self._data_access.remove(source, recursive=True, force=True) + self._data_access.remove(source, recursive=False) def move_output_data(self): """Moves output data""" print("--> Moving scenario output data to backup disk") source = posixpath.join( self.server_config.output_dir(), - self._scenario_info["id"] + "_*", + self.wildcard, ) target = self.backup_config.output_dir() self._data_access.copy(source, target, update=True) - self._data_access.remove(source, recursive=True, force=True) + self._data_access.remove(source, recursive=False) def move_temporary_folder(self): """Moves temporary folder.""" print("--> Moving temporary folder to backup disk") source = posixpath.join( - self.server_config.execute_dir(), - "scenario_" + 
self._scenario_info["id"], + self.server_config.execute_dir(), "scenario_" + self.scenario_id ) target = self.backup_config.execute_dir() self._data_access.copy(source, target, recursive=True, update=True) - self._data_access.remove(source, recursive=True, force=True) + self._data_access.remove(source, recursive=True) diff --git a/powersimdata/utility/helpers.py b/powersimdata/utility/helpers.py index dc1752f59..13c450277 100644 --- a/powersimdata/utility/helpers.py +++ b/powersimdata/utility/helpers.py @@ -25,19 +25,15 @@ def copy(src, dest, recursive=False, update=False): return fr"\cp {flags} {src} {dest}" @staticmethod - def remove(target, recursive=False, force=False): + def remove(target, recursive=False): """Builds a rm command with some options :param str target: the path or file to be removed :param bool recursive: whether to pass -r option - :param bool force: whether to pass -f option """ - r_flag = "r" if recursive else "" - f_flag = "f" if force else "" - if recursive or force: - flags = f"-{r_flag}{f_flag}" - return f"rm {flags} {target}" - return f"rm {target}" + if recursive: + return f"rm -rf {target}" + return f"rm -f {target}" class MemoryCache: diff --git a/powersimdata/utility/tests/test_helpers.py b/powersimdata/utility/tests/test_helpers.py index d6cea1552..c90abbdb6 100644 --- a/powersimdata/utility/tests/test_helpers.py +++ b/powersimdata/utility/tests/test_helpers.py @@ -98,14 +98,10 @@ def test_copy_command(): def test_remove_command(): - expected = "rm target" + expected = "rm -f target" command = CommandBuilder.remove("target") assert expected == command - expected = "rm -r target" - command = CommandBuilder.remove("target", recursive=True) - assert expected == command - expected = "rm -rf target" - command = CommandBuilder.remove("target", recursive=True, force=True) + command = CommandBuilder.remove("target", recursive=True) assert expected == command From c32d3a48386d0d6652e8965eb007946c575572fa Mon Sep 17 00:00:00 2001 From: Jon 
Hagg <jon.hagg@breakthroughenergy.org> Date: Thu, 22 Apr 2021 18:28:22 -0700 Subject: [PATCH 096/108] feat: add confirmation prompt for delete operations --- powersimdata/data_access/data_access.py | 25 +++++++++++++++++++++---- powersimdata/scenario/delete.py | 22 +++++++++++----------- powersimdata/scenario/move.py | 21 +++++++++++---------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 2f1b2b283..9466191a6 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -47,11 +47,12 @@ def copy(self, src, dest, recursive=False, update=False): command = CommandBuilder.copy(src, dest, recursive, update) return self.execute_command(command) - def remove(self, target, recursive=False): + def remove(self, target, recursive=False, confirm=True): """Wrapper around rm command :param str target: path to remove :param bool recursive: delete directories recursively + :param bool confirm: prompt before executing command """ raise NotImplementedError @@ -195,17 +196,26 @@ def makedir(self, relative_path): target = os.path.join(self.root, relative_path) os.makedirs(target, exist_ok=True) - def remove(self, target, recursive=False): + def remove(self, target, recursive=False, confirm=True): """Remove target using rm semantics :param str target: path to remove :param bool recursive: delete directories recursively + :param bool confirm: prompt before executing command """ - if recursive: + if recursive and "**" not in target: target = os.path.join(target, "**") files = glob.glob(target, recursive=recursive) + if confirm: + print("This will delete the following files:") + print(files) + confirmed = input("Proceed? 
[y/n] (default is 'n')") + if confirmed.lower() != "y": + print("Operation cancelled.") + return for f in files: os.remove(f) + print("--> Done!") def execute_command(self, command): """Execute a command locally at the data access. @@ -442,17 +452,24 @@ def makedir(self, relative_path): if len(stderr.readlines()) != 0: raise IOError("Failed to create %s on server" % full_path) - def remove(self, target, recursive=False): + def remove(self, target, recursive=False, confirm=True): """Run rm command on server :param str target: path to remove :param bool recursive: delete directories recursively + :param bool confirm: prompt before executing command :raises IOError: if command generated stderr """ command = CommandBuilder.remove(target, recursive) + if confirm: + confirmed = input(f"Execute '{command}'? [y/n] (default is 'n')") + if confirmed.lower() != "y": + print("Operation cancelled.") + return _, _, stderr = self.execute_command(command) if len(stderr.readlines()) != 0: raise IOError(f"Failed to delete target={target} on server") + print("--> Done!") def _exists(self, filepath): """Return whether the file exists diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index d77020a60..2f8fc0886 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -1,7 +1,7 @@ -import glob import os import posixpath +from powersimdata.data_access.data_access import LocalDataAccess from powersimdata.scenario.state import State from powersimdata.utility import server_setup @@ -30,8 +30,11 @@ def print_scenario_info(self): except AttributeError: print("Scenario has been deleted") - def delete_scenario(self): - """Deletes scenario on server.""" + def delete_scenario(self, confirm=True): + """Deletes scenario on server. 
+ + :param bool confirm: prompt before each batch + """ # Delete entry in scenario list scenario_id = self._scenario_info["id"] @@ -43,27 +46,24 @@ def delete_scenario(self): # Delete links to base profiles on server print("--> Deleting scenario input data on server") target = posixpath.join(self.path_config.input_dir(), wildcard) - self._data_access.remove(target, recursive=False) + self._data_access.remove(target, recursive=False, confirm=confirm) # Delete output profiles print("--> Deleting scenario output data on server") target = posixpath.join(self.path_config.output_dir(), wildcard) - self._data_access.remove(target, recursive=False) + self._data_access.remove(target, recursive=False, confirm=confirm) # Delete temporary folder enclosing simulation inputs print("--> Deleting temporary folder on server") tmp_dir = posixpath.join( self.path_config.execute_dir(), f"scenario_{scenario_id}" ) - self._data_access.remove(tmp_dir, recursive=True) + self._data_access.remove(tmp_dir, recursive=True, confirm=confirm) # Delete local files print("--> Deleting input and output data on local machine") - local_file = glob.glob( - os.path.join(server_setup.LOCAL_DIR, "data", "**", wildcard) - ) - for f in local_file: - os.remove(f) + target = os.path.join(server_setup.LOCAL_DIR, "data", "**", wildcard) + LocalDataAccess().remove(target, recursive=True, confirm=confirm) # Delete attributes self._clean() diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index 344ff9dd7..e2916ed6c 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -25,10 +25,11 @@ def print_scenario_info(self): for key, val in self._scenario_info.items(): print("%s: %s" % (key, val)) - def move_scenario(self, target="disk"): + def move_scenario(self, target="disk", confirm=True): """Move scenario. :param str target: optional argument specifying the backup system. 
+ :param bool confirm: prompt before deleting each batch of files """ if not isinstance(target, str): raise TypeError("string is expected for optional argument target") @@ -38,9 +39,9 @@ def move_scenario(self, target="disk"): backup = BackUpDisk(self._data_access, self._scenario_info) - backup.move_input_data() - backup.move_output_data() - backup.move_temporary_folder() + backup.move_input_data(confirm=confirm) + backup.move_output_data(confirm=confirm) + backup.move_temporary_folder(confirm=confirm) sid = self._scenario_info["id"] self._execute_list_manager.set_status(sid, "moved") @@ -70,7 +71,7 @@ def __init__(self, data_access, scenario_info): self.scenario_id = self._scenario_info["id"] self.wildcard = f"{self.scenario_id}_*" - def move_input_data(self): + def move_input_data(self, confirm=True): """Moves input data.""" print("--> Moving scenario input data to backup disk") source = posixpath.join( @@ -79,9 +80,9 @@ def move_input_data(self): ) target = self.backup_config.input_dir() self._data_access.copy(source, target, update=True) - self._data_access.remove(source, recursive=False) + self._data_access.remove(source, recursive=False, confirm=confirm) - def move_output_data(self): + def move_output_data(self, confirm=True): """Moves output data""" print("--> Moving scenario output data to backup disk") source = posixpath.join( @@ -90,9 +91,9 @@ def move_output_data(self): ) target = self.backup_config.output_dir() self._data_access.copy(source, target, update=True) - self._data_access.remove(source, recursive=False) + self._data_access.remove(source, recursive=False, confirm=confirm) - def move_temporary_folder(self): + def move_temporary_folder(self, confirm=True): """Moves temporary folder.""" print("--> Moving temporary folder to backup disk") source = posixpath.join( @@ -100,4 +101,4 @@ def move_temporary_folder(self): ) target = self.backup_config.execute_dir() self._data_access.copy(source, target, recursive=True, update=True) - 
self._data_access.remove(source, recursive=True) + self._data_access.remove(source, recursive=True, confirm=confirm) From 20e1bbd3da0daea7397e1d81778da9a2989d3e16 Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Fri, 23 Apr 2021 14:56:39 -0700 Subject: [PATCH 097/108] fix: more predictable recursive delete --- powersimdata/data_access/data_access.py | 15 +++++++-------- powersimdata/scenario/delete.py | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 9466191a6..34b550e62 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -203,18 +203,17 @@ def remove(self, target, recursive=False, confirm=True): :param bool recursive: delete directories recursively :param bool confirm: prompt before executing command """ - if recursive and "**" not in target: - target = os.path.join(target, "**") - files = glob.glob(target, recursive=recursive) if confirm: - print("This will delete the following files:") - print(files) - confirmed = input("Proceed? [y/n] (default is 'n')") + confirmed = input(f"Delete {target}? 
[y/n] (default is 'n')") if confirmed.lower() != "y": print("Operation cancelled.") return - for f in files: - os.remove(f) + if recursive: + shutil.rmtree(target) + else: + files = [f for f in glob.glob(target) if os.path.isfile(f)] + for f in files: + os.remove(f) print("--> Done!") def execute_command(self, command): diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index 2f8fc0886..b8f93ac34 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -63,7 +63,7 @@ def delete_scenario(self, confirm=True): # Delete local files print("--> Deleting input and output data on local machine") target = os.path.join(server_setup.LOCAL_DIR, "data", "**", wildcard) - LocalDataAccess().remove(target, recursive=True, confirm=confirm) + LocalDataAccess().remove(target, recursive=False, confirm=confirm) # Delete attributes self._clean() From 5809d3c590c721771fc569d882541dcfd5abb90d Mon Sep 17 00:00:00 2001 From: Jon Hagg <jon.hagg@breakthroughenergy.org> Date: Mon, 26 Apr 2021 13:10:44 -0700 Subject: [PATCH 098/108] docs: cleanup comments and docstrings --- powersimdata/scenario/delete.py | 3 --- powersimdata/scenario/move.py | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index b8f93ac34..5be2c455c 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -43,12 +43,10 @@ def delete_scenario(self, confirm=True): wildcard = f"{scenario_id}_*" - # Delete links to base profiles on server print("--> Deleting scenario input data on server") target = posixpath.join(self.path_config.input_dir(), wildcard) self._data_access.remove(target, recursive=False, confirm=confirm) - # Delete output profiles print("--> Deleting scenario output data on server") target = posixpath.join(self.path_config.output_dir(), wildcard) self._data_access.remove(target, recursive=False, confirm=confirm) @@ -60,7 +58,6 @@ def 
delete_scenario(self, confirm=True): ) self._data_access.remove(tmp_dir, recursive=True, confirm=confirm) - # Delete local files print("--> Deleting input and output data on local machine") target = os.path.join(server_setup.LOCAL_DIR, "data", "**", wildcard) LocalDataAccess().remove(target, recursive=False, confirm=confirm) diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index e2916ed6c..ee5eb840d 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -30,6 +30,8 @@ def move_scenario(self, target="disk", confirm=True): :param str target: optional argument specifying the backup system. :param bool confirm: prompt before deleting each batch of files + :raises TypeError: if target is not a str + :raises ValueError: if target is unknown (only "disk" is supported) """ if not isinstance(target, str): raise TypeError("string is expected for optional argument target") @@ -59,7 +61,7 @@ class BackUpDisk(object): :param powersimdata.data_access.data_access.DataAccess data_access: data access object. - :param dict scenario: scenario information. + :param dict scenario_info: scenario information. 
""" def __init__(self, data_access, scenario_info): From ea72d87df1591a68daa92b0ea0c9159ab767f61d Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Wed, 21 Apr 2021 18:33:20 -0700 Subject: [PATCH 099/108] refactor: add info attribute for Scenario in Create state --- powersimdata/scenario/create.py | 20 +------------------- powersimdata/scenario/scenario.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 3e452cfd3..bb11e27b5 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -1,7 +1,6 @@ import copy import pickle import warnings -from collections import OrderedDict import numpy as np import pandas as pd @@ -39,24 +38,7 @@ def __init__(self, scenario): self.grid = None self.ct = None self._scenario_status = None - self._scenario_info = OrderedDict( - [ - ("plan", ""), - ("name", ""), - ("state", "create"), - ("grid_model", ""), - ("interconnect", ""), - ("base_demand", ""), - ("base_hydro", ""), - ("base_solar", ""), - ("base_wind", ""), - ("change_table", ""), - ("start_date", ""), - ("end_date", ""), - ("interval", ""), - ("engine", ""), - ] - ) + self._scenario_info = scenario.info self.exported_methods = set(self.default_exported_methods) super().__init__(scenario) diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 3181a1491..63e804b5e 100644 --- a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -1,3 +1,5 @@ +from collections import OrderedDict + import pandas as pd from powersimdata.data_access.context import Context @@ -25,6 +27,22 @@ class Scenario(object): "_scenario_list_manager", "_execute_list_manager", } + _default_info = [ + ("plan", ""), + ("name", ""), + ("state", "create"), + ("grid_model", ""), + ("interconnect", ""), + ("base_demand", ""), + ("base_hydro", ""), + ("base_solar", ""), + ("base_wind", 
""), + ("change_table", ""), + ("start_date", ""), + ("end_date", ""), + ("interval", ""), + ("engine", ""), + ] def __init__(self, descriptor=None): """Constructor.""" @@ -38,6 +56,7 @@ def __init__(self, descriptor=None): self._execute_list_manager = ExecuteListManager(self.data_access) if not descriptor: + self.info = OrderedDict(self._default_info) self.state = Create(self) else: self._set_info(descriptor) From 314c982402dc38d0954a56490b37a09f226d98fa Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 22 Apr 2021 17:58:05 -0700 Subject: [PATCH 100/108] feat: allow MW-miles calculation for new branches --- powersimdata/design/transmission/mwmiles.py | 31 ++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/powersimdata/design/transmission/mwmiles.py b/powersimdata/design/transmission/mwmiles.py index f9ffa9a7b..d89d35088 100644 --- a/powersimdata/design/transmission/mwmiles.py +++ b/powersimdata/design/transmission/mwmiles.py @@ -1,4 +1,5 @@ from powersimdata.input.grid import Grid +from powersimdata.input.transform_grid import TransformGrid from powersimdata.utility.distance import haversine @@ -51,20 +52,30 @@ def _calculate_mw_miles(original_grid, ct, exclude_branches=None): else: raise TypeError("exclude_branches must be None, list, tuple, or set") - base_branch = original_grid.branch - upgraded_branches = ct["branch"]["branch_id"] - for b, v in upgraded_branches.items(): + base_branch_ids = set(original_grid.branch.index) + upgraded_branch_ids = set(ct["branch"]["branch_id"].keys()) + transformed_branch = TransformGrid(original_grid, ct).get_grid().branch + if "new_branch" in ct: + upgraded_branch_ids |= set(transformed_branch.index) - base_branch_ids + for b in upgraded_branch_ids: if b in exclude_branches: continue - # 'upgraded' capacity is v-1 because a scale of 1 = an upgrade of 0 - upgraded_capacity = base_branch.loc[b, "rateA"] * (v - 1) - device_type = base_branch.loc[b, 
"branch_device_type"] + if b in base_branch_ids: + # 'upgraded' capacity is (scale - 1) because a scale of 1 = an upgrade of 0 + scale = ct["branch"]["branch_id"][b] + upgraded_capacity = transformed_branch.loc[b, "rateA"] / scale * (scale - 1) + else: + upgraded_capacity = transformed_branch.loc[b, "rateA"] + device_type = transformed_branch.loc[b, "branch_device_type"] if device_type == "Line": from_coords = ( - base_branch.loc[b, "from_lat"], - base_branch.loc[b, "from_lon"], + transformed_branch.loc[b, "from_lat"], + transformed_branch.loc[b, "from_lon"], + ) + to_coords = ( + transformed_branch.loc[b, "to_lat"], + transformed_branch.loc[b, "to_lon"], ) - to_coords = (base_branch.loc[b, "to_lat"], base_branch.loc[b, "to_lon"]) addtl_mw_miles = upgraded_capacity * haversine(from_coords, to_coords) upgrades["mw_miles"] += addtl_mw_miles upgrades["num_lines"] += 1 @@ -75,6 +86,6 @@ def _calculate_mw_miles(original_grid, ct, exclude_branches=None): upgrades["transformer_mw"] += upgraded_capacity upgrades["num_transformers"] += 1 else: - raise Exception("Unknown branch: " + str(b)) + raise Exception("Unknown branch type: " + str(b)) return upgrades From c599b9f51da81aa24c686113fb11483475f8233d Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Thu, 22 Apr 2021 18:55:36 -0700 Subject: [PATCH 101/108] chore: update MW-miles test to include impedance data --- powersimdata/design/transmission/tests/test_mwmiles.py | 1 + 1 file changed, 1 insertion(+) diff --git a/powersimdata/design/transmission/tests/test_mwmiles.py b/powersimdata/design/transmission/tests/test_mwmiles.py index 6e396df3f..0fcbff3c7 100644 --- a/powersimdata/design/transmission/tests/test_mwmiles.py +++ b/powersimdata/design/transmission/tests/test_mwmiles.py @@ -14,6 +14,7 @@ "to_lat": [37.78, 47.66, 47.61, 47.61, 47.61], "to_lon": [-122.42, -117.43, -122.33, -122.33, -122.33], "branch_device_type": 2 * ["Line"] + 3 * ["Transformer"], + "x": 5 * [1], } expected_keys 
= {"mw_miles", "transformer_mw", "num_lines", "num_transformers"} From f96d5ce2e06720bfeb17b4381c1a802dc05b186e Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Tue, 27 Apr 2021 07:10:16 -0700 Subject: [PATCH 102/108] doc/refactor: improve documentation and usability of investment costs (#465) * doc: update documentation to reflect that inflation calculations are included * refactor: change return type to Series for non-summed generators, branches * test: refactor tests to expect new return type * refactor: change cost prioritization to match new call signature --- .../design/investment/investment_costs.py | 70 ++++++++++++------- .../investment/tests/test_investment_costs.py | 6 +- powersimdata/design/transmission/upgrade.py | 2 +- 3 files changed, 49 insertions(+), 29 deletions(-) diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index e56e0b9da..d673e7476 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -45,8 +45,11 @@ def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): NEEM regions are used to find regional multipliers. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param bool sum_results: sum data frame for each branch type. - :return: (*dict*) -- cost of upgrading branches in $2010. + :param bool sum_results: whether to sum data frame for each branch type. Defaults to + True. + :return: (*dict*) -- keys are {'line_cost', 'transformer_cost'}, values are either + float if ``sum_results``, or pandas Series indexed by branch ID. + Whether summed or not, values are $USD, inflation-adjusted to today. """ base_grid = Grid(scenario.info["interconnect"].split("_")) @@ -72,8 +75,11 @@ def _calculate_ac_inv_costs(grid_new, sum_results=True): as a transformer. :param powersimdata.input.grid.Grid grid_new: grid instance. 
- :param bool sum_results: sum data frame for each branch type. - :return: (*dict*) -- cost of upgrading branches in $2010. + :param bool sum_results: whether to sum data frame for each branch type. Defaults to + True. + :return: (*dict*) -- keys are {'line_cost', 'transformer_cost'}, values are either + float if ``sum_results``, or pandas Series indexed by branch ID. + Whether summed or not, values are $USD, inflation-adjusted to today. """ def select_mw(x, cost_df): @@ -211,7 +217,7 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): lines.loc[:, "MWmi"] = lines["lengthMi"] * lines["rateA"] # calculate cost of each line - lines.loc[:, "Cost"] = lines["MWmi"] * lines["costMWmi"] * lines["mult"] + lines.loc[:, "cost"] = lines["MWmi"] * lines["costMWmi"] * lines["mult"] # calculate transformer costs if len(transformers) > 0: @@ -234,27 +240,30 @@ def get_branch_mult(x, bus_reg, ac_reg_mult, branch_lookup_alerted=set()): transformers["per_MW_cost"] = [] transformers["mult"] = [] - transformers["Cost"] = ( + transformers["cost"] = ( transformers["rateA"] * transformers["per_MW_cost"] * transformers["mult"] ) - lines.Cost *= calculate_inflation(2010) - transformers.Cost *= calculate_inflation(2020) + lines.cost *= calculate_inflation(2010) + transformers.cost *= calculate_inflation(2020) if sum_results: return { - "line_cost": lines.Cost.sum(), - "transformer_cost": transformers.Cost.sum(), + "line_cost": lines.cost.sum(), + "transformer_cost": transformers.cost.sum(), } else: - return {"line_cost": lines, "transformer_cost": transformers} + return {"line_cost": lines.cost, "transformer_cost": transformers.cost} def calculate_dc_inv_costs(scenario, sum_results=True): """Calculate cost of upgrading HVDC lines in a scenario. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param bool sum_results: sum series to return total cost. - :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015. 
+ :param bool sum_results: whether to sum series to return total cost. Defaults to + True. + :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines, in $USD, + inflation-adjusted to today. If ``sum_results``, a float is returned, otherwise + a Series. """ base_grid = Grid(scenario.info["interconnect"].split("_")) grid = scenario.state.get_grid() @@ -274,8 +283,11 @@ def _calculate_dc_inv_costs(grid_new, sum_results=True): """Calculate cost of upgrading HVDC lines. :param powersimdata.input.grid.Grid grid_new: grid instance. - :param bool sum_results: sum series to return total cost. - :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015. + :param bool sum_results: whether to sum series to return total cost. Defaults to + True. + :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines, in $USD, + inflation-adjusted to today. If ``sum_results``, a float is returned, otherwise + a Series. """ def _calculate_single_line_cost(line, bus): @@ -321,8 +333,13 @@ def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True): :param int/str year: building year. :param str cost_case: ATB cost case of data. *'Moderate'*: mid cost case, *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs - :return: (*pandas.DataFrame*) -- total generation investment cost summed by - technology. + :param bool sum_results: whether to sum data frame for plant costs. Defaults to + True. + :return: (*pandas.Series*) -- Overnight generation investment cost. + If ``sum_results``, indices are technologies and values are total cost. + Otherwise, indices are IDs of plants (including storage, which is given + pseudo-plant-IDs), and values are individual generator costs. + Whether summed or not, values are $USD, inflation-adjusted to today. .. todo:: it currently uses one (arbitrary) sub-technology. The rest of the costs are dropped. 
Wind and solar will need to be fixed based on the resource supply @@ -364,11 +381,16 @@ def _calculate_gen_inv_costs(grid_new, year, cost_case, sum_results=True): *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs. :raises ValueError: if year not 2020 - 2050, or cost case not an allowed option. :raises TypeError: if year not int/str or cost_case not str. - :return: (*pandas.Series*) -- total generation investment cost, summed by - technology. + :param bool sum_results: whether to sum data frame for plant costs. Defaults to + True. + :return: (*pandas.Series*) -- Overnight generation investment cost. + If ``sum_results``, indices are technologies and values are total cost. + Otherwise, indices are IDs of plants (including storage, which is given + pseudo-plant-IDs), and values are individual generator costs. + Whether summed or not, values are $USD, inflation-adjusted to today. .. note:: the function computes the total capital cost as: - CAPEX_total = CAPEX ($/MW) * Pmax (MW) * regional multiplier + CAPEX_total = overnight CAPEX ($/MW) * Power capacity (MW) * regional multiplier """ def load_cost(year, cost_case): @@ -501,13 +523,13 @@ def load_cost(year, cost_case): ) # multiply all together to get summed CAPEX ($) - plants.loc[:, "CAPEX_total"] = ( + plants.loc[:, "cost"] = ( plants["CAPEX"] * plants["Pmax"] * plants["reg_cap_cost_mult"] ) # sum cost by technology - plants.loc[:, "CAPEX_total"] *= calculate_inflation(2018) + plants.loc[:, "cost"] *= calculate_inflation(2018) if sum_results: - return plants.groupby(["Technology"])["CAPEX_total"].sum() + return plants.groupby(["Technology"])["cost"].sum() else: - return plants + return plants["cost"] diff --git a/powersimdata/design/investment/tests/test_investment_costs.py b/powersimdata/design/investment/tests/test_investment_costs.py index a7f34512b..f18d4b3ae 100644 --- a/powersimdata/design/investment/tests/test_investment_costs.py +++ 
b/powersimdata/design/investment/tests/test_investment_costs.py @@ -167,7 +167,7 @@ def test_calculate_ac_inv_costs_not_summed(mock_grid): for branch_type, upgrade_costs in expected_ac_cost.items(): assert set(upgrade_costs.keys()) == set(ac_cost[branch_type].index) for branch, cost in upgrade_costs.items(): - assert cost == pytest.approx(ac_cost[branch_type].loc[branch, "Cost"]) + assert cost == pytest.approx(ac_cost[branch_type].loc[branch]) def test_calculate_dc_inv_costs(mock_grid): @@ -244,6 +244,4 @@ def test_calculate_gen_inv_costs_not_summed(mock_grid): expected_gen_inv_cost = {k: v * inflation for k, v in expected_gen_inv_cost.items()} assert set(gen_inv_cost.index) == set(expected_gen_inv_cost.keys()) for k in gen_inv_cost.index: - assert gen_inv_cost.loc[k, "CAPEX_total"] == pytest.approx( - expected_gen_inv_cost[k] - ) + assert gen_inv_cost.loc[k] == pytest.approx(expected_gen_inv_cost[k]) diff --git a/powersimdata/design/transmission/upgrade.py b/powersimdata/design/transmission/upgrade.py index eee8c6cbb..c9b6e2df4 100644 --- a/powersimdata/design/transmission/upgrade.py +++ b/powersimdata/design/transmission/upgrade.py @@ -301,7 +301,7 @@ def _identify_mesh_branch_upgrades( base_grid.branch = base_grid.branch.filter(items=congested_indices, axis=0) upgrade_costs = _calculate_ac_inv_costs(base_grid, sum_results=False) # Merge the individual line/transformer data into a single Series - merged_upgrade_costs = pd.concat([v.Cost for v in upgrade_costs.values()]) + merged_upgrade_costs = pd.concat([v for v in upgrade_costs.values()]) if method in ("MW", "MWmiles"): ref_grid = ref_scenario.state.get_grid() branch_ratings = ref_grid.branch.loc[congested_indices, "rateA"] From 896be44708a290885aed3f22a0e51c28b9dcc043 Mon Sep 17 00:00:00 2001 From: Ben RdO <ben.rouilledorfeuil@breakthroughenergy.org> Date: Tue, 27 Apr 2021 16:22:02 -0700 Subject: [PATCH 103/108] chore: create bug report issue template (#454) --- .github/ISSUE_TEMPLATE/bug_report.md | 47 
++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..fc826015b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,47 @@ +--- +name: Bug report +about: Create a report to help us improve +title: Bug report +labels: bug +assignees: ahurli, BainanXia, danielolsen, jon-hagg, rouille + +--- + +# :beetle: + +- [ ] I have checked that this issue has not already been reported. + + +### Bug summary +A short 1-2 sentences that succinctly describes the bug. + +### Code for reproduction +A minimum code snippet required to reproduce the bug. Please make sure to minimize the +number of dependencies required. +```python +# Paste your code here +# +# +``` + +### Actual outcome +The output produced by the above code, which may be a screenshot, console output, etc. +```shell +# If applicable, paste the console output here +# +# +``` + +### Expected outcome +A description of the expected outcome from the code snippet. + +### Environment +Please specify your platform and versions of the relevant libraries you are using: +* Operating system: +* PowerSimData revision (run `git rev-parse origin/HEAD`): +* Python version: +* Jupyter version (if applicable): +* Other libraries: + +### Additional context +Add any other context about the problem here. 
From 990b61c54caa47977b2dae833591508fec917f0d Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 27 Apr 2021 11:48:04 -0700 Subject: [PATCH 104/108] test: add test for scaling of new branch in TransformGrid --- .../input/tests/test_transform_grid.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/powersimdata/input/tests/test_transform_grid.py b/powersimdata/input/tests/test_transform_grid.py index 61bc49fd5..2e52c8cac 100644 --- a/powersimdata/input/tests/test_transform_grid.py +++ b/powersimdata/input/tests/test_transform_grid.py @@ -365,6 +365,29 @@ def test_add_branch(ct): ) +def test_added_branch_scaled(ct): + new_branch = [ + {"capacity": 150, "from_bus_id": 8, "to_bus_id": 100}, + {"capacity": 250, "from_bus_id": 8000, "to_bus_id": 30000}, + {"capacity": 50, "from_bus_id": 1, "to_bus_id": 655}, + {"capacity": 125, "from_bus_id": 3001005, "to_bus_id": 3008157}, + ] + ct.add_branch(new_branch) + prev_max_branch_id = grid.branch.index.max() + new_branch_ids = list( + range(prev_max_branch_id + 1, prev_max_branch_id + 1 + len(new_branch)) + ) + ct.scale_branch_capacity(branch_id={new_branch_ids[0]: 2}) + new_grid = TransformGrid(grid, ct.ct).get_grid() + new_capacity = new_grid.branch.rateA + + for i, new_id in enumerate(new_branch_ids): + if i == 0: + assert new_capacity.loc[new_branch_ids[i]] == new_branch[i]["capacity"] * 2 + else: + assert new_capacity.loc[new_id] == new_branch[i]["capacity"] + + def test_add_dcline(ct): new_dcline = [ {"capacity": 2000, "from_bus_id": 200, "to_bus_id": 2000}, From 8d3566a5984051a6f27c39ba5913fbf184c8235b Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 27 Apr 2021 11:01:55 -0700 Subject: [PATCH 105/108] feat: allow scaling of added elements in TransformGrid --- powersimdata/input/transform_grid.py | 70 +++++++++++++++++++++------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git 
a/powersimdata/input/transform_grid.py b/powersimdata/input/transform_grid.py index c30aa1037..cb3bd6151 100644 --- a/powersimdata/input/transform_grid.py +++ b/powersimdata/input/transform_grid.py @@ -43,20 +43,19 @@ def get_grid(self): def _apply_change_table(self): """Apply changes listed in change table to the grid.""" + # First scale by zones, so that zone factors are not applied to additions. for g in self.gen_types: if g in self.ct.keys(): - self._scale_gen(g) + self._scale_gen_by_zone(g) if f"{g}_cost" in self.ct.keys(): - self._scale_gencost(g) + self._scale_gencost_by_zone(g) if f"{g}_pmin" in self.ct.keys(): - self._scale_gen_pmin(g) + self._scale_gen_pmin_by_zone(g) if "branch" in self.ct.keys(): - self._scale_branch() - - if "dcline" in self.ct.keys(): - self._scale_dcline() + self._scale_branch_by_zone() + # Then, add new elements if "new_bus" in self.ct.keys(): self._add_bus() @@ -72,9 +71,24 @@ def _apply_change_table(self): if "storage" in self.ct.keys(): self._add_storage() - def _scale_gen(self, gen_type): - """Scales capacity of generators and the associated generation cost curve - (to maintain the same slopes at the start/end of the curve). + # Finally, scale by IDs, so that additions can be scaled. + for g in self.gen_types: + if g in self.ct.keys(): + self._scale_gen_by_id(g) + if f"{g}_cost" in self.ct.keys(): + self._scale_gencost_by_id(g) + if f"{g}_pmin" in self.ct.keys(): + self._scale_gen_pmin_by_id(g) + + if "branch" in self.ct.keys(): + self._scale_branch_by_id() + + if "dcline" in self.ct.keys(): + self._scale_dcline() + + def _scale_gen_by_zone(self, gen_type): + """Scales capacity of generators, by zone. Also scales the associated generation + cost curve (to maintain the same slopes at the start/end of the curve). :param str gen_type: type of generator. 
""" @@ -88,14 +102,21 @@ def _scale_gen(self, gen_type): self._scale_gen_capacity(plant_id, factor) if gen_type in self.thermal_gen_types: self._scale_gencost_by_capacity(plant_id, factor) + + def _scale_gen_by_id(self, gen_type): + """Scales capacity of generators by ID. Also scales the associated generation + cost curve (to maintain the same slopes at the start/end of the curve). + + :param str gen_type: type of generator. + """ if "plant_id" in self.ct[gen_type].keys(): for plant_id, factor in self.ct[gen_type]["plant_id"].items(): self._scale_gen_capacity(plant_id, factor) if gen_type in self.thermal_gen_types: self._scale_gencost_by_capacity(plant_id, factor) - def _scale_gencost(self, gen_type): - """Scales cost of generators. + def _scale_gencost_by_zone(self, gen_type): + """Scales cost of generators, by zone. :param str gen_type: type of generator. """ @@ -108,12 +129,19 @@ def _scale_gencost(self, gen_type): .index.tolist() ) self.grid.gencost["before"].loc[plant_id, ["c0", "c1", "c2"]] *= factor + + def _scale_gencost_by_id(self, gen_type): + """Scales cost of generators, by ID. + + :param str gen_type: type of generator. + """ + cost_key = f"{gen_type}_cost" if "plant_id" in self.ct[cost_key].keys(): for plant_id, factor in self.ct[cost_key]["plant_id"].items(): self.grid.gencost["before"].loc[plant_id, ["c0", "c1", "c2"]] *= factor - def _scale_gen_pmin(self, gen_type): - """Scales cost of generators. + def _scale_gen_pmin_by_zone(self, gen_type): + """Scales minimum generation of generators, by zone. :param str gen_type: type of generator. """ @@ -126,6 +154,13 @@ def _scale_gen_pmin(self, gen_type): .index.tolist() ) self.grid.plant.loc[plant_id, "Pmin"] *= factor + + def _scale_gen_pmin_by_id(self, gen_type): + """Scales minimum generation of generators, by ID. + + :param str gen_type: type of generator. 
+ """ + pmin_key = f"{gen_type}_pmin" if "plant_id" in self.ct[pmin_key].keys(): for plant_id, factor in self.ct[pmin_key]["plant_id"].items(): self.grid.plant.loc[plant_id, "Pmin"] *= factor @@ -151,8 +186,8 @@ def _scale_gencost_by_capacity(self, plant_id, factor): if factor != 0: self.grid.gencost["before"].loc[plant_id, "c2"] /= factor - def _scale_branch(self): - """Scales capacity of AC lines.""" + def _scale_branch_by_zone(self): + """Scales capacity of AC lines, by zone, for lines entirely within that zone.""" if "zone_id" in self.ct["branch"].keys(): for zone_id, factor in self.ct["branch"]["zone_id"].items(): branch_id = ( @@ -161,6 +196,9 @@ def _scale_branch(self): .index.tolist() ) self._scale_branch_capacity(branch_id, factor) + + def _scale_branch_by_id(self): + """Scales capacity of AC lines, by ID.""" if "branch_id" in self.ct["branch"].keys(): for branch_id, factor in self.ct["branch"]["branch_id"].items(): self._scale_branch_capacity(branch_id, factor) From a945df2e30e5bec56fa78148d3de162aac2b062f Mon Sep 17 00:00:00 2001 From: Daniel Olsen <daniel.olsen@breakthroughenergy.org> Date: Tue, 27 Apr 2021 11:59:19 -0700 Subject: [PATCH 106/108] feat: allow new elements to be scaled in ChangeTable via TransformGrid caches --- powersimdata/input/change_table.py | 47 +++++++++++++++++------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/powersimdata/input/change_table.py b/powersimdata/input/change_table.py index ddc2cdd86..69d0b4e3b 100644 --- a/powersimdata/input/change_table.py +++ b/powersimdata/input/change_table.py @@ -133,7 +133,7 @@ def __init__(self, grid): """ self.grid = grid self.ct = {} - self.new_bus_cache = {} + self._new_element_caches = {k: {} for k in {"branch", "bus", "dcline", "plant"}} @staticmethod def _check_resource(resource): @@ -262,10 +262,8 @@ def _add_plant_entries(self, resource, ct_key, zone_name=None, plant_id=None): if len(self.ct[ct_key]["zone_id"]) == 0: self.ct.pop(ct_key) if plant_id is not 
None: - plant_id_interconnect = set( - self.grid.plant.groupby("type").get_group(resource).index - ) - diff = set(plant_id.keys()).difference(plant_id_interconnect) + anticipated_plant = self._get_df_with_new_elements("plant") + diff = set(plant_id.keys()) - set(anticipated_plant.index) if len(diff) != 0: err_msg = f"No {resource} plant(s) with the following id: " err_msg += ", ".join(sorted([str(d) for d in diff])) @@ -354,6 +352,7 @@ def scale_branch_capacity(self, zone_name=None, branch_id=None): is (are) the id of the line(s) and the associated value is the scaling factor for the increase/decrease in capacity of the line(s). """ + anticipated_branch = self._get_df_with_new_elements("branch") if bool(zone_name) or bool(branch_id) is True: if "branch" not in self.ct: self.ct["branch"] = {} @@ -368,8 +367,7 @@ def scale_branch_capacity(self, zone_name=None, branch_id=None): for z in zone_name.keys(): self.ct["branch"]["zone_id"][self.grid.zone2id[z]] = zone_name[z] if branch_id is not None: - branch_id_interconnect = set(self.grid.branch.index) - diff = set(branch_id.keys()).difference(branch_id_interconnect) + diff = set(branch_id.keys()) - set(anticipated_branch.index) if len(diff) != 0: print("No branch with the following id:") for i in list(diff): @@ -394,7 +392,8 @@ def scale_dcline_capacity(self, dcline_id): """ if "dcline" not in self.ct: self.ct["dcline"] = {} - diff = set(dcline_id.keys()).difference(set(self.grid.dcline.index)) + anticipated_dcline = self._get_df_with_new_elements("dcline") + diff = set(dcline_id.keys()) - set(anticipated_dcline.index) if len(diff) != 0: print("No dc line with the following id:") for i in list(diff): @@ -506,7 +505,7 @@ def add_storage_capacity(self, info): "terminal_min", "terminal_max", } - anticipated_bus = self._get_new_bus() + anticipated_bus = self._get_df_with_new_elements("bus") for i, storage in enumerate(info): self._check_entry_keys(storage, i, "storage", required, None, optional) if storage["bus_id"] not in 
anticipated_bus.index: @@ -610,7 +609,7 @@ def _add_line(self, key, info): :raises ValueError: if any of the new lines to be added have nonsensical values. """ info = copy.deepcopy(info) - anticipated_bus = self._get_new_bus() + anticipated_bus = self._get_df_with_new_elements("bus") new_lines = [] required = {"from_bus_id", "to_bus_id"} xor_sets = {("capacity", "Pmax"), ("capacity", "Pmin")} @@ -685,7 +684,7 @@ def add_plant(self, info): raise TypeError("Argument enclosing new plant(s) must be a list") info = copy.deepcopy(info) - anticipated_bus = self._get_new_bus() + anticipated_bus = self._get_df_with_new_elements("bus") new_plants = [] required = {"bus_id", "Pmax", "type"} optional = {"c0", "c1", "c2", "Pmin"} @@ -782,16 +781,24 @@ def add_bus(self, info): self.ct["new_bus"] = [] self.ct["new_bus"] += new_buses - def _get_new_bus(self): - if "new_bus" not in self.ct: - return self.grid.bus - new_bus_tuple = tuple(tuple(sorted(b.items())) for b in self.ct["new_bus"]) - if new_bus_tuple in self.new_bus_cache: - return self.new_bus_cache[new_bus_tuple] + def _get_df_with_new_elements(self, table): + """Get a post-transformation data table, for use with adding elements at new + buses, or scaling new elements. + + :param str table: the table of the grid to be fetched: + 'branch', 'bus', 'dcline', or 'plant'. + :return: (*pandas.DataFrame*) -- the post-transformation table. 
+ """ + add_key = f"new_{table}" + if add_key not in self.ct: + return getattr(self.grid, table) + new_elements_tuple = tuple(tuple(sorted(b.items())) for b in self.ct[add_key]) + if new_elements_tuple in self._new_element_caches[table]: + return self._new_element_caches[table][new_elements_tuple] else: - bus = TransformGrid(self.grid, self.ct).get_grid().bus - self.new_bus_cache[new_bus_tuple] = bus - return bus + transformed = getattr(TransformGrid(self.grid, self.ct).get_grid(), table) + self._new_element_caches[table][new_elements_tuple] = transformed + return transformed.copy() def write(self, scenario_id): """Saves change table to disk. From ececa4271458899c772a5710fa3c5209353de6aa Mon Sep 17 00:00:00 2001 From: jon-hagg <66005238+jon-hagg@users.noreply.github.com> Date: Wed, 28 Apr 2021 08:48:17 -0700 Subject: [PATCH 107/108] ci: add workflow_dispatch trigger to docker build (#469) --- .github/workflows/docker-build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 5e9f231a0..214410027 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -1,6 +1,7 @@ name: Publish docker image on: + workflow_dispatch: push: branches: - 'develop' From 6e05438e5e1b530a056b89ca98ed8aeb78302879 Mon Sep 17 00:00:00 2001 From: danielolsen <danielolsen@users.noreply.github.com> Date: Wed, 28 Apr 2021 13:35:49 -0700 Subject: [PATCH 108/108] chore: bump version number to v0.4.1 (#470) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 083865c04..bf384c476 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setup( name="powersimdata", - version="0.4", + version="0.4.1", description="Power Simulation Data", url="https://github.com/Breakthrough-Energy/powersimdata", author="Kaspar Mueller",