
Commit e49274c
separate into demo and cli scripts
jrob93 committed Oct 15, 2024
1 parent ae83173 commit e49274c
Showing 3 changed files with 400 additions and 119 deletions.
157 changes: 38 additions & 119 deletions src/adler/adler.py
@@ -78,6 +78,9 @@ def runAdler(cli_args):
# now let's do some phase curves!
logger.info("Calculating phase curves...")

# make an empty figure
fig = plot_errorbar(planetoid, filt_list=[])

# operate on each filter in turn
for filt in cli_args.filter_list:
logger.info("fit {} filter data".format(filt))
@@ -95,62 +98,9 @@ def runAdler(cli_args):
df_obs["outlier"] = [False] * len(df_obs)
logger.info("{} observations retrieved".format(len(df_obs)))

# load and merge the previous obs
# TODO: replace this part with classifications loaded from adlerData
save_file = "{}/df_outlier_{}_{}.csv".format(cli_args.outpath, cli_args.ssObjectId, filt)
if os.path.isfile(save_file):
logger.info("load previously classified observations: {}".format(save_file))
_df_obs = pd.read_csv(save_file, index_col=0)
df_obs = df_obs.merge(_df_obs, on=["diaSourceId", "midPointMjdTai"], how="left")
df_obs.loc[pd.isnull(df_obs["outlier_y"]), "outlier_y"] = (
False # ensure that classifications exist (nan entries can only be false?). Weird behaviour here for g filter, is it to do with when new g obs appear relative to r/i etc?
)
df_obs = df_obs.rename({"outlier_y": "outlier"}, axis=1)
df_obs = df_obs.drop("outlier_x", axis=1)
else:
logger.info("no previously classified observations to load")

# define the date range to for new observations taken in the night to be analysed
logger.info(
"Most recent {} filter observation in query: date = {}".format(
filt, np.amax(df_obs["midPointMjdTai"])
)
)
t1 = int(np.amax(df_obs["midPointMjdTai"])) + 1
t0 = t1 - 1

# get all past observations
# mask = df_obs["midPointMjdTai"] < t0
mask = (df_obs["midPointMjdTai"] < t0) & (df_obs["outlier"] == False) # reject any past outliers

# split observations into "old" and "new"
df_obs_old = df_obs[(mask)]
df_obs_new = df_obs[~mask]
logger.info("Previous observations (date < {}): {}".format(t0, len(df_obs_old)))
logger.info("New observations ({} <= date < {}): {}".format(t0, t1, len(df_obs_new)))

# Determine the reference phase curve model
# TODO: We would load the best phase curve model available in AdlerData here

# we need sufficient past observations to fit the phase curve model
if len(df_obs_old) < 2:
print("save {}".format(save_file))
df_save = df_obs[obs_cols]
df_save.to_csv(save_file)
print("insufficient data, use default SSObject phase model and continue")
logger.info("insufficient data, use default SSObject phase model and continue")

# use the default SSObject phase parameter if there is no better information
pc_dict = {
"H": sso.H * u.mag,
"H_err": sso.Herr * u.mag,
"phase_parameter_1": sso.G12,
"phase_parameter_1_err": sso.G12err,
"model_name": "HG12_Pen16",
}
adler_data.populate_phase_parameters(filt, **pc_dict)
continue

# initial simple phase curve filter model with fixed G12
pc = PhaseCurve(
H=sso.H * u.mag,
@@ -159,62 +109,51 @@ def runAdler(cli_args):
)

# only fit G12 when sufficient data is available
if len(df_obs_old) < N_pc_fit:
if len(df_obs) < N_pc_fit:
msg = "Do not fit G12, use G12={:.2f}".format(pc.phase_parameter_1)
logger.info(msg)
pc.model_function.G12.fixed = True
else:
pc.model_function.G12.fixed = False

# do a HG12_Pen16 fit to the past data
pc_fit = pc.FitModel(
np.array(df_obs_old["phaseAngle"]) * u.deg,
np.array(df_obs_old["reduced_mag"]) * u.mag,
np.array(df_obs_old["magErr"]) * u.mag,
np.array(df_obs["phaseAngle"]) * u.deg,
np.array(df_obs["reduced_mag"]) * u.mag,
np.array(df_obs["magErr"]) * u.mag,
)
pc_fit = pc.InitModelSbpy(pc_fit)

# TODO: Here the best fit should be pushed back to our AdlerData tables
# Store the fitted values in an AdlerData object
adler_data.populate_phase_parameters(filt, **pc_fit.__dict__)

# find outliers in new data
# calculate data - model residuals
res = (np.array(df_obs_new["reduced_mag"]) * u.mag) - pc_fit.ReducedMag(
np.array(df_obs_new["phaseAngle"]) * u.deg
)
outlier_flag = sci_utils.outlier_diff(res.value, diff_cut=diff_cut)
df_obs.loc[~mask, "outlier"] = outlier_flag

# save the df_obs subset with outlier classification
df_save = df_obs[obs_cols]
print("save classifications: {}".format(save_file))
logger.info("save classifications: {}".format(save_file))
df_save.to_csv(save_file)

# make a plot
fig = plot_errorbar(planetoid, filt_list=[])
# add to plot
ax1 = fig.axes[0]
ax1.scatter(df_obs_old["phaseAngle"], df_obs_old["reduced_mag"], c="C0")
# TODO: set colours based on filter
ax1.scatter(df_obs["phaseAngle"], df_obs["reduced_mag"])
alpha = np.linspace(0, np.amax(obs.phaseAngle)) * u.deg
ax1.plot(alpha.value, pc_fit.ReducedMag(alpha).value, label="t={}".format(int(t0)))
ax1.scatter(
df_obs_new["phaseAngle"], df_obs_new["reduced_mag"], edgecolor="r", facecolor="none", zorder=3
ax1.plot(
alpha.value,
pc_fit.ReducedMag(alpha).value,
label="{}, H={:.2f}, G12={:.2f}".format(filt, pc_fit.H.value, pc_fit.phase_parameter_1),
)
out_mask = df_obs["outlier"] == True
ax1.scatter(
df_obs.loc[out_mask]["phaseAngle"],
df_obs.loc[out_mask]["reduced_mag"],
c="r",
marker="x",
s=75,
zorder=3,
)
fig_file = "{}/plots/phase_curve_{}_{}_{}.png".format(
cli_args.outpath, cli_args.ssObjectId, filt, int(t0)

# TODO: save the figures if an outpath is provided
ax1.legend()
if cli_args.outpath:
fig_file = "{}/phase_curve_{}_{}_{}.png".format(
cli_args.outpath, cli_args.ssObjectId, filt, int(np.amax(df_obs["midPointMjdTai"]))
)
# TODO: make the plots folder if it does not already exist?
print("Save figure: {}".format(fig_file))
logger.info("Save figure: {}".format(fig_file))
msg = "Save figure: {}".format(fig_file)
print(msg)
logger.info(msg)
fig = plot_errorbar(planetoid, fig=fig, filename=fig_file) # TODO: add titles with filter name?
plt.close()
else:
plt.show()

# TODO: output adler values to a database
print(adler_data.__dict__)

# analyse colours for the filters provided
logger.info("Calculate colours: {}".format(cli_args.colour_list))
@@ -242,23 +181,13 @@ def runAdler(cli_args):

logger.info("Determine {} colour".format(colour))

# TODO: replace this with a colour loaded from adlerData
save_file_colour = "{}/df_colour_{}_{}.csv".format(cli_args.outpath, cli_args.ssObjectId, colour)
if os.path.isfile(save_file_colour):
print("load previous colours from file: {}".format(save_file_colour))
df_col = pd.read_csv(save_file_colour, index_col=0)
# Check the last colour calculation date (x_obs) to avoid recalculation
obs = planetoid.observations_in_filter(filt_obs)
df_obs = pd.DataFrame(obs.__dict__)
if np.amax(df_col["midPointMjdTai"]) >= np.amax(df_obs["midPointMjdTai"]):
print("colour already calculated, skip")
continue

else:
df_col = pd.DataFrame()

# determine the filt_obs - filt_ref colour
# generate a plot
if cli_args.outpath:
plot_dir = cli_args.outpath
else:
plot_dir = None

col_dict = col_obs_ref(
planetoid,
adler_data,
Expand All @@ -268,21 +197,11 @@ def runAdler(cli_args):
N_ref=N_ref,
# x1 = x1,
# x2 = x2,
plot_dir="{}/plots".format(cli_args.outpath),
plot_dir=plot_dir,
)

print(col_dict)

# save the colour data
print("Append new colour and save to file: {}".format(save_file_colour))
df_col = pd.concat([df_col, pd.DataFrame([col_dict])])
df_col = df_col.reset_index(drop=True)
df_col.to_csv(save_file_colour)

# TODO: reject unreliable colours, e.g. high colErr or delta_t_col
# TODO: determine if colour is outlying
# compare this new colour to previous colour(s)


def main():
parser = argparse.ArgumentParser(description="Runs Adler for select planetoid(s) and given user input.")
@@ -327,7 +246,7 @@ def main():
optional_group.add_argument(
"-o",
"--outpath",
help="Output path location. Default is current working directory.", # TODO: make adler create the outpath directory on start up if it does not exist? Also the "plots" dir within?
help="Output path location. Default is current working directory.", # TODO: make adler create the outpath directory on start up if it does not exist?
type=str,
default="./",
)
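
For orientation, the phase-curve fit in the refactored runAdler loop above goes through adler's PhaseCurve wrapper (FitModel / InitModelSbpy); the handling of pc.model_function.G12 and the "HG12_Pen16" model name suggest an sbpy/astropy model underneath. The sketch below is a hypothetical, self-contained illustration of that kind of fit using sbpy and astropy directly, with synthetic data and assumed values for N_pc_fit and the outlier cut; it is not code from this commit.

# Hypothetical sketch (not adler's implementation): fit an HG12_Pen16
# phase-curve model to reduced magnitudes with sbpy + astropy, the same
# kind of fit that PhaseCurve.FitModel performs in the loop above.
import numpy as np
import astropy.units as u
from astropy.modeling.fitting import LevMarLSQFitter
from sbpy.photometry import HG12_Pen16

# synthetic stand-ins for df_obs["phaseAngle"], df_obs["reduced_mag"], df_obs["magErr"]
rng = np.random.default_rng(0)
alpha = np.sort(rng.uniform(1.0, 30.0, 40)) * u.deg
true_model = HG12_Pen16(H=18.0 * u.mag, G12=0.62)
reduced_mag = (true_model(alpha).value + rng.normal(0.0, 0.03, alpha.size)) * u.mag
mag_err = np.full(alpha.size, 0.03)

# initial model; hold G12 fixed when there are too few points,
# mirroring the N_pc_fit guard in the diff (threshold value assumed here)
N_pc_fit = 10
model_init = HG12_Pen16(H=18.0 * u.mag, G12=0.62)
model_init.G12.fixed = bool(len(alpha) < N_pc_fit)

# weighted least-squares fit of H (and G12 if free) against phase angle
fitter = LevMarLSQFitter()
model_fit = fitter(model_init, alpha, reduced_mag, weights=1.0 / mag_err)
print("H = {:.2f} mag, G12 = {:.2f}".format(model_fit.H.value, model_fit.G12.value))

# data - model residuals; a simple magnitude cut like this is one way to
# flag outlying observations (cut value assumed for illustration)
residuals = (reduced_mag - model_fit(alpha)).value
outlier = np.abs(residuals) > 0.2

Holding G12 fixed when only a handful of observations are available keeps the fit well conditioned, which is the purpose of the N_pc_fit guard retained in the CLI script.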
