From 3edba034b603f62b9ff6373281b9c62375b09ac0 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Wed, 16 Oct 2024 12:22:08 -0400
Subject: [PATCH 1/9] fixed error in the full label list (was missing `U`)

---
 modeling/__init__.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/modeling/__init__.py b/modeling/__init__.py
index 7d4f9a5..8f7d265 100644
--- a/modeling/__init__.py
+++ b/modeling/__init__.py
@@ -2,10 +2,14 @@
 positive_label = '+'
 
 # full typology from https://github.com/clamsproject/app-swt-detection/issues/1
-FRAME_TYPES = ["B", "S", "W", "L", "O",
-               "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"]
-FRAME_TYPES_WITH_SUBTYPES = ["B", "SH", "SC", "SD", "SB", "SG", "W", "L", "O",
-                             "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"]
+FRAME_TYPES = [
+    "B", "S", "I", "C", "R", "M", "O", "W",
+    "N", "Y", "U", "K",
+    "L", "G", "F", "E", "T",
+    "P",
+]
+FRAME_TYPES_WITH_SUBTYPES = FRAME_TYPES.copy() + ['SH', 'SC', 'SD', 'SB', 'SG']
+FRAME_TYPES_WITH_SUBTYPES.remove('S')
 
 # These are time frames that are typically static (that is, the text does not
 # move around or change as with rolling credits). These are frame names after

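A quick sanity check on the derived label sets, assuming the patched modeling package above is importable:

from modeling import FRAME_TYPES, FRAME_TYPES_WITH_SUBTYPES

assert "U" in FRAME_TYPES                                       # the label this patch restores
assert "S" not in FRAME_TYPES_WITH_SUBTYPES                     # bare "S" is replaced by its subtypes
assert {"SH", "SC", "SD", "SB", "SG"} <= set(FRAME_TYPES_WITH_SUBTYPES)
assert len(FRAME_TYPES_WITH_SUBTYPES) == len(FRAME_TYPES) + 4   # -1 for "S", +5 subtypes
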
From 30b74f901011afab538ad4c1f250fcba1a6643ad Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Wed, 16 Oct 2024 12:25:53 -0400
Subject: [PATCH 2/9] added config key for pre-binning (as `prebin` instead of
 old `bins`)

---
 modeling/classify.py   |  2 +-
 modeling/gridsearch.py | 11 +++++++----
 modeling/train.py      | 30 ++++++++++++++++++------------
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/modeling/classify.py b/modeling/classify.py
index 3d7088b..b8c9fb9 100644
--- a/modeling/classify.py
+++ b/modeling/classify.py
@@ -20,7 +20,7 @@ def __init__(self, model_stem, logger_name=None):
         model_config_file = f"{model_stem}.yml"
         model_checkpoint = f"{model_stem}.pt"
         model_config = yaml.safe_load(open(model_config_file))
-        self.training_labels = train.pretraining_binned_label(model_config)
+        self.training_labels = train.get_prebinned_labelset(model_config)
         self.featurizer = data_loader.FeatureExtractor(
             img_enc_name=model_config["img_enc_name"],
             pos_length=model_config.get("pos_length", 0),
diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py
index 737b2bc..cb8a00e 100644
--- a/modeling/gridsearch.py
+++ b/modeling/gridsearch.py
@@ -119,12 +119,15 @@
     ] + guids_with_challenging_images,  # also block the challenging images
     # {"cpb-aacip-254-75r7szdz"},  # effectively no block except
 ]
-# we no longer use bins, keeping this just for historical reference
-# bins = [{'pre': {'slate': ['S'], 'chyron': ['I', 'N', 'Y'], 'credit': ['C']}}]
+nobinning = {t: t for t in modeling.FRAME_TYPES}
+binning_schemes = {
+    "nobinning": nobinning,
+}
 
-param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid']
+prebin = list(binning_schemes.keys())
+
+param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid', 'prebin']
 l = locals()
 configs = []
 for vals in itertools.product(*[l[key] for key in param_keys]):
     configs.append(dict(zip(param_keys, vals)))
-
diff --git a/modeling/train.py b/modeling/train.py
index 33129f3..705c97b 100644
--- a/modeling/train.py
+++ b/modeling/train.py
@@ -56,12 +56,12 @@ def get_guids(data_dir):
 
 
 def pretraining_bin(label, specs):
-    if specs is None or "bins" not in specs:
+    if specs is None or "prebin" not in specs:
         return int_encode(label)
-    for i, ptbin in enumerate(specs["bins"].values()):
+    for i, ptbin in enumerate(specs["prebin"].values()):
         if label and label in ptbin:
             return i
-    return len(specs["bins"].keys())
+    return len(specs["prebin"].keys())
 
 
 def load_config(config):
@@ -161,11 +161,12 @@ def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     
     # the number of labels (after "pre"-binning)
-    if configs and 'bins' in configs:
-        num_labels = len(configs['bins'].keys()) + 1
+    if configs and 'prebin' in configs:
+        num_labels = len(configs['prebin'].keys()) + 1
     else:
         num_labels = len(FRAME_TYPES) + 1
-        
+    labelset = get_prebinned_labelset(configs)
+
     # if split_size > #videos, nothing to "hold-out". Hence, single fold training and validate against the "fixed" set
     if configs['split_size'] >= len(train_all_guids):
         valid_guids = gridsearch.guids_for_fixed_validation_set
@@ -181,7 +182,7 @@ def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H
             loss, device, train_loader, configs)
         torch.save(model.state_dict(), export_model_file)
         p_config = Path(f'{base_fname}.yml')
-        validate(model, valid_loader, pretraining_binned_label(config), export_fname=f'{base_fname}.csv')
+        validate(model, valid_loader, labelset, export_fname=f'{base_fname}.csv')
         export_train_config(config_file, configs, p_config)
         return
     # otherwise, do k-fold training with k's size = split_size
@@ -206,7 +207,7 @@ def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H
                 get_net(train.feat_dim, num_labels, configs['num_layers'], configs['dropouts']),
                 loss, device, train_loader, configs)
         torch.save(model.state_dict(), export_model_file)
-        p, r, f = validate(model, valid_loader, pretraining_binned_label(config), export_fname=export_csv_file)
+        p, r, f = validate(model, valid_loader, labelset, export_fname=export_csv_file)
         val_set_spec.append(validation_guids)
         p_scores.append(p)
         r_scores.append(r)
@@ -247,9 +248,9 @@ def export_kfold_results(trial_specs, p_scores, r_scores, f_scores, p_results):
         out.write(f'\trecall = {sum(r_scores) / len(r_scores)}\n')
 
 
-def pretraining_binned_label(config):
-    if 'bins' in config:
-        return list(config["bins"].keys()) + [modeling.negative_label]
+def get_prebinned_labelset(config):
+    if 'prebin' in config:
+        return list(config["prebin"].keys()) + [modeling.negative_label]
     return modeling.FRAME_TYPES + [modeling.negative_label]
 
 
@@ -309,8 +310,13 @@ def train_model(model, loss_fn, device, train_loader, configs):
     for config in configs:
         timestamp = time.strftime("%Y%m%d-%H%M%S")
         backbonename = config['img_enc_name']
+        if isinstance(config['prebin'], str):
+            prebin_name = config['prebin']
+            config['prebin'] = gridsearch.binning_schemes[prebin_name]
+        else:
+            prebin_name = ''
         positionalencoding = "pos" + ("F" if config["pos_vec_coeff"] == 0 else "T")
         train(
             indir=args.indir, outdir=args.outdir, config_file=args.config, configs=config,
-            train_id='.'.join([timestamp, backbonename, positionalencoding])
+            train_id='.'.join(filter(None, [timestamp, backbonename, prebin_name, positionalencoding]))
         )

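A minimal sketch of how a `prebin` entry in a training config is consumed by the renamed helpers; the bin dict here is illustrative, not one of the project's schemes, and the import assumes the training dependencies are installed:

from modeling import train

configs = {"prebin": {"slate": ["S"], "chyron": ["I", "N", "Y"], "credit": ["C"]}}
train.pretraining_bin("I", configs)    # -> 1, the index of the bin that contains "I"
train.pretraining_bin("B", configs)    # -> 3, i.e. len(prebin): the catch-all negative bin
train.get_prebinned_labelset(configs)  # -> ["slate", "chyron", "credit"] + [modeling.negative_label]
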
From 5374c7768ef7c9a26abd339b0050cfd3f132eccd Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Wed, 16 Oct 2024 12:26:40 -0400
Subject: [PATCH 3/9] added prebin configs for grid search experiments and
 updated see_results script for new output format

---
 modeling/gridsearch.py | 61 +++++++++++++++++++++++++++++++++++++
 scripts/see_results.py | 69 +++++++++++++++++++++++++++++-------------
 2 files changed, 109 insertions(+), 21 deletions(-)

diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py
index cb8a00e..111302c 100644
--- a/modeling/gridsearch.py
+++ b/modeling/gridsearch.py
@@ -120,8 +120,69 @@
     # {"cpb-aacip-254-75r7szdz"},  # effectively no block except
 ]
 nobinning = {t: t for t in modeling.FRAME_TYPES}
+
+label_bins = {
+    "Bars": ["B"],
+    "Chyron-other": ["Y", "U", "K"],
+    "Chyron-person": ["I", "N"],
+    "Credits": ["C", "R"],
+    "Main": ["M"],
+    "Opening": ["O", "W"],
+    "Slate": ["S", "S:H", "S:C", "S:D", "S:B", "S:G"],
+    "Other-text-sm": ["L", "G", "F", "E", "T"],
+    "Other-text-md": ["M", "O", "W", "L", "G", "F", "E", "T"],
+    "Other-text-lg": ["M", "O", "W", "Y", "U", "K", "L", "G", "F", "E", "T"],
+}
+
 binning_schemes = {
     "nobinning": nobinning,
+    
+    "strict": {
+        "Bars": label_bins["Bars"],
+        "Slate": label_bins["Slate"],
+        "Chyron-person": label_bins["Chyron-person"],
+        "Credits": label_bins["Credits"],
+        "Main": label_bins["Main"],
+        "Opening": label_bins["Opening"],
+        "Chyron-other": label_bins["Chyron-other"],
+        "Other-text": label_bins["Other-text-sm"],
+    },
+
+    "simple": {
+        "Bars": label_bins["Bars"],
+        "Slate": label_bins["Slate"],
+        "Chyron-person": label_bins["Chyron-person"],
+        "Credits": label_bins["Credits"],
+        "Other-text": label_bins["Other-text-lg"],
+    },
+
+    "relaxed": {
+        "Bars": label_bins["Bars"],
+        "Slate": label_bins["Slate"],
+        "Chyron": label_bins["Chyron-other"] + label_bins["Chyron-person"],
+        "Credits": label_bins["Credits"],
+        "Other-text": label_bins["Other-text-md"],
+    },
+
+    "binary-bars": {
+        "Bars": label_bins["Bars"],
+    },
+
+    "binary-slate": {
+        "Slate": label_bins["Slate"],
+    },
+
+    "binary-chyron-strict": {
+        "Chyron-person": label_bins["Chyron-person"],
+    },
+
+    "binary-chyron-relaxed": {
+        "Chyron": label_bins["Chyron-other"] + label_bins["Chyron-person"],
+    },
+
+    "binary-credits": {
+        "Credits": label_bins["Credits"],
+    }
 }
 
 prebin = list(binning_schemes.keys())
diff --git a/scripts/see_results.py b/scripts/see_results.py
index 3b36a16..a95e330 100644
--- a/scripts/see_results.py
+++ b/scripts/see_results.py
@@ -2,6 +2,7 @@
 import base64
 import csv
 import os
+import pathlib
 from collections import defaultdict
 from io import BytesIO
 from itertools import product
@@ -11,28 +12,16 @@
 import numpy as np
 import yaml
 
+
 # list of bins
 # Since the bins parameters are too long to print or show on the plot, they are numbered by index.
-bins = [
-    {'pre': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'other-opening': ['W', 'L', 'O', 'M'],
-             'chyron': ['I', 'N', 'Y'], 'not-chyron': ['P', 'K', 'G', 'T', 'F'], 'credits': ['C'], 'copyright': ['R']},
-     'post': {'bars': ['bars'], 'slate': ['slate'], 'chyron': ['chyron'], 'credits': ['credits']}},
-    {'post': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'chyron': ['I', 'N', 'Y'], 'credits': ['C']}},
-
-
-    {'pre': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'warning': ['W'], 'opening': ['O'],
-             'main_title': ['M'], 'chyron': ['I'], 'credits': ['C'], 'copyright': ['R']},
-     'post': {'bars': ['bars'], 'slate': ['slate'], 'chyron': ['chyron'], 'credits': ['credits']}},
-    {'post': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'chyron': ['I'], 'credits': ['C']}},
-
 
-    {'pre': {'chyron': ['I', 'N', 'Y'], 'person-not-chyron': ['E', 'P', 'K']}, 'post': {'chyron': ['chyron']}},
-    {'post': {'chyron': ['I', 'N', 'Y']}},
-]
 
-
-def get_configs_and_macroavgs(directory):
+def process_kfold_validation_results(directory):
     """
+    THIS FUNCTION IS OUTDATED since we no longer actively use k-fold validation.
+    Hence, the code is not compatible with the new file naming convention and structure used for the "fixed" validation experiment results.
+    
     1. Iterate over all files in the directory
     2. Get configuration information
     3. Calculate the averages of accuracy, precision, recall, and f1-score for each label for each set of k_fold results.
@@ -92,6 +81,36 @@ def get_configs_and_macroavgs(directory):
 
     return configs, macro_avgs
 
+def process_fixed_validation_results(directory):
+    configs = {}
+    scores = {}
+    for csv_fname in pathlib.Path(directory).glob('*.csv'):
+        key = csv_fname.stem
+        timestamp, bb_name, bin_name, posenc = key.split('.')
+        posenc = posenc[-1] == 'T'
+        score = defaultdict(lambda: defaultdict(float))
+        with open(csv_fname, "r") as csv_f:
+            csv_reader = csv.DictReader(csv_f)
+            for row in csv_reader:
+                if 'Confusion Matrix' in row['Model_Name'] or not row:
+                    break
+                score[row['Label']]['Accuracy'] += float(row['Accuracy'])
+                score[row['Label']]['Precision'] += float(row['Precision'])
+                score[row['Label']]['Recall'] += float(row['Recall'])
+                score[row['Label']]['F1-Score'] += float(row['F1-Score'])
+        config_fname = csv_fname.with_suffix('.yml')
+        with open(config_fname, "r") as yml_f:
+            data = yaml.safe_load(yml_f)
+        # delete unnecessary items
+            data['block_guids_train'] = f"{len(data['block_guids_train'])}@{hash(str(sorted(data['block_guids_train'])))}"
+            data['block_guids_valid'] = f"{len(data['block_guids_valid'])}@{hash(str(sorted(data['block_guids_valid'])))}"
+            del data['split_size']
+            data['prebin'] = bin_name
+            data['posenc'] = posenc
+            configs[key] = data
+        scores[key] = score
+    return configs, scores
+
 
 def get_inverse_configs(configs):
     """
@@ -118,10 +137,12 @@ def get_grid(configs):
         for k, v in value.items():
             grid[k].add(v)
 
+    refined_grid = {}
     for key, val in grid.items():
-        grid[key] = list(val)
+        if len(val) > 1:
+            refined_grid[key] = list(val)
 
-    return grid
+    return refined_grid
 
 
 def get_labels(macroavgs):
@@ -133,6 +154,7 @@ def get_labels(macroavgs):
     labels = set()
     for key, val in macroavgs.items():
         labels.update(val.keys())
+    labels.remove('-')
     return list(labels)
 
 
@@ -162,7 +184,8 @@ def get_pairs_to_compare(grid, inverse_configs, variable):
         for s in list_of_sets[1:]:
             intersection_result = intersection_result.intersection(s)
 
-        pair_list.append(list(intersection_result))
+        if len(intersection_result) > 0:
+            pair_list.append(list(intersection_result))
 
     return pair_list
 
@@ -326,7 +349,11 @@ def user_input_label(label_list):
     args = parser.parse_args()
 
     # Get necessary dictionaries and lists for processing the comparison.
-    configs, macroavgs = get_configs_and_macroavgs(args.directory)
+    is_kfold = bool(any(pathlib.Path(args.directory).glob("*kfold*.csv")))
+    if is_kfold:
+        configs, macroavgs = process_kfold_validation_results(args.directory)
+    else:
+        configs, macroavgs = process_fixed_validation_results(args.directory)
     label_list = get_labels(macroavgs)
     inverse_configs = get_inverse_configs(configs)
     grid = get_grid(configs)

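A minimal sketch of how a prebin scheme name travels from gridsearch.py into the output file names that see_results.py parses; the backbone name and timestamp are placeholders:

import modeling.gridsearch as gridsearch

config = {"img_enc_name": "convnext_lg", "pos_vec_coeff": 0, "prebin": "relaxed"}
prebin_name = config["prebin"]                               # "relaxed"
config["prebin"] = gridsearch.binning_schemes[prebin_name]   # resolve the scheme name into a label-bin dict
train_id = ".".join(filter(None, ["20241017-120000", config["img_enc_name"], prebin_name, "posF"]))
# -> "20241017-120000.convnext_lg.relaxed.posF"; process_fixed_validation_results() splits this
#    file stem on "." to recover the timestamp, backbone, prebin scheme, and positional-encoding flag
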
From 985e51ef4f04c757669fdb8435e271a14789d874 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Thu, 17 Oct 2024 12:27:20 -0400
Subject: [PATCH 4/9] refactoring see_res script

---
 scripts/see_results.py | 112 +++++++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 48 deletions(-)

diff --git a/scripts/see_results.py b/scripts/see_results.py
index a95e330..b88d9bc 100644
--- a/scripts/see_results.py
+++ b/scripts/see_results.py
@@ -59,10 +59,7 @@ def process_kfold_validation_results(directory):
                 with open(file, "r") as f:
                     data = yaml.safe_load(f)
                 # delete unnecessary items
-                data['block_guids_train'] = f"{len(data['block_guids_train'])}@{hash(str(sorted(data['block_guids_train'])))}"
-                data['block_guids_valid'] = f"{len(data['block_guids_valid'])}@{hash(str(sorted(data['block_guids_valid'])))}"
-                del data['split_size']
-                configs[key] = data
+                configs[key] = clean_config(data)
 
         # Calculate macro averages
         for k, v in macro_avg.items():
@@ -81,6 +78,29 @@ def process_kfold_validation_results(directory):
 
     return configs, macro_avgs
 
+
+def clean_config(config, prebin_name=None):
+    """
+    Clean up the configuration found in a yml file, replacing verbose values with more human-friendly names.
+    """
+    config['block_guids_train'] = f'{len(config["block_guids_train"])}@{hash(str(sorted(config["block_guids_train"])))}'
+    config['block_guids_valid'] = f'{len(config["block_guids_valid"])}@{hash(str(sorted(config["block_guids_valid"])))}'
+    
+    # a short string name for the prebin can be passed as an argument, or generated from the dictionary in the config
+    if prebin_name:
+        config['prebin'] = prebin_name
+    elif 'prebin' in config:
+        config['prebin'] = f'{len(config["prebin"])}way@{hash(str(config["prebin"]))}'
+    else:
+        config['prebin'] = 'None'
+        
+    config['posenc'] = config['pos_vec_coeff'] > 0
+    del config['pos_vec_coeff']
+    
+    del config['split_size']
+    return config
+
+
 def process_fixed_validation_results(directory):
     configs = {}
     scores = {}
@@ -100,14 +120,10 @@ def process_fixed_validation_results(directory):
                 score[row['Label']]['F1-Score'] += float(row['F1-Score'])
         config_fname = csv_fname.with_suffix('.yml')
         with open(config_fname, "r") as yml_f:
-            data = yaml.safe_load(yml_f)
+            config = yaml.safe_load(yml_f)
+            config = clean_config(config, bin_name)
         # delete unnecessary items
-            data['block_guids_train'] = f"{len(data['block_guids_train'])}@{hash(str(sorted(data['block_guids_train'])))}"
-            data['block_guids_valid'] = f"{len(data['block_guids_valid'])}@{hash(str(sorted(data['block_guids_valid'])))}"
-            del data['split_size']
-            data['prebin'] = bin_name
-            data['posenc'] = posenc
-            configs[key] = data
+            configs[key] = config
         scores[key] = score
     return configs, scores
 
@@ -190,19 +206,19 @@ def get_pairs_to_compare(grid, inverse_configs, variable):
     return pair_list
 
 
-def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_show, variable_values, interactive_plots=True):
+def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label_to_show, variable_values, interactive_plots=True):
     """
     For list of pairs got from get_pairs_to_compare function, compare each pair by plotting bar graphs for given label.
     :param list_of_pairs: got from get_pairs_to_compare function for given variable
     :param macroavgs:
     :param configs:
     :param grid:
-    :param variable:
-    :param label_to_show: User choice of label (including overall) to show scores in the graph.
+    :param var_to_compare:
+    :param label_to_show: User choice of label (including overall) to show scores in the graph. 
     """
 
     # Form parameter to color dictionary for consistency in color across all pairs
-    param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[variable]))
+    param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[var_to_compare]))
 
     html = '<html><head><title>Comparison of pairs</title></head><body>'
 
@@ -216,7 +232,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_sh
         ordered_pair = [None] * len(variable_values)
         for i, value in enumerate(variable_values):
             for exp_id in pair:
-                if configs[exp_id][variable] == value:
+                if configs[exp_id][var_to_compare] == value:
                     ordered_pair[i] = exp_id
         scores = macroavgs[ordered_pair[0]][label_to_show]
         data = defaultdict(list)
@@ -233,42 +249,42 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_sh
                     data[exp_id].append(0.0)
         data = dict(data)
 
+        if len(data) == 0:
+            continue
         # plot a bar graph
         x = np.arange(len(metric_list))  # the label locations
-        l = len(data) # length of data (it varies by set)
-        width = 1/(l+1)  # the width of the bars
+        width = 1/(len(data)+1)  # the width of the bars
         multiplier = 0
 
-        if l != 0:
-            for exp_id, scores in data.items():
-                id_variable = str(variable) + ": " + str(configs[exp_id][variable])
-                offset = width * multiplier
-                rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][variable])])
-                ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3)
-                multiplier += 1
-
-            # Add some text for labels, title and custom x-axis tick labels, etc.
-            ax.set_ylabel('Score')
-            ax.set_title(str(label_to_show))
-            ax.set_xticks(x + width*(l-1)/2, metric_list)
-            ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5))
-            ax.set_ylim(0.0, 1.15)
-            # Show information on fixed parameters.
-            configs[exp_id].pop(variable)
-            string_configs = ""
-            for k, v in configs[exp_id].items():
-                string_configs += str(k) + ": " + str(v) + "\n"
-            ax.text(0.99, 0.97, string_configs,
-                    verticalalignment='bottom', horizontalalignment='right',
-                    transform=ax.transAxes,
-                    color='green', fontsize='small')
-
-            if interactive_plots:
-                plt.show()
-            else:
-                temp_io_stream = BytesIO()
-                fig.savefig(temp_io_stream, format='png', bbox_inches='tight')
-                html += f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>'
+        for exp_id, scores in data.items():
+            id_variable = str(var_to_compare) + ": " + str(configs[exp_id][var_to_compare])
+            offset = width * multiplier
+            rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][var_to_compare])])
+            ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3)
+            multiplier += 1
+
+        # Add some text for labels, title and custom x-axis tick labels, etc.
+        ax.set_ylabel('Score')
+        ax.set_title(str(label_to_show))
+        ax.set_xticks(x + width*(len(data)-1)/2, metric_list)
+        ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5))
+        ax.set_ylim(0.0, 1.15)
+        # Show information on fixed parameters.
+        configs[exp_id].pop(var_to_compare)
+        string_configs = f'{exp_id}\n'
+        for k, v in configs[exp_id].items():
+            string_configs += str(k) + ": " + str(v) + "\n"
+        ax.text(0.99, 0.97, string_configs,
+                verticalalignment='bottom', horizontalalignment='right',
+                transform=ax.transAxes,
+                color='green', fontsize='small')
+
+        if interactive_plots:
+            plt.show()
+        else:
+            temp_io_stream = BytesIO()
+            fig.savefig(temp_io_stream, format='png', bbox_inches='tight')
+            html += f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>'
         plt.cla()
     for i, var_val in enumerate(variable_values):
         if interactive_plots:

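A minimal sketch of the GUID-list summarization clean_config() applies; the GUIDs are placeholders, and since hash() of a string is salted per Python process, the token is only stable within a single run of the script:

guids = ["cpb-aacip-000-0000000001", "cpb-aacip-000-0000000002"]
summary = f"{len(guids)}@{hash(str(sorted(guids)))}"
# e.g. "2@-7380451177699383133": enough to tell two block lists apart when grouping
# experiments, without printing hundreds of GUIDs into the plot annotations
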
From be8b40bf457527cc1aaa3418c0170511149b1b2b Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Thu, 17 Oct 2024 12:37:48 -0400
Subject: [PATCH 5/9] can now ignore the negative label when comparing exp results

---
 scripts/see_results.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/scripts/see_results.py b/scripts/see_results.py
index b88d9bc..b0b0f1c 100644
--- a/scripts/see_results.py
+++ b/scripts/see_results.py
@@ -101,7 +101,7 @@ def clean_config(config, prebin_name=None):
     return config
 
 
-def process_fixed_validation_results(directory):
+def process_fixed_validation_results(directory, include_negative_label=False):
     configs = {}
     scores = {}
     for csv_fname in pathlib.Path(directory).glob('*.csv'):
@@ -114,6 +114,9 @@ def process_fixed_validation_results(directory):
             for row in csv_reader:
                 if 'Confusion Matrix' in row['Model_Name'] or not row:
                     break
+                # ignore negative class
+                if row['Label'] == '-' and not include_negative_label:
+                    continue
                 score[row['Label']]['Accuracy'] += float(row['Accuracy'])
                 score[row['Label']]['Precision'] += float(row['Precision'])
                 score[row['Label']]['Recall'] += float(row['Recall'])
@@ -170,7 +173,8 @@ def get_labels(macroavgs):
     labels = set()
     for key, val in macroavgs.items():
         labels.update(val.keys())
-    labels.remove('-')
+    if '-' in labels:
+        labels.remove('-')
     return list(labels)
 
 
@@ -294,7 +298,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label
 
     if not interactive_plots:
         html += '</body></html>'
-        with open(f'results-comparison-{variable}-{label_to_show}.html', 'w') as f:
+        with open(f'results-comparison-{var_to_compare}-{label_to_show}.html', 'w') as f:
             f.write(html)
 
 
@@ -361,6 +365,11 @@ def user_input_label(label_list):
         action='store_true',
         help='Flag to show plots in interactive mode. If not set, the program will save all the plots in a html file.'
     )
+    parser.add_argument(
+        '-n', '--negativelabel', 
+        action='store_true',
+        help='Flag to include the negative label when averaging scores.'
+    )
 
     args = parser.parse_args()
 
@@ -369,7 +378,7 @@ def user_input_label(label_list):
     if is_kfold:
         configs, macroavgs = process_kfold_validation_results(args.directory)
     else:
-        configs, macroavgs = process_fixed_validation_results(args.directory)
+        configs, macroavgs = process_fixed_validation_results(args.directory, args.negativelabel)
     label_list = get_labels(macroavgs)
     inverse_configs = get_inverse_configs(configs)
     grid = get_grid(configs)

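A minimal sketch of what the new flag toggles; the directory path is a placeholder, and the import assumes the scripts directory is on PYTHONPATH:

from see_results import process_fixed_validation_results

configs, scores = process_fixed_validation_results("path/to/results")                               # '-' rows skipped
configs, scores = process_fixed_validation_results("path/to/results", include_negative_label=True)  # '-' rows kept, as with -n
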
From 061c4745ce34e9413c2bc4584e1bc6da5d863527 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Thu, 17 Oct 2024 16:58:37 -0400
Subject: [PATCH 6/9] see_res now uses prefix-based label targeting instead of
 strict str match

---
 scripts/see_results.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/scripts/see_results.py b/scripts/see_results.py
index b0b0f1c..a04c60a 100644
--- a/scripts/see_results.py
+++ b/scripts/see_results.py
@@ -218,7 +218,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label
     :param configs:
     :param grid:
     :param var_to_compare:
-    :param label_to_show: User choice of label (including overall) to show scores in the graph. 
+    :param target_label: User choice of label (including overall) to show scores in the graph. 
     """
 
     # Form parameter to color dictionary for consistency in color across all pairs
@@ -238,17 +238,21 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label
             for exp_id in pair:
                 if configs[exp_id][var_to_compare] == value:
                     ordered_pair[i] = exp_id
-        scores = macroavgs[ordered_pair[0]][label_to_show]
+        for _, labels in macroavgs.items():
+            for label in labels.keys():
+                if label.startswith(target_label):
+                    target_label = label
+        scores = macroavgs[ordered_pair[0]][target_label]
         data = defaultdict(list)
         metric_list = ['Avg Accuracy', 'Avg Precision', 'Avg Recall', 'Avg F1-Score']
         for i, exp_id in enumerate(ordered_pair):
             for metric, score in scores.items():
-                if label_to_show in macroavgs[exp_id]:
-                    data[exp_id].append(macroavgs[exp_id][label_to_show][metric])
+                if target_label in macroavgs[exp_id]:
+                    data[exp_id].append(macroavgs[exp_id][target_label][metric])
                     if 'preci' in metric.lower():
-                        all_ps[i].append(macroavgs[exp_id][label_to_show][metric])
+                        all_ps[i].append(macroavgs[exp_id][target_label][metric])
                     if 'recal' in metric.lower():
-                        all_rs[i].append(macroavgs[exp_id][label_to_show][metric])
+                        all_rs[i].append(macroavgs[exp_id][target_label][metric])
                 else:
                     data[exp_id].append(0.0)
         data = dict(data)
@@ -269,7 +273,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label
 
         # Add some text for labels, title and custom x-axis tick labels, etc.
         ax.set_ylabel('Score')
-        ax.set_title(str(label_to_show))
+        ax.set_title(str(target_label))
         ax.set_xticks(x + width*(len(data)-1)/2, metric_list)
         ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5))
         ax.set_ylim(0.0, 1.15)
@@ -298,7 +302,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label
 
     if not interactive_plots:
         html += '</body></html>'
-        with open(f'results-comparison-{var_to_compare}-{label_to_show}.html', 'w') as f:
+        with open(f'results-comparison-{var_to_compare}-{target_label}.html', 'w') as f:
             f.write(html)
 
 

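A minimal sketch of the prefix-based targeting this patch introduces: a short user-supplied label is widened to whichever full label actually occurs in the results (the score dict is illustrative):

macroavgs = {"exp1": {"Chyron-person": {}, "Credits": {}}}
target_label = "Chyron"
for labels in macroavgs.values():
    for label in labels:
        if label.startswith(target_label):
            target_label = label
# target_label is now "Chyron-person", so asking for "Chyron" (or just "Ch") selects the binned label
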
From 1d77c5e31039c4e3e83be90469942a5da15eea6f Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Thu, 17 Oct 2024 19:18:39 -0400
Subject: [PATCH 7/9] added special value to render 2-d bar charts per label in
 the see_res script

---
 scripts/see_results.py | 166 +++++++++++++++++++++--------------------
 1 file changed, 86 insertions(+), 80 deletions(-)

diff --git a/scripts/see_results.py b/scripts/see_results.py
index a04c60a..e282762 100644
--- a/scripts/see_results.py
+++ b/scripts/see_results.py
@@ -6,17 +6,12 @@
 from collections import defaultdict
 from io import BytesIO
 from itertools import product
-from statistics import mean
 
 import matplotlib.pyplot as plt
 import numpy as np
 import yaml
 
 
-# list of bins
-# Since the bins parameters are too long to print or show on the plot, they are numbered by index.
-
-
 def process_kfold_validation_results(directory):
     """
     THIS FUNCTION IS OUTDATED since we no longer actively use k-fold validation.
@@ -178,6 +173,64 @@ def get_labels(macroavgs):
     return list(labels)
 
 
+def plot_bar_graphs(axis, exp_group, score_dict, config_dict, target_label, target_var, var_vals, colorscheme):
+    # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...}
+    # and plot a bar graph
+    # re-order the pair to show the variable values in the same order as in the grid
+    ordered_group = [None] * len(var_vals)
+    for i, value in enumerate(var_vals):
+        for exp_id in exp_group:
+            if config_dict[exp_id][target_var] == value:
+                ordered_group[i] = exp_id
+    metrics = score_dict[ordered_group[0]]["Overall"].keys()
+    data = defaultdict(list)
+    metric_list = [f'Avg {m}' for m in metrics]
+    for i, exp_id in enumerate(ordered_group):
+        label_found = False
+        for l in score_dict[exp_id].keys():
+            if l.startswith(target_label):
+                for metric in metrics:
+                    data[exp_id].append(score_dict[exp_id][l][metric])
+                label_found = True
+                break
+        if not label_found:
+            data[exp_id].append(0.0)
+    data = dict(data)
+
+    if len(data) == 0:
+        return None, None
+    # plot a bar graph
+    x = np.arange(len(metric_list))  # the label locations
+    width = 1/(len(data)+1)  # the width of the bars
+    multiplier = 0
+
+    for exp_id, scores in data.items():
+        if len(scores) == 1 and scores[0] == 0.0:
+            continue
+        id_variable = str(target_var) + ": " + str(config_dict[exp_id][target_var])
+        offset = width * multiplier
+        rects = axis.bar(x + offset, scores, width, label=id_variable, color=colorscheme[str(config_dict[exp_id][target_var])])
+        axis.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3)
+        multiplier += 1
+
+    # Add some text for labels, title and custom x-axis tick labels, etc.
+    axis.set_ylabel('Score')
+    axis.set_title(str(target_label))
+    axis.set_xticks(x + width * (len(data) - 1) / 2, metric_list)
+    axis.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5))
+    axis.set_ylim(0.0, 1.15)
+    # Show information on fixed parameters.
+    string_configs = f'{exp_id}\n'
+    for k, v in config_dict[exp_id].items():
+        if k != target_var:
+            string_configs += str(k) + ": " + str(v) + "\n"
+    axis.text(0.99, 0.97, string_configs,
+              verticalalignment='bottom', horizontalalignment='right',
+              transform=axis.transAxes,
+              color='green', fontsize='small')
+    return axis
+
+
 def get_pairs_to_compare(grid, inverse_configs, variable):
     """
     Get a list of pairs(lists of IDs) where all configurations are the same except for one given variable.
@@ -193,7 +246,7 @@ def get_pairs_to_compare(grid, inverse_configs, variable):
     # Form all possible configurations of parameters from grid and store it as a list of dictionary form.
     conf_dicts = [dict(zip(grid.keys(), config)) for config in list(product(*grid.values()))]
 
-    # Get all the possible lists of pairs(IDs) using inverse_configs dictionary and intersection of them for every configuration.
+    # Get all the possible lists of exps using inverse_configs dictionary and intersection of them for every config
     pair_list = []
     for conf_dict in conf_dicts:
         list_of_sets = [inverse_configs[param_name][val] for param_name, val in conf_dict.items()]
@@ -210,99 +263,50 @@ def get_pairs_to_compare(grid, inverse_configs, variable):
     return pair_list
 
 
-def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label_to_show, variable_values, interactive_plots=True):
+def compare_pairs(exp_groups, scores, conf_grid, configs, target_lbl, target_var, var_vals, interactive_plots=True):
     """
     For list of pairs got from get_pairs_to_compare function, compare each pair by plotting bar graphs for given label.
-    :param list_of_pairs: got from get_pairs_to_compare function for given variable
-    :param macroavgs:
-    :param configs:
-    :param grid:
-    :param var_to_compare:
-    :param target_label: User choice of label (including overall) to show scores in the graph. 
+    :param exp_groups: got from get_pairs_to_compare function for given variable
+    :param scores: PRF scores from each experiment configuration
+    :param conf_grid: grid of configurations used in this experiment
+    :param configs: actual configurations used in this experiment
+    :param target_lbl: User choice of label (including overall) to show scores in the graph. 
+                       a special value `all` will generate plots for all "normalized" labels, laid out horizontally
+    :param target_var: configuration key name to use as a variable to compare, all other keys are fixed.
+    :param var_vals: list of values for the variable to compare
+    :param interactive_plots: flag to show plots interactively; if false, the program saves all the plots to an html file
     """
 
     # Form parameter to color dictionary for consistency in color across all pairs
-    param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[var_to_compare]))
+    param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(conf_grid[target_var]))
 
     html = '<html><head><title>Comparison of pairs</title></head><body>'
 
     # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...}
     # and plot a bar graph
-    fig, ax = plt.subplots()
-    all_ps = [[] for _ in range(len(list_of_pairs[0]))]
-    all_rs = [[] for _ in range(len(list_of_pairs[0]))]
-    for pair in list_of_pairs:
-        # re-order the pair to show the variable values in the same order as in the grid
-        ordered_pair = [None] * len(variable_values)
-        for i, value in enumerate(variable_values):
-            for exp_id in pair:
-                if configs[exp_id][var_to_compare] == value:
-                    ordered_pair[i] = exp_id
-        for _, labels in macroavgs.items():
-            for label in labels.keys():
-                if label.startswith(target_label):
-                    target_label = label
-        scores = macroavgs[ordered_pair[0]][target_label]
-        data = defaultdict(list)
-        metric_list = ['Avg Accuracy', 'Avg Precision', 'Avg Recall', 'Avg F1-Score']
-        for i, exp_id in enumerate(ordered_pair):
-            for metric, score in scores.items():
-                if target_label in macroavgs[exp_id]:
-                    data[exp_id].append(macroavgs[exp_id][target_label][metric])
-                    if 'preci' in metric.lower():
-                        all_ps[i].append(macroavgs[exp_id][target_label][metric])
-                    if 'recal' in metric.lower():
-                        all_rs[i].append(macroavgs[exp_id][target_label][metric])
-                else:
-                    data[exp_id].append(0.0)
-        data = dict(data)
-
-        if len(data) == 0:
-            continue
-        # plot a bar graph
-        x = np.arange(len(metric_list))  # the label locations
-        width = 1/(len(data)+1)  # the width of the bars
-        multiplier = 0
-
-        for exp_id, scores in data.items():
-            id_variable = str(var_to_compare) + ": " + str(configs[exp_id][var_to_compare])
-            offset = width * multiplier
-            rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][var_to_compare])])
-            ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3)
-            multiplier += 1
-
-        # Add some text for labels, title and custom x-axis tick labels, etc.
-        ax.set_ylabel('Score')
-        ax.set_title(str(target_label))
-        ax.set_xticks(x + width*(len(data)-1)/2, metric_list)
-        ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5))
-        ax.set_ylim(0.0, 1.15)
-        # Show information on fixed parameters.
-        configs[exp_id].pop(var_to_compare)
-        string_configs = f'{exp_id}\n'
-        for k, v in configs[exp_id].items():
-            string_configs += str(k) + ": " + str(v) + "\n"
-        ax.text(0.99, 0.97, string_configs,
-                verticalalignment='bottom', horizontalalignment='right',
-                transform=ax.transAxes,
-                color='green', fontsize='small')
+    for group in exp_groups:
+        if target_lbl == 'all':
+            interested_lbls = "Ba Sl Ch Cr".split()
+            fig, axes = plt.subplots(1, len(interested_lbls), figsize=(45, 5), sharex=True, sharey=True)
+            plt.subplots_adjust(wspace=1)
+            for ax, lbl in zip(np.ravel(axes), interested_lbls):
+                plot_bar_graphs(ax, group, scores, configs, lbl, target_var, var_vals, param_to_color)
+        else:
+            fig, ax = plt.subplots()
+            plot_bar_graphs(ax, group, scores, configs, target_lbl, target_var, var_vals, param_to_color)
 
         if interactive_plots:
             plt.show()
         else:
             temp_io_stream = BytesIO()
             fig.savefig(temp_io_stream, format='png', bbox_inches='tight')
-            html += f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>'
+            htmlized = f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>'
+            html += htmlized
         plt.cla()
-    for i, var_val in enumerate(variable_values):
-        if interactive_plots:
-            print(f'{var_val}\t{round(mean(all_ps[i]), 4)}\t{round(mean(all_rs[i]), 4)}')
-        else:
-            html += f'<p>{var_val}\t{round(mean(all_ps[i]), 4)}\t{round(mean(all_rs[i]), 4)}</p>'
 
     if not interactive_plots:
         html += '</body></html>'
-        with open(f'results-comparison-{var_to_compare}-{target_label}.html', 'w') as f:
+        with open(f'results-comparison-{target_var}-{target_lbl}.html', 'w') as f:
             f.write(html)
 
 
@@ -384,6 +388,7 @@ def user_input_label(label_list):
     else:
         configs, macroavgs = process_fixed_validation_results(args.directory, args.negativelabel)
     label_list = get_labels(macroavgs)
+    label_list.append('all')
     inverse_configs = get_inverse_configs(configs)
     grid = get_grid(configs)
     if args.config_key is None:
@@ -402,4 +407,5 @@ def user_input_label(label_list):
     variable_values = sorted(grid[choice_variable].copy())
     list_of_pairs = get_pairs_to_compare(grid.copy(), inverse_configs, choice_variable)
     # Show the comparison results of pairs in bar graphs
-    compare_pairs(list_of_pairs, macroavgs, configs.copy(), grid, choice_variable, choice_label, variable_values, interactive_plots=args.interactive_plots)
+    compare_pairs(list_of_pairs, macroavgs, grid, configs.copy(), choice_label, choice_variable, variable_values,
+                  interactive_plots=args.interactive_plots)

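A minimal sketch of the figure layout the special `all` value produces, mirroring the subplot call above; plot_bar_graphs() fills each panel with the actual metric bars:

import matplotlib.pyplot as plt
import numpy as np

interested_lbls = "Ba Sl Ch Cr".split()   # prefixes for Bars, Slate, Chyron*, Credits
fig, axes = plt.subplots(1, len(interested_lbls), figsize=(45, 5), sharex=True, sharey=True)
plt.subplots_adjust(wspace=1)
for ax, lbl in zip(np.ravel(axes), interested_lbls):
    ax.set_title(lbl)   # stand-in for plot_bar_graphs(ax, ...), one panel of bars per label prefix
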
From 328aa4d5dca60a61fbb9384fe8724af7383e334f Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Fri, 25 Oct 2024 14:04:53 -0400
Subject: [PATCH 8/9] updated see_res script to handle prebin and no-prebin
 results together

---
 scripts/see_results.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/scripts/see_results.py b/scripts/see_results.py
index e282762..6a63a5b 100644
--- a/scripts/see_results.py
+++ b/scripts/see_results.py
@@ -173,6 +173,15 @@ def get_labels(macroavgs):
     return list(labels)
 
 
+def find_best_matching_label(target_label, existing_labels):
+    """
+    Find the existing base label that the given target label string starts with (the shortest matching prefix).
+    """
+    for i in range(1, len(target_label) + 1):
+        # try progressively longer prefixes of the target until one matches an existing label
+        if target_label[:i] in existing_labels:
+            return target_label[:i]
+
 def plot_bar_graphs(axis, exp_group, score_dict, config_dict, target_label, target_var, var_vals, colorscheme):
     # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...}
     # and plot a bar graph
@@ -187,6 +196,11 @@ def plot_bar_graphs(axis, exp_group, score_dict, config_dict, target_label, targ
     metric_list = [f'Avg {m}' for m in metrics]
     for i, exp_id in enumerate(ordered_group):
         label_found = False
+        existing_labels = list(score_dict[exp_id].keys())
+        if all(len(x) == 1 for x in existing_labels):
+            # meaning it's `nobinning`, so map the requested bin name back to its base raw label
+            target_label = find_best_matching_label(target_label, existing_labels)
+
         for l in score_dict[exp_id].keys():
             if l.startswith(target_label):
                 for metric in metrics:

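A standalone sketch of the prefix matcher with illustrative inputs, showing how a requested label resolves against either raw single-character labels (a no-binning run) or bin names (a prebinned run):

def find_best_matching_label(target_label, existing_labels):
    # try progressively longer prefixes of the target until one matches an existing label
    for i in range(1, len(target_label) + 1):
        if target_label[:i] in existing_labels:
            return target_label[:i]

find_best_matching_label("Credits", ["B", "S", "I", "C", "R"])   # -> "C"     (no-binning labels)
find_best_matching_label("Slate", ["Bars", "Slate", "Chyron"])   # -> "Slate" (prebinned labels)
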
From e3add4a006210a68cc9b30f9b2408cc964a4e5a6 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Fri, 25 Oct 2024 14:43:47 -0400
Subject: [PATCH 9/9] prebinning experiment done, and decided to do no-binning

---
 modeling/gridsearch.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py
index 111302c..91136eb 100644
--- a/modeling/gridsearch.py
+++ b/modeling/gridsearch.py
@@ -185,7 +185,9 @@
     }
 }
 
-prebin = list(binning_schemes.keys())
+# for a single binning configuration, just pass the binning dict itself
+# for multiple binning configurations (for experimental sweeps), use the binning scheme names (str)
+prebin = [nobinning]
 
 param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid', 'prebin']
 l = locals()
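
For reference, a minimal sketch of the two value types train.py (patch 2) accepts for `prebin`, assuming modeling.gridsearch is importable:

from modeling.gridsearch import nobinning, binning_schemes

config = {"prebin": "relaxed"}                        # a scheme name, as used during the sweep above
config["prebin"] = binning_schemes[config["prebin"]]  # train.py resolves it and tags the train_id with ".relaxed"

config = {"prebin": nobinning}                        # a dict (the choice made here): used as-is, so
                                                      # prebin_name stays '' and drops out of the train_id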