From 3edba034b603f62b9ff6373281b9c62375b09ac0 Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Wed, 16 Oct 2024 12:22:08 -0400 Subject: [PATCH 1/9] fixed error in the full label list (was missing `U`) --- modeling/__init__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modeling/__init__.py b/modeling/__init__.py index 7d4f9a5..8f7d265 100644 --- a/modeling/__init__.py +++ b/modeling/__init__.py @@ -2,10 +2,14 @@ positive_label = '+' # full typology from https://github.com/clamsproject/app-swt-detection/issues/1 -FRAME_TYPES = ["B", "S", "W", "L", "O", - "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"] -FRAME_TYPES_WITH_SUBTYPES = ["B", "SH", "SC", "SD", "SB", "SG", "W", "L", "O", - "M", "I", "N", "E", "P", "Y", "K", "G", "T", "F", "C", "R"] +FRAME_TYPES = [ + "B", "S", "I", "C", "R", "M", "O", "W", + "N", "Y", "U", "K", + "L", "G", "F", "E", "T", + "P", +] +FRAME_TYPES_WITH_SUBTYPES = FRAME_TYPES.copy() + ['SH', 'SC', 'SD', 'SB', 'SG'] +FRAME_TYPES_WITH_SUBTYPES.remove('S') # These are time frames that are typically static (that is, the text does not # move around or change as with rolling credits). These are frame names after From 30b74f901011afab538ad4c1f250fcba1a6643ad Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Wed, 16 Oct 2024 12:25:53 -0400 Subject: [PATCH 2/9] added config key for pre-binning (as `prebin` instead of old `bins`) --- modeling/classify.py | 2 +- modeling/gridsearch.py | 11 +++++++---- modeling/train.py | 30 ++++++++++++++++++------------ 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/modeling/classify.py b/modeling/classify.py index 3d7088b..b8c9fb9 100644 --- a/modeling/classify.py +++ b/modeling/classify.py @@ -20,7 +20,7 @@ def __init__(self, model_stem, logger_name=None): model_config_file = f"{model_stem}.yml" model_checkpoint = f"{model_stem}.pt" model_config = yaml.safe_load(open(model_config_file)) - self.training_labels = train.pretraining_binned_label(model_config) + self.training_labels = train.get_prebinned_labelset(model_config) self.featurizer = data_loader.FeatureExtractor( img_enc_name=model_config["img_enc_name"], pos_length=model_config.get("pos_length", 0), diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py index 737b2bc..cb8a00e 100644 --- a/modeling/gridsearch.py +++ b/modeling/gridsearch.py @@ -119,12 +119,15 @@ ] + guids_with_challenging_images, # also block the challenging images # {"cpb-aacip-254-75r7szdz"}, # effectively no block except ] -# we no longer use bins, keeping this just for historical reference -# bins = [{'pre': {'slate': ['S'], 'chyron': ['I', 'N', 'Y'], 'credit': ['C']}}] +nobinning = {t: t for t in modeling.FRAME_TYPES} +binning_schemes = { + "nobinning": nobinning, +} -param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid'] +prebin = list(binning_schemes.keys()) + +param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid', 'prebin'] l = locals() configs = [] for vals in itertools.product(*[l[key] for key in param_keys]): configs.append(dict(zip(param_keys, vals))) - diff --git a/modeling/train.py b/modeling/train.py index 33129f3..705c97b 100644 --- a/modeling/train.py +++ b/modeling/train.py @@ -56,12 +56,12 @@ def 
get_guids(data_dir): def pretraining_bin(label, specs): - if specs is None or "bins" not in specs: + if specs is None or "prebin" not in specs: return int_encode(label) - for i, ptbin in enumerate(specs["bins"].values()): + for i, ptbin in enumerate(specs["prebin"].values()): if label and label in ptbin: return i - return len(specs["bins"].keys()) + return len(specs["prebin"].keys()) def load_config(config): @@ -161,11 +161,12 @@ def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # the number of labels (after "pre"-binning) - if configs and 'bins' in configs: - num_labels = len(configs['bins'].keys()) + 1 + if configs and 'prebin' in configs: + num_labels = len(configs['prebin'].keys()) + 1 else: num_labels = len(FRAME_TYPES) + 1 - + labelset = get_prebinned_labelset(configs) + # if split_size > #videos, nothing to "hold-out". Hence, single fold training and validate against the "fixed" set if configs['split_size'] >= len(train_all_guids): valid_guids = gridsearch.guids_for_fixed_validation_set @@ -181,7 +182,7 @@ def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H loss, device, train_loader, configs) torch.save(model.state_dict(), export_model_file) p_config = Path(f'{base_fname}.yml') - validate(model, valid_loader, pretraining_binned_label(config), export_fname=f'{base_fname}.csv') + validate(model, valid_loader, labelset, export_fname=f'{base_fname}.csv') export_train_config(config_file, configs, p_config) return # otherwise, do k-fold training with k's size = split_size @@ -206,7 +207,7 @@ def train(indir, outdir, config_file, configs, train_id=time.strftime("%Y%m%d-%H get_net(train.feat_dim, num_labels, configs['num_layers'], configs['dropouts']), loss, device, train_loader, configs) torch.save(model.state_dict(), export_model_file) - p, r, f = validate(model, valid_loader, pretraining_binned_label(config), export_fname=export_csv_file) + p, r, f = validate(model, valid_loader, labelset, export_fname=export_csv_file) val_set_spec.append(validation_guids) p_scores.append(p) r_scores.append(r) @@ -247,9 +248,9 @@ def export_kfold_results(trial_specs, p_scores, r_scores, f_scores, p_results): out.write(f'\trecall = {sum(r_scores) / len(r_scores)}\n') -def pretraining_binned_label(config): - if 'bins' in config: - return list(config["bins"].keys()) + [modeling.negative_label] +def get_prebinned_labelset(config): + if 'prebin' in config: + return list(config["prebin"].keys()) + [modeling.negative_label] return modeling.FRAME_TYPES + [modeling.negative_label] @@ -309,8 +310,13 @@ def train_model(model, loss_fn, device, train_loader, configs): for config in configs: timestamp = time.strftime("%Y%m%d-%H%M%S") backbonename = config['img_enc_name'] + if isinstance(config['prebin'], str): + prebin_name = config['prebin'] + config['prebin'] = gridsearch.binning_schemes[prebin_name] + else: + prebin_name = '' positionalencoding = "pos" + ("F" if config["pos_vec_coeff"] == 0 else "T") train( indir=args.indir, outdir=args.outdir, config_file=args.config, configs=config, - train_id='.'.join([timestamp, backbonename, positionalencoding]) + train_id='.'.join(filter(None, [timestamp, backbonename, prebin_name, positionalencoding])) ) From 5374c7768ef7c9a26abd339b0050cfd3f132eccd Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Wed, 16 Oct 2024 12:26:40 -0400 Subject: [PATCH 3/9] added prebin configs for grid search experiments and updated see_results 
script for new output format --- modeling/gridsearch.py | 61 +++++++++++++++++++++++++++++++++++++ scripts/see_results.py | 69 +++++++++++++++++++++++++++++------------- 2 files changed, 109 insertions(+), 21 deletions(-) diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py index cb8a00e..111302c 100644 --- a/modeling/gridsearch.py +++ b/modeling/gridsearch.py @@ -120,8 +120,69 @@ # {"cpb-aacip-254-75r7szdz"}, # effectively no block except ] nobinning = {t: t for t in modeling.FRAME_TYPES} + +label_bins = { + "Bars": ["B"], + "Chyron-other": ["Y", "U", "K"], + "Chyron-person": ["I", "N"], + "Credits": ["C", "R"], + "Main": ["M"], + "Opening": ["O", "W"], + "Slate": ["S", "S:H", "S:C", "S:D", "S:B", "S:G"], + "Other-text-sm": ["L", "G", "F", "E", "T"], + "Other-text-md": ["M", "O", "W", "L", "G", "F", "E", "T"], + "Other-text-lg": ["M", "O", "W", "Y", "U", "K", "L", "G", "F", "E", "T"], +} + binning_schemes = { "nobinning": nobinning, + + "strict": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron-person": label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + "Main": label_bins["Main"], + "Opening": label_bins["Opening"], + "Chyron-other": label_bins["Chyron-other"], + "Other-text": label_bins["Other-text-sm"], + }, + + "simple": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron-person": label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + "Other-text": label_bins["Other-text-lg"], + }, + + "relaxed": { + "Bars": label_bins["Bars"], + "Slate": label_bins["Slate"], + "Chyron": label_bins["Chyron-other"] + label_bins["Chyron-person"], + "Credits": label_bins["Credits"], + "Other-text": label_bins["Other-text-md"], + }, + + "binary-bars": { + "Bars": label_bins["Bars"], + }, + + "binary-slate": { + "Slate": label_bins["Slate"], + }, + + "binary-chyron-strict": { + "Chyron-person": label_bins["Chyron-person"], + }, + + "binary-chyron-relaxed": { + "Chyron": label_bins["Chyron-other"] + label_bins["Chyron-person"], + }, + + "binary-credits": { + "Credits": label_bins["Credits"], + } } prebin = list(binning_schemes.keys()) diff --git a/scripts/see_results.py b/scripts/see_results.py index 3b36a16..a95e330 100644 --- a/scripts/see_results.py +++ b/scripts/see_results.py @@ -2,6 +2,7 @@ import base64 import csv import os +import pathlib from collections import defaultdict from io import BytesIO from itertools import product @@ -11,28 +12,16 @@ import numpy as np import yaml + # list of bins # Since the bins parameters are too long to print or show on the plot, they are numbered by index. 
-bins = [ - {'pre': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'other-opening': ['W', 'L', 'O', 'M'], - 'chyron': ['I', 'N', 'Y'], 'not-chyron': ['P', 'K', 'G', 'T', 'F'], 'credits': ['C'], 'copyright': ['R']}, - 'post': {'bars': ['bars'], 'slate': ['slate'], 'chyron': ['chyron'], 'credits': ['credits']}}, - {'post': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'chyron': ['I', 'N', 'Y'], 'credits': ['C']}}, - - - {'pre': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'warning': ['W'], 'opening': ['O'], - 'main_title': ['M'], 'chyron': ['I'], 'credits': ['C'], 'copyright': ['R']}, - 'post': {'bars': ['bars'], 'slate': ['slate'], 'chyron': ['chyron'], 'credits': ['credits']}}, - {'post': {'bars': ['B'], 'slate': ['S', 'S:H', 'S:C', 'S:D', 'S:G'], 'chyron': ['I'], 'credits': ['C']}}, - - {'pre': {'chyron': ['I', 'N', 'Y'], 'person-not-chyron': ['E', 'P', 'K']}, 'post': {'chyron': ['chyron']}}, - {'post': {'chyron': ['I', 'N', 'Y']}}, -] - -def get_configs_and_macroavgs(directory): +def process_kfold_validation_results(directory): """ + THIS FUNCTION IS OUTDATED since we no longer actively use k-fold validation. + Hence, the code is not compatible with new file naming convention and structure for new "fixed" validateion experiment results. + 1. Iterate over all files in the directory 2. Get configuration information 3. Calculate the averages of accuracy, precision, recall, and f1-score for each label for each set of k_fold results. @@ -92,6 +81,36 @@ def get_configs_and_macroavgs(directory): return configs, macro_avgs +def process_fixed_validation_results(directory): + configs = {} + scores = {} + for csv_fname in pathlib.Path(directory).glob('*.csv'): + key = csv_fname.stem + timestamp, bb_name, bin_name, posenc = key.split('.') + posenc = posenc[-1] == 'T' + score = defaultdict(lambda: defaultdict(float)) + with open(csv_fname, "r") as csv_f: + csv_reader = csv.DictReader(csv_f) + for row in csv_reader: + if 'Confusion Matrix' in row['Model_Name'] or not row: + break + score[row['Label']]['Accuracy'] += float(row['Accuracy']) + score[row['Label']]['Precision'] += float(row['Precision']) + score[row['Label']]['Recall'] += float(row['Recall']) + score[row['Label']]['F1-Score'] += float(row['F1-Score']) + config_fname = csv_fname.with_suffix('.yml') + with open(config_fname, "r") as yml_f: + data = yaml.safe_load(yml_f) + # delete unnecessary items + data['block_guids_train'] = f"{len(data['block_guids_train'])}@{hash(str(sorted(data['block_guids_train'])))}" + data['block_guids_valid'] = f"{len(data['block_guids_valid'])}@{hash(str(sorted(data['block_guids_valid'])))}" + del data['split_size'] + data['prebin'] = bin_name + data['posenc'] = posenc + configs[key] = data + scores[key] = score + return configs, scores + def get_inverse_configs(configs): """ @@ -118,10 +137,12 @@ def get_grid(configs): for k, v in value.items(): grid[k].add(v) + refined_grid = {} for key, val in grid.items(): - grid[key] = list(val) + if len(val) > 1: + refined_grid[key] = list(val) - return grid + return refined_grid def get_labels(macroavgs): @@ -133,6 +154,7 @@ def get_labels(macroavgs): labels = set() for key, val in macroavgs.items(): labels.update(val.keys()) + labels.remove('-') return list(labels) @@ -162,7 +184,8 @@ def get_pairs_to_compare(grid, inverse_configs, variable): for s in list_of_sets[1:]: intersection_result = intersection_result.intersection(s) - pair_list.append(list(intersection_result)) + if len(intersection_result) > 0: + 
pair_list.append(list(intersection_result)) return pair_list @@ -326,7 +349,11 @@ def user_input_label(label_list): args = parser.parse_args() # Get necessary dictionaries and lists for processing the comparison. - configs, macroavgs = get_configs_and_macroavgs(args.directory) + is_kfold = bool(any(pathlib.Path(args.directory).glob("*kfold*.csv"))) + if is_kfold: + configs, macroavgs = process_kfold_validation_results(args.directory) + else: + configs, macroavgs = process_fixed_validation_results(args.directory) label_list = get_labels(macroavgs) inverse_configs = get_inverse_configs(configs) grid = get_grid(configs) From 985e51ef4f04c757669fdb8435e271a14789d874 Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Thu, 17 Oct 2024 12:27:20 -0400 Subject: [PATCH 4/9] refactoring see_res script --- scripts/see_results.py | 112 +++++++++++++++++++++++------------------ 1 file changed, 64 insertions(+), 48 deletions(-) diff --git a/scripts/see_results.py b/scripts/see_results.py index a95e330..b88d9bc 100644 --- a/scripts/see_results.py +++ b/scripts/see_results.py @@ -59,10 +59,7 @@ def process_kfold_validation_results(directory): with open(file, "r") as f: data = yaml.safe_load(f) # delete unnecessary items - data['block_guids_train'] = f"{len(data['block_guids_train'])}@{hash(str(sorted(data['block_guids_train'])))}" - data['block_guids_valid'] = f"{len(data['block_guids_valid'])}@{hash(str(sorted(data['block_guids_valid'])))}" - del data['split_size'] - configs[key] = data + configs[key] = clean_config(data) # Calculate macro averages for k, v in macro_avg.items(): @@ -81,6 +78,29 @@ def process_kfold_validation_results(directory): return configs, macro_avgs + +def clean_config(config, prebin_name=None): + """ + Clean up the configuration found in a yml file with more human friendly names. 
+ """ + config['block_guids_train'] = f'{len(config["block_guids_train"])}@{hash(str(sorted(config["block_guids_train"])))}' + config['block_guids_valid'] = f'{len(config["block_guids_valid"])}@{hash(str(sorted(config["block_guids_valid"])))}' + + # a short string name of the prebin can be passed as an argument or can be generated from dictionary in the config + if prebin_name: + config['prebin'] = prebin_name + elif 'prebin' in config: + config['prebin'] = f'{len(config["prebin"])}way@{hash(str(config["prebin"]))}' + else: + config['prebin'] = 'None' + + config['posenc'] = config['pos_vec_coeff'] > 0 + del config['pos_vec_coeff'] + + del config['split_size'] + return config + + def process_fixed_validation_results(directory): configs = {} scores = {} @@ -100,14 +120,10 @@ def process_fixed_validation_results(directory): score[row['Label']]['F1-Score'] += float(row['F1-Score']) config_fname = csv_fname.with_suffix('.yml') with open(config_fname, "r") as yml_f: - data = yaml.safe_load(yml_f) + config = yaml.safe_load(yml_f) + config = clean_config(config, bin_name) # delete unnecessary items - data['block_guids_train'] = f"{len(data['block_guids_train'])}@{hash(str(sorted(data['block_guids_train'])))}" - data['block_guids_valid'] = f"{len(data['block_guids_valid'])}@{hash(str(sorted(data['block_guids_valid'])))}" - del data['split_size'] - data['prebin'] = bin_name - data['posenc'] = posenc - configs[key] = data + configs[key] = config scores[key] = score return configs, scores @@ -190,19 +206,19 @@ def get_pairs_to_compare(grid, inverse_configs, variable): return pair_list -def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_show, variable_values, interactive_plots=True): +def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label_to_show, variable_values, interactive_plots=True): """ For list of pairs got from get_pairs_to_compare function, compare each pair by plotting bar graphs for given label. :param list_of_pairs: got from get_pairs_to_compare function for given variable :param macroavgs: :param configs: :param grid: - :param variable: - :param label_to_show: User choice of label (including overall) to show scores in the graph. + :param var_to_compare: + :param label_to_show: User choice of label (including overall) to show scores in the graph. 
""" # Form parameter to color dictionary for consistency in color across all pairs - param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[variable])) + param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[var_to_compare])) html = '<html><head><title>Comparison of pairs</title></head><body>' @@ -216,7 +232,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_sh ordered_pair = [None] * len(variable_values) for i, value in enumerate(variable_values): for exp_id in pair: - if configs[exp_id][variable] == value: + if configs[exp_id][var_to_compare] == value: ordered_pair[i] = exp_id scores = macroavgs[ordered_pair[0]][label_to_show] data = defaultdict(list) @@ -233,42 +249,42 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, variable, label_to_sh data[exp_id].append(0.0) data = dict(data) + if len(data) == 0: + continue # plot a bar graph x = np.arange(len(metric_list)) # the label locations - l = len(data) # length of data (it varies by set) - width = 1/(l+1) # the width of the bars + width = 1/(len(data)+1) # the width of the bars multiplier = 0 - if l != 0: - for exp_id, scores in data.items(): - id_variable = str(variable) + ": " + str(configs[exp_id][variable]) - offset = width * multiplier - rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][variable])]) - ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3) - multiplier += 1 - - # Add some text for labels, title and custom x-axis tick labels, etc. - ax.set_ylabel('Score') - ax.set_title(str(label_to_show)) - ax.set_xticks(x + width*(l-1)/2, metric_list) - ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5)) - ax.set_ylim(0.0, 1.15) - # Show information on fixed parameters. - configs[exp_id].pop(variable) - string_configs = "" - for k, v in configs[exp_id].items(): - string_configs += str(k) + ": " + str(v) + "\n" - ax.text(0.99, 0.97, string_configs, - verticalalignment='bottom', horizontalalignment='right', - transform=ax.transAxes, - color='green', fontsize='small') - - if interactive_plots: - plt.show() - else: - temp_io_stream = BytesIO() - fig.savefig(temp_io_stream, format='png', bbox_inches='tight') - html += f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>' + for exp_id, scores in data.items(): + id_variable = str(var_to_compare) + ": " + str(configs[exp_id][var_to_compare]) + offset = width * multiplier + rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][var_to_compare])]) + ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3) + multiplier += 1 + + # Add some text for labels, title and custom x-axis tick labels, etc. + ax.set_ylabel('Score') + ax.set_title(str(label_to_show)) + ax.set_xticks(x + width*(len(data)-1)/2, metric_list) + ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5)) + ax.set_ylim(0.0, 1.15) + # Show information on fixed parameters. 
+ configs[exp_id].pop(var_to_compare) + string_configs = f'{exp_id}\n' + for k, v in configs[exp_id].items(): + string_configs += str(k) + ": " + str(v) + "\n" + ax.text(0.99, 0.97, string_configs, + verticalalignment='bottom', horizontalalignment='right', + transform=ax.transAxes, + color='green', fontsize='small') + + if interactive_plots: + plt.show() + else: + temp_io_stream = BytesIO() + fig.savefig(temp_io_stream, format='png', bbox_inches='tight') + html += f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>' plt.cla() for i, var_val in enumerate(variable_values): if interactive_plots: From be8b40bf457527cc1aaa3418c0170511149b1b2b Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Thu, 17 Oct 2024 12:37:48 -0400 Subject: [PATCH 5/9] now can ignore negative label when comparing exp results --- scripts/see_results.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/see_results.py b/scripts/see_results.py index b88d9bc..b0b0f1c 100644 --- a/scripts/see_results.py +++ b/scripts/see_results.py @@ -101,7 +101,7 @@ def clean_config(config, prebin_name=None): return config -def process_fixed_validation_results(directory): +def process_fixed_validation_results(directory, include_negative_label=False): configs = {} scores = {} for csv_fname in pathlib.Path(directory).glob('*.csv'): @@ -114,6 +114,9 @@ def process_fixed_validation_results(directory): for row in csv_reader: if 'Confusion Matrix' in row['Model_Name'] or not row: break + # ignore negative class + if row['Label'] == '-' and not include_negative_label: + continue score[row['Label']]['Accuracy'] += float(row['Accuracy']) score[row['Label']]['Precision'] += float(row['Precision']) score[row['Label']]['Recall'] += float(row['Recall']) @@ -170,7 +173,8 @@ def get_labels(macroavgs): labels = set() for key, val in macroavgs.items(): labels.update(val.keys()) - labels.remove('-') + if '-' in labels: + labels.remove('-') return list(labels) @@ -294,7 +298,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label if not interactive_plots: html += '</body></html>' - with open(f'results-comparison-{variable}-{label_to_show}.html', 'w') as f: + with open(f'results-comparison-{var_to_compare}-{label_to_show}.html', 'w') as f: f.write(html) @@ -361,6 +365,11 @@ def user_input_label(label_list): action='store_true', help='Flag to show plots in interactive mode. If not set, the program will save all the plots in a html file.' ) + parser.add_argument( + '-n', '--negativelabel', + action='store_true', + help='Flag to include the negative label when averaging scores.' 
+ ) args = parser.parse_args() @@ -369,7 +378,7 @@ def user_input_label(label_list): if is_kfold: configs, macroavgs = process_kfold_validation_results(args.directory) else: - configs, macroavgs = process_fixed_validation_results(args.directory) + configs, macroavgs = process_fixed_validation_results(args.directory, args.negativelabel) label_list = get_labels(macroavgs) inverse_configs = get_inverse_configs(configs) grid = get_grid(configs) From 061c4745ce34e9413c2bc4584e1bc6da5d863527 Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Thu, 17 Oct 2024 16:58:37 -0400 Subject: [PATCH 6/9] see_res now use prefix-based label targetting instead of strict str match --- scripts/see_results.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/scripts/see_results.py b/scripts/see_results.py index b0b0f1c..a04c60a 100644 --- a/scripts/see_results.py +++ b/scripts/see_results.py @@ -218,7 +218,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label :param configs: :param grid: :param var_to_compare: - :param label_to_show: User choice of label (including overall) to show scores in the graph. + :param target_label: User choice of label (including overall) to show scores in the graph. """ # Form parameter to color dictionary for consistency in color across all pairs @@ -238,17 +238,21 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label for exp_id in pair: if configs[exp_id][var_to_compare] == value: ordered_pair[i] = exp_id - scores = macroavgs[ordered_pair[0]][label_to_show] + for _, labels in macroavgs.items(): + for label in labels.keys(): + if label.startswith(target_label): + target_label = label + scores = macroavgs[ordered_pair[0]][target_label] data = defaultdict(list) metric_list = ['Avg Accuracy', 'Avg Precision', 'Avg Recall', 'Avg F1-Score'] for i, exp_id in enumerate(ordered_pair): for metric, score in scores.items(): - if label_to_show in macroavgs[exp_id]: - data[exp_id].append(macroavgs[exp_id][label_to_show][metric]) + if target_label in macroavgs[exp_id]: + data[exp_id].append(macroavgs[exp_id][target_label][metric]) if 'preci' in metric.lower(): - all_ps[i].append(macroavgs[exp_id][label_to_show][metric]) + all_ps[i].append(macroavgs[exp_id][target_label][metric]) if 'recal' in metric.lower(): - all_rs[i].append(macroavgs[exp_id][label_to_show][metric]) + all_rs[i].append(macroavgs[exp_id][target_label][metric]) else: data[exp_id].append(0.0) data = dict(data) @@ -269,7 +273,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label # Add some text for labels, title and custom x-axis tick labels, etc. 
ax.set_ylabel('Score') - ax.set_title(str(label_to_show)) + ax.set_title(str(target_label)) ax.set_xticks(x + width*(len(data)-1)/2, metric_list) ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5)) ax.set_ylim(0.0, 1.15) @@ -298,7 +302,7 @@ def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label if not interactive_plots: html += '</body></html>' - with open(f'results-comparison-{var_to_compare}-{label_to_show}.html', 'w') as f: + with open(f'results-comparison-{var_to_compare}-{target_label}.html', 'w') as f: f.write(html) From 1d77c5e31039c4e3e83be90469942a5da15eea6f Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Thu, 17 Oct 2024 19:18:39 -0400 Subject: [PATCH 7/9] added special value to 2-d render bar charts per label to see_res script --- scripts/see_results.py | 166 +++++++++++++++++++++-------------------- 1 file changed, 86 insertions(+), 80 deletions(-) diff --git a/scripts/see_results.py b/scripts/see_results.py index a04c60a..e282762 100644 --- a/scripts/see_results.py +++ b/scripts/see_results.py @@ -6,17 +6,12 @@ from collections import defaultdict from io import BytesIO from itertools import product -from statistics import mean import matplotlib.pyplot as plt import numpy as np import yaml -# list of bins -# Since the bins parameters are too long to print or show on the plot, they are numbered by index. - - def process_kfold_validation_results(directory): """ THIS FUNCTION IS OUTDATED since we no longer actively use k-fold validation. @@ -178,6 +173,64 @@ def get_labels(macroavgs): return list(labels) +def plot_bar_graphs(axis, exp_group, score_dict, config_dict, target_label, target_var, var_vals, colorscheme): + # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...} + # and plot a bar graph + # re-order the pair to show the variable values in the same order as in the grid + ordered_group = [None] * len(var_vals) + for i, value in enumerate(var_vals): + for exp_id in exp_group: + if config_dict[exp_id][target_var] == value: + ordered_group[i] = exp_id + metrics = score_dict[ordered_group[0]]["Overall"].keys() + data = defaultdict(list) + metric_list = [f'Avg {m}' for m in metrics] + for i, exp_id in enumerate(ordered_group): + label_found = False + for l in score_dict[exp_id].keys(): + if l.startswith(target_label): + for metric in metrics: + data[exp_id].append(score_dict[exp_id][l][metric]) + label_found = True + break + if not label_found: + data[exp_id].append(0.0) + data = dict(data) + + if len(data) == 0: + return None, None + # plot a bar graph + x = np.arange(len(metric_list)) # the label locations + width = 1/(len(data)+1) # the width of the bars + multiplier = 0 + + for exp_id, scores in data.items(): + if len(scores) == 1 and scores[0] == 0.0: + continue + id_variable = str(target_var) + ": " + str(config_dict[exp_id][target_var]) + offset = width * multiplier + rects = axis.bar(x + offset, scores, width, label=id_variable, color=colorscheme[str(config_dict[exp_id][target_var])]) + axis.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3) + multiplier += 1 + + # Add some text for labels, title and custom x-axis tick labels, etc. + axis.set_ylabel('Score') + axis.set_title(str(target_label)) + axis.set_xticks(x + width * (len(data) - 1) / 2, metric_list) + axis.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5)) + axis.set_ylim(0.0, 1.15) + # Show information on fixed parameters. 
+ string_configs = f'{exp_id}\n' + for k, v in config_dict[exp_id].items(): + if k != target_var: + string_configs += str(k) + ": " + str(v) + "\n" + axis.text(0.99, 0.97, string_configs, + verticalalignment='bottom', horizontalalignment='right', + transform=axis.transAxes, + color='green', fontsize='small') + return axis + + def get_pairs_to_compare(grid, inverse_configs, variable): """ Get a list of pairs(lists of IDs) where all configurations are the same except for one given variable. @@ -193,7 +246,7 @@ def get_pairs_to_compare(grid, inverse_configs, variable): # Form all possible configurations of parameters from grid and store it as a list of dictionary form. conf_dicts = [dict(zip(grid.keys(), config)) for config in list(product(*grid.values()))] - # Get all the possible lists of pairs(IDs) using inverse_configs dictionary and intersection of them for every configuration. + # Get all the possible lists of exps using inverse_configs dictionary and intersection of them for every config pair_list = [] for conf_dict in conf_dicts: list_of_sets = [inverse_configs[param_name][val] for param_name, val in conf_dict.items()] @@ -210,99 +263,50 @@ def get_pairs_to_compare(grid, inverse_configs, variable): return pair_list -def compare_pairs(list_of_pairs, macroavgs, configs, grid, var_to_compare, label_to_show, variable_values, interactive_plots=True): +def compare_pairs(exp_groups, scores, conf_grid, configs, target_lbl, target_var, var_vals, interactive_plots=True): """ For list of pairs got from get_pairs_to_compare function, compare each pair by plotting bar graphs for given label. - :param list_of_pairs: got from get_pairs_to_compare function for given variable - :param macroavgs: - :param configs: - :param grid: - :param var_to_compare: - :param target_label: User choice of label (including overall) to show scores in the graph. + :param exp_groups: got from get_pairs_to_compare function for given variable + :param scores: PRF scores from each experiment configuration + :param conf_grid: grid of configurations used in this experiment + :param configs: actual configurations used in this experiment + :param target_lbl: User choice of label (including overall) to show scores in the graph. + a special value `all` will generate plots for all "normalized" labels (put them horizontally) + :param target_var: configuration key name to use as a variable to compare, all other keys are fixed. + :param var_vals: list of values for the variable to compare + :param interactive_plots: flag to show plots in realtime. 
If false, the program will save all the plots in a html """ # Form parameter to color dictionary for consistency in color across all pairs - param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(grid[var_to_compare])) + param_to_color = dict((str(value), f'C{i}') for i, value in enumerate(conf_grid[target_var])) html = '<html><head><title>Comparison of pairs</title></head><body>' # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...} # and plot a bar graph - fig, ax = plt.subplots() - all_ps = [[] for _ in range(len(list_of_pairs[0]))] - all_rs = [[] for _ in range(len(list_of_pairs[0]))] - for pair in list_of_pairs: - # re-order the pair to show the variable values in the same order as in the grid - ordered_pair = [None] * len(variable_values) - for i, value in enumerate(variable_values): - for exp_id in pair: - if configs[exp_id][var_to_compare] == value: - ordered_pair[i] = exp_id - for _, labels in macroavgs.items(): - for label in labels.keys(): - if label.startswith(target_label): - target_label = label - scores = macroavgs[ordered_pair[0]][target_label] - data = defaultdict(list) - metric_list = ['Avg Accuracy', 'Avg Precision', 'Avg Recall', 'Avg F1-Score'] - for i, exp_id in enumerate(ordered_pair): - for metric, score in scores.items(): - if target_label in macroavgs[exp_id]: - data[exp_id].append(macroavgs[exp_id][target_label][metric]) - if 'preci' in metric.lower(): - all_ps[i].append(macroavgs[exp_id][target_label][metric]) - if 'recal' in metric.lower(): - all_rs[i].append(macroavgs[exp_id][target_label][metric]) - else: - data[exp_id].append(0.0) - data = dict(data) - - if len(data) == 0: - continue - # plot a bar graph - x = np.arange(len(metric_list)) # the label locations - width = 1/(len(data)+1) # the width of the bars - multiplier = 0 - - for exp_id, scores in data.items(): - id_variable = str(var_to_compare) + ": " + str(configs[exp_id][var_to_compare]) - offset = width * multiplier - rects = ax.bar(x + offset, scores, width, label=id_variable, color=param_to_color[str(configs[exp_id][var_to_compare])]) - ax.bar_label(rects, fmt='%.6s', fontsize='small', rotation='vertical', padding=3) - multiplier += 1 - - # Add some text for labels, title and custom x-axis tick labels, etc. - ax.set_ylabel('Score') - ax.set_title(str(target_label)) - ax.set_xticks(x + width*(len(data)-1)/2, metric_list) - ax.legend(loc='center left', fontsize='small', ncol=1, bbox_to_anchor=(1, 0.5)) - ax.set_ylim(0.0, 1.15) - # Show information on fixed parameters. 
- configs[exp_id].pop(var_to_compare) - string_configs = f'{exp_id}\n' - for k, v in configs[exp_id].items(): - string_configs += str(k) + ": " + str(v) + "\n" - ax.text(0.99, 0.97, string_configs, - verticalalignment='bottom', horizontalalignment='right', - transform=ax.transAxes, - color='green', fontsize='small') + for group in exp_groups: + if target_lbl == 'all': + interested_lbls = "Ba Sl Ch Cr".split() + fig, axes = plt.subplots(1, len(interested_lbls), figsize=(45, 5), sharex=True, sharey=True) + plt.subplots_adjust(wspace=1) + for ax, lbl in zip(np.ravel(axes), interested_lbls): + plot_bar_graphs(ax, group, scores, configs, lbl, target_var, var_vals, param_to_color) + else: + fig, ax = plt.subplots() + plot_bar_graphs(ax, group, scores, configs, target_lbl, target_var, var_vals, param_to_color) if interactive_plots: plt.show() else: temp_io_stream = BytesIO() fig.savefig(temp_io_stream, format='png', bbox_inches='tight') - html += f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>' + htmlized = f'<p><img src="data:image/png;base64,{base64.b64encode(temp_io_stream.getvalue()).decode("utf-8")}"></p>' + html += htmlized plt.cla() - for i, var_val in enumerate(variable_values): - if interactive_plots: - print(f'{var_val}\t{round(mean(all_ps[i]), 4)}\t{round(mean(all_rs[i]), 4)}') - else: - html += f'<p>{var_val}\t{round(mean(all_ps[i]), 4)}\t{round(mean(all_rs[i]), 4)}</p>' if not interactive_plots: html += '</body></html>' - with open(f'results-comparison-{var_to_compare}-{target_label}.html', 'w') as f: + with open(f'results-comparison-{target_var}-{target_lbl}.html', 'w') as f: f.write(html) @@ -384,6 +388,7 @@ def user_input_label(label_list): else: configs, macroavgs = process_fixed_validation_results(args.directory, args.negativelabel) label_list = get_labels(macroavgs) + label_list.append('all') inverse_configs = get_inverse_configs(configs) grid = get_grid(configs) if args.config_key is None: @@ -402,4 +407,5 @@ def user_input_label(label_list): variable_values = sorted(grid[choice_variable].copy()) list_of_pairs = get_pairs_to_compare(grid.copy(), inverse_configs, choice_variable) # Show the comparison results of pairs in bar graphs - compare_pairs(list_of_pairs, macroavgs, configs.copy(), grid, choice_variable, choice_label, variable_values, interactive_plots=args.interactive_plots) + compare_pairs(list_of_pairs, macroavgs, grid, configs.copy(), choice_label, choice_variable, variable_values, + interactive_plots=args.interactive_plots) From 328aa4d5dca60a61fbb9384fe8724af7383e334f Mon Sep 17 00:00:00 2001 From: Keigh Rim <keigh.rim@gmail.com> Date: Fri, 25 Oct 2024 14:04:53 -0400 Subject: [PATCH 8/9] updated see_res script to handle prebin and no-prebin altogether --- scripts/see_results.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/scripts/see_results.py b/scripts/see_results.py index e282762..6a63a5b 100644 --- a/scripts/see_results.py +++ b/scripts/see_results.py @@ -173,6 +173,15 @@ def get_labels(macroavgs): return list(labels) +def find_best_matching_label(target_label, existing_labels): + """ + Find the best base label that matches the target label, given a target string. 
+    """
+    for i in range(len(target_label), 0, -1):
+
+        if target_label[:i] in existing_labels:
+            return target_label[:i]
+
 def plot_bar_graphs(axis, exp_group, score_dict, config_dict, target_label, target_var, var_vals, colorscheme):
     # For each pair, form a data dictionary as data = { ID1: [accuracy, precision, recall, f1], ...}
     # and plot a bar graph
@@ -187,6 +196,11 @@ def plot_bar_graphs(axis, exp_group, score_dict, config_dict, target_label, targ
     for i, exp_id in enumerate(ordered_group):
         label_found = False
+        existing_labels = list(score_dict[exp_id].keys())
+        if all(len(x) == 1 for x in existing_labels):
+            # `nobinning` results keep the raw single-letter labels, so map the
+            # binned target label back to its closest raw-label prefix
+            target_label = find_best_matching_label(target_label, existing_labels) or target_label
         for l in score_dict[exp_id].keys():
             if l.startswith(target_label):
                 for metric in metrics:

From e3add4a006210a68cc9b30f9b2408cc964a4e5a6 Mon Sep 17 00:00:00 2001
From: Keigh Rim <keigh.rim@gmail.com>
Date: Fri, 25 Oct 2024 14:43:47 -0400
Subject: [PATCH 9/9] prebinning experiment done, and decided to do no-binning

---
 modeling/gridsearch.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/modeling/gridsearch.py b/modeling/gridsearch.py
index 111302c..91136eb 100644
--- a/modeling/gridsearch.py
+++ b/modeling/gridsearch.py
@@ -185,7 +185,9 @@
 }
 }

-prebin = list(binning_schemes.keys())
+# for a single binning configuration, just use the binning dict
+# for multiple binning configurations (for experimental reasons), use the binning scheme names (str)
+prebin = [nobinning]

 param_keys = ['split_size', 'num_epochs', 'num_layers', 'pos_length', 'pos_unit', 'dropouts', 'img_enc_name', 'pos_abs_th_front', 'pos_abs_th_end', 'pos_vec_coeff', 'block_guids_train', 'block_guids_valid', 'prebin']
 l = locals()
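
For readers following the prebinning changes in these patches, the snippet below is a minimal, self-contained sketch of how a `prebin` scheme from modeling/gridsearch.py collapses the raw SWT frame labels into coarse bins, mirroring the logic of `pretraining_bin` and `get_prebinned_labelset` in modeling/train.py. It is illustrative only and does not import the repository's `modeling` package; the names `simple_prebin`, `prebin_index`, and `prebinned_labelset` are invented here, while the scheme itself is copied from the "simple" entry in `binning_schemes` above.

    NEGATIVE_LABEL = '-'  # plays the role of modeling.negative_label

    # the "simple" scheme from binning_schemes in modeling/gridsearch.py
    simple_prebin = {
        "Bars": ["B"],
        "Slate": ["S", "S:H", "S:C", "S:D", "S:B", "S:G"],
        "Chyron-person": ["I", "N"],
        "Credits": ["C", "R"],
        "Other-text": ["M", "O", "W", "Y", "U", "K", "L", "G", "F", "E", "T"],
    }

    def prebin_index(raw_label, prebin):
        # same idea as pretraining_bin(): bins are numbered in insertion order,
        # and any label not listed falls into a final catch-all bin
        for i, members in enumerate(prebin.values()):
            if raw_label and raw_label in members:
                return i
        return len(prebin)

    def prebinned_labelset(prebin):
        # same idea as get_prebinned_labelset(): bin names plus the negative label
        return list(prebin.keys()) + [NEGATIVE_LABEL]

    print(prebinned_labelset(simple_prebin))   # ['Bars', 'Slate', 'Chyron-person', 'Credits', 'Other-text', '-']
    print(prebin_index('S:H', simple_prebin))  # 1 -> Slate
    print(prebin_index('P', simple_prebin))    # 5 -> catch-all / negative bin

With the final patch setting `prebin = [nobinning]`, every top-level frame type maps to its own bin, so training keeps the full label inventory; the named schemes remain in `binning_schemes` for any future pre-binning experiments.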