From 31d3dd575b0e4388e431e7275d43097b36ff4562 Mon Sep 17 00:00:00 2001 From: bmramor Date: Thu, 21 Mar 2024 21:36:38 +0000 Subject: [PATCH] some more bug handling --- outrank/core_ranking.py | 13 ++++--------- outrank/task_selftest.py | 14 +++++--------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index c2b70da..dc56aaa 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -116,8 +116,8 @@ def mixed_rank_graph( out_time_struct['encoding_columns'] = end_enc_timer - start_enc_timer combinations = get_combinations_from_columns(all_columns, args) - combinations = prior_combinations_sample(combinations, args) - random.shuffle(combinations) + #combinations = prior_combinations_sample(combinations, args) + #random.shuffle(combinations) reference_model_features = {} if is_prior_heuristic(args): @@ -195,17 +195,14 @@ def compute_combined_features( interaction_order = 2 if is_3mr else args.interaction_order model_combinations = [] + full_combination_space = [] if is_prior_heuristic(args): model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] - feature_combination_space = [] if args.interaction_order > 1: - feature_combination_space = list( + full_combination_space = list( itertools.combinations(all_columns, interaction_order), ) - - full_combination_space = feature_combination_space + [tuple for tuple in model_combinations if tuple not in feature_combination_space] - del feature_combination_space else: if args.reference_model_JSON != '': model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) @@ -247,7 +244,6 @@ def compute_combined_features( pbar.set_description('Concatenating into final frame ..') input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1) del tmp_df - return input_dataframe @@ -707,7 +703,6 @@ def estimate_importances_minibatches( logger, local_pbar, ) - print(importances_batch) bounds_storage_batch.append(bounds_storage) memory_storage_batch.append(memory_storage) diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index a63abd5..9335843 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -1,38 +1,31 @@ # helper set of methods that enable anywhere verification of core functions from __future__ import annotations - import logging import os import shutil import subprocess - import pandas as pd - logging.basicConfig( format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', ) logger = logging.getLogger('syn-logger') logger.setLevel(logging.DEBUG) - - def conduct_self_test(): # Simulate full flow, ranking only subprocess.run( 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json --interaction_order 2;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;', shell=True, ) dfx = pd.read_csv('ranking_outputs/pairwise_ranks.tsv', sep='\t') - logger.info("Verifying output's properties ..") assert dfx.shape[0] == 120 assert dfx.shape[1] == 3 assert dfx['FeatureA'].values.tolist().pop() == 'label-(81; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(81; 100)' - to_remove = ['ranking_outputs', 'test_data_synthetic'] for path in to_remove: if os.path.exists(path) and os.path.isdir(path): @@ -41,6 +34,9 @@ def conduct_self_test(): logger.info('All tests passed, OutRank seems in shape \N{winking face}') + shutil.rmtree(path) + + logger.info('All tests passed, OutRank seems in shape \N{winking face}') + if __name__ == '__main__': conduct_self_test() -