Skip to content

Commit

Permalink
some more bug handling
Browse files Browse the repository at this point in the history
  • Loading branch information
bmramor committed Mar 21, 2024
1 parent 92408bb commit 31d3dd5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 18 deletions.
13 changes: 4 additions & 9 deletions outrank/core_ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def mixed_rank_graph(
out_time_struct['encoding_columns'] = end_enc_timer - start_enc_timer

combinations = get_combinations_from_columns(all_columns, args)
combinations = prior_combinations_sample(combinations, args)
random.shuffle(combinations)
#combinations = prior_combinations_sample(combinations, args)
#random.shuffle(combinations)

reference_model_features = {}
if is_prior_heuristic(args):
Expand Down Expand Up @@ -195,17 +195,14 @@ def compute_combined_features(
interaction_order = 2 if is_3mr else args.interaction_order

model_combinations = []
full_combination_space = []
if is_prior_heuristic(args):
model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations]
feature_combination_space = []
if args.interaction_order > 1:
feature_combination_space = list(
full_combination_space = list(
itertools.combinations(all_columns, interaction_order),
)

full_combination_space = feature_combination_space + [tuple for tuple in model_combinations if tuple not in feature_combination_space]
del feature_combination_space
else:
if args.reference_model_JSON != '':
model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
Expand Down Expand Up @@ -247,7 +244,6 @@ def compute_combined_features(
pbar.set_description('Concatenating into final frame ..')
input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1)
del tmp_df

return input_dataframe


Expand Down Expand Up @@ -707,7 +703,6 @@ def estimate_importances_minibatches(
logger,
local_pbar,
)
print(importances_batch)

bounds_storage_batch.append(bounds_storage)
memory_storage_batch.append(memory_storage)
Expand Down
14 changes: 5 additions & 9 deletions outrank/task_selftest.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,31 @@
# Helper methods that enable verification of core functions from anywhere
from __future__ import annotations

import logging
import os
import shutil
import subprocess

import pandas as pd

# Root logging configuration: each record is rendered as
# "<day>-<abbreviated month>-<2-digit year> <HH:MM:SS> - <message>".
logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S')

# Module logger used by the self-test; DEBUG so every message is emitted.
logger = logging.getLogger('syn-logger')
logger.setLevel(logging.DEBUG)


def conduct_self_test():
# Simulate full flow, ranking only
subprocess.run(
'outrank --task data_generator --num_synthetic_rows 100000', shell=True,
)
subprocess.run(
'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json --interaction_order 2;',
'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;',
shell=True,
)

dfx = pd.read_csv('ranking_outputs/pairwise_ranks.tsv', sep='\t')

logger.info("Verifying output's properties ..")
assert dfx.shape[0] == 120
assert dfx.shape[1] == 3
assert dfx['FeatureA'].values.tolist().pop() == 'label-(81; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(81; 100)'

to_remove = ['ranking_outputs', 'test_data_synthetic']
for path in to_remove:
if os.path.exists(path) and os.path.isdir(path):
Expand All @@ -41,6 +34,9 @@ def conduct_self_test():

logger.info('All tests passed, OutRank seems in shape \N{winking face}')

shutil.rmtree(path)

logger.info('All tests passed, OutRank seems in shape \N{winking face}')

if __name__ == '__main__':
conduct_self_test()

0 comments on commit 31d3dd5

Please sign in to comment.