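Some more bug handling: disable prior-combination sampling and shuffling, simplify the combination space, remove a debug print, and drop the prior-heuristic flags from the self-test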

outbrain-inc · Mar 21, 2024 · 31d3dd5 · 31d3dd5
1 parent 92408bb
commit 31d3dd5
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 18 deletions.
diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py
@@ -116,8 +116,8 @@ def mixed_rank_graph(
     out_time_struct['encoding_columns'] = end_enc_timer - start_enc_timer
 
     combinations = get_combinations_from_columns(all_columns, args)
-    combinations = prior_combinations_sample(combinations, args)
-    random.shuffle(combinations)
+    #combinations = prior_combinations_sample(combinations, args)
+    #random.shuffle(combinations)
 
     reference_model_features = {}
     if is_prior_heuristic(args):
@@ -195,17 +195,14 @@ def compute_combined_features(
     interaction_order = 2 if is_3mr else args.interaction_order
 
     model_combinations = []
+    full_combination_space = []
     if is_prior_heuristic(args):
         model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
         model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations]
-        feature_combination_space = []
         if args.interaction_order > 1:
-            feature_combination_space = list(
+            full_combination_space = list(
                 itertools.combinations(all_columns, interaction_order),
             )
-
-        full_combination_space = feature_combination_space + [tuple for tuple in model_combinations if tuple not in feature_combination_space]
-        del feature_combination_space
     else:
         if args.reference_model_JSON != '':
             model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
@@ -247,7 +244,6 @@ def compute_combined_features(
     pbar.set_description('Concatenating into final frame ..')
     input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1)
     del tmp_df
-
     return input_dataframe
 
 
@@ -707,7 +703,6 @@ def estimate_importances_minibatches(
                 logger,
                 local_pbar,
             )
-            print(importances_batch)
 
             bounds_storage_batch.append(bounds_storage)
             memory_storage_batch.append(memory_storage)

diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py
@@ -1,38 +1,31 @@
 # helper set of methods that enable anywhere verification of core functions
 from __future__ import annotations
-
 import logging
 import os
 import shutil
 import subprocess
-
 import pandas as pd
-
 logging.basicConfig(
     format='%(asctime)s - %(message)s',
     datefmt='%d-%b-%y %H:%M:%S',
 )
 logger = logging.getLogger('syn-logger')
 logger.setLevel(logging.DEBUG)
-
-
 def conduct_self_test():
     # Simulate full flow, ranking only
     subprocess.run(
         'outrank --task data_generator --num_synthetic_rows 100000', shell=True,
     )
     subprocess.run(
-        'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json --interaction_order 2;',
+        'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;',
         shell=True,
     )
 
     dfx = pd.read_csv('ranking_outputs/pairwise_ranks.tsv', sep='\t')
-
     logger.info("Verifying output's properties ..")
     assert dfx.shape[0] == 120
     assert dfx.shape[1] == 3
     assert dfx['FeatureA'].values.tolist().pop() == 'label-(81; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(81; 100)'
-
     to_remove = ['ranking_outputs', 'test_data_synthetic']
     for path in to_remove:
         if os.path.exists(path) and os.path.isdir(path):
@@ -41,6 +34,9 @@ def conduct_self_test():
 
     logger.info('All tests passed, OutRank seems in shape \N{winking face}')
 
+    shutil.rmtree(path)
+
+    logger.info('All tests passed, OutRank seems in shape \N{winking face}')
+
 if __name__ == '__main__':
     conduct_self_test()
-