Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ All the merge functions are in core.py.

After that it will be auto loaded and everything will be taken care of on its own.

Profile a function/list combo
-----------------------------

$ python3 profiler.py -f alexis -l timing_1

This will open a snakeviz visualisation of the execution profile in your browser at the end.

Test all the functions
----------------------

Expand All @@ -47,4 +54,3 @@ $ python3 timing.py

- Ctrl-C to skip a test.
- Press Ctrl-C twice in quick succession to exit.

62 changes: 54 additions & 8 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
#
# Every function with a name ending in '_merge' will be auto-loaded


import networkx
from collections import deque
import heapq
from itertools import chain
from collections import deque
from typing import Iterable, TypeVar

import networkx


def rik_merge(lsts):
Expand Down Expand Up @@ -100,10 +101,13 @@ def pairs(lst):
prev = item
yield item, first


def kat_merge(lsts):
"""katrielalex"""
g = networkx.Graph()
for sub_list in lsts:
if not sub_list:
continue
for edge in pairs(sub_list):
g.add_edge(*edge)

Expand Down Expand Up @@ -206,7 +210,6 @@ def che_merge(lsts):
def locatebin(bins, n):
"""Find the bin where list n has ended up: Follow bin references until
we find a bin that has not moved.

"""
while bins[n] != n:
n = bins[n]
Expand All @@ -218,7 +221,7 @@ def ale_merge(data):
bins = list(range(len(data))) # Initialize each bin[n] == n
nums = dict()

data = [set(m) for m in data ] # Convert to sets
data = [set(m) for m in data] # Convert to sets
for r, row in enumerate(data):
for num in row:
if num not in nums:
Expand All @@ -233,19 +236,19 @@ def ale_merge(data):
if dest > r:
dest, r = r, dest # always merge into the smallest bin

data[dest].update(data[r])
data[dest].update(data[r])
data[r] = None
# Update our indices to reflect the move
bins[r] = dest
r = dest
r = dest

# Filter out the empty bins
have = [ m for m in data if m ]
#print len(have), "groups in result" #removed this line
return have


def nik_rew_merge_skip(lsts):
def nik_rew_merge(lsts):
"""Nik's rewrite"""
sets = list(map(set,lsts))
results = []
Expand All @@ -264,3 +267,46 @@ def nik_rew_merge_skip(lsts):
else:
results.append(first)
return results


T = TypeVar('T')


def takeshi_merge(lists: Iterable[Iterable[T]]) -> list[set[T]]:
    """takeshi

    Merge the input collections into pairwise-disjoint sets: any two
    inputs that share at least one item end up in the same output set.

    :param lists: collections of hashable items; empty ones are skipped.
    :return: the merged, pairwise-disjoint sets.
    """
    # bin ref (representative item) -> set of every item merged into that bin.
    # Fixed annotation: ``dict[T: set[T]]`` subscripted dict with a slice;
    # ``dict[T, set[T]]`` is the intended key/value form.
    bins: dict[T, set[T]] = dict()
    # item -> bin ref of the bin currently holding that item.
    bin_refs: dict[T, T] = dict()
    for lst in lists:
        # Materialize once: the signature admits any Iterable, and the body
        # below iterates ``lst`` several times (a one-shot iterator would
        # otherwise be silently exhausted after the first pass).
        lst = list(lst)
        if not lst:
            continue

        # Gather the bin refs of all items in the list that we have
        # already seen.
        encountered_items_bin_refs = {
            bin_refs[item]
            for item in lst
            if item in bin_refs
        }
        if len(encountered_items_bin_refs) >= 1:
            # Some of the items in `lst` have already been seen in a
            # previous iteration. They are therefore already attached
            # to a bin. Select any of their corresponding bin ref.
            bin_ref = encountered_items_bin_refs.pop()
            # If the previously-seen items were not all attached to the
            # same bin, their respective bins need to be merged into
            # the selected one.
            if len(encountered_items_bin_refs) > 0:
                to_merge_bins = [bins.pop(ref) for ref in encountered_items_bin_refs]
                bins[bin_ref].update(chain(*to_merge_bins))
                for item in chain(*to_merge_bins):
                    bin_refs[item] = bin_ref
            bins[bin_ref].update(lst)
            # BUG FIX: items of ``lst`` never seen before must also be
            # registered in ``bin_refs``; otherwise a later list made only
            # of such items would open a second bin containing them again,
            # breaking the disjointness of the result.
            for item in lst:
                bin_refs[item] = bin_ref
        else:
            # None of the items in `lst` have already been seen in a
            # previous iteration. Therefore, we can safely pick any
            # item as our new bin ref and create the corresponding bin.
            bin_ref = lst[0]
            bins[bin_ref] = set(lst)
            for item in lst:
                bin_refs[item] = bin_ref
    return list(bins.values())
66 changes: 66 additions & 0 deletions profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from argparse import ArgumentParser
import cProfile
import json
from pathlib import Path
import random
import subprocess

import core
from timing import build_all_timing_lists


if __name__ == '__main__':
    parser = ArgumentParser(
        prog='Algorithm profiler',
        description='Profile any given function from "core.py", '
                    'with any dataset from "lists/".',
    )
    parser.add_argument(
        '-f', '--function', dest='function', action='store', required=True, type=str,
        help='Name of the function to profile. You can give the function\'s full name '
             '("ale_merge"), the function\'s abbreviated name ("ale") or the function\'s '
             'display name ("alexis", as found in the docstring).'
    )
    parser.add_argument(
        '-l', '--list', dest='list', action='store', required=True, type=str,
        help='Name of the list to profile with (e.g.: "timing_1" or "timing_1.txt").'
    )
    parser.add_argument(
        '-n', '--new', dest='new', action='store_true', default=False,
        help='Rebuild all "timing_*.txt" test lists.'
    )
    args = parser.parse_args()
    if args.new:
        build_all_timing_lists()

    func_name: str = args.function.strip()
    # Resolve by full name ("ale_merge") first, then abbreviated name ("ale").
    func = getattr(core, func_name, getattr(core, f'{func_name}_merge', None))
    if func is None:
        # Finally, resolve by display name: each merge function's docstring
        # holds its author's name (e.g. "alexis").
        for obj in core.__dict__.values():
            if func_name == getattr(obj, '__doc__', None):
                func = obj
                break
    # parser.error() instead of assert: asserts are stripped under
    # "python -O", and the previous message raised AttributeError when
    # func was None (None has no __name__) instead of reporting the error.
    if not callable(func):
        parser.error(f'No function matching "{func_name}" found in core.py.')

    list_name: str = args.list.strip()
    if not list_name.endswith('.txt'):
        list_name = f'{list_name}.txt'
    list_path = Path('.', 'lists', list_name)
    if list_name in ('sven_list.txt', 'test_list.txt'):
        # These two fixtures are stored as JSON.
        lists = json.loads(list_path.read_text())
    elif list_name == 'agf_list.txt':
        # agf's dataset is random by construction: regenerate it each run.
        lists = [
            random.sample(range(10000), random.randint(0, 500))
            for _ in range(2000)
        ]
    else:
        # Remaining datasets are whitespace-separated ints, one list per line.
        with open(list_path, 'r') as f:
            lists = [[int(x) for x in line.split()] for line in f]
    if not lists:
        parser.error('It would be better if the dataset had some data.')

    prof_file_name = f'{func.__name__}_{list_name}.prof'
    cProfile.runctx('func(lists)', globals(), locals(), prof_file_name)
    try:
        # Opens the interactive visualisation in the browser; blocks until
        # the user stops snakeviz.
        subprocess.run(['snakeviz', prof_file_name])
    except KeyboardInterrupt:
        # raise SystemExit rather than exit(): the latter is a `site`
        # convenience and may be absent (e.g. under "python -S").
        raise SystemExit
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
networkx==3.1
snakeviz==2.2.0
4 changes: 2 additions & 2 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ class MergeTestCase(unittest.TestCase):
def setUp(self):
    """Prepare one test: load the fixture lists from disk and run the
    merge function under test on a private copy of them."""
    with open('./lists/test_list.txt') as fixture:
        self.lsts = json.load(fixture)
    # deepcopy shields the fixture from in-place mutation by the merge
    # function; list() normalizes implementations that return iterators.
    self.merged = list(self.merge_func(deepcopy(self.lsts)))

def test_disjoint(self):
    """Check disjoint-ness of merged results"""
    from itertools import combinations
    # Every unordered pair of result sets must share no element.
    for left, right in combinations(self.merged, 2):
        self.assertTrue(left.isdisjoint(right))

def test_coverage(self): # Credit to katrielalex
Expand Down
Loading