Skip to content

Commit

Permalink
make get_hierarchy_for_dt() an outer function
Browse files Browse the repository at this point in the history
  • Loading branch information
lmcmicu committed Nov 16, 2023
1 parent 14e00a0 commit 2e12dda
Showing 1 changed file with 22 additions and 14 deletions.
36 changes: 22 additions & 14 deletions scripts/guess.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,19 @@ def get_valve_config(valve_table):
return json.loads(result.stdout.decode())


def get_hierarchy_for_dt(config, primary_dt_name):
    """
    Given a VALVE configuration and the name of a datatype, return the list of datatype
    configuration entries that make up that datatype's hierarchy: the entry for the primary
    datatype itself comes first, followed by each ancestor reached by following the "parent"
    links, nearest ancestor first.

    The walk stops at the first entry whose "parent" is missing or None. An entry whose
    "datatype" field equals `primary_dt_name` is never added a second time.

    Raises a KeyError if `primary_dt_name`, or any parent named along the way, is not in
    config["datatype"], and a ValueError if the parent links form a cycle.
    """
    dt_config = config["datatype"]
    hierarchy = [dt_config[primary_dt_name]]

    # Walk the parent chain iteratively rather than recursively, so that a long chain cannot
    # exhaust the interpreter's recursion limit and no intermediate lists need to be built.
    visited = set()
    dt_name = primary_dt_name
    while dt_name is not None:
        if dt_name in visited:
            # Without this guard a cyclic parent chain would never terminate.
            raise ValueError(
                f"Cycle detected in the parent hierarchy of datatype '{primary_dt_name}' "
                f"at '{dt_name}'"
            )
        visited.add(dt_name)

        datatype = dt_config[dt_name]
        # The primary datatype is already at the head of the list; do not add it again.
        if datatype["datatype"] != primary_dt_name:
            hierarchy.append(datatype)
        dt_name = datatype.get("parent")

    return hierarchy


def get_datatype_hierarchy(config):
"""
Given a VALVE configuration, return a datatype hierarchy that looks like this:
Expand All @@ -82,19 +95,6 @@ def get_datatype_hierarchy(config):
...],
'dt_name_2': etc.
"""

def get_hierarchy_for_dt(primary_dt_name):
def get_parents(dt_name):
datatypes = []
if dt_name is not None:
datatype = config["datatype"][dt_name]
if datatype["datatype"] != primary_dt_name:
datatypes.append(datatype)
datatypes += get_parents(datatype.get("parent"))
return datatypes

return [config["datatype"][primary_dt_name]] + get_parents(primary_dt_name)

dt_config = config["datatype"]
dt_names = [dt_name for dt_name in dt_config]
leaf_dts = []
Expand All @@ -105,7 +105,7 @@ def get_parents(dt_name):

dt_hierarchy = {}
for leaf_dt in leaf_dts:
dt_hierarchy[leaf_dt] = get_hierarchy_for_dt(leaf_dt)
dt_hierarchy[leaf_dt] = get_hierarchy_for_dt(config, leaf_dt)
return dt_hierarchy


Expand Down Expand Up @@ -235,6 +235,14 @@ def is_match(datatype):
return success_rate

def tiebreak(datatypes):
# TODO: There is a problem with this algorithm, since it implicitly assumes that if two
# datatypes are of the same depth, then neither can be a parent of the other. But this
# is false. We could have, for example,
# leaf_1 -> non_space -> trimmed_line
# leaf_2 -> word -> non_space -> trimmed_line
# Even though non_space is a parent of word, the algorithm classifies both as depth 1.
# We need to have another check in this function to determine whether there are any
# parent-child dependencies between the datatypes in the tiebreaker list.
in_types = []
other_types = []
for dt in datatypes:
Expand Down

0 comments on commit 2e12dda

Please sign in to comment.