Skip to content

Commit

Permalink
shortest strings balancing
Browse files Browse the repository at this point in the history
  • Loading branch information
vhavlena committed Feb 1, 2022
1 parent 0f3a0bd commit 86d9601
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 157 deletions.
58 changes: 54 additions & 4 deletions noodler/algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
* split_segment_aut: split segment automaton into segments
* single_final_init: restrict numbers of initial and final states to 1
"""
from typing import Callable, Sequence, Dict
from typing import Callable, Sequence, Dict, Set

import awalipy
import ast
import copy
from collections import defaultdict

from .core import Aut, SegAut, TransID

Expand Down Expand Up @@ -52,6 +54,50 @@ def multiop(automata: Sequence[Aut],
return res


def get_shortest_strings(aut: Aut) -> Set[str]:
"""!
Get shortest strings (regarding their length) of a given automaton.
@param aut: Automaton
@return Set of shortest strings
"""

short = defaultdict(lambda: (-1, set()))

concat = lambda x, s: set([ x + u for u in s ])

for fin in aut.final_states():
short[fin] = (0, set([""]))

changed = True
while changed:
changed = False
for tr in aut.transitions():
orig_l, orig_w = short[aut.src_of(tr)]
act_l, act_w = orig_l, copy.deepcopy(orig_w)
dst_l, dst_w = short[aut.dst_of(tr)]

if act_l == -1:
label = awali_to_string(aut.label_of(tr))
act_w = concat(label, dst_w)
act_l = dst_l + 1
elif dst_l + 1 < dst_l:
label = awali_to_string(aut.label_of(tr))
act_w = concat(label, dst_w)
act_l = dst_l + 1
elif dst_l + 1 == act_l:
label = awali_to_string(aut.label_of(tr))
act_w.union(concat(label, dst_w))
act_l = dst_l + 1
if orig_w != act_w:
short.update([(aut.src_of(tr), (act_l, act_w))])
short[aut.src_of(tr)] = act_l, act_w
changed = True

assert(len(aut.initial_states()) == 1)
return short[aut.initial_states()[0]][1]


def equivalent_all(auts: Sequence[Aut]) -> bool:
"""
Check whether all automata are equivalent.
Expand Down Expand Up @@ -138,6 +184,12 @@ def chain_automata(automata: Sequence[Aut]) -> SegAut:
return res


def awali_to_string(str):
alp = str.replace("\\0x0", "\\x")
alp = ast.parse("\""+alp+"\"")
return alp.body[0].value.value


# noinspection PyIncorrectDocstring
def eps_preserving_product(aut_l: SegAut,
aut_r: Aut,
Expand Down Expand Up @@ -176,9 +228,7 @@ def eps_preserving_product(aut_l: SegAut,
todo = []

alphabet = aut_l.alphabet()
alphabet = alphabet.replace("\\0x0", "\\x")
alp = ast.parse("\""+alphabet+"\"")
alphabet = alp.body[0].value.value
alphabet = awali_to_string(alphabet)

new_aut = awalipy.Automaton(alphabet)
new_aut.allow_eps_transition_here()
Expand Down
133 changes: 17 additions & 116 deletions noodler/formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,23 +131,38 @@ def get_side(self, side: str) -> str:


def get_vars(self) -> Set[str]:
"""!
Get variables of the equation
"""
return set(self.left) | set(self.right)


def get_vars_side(self, side: str) -> Set[str]:
"""!
Get variables of the equation for a give side (left/right)
"""
if side == "left":
return set(self.left)
return set(self.right)


def more_occur_side(self, side: str) -> bool:
"""!
Is there any variable occurring multiple times on a given side?
"""
if side == "left":
return len(set(self.left)) != len(self.left)
return len(set(self.right)) != len(self.right)


def is_sub_equation(self) -> bool:
return (not self.more_occur_side("right")) and (len(self.get_vars_side("right") & self.get_vars_side("left")) == 0)
def is_straightline(self) -> bool:
"""!
Is the equation of the form Xn = X1 X2 ... X(n-1)
"""

if not len(self.get_vars_side("right") & self.get_vars_side("left")) == 0:
return False
return len(self.get_side("left")) == 1 or len(self.get_side("right")) == 1


def __str__(self):
Expand Down Expand Up @@ -284,117 +299,3 @@ def build_op(type: ConstraintType, lst: Sequence["StringConstraint"]):
for i in range(1, len(lst)):
act = StringConstraint(type, act, lst[i])
return act


@dataclass
class StringEqNode:
succ : Sequence["StringEqNode"]
eq : StringEquation
id : int


class StringEqGraph:

def __init__(self, vert: Sequence[StringEqNode], eqs: Set[StringEquation]):
self.vertices = vert
self.equations = eqs
#self.sub_eqs = set()
#self._compute_sub_eqs()


def _compute_sub_eqs(self, vert):

ret = set()
disjoint = set()
for v in vert:
if v.is_sub_equation() and len(disjoint & v.get_vars_side("right")) == 0:
ret.add(v)
disjoint = disjoint.union(v.get_vars_side("right"))
return ret


def are_eq_sub(self, tmp: Set[StringEquation]) -> bool:
return tmp <= self.sub_eqs


def are_all_nontriv_sub(self) -> bool:
for v in self.vertices:
if len(v.succ) == 0:
continue
if not (v.eq in self.sub_eqs):
return False
return True

def get_sccs(self) -> Sequence[Sequence[StringEqNode]]:
n = len(self.vertices)
graph = [[0]*n for i in range(n)]
for v in self.vertices:
for vp in v.succ:
graph[vp.id][v.id] = 1

mtx = csr_matrix(graph)
k, labels = connected_components(csgraph=mtx, connection="strong", directed=True, return_labels=True)

scc = [[] for i in range(n)]
for i in range(n):
scc[labels[i]].append(self.vertices[i])

return scc


def reduce(self):
other_vars: dict[StringEquation, Set[str]] = defaultdict(lambda: set())

for v1 in self.vertices:
for v2 in self.vertices:
if v1.eq == v2.eq:
continue
other_vars[v1.eq] = other_vars[v1.eq].union(v2.eq.get_vars())

# for v in self.vertices:
# if v.eq.is_sub_equation():
# v.succ = [s for s in v.succ if s.eq != v.eq.switched]

for v in self.vertices:
if v.eq.is_sub_equation():
succp = []
for prime in v.succ:
if len(prime.eq.get_vars() & v.eq.get_vars_side("left")) == 0:
continue
succp.append(prime)
v.succ = succp


for scc in self.get_sccs():
rem = []
for s in scc:
if not s.eq.switched in rem:
rem.append(s.eq)
if len(self._compute_sub_eqs(rem)) == len(rem):
for v in scc:
v.succ = [s for s in v.succ if s.eq != v.eq.switched]




def to_graphwiz(self):
"""!
Convert the graph into the DOT format.
"""

ret = "digraph \"Equation\" { rankdir=LR; \n"
ret += "{ rank = LR }\n"
ret += "node [shape = rectangle];\n"
num: Dict[StringEquation, int] = dict()

c = 0
for eq in self.vertices:
num[eq.eq] = c
ret += "\"{0}\" [label=\"{1}\"]\n".format(c, eq.eq)
c = c + 1

for eq in self.vertices:
for s in eq.succ:
ret += "\"{0}\" -> \"{1}\";\n".format(num[eq.eq], num[s.eq])
ret += "}\n";
return ret
2 changes: 1 addition & 1 deletion noodler/graph_noodler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# Types
from .core import Aut, AutConstraints, SegAut

from .formula import StringEqNode, StringEqGraph
from .graph_formula import StringEqNode, StringEqGraph
from .algos import eps_preserving_product, eps_segmentation, multiop, single_final_init, split_segment_aut
from .noodler import noodlify, noodlify_query, create_unified_query, is_unified, SimpleNoodler

Expand Down
75 changes: 39 additions & 36 deletions noodler/sequery.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,17 @@
from typing import Sequence, Dict, Collection, Optional

import awalipy
import copy

from .utils import show_automata
from .algos import chain_automata, multiop
from .algos import chain_automata, multiop, get_shortest_strings
#types
from .core import AutConstraints, Aut, Constraints, SegAut, RE
# classes
from .core import StringEquation, StringConstraint, ConstraintType
# functions
from .core import create_automata_constraints

from .formula import StringEqGraph, StringEqNode


DEFAULTALPHABET = "abc"

Expand Down Expand Up @@ -201,26 +200,53 @@ def automata_for_side(self, side: str,


def is_sub_balanced(self) -> bool:
"""!
Check if the query is one side balanced.
query is balanced if the shortest strings of the first-side automaton
belong to the language of the second-side automaton.
@return Balanced?
"""

auts_l = self.automata_for_side("left")
auts_r = self.automata_for_side("right")

aut_l = multiop(auts_l, lambda x,y: x.concatenate(y))
aut_r = multiop(auts_r, lambda x,y: x.concatenate(y))

tmp_l = aut_l.proper().minimal_automaton()
tmp_r = aut_r.proper().minimal_automaton()
short = get_shortest_strings(tmp_l)

for w in short:
if tmp_r.eval(w) == 0:
return False
return True

comp = aut_r.minimal_automaton().complement()
tmp = aut_l.product(comp).trim()

return len(tmp.useful_states()) == 0
# comp = aut_r.minimal_automaton().complement()
# tmp = aut_l.product(comp).trim()

# return len(tmp.useful_states()) == 0


def automaton_for_side(self, side: str) -> Aut:
"""!
Get an automaton for a given side.
@param side: Side (left, right)
@return Concatenation of automata for a given side
"""
auts_l = self.automata_for_side(side)
return multiop(auts_l, lambda x,y: x.concatenate(y))


def is_balanced(self) -> bool:
"""
Check if query is balanced.
query is balanced if automata representing both
sides recognize equivalent languages.
sides recognize equivalent shortest languages.
Returns
-------
Expand All @@ -232,7 +258,12 @@ def is_balanced(self) -> bool:
aut_l = multiop(auts_l, lambda x,y: x.concatenate(y))
aut_r = multiop(auts_r, lambda x,y: x.concatenate(y))

return awalipy.are_equivalent(aut_l, aut_r)
tmp_l = aut_l.proper().minimal_automaton()
tmp_r = aut_r.proper().minimal_automaton()

return get_shortest_strings(tmp_l) == get_shortest_strings(tmp_r)

#return awalipy.are_equivalent(aut_l, aut_r)

def show_constraints(self):
show_automata(self.constraints)
Expand Down Expand Up @@ -351,34 +382,6 @@ def __repr__(self) -> str:
return str(self)


def get_eqs_graph(self) -> StringEqGraph:

nodes: Dict[StringEquation, StringEqNode] = dict()
all_nodes = []
c = 0
for eq in self.equations:
nn: StringEqNode = StringEqNode([], eq, c)
nodes[eq] = nn
all_nodes.append(nn)
c += 1

for eq in self.equations:
for eqprime in self.equations:
if eqprime == eq:
continue
if len(eqprime.get_vars() & eq.get_vars()) != 0:
nodes[eq].succ.append(nodes[eqprime])

for k, v in nodes.items():
nn: StringEqNode = StringEqNode([v], k.switched, c)
v.succ.append(nn)
all_nodes.append(nn)
c += 1

return StringEqGraph(all_nodes, set(self.equations))




class StringConstraintQuery:

Expand Down

0 comments on commit 86d9601

Please sign in to comment.