Skip to content

Commit

Permalink
simplify unit prod rule
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Jul 10, 2024
1 parent 2d9ef60 commit d52c715
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 12 deletions.
7 changes: 6 additions & 1 deletion latex/popl2025/experiments/ablation_dfa_walker.tex
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,9 @@
4 0.10
};
\end{axis}
\end{tikzpicture}
\end{tikzpicture}

%Δ(1)= Top-1/total: 147 / 148 = 0.99
%Δ(2)= Top-1/total: 166 / 168 = 0.99
%Δ(3)= Top-1/total: 96 / 150 = 0.64
%Δ(4)= Top-1/total: 26 / 152 = 0.17
Binary file modified latex/popl2025/popl.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions latex/popl2025/popl.tex
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
%\documentclass[sigplan,review]{acmart}\settopmatter{printfolios=true}
%% For final camera-ready submission, w/ required CCS and ACM Reference
%\documentclass[sigplan,nonacm]{acmart}
\documentclass[sigplan,review,acmsmall,nonacm]{acmart}\settopmatter{printfolios=false,printccs=false,printacmref=false}
\documentclass[sigplan,review,acmsmall,nonacm,screen,anonymous]{acmart}\settopmatter{printfolios=false,printccs=false,printacmref=false}


%% Conference information
Expand Down Expand Up @@ -733,7 +733,7 @@
Now, we know that if the Parikh divergence between two intervals is nonzero, those intervals must be incompatible as no two strings, one from each Parikh interval, can be transformed into the other with fewer than $\pi \parallel \pi'$ edits.

\begin{definition}[Parikh compatibility]
Let $q, q'$ be NFA states and $v$ be a CFG nonterminal. We call $\langle q, v, q'\rangle: Q\times V\times Q$ \textit{compatible} iff their divergence is zero, i.e., $v \lhd qq' \iff (\pi(v) \parallel \pi(q, q')) = 0$.
Let $q, q'$ be NFA states and $v$ be a CFG nonterminal. We call $\langle q, v, q'\rangle: Q\times V\times Q$ \textit{compatible} iff their divergence is zero, i.e., $v \lhd qq' \iff \big(\pi(v) \parallel \pi(q, q')\big) = 0$.
\end{definition}

Finally, we define the modified Bar-Hillel construction for nominal Levenshtein automata as:\vspace{-2pt}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,14 @@ fun CFG.intersectLevFSAP(fsa: FSA, parikhMap: ParikhMap = this.parikhMap): CFG {
// For every production A → σ in P, for every (p, σ, q) ∈ Q × Σ × Q
// such that δ(p, σ) = q we have the production [p, A, q] → σ in P′.
//
// The active version pairs every unit production (A, σ) with every arc in fsa.Q
// and picks the right-hand side from the arc label (arc.π2):
//   - label starts with "[!=]" and σ differs from the rest of the label → [p~A~q] → <A>
//   - label starts with "[.*]"                                          → [p~A~q] → <A>
//   - anything else                                                     → [p~A~q] → σ
// where p = arc.π1 and q = arc.π3.
//
// NOTE(review): nothing is filtered out here. The last case is also reached when the
// label is "[!=]x" with σ == x, and for any other label that does not equal σ (if such
// labels occur in fsa.Q). The commented-out version kept only triples for which
// (arc.π2)(σ) held — confirm the extra productions are intended or pruned downstream.
// NOTE(review): the same "<A>" production is emitted once per σ of A, so the returned
// list can contain duplicates.
fun CFG.unitProdRules(fsa: FSA): List<Pair<String, List<Σᐩ>>> =
(unitProductions * fsa.nominalize().flattenedTriples)
.filter { (_, σ: Σᐩ, arc) -> (arc.π2)(σ) }
.map { (A, σ, arc) -> "[${arc.π1}~$A~${arc.π3}]" to listOf(σ) }
// (unitProductions * fsa.nominalize().flattenedTriples)
// .filter { (_, σ: Σᐩ, arc) -> (arc.π2)(σ) }
// .map { (A, σ, arc) -> "[${arc.π1}~$A~${arc.π3}]" to listOf(σ) }
(unitProductions * fsa.Q).map { (A, σ, arc) ->
// "[!=]" is 4 characters long, so drop(4) leaves the symbol the label excludes.
if (arc.π2.startsWith("[!=]") && σ != arc.π2.drop(4)) "[${arc.π1}~$A~${arc.π3}]" to listOf("<$A>")
else if (arc.π2.startsWith("[.*]")) "[${arc.π1}~$A~${arc.π3}]" to listOf("<$A>")
else "[${arc.π1}~$A~${arc.π3}]" to listOf(σ)
}

fun CFG.postProcess() =
this.also { println("∩-grammar has ${it.size} total productions") }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
@file:Suppress("NonAsciiCharacters")
package ai.hypergraph.kaliningraph.parsing

import ai.hypergraph.kaliningraph.*
import ai.hypergraph.kaliningraph.graphs.LabeledGraph
import ai.hypergraph.kaliningraph.sampling.choose
import ai.hypergraph.kaliningraph.tokenizeByWhitespace
import ai.hypergraph.kaliningraph.types.*
import kotlin.jvm.JvmName
import kotlin.random.Random
Expand Down Expand Up @@ -55,6 +55,8 @@ val CFG.tmap: Map<Set<Σᐩ>, Set<Σᐩ>> by cache {
.mapValues { it.value.map { it.second }.toSet() }
}

/**
 * Cached lookup from a unicode-escaped character to the terminal it was derived from.
 *
 * The character for a terminal is `Random(terminal.hashCode()).nextInt().toChar()`,
 * so it depends only on the terminal's hash code.
 *
 * NOTE(review): `associateBy` keeps the last value for a repeated key, so two terminals
 * whose derived characters coincide would silently share one entry.
 */
val CFG.unicodeMap by cache {
  terminals.associateBy { terminal ->
    Random(terminal.hashCode()).nextInt().toChar().toUnicodeEscaped()
  }
}

/** Cached list of the grammar's symbols followed by the extra entry "ε". */
val CFG.ntLst by cache {
  (symbols + "ε").toList()
}

/** Cached map from each entry of [ntLst] to its position in that list. */
val CFG.ntMap by cache {
  ntLst.withIndex().associate { (index, symbol) -> symbol to index }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ val HOLE_MARKER = "_"
fun Σᐩ.containsHole(): Bln = HOLE_MARKER in tokenizeByWhitespace()
fun Σᐩ.isHoleTokenIn(cfg: CFG) = containsHole() || isNonterminalStubIn(cfg)
//val ntRegex = Regex("<[^\\s>]*>")
fun Σᐩ.isNonterminalStub() = isNotEmpty() && first() == '<' && last() == '>'
// True when this string is longer than three characters, starts with '<' and ends with '>'.
// NOTE(review): `length > 3` requires at least two characters between the brackets, so
// a stub such as "<S>" for a one-character nonterminal is NOT recognized (nor is "<>").
// Confirm this is intended, since stubs are built elsewhere as "<$A>" for arbitrary A.
fun Σᐩ.isNonterminalStub() = length > 3 && first() == '<' && last() == '>'
fun Σᐩ.isNonterminalStubInNTs(nts: Set<Σᐩ>): Bln = isNonterminalStub() && drop(1).dropLast(1) in nts
fun Σᐩ.isNonterminalStubIn(cfg: CFG): Bln = isNonterminalStub() && drop(1).dropLast(1) in cfg.nonterminals
// True when this string is a nonterminal stub in every grammar of the conjunction (vacuously true if it has none).
fun Σᐩ.isNonterminalStubIn(CJL: CJL): Bln = CJL.cfgs.all { cfg -> isNonterminalStubIn(cfg) }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ fun PTree.toDFA(minimize: Boolean = false) =
var j = 0
propagator(
both = { a, b -> if (a == null) b else if (b == null) a
// Only periodically minimize the automata during construction
else if (i++ % 13 == 0) a.concatenate(b).min() else a.concatenate(b) },
// Only periodically minimize the automata during construction
else if (i++ % 13 == 0) a.concatenate(b).min() else a.concatenate(b) },
either = { a, b -> if (a == null) b else if (b == null) a
else if (j++ % 13 == 0) a.union(b).min() else a.union(b) },
else if (j++ % 13 == 0) a.union(b).min() else a.union(b) },
unit = { a ->
if ("ε" in a.root) null
else BAutomaton.makeChar(Random(a.root.hashCode()).nextInt().toChar())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ fun CFG.jvmIntersectLevFSAP(fsa: FSA,

// For every production A → σ in P, for every (p, σ, q) ∈ Q × Σ × Q
// such that δ(p, σ) = q we have the production [p, A, q] → σ in P′.
val unitProds = unitProdRules(fsa)
val unitProds = unitProdRules(fsa).toSet()
.onEach { (a, _) -> nts.add(a) }

// For each production A → BC in P, for every p, q, r ∈ Q,
Expand Down Expand Up @@ -244,6 +244,7 @@ fun CFG.jvmIntersectLevFSAP(fsa: FSA,
.collect(Collectors.toSet())
.also { println("Eliminated ${totalProds - it.size} extra productions before normalization") }
.jvmPostProcess(clock)
.expandNonterminalStubs(origCFG = this@jvmIntersectLevFSAP)
// .jdvpNew()
}

Expand All @@ -255,6 +256,11 @@ fun CFG.jvmPostProcess(clock: TimeSource.Monotonic.ValueTimeMark) =
.also { println("Normalization eliminated ${size - it.size} productions in ${clock.elapsedNow()}") }
.freeze()

/**
 * Replaces every production whose right-hand side is a single nonterminal stub
 * (as recognized by [isNonterminalStub], e.g. `<Name>`) with one production per entry
 * that `origCFG.bimap.NDEPS` holds for the stub's name (the text between the brackets).
 * All other productions are passed through unchanged.
 *
 * The result is deduplicated, frozen, and the difference in production count between
 * the result and the receiver is printed.
 *
 * @param origCFG grammar whose `bimap.NDEPS` table supplies the expansions
 * @throws IllegalStateException if a stub's name has no entry in `origCFG.bimap.NDEPS`
 */
fun CFG.expandNonterminalStubs(origCFG: CFG) = flatMap { prod ->
  if (prod.RHS.size != 1 || !prod.RHS.first().isNonterminalStub()) listOf(prod)
  else {
    val stub = prod.RHS.first()
    // Strip the surrounding '<' and '>' to recover the symbol name.
    val name = stub.drop(1).dropLast(1)
    // Fail with a message naming the offending stub instead of a bare NPE.
    val expansions = origCFG.bimap.NDEPS[name]
      ?: error("No NDEPS entry for nonterminal stub $stub in production ${prod.LHS} -> ${prod.RHS}")
    expansions.map { t -> prod.LHS to listOf(t) }
  }
}.toSet().freeze().also { println("Expanded ${it.size - size} nonterminal stubs") }

tailrec fun CFG.jvmElimVarUnitProds(
toVisit: Set<Σᐩ> = nonterminals,
vars: Set<Σᐩ> = nonterminals,
Expand Down

0 comments on commit d52c715

Please sign in to comment.