Skip to content

Commit

Permalink
added Zhang Shasha Tree Edit Distance
Browse files Browse the repository at this point in the history
SQUASHED: AUTO-COMMIT-src-external-tree-edit-distance-demo.md,AUTO-COMMIT-src-external-tree-edit-distance-LICENSE,AUTO-COMMIT-src-external-tree-edit-distance-zhang-shasha.js,
  • Loading branch information
JensLincke committed Sep 25, 2023
1 parent f0c65ea commit 21e478b
Show file tree
Hide file tree
Showing 3 changed files with 329 additions and 0 deletions.
65 changes: 65 additions & 0 deletions src/external/tree-edit-distance/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
License Zero Reciprocal Public License 2.0.1

Copyright: Kyle E. Mitchell

Source: https://github.com/kemitchell/zhang-shasha.js

**This software comes as is, without any warranty at all. As far
as the law allows, I will not be liable for any damages related
to this software or this license, for any kind of legal claim.**

As long as you meet the conditions below, you may do everything
with this software that would otherwise infringe my copyright in
it or any covered patent claim. Your permission covers a patent
claim that I can license, or become able to license, if you would
infringe it by using this software as of my latest contribution.

1. You must ensure that everyone who gets a copy of this software
from you, in source code or any other form, also gets the
complete text of this license and the copyright and source
notices above.

2. You must not make any legal claim against anyone for
infringing any patent claim they would infringe by using this
software alone, accusing this software, with or without
changes, alone or combined into a larger program.

3. If you change this software, you must release source code for
your changes.

4. If you combine this software with other software into a larger
program, you must release any source code for that larger
program that has not yet been released.

5. If you run this software to analyze, change, or generate
software, you must release source code for that software that
has not yet been released.

Releasing source code means publicly licensing it under either
this license or a license approved by the Open Source Initiative,
and promptly publishing it, in the preferred form for making
changes, to a freely accessible distribution system widely used
for similarly licensed source code.

Any unknowing failure to meet condition 3, 4, or 5 is excused if
you release source code as required, or stop doing anything
requiring permission under this license, within 30 days of
learning that this license required you to release source code.

---

Licensor Signature (Ed25519):

309fd396e3c323f6c88f3a8add63ff7a
3ab9f78c35e28d77e8cfd2684e2a9f1e
255282d1c85b012e2eebcdec65422070
6065d78294e2949d244593b0720b8801

---

Agent Signature (Ed25519):

49ff7855974fb0e4243fe86c38826c2c
d6762a9ffb1d4e2b861c8560d336e048
36df8de459faae32428b607636f9bec3
faf0054ae399ec52d6331fc65850f80c
41 changes: 41 additions & 0 deletions src/external/tree-edit-distance/demo.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Zhang Shasha Tree Edit Distance


<script>

import {distance, mapping} from "src/external/tree-edit-distance/zhang-shasha.js"


// Demo input tree A: root 'a' with two leaf children, 'b' and 'c'.
// Nodes use the {label, children} shape that the library's default
// childrenOf and updateCost callbacks read.
var a = {
label: 'a',
children: [
{label: 'b', children: []},
{label: 'c', children: []}
]
}

// Demo input tree B: same root 'a', but only the single leaf child 'b'.
var b = {
label: 'a',
children: [
{label: 'b', children: []}
]
}
// Build the result view: both trees as JSON, their edit distance, and one
// text entry per mapping operation ("<type> <t1 label> <t2 label>").
// NOTE(review): insert operations are created with t1: null in
// zhang-shasha.js, so `ea.t1.label` below would throw if the mapping
// contained an insert; for A -> B only match/remove operations are expected.
try {
var result = <div>
<h3>A</h3>
<div style="white-space: pre"> {JSON.stringify(a)}</div>
<h3>B</h3>
<div style="white-space: pre">{JSON.stringify(b)}</div>

<div><h3>distance:</h3> {distance(a, b)}</div>
<h3>mapping:</h3>
<div style="white-space: pre"> {mapping(a, b).map(ea => ea.type + " "
+ ea.t1.label + " "+ (ea.t2 ? ea.t2.label : ""))}</div>
</div>
} catch(e) {
// Break into the debugger (when one is attached) before re-throwing, so the
// failure can be inspected in place.
debugger
throw e
}

// Bare expression as the last statement; presumably the host page renders
// the script's final value — TODO confirm against the markdown script runner.
result
</script>
223 changes: 223 additions & 0 deletions src/external/tree-edit-distance/zhang-shasha.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
/*
License Zero Reciprocal Public License 2.0.1
Copyright: Kyle E. Mitchell
Source: https://github.com/kemitchell/zhang-shasha.js
(Modified by @JensLincke)
*/


// Values of the `type` field on the operation records ({type, t1, t2})
// that make up an edit mapping.
const INSERT = 'insert' // node exists only in the second tree (t1 is null)
const MATCH = 'match'   // nodes paired without adding to the distance
const REMOVE = 'remove' // node exists only in the first tree (t2 is null)
const UPDATE = 'update' // nodes paired, and the pairing added to the distance

/**
 * Computes the tree edit distance between two trees.
 *
 * All arguments are forwarded unchanged to zhangShasha, which supplies the
 * defaults for any callback left undefined.
 *
 * @param {*} rootOfT1 - root node of the first tree
 * @param {*} rootOfT2 - root node of the second tree
 * @param {Function} [childrenOf] - returns the children of a node
 * @param {Function} [insertCost] - cost of inserting a node of the second tree
 * @param {Function} [removeCost] - cost of removing a node of the first tree
 * @param {Function} [updateCost] - cost of turning one node into another
 * @returns {number} the edit distance between the two trees
 */
export function distance(rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost) {
  const wantMapping = false
  return zhangShasha(
    wantMapping,
    rootOfT1,
    rootOfT2,
    childrenOf,
    insertCost,
    removeCost,
    updateCost
  )
}

/**
 * Computes the edit mapping between two trees.
 *
 * All arguments are forwarded unchanged to zhangShasha, which supplies the
 * defaults for any callback left undefined.
 *
 * @param {*} rootOfT1 - root node of the first tree
 * @param {*} rootOfT2 - root node of the second tree
 * @param {Function} [childrenOf] - returns the children of a node
 * @param {Function} [insertCost] - cost of inserting a node of the second tree
 * @param {Function} [removeCost] - cost of removing a node of the first tree
 * @param {Function} [updateCost] - cost of turning one node into another
 * @returns {Array<{type: string, t1: *, t2: *}>} list of edit operations
 */
export function mapping(rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost) {
  const wantMapping = true
  return zhangShasha(
    wantMapping,
    rootOfT1,
    rootOfT2,
    childrenOf,
    insertCost,
    removeCost,
    updateCost
  )
}

/**
 * Zhang-Shasha tree edit distance, optionally tracking the edit operations.
 *
 * Both trees are preprocessed into post-order node lists. For every pair of
 * keyroots a forest-distance table is filled; cells that compare two whole
 * subtrees are copied into the global `treedist` (and `operations`) tables
 * so later keyroot pairs can reuse them.
 *
 * @param {boolean} isMapping - when truthy, return the operation list
 *   instead of the numeric distance
 * @param {*} rootOfT1 - root node of the first tree
 * @param {*} rootOfT2 - root node of the second tree
 * @param {Function} [childrenOf] - children of a node; default `node.children`
 * @param {Function} [insertCost] - cost of inserting a node; default 1
 * @param {Function} [removeCost] - cost of removing a node; default 1
 * @param {Function} [updateCost] - cost of turning `from` into `to`;
 *   default 0 when the labels are strictly equal, otherwise 1
 * @returns {number|Array<{type: string, t1: *, t2: *}>} the distance, or the
 *   operations for the two whole trees in reversed recording order
 */
function zhangShasha(isMapping, rootOfT1, rootOfT2,
  childrenOf = function (node) { return node.children },
  insertCost = function () { return 1 },
  removeCost = function () { return 1 },
  updateCost = function (from, to) { return from.label === to.label ? 0 : 1 }) {
  // Paper: "Preprocessing"
  const first = preprocess(rootOfT1, childrenOf)
  const second = preprocess(rootOfT2, childrenOf)
  const size1 = first.nodes.length
  const size2 = second.nodes.length

  // Global tables, indexed by post-order position in each tree.
  const treedist = initializeMatrix(size1, size2)
  const operations = isMapping
    ? initializeMappingMatrix(size1, size2, true)
    : undefined

  // Paper: "The computation of treedist(i, j)."
  function fillForestTables (i, j) {
    const leftmost1 = first.l[i]
    const leftmost2 = second.l[j]
    // Forest index 0 stands for the empty forest, so forest index k maps to
    // post-order position k + offset.
    const offset1 = leftmost1 - 1
    const offset2 = leftmost2 - 1
    const rows = i - leftmost1 + 2
    const cols = j - leftmost2 + 2
    const dist = initializeMatrix(rows, cols)
    const ops = isMapping
      ? initializeMappingMatrix(rows, cols, true)
      : undefined

    // First column: remove every node of the first forest.
    for (let r = 1; r < rows; r++) {
      const removed = first.nodes[r + offset1]
      dist[r][0] = dist[r - 1][0] + removeCost(removed)
      if (isMapping) {
        ops[r][0] = ops[r - 1][0].concat({type: REMOVE, t1: removed, t2: null})
      }
    }

    // First row: insert every node of the second forest.
    for (let c = 1; c < cols; c++) {
      const inserted = second.nodes[c + offset2]
      dist[0][c] = dist[0][c - 1] + insertCost(inserted)
      if (isMapping) {
        ops[0][c] = ops[0][c - 1].concat({type: INSERT, t1: null, t2: inserted})
      }
    }

    for (let r = 1; r < rows; r++) {
      const position1 = r + offset1
      const node1 = first.nodes[position1]
      for (let c = 1; c < cols; c++) {
        const position2 = c + offset2
        const node2 = second.nodes[position2]

        // Both prefixes are whole trees exactly when each node shares its
        // leftmost leaf with the keyroot of this pass.
        const bothAreTrees =
          first.l[position1] === leftmost1 && second.l[position2] === leftmost2

        const remove = dist[r - 1][c] + removeCost(node1)
        const insert = dist[r][c - 1] + insertCost(node2)

        // Cell the "update" alternative continues from.
        let fromRow
        let fromCol
        let update
        if (bothAreTrees) {
          fromRow = r - 1
          fromCol = c - 1
          update = dist[fromRow][fromCol] + updateCost(node1, node2)
        } else {
          // Skip both subtrees and reuse their already known tree distance.
          fromRow = first.l[position1] - 1 - offset1
          fromCol = second.l[position2] - 1 - offset2
          update = dist[fromRow][fromCol] + treedist[position1][position2]
        }

        const best = Math.min(remove, insert, update)
        dist[r][c] = best

        if (isMapping) {
          // Ties are resolved in the order remove, insert, update.
          let chosen
          if (best === remove) {
            chosen = ops[r - 1][c].concat({type: REMOVE, t1: node1, t2: null})
          } else if (best === insert) {
            chosen = ops[r][c - 1].concat({type: INSERT, t1: null, t2: node2})
          } else if (bothAreTrees) {
            // An update that left the distance unchanged is a match.
            const type = best === dist[fromRow][fromCol] ? MATCH : UPDATE
            chosen = ops[fromRow][fromCol].concat({type: type, t1: node1, t2: node2})
          } else {
            chosen = ops[fromRow][fromCol].concat(operations[position1][position2])
          }
          ops[r][c] = chosen
          if (bothAreTrees) {
            operations[position1][position2] = chosen
          }
        }

        if (bothAreTrees) {
          treedist[position1][position2] = best
        }
      }
    }
  }

  // Paper: "Main loop"
  for (const keyroot1 of first.keyroots) {
    for (const keyroot2 of second.keyroots) {
      fillForestTables(keyroot1, keyroot2)
    }
  }

  if (!isMapping) {
    return treedist[size1 - 1][size2 - 1]
  }
  return operations[size1 - 1][size2 - 1].reverse()
}

/**
 * Flattens a tree into the arrays the Zhang-Shasha algorithm works on.
 *
 * Nodes are visited in post-order (via postOrderWalk); the position of a node
 * in that order is its index in all returned arrays.
 *
 * @param {*} root - root node of the tree
 * @param {Function} childrenOf - returns the children of a node
 * @returns {{nodes: Array, l: number[], keyroots: number[]}}
 *   `nodes`: the nodes in post-order;
 *   `l[k]`: for a childless node its own position, otherwise the `l` value
 *   of its first child (i.e. the position of its leftmost leaf);
 *   `keyroots`: ascending positions of every node that is not a first child
 *   (the root included).
 */
function preprocess (root, childrenOf) {
  var returned = {nodes: [], l: [], keyroots: []}
  // First post-order position of every node seen so far. Gives the same
  // answer as nodes.indexOf(node), without the linear scan per lookup.
  var positionOf = new Map()
  postOrderWalk(root, childrenOf, function (data) {
    var node = data.node
    var firstChild = data.firstChild
    var position = returned.nodes.length
    returned.nodes.push(node)
    if (!positionOf.has(node)) positionOf.set(node, position)
    returned.l.push(
      firstChild
        ? returned.l[positionOf.get(firstChild)]
        : position
    )
    // A sibling index of 0 marks a first child; every other node (the root
    // has index null) starts a new leftmost path and is therefore a keyroot.
    if (data.index !== 0) returned.keyroots.push(position)
  })
  // Keyroots must be processed in ascending numeric order. The default sort
  // compares as strings and would put 10 before 2, so subtree distances
  // would be read before they were computed.
  returned.keyroots.sort(function (a, b) { return a - b })
  return returned
}

/**
 * Visits every node of a tree in post-order without recursion.
 *
 * The tree is first traversed with an explicit stack, recording each node
 * before its children are visited; replaying that record backwards yields
 * post-order (children left to right, then their parent).
 *
 * @param {*} root - root node of the tree
 * @param {Function} childrenOf - returns the children of a node; a falsy
 *   result is treated as "no children"
 * @param {Function} iterator - called once per node with
 *   `{index, node, firstChild}`: `index` is the node's position among its
 *   siblings (null for the root), `firstChild` is its first child or null
 */
function postOrderWalk (root, childrenOf, iterator) {
  const pending = [{index: null, node: root}]
  const visited = []
  while (pending.length > 0) {
    const entry = pending.pop()
    const children = childrenOf(entry.node) || []
    visited.push({
      index: entry.index,
      node: entry.node,
      firstChild: children[0] || null
    })
    for (let k = 0; k < children.length; k++) {
      pending.push({index: k, node: children[k]})
    }
  }
  // Replay the record from its end to obtain post-order.
  let remaining = visited.length
  while (remaining > 0) {
    remaining -= 1
    iterator(visited[remaining])
  }
}

/**
 * Creates a `width` x `height` matrix (array of arrays) with fresh cells.
 *
 * @param {number} width - number of outer entries
 * @param {number} height - number of cells per outer entry
 * @param {*} arrays - when truthy every cell is its own new empty array,
 *   otherwise every cell is the number 0
 * @returns {Array<Array>} the matrix, addressed as `matrix[x][y]`
 */
function initializeMappingMatrix(width, height, arrays) {
  const matrix = new Array(width)
  let x = 0
  while (x < width) {
    const cells = new Array(height)
    for (let y = 0; y < height; y += 1) {
      if (arrays) {
        cells[y] = []
      } else {
        cells[y] = 0
      }
    }
    matrix[x] = cells
    x += 1
  }
  return matrix
}

/**
 * Creates a `width` x `height` matrix (array of arrays) filled with zeros.
 *
 * @param {number} width - number of outer entries
 * @param {number} height - number of zeros per outer entry
 * @returns {number[][]} the matrix, addressed as `matrix[x][y]`
 */
function initializeMatrix(width, height) {
  const matrix = new Array(width)
  let x = 0
  while (x < width) {
    const zeros = new Array(height)
    zeros.fill(0)
    matrix[x] = zeros
    x += 1
  }
  return matrix
}

0 comments on commit 21e478b

Please sign in to comment.