-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added Zhang Shasha Tree Edit Distance
SQUASHED: AUTO-COMMIT-src-external-tree-edit-distance-demo.md,AUTO-COMMIT-src-external-tree-edit-distance-LICENSE,AUTO-COMMIT-src-external-tree-edit-distance-zhang-shasha.js,
- Loading branch information
1 parent
f0c65ea
commit 21e478b
Showing
3 changed files
with
329 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
License Zero Reciprocal Public License 2.0.1 | ||
|
||
Copyright: Kyle E. Mitchell | ||
|
||
Source: https://github.com/kemitchell/zhang-shasha.js | ||
|
||
**This software comes as is, without any warranty at all. As far | ||
as the law allows, I will not be liable for any damages related | ||
to this software or this license, for any kind of legal claim.** | ||
|
||
As long as you meet the conditions below, you may do everything | ||
with this software that would otherwise infringe my copyright in | ||
it or any covered patent claim. Your permission covers a patent | ||
claim that I can license, or become able to license, if you would | ||
infringe it by using this software as of my latest contribution. | ||
|
||
1. You must ensure that everyone who gets a copy of this software | ||
from you, in source code or any other form, also gets the | ||
complete text of this license and the copyright and source | ||
notices above. | ||
|
||
2. You must not make any legal claim against anyone for | ||
infringing any patent claim they would infringe by using this | ||
software alone, accusing this software, with or without | ||
changes, alone or combined into a larger program. | ||
|
||
3. If you change this software, you must release source code for | ||
your changes. | ||
|
||
4. If you combine this software with other software into a larger | ||
program, you must release any source code for that larger | ||
program that has not yet been released. | ||
|
||
5. If you run this software to analyze, change, or generate | ||
software, you must release source code for that software that | ||
has not yet been released. | ||
|
||
Releasing source code means publicly licensing it under either | ||
this license or a license approved by the Open Source Initiative, | ||
and promptly publishing it, in the preferred form for making | ||
changes, to a freely accessible distribution system widely used | ||
for similarly licensed source code. | ||
|
||
Any unknowing failure to meet condition 3, 4, or 5 is excused if | ||
you release source code as required, or stop doing anything | ||
requiring permission under this license, within 30 days of | ||
learning that this license required you to release source code. | ||
|
||
--- | ||
|
||
Licensor Signature (Ed25519): | ||
|
||
309fd396e3c323f6c88f3a8add63ff7a | ||
3ab9f78c35e28d77e8cfd2684e2a9f1e | ||
255282d1c85b012e2eebcdec65422070 | ||
6065d78294e2949d244593b0720b8801 | ||
|
||
--- | ||
|
||
Agent Signature (Ed25519): | ||
|
||
49ff7855974fb0e4243fe86c38826c2c | ||
d6762a9ffb1d4e2b861c8560d336e048 | ||
36df8de459faae32428b607636f9bec3 | ||
faf0054ae399ec52d6331fc65850f80c |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Zhang Shasha Tree Edit Distance | ||
|
||
|
||
<script> | ||
|
||
import {distance, mapping} from "src/external/tree-edit-distance/zhang-shasha.js" | ||
|
||
|
||
var a = { | ||
label: 'a', | ||
children: [ | ||
{label: 'b', children: []}, | ||
{label: 'c', children: []} | ||
] | ||
} | ||
|
||
var b = { | ||
label: 'a', | ||
children: [ | ||
{label: 'b', children: []} | ||
] | ||
} | ||
try { | ||
var result = <div> | ||
<h3>A</h3> | ||
<div style="white-space: pre"> {JSON.stringify(a)}</div> | ||
<h3>B</h3> | ||
<div style="white-space: pre">{JSON.stringify(b)}</div> | ||
|
||
<div><h3>distance:</h3> {distance(a, b)}</div> | ||
<h3>mapping:</h3> | ||
<div style="white-space: pre"> {mapping(a, b).map(ea => ea.type + " " | ||
+ ea.t1.label + " "+ (ea.t2 ? ea.t2.label : ""))}</div> | ||
</div> | ||
} catch(e) { | ||
debugger | ||
throw e | ||
} | ||
|
||
result | ||
</script> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
/* | ||
License Zero Reciprocal Public License 2.0.1 | ||
Copyright: Kyle E. Mitchell | ||
Source: https://github.com/kemitchell/zhang-shasha.js | ||
(Modified by @JensLincke) | ||
*/ | ||
|
||
|
||
// Values of the `type` field on the edit operations produced in mapping mode.
const INSERT = 'insert' // node exists only in the second tree (t1 is null)
const MATCH = 'match' // nodes paired at zero update cost
const REMOVE = 'remove' // node exists only in the first tree (t2 is null)
const UPDATE = 'update' // nodes paired at non-zero update cost
|
||
/**
 * Computes the tree edit distance between two trees.
 *
 * All arguments are forwarded unchanged to `zhangShasha`, which supplies the
 * defaults for any callback left undefined.
 *
 * @param {*} rootOfT1 - root node of the first tree
 * @param {*} rootOfT2 - root node of the second tree
 * @param {Function} [childrenOf] - returns the children of a node
 * @param {Function} [insertCost] - cost of inserting a node
 * @param {Function} [removeCost] - cost of removing a node
 * @param {Function} [updateCost] - cost of turning one node into another
 * @returns {number} the accumulated cost of the cheapest edit script
 */
export function distance(rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost) {
  const wantMapping = false
  return zhangShasha(
    wantMapping,
    rootOfT1,
    rootOfT2,
    childrenOf,
    insertCost,
    removeCost,
    updateCost
  )
}
|
||
/**
 * Computes the edit mapping (list of edit operations) between two trees.
 *
 * All arguments are forwarded unchanged to `zhangShasha`, which supplies the
 * defaults for any callback left undefined.
 *
 * @param {*} rootOfT1 - root node of the first tree
 * @param {*} rootOfT2 - root node of the second tree
 * @param {Function} [childrenOf] - returns the children of a node
 * @param {Function} [insertCost] - cost of inserting a node
 * @param {Function} [removeCost] - cost of removing a node
 * @param {Function} [updateCost] - cost of turning one node into another
 * @returns {Array<{type: string, t1: *, t2: *}>} edit operations; `t1` is
 *   null for inserts and `t2` is null for removes
 */
export function mapping(rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost) {
  const wantMapping = true
  return zhangShasha(
    wantMapping,
    rootOfT1,
    rootOfT2,
    childrenOf,
    insertCost,
    removeCost,
    updateCost
  )
}
|
||
/**
 * Zhang–Shasha tree edit distance, shared by `distance` and `mapping`.
 *
 * @param {boolean} isMapping - when truthy, track and return the list of edit
 *   operations instead of the numeric distance
 * @param {*} rootOfT1 - root node of the first tree
 * @param {*} rootOfT2 - root node of the second tree
 * @param {Function} [childrenOf] - returns a node's children (default: `node.children`)
 * @param {Function} [insertCost] - cost of inserting a node (default: 1)
 * @param {Function} [removeCost] - cost of removing a node (default: 1)
 * @param {Function} [updateCost] - cost of relabelling `from` into `to`
 *   (default: 0 when the `label` properties are strictly equal, otherwise 1)
 * @returns {number|Array<{type: string, t1: *, t2: *}>} the distance, or the
 *   edit operations in reverse order of accumulation when `isMapping` is truthy
 */
function zhangShasha(isMapping, rootOfT1, rootOfT2,
  childrenOf = (node) => node.children,
  insertCost = () => 1,
  removeCost = () => 1,
  updateCost = (from, to) => (from.label === to.label ? 0 : 1)) {
  // Paper: "Preprocessing"
  const tree1 = preprocess(rootOfT1, childrenOf)
  const tree2 = preprocess(rootOfT2, childrenOf)
  const size1 = tree1.nodes.length
  const size2 = tree2.nodes.length

  // Subtree-to-subtree results, indexed by post-order position in each tree.
  const treedist = initializeMatrix(size1, size2)
  const operations = isMapping
    ? initializeMappingMatrix(size1, size2, true)
    : undefined

  // Paper: "Main loop"
  for (const keyroot1 of tree1.keyroots) {
    for (const keyroot2 of tree2.keyroots) {
      computeTreedist(keyroot1, keyroot2)
    }
  }

  if (!isMapping) {
    return treedist[size1 - 1][size2 - 1]
  }
  return operations[size1 - 1][size2 - 1].reverse()

  // Paper: "The computation of treedist(i, j)."
  function computeTreedist (i, j) {
    const leftmost1 = tree1.l[i]
    const leftmost2 = tree2.l[j]
    // Forest row/column k corresponds to tree index k + offset; row/column 0
    // stands for the empty forest.
    const iOffset = leftmost1 - 1
    const jOffset = leftmost2 - 1
    const iRange = i - leftmost1 + 2
    const jRange = j - leftmost2 + 2
    const forestDist = initializeMatrix(iRange, jRange)
    const forestOps = isMapping
      ? initializeMappingMatrix(iRange, jRange, true)
      : undefined

    // First column: turning a T1 forest into the empty forest removes every node.
    for (let row = 1; row < iRange; row++) {
      const removed = tree1.nodes[row + iOffset]
      forestDist[row][0] = forestDist[row - 1][0] + removeCost(removed)
      if (isMapping) {
        forestOps[row][0] = forestOps[row - 1][0].concat({
          type: REMOVE, t1: removed, t2: null
        })
      }
    }

    // First row: building a T2 forest from the empty forest inserts every node.
    for (let col = 1; col < jRange; col++) {
      const inserted = tree2.nodes[col + jOffset]
      forestDist[0][col] = forestDist[0][col - 1] + insertCost(inserted)
      if (isMapping) {
        forestOps[0][col] = forestOps[0][col - 1].concat({
          type: INSERT, t1: null, t2: inserted
        })
      }
    }

    for (let row = 1; row < iRange; row++) {
      for (let col = 1; col < jRange; col++) {
        const index1 = row + iOffset
        const index2 = col + jOffset
        const node1 = tree1.nodes[index1]
        const node2 = tree2.nodes[index2]

        // True when both current nodes share their leftmost leaf with the
        // keyroots, i.e. both forests are whole subtrees; only then is the
        // result also recorded in the tree-level matrices.
        const wholeTrees =
          tree1.l[index1] === leftmost1 && tree2.l[index2] === leftmost2

        const removeTotal = forestDist[row - 1][col] + removeCost(node1)
        const insertTotal = forestDist[row][col - 1] + insertCost(node2)

        // Cell the "update" alternative continues from.
        let baseRow
        let baseCol
        let updateTotal
        if (wholeTrees) {
          baseRow = row - 1
          baseCol = col - 1
          updateTotal = forestDist[baseRow][baseCol] + updateCost(node1, node2)
        } else {
          // Skip over both subtrees and reuse their already computed distance.
          baseRow = tree1.l[index1] - 1 - iOffset
          baseCol = tree2.l[index2] - 1 - jOffset
          updateTotal = forestDist[baseRow][baseCol] + treedist[index1][index2]
        }

        const best = Math.min(removeTotal, insertTotal, updateTotal)
        forestDist[row][col] = best

        if (isMapping) {
          // Ties are resolved in the order remove, insert, update.
          let chosen
          if (best === removeTotal) {
            chosen = forestOps[row - 1][col]
              .concat({type: REMOVE, t1: node1, t2: null})
          } else if (best === insertTotal) {
            chosen = forestOps[row][col - 1]
              .concat({type: INSERT, t1: null, t2: node2})
          } else if (wholeTrees) {
            // An update that added no cost is reported as a match.
            const type = best === forestDist[baseRow][baseCol] ? MATCH : UPDATE
            chosen = forestOps[baseRow][baseCol]
              .concat({type: type, t1: node1, t2: node2})
          } else {
            chosen = forestOps[baseRow][baseCol]
              .concat(operations[index1][index2])
          }
          forestOps[row][col] = chosen
          if (wholeTrees) {
            operations[index1][index2] = chosen
          }
        }

        if (wholeTrees) {
          treedist[index1][index2] = best
        }
      }
    }
  }
}
|
||
/**
 * Flattens a tree into the post-order tables used by the Zhang–Shasha
 * algorithm.
 *
 * @param {*} root - root node of the tree
 * @param {Function} childrenOf - returns the children of a node
 * @returns {{nodes: Array, l: number[], keyroots: number[]}}
 *   `nodes`: the nodes in post-order;
 *   `l`: for each post-order index, the index of the leftmost leaf below that
 *   node (a leaf maps to itself);
 *   `keyroots`: ascending post-order indices of every node that is not the
 *   first child of its parent (the root included).
 */
function preprocess (root, childrenOf) {
  const nodes = []
  const l = []
  const keyroots = []
  // First post-order index of each node, replacing a linear `indexOf` scan
  // per node; "first occurrence" matches what `indexOf` would have returned.
  const firstIndexOf = new Map()

  postOrderWalk(root, childrenOf, function (data) {
    const position = nodes.length
    const firstChild = data.firstChild
    nodes.push(data.node)
    if (!firstIndexOf.has(data.node)) {
      firstIndexOf.set(data.node, position)
    }
    // An inner node inherits the leftmost leaf of its first child, which was
    // already visited because the walk is post-order.
    l.push(firstChild ? l[firstIndexOf.get(firstChild)] : position)
    if (data.index !== 0) {
      keyroots.push(position)
    }
  })

  // Numeric comparator: the default sort compares as strings ("10" < "2"),
  // which would break the ascending keyroot order the main loop relies on.
  keyroots.sort(function (left, right) { return left - right })
  return {nodes: nodes, l: l, keyroots: keyroots}
}
|
||
/**
 * Visits every node of a tree in post-order without recursion.
 *
 * @param {*} root - root node of the tree
 * @param {Function} childrenOf - returns a node's children; a falsy result is
 *   treated as "no children"
 * @param {Function} iterator - called once per node with
 *   `{index, node, firstChild}`, where `index` is the node's position among
 *   its siblings (null for the root) and `firstChild` is its first child or null
 */
function postOrderWalk (root, childrenOf, iterator) {
  const pending = [{index: null, node: root}]
  const visited = []

  // Depth-first walk that records each parent before its children and takes
  // the last child first; reading the record backwards yields post-order.
  while (pending.length !== 0) {
    const current = pending.pop()
    const children = childrenOf(current.node) || []
    visited.push({
      index: current.index,
      node: current.node,
      firstChild: children[0] || null
    })
    for (let position = 0; position < children.length; position++) {
      pending.push({index: position, node: children[position]})
    }
  }

  let cursor = visited.length
  while (cursor > 0) {
    cursor -= 1
    iterator(visited[cursor])
  }
}
|
||
/**
 * Builds a `width` x `height` matrix (array of arrays).
 *
 * @param {number} width - number of outer entries
 * @param {number} height - number of cells per outer entry
 * @param {*} arrays - when truthy every cell is its own empty array,
 *   otherwise every cell is 0
 * @returns {Array<Array>} the initialized matrix
 */
function initializeMappingMatrix(width, height, arrays) {
  // Each call yields a fresh cell so array cells are never shared.
  const makeCell = arrays
    ? function () { return [] }
    : function () { return 0 }
  const matrix = new Array(width)
  for (let x = 0; x < width; x += 1) {
    const column = new Array(height)
    for (let y = 0; y < height; y += 1) {
      column[y] = makeCell()
    }
    matrix[x] = column
  }
  return matrix
}
|
||
/**
 * Builds a `width` x `height` matrix (array of arrays) filled with zeros.
 *
 * @param {number} width - number of outer entries
 * @param {number} height - number of cells per outer entry
 * @returns {number[][]} the zero-filled matrix
 */
function initializeMatrix(width, height) {
  const matrix = new Array(width)
  let x = 0
  while (x < width) {
    const column = new Array(height)
    column.fill(0)
    matrix[x] = column
    x += 1
  }
  return matrix
}