diff --git a/src/external/tree-edit-distance/LICENSE b/src/external/tree-edit-distance/LICENSE new file mode 100644 index 000000000..f67286fe9 --- /dev/null +++ b/src/external/tree-edit-distance/LICENSE @@ -0,0 +1,65 @@ +License Zero Reciprocal Public License 2.0.1 + +Copyright: Kyle E. Mitchell + +Source: https://github.com/kemitchell/zhang-shasha.js + +**This software comes as is, without any warranty at all. As far +as the law allows, I will not be liable for any damages related +to this software or this license, for any kind of legal claim.** + +As long as you meet the conditions below, you may do everything +with this software that would otherwise infringe my copyright in +it or any covered patent claim. Your permission covers a patent +claim that I can license, or become able to license, if you would +infringe it by using this software as of my latest contribution. + +1. You must ensure that everyone who gets a copy of this software + from you, in source code or any other form, also gets the + complete text of this license and the copyright and source + notices above. + +2. You must not make any legal claim against anyone for + infringing any patent claim they would infringe by using this + software alone, accusing this software, with or without + changes, alone or combined into a larger program. + +3. If you change this software, you must release source code for + your changes. + +4. If you combine this software with other software into a larger + program, you must release any source code for that larger + program that has not yet been released. + +5. If you run this software to analyze, change, or generate + software, you must release source code for that software that + has not yet been released. 
+ +Releasing source code means publicly licensing it under either +this license or a license approved by the Open Source Initiative, +and promptly publishing it, in the preferred form for making +changes, to a freely accessible distribution system widely used +for similarly licensed source code. + +Any unknowing failure to meet condition 3, 4, or 5 is excused if +you release source code as required, or stop doing anything +requiring permission under this license, within 30 days of +learning that this license required you to release source code. + +--- + +Licensor Signature (Ed25519): + +309fd396e3c323f6c88f3a8add63ff7a +3ab9f78c35e28d77e8cfd2684e2a9f1e +255282d1c85b012e2eebcdec65422070 +6065d78294e2949d244593b0720b8801 + +--- + +Agent Signature (Ed25519): + +49ff7855974fb0e4243fe86c38826c2c +d6762a9ffb1d4e2b861c8560d336e048 +36df8de459faae32428b607636f9bec3 +faf0054ae399ec52d6331fc65850f80c \ No newline at end of file diff --git a/src/external/tree-edit-distance/demo.md b/src/external/tree-edit-distance/demo.md new file mode 100644 index 000000000..c3d0bc49a --- /dev/null +++ b/src/external/tree-edit-distance/demo.md @@ -0,0 +1,41 @@ +# Zhang Shasha Tree Edit Distance + + + \ No newline at end of file diff --git a/src/external/tree-edit-distance/zhang-shasha.js b/src/external/tree-edit-distance/zhang-shasha.js new file mode 100644 index 000000000..da3a79f36 --- /dev/null +++ b/src/external/tree-edit-distance/zhang-shasha.js @@ -0,0 +1,223 @@ +/* +License Zero Reciprocal Public License 2.0.1 + +Copyright: Kyle E. 
/*
License Zero Reciprocal Public License 2.0.1

Copyright: Kyle E. Mitchell

Source: https://github.com/kemitchell/zhang-shasha.js

(Modified by @JensLincke)

*/

// Operation types reported in the edit script returned by `mapping`.
const INSERT = 'insert'
const MATCH = 'match'
const REMOVE = 'remove'
const UPDATE = 'update'

/**
 * Computes the Zhang-Shasha tree edit distance between two ordered trees.
 *
 * @param {*} rootOfT1 - root node of the source tree
 * @param {*} rootOfT2 - root node of the target tree
 * @param {function} [childrenOf] - returns the ordered children of a node
 *   (a falsy result means "no children"); defaults to `node.children`
 * @param {function} [insertCost] - cost of inserting a T2 node; defaults to 1
 * @param {function} [removeCost] - cost of removing a T1 node; defaults to 1
 * @param {function} [updateCost] - cost of turning a T1 node into a T2 node;
 *   defaults to 0 when `from.label === to.label`, otherwise 1
 * @returns {number} the minimal total cost
 */
export function distance (rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost) {
  return zhangShasha(false, rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost)
}

/**
 * Computes an edit script between two ordered trees.
 *
 * Takes the same parameters as `distance`.
 *
 * @returns {Array<{type: string, t1: *, t2: *}>} operations of type
 *   'insert' (t1 is null), 'remove' (t2 is null), 'match' or 'update',
 *   in the reverse of the post-order in which they were accumulated
 */
export function mapping (rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost) {
  return zhangShasha(true, rootOfT1, rootOfT2, childrenOf, insertCost, removeCost, updateCost)
}

/**
 * Shared implementation behind `distance` and `mapping`.
 *
 * @param {boolean} isMapping - when true the edit script is tracked and
 *   returned, otherwise only the numeric distance is returned
 */
function zhangShasha (isMapping, rootOfT1, rootOfT2,
  childrenOf = function (node) { return node.children },
  insertCost = function () { return 1 },
  removeCost = function () { return 1 },
  updateCost = function (from, to) { return from.label === to.label ? 0 : 1 }) {
  // Paper: "Preprocessing"
  const T1 = preprocess(rootOfT1, childrenOf)
  const T2 = preprocess(rootOfT2, childrenOf)

  const orderOfT1 = T1.nodes.length
  const T1l = T1.l
  const T1nodes = T1.nodes

  const orderOfT2 = T2.nodes.length
  const T2l = T2.l
  const T2nodes = T2.nodes

  // treedist[i][j]: distance between the subtrees rooted at post-order
  // positions i (in T1) and j (in T2); operations[i][j]: matching edit script.
  const treedist = initializeMatrix(orderOfT1, orderOfT2)
  const operations = isMapping
    ? initializeMappingMatrix(orderOfT1, orderOfT2, true)
    : null

  // Paper: "The computation of treedist(i, j)."
  function computeTreedist (i, j) {
    const iOffset = T1l[i] - 1
    const iRange = i - T1l[i] + 2
    const jOffset = T2l[j] - 1
    const jRange = j - T2l[j] + 2
    // Row/column 0 stand for the empty forest; index k stands for the forest
    // ending at post-order position k + offset.
    const forestDistances = initializeMatrix(iRange, jRange)
    const forestOperations = isMapping
      ? initializeMappingMatrix(iRange, jRange, true)
      : null

    // Removing every node of a T1 forest yields the empty forest.
    for (let i1 = 1; i1 < iRange; i1++) {
      const node = T1nodes[i1 + iOffset]
      forestDistances[i1][0] = forestDistances[i1 - 1][0] + removeCost(node)
      if (isMapping) {
        forestOperations[i1][0] = forestOperations[i1 - 1][0]
          .concat({type: REMOVE, t1: node, t2: null})
      }
    }

    // Inserting every node of a T2 forest starting from the empty forest.
    for (let j1 = 1; j1 < jRange; j1++) {
      const node = T2nodes[j1 + jOffset]
      forestDistances[0][j1] = forestDistances[0][j1 - 1] + insertCost(node)
      if (isMapping) {
        forestOperations[0][j1] = forestOperations[0][j1 - 1]
          .concat({type: INSERT, t1: null, t2: node})
      }
    }

    for (let i1 = 1; i1 < iRange; i1++) {
      for (let j1 = 1; j1 < jRange; j1++) {
        const T1index = i1 + iOffset
        const T2index = j1 + jOffset
        const T1node = T1nodes[T1index]
        const T2node = T2nodes[T2index]
        // True when both nodes share their leftmost leaf with i and j, i.e.
        // both forests are whole subtrees and the result is a tree distance.
        const bothTrees = T1l[i] === T1l[T1index] && T2l[j] === T2l[T2index]

        const remove = forestDistances[i1 - 1][j1] + removeCost(T1node)
        const insert = forestDistances[i1][j1 - 1] + insertCost(T2node)
        let update
        let p
        let q
        if (bothTrees) {
          update = forestDistances[i1 - 1][j1 - 1] + updateCost(T1node, T2node)
        } else {
          // Reuse the already computed subtree distance and add the distance
          // of the forests to the left of the two subtrees.
          p = T1l[T1index] - 1 - iOffset
          q = T2l[T2index] - 1 - jOffset
          update = forestDistances[p][q] + treedist[T1index][T2index]
        }

        const min = Math.min(remove, insert, update)
        forestDistances[i1][j1] = min
        if (bothTrees) treedist[T1index][T2index] = min
        if (!isMapping) continue

        // Ties are resolved in the order remove, insert, update.
        let ops
        if (min === remove) {
          ops = forestOperations[i1 - 1][j1]
            .concat({type: REMOVE, t1: T1node, t2: null})
        } else if (min === insert) {
          ops = forestOperations[i1][j1 - 1]
            .concat({type: INSERT, t1: null, t2: T2node})
        } else if (bothTrees) {
          // A zero-cost update is reported as a match.
          const type = min === forestDistances[i1 - 1][j1 - 1] ? MATCH : UPDATE
          ops = forestOperations[i1 - 1][j1 - 1]
            .concat({type, t1: T1node, t2: T2node})
        } else {
          ops = forestOperations[p][q].concat(operations[T1index][T2index])
        }
        forestOperations[i1][j1] = ops
        if (bothTrees) operations[T1index][T2index] = ops
      }
    }
  }

  // Paper: "Main loop" - keyroots must be visited in ascending order so that
  // every subtree distance is available before a larger subproblem reads it.
  for (const i of T1.keyroots) {
    for (const j of T2.keyroots) {
      computeTreedist(i, j)
    }
  }

  if (isMapping) {
    return operations[orderOfT1 - 1][orderOfT2 - 1].reverse()
  }
  return treedist[orderOfT1 - 1][orderOfT2 - 1]
}

/**
 * Numbers the nodes of a tree in post-order and derives the tables the
 * algorithm needs.
 *
 * @returns {{nodes: Array, l: number[], keyroots: number[]}}
 *   nodes: the nodes in post-order;
 *   l: for each position, the position of the leftmost leaf of that subtree;
 *   keyroots: ascending positions of the root and of every node that is not
 *   the first child of its parent
 */
function preprocess (root, childrenOf) {
  const returned = {nodes: [], l: [], keyroots: []}
  // Leftmost-leaf positions of finished subtrees whose parent has not been
  // visited yet. In post-order the children of a node are exactly the last
  // `childCount` entries, the first child being the earliest of them.
  const finished = []
  postOrderWalk(root, childrenOf, function (data) {
    const position = returned.nodes.length
    returned.nodes.push(data.node)
    let leftmost = position
    if (data.childCount > 0) {
      leftmost = finished.splice(finished.length - data.childCount)[0]
    }
    finished.push(leftmost)
    returned.l.push(leftmost)
    if (data.index !== 0) returned.keyroots.push(position)
  })
  // Numeric comparator: the default sort compares elements as strings, which
  // would put 10 before 2 and break the processing order of the main loop.
  returned.keyroots.sort(function (a, b) { return a - b })
  return returned
}

/**
 * Calls `iterator` once per node in post-order (children left to right, then
 * the parent) without recursion.
 *
 * The iterator receives `{index, node, firstChild, childCount}` where `index`
 * is the node's position among its siblings (null for the root).
 */
function postOrderWalk (root, childrenOf, iterator) {
  const pending = [{index: null, node: root}]
  const visited = []
  // Popping from a stack yields parent, then children right to left; reading
  // the collected list backwards therefore gives post-order.
  while (pending.length !== 0) {
    const {index, node} = pending.pop()
    const children = childrenOf(node) || []
    visited.push({
      index,
      node,
      firstChild: children[0] || null,
      childCount: children.length
    })
    for (let childIndex = 0; childIndex < children.length; childIndex++) {
      pending.push({index: childIndex, node: children[childIndex]})
    }
  }
  for (let i = visited.length - 1; i >= 0; i--) {
    iterator(visited[i])
  }
}

/**
 * Creates a width x height matrix whose cells are fresh empty arrays when
 * `arrays` is truthy and 0 otherwise.
 */
function initializeMappingMatrix (width, height, arrays) {
  const returned = new Array(width)
  for (let x = 0; x < width; x++) {
    returned[x] = new Array(height)
    for (let y = 0; y < height; y++) {
      returned[x][y] = arrays ? [] : 0
    }
  }
  return returned
}

/** Creates a width x height matrix filled with 0. */
function initializeMatrix (width, height) {
  const returned = new Array(width)
  for (let x = 0; x < width; x++) {
    returned[x] = new Array(height).fill(0)
  }
  return returned
}