From b60033a94bce666d81f75281cbb1bdc012acba5b Mon Sep 17 00:00:00 2001 From: Yin Wang Date: Sat, 1 Jun 2013 14:40:57 -0700 Subject: [PATCH] improvements on progress message and modularization --- demos/diff2.rkt | 482 +++++++----------------------------------------- diff-cpp.rkt | 6 +- diff-js.rkt | 7 +- diff-lisp.rkt | 20 +- diff.css | 2 +- diff.rkt | 432 +++---------------------------------------- nav.js | 2 +- parse-cpp.rkt | 4 +- parse-js.rkt | 5 +- parse-lisp.rkt | 5 +- parsec.rkt | 2 +- structs.rkt | 214 +++++++++++++++++++-- utils.rkt | 17 +- 13 files changed, 334 insertions(+), 864 deletions(-) diff --git a/demos/diff2.rkt b/demos/diff2.rkt index 40d5ca7..eb23708 100644 --- a/demos/diff2.rkt +++ b/demos/diff2.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify @@ -17,8 +17,9 @@ #lang racket -(require "utils.rkt") (require "structs.rkt") +(require "utils.rkt") + (provide (all-defined-out)) @@ -33,6 +34,12 @@ (define *move-size* 5) +;; Similar to *move-size*, but this number is used for internal moves inside a +;; named body (for example a function). This number can be smaller than +;; *move-size*, usually set to 2 for maxmum accuracy without much noise. +(define *inner-move-size* 2) + + ;; How long must a string be in order for us to use string-dist ;; function, which is costly when used on long strings but the most ;; accurate method to use. Currently this parameter is set to 0, @@ -49,83 +56,11 @@ - -;------------------------------------------------------------- -; data types -;------------------------------------------------------------- - -;; Change - a change in the data structure -;; - old : the old version, #f for insertions -;; - new : the new version, #f for deletions -;; - cost : the cost of change from old to new -;; - type : insertion, deletion, or modification? -(struct Change (old new cost type) #:transparent) - -;; HTML tag structure used HTML generation code -(struct Tag (tag idx start) #:transparent) - -(define ins? - (lambda (c) - (eq? 'ins (Change-type c)))) - -(define del? - (lambda (c) - (eq? 'del (Change-type c)))) - -(define mod? - (lambda (c) - (eq? 'mod (Change-type c)))) - - - -;----------------- utils for creating changes ---------------- -(define ins - (lambda (node) - (let ([size (node-size node)]) - (list (Change #f node size 'ins))))) - - -(define del - (lambda (node) - (let ([size (node-size node)]) - (list (Change node #f size 'del))))) - - -(define mod - (lambda (node1 node2 cost) - (list (Change node1 node2 cost 'mod)))) - - -(define mov - (lambda (node1 node2 cost) - (list (Change node1 node2 cost 'mov)))) - - -;; create a "total change" -;; (delete node1 completely and then insert node2) -(define total - (lambda (node1 node2) - (let ([size1 (node-size node1)] - [size2 (node-size node2)]) - (values (append (del node1) (ins node2)) - (+ size1 size2))))) - - -;; temporary workaround before the algorithm stablizes -(define mod->mov - (lambda (c) - (match c - [(Change node1 node2 cost 'mod) - (Change node1 node2 cost 'mov)] - [other other]))) - - - ;;------------------ frames utils -------------------- (define deframe (lambda (node) (match node - [(Node 'frame _ _ elts) + [(Node 'frame _ _ elts _ _) (apply append (map deframe elts))] [else (list node)]))) @@ -145,11 +80,11 @@ (define extract-frame (lambda (node1 node2 type) (match node1 - [(Node type1 start1 end1 elts1) + [(Node type1 start1 end1 elts1 size ctx) (let ([frame-elts (filter (lambda (x) (not (eq? x node2))) elts1)]) - (type (Node 'frame start1 start1 frame-elts)))] + (type (Node 'frame start1 start1 frame-elts (- size (node-size node2)) ctx)))] [_ fatal 'extract-frame "I only accept Node"]))) @@ -159,118 +94,6 @@ - - - -;------------------ operations on nodes --------------------- - -;; "virtual function" - get definition name -;; can be overridden by individual languages -(define get-name (lambda (node) #f)) - -(define set-get-name - (lambda (fun) - (set! get-name fun))) - - -;; "virtual function" - get node type -;; can be overridden by individual languages -(define get-type Node-type) - -(define set-get-type - (lambda (fun) - (set! get-type fun))) - - -;; same-def? only depend on get-name, so they need not be overridden -;; by individual languages. -(define same-def? - (lambda (e1 e2) - (cond - [(not (eq? (get-type e1) (get-type e2))) - #f] - [else - (let ([name1 (get-name e1)] - [name2 (get-name e2)]) - (and name1 name2 (equal? name1 name2)))]))) - - -(define set-same-def - (lambda (fun) - (set! same-def? fun))) - - - -;----------- node size function ------------ -(define *node-size-hash* (make-hasheq)) - -(define node-size - (lambda (node) - (define memo - (lambda (v) - (if (> v 1) - (hash-set! *node-size-hash* node v) - (void)) - v)) - (cond - [(pair? node) - (apply + (map node-size node))] - [(or (token? node) (str? node) (character? node)) 1] - [(Node? node) - (cond - [(hash-has-key? *node-size-hash* node) - (hash-ref *node-size-hash* node)] - [else - (memo (node-size (Node-elts node)))])] - [else 0]))) - - -(define node-depth - (lambda (node) - (cond - [(null? node) 0] - [(pair? node) - (apply max (map node-depth node))] - [(Node? node) - (add1 (node-depth (Node-elts node)))] - [else 0]))) - - -; (node-depth (parse-scheme "(lambda (x (x (y)) (y)) x)")) - - -(define uid - (let ([count 1] - [table (box '())]) - (lambda (node) - (let ([p (assq node (unbox table))]) - (cond - [(not p) - (let ([id count]) - (set! count (add1 count)) - (set-box! table (cons `(,node . ,id) (unbox table))) - id)] - [else - (cdr p)]))))) - - - -;; similarity string from a change -(define similarity - (lambda (change) - (let ([total (+ (node-size (Change-old change)) - (node-size (Change-new change)))]) - (cond - [(or (= 0 total) (= 0 (Change-cost change))) - "100%"] - [else - (string-append - (real->decimal-string - (* 100 (- 1.0 (/ (Change-cost change) total))) 1) - "%")])))) - - - ;------------------------------------------------------------- ; diff proper ;------------------------------------------------------------- @@ -479,42 +302,58 @@ +(define same-ctx? + (lambda (x y) + (and (Node? x) + (Node? y) + (Node-ctx x) + (Node-ctx y) + (>= (node-size x) *inner-move-size*) + (>= (node-size y) *inner-move-size*) + (eq? (Node-ctx x) (Node-ctx y))))) + ;; structure extraction (define diff-extract (lambda (node1 node2 move?) (cond - [(or (< (node-size node1) *move-size*) - (< (node-size node2) *move-size*)) - (values #f #f)] - [(and (Node? node1) (Node? node2)) + [(and (Node? node1) (Node? node2) + (or (same-ctx? node1 node2) + (and (>= (node-size node1) *move-size*) + (>= (node-size node2) *move-size*)))) (cond [(<= (node-size node1) (node-size node2)) (let loop ([elts2 (Node-elts node2)]) (cond - [(null? elts2) (values #f #f)] - [else - (letv ([(m0 c0) (diff-node node1 (car elts2) move?)]) + [(pair? elts2) + (letv ([(m0 c0) (diff-node node1 (car elts2) move?)]) (cond [(or (same-def? node1 (car elts2)) - (zero? c0)) + (and (zero? c0) + (or (> (node-size node1) *move-size*) + (same-ctx? node1 (car elts2))))) (let ([frame (extract-frame node2 (car elts2) ins)]) (values (append m0 frame) c0))] [else - (loop (cdr elts2))]))]))] + (loop (cdr elts2))]))] + [else + (values #f #f)]))] [else (let loop ([elts1 (Node-elts node1)]) (cond - [(null? elts1) (values #f #f)] - [else + [(pair? elts1) (letv ([(m0 c0) (diff-node (car elts1) node2 move?)]) (cond [(or (same-def? (car elts1) node2) - (zero? c0)) + (and (zero? c0) + (or (> (node-size node2) *move-size*) + (same-ctx? (car elts1) node2)))) (let ([frame (extract-frame node1 (car elts1) del)]) (values (append m0 frame) c0))] [else - (loop (cdr elts1))]))]))])] + (loop (cdr elts1))]))] + [else + (values #f #f)]))])] [else (values #f #f)]))) @@ -558,11 +397,10 @@ ;; iterate diff-list on the list of changes -(define closure +(define find-moves (lambda (changes) (set! *diff-hash* (make-hasheq)) (let loop ([changes changes] [closed '()] [count 1]) - (printf "~n[move pass #~a] " count) (letv ([dels (filter (predand del? big-change?) changes)] [adds (filter (predand ins? big-change?) changes)] [rest (set- changes (append dels adds))] @@ -570,7 +408,6 @@ [ls2 (sort (map Change-new adds) node-sort-fn)] [(m c) (diff-list ls1 ls2 #t)] [new-moves (map mod->mov (filter mod? m))]) - (printf "~n~a new moves found" (length new-moves)) (cond [(null? new-moves) (let ([all-changes (append m rest closed)]) @@ -582,232 +419,37 @@ (add1 count)))]))))) - - - -;------------------------------------------------------------- -; HTML generation -;------------------------------------------------------------- - -;----------------- utils ---------------- -(define qs - (lambda (x) - (let ([x (cond - [(symbol? x) (symbol->string x)] - [(number? x) (number->string x)] - [(string? x) x])]) - (string-append "'" x "'")))) - - -(define line - (lambda (port . s) - (display (string-append (apply string-append s) "\n") port))) - - - -(define change-tags - (lambda (changes side) - (let loop ([cs changes] [tags '()]) - (cond - [(null? cs) tags] - [else - (let ([key (if (eq? side 'left) - (Change-old (car cs)) - (Change-new (car cs)))]) - (cond - [(or (not key) - (= (Node-start key) (Node-end key))) - (loop (cdr cs) tags)] - [(and (Change-old (car cs)) (Change-new (car cs))) - (let ([startTag (Tag (link-start (car cs) side) - (Node-start key) -1)] - [endTag (Tag "" (Node-end key) (Node-start key))]) - (loop (cdr cs) (cons endTag (cons startTag tags))))] - [else - (let ([startTag (Tag (span-start (car cs) side) - (Node-start key) -1)] - [endTag (Tag "" (Node-end key) (Node-start key))]) - (loop (cdr cs) (cons endTag (cons startTag tags))))]))])))) - - -(define apply-tags - (lambda (s tags) - (let ([tags (sort tags tag-sort-fn)]) - (let loop ([tags tags] [curr 0] [out '()]) - (cond - [(null? tags) - (cond - [(< curr (string-length s)) - (loop tags (add1 curr) (cons (escape (string-ref s curr)) out))] - [else - (apply string-append (reverse out))])] - [else - (cond - [(< curr (Tag-idx (car tags))) - (loop tags (add1 curr) (cons (escape (string-ref s curr)) out))] - [else - (loop (cdr tags) curr (cons (Tag-tag (car tags)) out))])]))))) - - - -;; get the CSS class for the change -(define change-class - (lambda (change) - (cond - [(and (eq? (Change-type change) 'mov) - (> (Change-cost change) 0)) - "mc"] ; move-change - [(eq? (Change-type change) 'mov) "m"] ; move - [(> (Change-cost change) 0) "c"] ; change - [else "u"]))) ; unchanged - - - -(define link-start - (lambda (change side) - (let ([cls (change-class change)] - [me (if (eq? side 'left) - (Change-old change) - (Change-new change))] - [other (if (eq? side 'left) - (Change-new change) - (Change-old change))]) - (string-append - "")))) - - - -(define span-start - (lambda (change side) - (let ([cls (if (eq? 'del (Change-type change)) "d" "i")]) ; del or ins - (string-append "")))) - - - -(define tag-sort-fn - (lambda (t1 t2) - (cond - [(= (Tag-idx t1) (Tag-idx t2)) - (> (Tag-start t1) (Tag-start t2))] - [else - (< (Tag-idx t1) (Tag-idx t2))]))) - - -(define *escape-table* - '((#\" . """) - (#\' . "'") - (#\< . "<") - (#\> . ">"))) - - -(define escape - (lambda (c) - (cond - [(assq c *escape-table*) => cdr] - [else (char->string c)]))) - - - -; getting the base name of a path/file name -; (base-name "mk/mk-c.scm") => mk-c -(define base-name - (lambda (fn) - (let loop ([i (- (string-length fn) 1)] - [start -1] - [end (- (string-length fn) 1)]) - (cond - [(= i 0) - (substring fn i end)] - [(eq? (string-ref fn i) #\.) - (loop (sub1 i) start i)] - [(eq? (string-ref fn i) #\/) - (substring fn (add1 i) end)] - [else - (loop (sub1 i) start end)])))) - - - -(define html-header - (lambda (port) - (line port "") - (line port "") - (line port "") - (line port "") - (line port "") - (line port "") - (line port ""))) - - - -(define html-footer - (lambda (port) - (line port "") - (line port ""))) - - - -(define write-html - (lambda (port text side) - (line port (string-append "
")) - (line port "
")
-    (if (string=? side "left")
-        (line port "")
-        (line port ""))
-    (line port text)
-    (line port "
") - (line port "
"))) - - - ;; poor man's progress bar (define diff-progress (new-progress 10000)) - (define cleanup (lambda () - (set! *node-size-hash* (make-hasheq)) (set! *diff-hash* (make-hasheq)))) - ;; main diff function ;; returns all changes after diffing and moving (define diff (lambda (node1 node2) - (letv ([start (current-seconds)] - [(changes _) (diff-node node1 node2 #f)] - [_ (diff-progress "\nDone diffing")] -; [changes (closure changes)] - [_ (diff-progress "\nDone moving")] - [end (current-seconds)]) - (printf "finished in ~a seconds~n" (- end start)) - (cleanup) - changes))) - - - -(define htmlize - (lambda (changes file1 file2 text1 text2) - (letv ([tags1 (change-tags changes 'left)] - [tags2 (change-tags changes 'right)] - [tagged-text1 (apply-tags text1 tags1)] - [tagged-text2 (apply-tags text2 tags2)] - [out-file (string-append (base-name file1) "-" - (base-name file2) ".html")] - [port (open-output-file out-file - #:mode 'text - #:exists 'replace)]) - (html-header port) - (write-html port tagged-text1 "left") - (write-html port tagged-text2 "right") - (html-footer port) - (close-output-port port)))) - + (letv ([start (current-seconds)] ; start timer + [size1 (node-size node1)] + [size2 (node-size node2)]) + + (printf "[info] size of program 1: ~a~n" size1) + (printf "[info] size of program 2: ~a~n" size2) + + (set-node-context node1 'top) + (set-node-context node2 'top) + + (printf "[diffing]~n") + + (letv ([(changes cost) (diff-node node1 node2 #f)]) + (diff-progress 'reset) + (printf "~n[moving]~n") + (letv ([changes (find-moves changes)] + [end (current-seconds)]) + (printf "~n[finished] ~a seconds~n" (- end start)) + (cleanup) + changes))))) diff --git a/diff-cpp.rkt b/diff-cpp.rkt index 1ba0cd0..b606ed8 100644 --- a/diff-cpp.rkt +++ b/diff-cpp.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -18,10 +18,10 @@ #lang racket (require "structs.rkt") +(require "utils.rkt") (require "parse-cpp.rkt") (require "diff.rkt") -(require "utils.rkt") - +(require "htmlize.rkt") diff --git a/diff-js.rkt b/diff-js.rkt index 17a6853..ea574a7 100644 --- a/diff-js.rkt +++ b/diff-js.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -19,11 +19,10 @@ #lang racket (require "structs.rkt") +(require "utils.rkt") (require "parse-js.rkt") (require "diff.rkt") -(require "utils.rkt") - - +(require "htmlize.rkt") diff --git a/diff-lisp.rkt b/diff-lisp.rkt index be59fbe..ee767b4 100644 --- a/diff-lisp.rkt +++ b/diff-lisp.rkt @@ -1,9 +1,27 @@ +;; ydiff - a language-aware tool for comparing programs +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + + #lang racket (require "structs.rkt") +(require "utils.rkt") (require "parse-lisp.rkt") (require "diff.rkt") -(require "utils.rkt") +(require "htmlize.rkt") (define *keywords* diff --git a/diff.css b/diff.css index 874dece..f2aa7f2 100644 --- a/diff.css +++ b/diff.css @@ -1,5 +1,5 @@ /* ydiff - a language-aware tool for comparing programs */ -/* Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) */ +/* Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) */ /* This program is free software: you can redistribute it and/or modify */ diff --git a/diff.rkt b/diff.rkt index 4990892..eb23708 100644 --- a/diff.rkt +++ b/diff.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify @@ -17,8 +17,9 @@ #lang racket -(require "utils.rkt") (require "structs.rkt") +(require "utils.rkt") + (provide (all-defined-out)) @@ -55,78 +56,6 @@ - -;------------------------------------------------------------- -; data types -;------------------------------------------------------------- - -;; Change - a change in the data structure -;; - old : the old version, #f for insertions -;; - new : the new version, #f for deletions -;; - cost : the cost of change from old to new -;; - type : insertion, deletion, or modification? -(struct Change (old new cost type) #:transparent) - -;; HTML tag structure used HTML generation code -(struct Tag (tag idx start) #:transparent) - -(define ins? - (lambda (c) - (eq? 'ins (Change-type c)))) - -(define del? - (lambda (c) - (eq? 'del (Change-type c)))) - -(define mod? - (lambda (c) - (eq? 'mod (Change-type c)))) - - - -;----------------- utils for creating changes ---------------- -(define ins - (lambda (node) - (let ([size (node-size node)]) - (list (Change #f node size 'ins))))) - - -(define del - (lambda (node) - (let ([size (node-size node)]) - (list (Change node #f size 'del))))) - - -(define mod - (lambda (node1 node2 cost) - (list (Change node1 node2 cost 'mod)))) - - -(define mov - (lambda (node1 node2 cost) - (list (Change node1 node2 cost 'mov)))) - - -;; create a "total change" -;; (delete node1 completely and then insert node2) -(define total - (lambda (node1 node2) - (let ([size1 (node-size node1)] - [size2 (node-size node2)]) - (values (append (del node1) (ins node2)) - (+ size1 size2))))) - - -;; temporary workaround before the algorithm stablizes -(define mod->mov - (lambda (c) - (match c - [(Change node1 node2 cost 'mod) - (Change node1 node2 cost 'mov)] - [other other]))) - - - ;;------------------ frames utils -------------------- (define deframe (lambda (node) @@ -165,122 +94,6 @@ - - - -;------------------ operations on nodes --------------------- - -;; "virtual function" - get definition name -;; can be overridden by individual languages -(define get-name (lambda (node) #f)) - -(define set-get-name - (lambda (fun) - (set! get-name fun))) - - -;; "virtual function" - get node type -;; can be overridden by individual languages -(define get-type Node-type) - -(define set-get-type - (lambda (fun) - (set! get-type fun))) - - -;; same-def? only depend on get-name, so they need not be overridden -;; by individual languages. -(define same-def? - (lambda (e1 e2) - (cond - [(not (eq? (get-type e1) (get-type e2))) - #f] - [else - (let ([name1 (get-name e1)] - [name2 (get-name e2)]) - (and name1 name2 (equal? name1 name2)))]))) - - -(define set-same-def - (lambda (fun) - (set! same-def? fun))) - - - -;----------- node size function ------------ -(define node-size - (lambda (node) - (cond - [(and (Node? node) (Node-size node)) - (Node-size node)] - [(pair? node) - (apply + (map node-size node))] - [(or (token? node) (str? node) (character? node)) 1] - [(Node? node) - (let ([size (node-size (Node-elts node))]) - (set-Node-size! node size) - size)] - [else 0]))) - - -(define node-depth - (lambda (node) - (cond - [(null? node) 0] - [(pair? node) - (apply max (map node-depth node))] - [(Node? node) - (add1 (node-depth (Node-elts node)))] - [else 0]))) - - -; (node-depth (parse-scheme "(lambda (x (x (y)) (y)) x)")) - - -(define set-node-context - (lambda (node ctx) - (cond - [(pair? node) - (map (lambda (n) (set-node-context n ctx)) node)] - [(Node? node) - (let ([name (or (get-name node) ctx)]) - (set-Node-ctx! node name) - (set-node-context (Node-elts node) name))]))) - - - -(define uid - (let ([count 1] - [table (box '())]) - (lambda (node) - (let ([p (assq node (unbox table))]) - (cond - [(not p) - (let ([id count]) - (set! count (add1 count)) - (set-box! table (cons `(,node . ,id) (unbox table))) - id)] - [else - (cdr p)]))))) - - - -;; similarity string from a change -(define similarity - (lambda (change) - (let ([total (+ (node-size (Change-old change)) - (node-size (Change-new change)))]) - (cond - [(or (= 0 total) (= 0 (Change-cost change))) - "100%"] - [else - (string-append - (real->decimal-string - (* 100 (- 1.0 (/ (Change-cost change) total))) 1) - "%")])))) - - - ;------------------------------------------------------------- ; diff proper ;------------------------------------------------------------- @@ -588,7 +401,6 @@ (lambda (changes) (set! *diff-hash* (make-hasheq)) (let loop ([changes changes] [closed '()] [count 1]) - (printf "~n[move pass #~a] " count) (letv ([dels (filter (predand del? big-change?) changes)] [adds (filter (predand ins? big-change?) changes)] [rest (set- changes (append dels adds))] @@ -596,7 +408,6 @@ [ls2 (sort (map Change-new adds) node-sort-fn)] [(m c) (diff-list ls1 ls2 #t)] [new-moves (map mod->mov (filter mod? m))]) - (printf "~n~a new moves found" (length new-moves)) (cond [(null? new-moves) (let ([all-changes (append m rest closed)]) @@ -608,236 +419,37 @@ (add1 count)))]))))) - - - -;------------------------------------------------------------- -; HTML generation -;------------------------------------------------------------- - -;----------------- utils ---------------- -(define qs - (lambda (x) - (let ([x (cond - [(symbol? x) (symbol->string x)] - [(number? x) (number->string x)] - [(string? x) x])]) - (string-append "'" x "'")))) - - -(define line - (lambda (port . s) - (display (string-append (apply string-append s) "\n") port))) - - - -(define change-tags - (lambda (changes side) - (let loop ([cs changes] [tags '()]) - (cond - [(null? cs) tags] - [else - (let ([key (if (eq? side 'left) - (Change-old (car cs)) - (Change-new (car cs)))]) - (cond - [(or (not key) - (= (Node-start key) (Node-end key))) - (loop (cdr cs) tags)] - [(and (Change-old (car cs)) (Change-new (car cs))) - (let ([startTag (Tag (link-start (car cs) side) - (Node-start key) -1)] - [endTag (Tag "" (Node-end key) (Node-start key))]) - (loop (cdr cs) (cons endTag (cons startTag tags))))] - [else - (let ([startTag (Tag (span-start (car cs) side) - (Node-start key) -1)] - [endTag (Tag "" (Node-end key) (Node-start key))]) - (loop (cdr cs) (cons endTag (cons startTag tags))))]))])))) - - -(define apply-tags - (lambda (s tags) - (let ([tags (sort tags tag-sort-fn)]) - (let loop ([tags tags] [curr 0] [out '()]) - (cond - [(null? tags) - (cond - [(< curr (string-length s)) - (loop tags (add1 curr) (cons (escape (string-ref s curr)) out))] - [else - (apply string-append (reverse out))])] - [else - (cond - [(< curr (Tag-idx (car tags))) - (loop tags (add1 curr) (cons (escape (string-ref s curr)) out))] - [else - (loop (cdr tags) curr (cons (Tag-tag (car tags)) out))])]))))) - - - -;; get the CSS class for the change -(define change-class - (lambda (change) - (cond - [(and (eq? (Change-type change) 'mov) - (> (Change-cost change) 0)) - "mc"] ; move-change - [(eq? (Change-type change) 'mov) "m"] ; move - [(> (Change-cost change) 0) "c"] ; change - [else "u"]))) ; unchanged - - - -(define link-start - (lambda (change side) - (let ([cls (change-class change)] - [me (if (eq? side 'left) - (Change-old change) - (Change-new change))] - [other (if (eq? side 'left) - (Change-new change) - (Change-old change))]) - (string-append - "")))) - - - -(define span-start - (lambda (change side) - (let ([cls (if (eq? 'del (Change-type change)) "d" "i")]) ; del or ins - (string-append "")))) - - - -(define tag-sort-fn - (lambda (t1 t2) - (cond - [(= (Tag-idx t1) (Tag-idx t2)) - (> (Tag-start t1) (Tag-start t2))] - [else - (< (Tag-idx t1) (Tag-idx t2))]))) - - -(define *escape-table* - '((#\" . """) - (#\' . "'") - (#\< . "<") - (#\> . ">"))) - - -(define escape - (lambda (c) - (cond - [(assq c *escape-table*) => cdr] - [else (char->string c)]))) - - - -; getting the base name of a path/file name -; (base-name "mk/mk-c.scm") => mk-c -(define base-name - (lambda (fn) - (let loop ([i (- (string-length fn) 1)] - [start -1] - [end (- (string-length fn) 1)]) - (cond - [(= i 0) - (substring fn i end)] - [(eq? (string-ref fn i) #\.) - (loop (sub1 i) start i)] - [(eq? (string-ref fn i) #\/) - (substring fn (add1 i) end)] - [else - (loop (sub1 i) start end)])))) - - - -(define html-header - (lambda (port) - (line port "") - (line port "") - (line port "") - (line port "") - (line port "") - (line port "") - (line port ""))) - - - -(define html-footer - (lambda (port) - (line port "") - (line port ""))) - - - -(define write-html - (lambda (port text side) - (line port (string-append "
")) - (line port "
")
-    (if (string=? side "left")
-        (line port "")
-        (line port ""))
-    (line port text)
-    (line port "
") - (line port "
"))) - - - ;; poor man's progress bar (define diff-progress (new-progress 10000)) - (define cleanup (lambda () (set! *diff-hash* (make-hasheq)))) - ;; main diff function ;; returns all changes after diffing and moving (define diff (lambda (node1 node2) - (letv ([size1 (node-size node1)] + (letv ([start (current-seconds)] ; start timer + [size1 (node-size node1)] [size2 (node-size node2)]) - (printf "size of program 1: ~a~n" size1) - (printf "size of program 2: ~a~n" size2)) - (letv ([start (current-seconds)] - [_ (set-node-context node1 'top)] - [_ (set-node-context node2 'top)] - [(changes _) (diff-node node1 node2 #f)] - [_ (diff-progress "\nDone diffing")] - [changes (find-moves changes)] - [_ (diff-progress "\nDone moving")] - [end (current-seconds)]) - (printf "finished in ~a seconds~n" (- end start)) - (cleanup) - changes))) - - - -(define htmlize - (lambda (changes file1 file2 text1 text2) - (letv ([tags1 (change-tags changes 'left)] - [tags2 (change-tags changes 'right)] - [tagged-text1 (apply-tags text1 tags1)] - [tagged-text2 (apply-tags text2 tags2)] - [out-file (string-append (base-name file1) "-" - (base-name file2) ".html")] - [port (open-output-file out-file - #:mode 'text - #:exists 'replace)]) - (html-header port) - (write-html port tagged-text1 "left") - (write-html port tagged-text2 "right") - (html-footer port) - (close-output-port port)))) + + (printf "[info] size of program 1: ~a~n" size1) + (printf "[info] size of program 2: ~a~n" size2) + + (set-node-context node1 'top) + (set-node-context node2 'top) + + (printf "[diffing]~n") + + (letv ([(changes cost) (diff-node node1 node2 #f)]) + (diff-progress 'reset) + (printf "~n[moving]~n") + (letv ([changes (find-moves changes)] + [end (current-seconds)]) + (printf "~n[finished] ~a seconds~n" (- end start)) + (cleanup) + changes))))) diff --git a/nav.js b/nav.js index a052846..11ae2b7 100644 --- a/nav.js +++ b/nav.js @@ -1,5 +1,5 @@ // ydiff - a language-aware tool for comparing programs -// Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +// Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) // This program is free software: you can redistribute it and/or modify diff --git a/parse-cpp.rkt b/parse-cpp.rkt index abe4af9..cdd6803 100644 --- a/parse-cpp.rkt +++ b/parse-cpp.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ #lang racket (require "structs.rkt") -(require "parsec.rkt") (require "utils.rkt") +(require "parsec.rkt") (provide parse-cpp) diff --git a/parse-js.rkt b/parse-js.rkt index b287e21..f59c254 100644 --- a/parse-js.rkt +++ b/parse-js.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -19,8 +19,9 @@ #lang racket (require "structs.rkt") -(require "parsec.rkt") (require "utils.rkt") +(require "parsec.rkt") + (provide parse-js) diff --git a/parse-lisp.rkt b/parse-lisp.rkt index cac0725..47d2570 100644 --- a/parse-lisp.rkt +++ b/parse-lisp.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by @@ -17,8 +17,9 @@ #lang racket -(require "parsec.rkt") +(require "structs.rkt") (require "utils.rkt") +(require "parsec.rkt") (provide parse-lisp) diff --git a/parsec.rkt b/parsec.rkt index bdf019b..2240627 100644 --- a/parsec.rkt +++ b/parsec.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by diff --git a/structs.rkt b/structs.rkt index fcc573e..99c477c 100644 --- a/structs.rkt +++ b/structs.rkt @@ -1,3 +1,21 @@ +;; ydiff - a language-aware tool for comparing programs +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + + #lang racket (provide (all-defined-out)) @@ -6,19 +24,26 @@ ;------------------------------------------------------------- ; data types ;------------------------------------------------------------- -(struct Node (type start end elts [size #:mutable] [ctx #:mutable]) #:transparent) + + +;---------------------------- Node --------------------------- +(struct Node (type + start + end + elts + [size #:mutable] + [ctx #:mutable]) + #:transparent) (define comment? (lambda (n) (and (Node? n) (eq? 'comment (Node-type n))))) - (define phantom? (lambda (n) (and (Node? n) (eq? 'phantom (Node-type n))))) - (define token? (lambda (n) (and (Node? n) (eq? 'token (Node-type n))))) @@ -36,14 +61,151 @@ (and (Node? n) (eq? 'newline (Node-type n))))) -(define decode-ast - (lambda (exp) - (match exp - [`(Node ',type ,start ,end ,elts ,size ,ctx) - (Node start end type (decode-ast elts) size ctx)] - [`(list ,elts ...) - (map decode-ast elts)] - [''() '()]))) +;----------- node size function ------------ +(define node-size + (lambda (node) + (cond + [(and (Node? node) (Node-size node)) + (Node-size node)] + [(pair? node) + (apply + (map node-size node))] + [(or (token? node) (str? node) (character? node)) 1] + [(Node? node) + (let ([size (node-size (Node-elts node))]) + (set-Node-size! node size) + size)] + [else 0]))) + + +(define node-depth + (lambda (node) + (cond + [(null? node) 0] + [(pair? node) + (apply max (map node-depth node))] + [(Node? node) + (add1 (node-depth (Node-elts node)))] + [else 0]))) + + +; (node-depth (parse-scheme "(lambda (x (x (y)) (y)) x)")) + + +(define set-node-context + (lambda (node ctx) + (cond + [(pair? node) + (map (lambda (n) (set-node-context n ctx)) node)] + [(Node? node) + (let ([name (or (get-name node) ctx)]) + (set-Node-ctx! node name) + (set-node-context (Node-elts node) name))]))) + + +;------------------ operations on nodes --------------------- + +;; "virtual function" - get definition name +;; can be overridden by individual languages +(define get-name (lambda (node) #f)) + +(define set-get-name + (lambda (fun) + (set! get-name fun))) + + +;; "virtual function" - get node type +;; can be overridden by individual languages +(define get-type Node-type) + +(define set-get-type + (lambda (fun) + (set! get-type fun))) + + +;; same-def? only depend on get-name, so they need not be overridden +;; by individual languages. +(define same-def? + (lambda (e1 e2) + (cond + [(not (eq? (get-type e1) (get-type e2))) + #f] + [else + (let ([name1 (get-name e1)] + [name2 (get-name e2)]) + (and name1 name2 (equal? name1 name2)))]))) + + +(define set-same-def + (lambda (fun) + (set! same-def? fun))) + + + +;---------------------------- Change --------------------------- +;; Change - a change in the data structure +;; - old : the old version, #f for insertions +;; - new : the new version, #f for deletions +;; - cost : the cost of change from old to new +;; - type : insertion, deletion, or modification? +(struct Change (old new cost type) #:transparent) + +(define ins? + (lambda (c) + (eq? 'ins (Change-type c)))) + +(define del? + (lambda (c) + (eq? 'del (Change-type c)))) + +(define mod? + (lambda (c) + (eq? 'mod (Change-type c)))) + + + +;----------------- utils for creating changes ---------------- +(define ins + (lambda (node) + (let ([size (node-size node)]) + (list (Change #f node size 'ins))))) + +(define del + (lambda (node) + (let ([size (node-size node)]) + (list (Change node #f size 'del))))) + +(define mod + (lambda (node1 node2 cost) + (list (Change node1 node2 cost 'mod)))) + +(define mov + (lambda (node1 node2 cost) + (list (Change node1 node2 cost 'mov)))) + + +;; create a "total change" +;; (delete node1 completely and then insert node2) +(define total + (lambda (node1 node2) + (let ([size1 (node-size node1)] + [size2 (node-size node2)]) + (values (append (del node1) (ins node2)) + (+ size1 size2))))) + + +;; temporary workaround before the algorithm stablizes +(define mod->mov + (lambda (c) + (match c + [(Change node1 node2 cost 'mod) + (Change node1 node2 cost 'mov)] + [other other]))) + + + +;---------------------------- Tag --------------------------- +;; HTML tag structure used HTML generation code +(struct Tag (tag idx start) #:transparent) (define get-symbol @@ -89,3 +251,33 @@ ;; (match-tags (car (parse1 $statement "function f(x) {}")) ;; '(function name)) + +(define uid + (let ([count 1] + [table (box '())]) + (lambda (node) + (let ([p (assq node (unbox table))]) + (cond + [(not p) + (let ([id count]) + (set! count (add1 count)) + (set-box! table (cons `(,node . ,id) (unbox table))) + id)] + [else + (cdr p)]))))) + + + +;; similarity string from a change +(define similarity + (lambda (change) + (let ([total (+ (node-size (Change-old change)) + (node-size (Change-new change)))]) + (cond + [(or (= 0 total) (= 0 (Change-cost change))) + "100%"] + [else + (string-append + (real->decimal-string + (* 100 (- 1.0 (/ (Change-cost change) total))) 1) + "%")])))) diff --git a/utils.rkt b/utils.rkt index edf6aff..bb1742e 100644 --- a/utils.rkt +++ b/utils.rkt @@ -1,5 +1,5 @@ ;; ydiff - a language-aware tool for comparing programs -;; Copyright (C) 2011 Yin Wang (yinwang0@gmail.com) +;; Copyright (C) 2011-2013 Yin Wang (yinwang0@gmail.com) ;; This program is free software: you can redistribute it and/or modify @@ -113,21 +113,26 @@ (define new-progress (lambda (size) - (let ([counter 0]) + (let ([counter 0] + [dots 0]) (lambda (x) (cond - [(string? x) - (display x) - (display "\n") - (flush-output)] + [(eq? x 'reset) + (set! counter 0) + (set! dots 0)] [(= 0 (remainder counter size)) (set! counter (+ x counter)) (display ".") + (set! dots (+ dots 1)) + (if (= 0 (modulo dots 60)) + (display "\n") + (void)) (flush-output)] [else (set! counter (+ x counter))]))))) + ;;----------------- multi dimensional eq hash -------------------- (define hash-put!