Skip to content

Commit aceb282

Browse files
authored
Merge pull request #4 from trill-lang/fast-levenstein
Speed up edit distance algorithm
2 parents 0830734 + 40da474 commit aceb282

File tree

2 files changed

+45
-44
lines changed

2 files changed

+45
-44
lines changed

Sources/FileCheck/CheckString.swift

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -374,19 +374,21 @@ private func diagnoseFailedCheck(
374374
var BestLine : Int? = nil
375375
var BestQuality = 0.0
376376

377-
for i in 0..<min(buffer.count, 4096) {
378-
let exampleString : String
379-
if pattern.fixedString.isEmpty {
380-
exampleString = pattern.regExPattern
381-
} else {
382-
exampleString = pattern.fixedString
383-
}
377+
let exampleString : Substring
378+
if pattern.fixedString.isEmpty {
379+
exampleString = Substring(pattern.regExPattern)
380+
} else {
381+
exampleString = Substring(pattern.fixedString)
382+
}
384383

385-
if exampleString.isEmpty {
386-
break
387-
}
384+
// Bail with an empty check string.
385+
guard !exampleString.isEmpty else {
386+
return
387+
}
388388

389-
let char = buffer[buffer.index(buffer.startIndex, offsetBy: i)]
389+
for i in 0..<min(buffer.count, 4096) {
390+
let strIndex = buffer.index(buffer.startIndex, offsetBy: i)
391+
let char = buffer[strIndex]
390392
if char == "\n" {
391393
NumLinesForward += 1
392394
}
@@ -397,10 +399,18 @@ private func diagnoseFailedCheck(
397399
continue;
398400
}
399401

402+
let subEndIdx: String.Index
403+
if buffer.count < exampleString.count + i {
404+
subEndIdx = buffer.endIndex
405+
} else {
406+
subEndIdx = buffer.index(buffer.startIndex, offsetBy: exampleString.count + i)
407+
}
408+
let subBuffer = buffer[strIndex..<subEndIdx]
409+
400410
// Compute the "quality" of this match as an arbitrary combination of
401411
// the match distance and the number of lines skipped to get to this
402412
// match.
403-
let distance = editDistance(from: buffer.map{$0}, to: exampleString.map{$0})
413+
let distance = editDistance(from: subBuffer, to: exampleString)
404414
let quality = Double(distance) + (Double(NumLinesForward) / 100.0)
405415
if quality < BestQuality || BestLine == nil {
406416
BestLine = i

Sources/FileCheck/EditDistance.swift

Lines changed: 23 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,15 @@
1212
/// - returns: the minimum number of element insertions, removals, or (if
1313
/// `allowReplacements` is `true`) replacements needed to transform one of
1414
/// the given sequences into the other. If zero, the sequences are identical.
15-
func editDistance<T: Equatable>(from fa : [T], to ta : [T], allowReplacements : Bool = true, maxEditDistance : Int = 0) -> Int {
15+
func editDistance(from fa : Substring, to ta : Substring, allowReplacements : Bool = true, maxEditDistance : Int = 0) -> Int {
16+
guard !fa.isEmpty else {
17+
return ta.count
18+
}
19+
20+
guard !ta.isEmpty else {
21+
return fa.count
22+
}
23+
1624
// The algorithm implemented below is the "classic"
1725
// dynamic-programming algorithm for computing the Levenshtein
1826
// distance, which is described here:
@@ -25,38 +33,21 @@ func editDistance<T: Equatable>(from fa : [T], to ta : [T], allowReplacements :
2533
// only the entries to the left, top, and top-left are needed. The left
2634
// entry is in `row[x-1]`, the top entry is what's in `row[x]` from the last
2735
// iteration, and the top-left entry is stored in Previous.
28-
let m = fa.count
29-
let n = ta.count
30-
31-
var row = [Int](1...(n+1))
36+
var pre = [Int](0..<(ta.count + 1))
37+
var cur = [Int](repeating: 0, count: ta.count + 1)
3238

33-
for y in 1...m {
34-
row[0] = y
35-
var bestThisRow = row[0]
36-
37-
var previous = y - 1
38-
for x in 1...n {
39-
let oldRow = row[x]
40-
if allowReplacements {
41-
row[x] = min(
42-
previous + (fa[y - 1] == ta[x - 1] ? 0 : 1),
43-
min(row[x - 1], row[x]) + 1
44-
)
45-
} else {
46-
if fa[y-1] == ta[x-1] {
47-
row[x] = previous
48-
} else {
49-
row[x] = min(row[x-1], row[x]) + 1
50-
}
51-
}
52-
previous = oldRow
53-
bestThisRow = min(bestThisRow, row[x])
54-
}
55-
56-
if maxEditDistance != 0 && bestThisRow > maxEditDistance {
57-
return maxEditDistance + 1
39+
for (i, ca) in fa.enumerated() {
40+
cur[0] = i + 1;
41+
for (j, cb) in ta.enumerated() {
42+
cur[j + 1] = min(
43+
// deletion
44+
pre[j + 1] + 1, min(
45+
// insertion
46+
cur[j] + 1,
47+
// match or substitution
48+
pre[j] + (ca == cb ? 0 : 1)))
5849
}
50+
swap(&cur, &pre)
5951
}
60-
61-
return row[n]
52+
return pre[ta.count]
6253
}

0 commit comments

Comments
 (0)