From a4587c08547a3050ab54fe9e3cbadbbd454eeb94 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Tue, 7 Mar 2023 10:15:05 +0800 Subject: [PATCH] Fix an issue that update() can ruin a span unit object. (#97) --- Sources/Megrez/1_Compositor.swift | 2 +- Sources/Megrez/4_SpanUnit.swift | 19 +++++++++++++++---- Tests/MegrezTests/LMDataForTests.swift | 5 ++++- Tests/MegrezTests/MegrezTests.swift | 8 ++++++++ 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/Sources/Megrez/1_Compositor.swift b/Sources/Megrez/1_Compositor.swift index 863c2c6..7c3c7a2 100644 --- a/Sources/Megrez/1_Compositor.swift +++ b/Sources/Megrez/1_Compositor.swift @@ -291,7 +291,7 @@ extension Megrez.Compositor { // 自動銷毀無效的節點。 if unigrams.isEmpty { if theNode.keyArray.count == 1 { continue } - spans[position].nodes.removeAll { $0 == theNode } + spans[position].nullify(node: theNode) } else { theNode.syncingUnigrams(from: unigrams) } diff --git a/Sources/Megrez/4_SpanUnit.swift b/Sources/Megrez/4_SpanUnit.swift index c74d271..27c3c81 100644 --- a/Sources/Megrez/4_SpanUnit.swift +++ b/Sources/Megrez/4_SpanUnit.swift @@ -24,10 +24,7 @@ extension Megrez.Compositor { /// 清除該幅位單元的全部的節點,且重設最長節點長度為 0,然後再在節點陣列內預留空位。 public func clear() { - nodes.removeAll() - for _ in 0 ..< maxSpanLength { - nodes.append(nil) - } + nodes = .init(repeating: nil, count: maxSpanLength) maxLength = 0 } @@ -43,6 +40,18 @@ extension Megrez.Compositor { return true } + /// 丟掉任何與給定節點完全雷同的節點。 + /// - Remark: Swift 不像 C# 那樣有容量鎖定型陣列, + /// 對某個位置的內容的刪除行為都可能會導致其它內容錯位、繼發其它不可知故障。 + /// 於是就提供了這個專門的工具函式。 + /// - Parameter node: 要參照的節點。 + public func nullify(node givenNode: Node) { + nodes.enumerated().forEach { index, theNode in + guard theNode == givenNode else { return } + nodes[index] = nil + } + } + /// 丟掉任何不小於給定幅位長度的節點。 /// - Parameter length: 給定的幅位長度。 /// - Returns: 該操作是否成功執行。 @@ -51,12 +60,14 @@ extension Megrez.Compositor { return false } for i in length ... maxSpanLength { + guard (0 ..< nodes.count).contains(i - 1) else { continue } // 防呆 nodes[i - 1] = nil } maxLength = 0 guard length > 1 else { return false } let maxR = length - 2 for i in 0 ... maxR { + guard (0 ..< nodes.count).contains(maxR - i) else { continue } // 防呆 if nodes[maxR - i] == nil { continue } maxLength = maxR - i + 1 break diff --git a/Tests/MegrezTests/LMDataForTests.swift b/Tests/MegrezTests/LMDataForTests.swift index 6f9445a..61a1386 100644 --- a/Tests/MegrezTests/LMDataForTests.swift +++ b/Tests/MegrezTests/LMDataForTests.swift @@ -40,7 +40,10 @@ class SimpleLM: LangModelProtocol { func trim(key: String, value: String) { guard var arr = mutDatabase[key] else { return } arr = arr.compactMap { $0.value == value ? nil : $0 } - guard !arr.isEmpty else { return } + guard !arr.isEmpty else { + mutDatabase[key] = nil + return + } mutDatabase[key] = arr } } diff --git a/Tests/MegrezTests/MegrezTests.swift b/Tests/MegrezTests/MegrezTests.swift index 27daf0c..44e4c6b 100644 --- a/Tests/MegrezTests/MegrezTests.swift +++ b/Tests/MegrezTests/MegrezTests.swift @@ -537,5 +537,13 @@ final class MegrezTests: XCTestCase { let newResult = compositor.walk().0.values.joined() print(newResult) XCTAssertEqual([oldResult, newResult], ["年中獎金", "年終獎金"]) + compositor.cursor = 4 + compositor.dropKey(direction: .rear) + compositor.dropKey(direction: .rear) + theLM.trim(key: "nian2zhong1", value: "年終") + compositor.update(updateExisting: true) + let newResult2 = compositor.walk().0.values + print(newResult2) + XCTAssertEqual(newResult2, ["年", "中"]) } }