From 8a31f49a30645aa4f90b1a08f3226ae91af4d3e0 Mon Sep 17 00:00:00 2001 From: ShikiSuen Date: Mon, 24 Oct 2022 23:43:27 +0800 Subject: [PATCH] Add extra security procedures to insertKey(). (#90) - This also turns Span into struct. --- Sources/Megrez/1_Compositor.swift | 19 +++++++++++++++---- Sources/Megrez/4_Span.swift | 8 ++++---- Tests/MegrezTests/MegrezTests.swift | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Sources/Megrez/1_Compositor.swift b/Sources/Megrez/1_Compositor.swift index 6e1175c..e0168cc 100644 --- a/Sources/Megrez/1_Compositor.swift +++ b/Sources/Megrez/1_Compositor.swift @@ -73,8 +73,14 @@ extension Megrez { @discardableResult public mutating func insertKey(_ key: String) -> Bool { guard !key.isEmpty, key != separator, langModel.hasUnigramsFor(key: key) else { return false } keys.insert(key, at: cursor) + let gridBackup = spans resizeGrid(at: cursor, do: .expand) - update() + let nodesInserted = update() + // 用來在 langModel.hasUnigramsFor() 結果不準確的時候防呆、恢復被搞壞的 spans。 + if nodesInserted == 0 { + spans = gridBackup + return false + } cursor += 1 // 游標必須得在執行 update() 之後才可以變動。 return true } @@ -219,7 +225,7 @@ extension Megrez.Compositor { /// (XXXXXXX? <-被砍爛的節點 /// ``` /// - Parameter location: 給定的幅位座標。 - func dropWreckedNodes(at location: Int) { + mutating func dropWreckedNodes(at location: Int) { let location = max(min(location, spans.count), 0) // 防呆 guard !spans.isEmpty else { return } let affectedLength = Megrez.Compositor.maxSpanLength - 1 @@ -230,7 +236,7 @@ extension Megrez.Compositor { } } - @discardableResult func insertNode(_ node: Node, at location: Int) -> Bool { + @discardableResult mutating func insertNode(_ node: Node, at location: Int) -> Bool { let location = max(min(location, spans.count - 1), 0) // 防呆 spans[location].append(node: node) return true @@ -254,9 +260,12 @@ extension Megrez.Compositor { return key == node.key } - func update() { + /// 根據當前狀況更新整個組字器的節點文脈。 + /// - Returns: 新增了多少節點。 + @discardableResult mutating func update() -> Int { let maxSpanLength = Megrez.Compositor.maxSpanLength let range = max(0, cursor - maxSpanLength).. Bool { + @discardableResult public mutating func append(node: Node) -> Bool { guard (1...maxSpanLength).contains(node.spanLength) else { return false } @@ -36,7 +36,7 @@ extension Megrez.Compositor { /// 丟掉任何不小於給定幅位長度的節點。 /// - Parameter length: 給定的幅位長度。 /// - Returns: 該操作是否成功執行。 - @discardableResult public func dropNodesOfOrBeyond(length: Int) -> Bool { + @discardableResult public mutating func dropNodesOfOrBeyond(length: Int) -> Bool { guard (1...maxSpanLength).contains(length) else { return false } diff --git a/Tests/MegrezTests/MegrezTests.swift b/Tests/MegrezTests/MegrezTests.swift index 4e0a225..484a62c 100644 --- a/Tests/MegrezTests/MegrezTests.swift +++ b/Tests/MegrezTests/MegrezTests.swift @@ -11,7 +11,7 @@ import XCTest final class MegrezTests: XCTestCase { func testSpan() throws { let langModel = SimpleLM(input: strSampleData) - let span = Megrez.Compositor.Span() + var span = Megrez.Compositor.Span() let n1 = Megrez.Compositor.Node(keyArray: ["gao1"], spanLength: 1, unigrams: langModel.unigramsFor(key: "gao1")) let n3 = Megrez.Compositor.Node( keyArray: ["gao1ke1ji4"], spanLength: 3, unigrams: langModel.unigramsFor(key: "gao1ke1ji4")