From f9135f9f798af5ea9c2ab571ceaa128a03f69ea6 Mon Sep 17 00:00:00 2001 From: Karl Wagner <5254025+karwa@users.noreply.github.com> Date: Wed, 3 Jan 2024 00:14:59 +0100 Subject: [PATCH] SegmentedLine improvements --- Package.swift | 2 +- Package@swift-5.5.swift | 2 +- .../Shared/SegmentedLine.swift | 754 ++++++++++++------ .../SegmentedLineTests.swift | 278 ++++++- 4 files changed, 780 insertions(+), 256 deletions(-) diff --git a/Package.swift b/Package.swift index fe967fa18..47fa09958 100644 --- a/Package.swift +++ b/Package.swift @@ -64,7 +64,7 @@ let package = Package( ), .testTarget( name: "UnicodeDataStructuresTests", - dependencies: ["UnicodeDataStructures"], + dependencies: ["UnicodeDataStructures", "Checkit"], resources: [.copy("GenerateData/TableDefinitions")] ), diff --git a/Package@swift-5.5.swift b/Package@swift-5.5.swift index 215916357..1b454b276 100644 --- a/Package@swift-5.5.swift +++ b/Package@swift-5.5.swift @@ -64,7 +64,7 @@ let package = Package( ), .testTarget( name: "UnicodeDataStructuresTests", - dependencies: ["UnicodeDataStructures"], + dependencies: ["UnicodeDataStructures", "Checkit"], resources: [.copy("GenerateData/TableDefinitions")] ), diff --git a/Sources/UnicodeDataStructures/Shared/SegmentedLine.swift b/Sources/UnicodeDataStructures/Shared/SegmentedLine.swift index 09f5416d8..f6c140629 100644 --- a/Sources/UnicodeDataStructures/Shared/SegmentedLine.swift +++ b/Sources/UnicodeDataStructures/Shared/SegmentedLine.swift @@ -21,21 +21,23 @@ /// ```swift /// var line = SegmentedLine(bounds: 0..<100, value: nil) /// -/// // After setting values <5 to "small" and values >10 to "large", -/// // the gap is left with its previous value, "medium". 
-/// -/// line.set(0..<20, to: "medium") -/// line.set(0..<5, to: "small") +/// line.set(0..<10, to: "small") +/// line.set(5..<20, to: "medium") /// line.set(10..<60, to: "large") /// print(line) -/// // | [0..<5]: "small" | [5..<10]: "medium" | [10..<60]: "large" | [60..<100]: nil | +/// // ┬ +/// // ├ [0..<5]: "small" +/// // ├ [5..<10]: "medium" +/// // ├ [10..<60]: "large" +/// // ├ [60..<100]: nil +/// // ┴ /// ``` /// -/// The locations on a `SegmentedLine` do not have to be integers - they can be any `Comparable` type, -/// including dates, strings, Unicode scalars (for building character sets), or `Collection` indexes. +/// The locations on a `SegmentedLine` do not have to be integers - they can be any `Comparable` values, +/// including dates, strings, and `Collection` indexes. /// -/// In the latter case, we can model a Collection's elements as a line from its `startIndex` to its `endIndex`, -/// allowing us to annotate regions of any Collection. In a way, it can be used as a generalized `AttributedString`. +/// For example, a line from a collection's `startIndex` to its `endIndex` can be used to annotate +/// regions of its elements, and can be used in a similar way to `AttributedString`. /// /// ```swift /// let string = "Bob is feeling great" @@ -48,11 +50,10 @@ /// value: [Font.custom("Comic Sans")] as [Any] /// ) /// -/// // Set each word to a different color. -/// // Use 'modify' to append the attribute, but only for the region -/// // we're modifying. +/// // Use the '.modify' function to append a color attribute +/// // to each word. /// -/// for word: Substring in string.split(separator: " ") { +/// for word in string.split(separator: " ") { /// tags.modify(word.startIndex.. where Bound: Comparable { - @usableFromInline internal typealias BreakPoint = (location: Bound, value: Value) + /// The segment information for this line. 
+ /// + /// Each entry contains a location, and a value which applies from that location + /// until the next entry's location. + /// + /// The list is sorted by location and is never empty. + /// The first entry defines the line's lowerBound. + /// + @usableFromInline + internal var _breakpoints: BreakpointStorage - // This array must never be empty. - // There must always be an initial breakPoint which defines our lowerBound and starting value. - @usableFromInline internal var _data: [BreakPoint] + /// The line's overall upperBound. + /// + /// The final entry in `_breakpoints` ends at this location. + /// + @usableFromInline + internal var _upperBound: Bound - // The value of `Bound` at which the final breakPoint ends. - // This is necessary to ensure that all regions of the table cover an expressible range of positions, - // without requiring a concept like Swift's `PartialRangeFrom` for the final region. - @usableFromInline internal var _upperBound: Bound + /// Memberwise initializer. + /// + /// Would be **fileprivate**, but must be internal so it can be @inlinable. + /// + @inlinable + internal init(_breakpoints: BreakpointStorage, _upperBound: Bound) { + precondition(_upperBound > _breakpoints.locations[0], "Attempt to create a SegmentedLine with invalid bounds") + self._breakpoints = _breakpoints + self._upperBound = _upperBound + } /// Creates a new space with the given bounds and value. /// /// All locations within the bounds will be assigned the initial value. /// /// ```swift - /// let line = SegmentedLine(bounds: 0..<100, value: "default") - /// print(line) // | [0..<100]: default | + /// let line = SegmentedLine(bounds: 0..<100, value: "default") + /// print(line) // [0..<100]: "default" /// ``` /// /// `bounds` must not be empty. 
/// @inlinable public init(bounds: Range, value: Value) { - precondition(bounds.lowerBound < bounds.upperBound, "Invalid range for SegmentedLine bounds") - self._data = [(location: bounds.lowerBound, value: value)] - self._upperBound = bounds.upperBound + self.init( + _breakpoints: BreakpointStorage(locations: [bounds.lowerBound], values: [value]), + _upperBound: bounds.upperBound + ) } +} - /// Memberwise initializer. Would be fileprivate, but must be internal so it can also be @inlinable. + +// -------------------------------------------- +// MARK: - Breakpoint Storage +// -------------------------------------------- + + +extension SegmentedLine { + + /// The storage for a SegmentedLine. + /// + /// This type manages two separate physical allocations (`locations` and `values`) + /// as a single logical list of `(Bound, Value)` pairs. It maintains the following invariants: + /// + /// - `locations` and `values` always have the same length. + /// - `locations` and `values` are never empty. 
/// + @usableFromInline + internal struct BreakpointStorage { + + @usableFromInline + internal private(set) var locations: [Bound] + + @usableFromInline + internal private(set) var values: [Value] + + @inlinable + internal init(locations: [Bound], values: [Value]) { + precondition(!locations.isEmpty && locations.count == values.count) + self.locations = locations + self.values = values + } + } +} + +extension SegmentedLine.BreakpointStorage: RandomAccessCollection { + + @usableFromInline + internal typealias Index = Int + + @usableFromInline + internal typealias Element = (location: Bound, value: Value) + @inlinable - internal init(_upperBound: Bound, _data: [BreakPoint]) { - precondition(!_data.isEmpty && _upperBound > _data[0].0, "SegmentedLine is invalid") - self._upperBound = _upperBound - self._data = _data + internal var startIndex: Index { locations.startIndex } + + @inlinable + internal var endIndex: Index { locations.endIndex } + + @inlinable + internal var count: Int { locations.count } + + @inlinable + internal var isEmpty: Bool { false } + + @inlinable + internal func index(after i: Index) -> Index { + let (result, overflow) = i.addingReportingOverflow(1) + assert(!overflow, "Invalid index - operation overflowed") + return result + } + + @inlinable + internal func index(before i: Index) -> Index { + let (result, overflow) = i.subtractingReportingOverflow(1) + assert(!overflow, "Invalid index - operation overflowed") + return result } @inlinable - internal var _lowerBound: Bound { - _data[0].location + internal func index(_ i: Index, offsetBy distance: Int) -> Index { + let (result, overflow) = i.addingReportingOverflow(distance) + assert(!overflow, "Invalid index - operation overflowed") + return result + } + + @inlinable + internal func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? { + let (l, overflow) = limit.subtractingReportingOverflow(i) + assert(!overflow, "Invalid index - operation overflowed") + if distance > 0 ? 
l >= 0 && l < distance : l <= 0 && distance < l { + return nil + } + return index(i, offsetBy: distance) + } + + @inlinable + internal func formIndex(after i: inout Index) { + let overflow: Bool + (i, overflow) = i.addingReportingOverflow(1) + assert(!overflow, "Invalid index - operation overflowed") + } + + @inlinable + internal func formIndex(before i: inout Index) { + let overflow: Bool + (i, overflow) = i.subtractingReportingOverflow(1) + assert(!overflow, "Invalid index - operation overflowed") + } + + @inlinable + internal func formIndex(_ i: inout Index, offsetBy distance: Int) { + let overflow: Bool + (i, overflow) = i.addingReportingOverflow(distance) + assert(!overflow, "Invalid index - operation overflowed") + } + + @inlinable + internal func distance(from i: Index, to j: Index) -> Int { + let (result, overflow) = j.subtractingReportingOverflow(i) + assert(!overflow, "Invalid index - operation overflowed") + return result + } + + @inlinable + internal subscript(i: Index) -> Element { + precondition(i >= startIndex && i < endIndex, "Index out of bounds") + return locations.withUnsafeBufferPointer { locationsPtr in + values.withUnsafeBufferPointer { valuesPtr in + (locationsPtr[i], valuesPtr[i]) + } + } } } -extension SegmentedLine { +extension SegmentedLine.BreakpointStorage { - /// The bounds of this space. - /// - /// All locations within these bounds have an assigned value. 
- /// @inlinable - public var bounds: Range { - Range(uncheckedBounds: (_lowerBound, _upperBound)) + internal subscript(valueAt i: Index) -> Value { + get { + values[i] + } + _modify { + yield &values[i] + } + set { + values[i] = newValue + } + } + + @inlinable + internal mutating func insert(_ newElement: Element, at index: Index) { + locations.insert(newElement.location, at: index) + values.insert(newElement.value, at: index) + } + + @inlinable + internal mutating func append(_ newElement: Element) { + locations.append(newElement.location) + values.append(newElement.value) + } + + @inlinable + internal mutating func removeSubrange(_ bounds: Range) { + locations.removeSubrange(bounds) + values.removeSubrange(bounds) + precondition(!locations.isEmpty, "Removed all breakpoints") } } // -------------------------------------------- -// MARK: - Segments +// MARK: - Segments View // -------------------------------------------- extension SegmentedLine { - /// The assigned regions of the space. + /// The segments of this line. /// - /// A `SegmentedLine` divides its bounds in to segments. Values are assigned to entire segments, - /// and apply to all locations within the segment. + /// A `SegmentedLine` divides its bounds in to segments. Each segment starts where its predecessor ends, + /// with no gaps, so that every value within the bounds belongs to a segment. + /// Each segment has an associated value. 
/// /// ```swift /// var line = SegmentedLine(bounds: 0..<100, value: nil) /// - /// line.set(0..<20, to: "medium") - /// line.set(0..<5, to: "small") + /// for (range, value) in line.segments { + /// print(range, value) + /// // Prints: + /// // 0..<100 nil + /// } + /// + /// line.set(0..<10, to: "small") + /// line.set(5..<20, to: "medium") /// line.set(10..<60, to: "large") /// /// for (range, value) in line.segments { /// print(range, value) /// // Prints: - /// // 0..<5 small - /// // 5..<10 medium - /// // 10..<60 large + /// // 0..<5 "small" + /// // 5..<10 "medium" + /// // 10..<60 "large" /// // 60..<100 nil /// } /// ``` /// - /// There are no gaps between segments - each segment starts where its predecessor ends. - /// Every `SegmentedLine` begins with at least one segment, assigning a value to its entire ``bounds``. - /// /// Segments are created as needed when values are assigned or modified. Consecutive segments with the same value - /// are _not_ automatically merged (there is not even any requirement that values are `Equatable`), - /// but they can be merged explicitly using the ``combineSegments(while:)`` function. + /// are _not_ merged automatically, but can be merged manually using the ``combineSegments(while:)`` function. + /// + /// The segment containing a location can be found by using the ``Segments-swift.struct/index(of:)`` function. /// @inlinable public var segments: Segments { - Segments(_line: self) + Segments(self) } public struct Segments { @@ -181,7 +339,7 @@ extension SegmentedLine { internal var _line: SegmentedLine @inlinable - internal init(_line: SegmentedLine) { + internal init(_ _line: SegmentedLine) { self._line = _line } } @@ -191,257 +349,347 @@ extension SegmentedLine.Segments: RandomAccessCollection { public struct Index: Comparable { - /// An index in to the line's `_data` Array. 
@usableFromInline - internal var _breakPointIndex: Int + internal var _breakpointIndex: SegmentedLine.BreakpointStorage.Index @inlinable - internal init(_breakPointIndex: Int) { - self._breakPointIndex = _breakPointIndex + internal init(_ _breakpointsIndex: SegmentedLine.BreakpointStorage.Index) { + self._breakpointIndex = _breakpointsIndex } @inlinable public static func < (lhs: Self, rhs: Self) -> Bool { - lhs._breakPointIndex < rhs._breakPointIndex + lhs._breakpointIndex < rhs._breakpointIndex } @inlinable public static func == (lhs: Self, rhs: Self) -> Bool { - lhs._breakPointIndex == rhs._breakPointIndex + lhs._breakpointIndex == rhs._breakpointIndex } } @inlinable public var startIndex: Index { - Index(_breakPointIndex: _line._data.startIndex) + Index(_line._breakpoints.startIndex) } @inlinable public var endIndex: Index { - Index(_breakPointIndex: _line._data.endIndex) + Index(_line._breakpoints.endIndex) } @inlinable public var count: Int { - _line._data.count + _line._breakpoints.count } @inlinable public var isEmpty: Bool { - false + _line._breakpoints.isEmpty } @inlinable public func index(after i: Index) -> Index { - let (result, overflow) = i._breakPointIndex.addingReportingOverflow(1) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") - return Index(_breakPointIndex: result) + Index(_line._breakpoints.index(after: i._breakpointIndex)) } @inlinable public func index(before i: Index) -> Index { - let (result, overflow) = i._breakPointIndex.subtractingReportingOverflow(1) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") - return Index(_breakPointIndex: result) + Index(_line._breakpoints.index(before: i._breakpointIndex)) } @inlinable public func index(_ i: Index, offsetBy distance: Int) -> Index { - let (result, overflow) = i._breakPointIndex.addingReportingOverflow(distance) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") - return Index(_breakPointIndex: 
result) + Index(_line._breakpoints.index(i._breakpointIndex, offsetBy: distance)) + } + + @inlinable + public func index(_ i: Index, offsetBy distance: Int, limitedBy limit: Index) -> Index? { + _line._breakpoints.index(i._breakpointIndex, offsetBy: distance, limitedBy: limit._breakpointIndex) + .map { Index($0) } } @inlinable public func formIndex(after i: inout Index) { - let overflow: Bool - (i._breakPointIndex, overflow) = i._breakPointIndex.addingReportingOverflow(1) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") + _line._breakpoints.formIndex(after: &i._breakpointIndex) } @inlinable public func formIndex(before i: inout Index) { - let overflow: Bool - (i._breakPointIndex, overflow) = i._breakPointIndex.subtractingReportingOverflow(1) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") + _line._breakpoints.formIndex(before: &i._breakpointIndex) } @inlinable public func formIndex(_ i: inout Index, offsetBy distance: Int) { - let overflow: Bool - (i._breakPointIndex, overflow) = i._breakPointIndex.subtractingReportingOverflow(distance) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") + _line._breakpoints.formIndex(&i._breakpointIndex, offsetBy: distance) } @inlinable public func distance(from i: Index, to j: Index) -> Int { - let (result, overflow) = j._breakPointIndex.subtractingReportingOverflow(i._breakPointIndex) - assert(!overflow, "Invalid index - encountered overflow in indexing operation") - return result + _line._breakpoints.distance(from: i._breakpointIndex, to: j._breakpointIndex) } @inlinable public subscript(i: Index) -> (range: Range, value: Value) { - let (start, value) = _line._data[i._breakPointIndex] - let valueEndIndex = index(after: i)._breakPointIndex - let end = (valueEndIndex < _line._data.endIndex) ? 
_line._data[valueEndIndex].location : _line._upperBound + let (start, value) = _line._breakpoints[i._breakpointIndex] + let nextBreakIndex = index(after: i)._breakpointIndex + let end = + (nextBreakIndex < _line._breakpoints.endIndex) + ? _line._breakpoints.locations[nextBreakIndex] + : _line._upperBound assert(start < end, "We should never have empty segments") return (range: Range(uncheckedBounds: (start, end)), value: value) } } +extension SegmentedLine.Segments { + + /// The index of the segment containing the given location. + /// + /// The location must be within the line's bounds. + /// + /// ```swift + /// var line = SegmentedLine(bounds: 0..<50, value: 42) + /// line.set(10..<20, to: 99) + /// line.set(30..<50, to: 1024) + /// print(line) + /// // ┬ + /// // ├ [0..<10]: 42 + /// // ├ [10..<20]: 99 + /// // ├ [20..<30]: 42 + /// // ├ [30..<50]: 1024 + /// // ┴ + /// + /// let i = line.segments.index(of: 35) + /// print(line.segments[i]) // (range: 30..<50, value: 1024) + /// ``` + /// + /// - complexity: O(log *n*) + /// + @inlinable + public func index(of location: Bound) -> Index { + _line.boundsCheck(location) + var idx = _line._breakpoints.locations._codepointdatabase_partitionedIndex { $0 < location } + if idx == _line._breakpoints.endIndex || _line._breakpoints.locations[idx] != location { + _line._breakpoints.formIndex(before: &idx) + } + return Index(idx) + } +} + // -------------------------------------------- // MARK: - Standard Protocols // -------------------------------------------- -extension SegmentedLine: CustomStringConvertible { +extension SegmentedLine: Equatable where Value: Equatable { @inlinable - public var description: String { - segments.reduce(into: "") { partial, segment in - partial += "| [\(segment.range)]: \(segment.value) " - } + "|" + public static func == (lhs: Self, rhs: Self) -> Bool { + lhs._upperBound == rhs._upperBound + && lhs._breakpoints.locations == rhs._breakpoints.locations + && lhs._breakpoints.values == 
rhs._breakpoints.values } } -extension SegmentedLine: Equatable where Value: Equatable { +extension SegmentedLine: Hashable where Bound: Hashable, Value: Hashable { @inlinable - public static func == (lhs: Self, rhs: Self) -> Bool { - guard lhs._upperBound == rhs._upperBound else { return false } - // Unfortunately, tuples are not Equatable so we need to write our own Array.== - return lhs._data.withUnsafeBufferPointer { lhsBuffer in - rhs._data.withUnsafeBufferPointer { rhsBuffer in - guard lhsBuffer.count == rhsBuffer.count else { return false } - if lhsBuffer.baseAddress == rhsBuffer.baseAddress { return true } - return lhsBuffer.elementsEqual(rhsBuffer, by: { $0.location == $1.location && $0.value == $1.value }) - } + public func hash(into hasher: inout Hasher) { + hasher.combine(_upperBound) + hasher.combine(_breakpoints.locations) + hasher.combine(_breakpoints.values) + } +} + +extension SegmentedLine: CustomStringConvertible { + + @inlinable + public var description: String { + guard segments.count > 1 else { + let singleSegment = segments.first! + return "[\(singleSegment.range)]: \(singleSegment.value)" } + return segments.reduce(into: "┬\n") { partial, segment in + partial += "├ [\(segment.range)]: \(segment.value)\n" + } + "┴" } } -// TODO: Hashable, Codable, etc +#if swift(>=5.5) && canImport(_Concurrency) + + extension SegmentedLine: Sendable where Bound: Sendable, Value: Sendable {} + extension SegmentedLine.BreakpointStorage: Sendable where Bound: Sendable, Value: Sendable {} + extension SegmentedLine.Segments: Sendable where Bound: Sendable, Value: Sendable {} -//#if swift(>=5.5) && canImport(_Concurrency) -// extension SegmentedLine: Sendable where Bound: Sendable, Value: Sendable {} -//#endif +#endif // -------------------------------------------- -// MARK: - Get, GetAll (TODO) +// MARK: - Bounds // -------------------------------------------- -// TODO: Add 'get' (single location and range variants) -- and/or add APIs to .segments? 
+extension SegmentedLine { + + /// The bounds of this space. + /// + /// All locations within these bounds have an assigned value. + /// + @inlinable + public var bounds: Range { + Range(uncheckedBounds: (_breakpoints.locations[0], _upperBound)) + } + + /// Ensures the given location is within this line's ``bounds``. + /// + /// If the location is not within the line's bounds, the program terminates. + /// This function should only be used for diagnostics, not memory safety. + /// + @inlinable + internal func boundsCheck(_ location: Bound) { + precondition(bounds.lowerBound <= location, "\(location) is out of bounds. Valid bounds are \(bounds)") + precondition(bounds.upperBound > location, "\(location) is out of bounds. Valid bounds are \(bounds)") + } + + /// Ensures the given range is within this line's ``bounds``. + /// + /// If the range is not within the line's bounds, the program terminates. + /// This function should only be used for diagnostics, not memory safety. + /// + @inlinable + internal func boundsCheck(_ range: Range) { + precondition(bounds.lowerBound <= range.lowerBound, "\(range) is out of bounds. Valid bounds are \(bounds)") + precondition(bounds.upperBound >= range.upperBound, "\(range) is out of bounds. Valid bounds are \(bounds)") + } +} // -------------------------------------------- -// MARK: - Set, Modify +// MARK: - Get // -------------------------------------------- extension SegmentedLine { + /// The value assigned to a given location. + /// + /// The location must be within the line's ``bounds``. 
+ /// + /// ```swift + /// var line = SegmentedLine(bounds: 0..<50, value: 42) + /// line.set(10..<20, to: 99) + /// line.set(30..<50, to: 1024) + /// print(line) + /// // ┬ + /// // ├ [0..<10]: 42 + /// // ├ [10..<20]: 99 + /// // ├ [20..<30]: 42 + /// // ├ [30..<50]: 1024 + /// // ┴ + /// + /// line[5] // 42 + /// line[12] // 99 + /// line[35] // 1024 + /// ``` + /// + /// - complexity: O(log *n*), where *n* is the number of segments in this line. + /// @inlinable - internal func _boundsCheck(_ range: Range) { - precondition(self._lowerBound <= range.lowerBound, "\(range) is out of bounds. Valid bounds are \(self.bounds)") - precondition(self._upperBound >= range.upperBound, "\(range) is out of bounds. Valid bounds are \(self.bounds)") + public subscript(_ location: Bound) -> Value { + _breakpoints.values[segments.index(of: location)._breakpointIndex] } +} + + +// -------------------------------------------- +// MARK: - Set, Modify +// -------------------------------------------- + + +extension SegmentedLine { - /// Ensures that the line's `_data` array contains a breakPoint at the given location. + /// Ensures that the line's `_breakpoints` contains a breakpoint at the given location. + /// The location is assumed to be within the line's bounds. + /// + /// This operation does not change the values assigned to any locations. /// - /// - returns: The index of the breakPoint for the given location, and a flag marking - /// whether was inserted or existed before this function was called. + /// - returns: The index of the breakpoint which begins at the given location. 
/// @inlinable - internal mutating func _ensureSegmentBreak(at location: Bound) -> (Array.Index, inserted: Bool) { + internal mutating func _ensureSegmentBreak(at location: Bound) -> BreakpointStorage.Index { - assert(location < self._upperBound, "location is not in bounds") - if location == self.bounds.lowerBound { - return (_data.startIndex, inserted: false) - } + guard location > bounds.lowerBound else { return _breakpoints.startIndex } - // TODO: Limit search. - let idx = _data._codepointdatabase_partitionedIndex { $0.location < location } - if idx == _data.endIndex || _data[idx].location != location { - let valueAtLocation = _data[idx - 1].value - _data.insert((location: location, value: valueAtLocation), at: idx) - return (idx, inserted: true) - } - return (idx, inserted: false) + let containingSegment = segments.index(of: location)._breakpointIndex + guard _breakpoints.locations[containingSegment] != location else { return containingSegment } + + let newBreakpointLocation = _breakpoints.index(after: containingSegment) + _breakpoints.insert((location, _breakpoints.values[containingSegment]), at: newBreakpointLocation) + return newBreakpointLocation } - /// Ensures that the line's `_data` array contains breakPoints for the given range's `upperBound` and `lowerBound`. + /// Ensures that the line's `_breakpoints` contains breakpoints for the given range's `lowerBound` and `upperBound`. + /// Both locations are assumed to be within the line's bounds. + /// + /// This operation does not change the values assigned to any locations. /// - /// - returns: A range of breakPoint indices. The lowerBound of this range is the index of a breakPoint - /// whose location is the lowerBound of the given range. Similarly, the upperBound of the range - /// refers to a breakPoint for the given range's upperBound. + /// - returns: A range of breakpoint indices. + /// The lowerBound is the index of the breakpoint which begins at the lowerBound of the given range. 
+ /// The upperBound is either `endIndex` or the index of the breakpoint which begins at the upperBound + /// of the given range. /// If the given range was empty, the result is `nil`. /// @inlinable - internal mutating func _ensureSegmentBreaks(for boundsToSplit: Range) -> Range.Index>? { + internal mutating func _ensureSegmentBreaks(for boundsToSplit: Range) -> Range? { - _boundsCheck(boundsToSplit) guard !boundsToSplit.isEmpty else { return nil } - // Ensure there is a break to preserve values >= 'upperBound'. - - var dataIndexForUpperBound: Array.Index - - if boundsToSplit.upperBound == bounds.upperBound { - dataIndexForUpperBound = _data.endIndex - } else { - let (idx, _) = _ensureSegmentBreak(at: boundsToSplit.upperBound) - assert(idx > _data.startIndex, "A non-empty in-bounds range cannot end at startIndex") - dataIndexForUpperBound = idx - } - - // Ensure there is a break to apply the new value at locations >= 'lowerBound'. - // If we insert anything, we must increment 'endOfOldData' to keep it pointing to the correct element. - - let dataIndexForLowerBound: Array.Index - - let startDataWasInserted: Bool - (dataIndexForLowerBound, startDataWasInserted) = _ensureSegmentBreak(at: boundsToSplit.lowerBound) - if startDataWasInserted { dataIndexForUpperBound += 1 } + // TODO: Limit search range when finding upperBoundBreakpoint - we know the break will be >lowerBoundBreakpoint. + let lowerBoundBreakpoint = _ensureSegmentBreak(at: boundsToSplit.lowerBound) + let upperBoundBreakpoint = + (boundsToSplit.upperBound < bounds.upperBound) + ? _ensureSegmentBreak(at: boundsToSplit.upperBound) + : _breakpoints.endIndex - assert(dataIndexForLowerBound < dataIndexForUpperBound) // Ensure not empty. - return dataIndexForLowerBound..(bounds: 0..<100, value: nil) /// - /// // After setting values <5 to "small" and values >10 to "large", - /// // the gap is left with its previous value, "medium". 
- /// - /// line.set(0..<20, to: "medium") - /// line.set(0..<5, to: "small") + /// line.set(0..<10, to: "small") + /// line.set(5..<20, to: "medium") /// line.set(10..<60, to: "large") /// print(line) - /// // | [0..<5]: "small" | [5..<10]: "medium" | [10..<60]: "large" | [60..<100]: nil | + /// // ┬ + /// // ├ [0..<5]: "small" + /// // ├ [5..<10]: "medium" + /// // ├ [10..<60]: "large" + /// // ├ [60..<100]: nil + /// // ┴ /// - /// // After setting, there will be a single span covering the given region. + /// // After setting, there will be a single segment covering the given region. /// /// line.set(5..<100, to: "not small") /// print(line) - /// // | [0..<5]: "small" | [5..<100]: "not small" | + /// // ┬ + /// // ├ [0..<5]: "small" + /// // ├ [5..<100]: "not small" + /// // ┴ /// ``` /// /// `boundsToReplace` must be entirely within the ``bounds`` of this space. - /// Assigning a value to an empty range will not modify any segments. - /// - /// Every location within the bounds of this space is assigned a value. - /// Every `SegmentedLine` begins with at least one segment, assigning a value to its entire bounds - /// (in the above example, the value's type is an `Optional` and the initial value is `nil`). + /// If `boundsToReplace` is empty, this method is a no-op. /// /// - parameters: /// - boundsToReplace: The locations which should be assigned the new value. @@ -451,24 +699,21 @@ extension SegmentedLine { @inlinable public mutating func set(_ boundsToReplace: Range, to newValue: Value) { - guard let breakPointIndices = _ensureSegmentBreaks(for: boundsToReplace) else { - return // Range is empty. 
- } - - assert(_data[breakPointIndices.lowerBound].location == boundsToReplace.lowerBound) + boundsCheck(boundsToReplace) + guard let breakPointIndices = _ensureSegmentBreaks(for: boundsToReplace) else { return /* Empty range */ } + assert(_breakpoints[breakPointIndices.lowerBound].location == boundsToReplace.lowerBound) - // To apply: assign the new value at the first breakPoint (lowerBound), - // then remove all other breakPoints in the range. + // To apply: ensure a single breakpoint covers this range, then set the value for that breakpoint. - _data[breakPointIndices.lowerBound].value = newValue - _data.removeSubrange(breakPointIndices.lowerBound + 1.., _ body: (inout Value) -> Void) { - guard let breakPointIndices = _ensureSegmentBreaks(for: boundsToModify) else { - return // Range is empty. - } - - assert(_data[breakPointIndices.lowerBound].location == boundsToModify.lowerBound) + boundsCheck(boundsToModify) + guard let breakPointIndices = _ensureSegmentBreaks(for: boundsToModify) else { return /* Empty range */ } + assert(_breakpoints[breakPointIndices.lowerBound].location == boundsToModify.lowerBound) // To apply: visit the values of all segments in the range. - for i in breakPointIndices { - body(&_data[i].value) - } + for i in breakPointIndices { body(&_breakpoints[valueAt: i]) } } } @@ -541,9 +781,9 @@ extension SegmentedLine { extension SegmentedLine { - /// Returns a new `SegmentedLine`, created by transforming this line's values using the given closure. + /// Returns a new `SegmentedLine` created by transforming this line's values using the given closure. /// - /// The result will have the same bounds and number of segments as this line, at the same locations. + /// The result will have the same bounds as this line, and the same number of segments at the same locations. 
/// /// This function can be particularly effective at simplifying lines with lots of segments, as by mapping /// complex values to simplified ones (for example, mapping to an `enum` with fewer cases), we can discard @@ -559,9 +799,14 @@ extension SegmentedLine { /// } /// let complexLine: SegmentedLine = // ... /// print(complexLine) - /// // | [0..<2]: categoryA | [2..<4]: categoryB | [4..<12]: categoryC | ... + /// // ┬ + /// // ├ [0..<2]: categoryA + /// // ├ [2..<4]: categoryB + /// // ├ [4..<12]: categoryC + /// // ├ ... + /// // ┴ /// - /// // 1️⃣ Perhaps we can map these to a smaller number of states. + /// // 1️⃣ We can map these to a smaller number of states. /// /// enum SimplifiedData { /// case valid, invalid @@ -570,69 +815,97 @@ extension SegmentedLine { /// SimplifiedData(validating: complex) /// } /// print(simplifiedLine) - /// // | [0..<2]: valid | [2..<4]: valid | [4..<12]: valid | ... + /// // ┬ + /// // ├ [0..<2]: valid + /// // ├ [2..<4]: valid + /// // ├ [4..<12]: valid + /// // ├ ... + /// // ┴ /// /// // 2️⃣ Notice that we have lots of segments for boundaries which /// // which are no longer important. 'combineSegments' can clean them up. /// /// simplifiedLine.combineSegments() /// print(simplifiedLine) - /// // | [0..<2000]: valid | [2000..<2024]: invalid | [2024..<2056]: valid | ... + /// // ┬ + /// // ├ [0..<2000]: valid + /// // ├ [2000..<2024]: invalid + /// // ├ [2024..<2056]: valid + /// // ├ ... + /// // ┴ /// ``` /// @inlinable public func mapValues(_ transform: (Value) throws -> T) rethrows -> SegmentedLine { SegmentedLine( - _upperBound: _upperBound, - _data: try _data.map { ($0.0, try transform($0.1)) } + _breakpoints: .init(locations: _breakpoints.locations, values: try _breakpoints.values.map(transform)), + _upperBound: _upperBound ) } - /// Merges segments according to the given closure. + /// Merges strings of adjacent segments. 
/// - /// This function implements a left-fold, similar to Collection's `reduce`, except that the folding closure - /// can decide to preserve a segment break and reset the fold operation. + /// This function implements a kind of left-fold, similar to `Collection.reduce`, + /// with the key difference that the closure can decide _not_ to combine two elements + /// and instead to restart the fold operation. /// - /// The closure is invoked with two segments as arguments - an `accumulator`, which has a mutable value, - /// and `next`, which is its successor on this line. Given these segments, the closure may decide: + /// When the closure is invoked, two segments are provided to it as parameters - + /// an `accumulator` with a mutable value, and `next`, which is its successor on this line. + /// Given these two segments, the closure decides either: /// - /// - To combine `next` and `accumulator`. + /// - To merge `next` into `accumulator`. /// - /// To fold segments, the closure performs any required adjustments to merge `next.value` - /// in to `accumulator.value`, and returns `true`. The segment `next` will be discarded, - /// and the accumulator's range will expand up to `next.range.upperBound`. + /// To merge segments, the closure performs any required adjustments to merge `next.value` + /// into `accumulator.value` and returns `true`. /// - /// Folding continues with the same accumulator for as long as the closure returns `true`; - /// this process is similar to Collection's `reduce(into:)` function. + /// The segment `next` will automatically be discarded, + /// and the accumulator's range will be expanded to include `next.range`. + /// Folding continues with the same accumulator for as long as the closure returns `true`. /// - /// - To maintain the segment break. + /// - To maintain `next` and `accumulator` as separate segments. /// - /// If it is not desirable to combine the segments, the closure may return `false`.
+ /// If it is not desirable to merge the segments, the closure may return `false`. /// This finalizes the current accumulator, and restarts folding with `next` as the new accumulator. /// @inlinable public mutating func combineSegments( while shouldMerge: (_ accumulator: inout Segments.Element, _ next: Segments.Element) -> Bool ) { - var reduced: [BreakPoint] = [] + // TODO: It would be nice to perform this in-place. + // - For locations, we can overwrite values (MutableCollection-style) and chop off the tail at the end. + // - For values, it's a little more awkward because we'd want to *move* Value elements in to the accumulator. + // It's possible, but needs to be done carefully. + + var reducedLocations = [Bound]() + var reducedValues = [Value]() + var accumulator = segments[segments.startIndex] - for next in segments.dropFirst() { + var i = segments.index(after: segments.startIndex) + while i < segments.endIndex { + let next = segments[i] + + // Ignore any modifications the closure makes to the 'range' part of the accumulator. let accumulatorStart = accumulator.range.lowerBound if shouldMerge(&accumulator, next) { accumulator.range = Range(uncheckedBounds: (accumulatorStart, next.range.upperBound)) } else { - reduced.append((accumulatorStart, accumulator.value)) + reducedLocations.append(accumulatorStart) + reducedValues.append(accumulator.value) accumulator = next } + + segments.formIndex(after: &i) } - reduced.append((accumulator.range.lowerBound, accumulator.value)) - self._data = reduced + + reducedLocations.append(accumulator.range.lowerBound) + reducedValues.append(accumulator.value) + self._breakpoints = BreakpointStorage(locations: reducedLocations, values: reducedValues) } } extension SegmentedLine where Value: Equatable { - /// Merges segments of consecutive equal elements. + /// Merges strings of adjacent segments with the same value. 
/// /// This function can be particularly effective at simplifying lines with lots of segments, as by mapping /// complex values to simplified ones (for example, mapping to an `enum` with fewer cases) using ``mapValues(_:)``, @@ -648,9 +921,14 @@ extension SegmentedLine where Value: Equatable { /// } /// let complexLine: SegmentedLine = // ... /// print(complexLine) - /// // | [0..<2]: categoryA | [2..<4]: categoryB | [4..<12]: categoryC | ... + /// // ┬ + /// // ├ [0..<2]: categoryA + /// // ├ [2..<4]: categoryB + /// // ├ [4..<12]: categoryC + /// // ├ ... + /// // ┴ /// - /// // 1️⃣ Perhaps we can map these to a smaller number of states. + /// // 1️⃣ We can map these to a smaller number of states. /// /// enum SimplifiedData { /// case valid, invalid @@ -659,14 +937,24 @@ extension SegmentedLine where Value: Equatable { /// SimplifiedData(validating: complex) /// } /// print(simplifiedLine) - /// // | [0..<2]: valid | [2..<4]: valid | [4..<12]: valid | ... + /// // ┬ + /// // ├ [0..<2]: valid + /// // ├ [2..<4]: valid + /// // ├ [4..<12]: valid + /// // ├ ... + /// // ┴ /// /// // 2️⃣ Notice that we have lots of segments for boundaries which /// // which are no longer important. 'combineSegments' can clean them up. /// /// simplifiedLine.combineSegments() /// print(simplifiedLine) - /// // | [0..<2000]: valid | [2000..<2024]: invalid | [2024..<2056]: valid | ... + /// // ┬ + /// // ├ [0..<2000]: valid + /// // ├ [2000..<2024]: invalid + /// // ├ [2024..<2056]: valid + /// // ├ ... + /// // ┴ /// ``` /// @inlinable diff --git a/Tests/UnicodeDataStructuresTests/SegmentedLineTests.swift b/Tests/UnicodeDataStructuresTests/SegmentedLineTests.swift index eb2c57b5b..f80eab808 100644 --- a/Tests/UnicodeDataStructuresTests/SegmentedLineTests.swift +++ b/Tests/UnicodeDataStructuresTests/SegmentedLineTests.swift @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+import Checkit import UnicodeDataStructures import XCTest -fileprivate func XCTAssertSegments( +private func XCTAssertSegments( _ line: SegmentedLine, _ expected: [(Range, Value)] ) where Bound: Comparable, Value: Equatable { @@ -49,24 +50,28 @@ extension SegmentedLineTests { func testDocumentationExamples() { - // SegmentedLine, .set, .segments + // SegmentedLine, .set do { var line = SegmentedLine(bounds: 0..<100, value: nil) - // After setting values <5 to "small" and values >10 to "large", - // the gap is left with its previous value, "medium". - - line.set(0..<20, to: "medium") - line.set(0..<5, to: "small") + line.set(0..<10, to: "small") + line.set(5..<20, to: "medium") line.set(10..<60, to: "large") XCTAssertEqual( line.description, - #"| [0..<5]: Optional("small") | [5..<10]: Optional("medium") | [10..<60]: Optional("large") | [60..<100]: nil |"# + #""" + ┬ + ├ [0..<5]: Optional("small") + ├ [5..<10]: Optional("medium") + ├ [10..<60]: Optional("large") + ├ [60..<100]: nil + ┴ + """# ) } - // modify. + // SegmentedLine, .modify do { enum Font: Equatable { case custom(String) @@ -83,26 +88,27 @@ extension SegmentedLineTests { } let string = "Bob is feeling great" + + // Create a SegmentedLine for the collection's contents. + // Start by setting a font attribute over the entire string. 
+ var tags = SegmentedLine( bounds: string.startIndex..(bounds: 0..<100, value: nil) + + // for (range, value) in line.segments { + // print(range, value) + // } + XCTAssertSegments(line, [(0..<100, nil)]) + + line.set(0..<10, to: "small") + line.set(5..<20, to: "medium") + line.set(10..<60, to: "large") + + // for (range, value) in line.segments { + // print(range, value) + // } + + // swift-format-ignore + XCTAssertSegments(line, [ + (0..<5, "small"), + (5..<10, "medium"), + (10..<60, "large"), + (60..<100, nil), + ]) + } + + // .segments.indexOf + do { + var line = SegmentedLine(bounds: 0..<50, value: 42) + line.set(10..<20, to: 99) + line.set(30..<50, to: 1024) + + // swift-format-ignore + XCTAssertSegments(line, [ + (0..<10, 42), + (10..<20, 99), + (20..<30, 42), + (30..<50, 1024), + ]) + + let i = line.segments.index(of: 35) + XCTAssertTrue(line.segments[i] == (range: 30..<50, value: 1024)) + } + + // subscript(Bound) + do { + var line = SegmentedLine(bounds: 0..<50, value: 42) + line.set(10..<20, to: 99) + line.set(30..<50, to: 1024) + + // swift-format-ignore + XCTAssertSegments(line, [ + (0..<10, 42), + (10..<20, 99), + (20..<30, 42), + (30..<50, 1024), + ]) + + XCTAssertEqual(line[5], 42) + XCTAssertEqual(line[12], 99) + XCTAssertEqual(line[35], 1024) + } } func testInitWithBoundsAndValue() { @@ -157,6 +226,139 @@ extension SegmentedLineTests { } +// -------------------------------------------- +// MARK: - Segments Collection +// -------------------------------------------- + +extension SegmentedLineTests { + + func testSegmentsCollectionConformance() { + + // Single segment. + do { + let line = SegmentedLine(bounds: 0..<100, value: nil) + CollectionChecker.check(line.segments) + } + // Many segments. 
+ do { + var line = SegmentedLine(bounds: 0..<100, value: nil) + line.set(0..<10, to: "small") + line.set(5..<20, to: "medium") + line.set(20..<60, to: "large") + + // swift-format-ignore + XCTAssertSegments(line, [ + (0..<5, "small"), + (5..<20, "medium"), + (20..<60, "large"), + (60..<100, nil), + ]) + CollectionChecker.check(line.segments) + } + } +} + + +// -------------------------------------------- +// MARK: - Get +// -------------------------------------------- + + +extension SegmentedLineTests { + + func testIndexOfSegment() { + + // Single segment. + do { + let line = SegmentedLine(bounds: 0..<100, value: 42) + for location in line.bounds { + let index = line.segments.index(of: location) + XCTAssertEqual(index, line.segments.startIndex) + XCTAssertEqual(line.segments.index(after: index), line.segments.endIndex) + XCTAssertTrue(line.segments[index] == (range: 0..<100, value: 42)) + } + } + + // Multiple segments. + do { + var line = SegmentedLine(bounds: 0..<160, value: "-") + line.set(0..<20, to: "A") + line.set(20..<40, to: "B") + line.set(40..<60, to: "C") + line.set(60..<80, to: "D") + line.set(80..<100, to: "E") + line.set(100..<120, to: "F") + line.set(120..<140, to: "G") + line.set(140..<160, to: "H") + // swift-format-ignore + let expectedSegments = [ + (0..<20, "A"), + (20..<40, "B"), + (40..<60, "C"), + (60..<80, "D"), + (80..<100, "E"), + (100..<120, "F"), + (120..<140, "G"), + (140..<160, "H"), + ] + XCTAssertSegments(line, expectedSegments) + + var expectedSegmentOffset = 0 + for (range, expectedValue) in expectedSegments { + for location in range { + let index = line.segments.index(of: location) + XCTAssertEqual(line.segments.index(line.segments.startIndex, offsetBy: expectedSegmentOffset), index) + XCTAssertTrue(line.segments[index] == (range: range, value: expectedValue)) + } + expectedSegmentOffset += 1 + } + } + } + + func testGetAtLocation() { + + // Single segment. 
+ do { + let line = SegmentedLine(bounds: 0..<100, value: 42) + for location in 0..<100 { + XCTAssertEqual(line[location], 42) + } + } + + // Multiple segments. + do { + var line = SegmentedLine(bounds: 0..<160, value: "-") + line.set(0..<20, to: "A") + line.set(20..<40, to: "B") + line.set(40..<60, to: "C") + line.set(60..<80, to: "D") + line.set(80..<100, to: "E") + line.set(100..<120, to: "F") + line.set(120..<140, to: "G") + line.set(140..<160, to: "H") + // swift-format-ignore + let expectedSegments = [ + (0..<20, "A"), + (20..<40, "B"), + (40..<60, "C"), + (60..<80, "D"), + (80..<100, "E"), + (100..<120, "F"), + (120..<140, "G"), + (140..<160, "H"), + ] + XCTAssertSegments(line, expectedSegments) + + for (range, expectedValue) in expectedSegments { + for location in range { + XCTAssertEqual(line[location], expectedValue) + } + } + } + } +} + + // -------------------------------------------- // MARK: - Set // -------------------------------------------- @@ -177,6 +379,23 @@ extension SegmentedLineTests { (0..<100, 99) ]) } + do { + var line = SegmentedLine(bounds: 0..<100, value: 99) + line.set(0..<50, to: 0) + line.set(50..<75, to: 1) + // swift-format-ignore + XCTAssertSegments(line, [ + (0..<50, 0), + (50..<75, 1), + (75..<100, 99), + ]) + + line.set(line.bounds, to: 2) + // swift-format-ignore + XCTAssertSegments(line, [ + (0..<100, 2) + ]) + } // Setting a region from lowerBound. 
do { @@ -1331,19 +1550,36 @@ extension SegmentedLineTests { do { let line = SegmentedLine(bounds: 0..<100, value: "") let description = line.description - XCTAssertEqual(description, #"| [0..<100]: |"#) + XCTAssertEqual(description, #"[0..<100]: "#) } do { var line = SegmentedLine(bounds: 0..<100, value: "") line.set(50..<100, to: "hi 👋") let description = line.description - XCTAssertEqual(description, #"| [0..<50]: | [50..<100]: hi 👋 |"#) + XCTAssertEqual( + description, + #""" + ┬ + ├ [0..<50]: + ├ [50..<100]: hi 👋 + ┴ + """# + ) } do { var line = SegmentedLine(bounds: 0..<100, value: "") line.set(20..<40, to: "hello") let description = line.description - XCTAssertEqual(description, #"| [0..<20]: | [20..<40]: hello | [40..<100]: |"#) + XCTAssertEqual( + description, + #""" + ┬ + ├ [0..<20]: + ├ [20..<40]: hello + ├ [40..<100]: + ┴ + """# + ) } } }