Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement AttributedString UTF8 and UTF16 views #1066

Merged
merged 3 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2024 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#if FOUNDATION_FRAMEWORK
@_spi(Unstable) internal import CollectionsInternal
#elseif canImport(_RopeModule)
internal import _RopeModule
#elseif canImport(_FoundationCollections)
internal import _FoundationCollections
#endif

@available(FoundationPreview 6.2, *)
extension AttributedString {
    /// A view of an attributed string's text as a collection of UTF-16 code units.
    public struct UTF16View: Sendable {
        /// The guts whose `string` this view reads from.
        internal var _guts: Guts
        /// The part of `_guts.string` that this view exposes.
        internal var _range: Range<BigString.Index>
        // NOTE(review): `_identity` is not read anywhere in the visible code — confirm it is still needed.
        internal var _identity: Int = 0

        /// Creates a view covering everything inside `guts.stringBounds`.
        internal init(_ guts: AttributedString.Guts) {
            let wholeString = guts.stringBounds
            self.init(guts, in: wholeString)
        }

        /// Creates a view over the part of `guts` delimited by `range`.
        internal init(_ guts: Guts, in range: Range<BigString.Index>) {
            self._guts = guts
            self._range = range
        }

        /// Creates a view backed by a newly created `Guts()`.
        ///
        /// Review thread attached to this initializer in the pull request:
        ///
        /// - Reviewer: "Is this intentionally public? It wasn't included in the proposal.
        ///   I don't think you can init a String.UTF16View either."
        /// - Author: "Good call, this was copied from the CharacterView and UnicodeScalarView
        ///   code, but I didn't include it in the proposal since there's no need to ever create
        ///   this yourself especially since it's immutable - I'll remove this from the
        ///   implementation"
        public init() {
            let freshGuts = Guts()
            self.init(freshGuts)
        }
    }

    /// The UTF-16 code units of this attributed string.
    public var utf16: UTF16View {
        return UTF16View(_guts)
    }
}

@available(FoundationPreview 6.2, *)
extension AttributedSubstring {
    /// The UTF-16 code units of this substring, built from its `_guts` and bounded by its `_range`.
    public var utf16: AttributedString.UTF16View {
        let view = AttributedString.UTF16View(_guts, in: _range)
        return view
    }
}

@available(FoundationPreview 6.2, *)
extension AttributedString.UTF16View {
    /// The UTF-16 view of the underlying string, restricted to `_range`.
    // NOTE(review): uses the `_unchecked` initializer; presumably `_range` is already known
    // to be valid for `_guts.string` — confirm.
    var _utf16: BigSubstring.UTF16View {
        let text = _guts.string
        return BigSubstring.UTF16View(_unchecked: text, in: _range)
    }
}

@available(FoundationPreview 6.2, *)
extension AttributedString.UTF16View: BidirectionalCollection {
    public typealias Element = UTF16.CodeUnit
    public typealias Index = AttributedString.Index
    public typealias Subsequence = Self

    /// The position of the first code unit in the view (the lower bound of `_range`).
    public var startIndex: AttributedString.Index {
        .init(_range.lowerBound)
    }

    /// The view's past-the-end position (the upper bound of `_range`).
    public var endIndex: AttributedString.Index {
        .init(_range.upperBound)
    }

    /// The number of UTF-16 code units in the view.
    public var count: Int {
        _utf16.count
    }

    /// Returns the position immediately before `i`.
    ///
    /// - Precondition: `i` is greater than `startIndex` and no greater than `endIndex`.
    public func index(before i: AttributedString.Index) -> AttributedString.Index {
        // `startIndex` has no predecessor inside the view, so it is rejected up front with this
        // type's own diagnostic instead of being passed on to the underlying string.
        precondition(i > startIndex && i <= endIndex, "AttributedString index out of bounds")
        let j = Index(_guts.string.utf16.index(before: i._value))
        precondition(j >= startIndex, "Can't advance AttributedString index before start index")
        return j
    }

    /// Returns the position immediately after `i`.
    ///
    /// - Precondition: `i` is at least `startIndex` and less than `endIndex`.
    public func index(after i: AttributedString.Index) -> AttributedString.Index {
        // `endIndex` has no successor inside the view; reject it here for symmetry with
        // `index(before:)`.
        precondition(i >= startIndex && i < endIndex, "AttributedString index out of bounds")
        let j = Index(_guts.string.utf16.index(after: i._value))
        precondition(j <= endIndex, "Can't advance AttributedString index after end index")
        return j
    }

    /// Returns the position `distance` code units away from `i`.
    ///
    /// - Precondition: both `i` and the resulting position lie within `startIndex...endIndex`.
    public func index(_ i: AttributedString.Index, offsetBy distance: Int) -> AttributedString.Index {
        precondition(i >= startIndex && i <= endIndex, "AttributedString index out of bounds")
        // The offset is computed on the whole underlying string, so the result is re-checked
        // against this view's own bounds.
        let j = Index(_guts.string.utf16.index(i._value, offsetBy: distance))
        precondition(j >= startIndex && j <= endIndex, "AttributedString index out of bounds")
        return j
    }

    /// Returns the position `distance` code units away from `i`, or `nil` when the underlying
    /// string view returns `nil` for this offset and `limit`.
    ///
    /// - Precondition: `i`, `limit` and any returned position lie within `startIndex...endIndex`.
    public func index(
        _ i: AttributedString.Index,
        offsetBy distance: Int,
        limitedBy limit: AttributedString.Index
    ) -> AttributedString.Index? {
        precondition(i >= startIndex && i <= endIndex, "AttributedString index out of bounds")
        precondition(limit >= startIndex && limit <= endIndex, "AttributedString index out of bounds")
        guard let j = _guts.string.utf16.index(
            i._value, offsetBy: distance, limitedBy: limit._value
        ) else {
            return nil
        }
        precondition(j >= startIndex._value && j <= endIndex._value,
                     "AttributedString index out of bounds")
        return Index(j)
    }

    /// Returns the number of UTF-16 code units between `start` and `end`.
    ///
    /// - Precondition: both positions lie within `startIndex...endIndex`.
    public func distance(
        from start: AttributedString.Index,
        to end: AttributedString.Index
    ) -> Int {
        precondition(start >= startIndex && start <= endIndex, "AttributedString index out of bounds")
        precondition(end >= startIndex && end <= endIndex, "AttributedString index out of bounds")
        return _guts.string.utf16.distance(from: start._value, to: end._value)
    }

    /// Accesses the code unit at `index`.
    ///
    /// - Precondition: `index` is at least `startIndex` and less than `endIndex`.
    public subscript(index: AttributedString.Index) -> UTF16.CodeUnit {
        precondition(index >= startIndex && index < endIndex, "AttributedString index out of bounds")
        return _guts.string.utf16[index._value]
    }

    /// Accesses the sub-view covering `bounds`.
    ///
    /// - Precondition: `bounds` lies entirely within `startIndex...endIndex`.
    public subscript(bounds: Range<AttributedString.Index>) -> Self {
        let bounds = bounds._bstringRange
        // A `Range` already guarantees lowerBound <= upperBound, so checking the two outer edges
        // is sufficient. The lower bound may equal the view's upper bound: the empty range at
        // `endIndex` is a valid slice (it is what `dropFirst(count)` or `suffix(0)` produce).
        precondition(
            bounds.lowerBound >= _range.lowerBound && bounds.upperBound <= _range.upperBound,
            "AttributedString index range out of bounds")
        return Self(_guts, in: bounds)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2024 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#if FOUNDATION_FRAMEWORK
@_spi(Unstable) internal import CollectionsInternal
#elseif canImport(_RopeModule)
internal import _RopeModule
#elseif canImport(_FoundationCollections)
internal import _FoundationCollections
#endif

@available(FoundationPreview 6.2, *)
extension AttributedString {
    /// A view of an attributed string's text as a collection of UTF-8 code units.
    public struct UTF8View: Sendable {
        /// The guts whose `string` this view reads from.
        internal var _guts: Guts
        /// The part of `_guts.string` that this view exposes.
        internal var _range: Range<BigString.Index>
        // NOTE(review): `_identity` is not read anywhere in the visible code — confirm it is still needed.
        internal var _identity: Int = 0

        /// Creates a view covering everything inside `guts.stringBounds`.
        internal init(_ guts: AttributedString.Guts) {
            let wholeString = guts.stringBounds
            self.init(guts, in: wholeString)
        }

        /// Creates a view over the part of `guts` delimited by `range`.
        internal init(_ guts: Guts, in range: Range<BigString.Index>) {
            self._guts = guts
            self._range = range
        }

        /// Creates a view backed by a newly created `Guts()`.
        // NOTE(review): confirm that this initializer is meant to be public API.
        public init() {
            let freshGuts = Guts()
            self.init(freshGuts)
        }
    }

    /// The UTF-8 code units of this attributed string.
    public var utf8: UTF8View {
        return UTF8View(_guts)
    }
}

@available(FoundationPreview 6.2, *)
extension AttributedSubstring {
    /// The UTF-8 code units of this substring, built from its `_guts` and bounded by its `_range`.
    public var utf8: AttributedString.UTF8View {
        let view = AttributedString.UTF8View(_guts, in: _range)
        return view
    }
}

@available(FoundationPreview 6.2, *)
extension AttributedString.UTF8View {
    /// The UTF-8 view of the underlying string, restricted to `_range`.
    // NOTE(review): uses the `_unchecked` initializer; presumably `_range` is already known
    // to be valid for `_guts.string` — confirm.
    var _utf8: BigSubstring.UTF8View {
        let text = _guts.string
        return BigSubstring.UTF8View(_unchecked: text, in: _range)
    }
}

@available(FoundationPreview 6.2, *)
extension AttributedString.UTF8View: BidirectionalCollection {
    public typealias Element = UTF8.CodeUnit
    public typealias Index = AttributedString.Index
    public typealias Subsequence = Self

    /// The position of the first code unit in the view (the lower bound of `_range`).
    public var startIndex: AttributedString.Index {
        .init(_range.lowerBound)
    }

    /// The view's past-the-end position (the upper bound of `_range`).
    public var endIndex: AttributedString.Index {
        .init(_range.upperBound)
    }

    /// The number of UTF-8 code units in the view.
    public var count: Int {
        _utf8.count
    }

    /// Returns the position immediately before `i`.
    ///
    /// - Precondition: `i` is greater than `startIndex` and no greater than `endIndex`.
    public func index(before i: AttributedString.Index) -> AttributedString.Index {
        // Review thread attached to this method in the pull request:
        //
        //   Reviewer: "Does this throw if i == startIndex? I believe calling
        //   string.utf8.index(before: string.utf8.startIndex) throws the index out of bound
        //   error. If so, should we just enforce this at the precondition above?"
        //
        //   Author: "We still do hit a precondition from swift-collections:
        //   _RopeModule/BigString+Contents.swift:292: Precondition failed: Can't advance below
        //   start index
        //   But we might as well account for this here, good call - I'll update the above
        //   >= to be >"
        //
        // Hence the strict `>`: `startIndex` is rejected here with this type's own diagnostic.
        precondition(i > startIndex && i <= endIndex, "AttributedString index out of bounds")
        let j = Index(_guts.string.utf8.index(before: i._value))
        precondition(j >= startIndex, "Can't advance AttributedString index before start index")
        return j
    }

    /// Returns the position immediately after `i`.
    ///
    /// - Precondition: `i` is at least `startIndex` and less than `endIndex`.
    public func index(after i: AttributedString.Index) -> AttributedString.Index {
        // `endIndex` has no successor inside the view; reject it here for symmetry with
        // `index(before:)`.
        precondition(i >= startIndex && i < endIndex, "AttributedString index out of bounds")
        let j = Index(_guts.string.utf8.index(after: i._value))
        precondition(j <= endIndex, "Can't advance AttributedString index after end index")
        return j
    }

    /// Returns the position `distance` code units away from `i`.
    ///
    /// - Precondition: both `i` and the resulting position lie within `startIndex...endIndex`.
    public func index(_ i: AttributedString.Index, offsetBy distance: Int) -> AttributedString.Index {
        precondition(i >= startIndex && i <= endIndex, "AttributedString index out of bounds")
        // The offset is computed on the whole underlying string, so the result is re-checked
        // against this view's own bounds.
        let j = Index(_guts.string.utf8.index(i._value, offsetBy: distance))
        precondition(j >= startIndex && j <= endIndex, "AttributedString index out of bounds")
        return j
    }

    /// Returns the position `distance` code units away from `i`, or `nil` when the underlying
    /// string view returns `nil` for this offset and `limit`.
    ///
    /// - Precondition: `i`, `limit` and any returned position lie within `startIndex...endIndex`.
    public func index(
        _ i: AttributedString.Index,
        offsetBy distance: Int,
        limitedBy limit: AttributedString.Index
    ) -> AttributedString.Index? {
        precondition(i >= startIndex && i <= endIndex, "AttributedString index out of bounds")
        precondition(limit >= startIndex && limit <= endIndex, "AttributedString index out of bounds")
        guard let j = _guts.string.utf8.index(
            i._value, offsetBy: distance, limitedBy: limit._value
        ) else {
            return nil
        }
        precondition(j >= startIndex._value && j <= endIndex._value,
                     "AttributedString index out of bounds")
        return Index(j)
    }

    /// Returns the number of UTF-8 code units between `start` and `end`.
    ///
    /// - Precondition: both positions lie within `startIndex...endIndex`.
    public func distance(
        from start: AttributedString.Index,
        to end: AttributedString.Index
    ) -> Int {
        precondition(start >= startIndex && start <= endIndex, "AttributedString index out of bounds")
        precondition(end >= startIndex && end <= endIndex, "AttributedString index out of bounds")
        return _guts.string.utf8.distance(from: start._value, to: end._value)
    }

    /// Accesses the code unit at `index`.
    ///
    /// - Precondition: `index` is at least `startIndex` and less than `endIndex`.
    public subscript(index: AttributedString.Index) -> UTF8.CodeUnit {
        precondition(index >= startIndex && index < endIndex, "AttributedString index out of bounds")
        return _guts.string.utf8[index._value]
    }

    /// Accesses the sub-view covering `bounds`.
    ///
    /// - Precondition: `bounds` lies entirely within `startIndex...endIndex`.
    public subscript(bounds: Range<AttributedString.Index>) -> Self {
        let bounds = bounds._bstringRange
        // A `Range` already guarantees lowerBound <= upperBound, so checking the two outer edges
        // is sufficient. The lower bound may equal the view's upper bound: the empty range at
        // `endIndex` is a valid slice (it is what `dropFirst(count)` or `suffix(0)` produce).
        precondition(
            bounds.lowerBound >= _range.lowerBound && bounds.upperBound <= _range.upperBound,
            "AttributedString index range out of bounds")
        return Self(_guts, in: bounds)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ public protocol AttributedStringProtocol
var runs : AttributedString.Runs { get }
var characters : AttributedString.CharacterView { get }
var unicodeScalars : AttributedString.UnicodeScalarView { get }

@available(FoundationPreview 6.2, *)
var utf8 : AttributedString.UTF8View { get }

@available(FoundationPreview 6.2, *)
var utf16 : AttributedString.UTF16View { get }

@preconcurrency subscript<K: AttributedStringKey>(_: K.Type) -> K.Value? where K.Value : Sendable { get set }
@preconcurrency subscript<K: AttributedStringKey>(dynamicMember keyPath: KeyPath<AttributeDynamicLookup, K>) -> K.Value? where K.Value : Sendable { get set }
Expand All @@ -59,6 +65,18 @@ public protocol AttributedStringProtocol
subscript<R: RangeExpression>(bounds: R) -> AttributedSubstring where R.Bound == AttributedString.Index { get }
}


@available(FoundationPreview 6.2, *)
extension AttributedStringProtocol {
    // These defaults back the `utf8` / `utf16` protocol requirements. They are `public` so that
    // conforming types declared outside this module also receive them; an internal default
    // would leave such conformers without an implementation of the new requirements.

    /// A UTF-8 view over `__guts`, bounded by this value's `startIndex` and `endIndex`.
    public var utf8 : AttributedString.UTF8View {
        AttributedString.UTF8View(__guts, in: Range(uncheckedBounds: (startIndex._value, endIndex._value)))
    }

    /// A UTF-16 view over `__guts`, bounded by this value's `startIndex` and `endIndex`.
    public var utf16 : AttributedString.UTF16View {
        AttributedString.UTF16View(__guts, in: Range(uncheckedBounds: (startIndex._value, endIndex._value)))
    }
}

@available(macOS 12, iOS 15, tvOS 15, watchOS 8, *)
extension AttributedStringProtocol {
public func settingAttributes(_ attributes: AttributeContainer) -> AttributedString {
Expand Down
2 changes: 2 additions & 0 deletions Sources/FoundationEssentials/AttributedString/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ target_sources(FoundationEssentials PRIVATE
AttributedString+Runs+Run.swift
AttributedString+Runs.swift
AttributedString+UnicodeScalarView.swift
AttributedString+UTF8View.swift
AttributedString+UTF16View.swift
AttributedString+_InternalRun.swift
AttributedString+_InternalRuns.swift
AttributedString+_InternalRunsSlice.swift
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2504,4 +2504,62 @@ E {

XCTAssertEqual(attrStr, AttributedString("XYZ", attributes: .init().testInt(1)))
}

/// Checks `AttributedString.utf8` against `String.utf8` for several sample strings:
/// counts, full contents, per-offset indices, elements, and slices taken both from the
/// view and from sliced strings.
func testUTF8View() {
    let testStrings = [
        "Hello, world",
        "🎺😄abc🎶def",
        "¡Hola! ¿Cómo estás?",
        "שָׁלוֹם"
    ]

    for string in testStrings {
        let attrStr = AttributedString(string)
        let expectedUnits = string.utf8
        let actualUnits = attrStr.utf8

        XCTAssertEqual(actualUnits.count, expectedUnits.count, "Counts are not equal for string \(string)")
        XCTAssertTrue(actualUnits.elementsEqual(expectedUnits), "Full elements are not equal for string \(string)")

        for offset in 0 ..< expectedUnits.count {
            let idxInString = expectedUnits.index(string.startIndex, offsetBy: offset)
            let idxInAttrStr = actualUnits.index(attrStr.startIndex, offsetBy: offset)

            // Walking back from the computed index must give the same offset on both sides.
            let expectedDistance = expectedUnits.distance(from: string.startIndex, to: idxInString)
            let actualDistance = actualUnits.distance(from: attrStr.startIndex, to: idxInAttrStr)
            XCTAssertEqual(
                expectedDistance,
                actualDistance,
                "Offsets to \(idxInString) are not equal for string \(string)"
            )

            XCTAssertEqual(expectedUnits[idxInString], actualUnits[idxInAttrStr], "Elements at offset \(offset) are not equal for string \(string)")

            // Slices of the views themselves.
            let expectedHead = expectedUnits[..<idxInString]
            let expectedTail = expectedUnits[idxInString...]
            XCTAssertTrue(expectedHead.elementsEqual(actualUnits[..<idxInAttrStr]), "Slices up to \(offset) are not equal for string \(string)")
            XCTAssertTrue(expectedTail.elementsEqual(actualUnits[idxInAttrStr...]), "Slices from \(offset) are not equal for string \(string)")

            // Views of sliced strings.
            let headOfString = string[..<idxInString].utf8
            let tailOfString = string[idxInString...].utf8
            XCTAssertTrue(headOfString.elementsEqual(attrStr[..<idxInAttrStr].utf8), "Slices up to \(offset) are not equal for string \(string)")
            XCTAssertTrue(tailOfString.elementsEqual(attrStr[idxInAttrStr...].utf8), "Slices from \(offset) are not equal for string \(string)")
        }
    }
}

/// Checks `AttributedString.utf16` against `String.utf16` for several sample strings:
/// counts, full contents, per-offset indices, elements, and slices taken both from the
/// view and from sliced strings.
func testUTF16View() {
    let testStrings = [
        "Hello, world",
        "🎺😄abc🎶def",
        "¡Hola! ¿Cómo estás?",
        "שָׁלוֹם"
    ]

    for string in testStrings {
        let attrStr = AttributedString(string)
        let expectedUnits = string.utf16
        let actualUnits = attrStr.utf16

        XCTAssertEqual(actualUnits.count, expectedUnits.count, "Counts are not equal for string \(string)")
        XCTAssertTrue(actualUnits.elementsEqual(expectedUnits), "Full elements are not equal for string \(string)")

        for offset in 0 ..< expectedUnits.count {
            let idxInString = expectedUnits.index(string.startIndex, offsetBy: offset)
            let idxInAttrStr = actualUnits.index(attrStr.startIndex, offsetBy: offset)

            // Walking back from the computed index must give the same offset on both sides.
            let expectedDistance = expectedUnits.distance(from: string.startIndex, to: idxInString)
            let actualDistance = actualUnits.distance(from: attrStr.startIndex, to: idxInAttrStr)
            XCTAssertEqual(
                expectedDistance,
                actualDistance,
                "Offsets to \(idxInString) are not equal for string \(string)"
            )

            XCTAssertEqual(expectedUnits[idxInString], actualUnits[idxInAttrStr], "Elements at offset \(offset) are not equal for string \(string)")

            // Slices of the views themselves.
            let expectedHead = expectedUnits[..<idxInString]
            let expectedTail = expectedUnits[idxInString...]
            XCTAssertTrue(expectedHead.elementsEqual(actualUnits[..<idxInAttrStr]), "Slices up to \(offset) are not equal for string \(string)")
            XCTAssertTrue(expectedTail.elementsEqual(actualUnits[idxInAttrStr...]), "Slices from \(offset) are not equal for string \(string)")

            // Views of sliced strings.
            let headOfString = string[..<idxInString].utf16
            let tailOfString = string[idxInString...].utf16
            XCTAssertTrue(headOfString.elementsEqual(attrStr[..<idxInAttrStr].utf16), "Slices up to \(offset) are not equal for string \(string)")
            XCTAssertTrue(tailOfString.elementsEqual(attrStr[idxInAttrStr...].utf16), "Slices from \(offset) are not equal for string \(string)")
        }
    }
}
}