Skip to content

Commit

Permalink
Merge pull request #1062 from hylo-lang/utf8-array
Browse files Browse the repository at this point in the history
Add 'UTF8Array' to the standard library
  • Loading branch information
kyouko-taiga authored Oct 5, 2023
2 parents 15d5987 + 2579d68 commit d44becd
Show file tree
Hide file tree
Showing 9 changed files with 272 additions and 4 deletions.
8 changes: 4 additions & 4 deletions Library/Hylo/Core/CollectionOfOne.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ public type CollectionOfOne<Element: Movable & Deinitializable>: Deinitializable
&self.contents = contents
}

public fun start_index() -> Bool { true }
public fun start_index() -> Bool { false }

public fun end_index() -> Bool { false }
public fun end_index() -> Bool { true }

public fun index(after i: Bool) -> Bool { false }
public fun index(after i: Bool) -> Bool { true }

public subscript(_ position: Bool): Element {
let {
// TODO: uncomment when #1046 is implemented
// precondition(position, "index is out of bounds")
// precondition(!position, "index is out of bounds")
yield contents
}
}
Expand Down
3 changes: 3 additions & 0 deletions Library/Hylo/Core/Numbers/Integers/FixedWidthInteger.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
/// catch overflows, or access the minimum or maximum representable values of an integer type.
public trait FixedWidthInteger: BinaryInteger {

/// Returns `true` if the bits set in `mask` are also set in `self`.
fun matches(_ mask: Self) -> Bool

/// Returns the sum of `self` and `other` along with a flag indicating whether overflow occurred
/// in the operation.
fun adding_reporting_overflow(_ other: Self) -> {partial_value: Self, overflow: Bool}
Expand Down
4 changes: 4 additions & 0 deletions Library/Hylo/Core/Numbers/Integers/Int.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ public conformance Int: BinaryInteger {

public conformance Int: FixedWidthInteger {

/// Returns `true` if the bits set in `mask` are also set in `self`.
public fun matches(_ mask: Self) -> Bool {
  // Keeping only the bits selected by `mask` must give back `mask` itself.
  let selected_bits = self & mask
  return selected_bits == mask
}

public fun adding_reporting_overflow(_ other: Self) -> {partial_value: Self, overflow: Bool} {
let r = Builtin.sadd_with_overflow_word(value, other.value)
return (partial_value: Int(value: r.0), overflow: Bool(value: r.1))
Expand Down
4 changes: 4 additions & 0 deletions Library/Hylo/Core/Numbers/Integers/UInt.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ public conformance UInt: BinaryInteger {

public conformance UInt: FixedWidthInteger {

/// Returns `true` if the bits set in `mask` are also set in `self`.
public fun matches(_ mask: Self) -> Bool {
  // Keeping only the bits selected by `mask` must give back `mask` itself.
  let selected_bits = self & mask
  return selected_bits == mask
}

public fun adding_reporting_overflow(_ other: Self) -> {partial_value: Self, overflow: Bool} {
let r = Builtin.uadd_with_overflow_word(value, other.value)
return (partial_value: UInt(value: r.0), overflow: Bool(value: r.1))
Expand Down
3 changes: 3 additions & 0 deletions Library/Hylo/Core/Operators.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ public operator infix/= : assignment
public operator infix%= : assignment
public operator infix+= : assignment
public operator infix-= : assignment
public operator infix^= : assignment
public operator infix&= : assignment
public operator infix&&= : assignment
public operator infix|= : assignment
public operator infix||= : assignment

public operator infix** : exponentiation

Expand Down
12 changes: 12 additions & 0 deletions Library/Hylo/Core/Pointer.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ public type Pointer<Pointee>: Regular {
yield base as* (remote let Pointee)
}

/// Creates an instance whose address has the same bit pattern as `address`.
public init(bit_pattern address: UInt) {
  // Reinterpret the integer as a pointer; no memory is accessed here.
  let raw = Builtin.inttoptr_word(address.value)
  &base = raw
}

/// Creates an instance representing the same address as `p`.
public init(_ p: PointerToMutable<Pointee>) {
&base = p.base
Expand All @@ -27,6 +32,13 @@ public type Pointer<Pointee>: Regular {
&base = p.base
}

/// Returns `self` offset forward by `n` array elements of `Pointee` type.
///
/// The offset is computed in bytes as `n` times the stride of `Pointee`.
public fun advance(by n: Int) -> Self {
  let stride = MemoryLayout<Pointee>.stride()
  let byte_offset = stride * n
  return .new(base: Builtin.advanced_by_bytes_word(base, byte_offset.value))
}

/// Creates an instance that does not address any usable storage.
public static fun null() -> Self {
.new(base: Builtin.zeroinitializer_ptr())
Expand Down
6 changes: 6 additions & 0 deletions Library/Hylo/Core/PointerToMutable.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ public extension PointerToMutable where Pointee: Movable {

}

/// The address of `x`, projected as a pointer through which `x` can be mutated.
public subscript mutable_pointer<T>(to x: inout T): PointerToMutable<T> {
  let {
    let address = Builtin.address(of: x)
    yield PointerToMutable(base: address)
  }
}


/// Initializes `x` to `y`.
///
/// - Note: This function is a workaround for the lack of `set` bindings (see #925).
Expand Down
25 changes: 25 additions & 0 deletions Library/Hylo/LibC.hylo
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,28 @@ public fun fdopen(_ descriptor: Int, _ mode: CVoidPointer) -> CVoidPointer
/// returning the number of elements written.
@ffi("fwrite")
public fun fwrite(_ data: CVoidPointer, _ size: Int, _ count: Int, _ stream: CVoidPointer) -> Int

/// Copies `count` elements from the object pointed to by `source` to the object pointed to by
/// `destination` and returns `destination`.
///
/// Both objects are reinterpreted as buffers of `Int8`, so `count` is a number of bytes.
///
/// If the objects overlap, the behavior is undefined. If either `source` or `destination` is
/// invalid or null, the behavior is undefined.
///
/// This declaration binds the C function `memcpy` through `@ffi`.
@ffi("memcpy")
public fun memcpy(
_ destination: CVoidPointer, _ source: CVoidPointer, _ count: Int
) -> CVoidPointer

/// Copies `count` elements from the object pointed to by `source` to the object pointed to by
/// `destination` and returns `destination`.
///
/// Both objects are reinterpreted as buffers of `Int8`, so `count` is a number of bytes.
///
/// Objects may overlap: copying takes place as if the elements from `source` were copied to a
/// temporary buffer and then copied to `destination`. If either `source` or `destination` is
/// invalid or null, the behavior is undefined.
///
/// This declaration binds the C function `memmove` through `@ffi`.
@ffi("memmove")
public fun memmove(
_ destination: CVoidPointer, _ source: CVoidPointer, _ count: Int
) -> CVoidPointer
211 changes: 211 additions & 0 deletions Library/Hylo/UTF8Array.hylo
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
/// A collection of UTF-8 code units.
///
/// The whole representation fits in a single 64-bit word, `units`, which either stores the code
/// units inline or encodes a pointer to an out-of-line payload.
public type UTF8Array {

// TODO: Remove when `UInt64` is implemented
typealias UInt64 = UInt

/// The units in the collection.
///
/// The two highest bits of `units`, b63 and b62, encode the representation discriminator:
///
///     ┌──────────────────────╥─────┬─────┐
///     │ Form                 ║ b63 │ b62 │
///     ├──────────────────────╫─────┼─────┤
///     │ inline, owned        ║  0  │  0  │
///     │ out-of-line, owned   ║  1  │  0  │
///     │ out-of-line, unowned ║  1  │  1  │
///     └──────────────────────╨─────┴─────┘
///
/// b63 indicates whether the payload of the view is stored out-of-line. If it is, `units` with
/// b63 and b62 unset stores a pointer to the out-of-line payload, which is a buffer storing an
/// `Int`, which is the number of units in the view, followed by a contiguous array of bytes,
/// which contains the units themselves, and finally a null terminator.
///
/// If the payload is inline, the number of units in the view is stored in the 6 lowest bits of
/// `units`'s most significant byte and the units themselves are stored in the following bytes.
/// For example, the inline UTF-8 view of "Salut" is as follows:
///
///     most significant byte
///       ↓
///     ┌────┬────┬────┬────┬────┬────┬────┬────┐
///     | 05 | 53 | 61 | 6C | 75 | 74 | 00 | 00 |
///     └────┴────┴────┴────┴────┴────┴────┴────┘
///
/// b62 indicates whether the view does *not* own its storage: when b62 is unset, the view owns
/// its storage and is responsible for its deallocation if it is out-of-line. Unowned,
/// out-of-line storage typically corresponds to static allocations.
let units: UInt64

/// Creates an instance with given representation.
memberwise init

}

/// Initializers and low-level accessors to the representation of a `UTF8Array`.
public extension UTF8Array {

  /// Creates a view taking ownership of the out-of-line payload referred by `p`.
  init(taking_ownership_of p: MemoryAddress) {
    var u = UInt64(truncating_or_extending: UInt(bit_pattern: p))
    // Set b63 and leave b62 unset: the payload is out-of-line and owned.
    &u |= (0b10 as UInt64) << 62
    &self = .new(units: u)
  }

  /// Creates an empty view.
  public init() {
    // All bits unset denotes an inline payload containing zero units.
    &self = .new(units: 0)
  }

  /// Projects the units in `self` as a null-terminated buffer.
  ///
  /// Use this method to read the contents of the view as a C-style null-terminated string. The
  /// returned buffer has a size `count() + 1`. It is alive only for the duration of the projection
  /// and shall not be mutated.
  public property nullterminated: Pointer<Int8> {
    let {
      if is_inline() {
        // Local scratch space receiving the inline units in array order.
        var storage: UInt = 0
        let buffer = PointerToMutable<Int8>(type_punning: mutable_pointer[to: &storage])

        // Note: The copy could be optimized away if we stored the units in memory the same way
        // they would be stored in an array, i.e., in reverse order on big-endian machines.
        var i = 0
        while i < 7 {
          // The unit at position `i` is the `i`-th byte after the most significant one.
          let s = 8 * (6 - i)
          let v = Int8(truncating_or_extending: units >> s)
          // Each unit must land at its own offset; writing through `buffer` directly would
          // overwrite the first byte on every iteration. The eighth byte of `storage` is never
          // written and stays zero.
          buffer.advance(by: i).unsafe_initialize_pointee(v)
          &i += 1
        }

        yield Pointer<Int8>(buffer)
      } else {
        // Out-of-line payloads already store their units followed by a null terminator.
        yield unsafe_heap_payload.0
      }
    }
  }

  /// Returns `true` if the payload of `self` is stored inline.
  fun is_inline() -> Bool {
    // Note: the flag is stored inversed so that `0` is an empty string.
    (units & ((1 as UInt64) << 63)) == 0
  }

  /// Returns `true` if `self` owns its payload.
  fun is_owned() -> Bool {
    // Note: the flag is stored inversed; b62 is set for unowned payloads.
    (units & ((1 as UInt64) << 62)) == 0
  }

  /// Projects the address and size of `self`'s payload, assuming it is allocated out-of-line.
  ///
  /// `start` is the address of the first unit, which is located right after the `Int` header
  /// storing `count`.
  ///
  /// - Requires: `!is_inline()`.
  property unsafe_heap_payload: {start: Pointer<Int8>, count: Int} {
    let {
      // TODO: uncomment when #1046 is implemented
      // assert(!is_inline())

      // Clear the most significant byte, which contains the discriminator bits, to recover the
      // address of the payload's header.
      let buffer = Pointer<Int>(
        bit_pattern: UInt(truncating_or_extending: units & ~((0xff as UInt64) << 56)))
      yield (
        start: Pointer<Int8>(type_punning: buffer.advance(by: 1)),
        count: buffer.unsafe[].copy())
    }
  }

}

public conformance UTF8Array: Deinitializable {

  /// Deinitializes `self`, deallocating its payload if it is out-of-line and owned.
  public fun deinit() sink {
    // Inline payloads have nothing to free; unowned payloads (e.g., static allocations) must not
    // be freed by this instance.
    if !is_inline() && is_owned() {
      // The allocation starts at the `Int` header that precedes the units, not at the first
      // unit, so recover that address by clearing the discriminator byte of `units`.
      let header = Pointer<Int>(
        bit_pattern: UInt(truncating_or_extending: units & ~((0xff as UInt64) << 56)))
      PointerToMutable(adding_mutation_to: header).deallocate()
    }
  }

}

public conformance UTF8Array: Copyable {

  /// Returns a copy of `self`.
  ///
  /// Inline and unowned representations are copied bitwise. Owned out-of-line payloads are
  /// cloned into a fresh allocation owned by the returned instance.
  public fun copy() -> Self {
    if is_inline() || !is_owned() {
      return .new(units: units.copy())
    } else {
      let payload = unsafe_heap_payload
      // Header (`Int` count) + units + null terminator.
      let payload_size = MemoryLayout<Int>.stride() + payload.1 + 1
      let payload_clone = MemoryAddress.allocate_bytes(
        count: payload_size,
        aligned_at: MemoryLayout<Int>.alignment())

      // The payload starts at its `Int` header, which precedes the first unit. Copying from the
      // first unit would drop the header and read past the end of the source buffer.
      let header = Pointer<Int>(
        bit_pattern: UInt(truncating_or_extending: units & ~((0xff as UInt64) << 56)))

      // Note: copy the entire payload at once.
      let d = CVoidPointer(base: payload_clone.base)
      let s = CVoidPointer(base: header.base)
      _ = memmove(d, s, payload_size)

      return .new(taking_ownership_of: payload_clone)
    }
  }

}

public conformance UTF8Array: Equatable {

  /// Returns `true` iff `self` and `other` contain the same units in the same order.
  public fun infix== (_ other: Self) -> Bool {
    // If both LHS and RHS are stored inline, their representations are bitwise equal.
    if self.is_inline() && other.is_inline() {
      return self.units == other.units
    }

    // Fast path: LHS and RHS are equal if they point to the same buffer. Distinct buffers may
    // still hold the same units, so a mismatch falls through to the element-wise comparison.
    if !self.is_inline() && !other.is_inline() {
      if self.unsafe_heap_payload.0 == other.unsafe_heap_payload.0 { return true }
    }

    // LHS and RHS are equal if they contain the same elements in the same order.
    // TODO: Rewrite as `self.elements_equal(other)`.
    if self.count() != other.count() { return false }
    var i = 0
    while i < self.count() {
      if self[i] != other[i] { return false }
      &i += 1
    }
    return true
  }

}

// public conformance UTF8Array: Collection {
public extension UTF8Array {

  /// An index in an UTF8Array.
  public typealias Index = Int

  /// A single UTF-8 code unit.
  ///
  /// Note: matches the type yielded by `subscript(_:)`.
  public typealias Element = Int8

  /// Returns the position of the first unit in `self`, which is always `0`.
  public fun start_index() -> Int {
    0
  }

  /// Returns the number of elements in `self`.
  public fun count() -> Int {
    if is_inline() {
      // The count of an inline payload is stored in the most significant byte of `units`.
      Int(truncating_or_extending: units >> 56)
    } else {
      unsafe_heap_payload.1.copy()
    }
  }

  /// Accesses the unit at `position` in `self`.
  ///
  /// - Requires: `position` is in the range `0 ..< count()`. The bounds are not checked yet.
  public subscript(_ position: Int): Int8 {
    if is_inline() {
      // TODO: uncomment when #1046 is implemented
      // precondition((0 <= position) && (position < Int(units >> 56)))

      // The unit at `position` is the `position`-th byte after the most significant one.
      let s = 8 * (6 - position)
      yield Int8(truncating_or_extending: units >> s)
    } else {
      let p = unsafe_heap_payload
      // TODO: uncomment when #1046 is implemented
      // precondition((0 <= position) && (position < p.1))
      yield p.0.advance(by: position).unsafe[]
    }
  }

}

0 comments on commit d44becd

Please sign in to comment.