From 2e819dd5385465b729352af4934c42f64aa29d76 Mon Sep 17 00:00:00 2001 From: thedavidmeister Date: Wed, 30 Oct 2024 23:15:52 +0400 Subject: [PATCH] tests --- .gas-snapshot | 6 ++- README.md | 60 ++++++++++++++++++++++++ src/lib/parse/LibParseChar.sol | 10 +++- test/lib/parse/LibParseCharSlow.sol | 14 ++++++ test/src/lib/LibParseChar.skipMask.t.sol | 35 ++++++++++++++ 5 files changed, 122 insertions(+), 3 deletions(-) create mode 100644 test/src/lib/LibParseChar.skipMask.t.sol diff --git a/.gas-snapshot b/.gas-snapshot index 0fe40b9..c223e54 100644 --- a/.gas-snapshot +++ b/.gas-snapshot @@ -1,2 +1,4 @@ -LibParseCharIsMaskTest:testIsMaskPastEnd(uint256,uint256,uint256) (runs: 256, μ: 16949, ~: 19250) -LibParseCharIsMaskTest:testIsMaskReference(string,uint256,uint256) (runs: 256, μ: 8346, ~: 8396) \ No newline at end of file +LibParseCharIsMaskTest:testIsMaskPastEnd(uint256,uint256,uint256) (runs: 257, μ: 16977, ~: 19628) +LibParseCharIsMaskTest:testIsMaskReference(string,uint256,uint256) (runs: 257, μ: 8346, ~: 8396) +LibParseCharSkipMaskTest:testSkipMaskPastEnd(uint256,uint256,uint256) (runs: 257, μ: 17219, ~: 19710) +LibParseCharSkipMaskTest:testSkipMaskReference(string,uint256,uint256) (runs: 257, μ: 8587, ~: 8461) \ No newline at end of file diff --git a/README.md b/README.md index 9700ed9..bc25373 100644 --- a/README.md +++ b/README.md @@ -1 +1,61 @@ # rain.string + +Tools for working with strings that we've found useful to build Rainlang. + +More specialised and complex parsing logic exists in other Rainlang repos, but +this stuff is broadly applicable and low level enough to be gas efficient enough +to do what needs to be done. + +Generally parsing in rainlang works like a bloom filter on individual characters. +We read characters from memory one byte at a time then bit shift to compare it +against a bitmap mask that represents characters of interest. 
For example we +might need to know if a character is numeric `0-9` or alphanumeric `a-zA-Z0-9`, +and we cannot rely on regexes, in-memory sets, or even loops, that might be +easily at hand for similar tasks in other languages. + +Luckily, EVM values are 32 bytes and so we can fit all possible ASCII characters +in a single value as a bloom without any ambiguity. + +## Dev stuff + +### Local environment & CI + +Uses nixos. + +Install `nix` - https://nixos.org/download.html. + +Run `nix develop` in this repo to drop into the shell. Please ONLY use the nix +version of `foundry` for development, to ensure versions are all compatible. + +Read the `flake.nix` file to find some additional commands included for dev and +CI usage. + +## Legal stuff + +Everything is under DecentraLicense 1.0 (DCL-1.0) which can be found in `LICENSES/`. + +This is basically `CAL-1.0` which is an open source license +https://opensource.org/license/cal-1-0 + +The non-legal summary of DCL-1.0 is that the source is open, as expected, but +also user data in the systems that this code runs on must also be made available +to those users as relevant, and that private keys remain private. + +Roughly it's "not your keys, not your coins" aware, as close as we could get in +legalese. + +This is the default situation on permissionless blockchains, so shouldn't require +any additional effort by dev-users to adhere to the license terms. + +This repo is REUSE 3.2 compliant https://reuse.software/spec-3.2/ and compatible +with `reuse` tooling (also available in the nix shell here). + +``` +nix develop -c rainix-sol-legal +``` + +## Contributions + +Contributions are welcome **under the same license** as above. + +Contributors agree and warrant that their contributions are compliant. 
\ No newline at end of file diff --git a/src/lib/parse/LibParseChar.sol b/src/lib/parse/LibParseChar.sol index 38da72d..efdc698 100644 --- a/src/lib/parse/LibParseChar.sol +++ b/src/lib/parse/LibParseChar.sol @@ -4,7 +4,15 @@ pragma solidity ^0.8.25; library LibParseChar { /// Skip an unlimited number of chars until we find one that is not in the - /// mask. + /// mask. If the cursor is at or past the end, the result is the cursor. + /// This function DOES NOT check if the cursor is in range of the end as it + /// is expected to be used in very hot gas sensitive loops so we want to + /// avoid jumps. The function IS guaranteed never to move the cursor past + /// the end if it was not already there. + /// Otherwise, the result points to the first char that is not in the mask. + /// @param cursor The current position in the data. + /// @param end The end of the data. + /// @param mask The mask to check against. function skipMask(uint256 cursor, uint256 end, uint256 mask) internal pure returns (uint256) { assembly ("memory-safe") { //slither-disable-next-line incorrect-shift diff --git a/test/lib/parse/LibParseCharSlow.sol b/test/lib/parse/LibParseCharSlow.sol index e7b6e26..a4971ce 100644 --- a/test/lib/parse/LibParseCharSlow.sol +++ b/test/lib/parse/LibParseCharSlow.sol @@ -3,6 +3,20 @@ pragma solidity ^0.8.25; library LibParseCharSlow { + function skipMaskSlow(uint256 cursor, uint256 end, uint256 mask) internal pure returns (uint256) { + while (cursor < end) { + uint256 wordAtCursor; + assembly ("memory-safe") { + wordAtCursor := mload(cursor) + } + if ((1 << uint256(wordAtCursor >> 0xF8)) & mask == 0) { + break; + } + cursor += 1; + } + return cursor; + } + function isMaskSlow(uint256 cursor, uint256 end, uint256 mask) internal pure returns (uint256) { if (cursor < end) { uint256 wordAtCursor; diff --git a/test/src/lib/LibParseChar.skipMask.t.sol b/test/src/lib/LibParseChar.skipMask.t.sol new file mode 100644 index 0000000..8cd2696 --- /dev/null +++ 
b/test/src/lib/LibParseChar.skipMask.t.sol @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: LicenseRef-DCL-1.0 +// SPDX-FileCopyrightText: Copyright (c) 2020 thedavidmeister +pragma solidity =0.8.25; + +import {Test} from "forge-std/Test.sol"; + +import {LibParseChar} from "src/lib/parse/LibParseChar.sol"; +import {LibPointer, Pointer} from "rain.solmem/lib/LibPointer.sol"; +import {LibBytes} from "rain.solmem/lib/LibBytes.sol"; +import {LibParseCharSlow} from "test/lib/parse/LibParseCharSlow.sol"; + +/// @title LibParseCharSkipMaskTest +/// @notice Tests that the skipMask function works correctly. +contract LibParseCharSkipMaskTest is Test { + using LibBytes for bytes; + + /// Test that a cursor at or past end is returned unchanged by skipMask. + function testSkipMaskPastEnd(uint256 cursor, uint256 end, uint256 mask) external pure { + // Limit to 16-bit values to avoid OOM reads. + end = bound(end, 0, type(uint16).max); + cursor = bound(cursor, end, type(uint16).max); + assertEq(LibParseChar.skipMask(cursor, end, mask), cursor); + } + + /// Test that skipMask matches a reference implementation. + function testSkipMaskReference(string memory s, uint256 index, uint256 mask) external pure { + vm.assume(bytes(s).length > 0); + index = bound(index, 0, bytes(s).length - 1); + + uint256 cursor = Pointer.unwrap(bytes(s).dataPointer()) + index; + uint256 end = Pointer.unwrap(bytes(s).endDataPointer()); + + assertEq(LibParseChar.skipMask(cursor, end, mask), LibParseCharSlow.skipMaskSlow(cursor, end, mask)); + } +}