From 0903554ec50c51a10ff932611c02888d646048de Mon Sep 17 00:00:00 2001
From: Nick Krichevsky
Date: Sun, 6 Oct 2024 12:46:57 -0400
Subject: [PATCH] Add trim_chars, trim_chars_left, and trim_chars_right

Add charset-based variants of trim, trim_left and trim_right.

On Erlang they call string:trim/3 with the charset converted by
string:to_graphemes/1. On JavaScript they build an anchored
character-class RegExp from the charset. The charset is escaped before it
is interpolated (including `-`, which would otherwise form a range inside
a character class), and the RegExp uses the `u` flag so that astral code
points such as emoji are matched whole instead of as surrogate halves.

Note: the JavaScript implementation matches individual code points, so a
charset grapheme made of several code points is treated as each of its
code points.
---
 src/gleam/string.gleam       | 60 ++++++++++++++++++++++
 src/gleam_stdlib.mjs         | 95 +++++++++++++++++++++++++++++++++--
 test/gleam/string_test.gleam | 54 ++++++++++++++++++++
 3 files changed, 207 insertions(+), 2 deletions(-)

diff --git a/src/gleam/string.gleam b/src/gleam/string.gleam
index e530c8fa..200c4ab5 100644
--- a/src/gleam/string.gleam
+++ b/src/gleam/string.gleam
@@ -553,9 +553,35 @@ fn do_trim(string: String) -> String {
   erl_trim(string, Both)
 }
 
+/// Like `trim`, but removes the specified chars on both sides of a `String`
+///
+/// ## Examples
+///
+/// ```gleam
+/// trim_chars("..,hats,..", ".,")
+/// // -> "hats"
+/// ```
+pub fn trim_chars(string: String, charset: String) -> String {
+  do_trim_chars(string, charset)
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "trim_chars")
+fn do_trim_chars(string: String, charset: String) -> String {
+  erl_trim_chars(string, Both, erl_to_graphemes(charset))
+}
+
 @external(erlang, "string", "trim")
 fn erl_trim(a: String, b: Direction) -> String
 
+@external(erlang, "string", "trim")
+fn erl_trim_chars(a: String, b: Direction, c: ErlGraphemes) -> String
+
+@external(erlang, "string", "to_graphemes")
+fn erl_to_graphemes(a: String) -> ErlGraphemes
+
+// erlang's string:to_graphemes returns char() | [char()], which cannot be directly represented
+type ErlGraphemes
+
 type Direction {
   Leading
   Trailing
@@ -598,6 +624,40 @@ fn do_trim_right(string: String) -> String {
   erl_trim(string, Trailing)
 }
 
+/// Like `trim_left`, but removes the specified chars on the left of a `String`
+///
+/// ## Examples
+///
+/// ```gleam
+/// trim_chars_left("..,hats,..", ".,")
+/// // -> "hats,.."
+/// ```
+pub fn trim_chars_left(string: String, charset: String) -> String {
+  do_trim_chars_left(string, charset)
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "trim_chars_left")
+fn do_trim_chars_left(string: String, charset: String) -> String {
+  erl_trim_chars(string, Leading, erl_to_graphemes(charset))
+}
+
+/// Like `trim_right`, but removes the specified chars on the right of a `String`
+///
+/// ## Examples
+///
+/// ```gleam
+/// trim_chars_right("..,hats,..", ".,")
+/// // -> "..,hats"
+/// ```
+pub fn trim_chars_right(string: String, charset: String) -> String {
+  do_trim_chars_right(string, charset)
+}
+
+@external(javascript, "../gleam_stdlib.mjs", "trim_chars_right")
+fn do_trim_chars_right(string: String, charset: String) -> String {
+  erl_trim_chars(string, Trailing, erl_to_graphemes(charset))
+}
+
 /// Splits a non-empty `String` into its first element (head) and rest (tail).
 /// This lets you pattern match on `String`s exactly as you would with lists.
 ///
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 50ebb46f..cbeaf843 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -21,6 +21,9 @@ import Dict from "./dict.mjs";
 
 const Nil = undefined;
 const NOT_FOUND = {};
+// See license note in escape_regexp_chars
+const reRegExpChar = /[\\^$.*+?()[\]{}|-]/g;
+const reHasRegExpChar = RegExp(reRegExpChar.source);
 
 export function identity(x) {
   return x;
@@ -259,8 +262,8 @@ const unicode_whitespaces = [
   "\u2029", // Paragraph separator
 ].join("");
 
-const left_trim_regex = new RegExp(`^([${unicode_whitespaces}]*)`, "g");
-const right_trim_regex = new RegExp(`([${unicode_whitespaces}]*)$`, "g");
+const left_trim_regex = new_left_trim_regexp(unicode_whitespaces);
+const right_trim_regex = new_right_trim_regexp(unicode_whitespaces);
 
 export function trim(string) {
   return trim_left(trim_right(string));
@@ -274,6 +277,23 @@ export function trim_right(string) {
   return string.replace(right_trim_regex, "");
 }
 
+export function trim_chars(string, charset) {
+  const trimmed_right = trim_chars_right(string, charset);
+  return trim_chars_left(trimmed_right, charset);
+}
+
+export function trim_chars_left(string, charset) {
+  const trim_regexp = new_left_trim_regexp(charset);
+
+  return string.replace(trim_regexp, "");
+}
+
+export function trim_chars_right(string, charset) {
+  const trim_regexp = new_right_trim_regexp(charset);
+
+  return string.replace(trim_regexp, "");
+}
+
 export function bit_array_from_string(string) {
   return toBitArray([stringBits(string)]);
 }
@@ -953,3 +973,74 @@ export function bit_array_compare(first, second) {
   }
   return new Lt(); // second has more items
 }
+
+// Builds a RegExp that matches a leading run of characters from `charset`.
+function new_left_trim_regexp(charset) {
+  const escaped_charset = escape_regexp_chars(charset);
+  return new RegExp(`^([${escaped_charset}]*)`, "gu");
+}
+
+// Builds a RegExp that matches a trailing run of characters from `charset`.
+function new_right_trim_regexp(charset) {
+  const escaped_charset = escape_regexp_chars(charset);
+  return new RegExp(`([${escaped_charset}]*)$`, "gu");
+}
+
+// Escapes RegExp metacharacters so `string` is matched literally. `-` is
+// escaped as well because callers embed the result in a character class.
+function escape_regexp_chars(string) {
+  /*
+   * The MIT License
+   *
+   * Copyright JS Foundation and other contributors
+   *
+   * Based on Underscore.js, copyright Jeremy Ashkenas,
+   * DocumentCloud and Investigative Reporters & Editors
+   *
+   * This software consists of voluntary contributions made by many
+   * individuals. For exact contribution history, see the revision history
+   * available at https://github.com/lodash/lodash
+   *
+   * The following license applies to all parts of this software except as
+   * documented below:
+   *
+   * ====
+   *
+   * Permission is hereby granted, free of charge, to any person obtaining
+   * a copy of this software and associated documentation files (the
+   * "Software"), to deal in the Software without restriction, including
+   * without limitation the rights to use, copy, modify, merge, publish,
+   * distribute, sublicense, and/or sell copies of the Software, and to
+   * permit persons to whom the Software is furnished to do so, subject to
+   * the following conditions:
+   *
+   * The above copyright notice and this permission notice shall be
+   * included in all copies or substantial portions of the Software.
+   *
+   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+   * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+   *
+   * ====
+   *
+   * Copyright and related rights for sample code are waived via CC0. Sample
+   * code is defined as all source code displayed within the prose of the
+   * documentation.
+   *
+   * CC0: http://creativecommons.org/publicdomain/zero/1.0/
+   *
+   * ====
+   *
+   * Files located in the node_modules and vendor directories are externally
+   * maintained libraries used by this software which have their own
+   * licenses; we recommend you read them, as their terms may differ from the
+   * terms above.
+   */
+  return string && reHasRegExpChar.test(string)
+    ? string.replace(reRegExpChar, '\\$&')
+    : string || '';
+}
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index 6d3031e4..a0ad4036 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -176,6 +176,18 @@ pub fn trim_right_test() {
   |> should.equal(" hats")
 }
 
+pub fn trim_chars_left_test() {
+  ",..hats..,"
+  |> string.trim_chars_left(",.")
+  |> should.equal("hats..,")
+}
+
+pub fn trim_chars_right_test() {
+  ",..hats..,"
+  |> string.trim_chars_right(",.")
+  |> should.equal(",..hats")
+}
+
 // unicode whitespaces
 pub fn trim_horizontal_tab_test() {
   "hats\u{0009}"
@@ -364,6 +376,48 @@ pub fn trim_comma_test() {
   |> should.equal("hats,")
 }
 
+pub fn trim_chars_test() {
+  ",,hats,"
+  |> string.trim_chars(",")
+  |> should.equal("hats")
+}
+
+pub fn trim_chars_commas_and_periods_test() {
+  ",,hats,..."
+  |> string.trim_chars(",.")
+  |> should.equal("hats")
+}
+
+pub fn trim_chars_keeps_whitespace_not_in_charset_test() {
+  ",,hats ,..."
+  |> string.trim_chars(",.")
+  |> should.equal("hats ")
+}
+
+pub fn trim_chars_does_not_trim_from_middle_of_string_test() {
+  ",,hats,hats,hats,..."
+  |> string.trim_chars(",.")
+  |> should.equal("hats,hats,hats")
+}
+
+pub fn trim_chars_trims_complex_graphemes_test() {
+  "hats👍👍👍👍"
+  |> string.trim_chars("👍")
+  |> should.equal("hats")
+}
+
+pub fn trim_chars_left_treats_regexp_chars_literally_test() {
+  "^^hats^^"
+  |> string.trim_chars_left("^")
+  |> should.equal("hats^^")
+}
+
+pub fn trim_chars_does_not_treat_hyphen_as_range_test() {
+  "abchats"
+  |> string.trim_chars("a-c")
+  |> should.equal("bchats")
+}
+
 pub fn starts_with_test() {
   "theory"
   |> string.starts_with("")