From b8a30cb2b8f035f549206f5a1849c5d301b3be4c Mon Sep 17 00:00:00 2001 From: Frixuu Date: Sun, 1 Dec 2024 02:28:53 +0100 Subject: [PATCH 1/2] [std] Add StringBuf.clear --- src/macro/eval/evalStdLib.ml | 7 ++++ src/macro/eval/evalString.ml | 4 +++ std/StringBuf.hx | 9 +++++ std/cpp/_std/StringBuf.hx | 30 ++++++++++------- std/eval/_std/StringBuf.hx | 1 + std/hl/_std/StringBuf.hx | 4 +++ std/jvm/_std/StringBuf.hx | 4 +++ std/lua/_std/StringBuf.hx | 39 ++++++++++++++++------ std/neko/_std/StringBuf.hx | 5 +++ std/php/_std/StringBuf.hx | 4 +++ std/python/_std/StringBuf.hx | 17 +++++----- tests/unit/src/unitstd/StringBuf.unit.hx | 42 +++++++++++++++++++++++- 12 files changed, 136 insertions(+), 30 deletions(-) diff --git a/src/macro/eval/evalStdLib.ml b/src/macro/eval/evalStdLib.ml index 9af617ed42b..68153d526e5 100644 --- a/src/macro/eval/evalStdLib.ml +++ b/src/macro/eval/evalStdLib.ml @@ -2429,6 +2429,12 @@ module StdStringBuf = struct vnull ) + let clear = vifun0 (fun vthis -> (* StringBuf.clear(): empties the underlying VStringBuffer and returns vnull *) + let this = this vthis in + VStringBuffer.clear this; + vnull + ) + let get_length = vifun0 (fun vthis -> let this = this vthis in vint this.blength @@ -3692,6 +3698,7 @@ let init_standard_library builtins = "add",StdStringBuf.add; "addChar",StdStringBuf.addChar; "addSub",StdStringBuf.addSub; + "clear",StdStringBuf.clear; "get_length",StdStringBuf.get_length; "toString",StdStringBuf.toString; ]; diff --git a/src/macro/eval/evalString.ml b/src/macro/eval/evalString.ml index 047a6e4d5a8..7e0245b35ae 100644 --- a/src/macro/eval/evalString.ml +++ b/src/macro/eval/evalString.ml @@ -284,6 +284,10 @@ module VStringBuffer = struct Buffer.add_substring this.bbuffer s.sstring b_pos b_len; this.blength <- this.blength + c_len + let clear this = (* empties the buffer and zeroes the tracked length; Buffer.clear keeps the storage already allocated (unlike Buffer.reset) *) + Buffer.clear this.bbuffer; + this.blength <- 0 + let contents this = create_with_length (Buffer.contents this.bbuffer) this.blength end \ No newline at end of file diff --git a/std/StringBuf.hx b/std/StringBuf.hx index 72a9aefa92a..6cbc140f533 100644 --- 
a/std/StringBuf.hx +++ b/std/StringBuf.hx @@ -88,6 +88,15 @@ class StringBuf { b += (len == null ? s.substr(pos) : s.substr(pos, len)); } + /** + Removes all characters from `this` StringBuf, making it possible to reuse it. + + It will always allocate less memory than constructing a new `StringBuf` instance. + **/ + public inline function clear():Void { + b = ""; + } + /** Returns the content of `this` StringBuf as String. diff --git a/std/cpp/_std/StringBuf.hx b/std/cpp/_std/StringBuf.hx index aca39e22dbe..b9980b701b3 100644 --- a/std/cpp/_std/StringBuf.hx +++ b/std/cpp/_std/StringBuf.hx @@ -21,31 +21,34 @@ */ import cpp.NativeString; - -using cpp.NativeArray; +import cpp.Pointer; @:coreApi class StringBuf { - private var b:Array<String>; + private var b:Null<Array<String>> = null; public var length(get, never):Int; - var charBuf:Array<cpp.Char>; + var charBuf:Null<Array<cpp.Char>> = null; public function new():Void {} - private function charBufAsString():String { - var len = charBuf.length; - charBuf.push(0); - return NativeString.fromGcPointer(charBuf.address(0), len); + private function drainCharBuf():String { + final buffer = this.charBuf; + final length = buffer.length; + buffer.push(0); + final bufferPtr = Pointer.arrayElem(buffer, 0); + final bufferString = NativeString.fromGcPointer(bufferPtr, length); + this.charBuf = null; + return bufferString; } private function flush():Void { + final charBufAsString = drainCharBuf(); if (b == null) - b = [charBufAsString()]; + b = [charBufAsString]; else - b.push(charBufAsString()); - charBuf = null; + b.push(charBufAsString); } function get_length():Int { @@ -89,6 +92,11 @@ class StringBuf { } } + public function clear():Void { /* `?.` because charBuf and b are null on a freshly constructed instance */ + this.charBuf?.resize(0); + this.b?.resize(0); + } + public function toString():String { if (charBuf != null) flush(); diff --git a/std/eval/_std/StringBuf.hx b/std/eval/_std/StringBuf.hx index 4d4fdf23dd3..da18e883a45 100644 --- a/std/eval/_std/StringBuf.hx +++ b/std/eval/_std/StringBuf.hx @@ -27,5 +27,6 @@ extern class StringBuf { 
function add<T>(x:T):Void; function addChar(c:Int):Void; function addSub(s:String, pos:Int, ?len:Int):Void; + function clear():Void; function toString():String; } diff --git a/std/hl/_std/StringBuf.hx b/std/hl/_std/StringBuf.hx index ab5dcdb0450..a4b43e57cff 100644 --- a/std/hl/_std/StringBuf.hx +++ b/std/hl/_std/StringBuf.hx @@ -101,6 +101,10 @@ throw "Invalid unicode char " + c; } + public function clear():Void { + pos = 0; /* only the write position is reset; the buffer itself is left untouched */ + } + public function toString():String { if (pos + 2 > size) __expand(0); diff --git a/std/jvm/_std/StringBuf.hx b/std/jvm/_std/StringBuf.hx index b19c117fe96..74ebc0853b3 100644 --- a/std/jvm/_std/StringBuf.hx +++ b/std/jvm/_std/StringBuf.hx @@ -99,6 +99,10 @@ class StringBuf { b.appendCodePoint(c); } + public function clear():Void { + b.setLength(0); + } + public function toString():String { return b.toString(); } diff --git a/std/lua/_std/StringBuf.hx b/std/lua/_std/StringBuf.hx index de33387acbb..2dcf15b5ab1 100644 --- a/std/lua/_std/StringBuf.hx +++ b/std/lua/_std/StringBuf.hx @@ -20,15 +20,25 @@ * DEALINGS IN THE SOFTWARE. */ +import lua.Lua; import lua.Table; class StringBuf { - var b:Table<Int, String>; + private var b:Table<Int, String>; + + /** + Count of "good" elements in the internal buffer table. + + If `this` StringBuf has been `clear`ed previously, + this value might not be equal to the length (`#`) of that table. 
+ **/ + private var bufferLength:Int; public var length(get, null):Int; public inline function new() { b = Table.create(); + this.bufferLength = 0; this.length = 0; } @@ -37,23 +47,32 @@ class StringBuf { } public inline function add<T>(x:T):Void { - var str = Std.string(x); - Table.insert(b, str); - length += str.length; + final str = Std.string(x); + final i = this.bufferLength += 1; + Lua.rawset(this.b, i, str); + this.length += str.length; } public inline function addChar(c:Int):Void { - Table.insert(b, String.fromCharCode(c)); - length += 1; + final i = this.bufferLength += 1; + Lua.rawset(this.b, i, String.fromCharCode(c)); + this.length += 1; } public inline function addSub(s:String, pos:Int, ?len:Int):Void { - var part = len == null ? s.substr(pos) : s.substr(pos, len); - Table.insert(b, part); - length += part.length; + this.add(s.substr(pos, len)); + } + + public inline function clear():Void { /* table entries are left in place; toString only concatenates the first bufferLength of them */ + this.bufferLength = 0; + this.length = 0; } public inline function toString():String { - return Table.concat(b); + final len = this.bufferLength; + if (len == 0) { + return ""; + } + return Table.concat(this.b, "", 1, len); } } diff --git a/std/neko/_std/StringBuf.hx b/std/neko/_std/StringBuf.hx index 32a9e29ec53..42f849b71f7 100644 --- a/std/neko/_std/StringBuf.hx +++ b/std/neko/_std/StringBuf.hx @@ -45,6 +45,10 @@ __add_char(b, c); } + public inline function clear():Void { + __reset(b); + } + public inline function toString():String { return new String(__to_string(b)); } @@ -54,5 +58,6 @@ static var __add_char:Dynamic = neko.Lib.load("std", "buffer_add_char", 2); static var __add_sub:Dynamic = neko.Lib.load("std", "buffer_add_sub", 4); static var __to_string:Dynamic = neko.Lib.load("std", "buffer_string", 1); + static var __reset:Dynamic = neko.Lib.load("std", "buffer_reset", 1); static var __get_length:Dynamic = try neko.Lib.load("std", "buffer_get_length", 1) catch (e:Dynamic) null; } diff --git a/std/php/_std/StringBuf.hx b/std/php/_std/StringBuf.hx 
index 12014111715..4f6d0159500 100644 --- a/std/php/_std/StringBuf.hx +++ b/std/php/_std/StringBuf.hx @@ -56,6 +56,10 @@ import php.Syntax; b += String.fromCharCode(c); } + public inline function clear():Void { + b = ""; + } + public inline function toString():String { return b; } diff --git a/std/python/_std/StringBuf.hx b/std/python/_std/StringBuf.hx index e225d2a8a74..2a9ba9d5a6c 100644 --- a/std/python/_std/StringBuf.hx +++ b/std/python/_std/StringBuf.hx @@ -20,7 +20,6 @@ * DEALINGS IN THE SOFTWARE. */ -import python.lib.io.IOBase.SeekSet; import python.lib.io.StringIO; @:coreApi @@ -34,11 +33,7 @@ class StringBuf { public var length(get, never):Int; function get_length():Int { - var pos = b.tell(); - b.seek(0, SeekEnd); - var len = b.tell(); - b.seek(pos, SeekSet); - return len; + return b.tell(); } public inline function add<T>(x:T):Void { @@ -57,7 +52,13 @@ class StringBuf { add1((len == null ? s.substr(pos) : s.substr(pos, len))); } - public inline function toString():String { - return b.getvalue(); + public inline function clear():Void { + b.seek(0, SeekSet); /* rewind only: length and toString() go by the cursor position, so old data past it is never read */ + } + + public function toString():String { + final length = this.length; + b.seek(0, SeekSet); + return b.read(length); } } diff --git a/tests/unit/src/unitstd/StringBuf.unit.hx b/tests/unit/src/unitstd/StringBuf.unit.hx index 2b1829f1117..d83bd0a1b02 100644 --- a/tests/unit/src/unitstd/StringBuf.unit.hx +++ b/tests/unit/src/unitstd/StringBuf.unit.hx @@ -1,6 +1,7 @@ // add, toString var x = new StringBuf(); x.toString() == ""; +x.length == 0; x.add(null); x.toString() == "null"; @@ -37,8 +38,47 @@ x.addSub("a👽b", 1, 1); x.toString() == "👽"; #end +// StringBuf can store multiple elements +final x = new StringBuf(); +x.add("ab"); +x.add("cd"); +x.addChar("e".code); +x.add("fg"); +x.toString() == "abcdefg"; + +// Calling toString() does not empty the buffer +x.toString() == "abcdefg"; +x.toString() == "abcdefg"; +x.length == 7; + // identity function identityTest(s:StringBuf) { return s; } -identityTest(x) 
== x; \ No newline at end of file +identityTest(x) == x; + +// Clearing a buffer resets its visible state +x.length > 0; +x.clear(); +x.toString() == ""; +x.length == 0; + +// Previously cleared buffers do not leak past state +x.add("foo"); +x.toString() == "foo"; +x.length == 3; + +// Buffers can be cleared multiple times +x.clear(); +x.length == 0; +x.clear(); +x.clear(); +x.clear(); +x.length == 0; + +// Buffers can be cleared immediately after creation +// (i.e. `clear` does not depend on any private state being non-null) +final x = new StringBuf(); +x.clear(); +x.toString() == ""; +x.length == 0; From b658699fac944d78d7bb13b16d24c0d8b922cfd0 Mon Sep 17 00:00:00 2001 From: Frixuu Date: Wed, 4 Dec 2024 10:04:08 +0100 Subject: [PATCH 2/2] [std] Clarify StringBuf.clear documentation --- std/StringBuf.hx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/std/StringBuf.hx b/std/StringBuf.hx index 6cbc140f533..52f69bcbf12 100644 --- a/std/StringBuf.hx +++ b/std/StringBuf.hx @@ -89,9 +89,12 @@ class StringBuf { } /** - Removes all characters from `this` StringBuf, making it possible to reuse it. + Visibly removes all characters from `this` StringBuf, making it possible to reuse it. - It will always allocate less memory than constructing a new `StringBuf` instance. + Implementation detail: On some targets, `clear`ing a StringBuf + MAY keep its internal buffer allocated, preserving its capacity. + This is done to avoid unnecessary allocations on later `add` operations, + but might be incorrectly perceived as a memory leak. **/ public inline function clear():Void { b = "";