Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[std] Add StringBuf.clear() #11848

Open
wants to merge 2 commits into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/macro/eval/evalStdLib.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2429,6 +2429,12 @@ module StdStringBuf = struct
vnull
)

let clear = vifun0 (fun vthis ->
let this = this vthis in
VStringBuffer.clear this;
vnull
)

let get_length = vifun0 (fun vthis ->
let this = this vthis in
vint this.blength
Expand Down Expand Up @@ -3692,6 +3698,7 @@ let init_standard_library builtins =
"add",StdStringBuf.add;
"addChar",StdStringBuf.addChar;
"addSub",StdStringBuf.addSub;
"clear",StdStringBuf.clear;
"get_length",StdStringBuf.get_length;
"toString",StdStringBuf.toString;
];
Expand Down
4 changes: 4 additions & 0 deletions src/macro/eval/evalString.ml
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,10 @@ module VStringBuffer = struct
Buffer.add_substring this.bbuffer s.sstring b_pos b_len;
this.blength <- this.blength + c_len

let clear this =
Buffer.clear this.bbuffer;
this.blength <- 0

let contents this =
create_with_length (Buffer.contents this.bbuffer) this.blength
end
12 changes: 12 additions & 0 deletions std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,18 @@ class StringBuf {
b += (len == null ? s.substr(pos) : s.substr(pos, len));
}

/**
Visibly removes all characters from `this` StringBuf, making it possible to reuse it.

Implementation detail: On some targets, `clear`ing a StringBuf
MAY not reallocate the internal buffer, preserving its capacity.
This is done to avoid unnecessary allocations on later `add` operations,
but might be incorrectly perceived as a memory leak.
**/
public inline function clear():Void {
b = "";
}

/**
Returns the content of `this` StringBuf as String.

Expand Down
30 changes: 19 additions & 11 deletions std/cpp/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,34 @@
*/

import cpp.NativeString;

using cpp.NativeArray;
import cpp.Pointer;

@:coreApi
class StringBuf {
private var b:Array<String>;
private var b:Null<Array<String>> = null;

public var length(get, never):Int;

var charBuf:Array<cpp.Char>;
var charBuf:Null<Array<cpp.Char>> = null;

public function new():Void {}

private function charBufAsString():String {
var len = charBuf.length;
charBuf.push(0);
return NativeString.fromGcPointer(charBuf.address(0), len);
private function drainCharBuf():String {
final buffer = this.charBuf;
final length = buffer.length;
buffer.push(0);
final bufferPtr = Pointer.arrayElem(buffer, 0);
final bufferString = NativeString.fromGcPointer(bufferPtr, length);
this.charBuf = null;
return bufferString;
}

private function flush():Void {
final charBufAsString = drainCharBuf();
if (b == null)
b = [charBufAsString()];
b = [charBufAsString];
else
b.push(charBufAsString());
charBuf = null;
b.push(charBufAsString);
}

function get_length():Int {
Expand Down Expand Up @@ -89,6 +92,11 @@ class StringBuf {
}
}

public function clear():Void {
this.charBuf?.resize(0);
this.b?.resize(0);
}

public function toString():String {
if (charBuf != null)
flush();
Expand Down
1 change: 1 addition & 0 deletions std/eval/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@ extern class StringBuf {
function add<T>(x:T):Void;
function addChar(c:Int):Void;
function addSub(s:String, pos:Int, ?len:Int):Void;
function clear():Void;
function toString():String;
}
4 changes: 4 additions & 0 deletions std/hl/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@
throw "Invalid unicode char " + c;
}

public function clear():Void {
pos = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if the hl.Bytes instance should also be reinitialized for memory reasons. In theory you could have a huge StringBuf that never releases its memory after being cleared.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The StringBuf staying huge after being cleared is intentional! For example, if you built a huge string once:

hl.Gc.major();
Sys.println("before start: " + hl.Gc.stats());

var sb = new StringBuf();
for (i in 0...200000) {
	sb.add("hello");
}

hl.Gc.major();
Sys.println("after filling: " + hl.Gc.stats());
before start: {currentMemory : 589824, allocationCount : 137, totalAllocated : 7104}
after filling: {currentMemory : 3145728, allocationCount : 191, totalAllocated : 7706696}

You may also want to build another one in the future. In that case, you don't pay the cost of allocating buffers. Multiple, if dealing with small elements and the internals need to constantly reallocate and blit.

@:privateAccess sb.pos = 0; // clear()
for (i in 0...200000) {
	sb.add("world");
}

hl.Gc.major();
Sys.println("after reusing: " + hl.Gc.stats());
after reusing: {currentMemory : 3145728, allocationCount : 212, totalAllocated : 7707912}

If you worry however about hl.Bytes leaking, the semantics around it did not change - if Bytes are indeed GC-able, they should leak if and only if the StringBuf also leaks (user error 😅), the same as it currently functions. (Ditto for compacting the heap/releasing memory back to the OS - there's currently nothing stopping users from creating big, non-clearable StringBufs. If this was a concern, this would be a concern right now on stable.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see what you're saying, it's just that this is not exactly the behavior I'd expect when clearing a buffer. From a design perspective, I expect existingBuf = new StringBuf() and existingBuf.clear() to have the same memory behavior. Introducing a subtle difference like this can easily lead to confusion.

}

public function toString():String {
if (pos + 2 > size)
__expand(0);
Expand Down
4 changes: 4 additions & 0 deletions std/jvm/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ class StringBuf {
b.appendCodePoint(c);
}

public function clear():Void {
b.setLength(0);
}

public function toString():String {
return b.toString();
}
Expand Down
39 changes: 29 additions & 10 deletions std/lua/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,25 @@
* DEALINGS IN THE SOFTWARE.
*/

import lua.Lua;
import lua.Table;

class StringBuf {
var b:Table<Int, String>;
private var b:Table<Int, String>;

/**
Count of "good" elements in the internal buffer table.

If `this` StringBuf has been `clear`ed previously,
this value might not be equal to the length (`#`) of that table.
**/
private var bufferLength:Int;

public var length(get, null):Int;

public inline function new() {
b = Table.create();
this.bufferLength = 0;
this.length = 0;
}

Expand All @@ -37,23 +47,32 @@ class StringBuf {
}

public inline function add<T>(x:T):Void {
var str = Std.string(x);
Table.insert(b, str);
length += str.length;
final str = Std.string(x);
final i = this.bufferLength += 1;
Lua.rawset(this.b, i, str);
this.length += str.length;
}

public inline function addChar(c:Int):Void {
Table.insert(b, String.fromCharCode(c));
length += 1;
final i = this.bufferLength += 1;
Lua.rawset(this.b, i, String.fromCharCode(c));
this.length += 1;
}

public inline function addSub(s:String, pos:Int, ?len:Int):Void {
var part = len == null ? s.substr(pos) : s.substr(pos, len);
Table.insert(b, part);
length += part.length;
this.add(s.substr(pos, len));
}

public inline function clear():Void {
this.bufferLength = 0;
this.length = 0;
}

public inline function toString():String {
return Table.concat(b);
final len = this.bufferLength;
if (len == 0) {
return "";
}
return Table.concat(this.b, "", 1, len);
}
}
5 changes: 5 additions & 0 deletions std/neko/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
__add_char(b, c);
}

public inline function clear():Void {
buffer_reset(b);
}

public inline function toString():String {
return new String(__to_string(b));
}
Expand All @@ -54,5 +58,6 @@
static var __add_char:Dynamic = neko.Lib.load("std", "buffer_add_char", 2);
static var __add_sub:Dynamic = neko.Lib.load("std", "buffer_add_sub", 4);
static var __to_string:Dynamic = neko.Lib.load("std", "buffer_string", 1);
static var buffer_reset:Dynamic = neko.Lib.load("std", "buffer_reset", 1);
static var __get_length:Dynamic = try neko.Lib.load("std", "buffer_get_length", 1) catch (e:Dynamic) null;
}
4 changes: 4 additions & 0 deletions std/php/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ import php.Syntax;
b += String.fromCharCode(c);
}

public inline function clear():Void {
b = "";
}

public inline function toString():String {
return b;
}
Expand Down
17 changes: 9 additions & 8 deletions std/python/_std/StringBuf.hx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
* DEALINGS IN THE SOFTWARE.
*/

import python.lib.io.IOBase.SeekSet;
import python.lib.io.StringIO;

@:coreApi
Expand All @@ -34,11 +33,7 @@ class StringBuf {
public var length(get, never):Int;

function get_length():Int {
var pos = b.tell();
b.seek(0, SeekEnd);
var len = b.tell();
b.seek(pos, SeekSet);
return len;
return b.tell();
}

public inline function add<T>(x:T):Void {
Expand All @@ -57,7 +52,13 @@ class StringBuf {
add1((len == null ? s.substr(pos) : s.substr(pos, len)));
}

public inline function toString():String {
return b.getvalue();
public inline function clear():Void {
b.seek(0, SeekSet);
}

public function toString():String {
final length = this.length;
b.seek(0, SeekSet);
return b.read(length);
}
}
42 changes: 41 additions & 1 deletion tests/unit/src/unitstd/StringBuf.unit.hx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// add, toString
var x = new StringBuf();
x.toString() == "";
x.length == 0;
x.add(null);
x.toString() == "null";

Expand Down Expand Up @@ -37,8 +38,47 @@ x.addSub("a👽b", 1, 1);
x.toString() == "👽";
#end

// StringBuf can store multiple elements
final x = new StringBuf();
x.add("ab");
x.add("cd");
x.addChar("e".code);
x.add("fg");
x.toString() == "abcdefg";

// Calling toString() does not empty the buffer
x.toString() == "abcdefg";
x.toString() == "abcdefg";
x.length == 7;

// identity
function identityTest(s:StringBuf) {
return s;
}
identityTest(x) == x;
identityTest(x) == x;

// Clearing a buffer resets its visible state
x.length > 0;
x.clear();
x.toString() == "";
x.length == 0;

// Previously cleared buffers do not leak past state
x.add("foo");
x.toString() == "foo";
x.length == 3;

// Buffers can be cleared multiple times
x.clear();
x.length == 0;
x.clear();
x.clear();
x.clear();
x.length == 0;

// Buffers can be cleared immediately after creation
// (ie. `clear` does not depend on any private state being non-null)
final x = new StringBuf();
x.clear();
x.toString() == "";
x.length == 0;
Loading