From 38abfd54f6d7573fc6b1f9cd985b55eca091997d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Wed, 25 Sep 2024 21:37:10 +0200 Subject: [PATCH 1/3] Move boxing/unboxing outside of some runtime primitives This makes it visible to binaryen, which than is able to eliminate so unncessary boxing. --- compiler/lib/wasm/wa_generate.ml | 103 ++++++++++++++++++++++++--- runtime/wasm/bigarray.wat | 108 ++++++++++++++-------------- runtime/wasm/float.wat | 60 ++++++---------- runtime/wasm/int32.wat | 42 +++-------- runtime/wasm/int64.wat | 32 ++++----- runtime/wasm/prng.wat | 6 +- runtime/wasm/string.wat | 116 ++++++++++++++----------------- 7 files changed, 240 insertions(+), 227 deletions(-) diff --git a/compiler/lib/wasm/wa_generate.ml b/compiler/lib/wasm/wa_generate.ml index 6b3c0d807..be77eb246 100644 --- a/compiler/lib/wasm/wa_generate.ml +++ b/compiler/lib/wasm/wa_generate.ml @@ -38,6 +38,71 @@ module Generate (Target : Wa_target_sig.S) = struct ; debug : Parse_bytecode.Debug.t } + type repr = + | Value + | Float + | Int32 + | Nativeint + | Int64 + + let repr_type r = + match r with + | Value -> Value.value + | Float -> F64 + | Int32 -> I32 + | Nativeint -> I32 + | Int64 -> I64 + + let specialized_func_type (params, result) = + { W.params = List.map ~f:repr_type params; result = [ repr_type result ] } + + let box_value r e = + match r with + | Value -> e + | Float -> Memory.box_float e + | Int32 -> Memory.box_int32 e + | Nativeint -> Memory.box_nativeint e + | Int64 -> Memory.box_int64 e + + let unbox_value r e = + match r with + | Value -> e + | Float -> Memory.unbox_float e + | Int32 -> Memory.unbox_int32 e + | Nativeint -> Memory.unbox_nativeint e + | Int64 -> Memory.unbox_int64 e + + let specialized_primitives = + let h = Hashtbl.create 18 in + List.iter + ~f:(fun (nm, typ) -> Hashtbl.add h nm typ) + [ "caml_int32_bswap", ([ Int32 ], Int32) + ; "caml_nativeint_bswap", ([ Nativeint ], Nativeint) + ; "caml_int64_bswap", ([ Int64 ], Int64) + ; "caml_int32_compare", ([ Int32; Int32 ], Value) + ; "caml_nativeint_compare", ([ Nativeint; Nativeint ], Value) + ; "caml_int64_compare", ([ Int64; Int64 ], Value) + ; "caml_string_get32", ([ Value; Value ], Int32) + ; "caml_string_get64", ([ Value; Value ], Int64) + ; "caml_bytes_get32", ([ Value; Value ], Int32) + ; "caml_bytes_get64", ([ Value; Value ], Int64) + ; "caml_bytes_set32", ([ Value; Value; Int32 ], Value) + ; "caml_bytes_set64", ([ Value; Value; Int64 ], Value) + ; "caml_lxm_next", ([ Value ], Int64) + ; "caml_ba_uint8_get32", ([ Value; Value ], Int32) + ; "caml_ba_uint8_get64", ([ Value; Value ], Int64) + ; "caml_ba_uint8_set32", ([ Value; Value; Int32 ], Value) + ; "caml_ba_uint8_set64", ([ Value; Value; Int64 ], Value) + ; "caml_nextafter_float", ([ Float; Float ], Float) + ; "caml_classify_float", ([ Float ], Value) + ; "caml_ldexp_float", ([ Float; Value ], Float) + ; "caml_signbit_float", ([ Float ], Value) + ; "caml_erf_float", ([ Float ], Float) + ; "caml_erfc_float", ([ Float ], Float) + ; "caml_float_compare", ([ Float; Float ], Value) + ]; + h + let func_type n = { W.params = List.init ~len:n ~f:(fun _ -> Value.value); result = [ Value.value ] } @@ -386,6 +451,10 @@ module Generate (Target : Wa_target_sig.S) = struct int32_shift_op (Shr U) i j | Extern "caml_int32_to_int", [ i ] -> Value.val_int (Memory.unbox_int32 i) | Extern "caml_int32_of_int", [ i ] -> Memory.box_int32 (Value.int_val i) + | Extern "caml_nativeint_of_int32", [ i ] -> + Memory.box_nativeint (Memory.unbox_int32 i) + | Extern "caml_nativeint_to_int32", [ i ] -> + Memory.box_int32 (Memory.unbox_nativeint i) | Extern "caml_int64_bits_of_float", [ f ] -> let* f = Memory.unbox_float f in Memory.box_int64 (return (W.UnOp (I64 ReinterpretF, f))) @@ -565,18 +634,30 @@ module Generate (Target : Wa_target_sig.S) = struct ~init:(return []) in Memory.allocate ~tag:0 l - | Extern name, l -> + | Extern name, l -> ( let name = Primitive.resolve name in - (*ZZZ Different calling convention when large number of parameters *) - let* f = register_import ~name (Fun (func_type (List.length l))) in - let rec loop acc l = - match l with - | [] -> return (W.Call (f, List.rev acc)) - | x :: r -> - let* x = x in - loop (x :: acc) r - in - loop [] l + try + let typ = Hashtbl.find specialized_primitives name in + let* f = register_import ~name (Fun (specialized_func_type typ)) in + let rec loop acc arg_typ l = + match arg_typ, l with + | [], [] -> box_value (snd typ) (return (W.Call (f, List.rev acc))) + | repr :: rem, x :: r -> + let* x = unbox_value repr x in + loop (x :: acc) rem r + | [], _ :: _ | _ :: _, [] -> assert false + in + loop [] (fst typ) l + with Not_found -> + let* f = register_import ~name (Fun (func_type (List.length l))) in + let rec loop acc l = + match l with + | [] -> return (W.Call (f, List.rev acc)) + | x :: r -> + let* x = x in + loop (x :: acc) r + in + loop [] l) | Not, [ x ] -> Value.not x | Lt, [ x; y ] -> Value.lt x y | Le, [ x; y ] -> Value.le x y diff --git a/runtime/wasm/bigarray.wat b/runtime/wasm/bigarray.wat index b049c59af..3e8d79943 100644 --- a/runtime/wasm/bigarray.wat +++ b/runtime/wasm/bigarray.wat @@ -1919,7 +1919,7 @@ (i32.const 8))))) (func (export "caml_ba_uint8_get32") - (param $vba (ref eq)) (param $i (ref eq)) (result (ref eq)) + (param $vba (ref eq)) (param $i (ref eq)) (result i32) (local $ba (ref $bigarray)) (local $data (ref extern)) (local $p i32) @@ -1933,23 +1933,22 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (return_call $caml_copy_int32 + (i32.or + (i32.or + (call $ta_get_ui8 (local.get $data) (local.get $p)) + (i32.shl (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 1))) + (i32.const 8))) (i32.or - (i32.or - (call $ta_get_ui8 (local.get $data) (local.get $p)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1))) - (i32.const 8))) - (i32.or - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 2))) - (i32.const 16)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 3))) - (i32.const 24)))))) + (i32.shl (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 2))) + (i32.const 16)) + (i32.shl (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 3))) + (i32.const 24))))) (func (export "caml_ba_uint8_get64") - (param $vba (ref eq)) (param $i (ref eq)) (result (ref eq)) + (param $vba (ref eq)) (param $i (ref eq)) (result i64) (local $ba (ref $bigarray)) (local $data (ref extern)) (local $p i32) @@ -1963,44 +1962,43 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (return_call $caml_copy_int64 + (i64.or + (i64.or + (i64.or + (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) (local.get $p))) + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 1)))) + (i64.const 8))) + (i64.or + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 2)))) + (i64.const 16)) + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 3)))) + (i64.const 24)))) (i64.or (i64.or - (i64.or - (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) (local.get $p))) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1)))) - (i64.const 8))) - (i64.or - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 2)))) - (i64.const 16)) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 3)))) - (i64.const 24)))) + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 4)))) + (i64.const 32)) + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 5)))) + (i64.const 40))) (i64.or - (i64.or - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 4)))) - (i64.const 32)) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 5)))) - (i64.const 40))) - (i64.or - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 6)))) - (i64.const 48)) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 7)))) - (i64.const 56))))))) + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 6)))) + (i64.const 48)) + (i64.shl (i64.extend_i32_u + (call $ta_get_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 7)))) + (i64.const 56)))))) (func (export "caml_ba_uint8_set16") (param $vba (ref eq)) (param $i (ref eq)) (param $v (ref eq)) @@ -2026,15 +2024,14 @@ (ref.i31 (i32.const 0))) (func (export "caml_ba_uint8_set32") - (param $vba (ref eq)) (param $i (ref eq)) (param $v (ref eq)) + (param $vba (ref eq)) (param $i (ref eq)) (param $d i32) (result (ref eq)) (local $ba (ref $bigarray)) (local $data (ref extern)) - (local $p i32) (local $d i32) + (local $p i32) (local.set $ba (ref.cast (ref $bigarray) (local.get $vba))) (local.set $data (struct.get $bigarray $ba_data (local.get $ba))) (local.set $p (i31.get_s (ref.cast (ref i31) (local.get $i)))) - (local.set $d (call $Int32_val (local.get $v))) (if (i32.lt_s (local.get $p) (i32.const 0)) (then (call $caml_bound_error))) (if (i32.ge_u (i32.add (local.get $p) (i32.const 3)) @@ -2056,15 +2053,14 @@ (ref.i31 (i32.const 0))) (func (export "caml_ba_uint8_set64") - (param $vba (ref eq)) (param $i (ref eq)) (param $v (ref eq)) + (param $vba (ref eq)) (param $i (ref eq)) (param $d i64) (result (ref eq)) (local $ba (ref $bigarray)) (local $data (ref extern)) - (local $p i32) (local $d i64) + (local $p i32) (local.set $ba (ref.cast (ref $bigarray) (local.get $vba))) (local.set $data (struct.get $bigarray $ba_data (local.get $ba))) (local.set $p (i31.get_s (ref.cast (ref i31) (local.get $i)))) - (local.set $d (call $Int64_val (local.get $v))) (if (i32.lt_s (local.get $p) (i32.const 0)) (then (call $caml_bound_error))) (if (i32.ge_u (i32.add (local.get $p) (i32.const 7)) diff --git a/runtime/wasm/float.wat b/runtime/wasm/float.wat index 4705b04c9..95a3759ae 100644 --- a/runtime/wasm/float.wat +++ b/runtime/wasm/float.wat @@ -661,19 +661,17 @@ (return (ref.i31 (i32.const 0)))) (func (export "caml_nextafter_float") - (param (ref eq)) (param (ref eq)) (result (ref eq)) - (local $x f64) (local $y f64) (local $i i64) (local $j i64) - (local.set $x (struct.get $float 0 (ref.cast (ref $float) (local.get 0)))) - (local.set $y (struct.get $float 0 (ref.cast (ref $float) (local.get 1)))) - (if (f64.ne (local.get $x) (local.get $x)) (then (return (local.get 0)))) - (if (f64.ne (local.get $y) (local.get $y)) (then (return (local.get 1)))) + (param $x f64) (param $y f64) (result f64) + (local $i i64) (local $j i64) + (if (f64.ne (local.get $x) (local.get $x)) (then (return (local.get $x)))) + (if (f64.ne (local.get $y) (local.get $y)) (then (return (local.get $y)))) (if (f64.eq (local.get $x) (local.get $y)) - (then (return (local.get 1)))) - (if (result (ref eq)) (f64.eq (local.get $x) (f64.const 0)) + (then (return (local.get $y)))) + (if (f64.eq (local.get $x) (f64.const 0)) (then (if (f64.ge (local.get $y) (f64.const 0)) - (then (return (struct.new $float (f64.const 0x1p-1074)))) - (else (return (struct.new $float (f64.const -0x1p-1074)))))) + (then (return (f64.const 0x1p-1074))) + (else (return (f64.const -0x1p-1074))))) (else (local.set $i (i64.reinterpret_f64 (local.get $x))) (local.set $j (i64.reinterpret_f64 (local.get $y))) @@ -681,12 +679,11 @@ (i64.lt_u (local.get $i) (local.get $j))) (then (local.set $i (i64.add (local.get $i) (i64.const 1)))) (else (local.set $i (i64.sub (local.get $i) (i64.const 1))))) - (return (struct.new $float (f64.reinterpret_i64 (local.get $i))))))) + (return (f64.reinterpret_i64 (local.get $i)))))) - (func (export "caml_classify_float") (param (ref eq)) (result (ref eq)) + (func (export "caml_classify_float") (param $x f64) (result (ref eq)) (local $a f64) - (local.set $a - (f64.abs (struct.get $float 0 (ref.cast (ref $float) (local.get 0))))) + (local.set $a (f64.abs (local.get $x))) (ref.i31 (if (result i32) (f64.ge (local.get $a) (f64.const 0x1p-1022)) (then @@ -753,11 +750,10 @@ (i64.const 52))))) (func (export "caml_ldexp_float") - (param (ref eq)) (param (ref eq)) (result (ref eq)) - (struct.new $float - (call $ldexp - (struct.get $float 0 (ref.cast (ref $float) (local.get 0))) - (i31.get_s (ref.cast (ref i31) (local.get 1)))))) + (param $x f64) (param $i (ref eq)) (result f64) + (call $ldexp + (local.get $x) + (i31.get_s (ref.cast (ref i31) (local.get $i))))) (func $frexp (param $x f64) (result f64 i32) (local $y i64) @@ -799,15 +795,12 @@ (struct.new $float (tuple.extract 2 0 (local.get $r))) (ref.i31 (tuple.extract 2 1 (local.get $r))))) - (func (export "caml_signbit_float") (param (ref eq)) (result (ref eq)) + (func (export "caml_signbit_float") (param $x f64) (result (ref eq)) (ref.i31 (i32.wrap_i64 - (i64.shr_u - (i64.reinterpret_f64 - (struct.get $float 0 (ref.cast (ref $float) (local.get 0)))) - (i64.const 63))))) + (i64.shr_u (i64.reinterpret_f64 (local.get $x)) (i64.const 63))))) - (func $erf (param $x f64) (result f64) + (func $erf (export "caml_erf_float") (param $x f64) (result f64) (local $a1 f64) (local $a2 f64) (local $a3 f64) (local $a4 f64) (local $a5 f64) (local $p f64) (local $t f64) (local $y f64) @@ -844,16 +837,8 @@ (f64.neg (f64.mul (local.get $x) (local.get $x)))))))) (f64.copysign (local.get $y) (local.get $x))) - (func (export "caml_erf_float") (param (ref eq)) (result (ref eq)) - (struct.new $float - (call $erf - (struct.get $float 0 (ref.cast (ref $float) (local.get 0)))))) - - (func (export "caml_erfc_float") (param (ref eq)) (result (ref eq)) - (struct.new $float - (f64.sub (f64.const 1) - (call $erf - (struct.get $float 0 (ref.cast (ref $float) (local.get 0))))))) + (func (export "caml_erfc_float") (param $x f64) (result f64) + (f64.sub (f64.const 1) (call $erf (local.get $x)))) (func (export "caml_fma_float") (param $vx (ref eq)) (param $vy (ref eq)) (param $vz (ref eq)) @@ -1154,10 +1139,7 @@ (struct.new $float (local.get $y))) (func (export "caml_float_compare") - (param (ref eq)) (param (ref eq)) (result (ref eq)) - (local $x f64) (local $y f64) - (local.set $x (struct.get $float 0 (ref.cast (ref $float) (local.get 0)))) - (local.set $y (struct.get $float 0 (ref.cast (ref $float) (local.get 1)))) + (param $x f64) (param $y f64) (result (ref eq)) (ref.i31 (i32.add (i32.sub (f64.gt (local.get $x) (local.get $y)) diff --git a/runtime/wasm/int32.wat b/runtime/wasm/int32.wat index 6a56cbb31..79ce5095d 100644 --- a/runtime/wasm/int32.wat +++ b/runtime/wasm/int32.wat @@ -108,15 +108,14 @@ (func $Int32_val (export "Int32_val") (param (ref eq)) (result i32) (struct.get $int32 1 (ref.cast (ref $int32) (local.get 0)))) - (func (export "caml_int32_bswap") (param (ref eq)) (result (ref eq)) - (local $i i32) - (local.set $i (struct.get $int32 1 (ref.cast (ref $int32) (local.get 0)))) - (return_call $caml_copy_int32 - (i32.or - (i32.rotr (i32.and (local.get $i) (i32.const 0x00FF00FF)) - (i32.const 8)) - (i32.rotl (i32.and (local.get $i) (i32.const 0xFF00FF00)) - (i32.const 8))))) + (export "caml_nativeint_bswap" (func $caml_int32_bswap)) + (func $caml_int32_bswap (export "caml_int32_bswap") + (param $i i32) (result i32) + (i32.or + (i32.rotr (i32.and (local.get $i) (i32.const 0x00FF00FF)) + (i32.const 8)) + (i32.rotl (i32.and (local.get $i) (i32.const 0xFF00FF00)) + (i32.const 8)))) (global $INT32_ERRMSG (ref $string) (array.new_fixed $string 15 ;; "Int32.of_string" @@ -132,12 +131,7 @@ (export "caml_nativeint_compare" (func $caml_int32_compare)) (func $caml_int32_compare (export "caml_int32_compare") - (param (ref eq)) (param (ref eq)) (result (ref eq)) - (local $i1 i32) (local $i2 i32) - (local.set $i1 - (struct.get $int32 1 (ref.cast (ref $int32) (local.get 0)))) - (local.set $i2 - (struct.get $int32 1 (ref.cast (ref $int32) (local.get 1)))) + (param $i1 i32) (param $i2 i32) (result (ref eq)) (ref.i31 (i32.sub (i32.gt_s (local.get $i1) (local.get $i2)) (i32.lt_s (local.get $i1) (local.get $i2))))) @@ -177,16 +171,6 @@ (param $i i32) (result (ref eq)) (struct.new $int32 (global.get $nativeint_ops) (local.get $i))) - (func (export "caml_nativeint_bswap") (param (ref eq)) (result (ref eq)) - (local $i i32) - (local.set $i (struct.get $int32 1 (ref.cast (ref $int32) (local.get 0)))) - (return_call $caml_copy_nativeint - (i32.or - (i32.rotr (i32.and (local.get $i) (i32.const 0x00FF00FF)) - (i32.const 8)) - (i32.rotl (i32.and (local.get $i) (i32.const 0xFF00FF00)) - (i32.const 8))))) - (global $NATIVEINT_ERRMSG (ref $string) (array.new_fixed $string 16 ;; "Nativeint.of_string" (i32.const 78) (i32.const 97) (i32.const 116) (i32.const 105) @@ -206,12 +190,4 @@ (return_call $format_int (local.get 0) (struct.get $int32 1 (ref.cast (ref $int32) (local.get 1))) (i32.const 0))) - - (func (export "caml_nativeint_of_int32") (param (ref eq)) (result (ref eq)) - (return_call $caml_copy_nativeint - (struct.get $int32 1 (ref.cast (ref $int32) (local.get 0))))) - - (func (export "caml_nativeint_to_int32") (param (ref eq)) (result (ref eq)) - (return_call $caml_copy_int32 - (struct.get $int32 1 (ref.cast (ref $int32) (local.get 0))))) ) diff --git a/runtime/wasm/int64.wat b/runtime/wasm/int64.wat index 102b7d176..605962312 100644 --- a/runtime/wasm/int64.wat +++ b/runtime/wasm/int64.wat @@ -111,29 +111,21 @@ (func (export "Int64_val") (param (ref eq)) (result i64) (struct.get $int64 1 (ref.cast (ref $int64) (local.get 0)))) - (func (export "caml_int64_bswap") (param (ref eq)) (result (ref eq)) - (local $i i64) - (local.set $i (struct.get $int64 1 (ref.cast (ref $int64) (local.get 0)))) - (return_call $caml_copy_int64 + (func (export "caml_int64_bswap") (param $i i64) (result i64) + (i64.or + (i64.or + (i64.rotr (i64.and (local.get $i) (i64.const 0x000000FF000000FF)) + (i64.const 8)) + (i64.rotr (i64.and (local.get $i) (i64.const 0x0000FF000000FF00)) + (i64.const 24))) (i64.or - (i64.or - (i64.rotr (i64.and (local.get $i) (i64.const 0x000000FF000000FF)) - (i64.const 8)) - (i64.rotr (i64.and (local.get $i) (i64.const 0x0000FF000000FF00)) - (i64.const 24))) - (i64.or - (i64.rotl (i64.and (local.get $i) (i64.const 0x00FF000000FF0000)) - (i64.const 24)) - (i64.rotl (i64.and (local.get $i) (i64.const 0xFF000000FF000000)) - (i64.const 8)))))) + (i64.rotl (i64.and (local.get $i) (i64.const 0x00FF000000FF0000)) + (i64.const 24)) + (i64.rotl (i64.and (local.get $i) (i64.const 0xFF000000FF000000)) + (i64.const 8))))) (func (export "caml_int64_compare") - (param (ref eq)) (param (ref eq)) (result (ref eq)) - (local $i1 i64) (local $i2 i64) - (local.set $i1 - (struct.get $int64 1 (ref.cast (ref $int64) (local.get 0)))) - (local.set $i2 - (struct.get $int64 1 (ref.cast (ref $int64) (local.get 1)))) + (param $i1 i64) (param $i2 i64) (result (ref eq)) (ref.i31 (i32.sub (i64.gt_s (local.get $i1) (local.get $i2)) (i64.lt_s (local.get $i1) (local.get $i2))))) diff --git a/runtime/wasm/prng.wat b/runtime/wasm/prng.wat index 082a7b1bf..4918eaa0b 100644 --- a/runtime/wasm/prng.wat +++ b/runtime/wasm/prng.wat @@ -16,8 +16,6 @@ ;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. (module - (import "int64" "caml_copy_int64" - (func $caml_copy_int64 (param i64) (result (ref eq)))) (import "bindings" "ta_get_i32" (func $ta_get_i32 (param (ref extern)) (param i32) (result i32))) (import "bindings" "ta_set_i32" @@ -25,7 +23,7 @@ (import "bigarray" "caml_ba_get_data" (func $caml_ba_get_data (param (ref eq)) (result (ref extern)))) - (func (export "caml_lxm_next") (param $v (ref eq)) (result (ref eq)) + (func (export "caml_lxm_next") (param $v (ref eq)) (result i64) (local $data (ref extern)) (local $a i64) (local $s i64) (local $q0 i64) (local $q1 i64) (local $z i64) @@ -93,5 +91,5 @@ (i32.wrap_i64 (local.get $q1))) (call $ta_set_i32 (local.get $data) (i32.const 7) (i32.wrap_i64 (i64.shr_u (local.get $q1) (i64.const 32)))) - (return_call $caml_copy_int64 (local.get $z))) + (return (local.get $z))) ) diff --git a/runtime/wasm/string.wat b/runtime/wasm/string.wat index 1f41937bd..bf43b2f9e 100644 --- a/runtime/wasm/string.wat +++ b/runtime/wasm/string.wat @@ -19,14 +19,6 @@ (import "fail" "caml_bound_error" (func $caml_bound_error)) (import "fail" "caml_invalid_argument" (func $caml_invalid_argument (param $arg (ref eq)))) - (import "int32" "caml_copy_int32" - (func $caml_copy_int32 (param i32) (result (ref eq)))) - (import "int32" "Int32_val" - (func $Int32_val (param (ref eq)) (result i32))) - (import "int64" "caml_copy_int64" - (func $caml_copy_int64 (param i64) (result (ref eq)))) - (import "int64" "Int64_val" - (func $Int64_val (param (ref eq)) (result i64))) (type $string (array (mut i8))) @@ -182,7 +174,7 @@ (export "caml_string_get32" (func $caml_bytes_get32)) (func $caml_bytes_get32 (export "caml_bytes_get32") - (param $v (ref eq)) (param $i (ref eq)) (result (ref eq)) + (param $v (ref eq)) (param $i (ref eq)) (result i32) (local $s (ref $string)) (local $p i32) (local.set $s (ref.cast (ref $string) (local.get $v))) (local.set $p (i31.get_s (ref.cast (ref i31) (local.get $i)))) @@ -191,24 +183,23 @@ (if (i32.ge_u (i32.add (local.get $p) (i32.const 3)) (array.len (local.get $s))) (then (call $caml_bound_error))) - (return_call $caml_copy_int32 + (i32.or + (i32.or + (array.get_u $string (local.get $s) (local.get $p)) + (i32.shl (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 1))) + (i32.const 8))) (i32.or - (i32.or - (array.get_u $string (local.get $s) (local.get $p)) - (i32.shl (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 1))) - (i32.const 8))) - (i32.or - (i32.shl (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 2))) - (i32.const 16)) - (i32.shl (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 3))) - (i32.const 24)))))) + (i32.shl (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 2))) + (i32.const 16)) + (i32.shl (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 3))) + (i32.const 24))))) (export "caml_string_get64" (func $caml_bytes_get64)) (func $caml_bytes_get64 (export "caml_bytes_get64") - (param $v (ref eq)) (param $i (ref eq)) (result (ref eq)) + (param $v (ref eq)) (param $i (ref eq)) (result i64) (local $s (ref $string)) (local $p i32) (local.set $s (ref.cast (ref $string) (local.get $v))) (local.set $p (i31.get_s (ref.cast (ref i31) (local.get $i)))) @@ -217,44 +208,43 @@ (if (i32.ge_u (i32.add (local.get $p) (i32.const 7)) (array.len (local.get $s))) (then (call $caml_bound_error))) - (return_call $caml_copy_int64 + (i64.or + (i64.or + (i64.or + (i64.extend_i32_u + (array.get_u $string (local.get $s) (local.get $p))) + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 1)))) + (i64.const 8))) + (i64.or + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 2)))) + (i64.const 16)) + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 3)))) + (i64.const 24)))) (i64.or (i64.or - (i64.or - (i64.extend_i32_u - (array.get_u $string (local.get $s) (local.get $p))) - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 1)))) - (i64.const 8))) - (i64.or - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 2)))) - (i64.const 16)) - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 3)))) - (i64.const 24)))) + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 4)))) + (i64.const 32)) + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 5)))) + (i64.const 40))) (i64.or - (i64.or - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 4)))) - (i64.const 32)) - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 5)))) - (i64.const 40))) - (i64.or - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 6)))) - (i64.const 48)) - (i64.shl (i64.extend_i32_u - (array.get_u $string (local.get $s) - (i32.add (local.get $p) (i32.const 7)))) - (i64.const 56))))))) + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 6)))) + (i64.const 48)) + (i64.shl (i64.extend_i32_u + (array.get_u $string (local.get $s) + (i32.add (local.get $p) (i32.const 7)))) + (i64.const 56)))))) (func (export "caml_bytes_set16") (param (ref eq) (ref eq) (ref eq)) (result (ref eq)) @@ -274,11 +264,10 @@ (ref.i31 (i32.const 0))) (func (export "caml_bytes_set32") - (param (ref eq) (ref eq) (ref eq)) (result (ref eq)) - (local $s (ref $string)) (local $p i32) (local $v i32) + (param (ref eq)) (param (ref eq)) (param $v i32) (result (ref eq)) + (local $s (ref $string)) (local $p i32) (local.set $s (ref.cast (ref $string) (local.get 0))) (local.set $p (i31.get_s (ref.cast (ref i31) (local.get 1)))) - (local.set $v (call $Int32_val (local.get 2))) (if (i32.lt_s (local.get $p) (i32.const 0)) (then (call $caml_bound_error))) (if (i32.ge_u (i32.add (local.get $p) (i32.const 3)) @@ -297,11 +286,10 @@ (ref.i31 (i32.const 0))) (func (export "caml_bytes_set64") - (param (ref eq) (ref eq) (ref eq)) (result (ref eq)) - (local $s (ref $string)) (local $p i32) (local $v i64) + (param (ref eq)) (param (ref eq)) (param $v i64) (result (ref eq)) + (local $s (ref $string)) (local $p i32) (local.set $s (ref.cast (ref $string) (local.get 0))) (local.set $p (i31.get_s (ref.cast (ref i31) (local.get 1)))) - (local.set $v (call $Int64_val (local.get 2))) (if (i32.lt_s (local.get $p) (i32.const 0)) (then (call $caml_bound_error))) (if (i32.ge_u (i32.add (local.get $p) (i32.const 7)) From 6d62d29b9e7e373141e4ff31dd5e22adcba339a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Thu, 26 Sep 2024 12:59:09 +0200 Subject: [PATCH 2/3] Bigstring: add JavaScript functions to access 2 or 4 bytes at once Calling a JavaScript function is costly. This reduces the number of calls to a JavaScript function that we need to make to get or set a 16-bit or 32-bit integer. --- runtime/wasm/bigarray.wat | 127 +++++++------------------------------ runtime/wasm/bigstring.wat | 16 +---- runtime/wasm/runtime.js | 13 ++++ 3 files changed, 39 insertions(+), 117 deletions(-) diff --git a/runtime/wasm/bigarray.wat b/runtime/wasm/bigarray.wat index 3e8d79943..412d9723f 100644 --- a/runtime/wasm/bigarray.wat +++ b/runtime/wasm/bigarray.wat @@ -38,6 +38,10 @@ (func $ta_get_i8 (param (ref extern)) (param i32) (result i32))) (import "bindings" "ta_get_ui8" (func $ta_get_ui8 (param (ref extern)) (param i32) (result i32))) + (import "bindings" "ta_get32_ui8" + (func $ta_get32_ui8 (param (ref extern)) (param i32) (result i32))) + (import "bindings" "ta_get16_ui8" + (func $ta_get16_ui8 (param (ref extern)) (param i32) (result i32))) (import "bindings" "ta_set_f64" (func $ta_set_f64 (param (ref extern)) (param i32) (param f64))) (import "bindings" "ta_set_f32" @@ -52,6 +56,10 @@ (func $ta_set_i8 (param (ref extern)) (param i32) (param (ref i31)))) (import "bindings" "ta_set_ui8" (func $ta_set_ui8 (param (ref extern)) (param i32) (param (ref i31)))) + (import "bindings" "ta_set16_ui8" + (func $ta_set16_ui8 (param (ref extern)) (param i32) (param (ref i31)))) + (import "bindings" "ta_set32_ui8" + (func $ta_set32_ui8 (param (ref extern)) (param i32) (param i32))) (import "bindings" "ta_fill" (func $ta_fill_int (param (ref extern)) (param i32))) (import "bindings" "ta_fill" @@ -268,19 +276,7 @@ (local.set $h (call $caml_hash_mix_int (local.get $h) - (i32.or - (i32.or - (call $ta_get_ui8 (local.get $data) (local.get $i)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $i) (i32.const 1))) - (i32.const 8))) - (i32.or - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $i) (i32.const 2))) - (i32.const 16)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $i) (i32.const 3))) - (i32.const 24)))))) + (call $ta_get32_ui8 (local.get $data) (local.get $i)))) (local.set $i (i32.add (local.get $i) (i32.const 4))) (br $loop)))) (local.set $w (i32.const 0)) @@ -1912,11 +1908,7 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (ref.i31 (i32.or - (call $ta_get_ui8 (local.get $data) (local.get $p)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1))) - (i32.const 8))))) + (ref.i31 (call $ta_get16_ui8 (local.get $data) (local.get $p)))) (func (export "caml_ba_uint8_get32") (param $vba (ref eq)) (param $i (ref eq)) (result i32) @@ -1933,19 +1925,7 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (i32.or - (i32.or - (call $ta_get_ui8 (local.get $data) (local.get $p)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1))) - (i32.const 8))) - (i32.or - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 2))) - (i32.const 16)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 3))) - (i32.const 24))))) + (return_call $ta_get32_ui8 (local.get $data) (local.get $p))) (func (export "caml_ba_uint8_get64") (param $vba (ref eq)) (param $i (ref eq)) (result i64) @@ -1963,42 +1943,12 @@ (i32.const 0))) (then (call $caml_bound_error))) (i64.or - (i64.or - (i64.or - (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) (local.get $p))) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1)))) - (i64.const 8))) - (i64.or - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 2)))) - (i64.const 16)) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 3)))) - (i64.const 24)))) - (i64.or - (i64.or - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 4)))) - (i64.const 32)) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 5)))) - (i64.const 40))) - (i64.or - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 6)))) - (i64.const 48)) - (i64.shl (i64.extend_i32_u - (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 7)))) - (i64.const 56)))))) + (i64.extend_i32_u + (call $ta_get32_ui8 (local.get $data) (local.get $p))) + (i64.shl (i64.extend_i32_u + (call $ta_get32_ui8 (local.get $data) + (i32.add (local.get $p) (i32.const 4)))) + (i64.const 32)))) (func (export "caml_ba_uint8_set16") (param $vba (ref eq)) (param $i (ref eq)) (param $v (ref eq)) @@ -2017,10 +1967,7 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (call $ta_set_ui8 (local.get $data) (local.get $p) (local.get $d)) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1)) - (ref.i31 (i32.shr_u (i31.get_s (local.get $d)) (i32.const 8)))) + (call $ta_set16_ui8 (local.get $data) (local.get $p) (local.get $d)) (ref.i31 (i32.const 0))) (func (export "caml_ba_uint8_set32") @@ -2039,17 +1986,7 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (call $ta_set_ui8 (local.get $data) (local.get $p) - (ref.i31 (local.get $d))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1)) - (ref.i31 (i32.shr_u (local.get $d) (i32.const 8)))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 2)) - (ref.i31 (i32.shr_u (local.get $d) (i32.const 16)))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 3)) - (ref.i31 (i32.shr_u (local.get $d) (i32.const 24)))) + (call $ta_set32_ui8 (local.get $data) (local.get $p) (local.get $d)) (ref.i31 (i32.const 0))) (func (export "caml_ba_uint8_set64") @@ -2068,29 +2005,11 @@ (struct.get $bigarray $ba_dim (local.get $ba)) (i32.const 0))) (then (call $caml_bound_error))) - (call $ta_set_ui8 (local.get $data) (local.get $p) - (ref.i31 (i32.wrap_i64 (local.get $d)))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 1)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 8))))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 2)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 16))))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 3)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 24))))) - (call $ta_set_ui8 (local.get $data) + (call $ta_set32_ui8 (local.get $data) (local.get $p) + (i32.wrap_i64 (local.get $d))) + (call $ta_set32_ui8 (local.get $data) (i32.add (local.get $p) (i32.const 4)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 32))))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 5)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 40))))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 6)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 48))))) - (call $ta_set_ui8 (local.get $data) - (i32.add (local.get $p) (i32.const 7)) - (ref.i31 (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 56))))) + (i32.wrap_i64 (i64.shr_u (local.get $d) (i64.const 32)))) (ref.i31 (i32.const 0))) (export "caml_bytes_of_array" (func $caml_string_of_array)) diff --git a/runtime/wasm/bigstring.wat b/runtime/wasm/bigstring.wat index 6d404b19a..eb2ca8f13 100644 --- a/runtime/wasm/bigstring.wat +++ b/runtime/wasm/bigstring.wat @@ -35,6 +35,8 @@ (func $ta_create (param i32) (param anyref) (result anyref))) (import "bindings" "ta_get_ui8" (func $ta_get_ui8 (param (ref extern)) (param i32) (result i32))) + (import "bindings" "ta_get32_ui8" + (func $ta_get32_ui8 (param (ref extern)) (param i32) (result i32))) (import "bindings" "ta_set_ui8" (func $ta_set_ui8 (param (ref extern)) (param i32) (param (ref i31)))) (import "bindings" "ta_subarray" @@ -63,19 +65,7 @@ (local.set $h (call $caml_hash_mix_int (local.get $h) - (i32.or - (i32.or - (call $ta_get_ui8 (local.get $data) (local.get $i)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $i) (i32.const 1))) - (i32.const 8))) - (i32.or - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $i) (i32.const 2))) - (i32.const 16)) - (i32.shl (call $ta_get_ui8 (local.get $data) - (i32.add (local.get $i) (i32.const 3))) - (i32.const 24)))))) + (call $ta_get32_ui8 (local.get $data) (local.get $i)))) (local.set $i (i32.add (local.get $i) (i32.const 4))) (br $loop)))) (local.set $w (i32.const 0)) diff --git a/runtime/wasm/runtime.js b/runtime/wasm/runtime.js index 341e224ba..93f7a5d91 100644 --- a/runtime/wasm/runtime.js +++ b/runtime/wasm/runtime.js @@ -194,6 +194,9 @@ ta_get_ui16: (a, i) => a[i], ta_get_i8: (a, i) => a[i], ta_get_ui8: (a, i) => a[i], + ta_get16_ui8: (a, i) => a[i] | (a[i + 1] << 8), + ta_get32_ui8: (a, i) => + a[i] | (a[i + 1] << 8) | (a[i + 2] << 16) | (a[i + 3] << 24), ta_set_f64: (a, i, v) => (a[i] = v), ta_set_f32: (a, i, v) => (a[i] = v), ta_set_i32: (a, i, v) => (a[i] = v), @@ -201,6 +204,16 @@ ta_set_ui16: (a, i, v) => (a[i] = v), ta_set_i8: (a, i, v) => (a[i] = v), ta_set_ui8: (a, i, v) => (a[i] = v), + ta_set16_ui8: (a, i, v) => { + a[i] = v; + a[i + 1] = v >> 8; + }, + ta_set32_ui8: (a, i, v) => { + a[i] = v; + a[i + 1] = v >> 8; + a[i + 2] = v >> 16; + a[i + 3] = v >> 24; + }, ta_fill: (a, v) => a.fill(v), ta_blit: (s, d) => d.set(s), ta_subarray: (a, i, j) => a.subarray(i, j), From c3ba9956b7c88c8a67bd0e337ff9304aa3a27aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Vouillon?= Date: Thu, 26 Sep 2024 13:47:36 +0200 Subject: [PATCH 3/3] Optimized blitting between strings and bigstrings --- runtime/wasm/bigarray.wat | 46 +++++++++++++++++++++++--------------- runtime/wasm/bigstring.wat | 39 +++++++++++++++----------------- runtime/wasm/deps.json | 10 ++++++++- runtime/wasm/io.wat | 42 +++++++++++++--------------------- runtime/wasm/runtime.js | 8 +++++++ 5 files changed, 78 insertions(+), 67 deletions(-) diff --git a/runtime/wasm/bigarray.wat b/runtime/wasm/bigarray.wat index 412d9723f..812055eca 100644 --- a/runtime/wasm/bigarray.wat +++ b/runtime/wasm/bigarray.wat @@ -69,6 +69,14 @@ (import "bindings" "ta_subarray" (func $ta_subarray (param (ref extern)) (param i32) (param i32) (result (ref extern)))) + (import "bindings" "ta_blit_from_string" + (func $ta_blit_from_string + (param (ref $string)) (param i32) (param (ref extern)) (param i32) + (param i32))) + (import "bindings" "ta_blit_to_string" + (func $ta_blit_to_string + (param (ref extern)) (param i32) (param (ref $string)) (param i32) + (param i32))) (import "fail" "caml_bound_error" (func $caml_bound_error)) (import "fail" "caml_raise_out_of_memory" (func $caml_raise_out_of_memory)) (import "fail" "caml_invalid_argument" @@ -2016,26 +2024,22 @@ (func $caml_string_of_array (export "caml_string_of_array") (param (ref eq)) (result (ref eq)) ;; used to convert a typed array to a string - (local $a (ref extern)) (local $len i32) (local $i i32) + (local $a (ref extern)) (local $len i32) (local $s (ref $string)) (local.set $a (ref.as_non_null (extern.convert_any (call $unwrap (local.get 0))))) (local.set $len (call $ta_length (local.get $a))) (local.set $s (array.new $string (i32.const 0) (local.get $len))) - (loop $loop - (if (i32.lt_u (local.get $i) (local.get $len)) - (then - (array.set $string (local.get $s) (local.get $i) - (call $ta_get_ui8 (local.get $a) (local.get $i))) - (local.set $i (i32.add (local.get $i) (i32.const 1))) - (br $loop)))) + (call $ta_blit_to_string + (local.get $a) (i32.const 0) (local.get $s) (i32.const 0) + (local.get $len)) (local.get $s)) (export "caml_uint8_array_of_bytes" (func $caml_uint8_array_of_string)) (func $caml_uint8_array_of_string (export "caml_uint8_array_of_string") (param (ref eq)) (result (ref eq)) ;; Convert a string to a typed array - (local $ta (ref extern)) (local $len i32) (local $i i32) + (local $ta (ref extern)) (local $len i32) (local $s (ref $string)) (local.set $s (ref.cast (ref $string) (local.get 0))) (local.set $len (array.len (local.get $s))) @@ -2043,15 +2047,9 @@ (call $ta_create (i32.const 3) ;; Uint8Array (local.get $len))) - (loop $loop - (if (i32.lt_u (local.get $i) (local.get $len)) - (then - (call $ta_set_ui8 - (local.get $ta) - (local.get $i) - (ref.i31 (array.get $string (local.get $s) (local.get $i)))) - (local.set $i (i32.add (local.get $i) (i32.const 1))) - (br $loop)))) + (call $ta_blit_from_string + (local.get $s) (i32.const 0) (local.get $ta) (i32.const 0) + (local.get $len)) (call $wrap (any.convert_extern (local.get $ta)))) (func (export "caml_ba_get_kind") (param (ref eq)) (result i32) @@ -2082,4 +2080,16 @@ (local.get $num_dims) (local.get $kind) (local.get $layout))) + + (func (export "string_set") + (param $s externref) (param $i i32) (param $v i32) + (array.set $string + (ref.cast (ref null $string) (any.convert_extern (local.get $s))) + (local.get $i) (local.get $v))) + + (func (export "string_get") + (param $s externref) (param $i i32) (result i32) + (array.get $string + (ref.cast (ref null $string) (any.convert_extern (local.get $s))) + (local.get $i))) ) diff --git a/runtime/wasm/bigstring.wat b/runtime/wasm/bigstring.wat index eb2ca8f13..94a962174 100644 --- a/runtime/wasm/bigstring.wat +++ b/runtime/wasm/bigstring.wat @@ -48,6 +48,14 @@ (func $ta_length (param (ref extern)) (result i32))) (import "bindings" "ta_bytes" (func $ta_bytes (param anyref) (result anyref))) + (import "bindings" "ta_blit_from_string" + (func $ta_blit_from_string + (param (ref $string)) (param i32) (param (ref extern)) (param i32) + (param i32))) + (import "bindings" "ta_blit_to_string" + (func $ta_blit_to_string + (param (ref extern)) (param i32) (param (ref $string)) (param i32) + (param i32))) (import "hash" "caml_hash_mix_int" (func $caml_hash_mix_int (param i32) (param i32) (result i32))) @@ -202,7 +210,7 @@ (param $str1 (ref eq)) (param $vpos1 (ref eq)) (param $ba2 (ref eq)) (param $vpos2 (ref eq)) (param $vlen (ref eq)) (result (ref eq)) - (local $i i32) (local $pos1 i32) (local $pos2 i32) (local $len i32) + (local $pos1 i32) (local $pos2 i32) (local $len i32) (local $s1 (ref $string)) (local $d2 (ref extern)) (local.set $s1 (ref.cast (ref $string) (local.get $str1))) @@ -210,23 +218,17 @@ (local.set $d2 (call $caml_ba_get_data (local.get $ba2))) (local.set $pos2 (i31.get_s (ref.cast (ref i31) (local.get $vpos2)))) (local.set $len (i31.get_s (ref.cast (ref i31) (local.get $vlen)))) - (loop $loop - (if (i32.lt_u (local.get $i) (local.get $len)) - (then - (call $ta_set_ui8 (local.get $d2) - (i32.add (local.get $pos2) (local.get $i)) - (ref.i31 - (array.get_u $string (local.get $s1) - (i32.add (local.get $pos1) (local.get $i))))) - (local.set $i (i32.add (local.get $i) (i32.const 1))) - (br $loop)))) + (call $ta_blit_from_string + (local.get $s1) (local.get $pos1) + (local.get $d2) (local.get $pos2) + (local.get $len)) (ref.i31 (i32.const 0))) (func (export "caml_bigstring_blit_ba_to_bytes") (param $ba1 (ref eq)) (param $vpos1 (ref eq)) (param $str2 (ref eq)) (param $vpos2 (ref eq)) (param $vlen (ref eq)) (result (ref eq)) - (local $i i32) (local $pos1 i32) (local $pos2 i32) (local $len i32) + (local $pos1 i32) (local $pos2 i32) (local $len i32) (local $d1 (ref extern)) (local $s2 (ref $string)) (local.set $d1 (call $caml_ba_get_data (local.get $ba1))) @@ -234,15 +236,10 @@ (local.set $s2 (ref.cast (ref $string) (local.get $str2))) (local.set $pos2 (i31.get_s (ref.cast (ref i31) (local.get $vpos2)))) (local.set $len (i31.get_s (ref.cast (ref i31) (local.get $vlen)))) - (loop $loop - (if (i32.lt_u (local.get $i) (local.get $len)) - (then - (array.set $string (local.get $s2) - (i32.add (local.get $pos2) (local.get $i)) - (call $ta_get_ui8 (local.get $d1) - (i32.add (local.get $pos1) (local.get $i)))) - (local.set $i (i32.add (local.get $i) (i32.const 1))) - (br $loop)))) + (call $ta_blit_to_string + (local.get $d1) (local.get $pos1) + (local.get $s2) (local.get $pos2) + (local.get $len)) (ref.i31 (i32.const 0))) (func (export "caml_bigstring_blit_ba_to_ba") diff --git a/runtime/wasm/deps.json b/runtime/wasm/deps.json index 42b8150fc..c06b08f8a 100644 --- a/runtime/wasm/deps.json +++ b/runtime/wasm/deps.json @@ -1,7 +1,7 @@ [ { "name": "root", - "reaches": ["init", "exn", "mem", "strings"], + "reaches": ["init", "exn", "mem", "strings", "string_get", "string_set"], "root": true }, { @@ -20,6 +20,14 @@ "name": "strings", "export": "caml_extract_string" }, + { + "name": "string_get", + "export": "string_get" + }, + { + "name": "string_set", + "export": "string_set" + }, { "name": "callback", "export": "caml_callback" diff --git a/runtime/wasm/io.wat b/runtime/wasm/io.wat index 01fd089fc..e5b523c8c 100644 --- a/runtime/wasm/io.wat +++ b/runtime/wasm/io.wat @@ -56,6 +56,14 @@ (func $ta_set_ui8 (param (ref extern)) (param i32) (param i32))) ;; ZZZ ?? (import "bindings" "ta_get_ui8" (func $ta_get_ui8 (param (ref extern)) (param i32) (result i32))) + (import "bindings" "ta_blit_from_string" + (func $ta_blit_from_string + (param (ref $string)) (param i32) (param (ref extern)) (param i32) + (param i32))) + (import "bindings" "ta_blit_to_string" + (func $ta_blit_to_string + (param (ref extern)) (param i32) (param (ref $string)) (param i32) + (param i32))) (import "custom" "custom_compare_id" (func $custom_compare_id (param (ref eq)) (param (ref eq)) (param i32) (result i32))) @@ -330,20 +338,6 @@ (i64.add (local.get $offset) (i64.extend_i32_u (local.get $n)))) (local.get $n)) - (func $copy_from_buffer - (param $buf (ref extern)) (param $curr i32) - (param $s (ref $string)) (param $pos i32) (param $len i32) - (local $i i32) - (loop $loop - (if (i32.lt_u (local.get $i) (local.get $len)) - (then - (array.set $string (local.get $s) - (i32.add (local.get $pos) (local.get $i)) - (call $ta_get_ui8 (local.get $buf) - (i32.add (local.get $curr) (local.get $i)))) - (local.set $i (i32.add (local.get $i) (i32.const 1))) - (br $loop))))) - (func $caml_refill (param $ch (ref $channel)) (result i32) (local $n i32) (local $buf (ref extern)) @@ -374,7 +368,7 @@ (then (if (i32.gt_u (local.get $len) (local.get $avail)) (then (local.set $len (local.get $avail)))) - (call $copy_from_buffer + (call $ta_blit_to_string (struct.get $channel $buffer (local.get $ch)) (struct.get $channel $curr (local.get $ch)) (local.get $s) (local.get $pos) @@ -389,7 +383,7 @@ (struct.set $channel $max (local.get $ch) (local.get $nread)) (if (i32.gt_u (local.get $len) (local.get $nread)) (then (local.set $len (local.get $nread)))) - (call $copy_from_buffer + (call $ta_blit_to_string (struct.get $channel $buffer (local.get $ch)) (i32.const 0) (local.get $s) (local.get $pos) @@ -445,7 +439,7 @@ (local.set $curr (i32.const 0)) (if (i32.gt_u (local.get $len) (local.get $nread)) (then (local.set $len (local.get $nread)))))))) - (call $copy_from_buffer + (call $ta_blit_to_string (local.get $buf) (local.get $curr) (local.get $s) (local.get $pos) (local.get $len)) (struct.set $channel $curr (local.get $ch) @@ -730,7 +724,7 @@ (func $caml_putblock (param $ch (ref $channel)) (param $s (ref $string)) (param $pos i32) (param $len i32) (result i32) - (local $free i32) (local $curr i32) (local $i i32) + (local $free i32) (local $curr i32) (local $buf (ref extern)) (local.set $curr (struct.get $channel $curr (local.get $ch))) (local.set $free @@ -738,15 +732,9 @@ (if (i32.ge_u (local.get $len) (local.get $free)) (then (local.set $len (local.get $free)))) (local.set $buf (struct.get $channel $buffer (local.get $ch))) - (loop $loop - (if (i32.lt_u (local.get $i) (local.get $len)) - (then - (call $ta_set_ui8 (local.get $buf) - (i32.add (local.get $curr) (local.get $i)) - (array.get_u $string (local.get $s) - (i32.add (local.get $pos) (local.get $i)))) - (local.set $i (i32.add (local.get $i) (i32.const 1))) - (br $loop)))) + (call $ta_blit_from_string + (local.get $s) (local.get $pos) + (local.get $buf) (local.get $curr) (local.get $len)) (struct.set $channel $curr (local.get $ch) (i32.add (local.get $curr) (local.get $len))) (if (i32.ge_u (local.get $len) (local.get $free)) diff --git a/runtime/wasm/runtime.js b/runtime/wasm/runtime.js index 93f7a5d91..2a0d73f48 100644 --- a/runtime/wasm/runtime.js +++ b/runtime/wasm/runtime.js @@ -222,6 +222,12 @@ ta_copy: (ta, t, s, n) => ta.copyWithin(t, s, n), ta_bytes: (a) => new Uint8Array(a.buffer, a.byteOffset, a.length * a.BYTES_PER_ELEMENT), + ta_blit_from_string: (s, p1, a, p2, l) => { + for (let i = 0; i < l; i++) a[p2 + i] = string_get(s, p1 + i); + }, + ta_blit_to_string: (a, p1, s, p2, l) => { + for (let i = 0; i < l; i++) string_set(s, p2 + i, a[p1 + i]); + }, wrap_callback: (f) => function () { var n = arguments.length; @@ -537,6 +543,8 @@ caml_handle_uncaught_exception, caml_buffer, caml_extract_string, + string_get, + string_set, _initialize, } = wasmModule.instance.exports;