diff --git a/example/README.md b/example/README.md index 7a277a37d..75a173eaf 100644 --- a/example/README.md +++ b/example/README.md @@ -53,9 +53,13 @@ COMMANDS Validate a module wasm2wat [OPTION]… [ARG]… - Generate a text format file (.wat) file from a binary format file + Generate a text format file (.wat) from a binary format file (.wasm) + wat2wasm [OPTION]… [ARG]… + Generate a binary format file (.wasm) from a text format file + (.wat) + COMMON OPTIONS --help[=FMT] (default=auto) Show this help in format FMT. The value FMT must be one of auto, diff --git a/example/wasm2wat/README.md b/example/wasm2wat/README.md index 3d88d2477..41f8c8a08 100644 --- a/example/wasm2wat/README.md +++ b/example/wasm2wat/README.md @@ -22,8 +22,8 @@ $ owi wasm2wat ./42.wasm ```sh $ owi wasm2wat --help=plain NAME - owi-wasm2wat - Generate a text format file (.wat) file from a binary - format file (.wasm) + owi-wasm2wat - Generate a text format file (.wat) from a binary format + file (.wasm) SYNOPSIS owi wasm2wat [OPTION]… [ARG]… diff --git a/src/bin/owi.ml b/src/bin/owi.ml index 4f68b453c..1917003cb 100644 --- a/src/bin/owi.ml +++ b/src/bin/owi.ml @@ -208,14 +208,25 @@ let wasm2wat_cmd = let open Cmdliner in let info = let doc = - "Generate a text format file (.wat) file from a binary format file \ - (.wasm)" + "Generate a text format file (.wat) from a binary format file (.wasm)" in let man = [] @ shared_man in Cmd.info "wasm2wat" ~version ~doc ~sdocs ~man in Cmd.v info Term.(const Cmd_wasm2wat.cmd $ files) +let wat2wasm_cmd = + let open Cmdliner in + let info = + let doc = + "Generate a binary format file (.wasm) from a text format file (.wat)" + in + let man = [] @ shared_man in + Cmd.info "wat2wasm" ~version ~doc ~sdocs ~man + in + Cmd.v info + Term.(const Cmd_wat2wasm.cmd $ profiling $ debug $ unsafe $ optimize $ files) + let cli = let open Cmdliner in let info = @@ -237,6 +248,7 @@ let cli = ; conc_cmd ; validate_cmd ; wasm2wat_cmd + ; wat2wasm_cmd ] let exit_code = diff --git a/src/binary_to_text/binary_encoder.ml b/src/binary_to_text/binary_encoder.ml new file mode 100644 index 000000000..c31aa273e --- /dev/null +++ b/src/binary_to_text/binary_encoder.ml @@ -0,0 +1,701 @@ +(* SPDX-License-Identifier: AGPL-3.0-or-later *) +(* Copyright © 2021-2024 OCamlPro *) +(* Written by the Owi programmers *) + +open Binary +open Syntax +open Types + +let end_opcode = '\x0B' + +let magic = "\x00\x61\x73\x6d" + +let version = "\x01\x00\x00\x00" + +let add_char c buf = Buffer.add_char buf c + +let add_string str buf = Buffer.add_string buf str + +(* add byte from int (ascii code) *) +let add_byte i = + let c = Char.chr (i land 0xff) in + add_char c + +(* add 2 bytes (16 bits) from int *) +let add_bytes_2 i buf = + add_byte (i land 0xff) buf; + add_byte (i lsr 8) buf + +(* add 4 bytes (32 bits) from int32 *) +let add_bytes_4 i buf = + add_bytes_2 (Int32.to_int (Int32.logand i 0xffffl)) buf; + add_bytes_2 (Int32.to_int (Int32.shift_right_logical i 16)) buf + +(* add 8 bytes (64 bits) from int64 *) +let add_bytes_8 i buf = + add_bytes_4 (Int64.to_int32 (Int64.logand i 0xffffffffL)) buf; + add_bytes_4 (Int64.to_int32 (Int64.shift_right i 32)) buf + +let rec write_U64 i buf = + let b = Int64.to_int (Int64.logand i 0x7fL) in + if 0L <= i && i < 128L then add_byte b buf + else ( + add_byte (b lor 0x80) buf; + write_U64 (Int64.shift_right_logical i 7) buf ) + +let write_U32 i = write_U64 (Int64.logand (Int64.of_int32 i) 0xffffffffL) + +let write_U32_of_int i = + let i = Int32.of_int i in + write_U32 i + +let rec write_S64 i buf = + let b = Int64.to_int (Int64.logand i 0x7fL) in + if -64L <= i && i < 64L then add_byte b buf + else ( + add_byte (b lor 0x80) buf; + write_S64 (Int64.shift_right i 7) buf ) + +let write_S32 i = write_S64 (Int64.of_int32 i) + +let write_F32 f = + let i32 = Float32.to_bits f in + add_bytes_4 i32 + +let write_F64 f = + let i64 = Float64.to_bits f in + add_bytes_8 i64 + +let indice_encode (idx : binary indice) buf = + let (Raw idx) = idx in + write_U32_of_int idx buf + +let add_char_indice c idx buf = + add_char c buf; + indice_encode idx buf + +let reftype_encode ht buf = + match ht with + | Func_ht -> add_char '\x70' buf + | Extern_ht -> add_char '\x6F' buf + | _ -> assert false + +let valtype_encode vt buf = + let c = + match vt with + | Num_type I32 -> '\x7F' + | Num_type I64 -> '\x7E' + | Num_type F32 -> '\x7D' + | Num_type F64 -> '\x7C' + | Ref_type (Null, Func_ht) -> '\x70' + | Ref_type (Null, Extern_ht) -> '\x6F' + | _ -> assert false (* vecttype v128 '\x7B' *) + in + add_char c buf + +let vector_encode datas func_encode buf = + let vector_buf = Buffer.create 16 in + let len = + List.fold_left + (fun acc data -> + match func_encode data vector_buf with + | None -> acc + | Some () -> acc + 1 ) + 0 datas + in + write_U32_of_int len buf; + Buffer.add_buffer buf vector_buf + +let vector_encode_from_namedvalues ~rev_values datas func_encode buf = + let { Named.values; _ } = datas in + match values with + | [] -> () + | values -> + let values = if rev_values then List.rev values else values in + vector_encode values func_encode buf + +let resulttype_encode (rt : _ result_type) buf = + vector_encode rt (fun vt buf -> Some (valtype_encode vt buf)) buf + +let paramtype_encode (pt : _ param_type) buf = + let vt = List.map snd pt in + vector_encode vt (fun vt buf -> Some (valtype_encode vt buf)) buf + +let mut_encode mut buf = + let c = match mut with Const -> '\x00' | Var -> '\x01' in + add_char c buf + +let block_type_encode (typ : binary block_type) buf = + match typ with + | Bt_raw (None, ([], [])) -> add_char '\x40' buf + | Bt_raw (None, ([], [ vt ])) -> valtype_encode vt buf + | _ -> assert false (* TODO: types link *) + +let global_type_encode (typ : _ global_type) buf = + let mut, vt = typ in + valtype_encode vt buf; + mut_encode mut buf + +let limits_encode limits buf = + match limits with + | { min; max = None } -> + add_char '\x00' buf; + write_U32_of_int min buf + | { min; max = Some max } -> + add_char '\x01' buf; + write_U32_of_int min buf; + write_U32_of_int max buf + +let memarg_encode ({ offset; align } : memarg) buf = + write_U32 offset buf; + write_U32 align buf + +let memory_encode (mem : mem) buf = + let _so, limits = mem in + limits_encode limits buf + +let memory_import_encode (mem : limits Imported.t) buf = + let { Imported.modul; name; assigned_name = _; desc = limits } = mem in + add_string modul buf; + add_string name buf; + add_char '\x02' buf; + limits_encode limits buf + +let table_encode (table : _ table) buf = + let _so, (limits, (_nullable, heaptype)) = table in + reftype_encode heaptype buf; + limits_encode limits buf + +let table_import_encode (table : _ table_type Imported.t) buf = + let { Imported.modul; name; assigned_name = _; desc } = table in + let limits, (_nullable, heaptype) = desc in + add_string modul buf; + add_string name buf; + add_char '\x01' buf; + reftype_encode heaptype buf; + limits_encode limits buf + +let fc_encode i buf = + add_char '\xFC' buf; + write_U32_of_int i buf + +let rec instr_encode instr buf = + match instr with + | Unreachable -> add_char '\x00' buf + | Nop -> add_char '\x01' buf + | Block (_str, Some bt, expr) -> + add_char '\x02' buf; + block_type_encode bt buf; + expr_encode expr buf + | Loop (_str, Some bt, expr) -> + add_char '\x03' buf; + block_type_encode bt buf; + expr_encode expr buf + | If_else (_str, Some bt, expr1, expr2) -> + add_char '\x04' buf; + block_type_encode bt buf; + begin + match expr2 with + | [] -> expr_encode expr1 buf + | expr2 -> + expr_encode expr1 ?end_op_code:(Some '\x05') buf; + expr_encode expr2 buf + end + | Br idx -> add_char_indice '\x0C' idx buf + | Br_if idx -> add_char_indice '\x0D' idx buf + | Br_table (idxs, idx) -> + let idxs = Array.to_list idxs in + add_char '\x0E' buf; + vector_encode idxs (fun idx buf -> Some (indice_encode idx buf)) buf; + indice_encode idx buf + | Call idx -> add_char_indice '\x10' idx buf + | Drop -> add_char '\x1A' buf + | Select None -> add_char '\x1B' buf + | Select (Some vts) -> + add_char '\x1C' buf; + List.iter (fun vt -> valtype_encode vt buf) vts + | Local_get idx -> add_char_indice '\x20' idx buf + | Local_set idx -> add_char_indice '\x21' idx buf + | Local_tee idx -> add_char_indice '\x22' idx buf + | Global_get idx -> add_char_indice '\x23' idx buf + | Global_set idx -> add_char_indice '\x24' idx buf + | Table_get idx -> add_char_indice '\x25' idx buf + | Table_set idx -> add_char_indice '\x26' idx buf + | I_load (S32, memarg) -> + add_char '\x28' buf; + memarg_encode memarg buf + | I_load (S64, memarg) -> + add_char '\x29' buf; + memarg_encode memarg buf + | F_load (S32, memarg) -> + add_char '\x2A' buf; + memarg_encode memarg buf + | F_load (S64, memarg) -> + add_char '\x2B' buf; + memarg_encode memarg buf + | I_load8 (S32, S, memarg) -> + add_char '\x2C' buf; + memarg_encode memarg buf + | I_load8 (S32, U, memarg) -> + add_char '\x2D' buf; + memarg_encode memarg buf + | I_load16 (S32, S, memarg) -> + add_char '\x2E' buf; + memarg_encode memarg buf + | I_load16 (S32, U, memarg) -> + add_char '\x2F' buf; + memarg_encode memarg buf + | I_load8 (S64, S, memarg) -> + add_char '\x30' buf; + memarg_encode memarg buf + | I_load8 (S64, U, memarg) -> + add_char '\x31' buf; + memarg_encode memarg buf + | I_load16 (S64, S, memarg) -> + add_char '\x32' buf; + memarg_encode memarg buf + | I_load16 (S64, U, memarg) -> + add_char '\x33' buf; + memarg_encode memarg buf + | I64_load32 (S, memarg) -> + add_char '\x34' buf; + memarg_encode memarg buf + | I64_load32 (U, memarg) -> + add_char '\x35' buf; + memarg_encode memarg buf + | I_store (S32, memarg) -> + add_char '\x36' buf; + memarg_encode memarg buf + | I_store (S64, memarg) -> + add_char '\x37' buf; + memarg_encode memarg buf + | F_store (S32, memarg) -> + add_char '\x38' buf; + memarg_encode memarg buf + | F_store (S64, memarg) -> + add_char '\x39' buf; + memarg_encode memarg buf + | I_store8 (S32, memarg) -> + add_char '\x3A' buf; + memarg_encode memarg buf + | I_store16 (S32, memarg) -> + add_char '\x3B' buf; + memarg_encode memarg buf + | I_store8 (S64, memarg) -> + add_char '\x3C' buf; + memarg_encode memarg buf + | I_store16 (S64, memarg) -> + add_char '\x3D' buf; + memarg_encode memarg buf + | I64_store32 memarg -> + add_char '\x3E' buf; + memarg_encode memarg buf + | Memory_size -> + add_char '\x3F' buf; + add_char '\x00' buf + | Memory_grow -> + add_char '\x40' buf; + add_char '\x00' buf + | I32_const i -> + add_char '\x41' buf; + write_S32 i buf + | I64_const i -> + add_char '\x42' buf; + write_S64 i buf + | F32_const f -> + add_char '\x43' buf; + write_F32 f buf + | F64_const f -> + add_char '\x44' buf; + write_F64 f buf + | I_testop (S32, Eqz) -> add_char '\x45' buf + | I_relop (S32, Eq) -> add_char '\x46' buf + | I_relop (S32, Ne) -> add_char '\x47' buf + | I_relop (S32, Lt S) -> add_char '\x48' buf + | I_relop (S32, Lt U) -> add_char '\x49' buf + | I_relop (S32, Gt S) -> add_char '\x4A' buf + | I_relop (S32, Gt U) -> add_char '\x4B' buf + | I_relop (S32, Le S) -> add_char '\x4C' buf + | I_relop (S32, Le U) -> add_char '\x4D' buf + | I_relop (S32, Ge S) -> add_char '\x4E' buf + | I_relop (S32, Ge U) -> add_char '\x4F' buf + | I_testop (S64, Eqz) -> add_char '\x50' buf + | I_relop (S64, Eq) -> add_char '\x51' buf + | I_relop (S64, Ne) -> add_char '\x52' buf + | I_relop (S64, Lt S) -> add_char '\x53' buf + | I_relop (S64, Lt U) -> add_char '\x54' buf + | I_relop (S64, Gt S) -> add_char '\x55' buf + | I_relop (S64, Gt U) -> add_char '\x56' buf + | I_relop (S64, Le S) -> add_char '\x57' buf + | I_relop (S64, Le U) -> add_char '\x58' buf + | I_relop (S64, Ge S) -> add_char '\x59' buf + | I_relop (S64, Ge U) -> add_char '\x5A' buf + | F_relop (S32, Eq) -> add_char '\x5B' buf + | F_relop (S32, Ne) -> add_char '\x5C' buf + | F_relop (S32, Lt) -> add_char '\x5D' buf + | F_relop (S32, Gt) -> add_char '\x5E' buf + | F_relop (S32, Le) -> add_char '\x5F' buf + | F_relop (S32, Ge) -> add_char '\x60' buf + | F_relop (S64, Eq) -> add_char '\x61' buf + | F_relop (S64, Ne) -> add_char '\x62' buf + | F_relop (S64, Lt) -> add_char '\x63' buf + | F_relop (S64, Gt) -> add_char '\x64' buf + | F_relop (S64, Le) -> add_char '\x65' buf + | F_relop (S64, Ge) -> add_char '\x66' buf + | I_unop (S32, Clz) -> add_char '\x67' buf + | I_unop (S32, Ctz) -> add_char '\x68' buf + | I_unop (S32, Popcnt) -> add_char '\x69' buf + | I_binop (S32, Add) -> add_char '\x6A' buf + | I_binop (S32, Sub) -> add_char '\x6B' buf + | I_binop (S32, Mul) -> add_char '\x6C' buf + | I_binop (S32, Div S) -> add_char '\x6D' buf + | I_binop (S32, Div U) -> add_char '\x6E' buf + | I_binop (S32, Rem S) -> add_char '\x6F' buf + | I_binop (S32, Rem U) -> add_char '\x70' buf + | I_binop (S32, And) -> add_char '\x71' buf + | I_binop (S32, Or) -> add_char '\x72' buf + | I_binop (S32, Xor) -> add_char '\x73' buf + | I_binop (S32, Shl) -> add_char '\x74' buf + | I_binop (S32, Shr S) -> add_char '\x75' buf + | I_binop (S32, Shr U) -> add_char '\x76' buf + | I_binop (S32, Rotl) -> add_char '\x77' buf + | I_binop (S32, Rotr) -> add_char '\x78' buf + | I_unop (S64, Clz) -> add_char '\x79' buf + | I_unop (S64, Ctz) -> add_char '\x7A' buf + | I_unop (S64, Popcnt) -> add_char '\x7B' buf + | I_binop (S64, Add) -> add_char '\x7C' buf + | I_binop (S64, Sub) -> add_char '\x7D' buf + | I_binop (S64, Mul) -> add_char '\x7E' buf + | I_binop (S64, Div S) -> add_char '\x7F' buf + | I_binop (S64, Div U) -> add_char '\x80' buf + | I_binop (S64, Rem S) -> add_char '\x81' buf + | I_binop (S64, Rem U) -> add_char '\x82' buf + | I_binop (S64, And) -> add_char '\x83' buf + | I_binop (S64, Or) -> add_char '\x84' buf + | I_binop (S64, Xor) -> add_char '\x85' buf + | I_binop (S64, Shl) -> add_char '\x86' buf + | I_binop (S64, Shr S) -> add_char '\x87' buf + | I_binop (S64, Shr U) -> add_char '\x88' buf + | I_binop (S64, Rotl) -> add_char '\x89' buf + | I_binop (S64, Rotr) -> add_char '\x8A' buf + | F_unop (S32, Abs) -> add_char '\x8B' buf + | F_unop (S32, Neg) -> add_char '\x8C' buf + | F_unop (S32, Ceil) -> add_char '\x8D' buf + | F_unop (S32, Floor) -> add_char '\x8E' buf + | F_unop (S32, Trunc) -> add_char '\x8F' buf + | F_unop (S32, Nearest) -> add_char '\x90' buf + | F_unop (S32, Sqrt) -> add_char '\x91' buf + | F_binop (S32, Add) -> add_char '\x92' buf + | F_binop (S32, Sub) -> add_char '\x93' buf + | F_binop (S32, Mul) -> add_char '\x94' buf + | F_binop (S32, Div) -> add_char '\x95' buf + | F_binop (S32, Min) -> add_char '\x96' buf + | F_binop (S32, Max) -> add_char '\x97' buf + | F_binop (S32, Copysign) -> add_char '\x98' buf + | F_unop (S64, Abs) -> add_char '\x99' buf + | F_unop (S64, Neg) -> add_char '\x9A' buf + | F_unop (S64, Ceil) -> add_char '\x9B' buf + | F_unop (S64, Floor) -> add_char '\x9C' buf + | F_unop (S64, Trunc) -> add_char '\x9D' buf + | F_unop (S64, Nearest) -> add_char '\x9E' buf + | F_unop (S64, Sqrt) -> add_char '\x9F' buf + | F_binop (S64, Add) -> add_char '\xA0' buf + | F_binop (S64, Sub) -> add_char '\xA1' buf + | F_binop (S64, Mul) -> add_char '\xA2' buf + | F_binop (S64, Div) -> add_char '\xA3' buf + | F_binop (S64, Min) -> add_char '\xA4' buf + | F_binop (S64, Max) -> add_char '\xA5' buf + | F_binop (S64, Copysign) -> add_char '\xA6' buf + | I32_wrap_i64 -> add_char '\xA7' buf + | I_trunc_f (S32, S32, S) -> add_char '\xA8' buf + | I_trunc_f (S32, S32, U) -> add_char '\xA9' buf + | I_trunc_f (S32, S64, S) -> add_char '\xAA' buf + | I_trunc_f (S32, S64, U) -> add_char '\xAB' buf + | I64_extend_i32 S -> add_char '\xAC' buf + | I64_extend_i32 U -> add_char '\xAD' buf + | I_trunc_f (S64, S32, S) -> add_char '\xAE' buf + | I_trunc_f (S64, S32, U) -> add_char '\xAF' buf + | I_trunc_f (S64, S64, S) -> add_char '\xB0' buf + | I_trunc_f (S64, S64, U) -> add_char '\xB1' buf + | F_convert_i (S32, S32, S) -> add_char '\xB2' buf + | F_convert_i (S32, S32, U) -> add_char '\xB3' buf + | F_convert_i (S32, S64, S) -> add_char '\xB4' buf + | F_convert_i (S32, S64, U) -> add_char '\xB5' buf + | F32_demote_f64 -> add_char '\xB6' buf + | F_convert_i (S64, S32, S) -> add_char '\xB7' buf + | F_convert_i (S64, S32, U) -> add_char '\xB8' buf + | F_convert_i (S64, S64, S) -> add_char '\xB9' buf + | F_convert_i (S64, S64, U) -> add_char '\xBA' buf + | F64_promote_f32 -> add_char '\xBB' buf + | I_reinterpret_f (S32, S32) -> add_char '\xBC' buf + | I_reinterpret_f (S64, S64) -> add_char '\xBD' buf + | F_reinterpret_i (S32, S32) -> add_char '\xBE' buf + | F_reinterpret_i (S64, S64) -> add_char '\xBF' buf + | I_extend8_s S32 -> add_char '\xC0' buf + | I_extend16_s S32 -> add_char '\xC1' buf + | I_extend8_s S64 -> add_char '\xC2' buf + | I_extend16_s S64 -> add_char '\xC3' buf + | I64_extend32_s -> add_char '\xC4' buf + | Ref_null rt -> + add_char '\xD0' buf; + reftype_encode rt buf + | Ref_is_null -> add_char '\xD1' buf + | Ref_func idx -> add_char_indice '\xD2' idx buf + | I_trunc_sat_f (S32, S32, S) -> fc_encode 0 buf + | I_trunc_sat_f (S32, S32, U) -> fc_encode 1 buf + | I_trunc_sat_f (S32, S64, S) -> fc_encode 2 buf + | I_trunc_sat_f (S32, S64, U) -> fc_encode 3 buf + | I_trunc_sat_f (S64, S32, S) -> fc_encode 4 buf + | I_trunc_sat_f (S64, S32, U) -> fc_encode 5 buf + | I_trunc_sat_f (S64, S64, S) -> fc_encode 6 buf + | I_trunc_sat_f (S64, S64, U) -> fc_encode 7 buf + | Memory_init idx -> + fc_encode 8 buf; + indice_encode idx buf; + add_char '\x00' buf + | Data_drop idx -> + fc_encode 9 buf; + indice_encode idx buf + | Memory_copy -> + fc_encode 10 buf; + add_char '\x00' buf; + add_char '\x00' buf + | Memory_fill -> + fc_encode 11 buf; + add_char '\x00' buf + | Table_init (tableidx, elemidx) -> + fc_encode 12 buf; + indice_encode elemidx buf; + indice_encode tableidx buf + | Elem_drop idx -> + fc_encode 13 buf; + indice_encode idx buf + | Table_copy (idx1, idx2) -> + fc_encode 14 buf; + indice_encode idx1 buf; + indice_encode idx2 buf + | Table_grow idx -> + fc_encode 15 buf; + indice_encode idx buf + | Table_size idx -> + fc_encode 16 buf; + indice_encode idx buf + | Table_fill idx -> + fc_encode 17 buf; + indice_encode idx buf + | _ -> assert false (* TODO *) + +and expr_encode expr ?end_op_code buf = + List.iter (fun instr -> instr_encode instr buf) expr; + match end_op_code with + | None -> add_char end_opcode buf + | Some end_op_code -> add_char end_op_code buf + +let export_encode cid (export : Binary.export) buf = + let { name; id } = export in + add_string name buf; + add_char cid buf; + write_U32_of_int id buf + +let global_encode (global : global) buf = + let { typ; init; id = _ } : global = global in + global_type_encode typ buf; + expr_encode init ?end_op_code:None buf + +let global_import_encode (global : _ global_type Imported.t) buf = + let { Imported.modul; name; assigned_name = _; desc } = global in + let mut, valtype = desc in + add_string modul buf; + add_string name buf; + add_char '\x03' buf; + valtype_encode valtype buf; + mut_encode mut buf + +let local_encode idx (local : _ param) buf = + let _so, vt = local in + write_U32_of_int idx buf; + valtype_encode vt buf + +let locals_encode typt_f locals buf = + let len = List.length locals in + let nb_params = + match typt_f with + | Bt_raw (_ind, (pt, _rt)) -> List.length pt + | _ -> assert false + in + write_U32_of_int len buf; + List.iteri (fun i loc -> local_encode (i + nb_params) loc buf) locals + +let encode_from_indexed func_local_encode func_import_encode ~is_local data buf + = + let v = Indexed.get data in + match v with + | Runtime.Local data -> + if is_local then Some (func_local_encode data buf) else None + | Runtime.Imported data -> + if is_local then None else Some (func_import_encode data buf) + +let section_encode id func_encode data buf = + let section_buf = Buffer.create 16 in + func_encode data section_buf; + if Buffer.length section_buf = 0 then () + else ( + add_char id buf; + write_U32_of_int (Buffer.length section_buf) buf; + Buffer.add_buffer buf section_buf ) + +(* type: section 1 *) +let types_encode func buf = + vector_encode_from_namedvalues ~rev_values:true func + (fun v buf -> + let v = Indexed.get v in + match v with + | Runtime.Local { type_f = t; locals = _; body = _; id = _ } -> begin + match t with + | Bt_raw (_ind, (pt, rt)) -> + add_char '\x60' buf; + paramtype_encode pt buf; + resulttype_encode rt buf; + Some () + | _ -> assert false + end + | _ -> None ) + buf + +(* import: section 2 *) +let imports_encode (func, table, mem, global) buf = + let fold_func import_encode_func buf acc data = + match + encode_from_indexed + (fun _ _ -> ()) + import_encode_func ~is_local:false data buf + with + | None -> acc + | Some () -> acc + 1 + in + let imp_buf = Buffer.create 16 in + let { Named.values = func_values; _ } = func in + let { Named.values = table_values; _ } = table in + let { Named.values = mem_values; _ } = mem in + let { Named.values = global_values; _ } = global in + let len = + List.fold_left + (fun acc func -> + let func = Indexed.get func in + match func with + | Runtime.Local _ -> acc + | Runtime.Imported { Imported.modul; name; assigned_name = _; desc = _ } + -> + add_string modul imp_buf; + add_string name imp_buf; + add_char '\x00' imp_buf; + write_U32_of_int acc imp_buf; + (* TODO ? *) + acc + 1 ) + 0 func_values + in + let len = + len + List.fold_left (fold_func table_import_encode imp_buf) 0 table_values + in + let len = + len + List.fold_left (fold_func memory_import_encode imp_buf) 0 mem_values + in + let len = + len + + List.fold_left (fold_func global_import_encode imp_buf) 0 global_values + in + write_U32_of_int len buf; + Buffer.add_buffer buf imp_buf + +(* function: section 3 *) +let functions_encode func buf = + let idx = ref 0 in + vector_encode_from_namedvalues ~rev_values:true func + (fun v buf -> + let v = Indexed.get v in + match v with + | Runtime.Local _ -> + let typeidx = !idx in + incr idx; + Some (write_U32_of_int typeidx buf) + | Runtime.Imported _ -> None ) + buf + +(* table: section 4 *) +let tables_encode table buf = + vector_encode_from_namedvalues ~rev_values:true table + (encode_from_indexed table_encode (fun _ _ -> ()) ~is_local:true) + buf + +(* memory: section 5 *) +let memories_encode memories buf = + vector_encode_from_namedvalues ~rev_values:true memories + (encode_from_indexed memory_encode (fun _ _ -> ()) ~is_local:true) + buf + +(* global: section 6 *) +let globals_encode globals buf = + vector_encode_from_namedvalues ~rev_values:false globals + (encode_from_indexed global_encode (fun _ _ -> ()) ~is_local:true) + buf + +(* export: section 7 *) +let exports_encode (exports : exports) buf = + let { global; mem; table; func } : exports = exports in + (* Memo: List.rev global; mem; table; func ? *) + List.iter (fun export -> export_encode '\x03' export buf) global; + List.iter (fun export -> export_encode '\x02' export buf) mem; + List.iter (fun export -> export_encode '\x01' export buf) table; + List.iter (fun export -> export_encode '\x00' export buf) func + +(* start: section 8 *) +let start_encode int_opt buf = + match int_opt with None -> () | Some funcidx -> write_U32_of_int funcidx buf + +let codes_encode func buf = + vector_encode_from_namedvalues ~rev_values:true func + (fun v buf -> + let v = Indexed.get v in + let code_buf = Buffer.create 16 in + match v with + | Runtime.Local { type_f; locals; body; id = _ } -> + locals_encode type_f locals code_buf; + expr_encode body code_buf; + write_U32_of_int (Buffer.length code_buf) buf; + Buffer.add_buffer buf code_buf; + Some () + | _ -> None ) + buf + +let encode (modul : Binary.modul) = + let buf = Buffer.create 256 in + add_string magic buf; + add_string version buf; + section_encode '\x01' types_encode modul.func buf; + section_encode '\x02' imports_encode + (modul.func, modul.table, modul.mem, modul.global) + buf; + section_encode '\x03' functions_encode modul.func buf; + section_encode '\x04' tables_encode modul.table buf; + section_encode '\x05' memories_encode modul.mem buf; + section_encode '\x06' globals_encode modul.global buf; + section_encode '\x07' exports_encode modul.exports buf; + section_encode '\x08' start_encode modul.start buf; + (* section_encode '\x09' element_encode modul.elem buf; *) + (* section_encode '\x0C' datacount_encode modul.data buf; *) + section_encode '\x0A' codes_encode modul.func buf; + (* section_encode '\x0B' data_encode modul.data buf; *) + Buffer.contents buf + +let write_file filename content = + let filename = Fpath.filename filename in + let filename = filename ^ ".wasm" in + let oc = Out_channel.open_bin filename in + Out_channel.output_string oc content; + Out_channel.close oc + +let convert (filename : Fpath.t) ~unsafe ~optimize m = + let+ m = Compile.Text.until_optimize ~unsafe ~optimize m in + let content = encode m in + let () = write_file filename content in + () diff --git a/src/cmd/cmd_wat2wasm.ml b/src/cmd/cmd_wat2wasm.ml new file mode 100644 index 000000000..4192eaeb4 --- /dev/null +++ b/src/cmd/cmd_wat2wasm.ml @@ -0,0 +1,18 @@ +(* SPDX-License-Identifier: AGPL-3.0-or-later *) +(* Copyright © 2021-2024 OCamlPro *) +(* Written by the Owi programmers *) + +open Syntax + +let cmd_one ~unsafe ~optimize file = + let ext = Fpath.get_ext file in + match ext with + | ".wat" -> + let* modul = Parse.Text.Module.from_file file in + Binary_encoder.convert file ~unsafe ~optimize modul + | ext -> Error (`Unsupported_file_extension ext) + +let cmd profiling debug unsafe optimize files = + if profiling then Log.profiling_on := true; + if debug then Log.debug_on := true; + list_iter (cmd_one ~unsafe ~optimize) files diff --git a/src/cmd/cmd_wat2wasm.mli b/src/cmd/cmd_wat2wasm.mli new file mode 100644 index 000000000..5cef2e0c3 --- /dev/null +++ b/src/cmd/cmd_wat2wasm.mli @@ -0,0 +1,5 @@ +(* SPDX-License-Identifier: AGPL-3.0-or-later *) +(* Copyright © 2021-2024 OCamlPro *) +(* Written by the Owi programmers *) + +val cmd : bool -> bool -> bool -> bool -> Fpath.t list -> unit Result.t diff --git a/src/dune b/src/dune index e2b21b947..91873c636 100644 --- a/src/dune +++ b/src/dune @@ -8,6 +8,7 @@ binary_parser binary_to_text binary_types + binary_encoder c_instrumentor c_share c_share_site @@ -22,6 +23,7 @@ cmd_conc cmd_validate cmd_wasm2wat + cmd_wat2wasm compile concrete concrete_choice diff --git a/test/help/help.t b/test/help/help.t index 3cb28aae3..40af8eebf 100644 --- a/test/help/help.t +++ b/test/help/help.t @@ -32,9 +32,13 @@ no subcommand should print help Validate a module wasm2wat [OPTION]… [ARG]… - Generate a text format file (.wat) file from a binary format file + Generate a text format file (.wat) from a binary format file (.wasm) + wat2wasm [OPTION]… [ARG]… + Generate a binary format file (.wasm) from a text format file + (.wat) + COMMON OPTIONS --help[=FMT] (default=auto) Show this help in format FMT. The value FMT must be one of auto,