From df0843bcb6eed2de204c17922ab5352cf25cd38c Mon Sep 17 00:00:00 2001 From: Antonio Nuno Monteiro Date: Tue, 27 Aug 2024 15:59:02 -0700 Subject: [PATCH] vendor multipart form fork (#208) * vendor multipart form fork * wip * wip * fix nix buidl --- .github/workflows/opam-build.yml | 2 + .gitmodules | 3 + dune | 2 + dune-project | 4 +- lib/form.ml | 5 +- multipart/dune | 2 +- multipart/multipart.ml | 137 +------------------------------ multipart/multipart.mli | 2 + multipart/pp.ml | 117 ++++++++++++++++++++++++++ multipart_test/multipart_test.ml | 97 ++++++++++++---------- nix/ci/test.nix | 40 +++++---- nix/default.nix | 5 +- piaf.opam | 5 +- piaf.opam.template | 1 - vendor/dune | 21 +++++ vendor/multipart_form | 1 + 16 files changed, 239 insertions(+), 205 deletions(-) create mode 100644 .gitmodules create mode 100644 multipart/pp.ml create mode 100644 vendor/dune create mode 160000 vendor/multipart_form diff --git a/.github/workflows/opam-build.yml b/.github/workflows/opam-build.yml index 44dd579e..b8e7aaad 100644 --- a/.github/workflows/opam-build.yml +++ b/.github/workflows/opam-build.yml @@ -29,6 +29,8 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + submodules: recursive - name: Use OCaml ${{ matrix.ocaml-compiler }} uses: ocaml/setup-ocaml@v2 diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..47de975d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vendor/multipart_form"] + path = vendor/multipart_form + url = git@github.com:anmonteiro/multipart_form.git diff --git a/dune b/dune index cf79f6f3..8fc40ee1 100644 --- a/dune +++ b/dune @@ -29,3 +29,5 @@ --write-to %{target}) (bash "echo \"$(cat %{target} | jq -r)\" > %{target}")))) + +(vendored_dirs vendor) diff --git a/dune-project b/dune-project index 4ea0ce06..b9f44a17 100644 --- a/dune-project +++ b/dune-project @@ -37,7 +37,9 @@ gluten-eio h2-eio httpun-ws - (multipart_form :dev) + pecu + prettym + unstrctrd (eio_main (>= "1.0")) (dune-site :with-test) diff --git a/lib/form.ml b/lib/form.ml index 5e22ef48..61fbe6f4 100644 --- a/lib/form.ml +++ b/lib/form.ml @@ -32,6 +32,8 @@ open Import module Multipart = struct + module Multipart_form = Piaf_multipart_form.Multipart_form + type t = { name : string ; filename : string option @@ -44,8 +46,7 @@ module Multipart = struct Buffer.add_string ctbuf ct; Buffer.add_char ctbuf '\r'; Buffer.add_char ctbuf '\n'; - let ct = Multipart_form.Content_type.of_string (Buffer.contents ctbuf) in - match ct with + match Multipart_form.Content_type.of_string (Buffer.contents ctbuf) with | Ok { Multipart_form.Content_type.ty = `Multipart; _ } -> true | Ok _ | Error _ -> false diff --git a/multipart/dune b/multipart/dune index 3251cbf8..bc062725 100644 --- a/multipart/dune +++ b/multipart/dune @@ -1,4 +1,4 @@ (library (name multipart) (public_name piaf.multipart) - (libraries faraday ke multipart_form eio piaf.stream)) + (libraries faraday ke piaf.multipart_form eio piaf.stream)) diff --git a/multipart/multipart.ml b/multipart/multipart.ml index 0e3c4927..995f5ba4 100644 --- a/multipart/multipart.ml +++ b/multipart/multipart.ml @@ -29,138 +29,8 @@ * POSSIBILITY OF SUCH DAMAGE. *---------------------------------------------------------------------------*) -module List = struct - include List - - let concat_map f l = - let rec aux f acc = function - | [] -> rev acc - | x :: l -> - let xs = f x in - aux f (rev_append xs acc) l - in - aux f [] l -end - -module Pp = struct - let pp_extension formatter t = - let payload = match t with `Ietf_token x | `X_token x -> x in - Format.fprintf formatter "%s" payload - - let pp_ty = Multipart_form.Content_type.Type.pp - - let pp_subty formatter t = - let payload = - match t with `Ietf_token x | `Iana_token x | `X_token x -> x - in - Format.fprintf formatter "%s" payload - - let pp_content_type formatter { Multipart_form.Content_type.ty; subty; _ } = - Format.fprintf formatter "%a/%a" pp_ty ty pp_subty subty - - let pp_value formatter t = - let ty, payload = - match t with - | Multipart_form.Content_type.Parameters.String x -> "String", x - | Token x -> "Token", x - in - Format.fprintf formatter "%s: %s" ty payload - - let pp_disposition_type formatter t = - let ty = - match t with - | `Inline -> "inline" - | `Attachment -> "attachment" - | (`Ietf_token _ | `X_token _) as ext -> - Format.asprintf "%a" pp_extension ext - in - Format.fprintf formatter "%s" ty - - let pp_unstructured formatter t = - let pp_one formatter (t : Unstrctrd.elt) = - let s = - match t with - | `Uchar u -> Format.asprintf "Uchar: %c" (Uchar.to_char u) - | `CR -> "CR" - | `LF -> "LF" - | `WSP s -> Format.asprintf "WSP: %s" (s :> string) - | `FWS wsp -> Format.asprintf "FWS: %s" (wsp :> string) - | `d0 -> "d0" - | `OBS_NO_WS_CTL obs -> Format.asprintf "OB_NO_WS_CTL: %c" (obs :> char) - | `Invalid_char invalid_char -> - Format.asprintf "Invalid_char: %c" (invalid_char :> char) - in - Format.fprintf formatter "%s" s - in - Format.pp_print_list - ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") - pp_one - formatter - t - - let pp_field formatter t = - let open Multipart_form.Field in - match t with - | Field (_, Content_type, { ty; subty; parameters }) -> - Format.fprintf - formatter - "Content-Type { ty: %a; subty: %a; params: [ %a ] }" - pp_ty - ty - pp_subty - subty - (Format.pp_print_list - ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") - (fun fmt (name, value) -> - Format.fprintf fmt "%s, %a" name pp_value value)) - parameters - | Field (_, Content_encoding, enc) -> - Format.fprintf - formatter - "Encoding: %a" - Multipart_form.Content_encoding.pp - enc - | Field (_, Content_disposition, dispo) -> - Format.fprintf - formatter - "Content-Disposition: %a" - Multipart_form.Content_disposition.pp - dispo - | Field (field_name, Field, unstructured) -> - Format.fprintf - formatter - "Field (%a, %a)" - Multipart_form.Field_name.pp - field_name - pp_unstructured - (unstructured :> Unstrctrd.elt list) - - (* let rec pp_contents formatter t = *) - (* let pp_atom formatter { Multipart_form.header; body } = *) - (* Format.fprintf *) - (* formatter *) - (* "{ fields: [ %a ]; contents: %a }" *) - (* (Format.pp_print_list *) - (* ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") *) - (* pp_field) *) - (* header *) - (* (pp_option ~pp:pp_contents) *) - (* body *) - (* in *) - (* match t with *) - (* | Multipart_form.Leaf x -> *) - (* Format.fprintf formatter "{ Contents: %a }" pp_atom x *) - (* | Multipart lst -> *) - (* Format.fprintf *) - (* formatter *) - (* "{ Multipart: [ %a ] }" *) - (* (Format.pp_print_list *) - (* ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") *) - (* pp_atom) *) - (* lst *) -end - -(* type t = string option Multipart_form.t *) +module Multipart_form = Piaf_multipart_form.Multipart_form +module Pp = Pp let content_type header = let open Multipart_form in @@ -232,7 +102,6 @@ let rec result_headers t = in match t with | Multipart { header; body } -> - Format.eprintf "nameof: %B@." (Option.is_some (name_of_header header)); let headers = atom_to_headers [] header in (match body with | [] -> headers @@ -323,7 +192,7 @@ let extract_parts ~emit ~finish ~max_chunk_size ~content_type stream = then Error "POST buffer has grown too much" else if not (Qe.is_empty ke) then - (* XXX(anmonteiro): It's OK to only read the first slice of the + (* NOTE(anmonteiro): It's OK to only read the first slice of the * queue. Ke's implementation returns at most 2 buffers from `peek`: * the second one is returned if the buffer has wrapped around its * capacity. *) diff --git a/multipart/multipart.mli b/multipart/multipart.mli index 30fd742e..cca7e113 100644 --- a/multipart/multipart.mli +++ b/multipart/multipart.mli @@ -29,6 +29,8 @@ * POSSIBILITY OF SUCH DAMAGE. *---------------------------------------------------------------------------*) +module Multipart_form = Piaf_multipart_form.Multipart_form + type t module Pp : sig diff --git a/multipart/pp.ml b/multipart/pp.ml new file mode 100644 index 00000000..2cfc4f3d --- /dev/null +++ b/multipart/pp.ml @@ -0,0 +1,117 @@ +module Multipart_form = Piaf_multipart_form.Multipart_form + +let pp_extension formatter t = + let payload = match t with `Ietf_token x | `X_token x -> x in + Format.fprintf formatter "%s" payload + +let pp_ty = Multipart_form.Content_type.Type.pp + +let pp_subty formatter t = + let payload = + match t with `Ietf_token x | `Iana_token x | `X_token x -> x + in + Format.fprintf formatter "%s" payload + +let pp_content_type formatter { Multipart_form.Content_type.ty; subty; _ } = + Format.fprintf formatter "%a/%a" pp_ty ty pp_subty subty + +let pp_value formatter t = + let ty, payload = + match t with + | Multipart_form.Content_type.Parameters.String x -> "String", x + | Token x -> "Token", x + in + Format.fprintf formatter "%s: %s" ty payload + +let pp_disposition_type formatter t = + let ty = + match t with + | `Inline -> "inline" + | `Attachment -> "attachment" + | (`Ietf_token _ | `X_token _) as ext -> + Format.asprintf "%a" pp_extension ext + in + Format.fprintf formatter "%s" ty + +let pp_unstructured formatter t = + let pp_one formatter (t : Unstrctrd.elt) = + let s = + match t with + | `Uchar u -> Format.asprintf "Uchar: %c" (Uchar.to_char u) + | `CR -> "CR" + | `LF -> "LF" + | `WSP s -> Format.asprintf "WSP: %s" (s :> string) + | `FWS wsp -> Format.asprintf "FWS: %s" (wsp :> string) + | `d0 -> "d0" + | `OBS_NO_WS_CTL obs -> Format.asprintf "OB_NO_WS_CTL: %c" (obs :> char) + | `Invalid_char invalid_char -> + Format.asprintf "Invalid_char: %c" (invalid_char :> char) + in + Format.fprintf formatter "%s" s + in + Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") + pp_one + formatter + t + +let pp_field formatter t = + let open Multipart_form.Field in + match t with + | Field (_, Content_type, { ty; subty; parameters }) -> + Format.fprintf + formatter + "Content-Type { ty: %a; subty: %a; params: [ %a ] }" + pp_ty + ty + pp_subty + subty + (Format.pp_print_list + ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") + (fun fmt (name, value) -> + Format.fprintf fmt "%s, %a" name pp_value value)) + parameters + | Field (_, Content_encoding, enc) -> + Format.fprintf + formatter + "Encoding: %a" + Multipart_form.Content_encoding.pp + enc + | Field (_, Content_disposition, dispo) -> + Format.fprintf + formatter + "Content-Disposition: %a" + Multipart_form.Content_disposition.pp + dispo + | Field (field_name, Field, unstructured) -> + Format.fprintf + formatter + "Field (%a, %a)" + Multipart_form.Field_name.pp + field_name + pp_unstructured + (unstructured :> Unstrctrd.elt list) + +(* let rec pp_contents formatter t = *) +(* let pp_atom formatter { Multipart_form.header; body } = *) +(* Format.fprintf *) +(* formatter *) +(* "{ fields: [ %a ]; contents: %a }" *) +(* (Format.pp_print_list *) +(* ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") *) +(* pp_field) *) +(* header *) +(* (pp_option ~pp:pp_contents) *) +(* body *) +(* in *) +(* match t with *) +(* | Multipart_form.Leaf x -> *) +(* Format.fprintf formatter "{ Contents: %a }" pp_atom x *) +(* | Multipart lst -> *) +(* Format.fprintf *) +(* formatter *) +(* "{ Multipart: [ %a ] }" *) +(* (Format.pp_print_list *) +(* ~pp_sep:(fun fmt () -> Format.fprintf fmt "; ") *) +(* pp_atom) *) +(* lst *) diff --git a/multipart_test/multipart_test.ml b/multipart_test/multipart_test.ml index 4b4ad937..0880161f 100644 --- a/multipart_test/multipart_test.ml +++ b/multipart_test/multipart_test.ml @@ -1,5 +1,7 @@ open Eio.Std +module Multipart_form = Piaf_multipart_form.Multipart_form + let content_type = "multipart/form-data; boundary=----WebKitFormBoundaryVuTaDGWRcduyfmAv" @@ -12,16 +14,15 @@ Content-Type: image/png|}^"\r\n\r\n"^ |} let multipart_request_body_chunks payload = - let continue = ref true in let cur_off = ref 0 in let ret = ref [] in let chunk_size = 32 in - while !continue do - let this_len = min chunk_size (String.length payload - !cur_off) in + let payload_len = String.length payload in + while !cur_off < payload_len do + let this_len = min chunk_size (payload_len - !cur_off) in let subs = String.sub payload !cur_off this_len in ret := subs :: !ret; cur_off := !cur_off + this_len; - if !cur_off = String.length payload then continue := false done; List.rev !ret @@ -53,38 +54,37 @@ let test_simple_boundary ~sw _env () = ~emit stream) in - let name, stream = Promise.await waiter in - Alcotest.(check (option string)) - "filename extracted" - (Some "picture.png") - name; - Alcotest.(check bool) - "working through chunks in parallel (stream arrives before the promise \ - resolves)" - false - ((Promise.is_resolved multipart_result)); - let chunks = Piaf_stream.to_list stream in - Alcotest.(check int) - "Correct number of chunks emitted" - (payload_size / max_chunk_size) - (List.length chunks); match Promise.await multipart_result with - | Ok (Ok t) -> - let fields = Multipart.result_fields t in - Alcotest.(check int) "one field" 1 (List.length fields); - let name, multipart_fields = List.hd fields in - Alcotest.(check string) "field name" "picture.png" name; - Alcotest.(check string) - "parsed content-type and disposition correctly" - {|Content-Disposition[*]: { type= ; filename= picture.png; + | Error exn -> raise exn + | Ok (Error (`Msg error)) -> Alcotest.fail error + | Ok (Ok _multipart) -> + let name, stream = Promise.await waiter in + Alcotest.(check (option string)) + "filename extracted" + (Some "picture.png") + name; + let chunks = Piaf_stream.to_list stream in + Alcotest.(check int) + "Correct number of chunks emitted" + (payload_size / max_chunk_size) + (List.length chunks); + match Promise.await multipart_result with + | Ok (Ok t) -> + let fields = Multipart.result_fields t in + Alcotest.(check int) "one field" 1 (List.length fields); + let name, multipart_fields = List.hd fields in + Alcotest.(check string) "field name" "picture.png" name; + Alcotest.(check string) + "parsed content-type and disposition correctly" + {|Content-Disposition[*]: { type= ; filename= picture.png; creation= ; modification= ; read= ; size= ; parameters= (parameters (name, "picture.png")); } Content-Type[*]: image/iana:png |} - (Format.asprintf "%a" Multipart_form.Header.pp multipart_fields) - | Ok (Error (`Msg msg)) -> - Alcotest.fail msg - | Error (exn) -> - Alcotest.fail (Printexc.to_string exn) + (Format.asprintf "%a" Multipart_form.Header.pp multipart_fields) + | Ok (Error (`Msg msg)) -> + Alcotest.fail msg + | Error (exn) -> + Alcotest.fail (Printexc.to_string exn) let test_unaligned_boundary ~sw _env () = let payload_size = 0x1100 in @@ -106,21 +106,28 @@ let test_unaligned_boundary ~sw _env () = push None; let waiter, wakener = Promise.create () in let emit name stream = Promise.resolve wakener (name, stream) in - let _multipart_result = + let multipart_result = Fiber.fork_promise ~sw (fun () -> - Multipart.parse_multipart_form ~content_type ~max_chunk_size ~emit stream - ) + Multipart.parse_multipart_form + ~content_type + ~max_chunk_size + ~emit + stream) in - let name, stream = Promise.await waiter in - Alcotest.(check (option string)) - "filename extracted" - (Some "picture.png") - name; - let chunks = Piaf_stream.to_list stream in - Alcotest.(check int) - "Correct number of chunks emitted" - ((payload_size / max_chunk_size) + 1) - (List.length chunks) + match Promise.await multipart_result with + | Error exn -> raise exn + | Ok (Error (`Msg error)) -> Alcotest.fail error + | Ok (Ok _multipart) -> + let name, stream = Promise.await waiter in + Alcotest.(check (option string)) + "filename extracted" + (Some "picture.png") + name; + let chunks = Piaf_stream.to_list stream in + Alcotest.(check int) + "Correct number of chunks emitted" + ((payload_size / max_chunk_size) + 1) + (List.length chunks) let test_no_boundary ~sw:_ _env () = let content_type_no_boundary = "text/plain" in diff --git a/nix/ci/test.nix b/nix/ci/test.nix index cf5f4925..bd445e8b 100644 --- a/nix/ci/test.nix +++ b/nix/ci/test.nix @@ -2,25 +2,28 @@ let lock = builtins.fromJSON (builtins.readFile ./../../flake.lock); - src = fetchGit { - url = with lock.nodes.nixpkgs.locked;"https://github.com/${owner}/${repo}"; - inherit (lock.nodes.nixpkgs.locked) rev; - allRefs = true; - }; - pkgs = import src { - extraOverlays = [ - (self: super: { - ocamlPackages = super.ocaml-ng."ocamlPackages_${ocamlVersion}".overrideScope - (oself: osuper: { - eio_main = osuper.eio_main.overrideAttrs (_: { - # Use eio_posix until the kernel version for GH Actions gets rolled - # forward. There seems to be a io_uring bug - propagatedBuildInputs = [ oself.eio_posix ]; + pkgs = + let + src = fetchGit { + url = with lock.nodes.nixpkgs.locked;"https://github.com/${owner}/${repo}"; + inherit (lock.nodes.nixpkgs.locked) rev; + allRefs = true; + }; + in + import src { + extraOverlays = [ + (self: super: { + ocamlPackages = super.ocaml-ng."ocamlPackages_${ocamlVersion}".overrideScope + (oself: osuper: { + eio_main = osuper.eio_main.overrideAttrs (_: { + # Use eio_posix until the kernel version for GH Actions gets rolled + # forward. There seems to be a io_uring bug + propagatedBuildInputs = [ oself.eio_posix ]; + }); }); - }); - }) - ]; - }; + }) + ]; + }; nix-filter-src = fetchGit { url = with lock.nodes.nix-filter.locked; "https://github.com/${owner}/${repo}"; inherit (lock.nodes.nix-filter.locked) rev; @@ -71,6 +74,7 @@ let "lib_test" "multipart" "multipart_test" + "vendor" "sendfile" "stream" "examples" diff --git a/nix/default.nix b/nix/default.nix index 1f737af3..9547d384 100644 --- a/nix/default.nix +++ b/nix/default.nix @@ -18,6 +18,7 @@ rec { "stream" "multipart" "multipart_test" + "vendor" ]; }; @@ -34,7 +35,9 @@ rec { h2-eio httpun-ws - multipart_form + pecu + unstrctrd + prettym dune-site diff --git a/piaf.opam b/piaf.opam index 71b8b838..16e30ab8 100644 --- a/piaf.opam +++ b/piaf.opam @@ -21,7 +21,9 @@ depends: [ "gluten-eio" "h2-eio" "httpun-ws" - "multipart_form" {dev} + "pecu" + "prettym" + "unstrctrd" "eio_main" {>= "1.0"} "dune-site" {with-test} "alcotest" {with-test} @@ -50,5 +52,4 @@ pin-depends: [ [ "h2.dev" "git+https://github.com/anmonteiro/ocaml-h2.git" ] [ "h2-eio.dev" "git+https://github.com/anmonteiro/ocaml-h2.git" ] [ "httpun-ws.dev" "git+https://github.com/anmonteiro/httpun-ws.git" ] - [ "multipart_form.dev" "git+https://github.com/anmonteiro/multipart_form.git" ] ] diff --git a/piaf.opam.template b/piaf.opam.template index 44dde5f6..c1a549e8 100644 --- a/piaf.opam.template +++ b/piaf.opam.template @@ -6,5 +6,4 @@ pin-depends: [ [ "h2.dev" "git+https://github.com/anmonteiro/ocaml-h2.git" ] [ "h2-eio.dev" "git+https://github.com/anmonteiro/ocaml-h2.git" ] [ "httpun-ws.dev" "git+https://github.com/anmonteiro/httpun-ws.git" ] - [ "multipart_form.dev" "git+https://github.com/anmonteiro/multipart_form.git" ] ] diff --git a/vendor/dune b/vendor/dune new file mode 100644 index 00000000..1a7534d3 --- /dev/null +++ b/vendor/dune @@ -0,0 +1,21 @@ +(data_only_dirs *) + +(subdir + multipart_form/lib + (library + (name piaf_multipart_form) + (public_name piaf.multipart_form) + (libraries + logs + bigstringaf + ke + unstrctrd + unstrctrd.parser + base64 + base64.rfc2045 + prettym + pecu + uutf + fmt + angstrom + faraday))) diff --git a/vendor/multipart_form b/vendor/multipart_form new file mode 160000 index 00000000..06a86ad3 --- /dev/null +++ b/vendor/multipart_form @@ -0,0 +1 @@ +Subproject commit 06a86ad395a4f09cdf1ac4fd1f15993521e7ac47