Skip to content

Commit

Permalink
detect corrupt chunks in multiproc
Browse files (browse the repository at this point in the history)
  • Loading branch information
c-cube committed Aug 21, 2024
1 parent ff8c7e5 commit 26b8648
Showing 1 changed file with 14 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/tef-multiproc/trace_tef_multiproc.ml
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,13 +27,20 @@ let aggregate_into ~dir ~final_file () : unit =
let buf = Bytes.create 4096 in

let emit_chunk buf i len =
- if !afternewline && !first then
- first := false
- else if !afternewline then (
- output_string oc ",\n";
- afternewline := false
- );
- output oc buf i len
+ if len = 0 then
+ ()
+ else if Bytes.get buf i = '{' && Bytes.get buf (i + len - 1) <> '}' then
+ (* incomplete chunk *)
+ ()
+ else (
+ if !afternewline && !first then
+ first := false
+ else if !afternewline then (
+ output_string oc ",\n";
+ afternewline := false
+ );
+ output oc buf i len
+ )
in

(* dump content of jsonl file into [oc]. Insert "," before every object
Expand Down

0 comments on commit 26b8648

Please sign in to comment.