Skip to content

Commit

Permalink
feature: add streaming interface
Browse files Browse the repository at this point in the history
This allows us to partially match a string and then resume a match from where
we've ended.
  • Loading branch information
rgrinberg committed Oct 13, 2024
1 parent 0c89c14 commit f96f418
Show file tree
Hide file tree
Showing 6 changed files with 481 additions and 1 deletion.
232 changes: 232 additions & 0 deletions lib/compile.ml
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,238 @@ let make_match_str re positions ~len ~groups ~partial s ~pos =
else final_boundary_check re positions ~last ~slen s state_info ~groups
;;

module Stream = struct
type nonrec t =
{ state : State.t
; re : re
}

type 'a feed =
| Ok of 'a
| No_match

let create re =
let category = Category.(search_boundary ++ inexistant) in
let state = find_initial_state re category in
{ state; re }
;;

let feed t s ~pos ~len =
let last = pos + len in
let state = loop_no_mark t.re ~colors:t.re.colors s ~last ~pos t.state t.state in
let info = State.get_info state in
if Idx.is_break info.idx
&&
match Automata.State.status info.desc with
| Failed -> true
| Match _ | Running -> false
then No_match
else Ok { t with state }
;;

let finalize t s ~pos ~len =
let last = pos + len in
let state = scan_str t.re Positions.empty s t.state ~last ~pos ~groups:false in
let info = State.get_info state in
match
let _idx, res =
let final_cat = Category.(search_boundary ++ inexistant) in
final t.re Positions.empty info final_cat
in
res
with
| Running | Failed -> false
| Match _ -> true
;;

module Group = struct
type slice =
{ s : string
; pos : int
; len : int
}

module Slices = struct
type t = slice list

let get_substring slices ~start ~stop =
if stop = start
then ""
else (
let slices =
let rec drop slices remains =
if remains = 0
then slices
else (
match slices with
| [] -> assert false
| ({ s = _; pos; len } as slice) :: xs ->
let remains' = remains - len in
if remains' >= 0
then drop xs remains'
else (
let pos = pos + remains in
let len = len - remains in
{ slice with pos; len } :: xs))
in
drop slices start
in
let buf = Buffer.create (stop - start) in
let rec take slices remains =
if remains > 0
then (
match slices with
| [] -> assert false
| { s; pos; len } :: xs ->
let remains' = remains - len in
if remains' > 0
then (
Buffer.add_substring buf s pos len;
take xs remains')
else Buffer.add_substring buf s pos remains)
in
take slices (stop - start);
Buffer.contents buf)
;;

let rec drop t remains =
if remains = 0
then t
else (
match t with
| [] -> []
| ({ s = _; pos; len } as slice) :: t ->
if remains >= len
then drop t (remains - len)
else (
let delta = len - remains in
{ slice with pos = pos + delta; len = len - delta } :: t))
;;

let drop_rev t remains =
if remains = 0 then t else List.rev (drop (List.rev t) remains)
;;
end

type nonrec t =
{ t : t
; positions : Positions.t
; slices : Slices.t
; abs_pos : int
; first_match_pos : int
}

let create t =
{ t
; positions = Positions.make ~groups:true t.re
; slices = []
; abs_pos = 0
; first_match_pos = 0
}
;;

module Match = struct
type t =
{ pmarks : Pmark.Set.t
; slices : slice list
; marks : Mark_infos.t
; positions : int array
; start_pos : int
}

let test_mark t mark = Pmark.Set.mem mark t.pmarks

let get t i =
Mark_infos.offset t.marks i
|> Option.map (fun (start, stop) ->
let start = t.positions.(start) - t.start_pos in
let stop = t.positions.(stop) - t.start_pos in
Slices.get_substring t.slices ~start ~stop)
;;

let make ~start_pos ~pmarks ~slices ~marks ~positions =
let positions = Positions.all positions in
{ pmarks; slices; positions; marks; start_pos }
;;
end

let rec loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st =
if pos < last
then (
let st' = next colors st s pos in
let idx = (State.get_info st').idx in
if Idx.is_idx idx
then (
Positions.set positions (Idx.idx idx) (abs_pos + pos);
loop re ~abs_pos ~colors ~positions s ~pos:(pos + 1) ~last st' st')
else if Idx.is_break idx
then (
Positions.set positions (Idx.break_idx idx) (abs_pos + pos);
st')
else (
(* Unknown *)
validate re positions s ~pos st0;
loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st0))
else st
;;

let feed ({ t; positions; slices; abs_pos; first_match_pos = _ } as tt) s ~pos ~len =
let last = pos + len in
let state =
loop t.re ~abs_pos ~colors:t.re.colors s ~positions ~last ~pos t.state t.state
in
let info = State.get_info state in
if Idx.is_break info.idx
&&
match Automata.State.status info.desc with
| Failed -> true
| Match _ | Running -> false
then No_match
else (
let t = { t with state } in
let slices = { s; pos; len } :: slices in
let first_match_pos = Positions.first positions in
let slices = Slices.drop_rev slices (first_match_pos - tt.first_match_pos) in
let abs_pos = abs_pos + len in
Ok { tt with t; slices; abs_pos; first_match_pos })
;;

let finalize
({ t; positions; slices; abs_pos; first_match_pos = _ } as tt)
s
~pos
~len
: Match.t feed
=
let last = pos + len in
let state =
loop t.re ~abs_pos ~colors:t.re.colors s ~positions ~last ~pos t.state t.state
in
let info = State.get_info state in
match
match Automata.State.status info.desc with
| (Match _ | Failed) as s -> s
| Running ->
let idx, res =
let final_cat = Category.(search_boundary ++ inexistant) in
final t.re positions info final_cat
in
(match res with
| Running | Failed -> ()
| Match _ -> Positions.set positions (Automata.Idx.to_int idx) (abs_pos + last));
res
with
| Running | Failed -> No_match
| Match (marks, pmarks) ->
let first_match_position = Positions.first positions in
let slices = { s; pos; len } :: slices in
let slices = Slices.drop_rev slices (first_match_position - tt.first_match_pos) in
let slices = List.rev slices in
Ok (Match.make ~start_pos:first_match_position ~pmarks ~marks ~slices ~positions)
;;
end
end

let match_str_no_bounds ~groups ~partial re s ~pos ~len =
let positions = Positions.make ~groups re in
match make_match_str re positions ~len ~groups ~partial s ~pos with
Expand Down
28 changes: 28 additions & 0 deletions lib/compile.mli
Original file line number Diff line number Diff line change
@@ -1,5 +1,33 @@
type re

module Stream : sig
type t

type 'a feed =
| Ok of 'a
| No_match

val create : re -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> bool

module Group : sig
type stream := t
type t

module Match : sig
type t

val get : t -> int -> string option
val test_mark : t -> Pmark.t -> bool
end

val create : stream -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> Match.t feed
end
end

type match_info =
| Match of Group.t
| Failed
Expand Down
1 change: 1 addition & 0 deletions lib/core.ml
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,4 @@ include struct
end

module Seq = Search
module Stream = Compile.Stream
30 changes: 29 additions & 1 deletion lib/core.mli
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ val exec_partial_detailed
(** Marks *)
module Mark : sig
(** Mark id *)
type t
type t = Pmark.t

(** Tell if a mark was matched. *)
val test : Group.t -> t -> bool
Expand All @@ -229,6 +229,34 @@ module Mark : sig
val compare : t -> t -> int
end

module Stream : sig
type t

type 'a feed =
| Ok of 'a
| No_match

val create : re -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> bool

module Group : sig
type stream := t
type t

module Match : sig
type t

val get : t -> int -> string option
val test_mark : t -> Pmark.t -> bool
end

val create : stream -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> Match.t feed
end
end

(** {2 High Level Operations} *)

type split_token =
Expand Down
1 change: 1 addition & 0 deletions lib_test/expect/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(library
(name re_tests)
(modules import test_stream)
(libraries
re_private
;; This is because of the (implicit_transitive_deps false)
Expand Down
Loading

0 comments on commit f96f418

Please sign in to comment.