Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: add streaming interface #456

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 245 additions & 0 deletions lib/compile.ml
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,251 @@ let make_match_str re positions ~len ~groups ~partial s ~pos =
else final_boundary_check re positions ~last ~slen s state_info ~groups
;;

module Stream = struct
type nonrec t =
{ state : State.t
; re : re
}

type 'a feed =
| Ok of 'a
| No_match

let create re =
let category = Category.(search_boundary ++ inexistant) in
let state = find_initial_state re category in
{ state; re }
;;

let feed t s ~pos ~len =
(* TODO bound checks? *)
let last = pos + len in
let state = loop_no_mark t.re ~colors:t.re.colors s ~last ~pos t.state t.state in
let info = State.get_info state in
if Idx.is_break info.idx
&&
match Automata.State.status info.desc with
| Failed -> true
| Match _ | Running -> false
then No_match
else Ok { t with state }
;;

let finalize t s ~pos ~len =
(* TODO bound checks? *)
let last = pos + len in
let state = scan_str t.re Positions.empty s t.state ~last ~pos ~groups:false in
let info = State.get_info state in
match
let _idx, res =
let final_cat = Category.(search_boundary ++ inexistant) in
final t.re Positions.empty info final_cat
in
res
with
| Running | Failed -> false
| Match _ -> true
;;

module Group = struct
type slice =
{ s : string
; pos : int
; len : int
}

module Slices = struct
type t = slice list

let get_substring slices ~start ~stop =
if stop = start
then ""
else (
let slices =
let rec drop slices remains =
if remains = 0
then slices
else (
match slices with
| [] -> assert false
| ({ s = _; pos; len } as slice) :: xs ->
let remains' = remains - len in
if remains' >= 0
then drop xs remains'
else (
let pos = pos + remains in
let len = len - remains in
{ slice with pos; len } :: xs))
in
drop slices start
in
let buf = Buffer.create (stop - start) in
let rec take slices remains =
if remains > 0
then (
match slices with
| [] -> assert false
| { s; pos; len } :: xs ->
let remains' = remains - len in
if remains' > 0
then (
Buffer.add_substring buf s pos len;
take xs remains')
else Buffer.add_substring buf s pos remains)
in
take slices (stop - start);
Buffer.contents buf)
;;

let rec drop t remains =
if remains = 0
then t
else (
match t with
| [] -> []
| ({ s = _; pos; len } as slice) :: t ->
if remains >= len
then drop t (remains - len)
else (
let delta = len - remains in
{ slice with pos = pos + delta; len = len - delta } :: t))
;;

let drop_rev t remains =
(* TODO Use a proper functional queue *)
if remains = 0 then t else List.rev (drop (List.rev t) remains)
;;
end

type nonrec t =
{ t : t
; positions : Positions.t
; slices : Slices.t
; abs_pos : int
; first_match_pos : int
}

let no_match_starts_before t = t.first_match_pos

let create t =
{ t
; positions = Positions.make ~groups:true t.re
; slices = []
; abs_pos = 0
; first_match_pos = 0
}
;;

module Match = struct
type t =
{ pmarks : Pmark.Set.t
; slices : slice list
; marks : Mark_infos.t
; positions : int array
; start_pos : int
}

let test_mark t mark = Pmark.Set.mem mark t.pmarks

let get t i =
Mark_infos.offset t.marks i
|> Option.map (fun (start, stop) ->
let start = t.positions.(start) - t.start_pos in
let stop = t.positions.(stop) - t.start_pos in
Slices.get_substring t.slices ~start ~stop)
;;

let make ~start_pos ~pmarks ~slices ~marks ~positions =
let positions = Positions.all positions in
{ pmarks; slices; positions; marks; start_pos }
;;
end

let rec loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st =
if pos < last
then (
let st' = next colors st s pos in
let idx = (State.get_info st').idx in
if Idx.is_idx idx
then (
Positions.set positions (Idx.idx idx) (abs_pos + pos);
loop re ~abs_pos ~colors ~positions s ~pos:(pos + 1) ~last st' st')
else if Idx.is_break idx
then (
Positions.set positions (Idx.break_idx idx) (abs_pos + pos);
st')
else (
(* Unknown *)
validate re positions s ~pos st0;
loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st0))
else st
;;

let feed ({ t; positions; slices; abs_pos; first_match_pos = _ } as tt) s ~pos ~len =
let state =
(* TODO bound checks? *)
let last = pos + len in
loop t.re ~abs_pos ~colors:t.re.colors s ~positions ~last ~pos t.state t.state
in
let info = State.get_info state in
if Idx.is_break info.idx
&&
match Automata.State.status info.desc with
| Failed -> true
| Match _ | Running -> false
then No_match
else (
let t = { t with state } in
let slices = { s; pos; len } :: slices in
let first_match_pos = Positions.first positions in
let slices = Slices.drop_rev slices (first_match_pos - tt.first_match_pos) in
let abs_pos = abs_pos + len in
Ok { tt with t; slices; abs_pos; first_match_pos })
;;

let finalize
({ t; positions; slices; abs_pos; first_match_pos = _ } as tt)
s
~pos
~len
: Match.t feed
=
(* TODO bound checks? *)
let last = pos + len in
let info =
let state =
loop t.re ~abs_pos ~colors:t.re.colors s ~positions ~last ~pos t.state t.state
in
State.get_info state
in
match
match Automata.State.status info.desc with
| (Match _ | Failed) as s -> s
| Running ->
let idx, res =
let final_cat = Category.(search_boundary ++ inexistant) in
final t.re positions info final_cat
in
(match res with
| Running | Failed -> ()
| Match _ -> Positions.set positions (Automata.Idx.to_int idx) (abs_pos + last));
res
with
| Running | Failed -> No_match
| Match (marks, pmarks) ->
let first_match_position = Positions.first positions in
let slices =
let slices =
let slices = { s; pos; len } :: slices in
Slices.drop_rev slices (first_match_position - tt.first_match_pos)
in
List.rev slices
in
Ok (Match.make ~start_pos:first_match_position ~pmarks ~marks ~slices ~positions)
;;
end
end

let match_str_no_bounds ~groups ~partial re s ~pos ~len =
let positions = Positions.make ~groups re in
match make_match_str re positions ~len ~groups ~partial s ~pos with
Expand Down
29 changes: 29 additions & 0 deletions lib/compile.mli
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
type re

module Stream : sig
type t

type 'a feed =
| Ok of 'a
| No_match

val create : re -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> bool

module Group : sig
type stream := t
type t

module Match : sig
type t

val get : t -> int -> string option
val test_mark : t -> Pmark.t -> bool
end

val create : stream -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> Match.t feed
val no_match_starts_before : t -> int
end
end

type match_info =
| Match of Group.t
| Failed
Expand Down
1 change: 1 addition & 0 deletions lib/core.ml
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,4 @@ include struct
end

module Seq = Search
module Stream = Compile.Stream
30 changes: 29 additions & 1 deletion lib/core.mli
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ val exec_partial_detailed
(** Marks *)
module Mark : sig
(** Mark id *)
type t
type t = Pmark.t

(** Tell if a mark was matched. *)
val test : Group.t -> t -> bool
Expand All @@ -229,6 +229,34 @@ module Mark : sig
val compare : t -> t -> int
end

module Stream : sig
type t

type 'a feed =
| Ok of 'a
| No_match

val create : re -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> bool

module Group : sig
type stream := t
type t

module Match : sig
type t

val get : t -> int -> string option
val test_mark : t -> Pmark.t -> bool
end

val create : stream -> t
val feed : t -> string -> pos:int -> len:int -> t feed
val finalize : t -> string -> pos:int -> len:int -> Match.t feed
end
end

(** {2 High Level Operations} *)

type split_token =
Expand Down
1 change: 1 addition & 0 deletions lib_test/expect/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
(library
(name re_tests)
(modules import test_stream)
(libraries
re_private
;; This is because of the (implicit_transitive_deps false)
Expand Down
Loading
Loading