@@ -371,6 +371,238 @@ let make_match_str re positions ~len ~groups ~partial s ~pos =
371
371
else final_boundary_check re positions ~last ~slen s state_info ~groups
372
372
;;
373
373
374
+ module Stream = struct
375
+ type nonrec t =
376
+ { state : State .t
377
+ ; re : re
378
+ }
379
+
380
+ type 'a feed =
381
+ | Ok of 'a
382
+ | No_match
383
+
384
+ let create re =
385
+ let category = Category. (search_boundary ++ inexistant) in
386
+ let state = find_initial_state re category in
387
+ { state; re }
388
+ ;;
389
+
390
+ let feed t s ~pos ~len =
391
+ let last = pos + len in
392
+ let state = loop_no_mark t.re ~colors: t.re.colors s ~last ~pos t.state t.state in
393
+ let info = State. get_info state in
394
+ if Idx. is_break info.idx
395
+ &&
396
+ match Automata.State. status info.desc with
397
+ | Failed -> true
398
+ | Match _ | Running -> false
399
+ then No_match
400
+ else Ok { t with state }
401
+ ;;
402
+
403
+ let finalize t s ~pos ~len =
404
+ let last = pos + len in
405
+ let state = scan_str t.re Positions. empty s t.state ~last ~pos ~groups: false in
406
+ let info = State. get_info state in
407
+ match
408
+ let _idx, res =
409
+ let final_cat = Category. (search_boundary ++ inexistant) in
410
+ final t.re Positions. empty info final_cat
411
+ in
412
+ res
413
+ with
414
+ | Running | Failed -> false
415
+ | Match _ -> true
416
+ ;;
417
+
418
+ module Group = struct
419
+ type slice =
420
+ { s : string
421
+ ; pos : int
422
+ ; len : int
423
+ }
424
+
425
+ module Slices = struct
426
+ type t = slice list
427
+
428
+ let get_substring slices ~start ~stop =
429
+ if stop = start
430
+ then " "
431
+ else (
432
+ let slices =
433
+ let rec drop slices remains =
434
+ if remains = 0
435
+ then slices
436
+ else (
437
+ match slices with
438
+ | [] -> assert false
439
+ | ({ s = _ ; pos; len } as slice ) :: xs ->
440
+ let remains' = remains - len in
441
+ if remains' > = 0
442
+ then drop xs remains'
443
+ else (
444
+ let pos = pos + remains in
445
+ let len = len - remains in
446
+ { slice with pos; len } :: xs))
447
+ in
448
+ drop slices start
449
+ in
450
+ let buf = Buffer. create (stop - start) in
451
+ let rec take slices remains =
452
+ if remains > 0
453
+ then (
454
+ match slices with
455
+ | [] -> assert false
456
+ | { s; pos; len } :: xs ->
457
+ let remains' = remains - len in
458
+ if remains' > 0
459
+ then (
460
+ Buffer. add_substring buf s pos len;
461
+ take xs remains')
462
+ else Buffer. add_substring buf s pos remains)
463
+ in
464
+ take slices (stop - start);
465
+ Buffer. contents buf)
466
+ ;;
467
+
468
+ let rec drop t remains =
469
+ if remains = 0
470
+ then t
471
+ else (
472
+ match t with
473
+ | [] -> []
474
+ | ({ s = _ ; pos; len } as slice ) :: t ->
475
+ if remains > = len
476
+ then drop t (remains - len)
477
+ else (
478
+ let delta = len - remains in
479
+ { slice with pos = pos + delta; len = len - delta } :: t))
480
+ ;;
481
+
482
+ let drop_rev t remains =
483
+ if remains = 0 then t else List. rev (drop (List. rev t) remains)
484
+ ;;
485
+ end
486
+
487
+ type nonrec t =
488
+ { t : t
489
+ ; positions : Positions .t
490
+ ; slices : Slices .t
491
+ ; abs_pos : int
492
+ ; first_match_pos : int
493
+ }
494
+
495
+ let create t =
496
+ { t
497
+ ; positions = Positions. make ~groups: true t.re
498
+ ; slices = []
499
+ ; abs_pos = 0
500
+ ; first_match_pos = 0
501
+ }
502
+ ;;
503
+
504
+ module Match = struct
505
+ type t =
506
+ { pmarks : Pmark.Set .t
507
+ ; slices : slice list
508
+ ; marks : Mark_infos .t
509
+ ; positions : int array
510
+ ; start_pos : int
511
+ }
512
+
513
+ let test_mark t mark = Pmark.Set. mem mark t.pmarks
514
+
515
+ let get t i =
516
+ Mark_infos. offset t.marks i
517
+ |> Option. map (fun (start , stop ) ->
518
+ let start = t.positions.(start) - t.start_pos in
519
+ let stop = t.positions.(stop) - t.start_pos in
520
+ Slices. get_substring t.slices ~start ~stop )
521
+ ;;
522
+
523
+ let make ~start_pos ~pmarks ~slices ~marks ~positions =
524
+ let positions = Positions. all positions in
525
+ { pmarks; slices; positions; marks; start_pos }
526
+ ;;
527
+ end
528
+
529
+ let rec loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st =
530
+ if pos < last
531
+ then (
532
+ let st' = next colors st s pos in
533
+ let idx = (State. get_info st').idx in
534
+ if Idx. is_idx idx
535
+ then (
536
+ Positions. set positions (Idx. idx idx) (abs_pos + pos);
537
+ loop re ~abs_pos ~colors ~positions s ~pos: (pos + 1 ) ~last st' st')
538
+ else if Idx. is_break idx
539
+ then (
540
+ Positions. set positions (Idx. break_idx idx) (abs_pos + pos);
541
+ st')
542
+ else (
543
+ (* Unknown *)
544
+ validate re positions s ~pos st0;
545
+ loop re ~abs_pos ~colors ~positions s ~pos ~last st0 st0))
546
+ else st
547
+ ;;
548
+
549
+ let feed ({ t; positions; slices; abs_pos; first_match_pos = _ } as tt ) s ~pos ~len =
550
+ let last = pos + len in
551
+ let state =
552
+ loop t.re ~abs_pos ~colors: t.re.colors s ~positions ~last ~pos t.state t.state
553
+ in
554
+ let info = State. get_info state in
555
+ if Idx. is_break info.idx
556
+ &&
557
+ match Automata.State. status info.desc with
558
+ | Failed -> true
559
+ | Match _ | Running -> false
560
+ then No_match
561
+ else (
562
+ let t = { t with state } in
563
+ let slices = { s; pos; len } :: slices in
564
+ let first_match_pos = Positions. first positions in
565
+ let slices = Slices. drop_rev slices (first_match_pos - tt.first_match_pos) in
566
+ let abs_pos = abs_pos + len in
567
+ Ok { tt with t; slices; abs_pos; first_match_pos })
568
+ ;;
569
+
570
+ let finalize
571
+ ({ t; positions; slices; abs_pos; first_match_pos = _ } as tt )
572
+ s
573
+ ~pos
574
+ ~len
575
+ : Match. t feed
576
+ =
577
+ let last = pos + len in
578
+ let state =
579
+ loop t.re ~abs_pos ~colors: t.re.colors s ~positions ~last ~pos t.state t.state
580
+ in
581
+ let info = State. get_info state in
582
+ match
583
+ match Automata.State. status info.desc with
584
+ | (Match _ | Failed ) as s -> s
585
+ | Running ->
586
+ let idx, res =
587
+ let final_cat = Category. (search_boundary ++ inexistant) in
588
+ final t.re positions info final_cat
589
+ in
590
+ (match res with
591
+ | Running | Failed -> ()
592
+ | Match _ -> Positions. set positions (Automata.Idx. to_int idx) (abs_pos + last));
593
+ res
594
+ with
595
+ | Running | Failed -> No_match
596
+ | Match (marks , pmarks ) ->
597
+ let first_match_position = Positions. first positions in
598
+ let slices = { s; pos; len } :: slices in
599
+ let slices = Slices. drop_rev slices (first_match_position - tt.first_match_pos) in
600
+ let slices = List. rev slices in
601
+ Ok (Match. make ~start_pos: first_match_position ~pmarks ~marks ~slices ~positions )
602
+ ;;
603
+ end
604
+ end
605
+
374
606
let match_str_no_bounds ~groups ~partial re s ~pos ~len =
375
607
let positions = Positions. make ~groups re in
376
608
match make_match_str re positions ~len ~groups ~partial s ~pos with
0 commit comments