@@ -189,6 +189,9 @@ class Ex::Private
189
189
190
190
/** The pattern string as passed by the user */
191
191
std::string pattern;
192
+
193
+ /** Number of capture groups in the pattern (excluding the whole match) */
194
+ size_t captureCount = 0 ;
192
195
};
193
196
194
197
/** Compiles a regular expression passed as a string into a stream of tokens that can be used for
@@ -198,6 +201,7 @@ void Ex::Private::compile()
198
201
{
199
202
error = false ;
200
203
data.clear ();
204
+ captureCount = 0 ;
201
205
if (pattern.empty ()) return ;
202
206
const char *start = pattern.c_str ();
203
207
const char *ps = start;
@@ -206,6 +210,10 @@ void Ex::Private::compile()
206
210
int prevTokenPos=-1 ;
207
211
int tokenPos=0 ;
208
212
213
+ // capture group assignment
214
+ std::vector<size_t > captureStack;
215
+ size_t nextCaptureId = 0 ;
216
+
209
217
auto addToken = [&](PToken tok)
210
218
{
211
219
tokenPos++;
@@ -274,12 +282,27 @@ void Ex::Private::compile()
274
282
addToken (PToken (PToken::Kind::Any));
275
283
break ;
276
284
case ' (' : // begin of capture group
277
- prevTokenPos = tokenPos;
278
- addToken (PToken (PToken::Kind::BeginCapture));
285
+ {
286
+ prevTokenPos = tokenPos;
287
+ addToken (PToken (PToken::Kind::BeginCapture));
288
+ size_t id = ++nextCaptureId; // groups start at 1, 0 is whole match
289
+ data.back ().setValue (id);
290
+ captureStack.push_back (id);
291
+ }
279
292
break ;
280
293
case ' )' : // end of capture group
281
- prevTokenPos = tokenPos;
282
- addToken (PToken (PToken::Kind::EndCapture));
294
+ {
295
+ prevTokenPos = tokenPos;
296
+ if (captureStack.empty ())
297
+ {
298
+ error=true ;
299
+ return ;
300
+ }
301
+ size_t id = captureStack.back ();
302
+ captureStack.pop_back ();
303
+ addToken (PToken (PToken::Kind::EndCapture));
304
+ data.back ().setValue (id);
305
+ }
283
306
break ;
284
307
case ' [' : // character class
285
308
{
@@ -402,6 +425,12 @@ void Ex::Private::compile()
402
425
}
403
426
ps++;
404
427
}
428
+ if (!captureStack.empty ()) // Unmatched '('?
429
+ {
430
+ error=true ;
431
+ return ;
432
+ }
433
+ captureCount = nextCaptureId;
405
434
// addToken(PToken(PToken::Kind::End));
406
435
}
407
436
@@ -412,6 +441,7 @@ void Ex::Private::dump()
412
441
size_t l = data.size ();
413
442
size_t i =0 ;
414
443
DBG (" ==== compiled token stream for pattern '%s' ===\n " ,pattern.c_str ());
444
+ DBG (" captureCount=%zu\n " ,captureCount);
415
445
while (i<l)
416
446
{
417
447
DBG (" [%s:%04x]\n " ,data[i].kindStr (),data[i].value ());
@@ -531,7 +561,7 @@ bool Ex::Private::matchAt(size_t tokenPos,size_t tokenLen,std::string_view str,M
531
561
size_t tokenStart = ++tokenPos;
532
562
while (tokenPos<tokenLen && data[tokenPos].kind ()!=PToken::Kind::EndCapture) { tokenPos++; }
533
563
Match rangeMatch;
534
- rangeMatch.init (str);
564
+ rangeMatch.init (str, 0 );
535
565
bool found = matchAt (tokenStart,tokenPos,str,rangeMatch,index,level+1 );
536
566
if (found)
537
567
{
@@ -614,12 +644,12 @@ bool Ex::Private::matchAt(size_t tokenPos,size_t tokenLen,std::string_view str,M
614
644
(isIdChar (str[index]) || index==0 || !isIdChar (str[index-1 ]))) return false ;
615
645
break ;
616
646
case PToken::Kind::BeginCapture:
617
- DBG (" BeginCapture(%zu)\n " ,index);
618
- match.startCapture (index);
647
+ DBG (" BeginCapture(%zu) gid=%u \n " ,index,tok. value () );
648
+ match.startCapture (tok. value (), index);
619
649
break ;
620
650
case PToken::Kind::EndCapture:
621
- DBG (" EndCapture(%zu)\n " ,index);
622
- match.endCapture (index);
651
+ DBG (" EndCapture(%zu) gid=%u \n " ,index,tok. value () );
652
+ match.endCapture (tok. value (), index);
623
653
break ;
624
654
case PToken::Kind::Any:
625
655
if (index>=str.length ()) return false ;
@@ -707,7 +737,7 @@ bool Ex::match(std::string_view str,Match &match,size_t pos) const
707
737
{
708
738
bool found=false ;
709
739
if (p->data .size ()==0 || p->error ) return found;
710
- match.init (str);
740
+ match.init (str,p-> captureCount );
711
741
712
742
PToken tok = p->data [0 ];
713
743
if (tok.kind ()==PToken::Kind::BeginOfLine) // only test match at the given position
@@ -721,10 +751,10 @@ bool Ex::match(std::string_view str,Match &match,size_t pos) const
721
751
size_t index = str.find (tok.asciiValue (),pos);
722
752
if (index==std::string::npos)
723
753
{
724
- DBG (" Ex::match(str='%s',pos=%zu)=false (no start char '%c')\n " ,str.c_str (),pos,tok.asciiValue ());
754
+ DBG (" Ex::match(str='%s',pos=%zu)=false (no start char '%c')\n " ,std::string ( str) .c_str (),pos,tok.asciiValue ());
725
755
return false ;
726
756
}
727
- DBG (" pos=%zu str='%s' char='%c' index=%zu\n " ,index,str.c_str (),tok.asciiValue (),index);
757
+ DBG (" pos=%zu str='%s' char='%c' index=%zu\n " ,index,std::string ( str) .c_str (),tok.asciiValue (),index);
728
758
pos=index;
729
759
}
730
760
while (pos<str.length ()) // search for a match starting at pos
@@ -734,7 +764,7 @@ bool Ex::match(std::string_view str,Match &match,size_t pos) const
734
764
pos++;
735
765
}
736
766
}
737
- DBG (" Ex::match(str='%s',pos=%zu)=%d\n " ,str.c_str (),pos,found);
767
+ DBG (" Ex::match(str='%s',pos=%zu)=%d\n " ,std::string ( str) .c_str (),pos,found);
738
768
return found;
739
769
}
740
770
0 commit comments