|
1 | 1 | /******************************************************************************
|
2 | 2 | *
|
3 |
| - * Copyright (C) 1997-2021 by Dimitri van Heesch. |
| 3 | + * Copyright (C) 1997-2025 by Dimitri van Heesch. |
4 | 4 | *
|
5 | 5 | * Permission to use, copy, modify, and distribute this software and its
|
6 | 6 | * documentation under the terms of the GNU General Public License is hereby
|
@@ -520,38 +520,102 @@ bool Ex::Private::matchAt(size_t tokenPos,size_t tokenLen,std::string_view str,M
|
520 | 520 | size_t startIndex = index;
|
521 | 521 | size_t len = str.length();
|
522 | 522 | PToken tok = data[++tokenPos];
|
523 |
| - if (tok.kind()==PToken::Kind::Character) // 'x*' -> eat x's |
| 523 | + |
| 524 | + // Special handling for an optional capture group: (...)? |
| 525 | + if (type==OptionalRange && tok.kind()==PToken::Kind::BeginCapture) |
| 526 | + { |
| 527 | + size_t groupId = tok.value(); |
| 528 | + size_t innerStart = tokenPos + 1; |
| 529 | + |
| 530 | + // Find matching EndCapture, accounting for nesting depth |
| 531 | + size_t tp = innerStart; |
| 532 | + int depth = 1; |
| 533 | + while (tp<tokenLen && depth>0) |
| 534 | + { |
| 535 | + if (data[tp].kind()==PToken::Kind::BeginCapture) depth++; |
| 536 | + else if (data[tp].kind()==PToken::Kind::EndCapture) depth--; |
| 537 | + tp++; |
| 538 | + } |
| 539 | + if (depth!=0) return false; // malformed: the group's '(' has no matching ')' |
| 540 | + size_t endCapturePos = tp - 1; // position of EndCapture |
| 541 | + size_t afterSeqPos = endCapturePos + 2; // skip EndCapture and End marker |
| 542 | + |
| 543 | + // Try with the group present |
| 544 | + Match tmp; |
| 545 | + tmp.init(str, /*captureCount*/ captureCount); |
| 546 | + bool innerOk = matchAt(innerStart,endCapturePos,str,tmp,index,level+1); |
| 547 | + if (innerOk) |
| 548 | + { |
| 549 | + size_t capLen = tmp.length(); |
| 550 | + |
| 551 | + // Copy nested captures from tmp (they may exist inside the group) |
| 552 | + for (size_t gid=1; gid<tmp.size(); gid++) |
| 553 | + { |
| 554 | + size_t sp = tmp[gid].position(); |
| 555 | + size_t sl = tmp[gid].length(); |
| 556 | + if (sp!=std::string::npos && sl!=std::string::npos) |
| 557 | + { |
| 558 | + match.startCapture(gid,sp); |
| 559 | + match.endCapture(gid,sp+sl); |
| 560 | + } |
| 561 | + } |
| 562 | + // Set the outer group's capture |
| 563 | + match.startCapture(groupId,index); |
| 564 | + match.endCapture(groupId,index+capLen); |
| 565 | + |
| 566 | + bool ok = matchAt(afterSeqPos,tokenLen,str,match,index+capLen,level+1); |
| 567 | + if (ok) |
| 568 | + { |
| 569 | + match.setMatch(pos,(index+capLen)-pos+match.length()); |
| 570 | + return true; |
| 571 | + } |
| 572 | + } |
| 573 | + |
| 574 | + // Try with the group absent (empty capture) |
| 575 | + match.startCapture(groupId,index); |
| 576 | + match.endCapture(groupId,index); // zero-length |
| 577 | + |
| 578 | + bool ok2 = matchAt(afterSeqPos,tokenLen,str,match,index,level+1); |
| 579 | + if (ok2) |
| 580 | + { |
| 581 | + match.setMatch(pos,index-pos+match.length()); |
| 582 | + return true; |
| 583 | + } |
| 584 | + return false; |
| 585 | + } |
| 586 | + |
| 587 | + if (tok.kind()==PToken::Kind::Character) // 'x*' or 'x?' |
524 | 588 | {
|
525 | 589 | char c_tok = tok.asciiValue();
|
526 | 590 | while (index<len && str[index]==c_tok) { index++; if (type==Optional) break; }
|
527 | 591 | tokenPos++;
|
528 | 592 | }
|
529 |
| - else if (tok.isCharClass()) // '[a-f0-4]* -> eat matching characters |
| 593 | + else if (tok.isCharClass()) // '[a-f0-4]*' or '[...]?' -> eat matching characters |
530 | 594 | {
|
531 | 595 | while (index<len && matchCharClass(tokenPos,str[index])) { index++; if (type==Optional) break; }
|
532 | 596 | tokenPos+=tok.value()+1; // skip over character ranges + end token
|
533 | 597 | }
|
534 |
| - else if (tok.kind()==PToken::Kind::Alpha) // '\a*' -> eat start id characters |
| 598 | + else if (tok.kind()==PToken::Kind::Alpha) // '\a*' or '\a?' -> eat start id characters |
535 | 599 | {
|
536 | 600 | while (index<len && isStartIdChar(str[index])) { index++; if (type==Optional) break; }
|
537 | 601 | tokenPos++;
|
538 | 602 | }
|
539 |
| - else if (tok.kind()==PToken::Kind::AlphaNum) // '\w*' -> eat id characters |
| 603 | + else if (tok.kind()==PToken::Kind::AlphaNum) // '\w*' or '\w?' -> eat id characters |
540 | 604 | {
|
541 | 605 | while (index<len && isIdChar(str[index])) { index++; if (type==Optional) break; }
|
542 | 606 | tokenPos++;
|
543 | 607 | }
|
544 |
| - else if (tok.kind()==PToken::Kind::WhiteSpace) // '\s*' -> eat spaces |
| 608 | + else if (tok.kind()==PToken::Kind::WhiteSpace) // '\s*' or '\s?' -> eat spaces |
545 | 609 | {
|
546 | 610 | while (index<len && isspace(str[index])) { index++; if (type==Optional) break; }
|
547 | 611 | tokenPos++;
|
548 | 612 | }
|
549 |
| - else if (tok.kind()==PToken::Kind::Digit) // '\d*' -> eat digits |
| 613 | + else if (tok.kind()==PToken::Kind::Digit) // '\d*' or '\d?' -> eat digits |
550 | 614 | {
|
551 | 615 | while (index<len && isdigit(str[index])) { index++; if (type==Optional) break; }
|
552 | 616 | tokenPos++;
|
553 | 617 | }
|
554 |
| - else if (tok.kind()==PToken::Kind::Any) // '.*' -> eat all |
| 618 | + else if (tok.kind()==PToken::Kind::Any) // '.*' -> eat all, '.?' -> eat one character |
555 | 619 | {
|
556 | 620 | if (type==Optional) index++; else index = str.length();
|
557 | 621 | tokenPos++;
|
|
0 commit comments