diff --git a/HISTORY b/HISTORY
index 8ada774..0c10edd 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,4 +1,10 @@
-HISTORY for LPeg 0.12
+HISTORY for LPeg 1.0
+
+* Changes from version 0.12 to 1.0
+ ---------------------------------
+ + group "names" can be any Lua value
+ + some bugs fixed
+ + other small improvements
* Changes from version 0.11 to 0.12
---------------------------------
diff --git a/lpcap.c b/lpcap.c
index d90b935..c9085de 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -126,7 +126,7 @@ static Capture *findback (CapState *cs, Capture *cap) {
continue; /* opening an enclosing capture: skip and get previous */
if (captype(cap) == Cgroup) {
getfromktable(cs, cap->idx); /* get group name */
- if (lua_equal(L, -2, -1)) { /* right group? */
+ if (lp_equal(L, -2, -1)) { /* right group? */
lua_pop(L, 2); /* remove reference name and group name */
return cap;
}
@@ -462,7 +462,7 @@ static int pushcapture (CapState *cs) {
case Carg: {
int arg = (cs->cap++)->idx;
if (arg + FIXEDARGS > cs->ptop)
- return luaL_error(L, "reference to absent argument #%d", arg);
+ return luaL_error(L, "reference to absent extra argument #%d", arg);
lua_pushvalue(L, arg + FIXEDARGS);
return 1;
}
diff --git a/lpcap.h b/lpcap.h
index c0a0e38..d762fdc 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -1,5 +1,5 @@
/*
-** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
*/
#if !defined(lpcap_h)
@@ -18,7 +18,7 @@ typedef enum CapKind {
typedef struct Capture {
const char *s; /* subject position */
- short idx; /* extra info about capture (group name, arg index, etc.) */
+ unsigned short idx; /* extra info (group name, arg index, etc.) */
byte kind; /* kind of capture */
byte siz; /* size of full capture + 1 (0 = not a full capture) */
} Capture;
diff --git a/lpcode.c b/lpcode.c
index 2cc0e0d..362ec20 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $
+** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -33,26 +33,30 @@ static const Charset *fullset = &fullset_;
*/
/*
-** Check whether a charset is empty (IFail), singleton (IChar),
-** full (IAny), or none of those (ISet).
+** Check whether a charset is empty (returns IFail), singleton (IChar),
+** full (IAny), or none of those (ISet). When singleton, '*c' returns
+** which character it is. (When generic set, the set was the input,
+** so there is no need to return it.)
*/
static Opcode charsettype (const byte *cs, int *c) {
- int count = 0;
+ int count = 0; /* number of characters in the set */
int i;
- int candidate = -1; /* candidate position for a char */
- for (i = 0; i < CHARSETSIZE; i++) {
+ int candidate = -1; /* candidate position for the singleton char */
+ for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */
int b = cs[i];
- if (b == 0) {
- if (count > 1) return ISet; /* else set is still empty */
+ if (b == 0) { /* is byte empty? */
+ if (count > 1) /* was set neither empty nor singleton? */
+ return ISet; /* neither full nor empty nor singleton */
+ /* else set is still empty or singleton */
}
- else if (b == 0xFF) {
- if (count < (i * BITSPERCHAR))
- return ISet;
+ else if (b == 0xFF) { /* is byte full? */
+ if (count < (i * BITSPERCHAR)) /* was set not full? */
+ return ISet; /* neither full nor empty nor singleton */
else count += BITSPERCHAR; /* set is still full */
}
- else if ((b & (b - 1)) == 0) { /* byte has only one bit? */
- if (count > 0)
- return ISet; /* set is neither full nor empty */
+ else if ((b & (b - 1)) == 0) { /* has byte only one bit? */
+ if (count > 0) /* was set not empty? */
+ return ISet; /* neither full nor empty nor singleton */
else { /* set has only one char till now; track it */
count++;
candidate = i;
@@ -77,6 +81,7 @@ static Opcode charsettype (const byte *cs, int *c) {
}
}
+
/*
** A few basic operations on Charsets
*/
@@ -84,16 +89,11 @@ static void cs_complement (Charset *cs) {
loopset(i, cs->cs[i] = ~cs->cs[i]);
}
-
static int cs_equal (const byte *cs1, const byte *cs2) {
loopset(i, if (cs1[i] != cs2[i]) return 0);
return 1;
}
-
-/*
-** computes whether sets cs1 and cs2 are disjoint
-*/
static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
return 1;
@@ -101,7 +101,8 @@ static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
/*
-** Convert a 'char' pattern (TSet, TChar, TAny) to a charset
+** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
+** charset and return 1; else return 0.
*/
int tocharset (TTree *tree, Charset *cs) {
switch (tree->tag) {
@@ -116,7 +117,7 @@ int tocharset (TTree *tree, Charset *cs) {
return 1;
}
case TAny: {
- loopset(i, cs->cs[i] = 0xFF); /* add all to the set */
+ loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */
return 1;
}
default: return 0;
@@ -125,13 +126,16 @@ int tocharset (TTree *tree, Charset *cs) {
/*
-** Checks whether a pattern has captures
+** Check whether a pattern tree has captures
*/
int hascaptures (TTree *tree) {
tailcall:
switch (tree->tag) {
case TCapture: case TRunTime:
return 1;
+ case TCall:
+ tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */
+ case TOpenCall: assert(0);
default: {
switch (numsiblings[tree->tag]) {
case 1: /* return hascaptures(sib1(tree)); */
@@ -161,7 +165,7 @@ int hascaptures (TTree *tree) {
** p is nullable => nullable(p)
** nofail(p) => p cannot fail
** The function assumes that TOpenCall is not nullable;
-** this will be checked again when the grammar is fixed.)
+** this will be checked again when the grammar is fixed.
** Run-time captures can do whatever they want, so the result
** is conservative.
*/
@@ -198,7 +202,7 @@ int checkaux (TTree *tree, int pred) {
case TCall: /* return checkaux(sib2(tree), pred); */
tree = sib2(tree); goto tailcall;
default: assert(0); return 0;
- };
+ }
}
@@ -245,16 +249,20 @@ int fixedlenx (TTree *tree, int count, int len) {
/*
** Computes the 'first set' of a pattern.
** The result is a conservative aproximation:
-** match p ax -> x' for some x ==> a in first(p).
+** match p ax -> x (for some x) ==> a belongs to first(p)
+** or
+** a not in first(p) ==> match p ax -> fail (for all x)
+**
** The set 'follow' is the first set of what follows the
** pattern (full set if nothing follows it).
-** The function returns 0 when this set can be used for
-** tests that avoid the pattern altogether.
+**
+** The function returns 0 when this resulting set can be used for
+** test instructions that avoid the pattern altogether.
** A non-zero return can happen for two reasons:
-** 1) match p '' -> '' ==> returns 1.
-** (tests cannot be used because they always fail for an empty input)
-** 2) there is a match-time capture ==> returns 2.
-** (match-time captures should not be avoided by optimizations)
+** 1) match p '' -> '' ==> return has flag value 1 set
+** (tests cannot be used because they would always fail for an empty input);
+** 2) there is a match-time capture ==> return has flag value 2 set
+** (optimizations should not bypass match-time captures).
*/
static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
tailcall:
@@ -265,7 +273,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
}
case TTrue: {
loopset(i, firstset->cs[i] = follow->cs[i]);
- return 1;
+ return 1; /* accepts the empty string */
}
case TFalse: {
loopset(i, firstset->cs[i] = 0);
@@ -280,7 +288,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
}
case TSeq: {
if (!nullable(sib1(tree))) {
- /* return getfirst(sib1(tree), fullset, firstset); */
+ /* when p1 is not nullable, p2 has nothing to contribute;
+ return getfirst(sib1(tree), fullset, firstset); */
tree = sib1(tree); follow = fullset; goto tailcall;
}
else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
@@ -324,7 +333,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
/* else go through */
}
case TBehind: { /* instruction gives no new information */
- /* call 'getfirst' to check for math-time captures */
+ /* call 'getfirst' only to check for match-time captures */
int e = getfirst(sib1(tree), follow, firstset);
loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
return e | 1; /* always can accept the empty string */
@@ -335,8 +344,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
/*
-** If it returns true, then pattern can fail only depending on the next
-** character of the subject
+** If 'headfail(tree)' is true, then 'tree' can fail only depending on the
+** next character of the subject.
*/
static int headfail (TTree *tree) {
tailcall:
@@ -403,9 +412,9 @@ int sizei (const Instruction *i) {
switch((Opcode)i->i.code) {
case ISet: case ISpan: return CHARSETINSTSIZE;
case ITestSet: return CHARSETINSTSIZE + 1;
- case ITestChar: case ITestAny: case IChoice: case IJmp:
- case ICall: case IOpenCall: case ICommit: case IPartialCommit:
- case IBackCommit: return 2;
+ case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
+ case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
+ return 2;
default: return 1;
}
}
@@ -422,16 +431,17 @@ typedef struct CompileState {
/*
-** code generation is recursive; 'opt' indicates that the code is
-** being generated under a 'IChoice' operator jumping to its end.
-** 'tt' points to a previous test protecting this code. 'fl' is
-** the follow set of the pattern.
+** code generation is recursive; 'opt' indicates that the code is being
+** generated as the last thing inside an optional pattern (so, if that
+** code is optional too, it can reuse the 'IChoice' already in place for
+** the outer pattern). 'tt' points to a previous test protecting this
+** code (or NOINST). 'fl' is the follow set of the pattern.
*/
static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
const Charset *fl);
-void reallocprog (lua_State *L, Pattern *p, int nsize) {
+void realloccode (lua_State *L, Pattern *p, int nsize) {
void *ud;
lua_Alloc f = lua_getallocf(L, &ud);
void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
@@ -446,7 +456,7 @@ void reallocprog (lua_State *L, Pattern *p, int nsize) {
static int nextinstruction (CompileState *compst) {
int size = compst->p->codesize;
if (compst->ncode >= size)
- reallocprog(compst->L, compst->p, size * 2);
+ realloccode(compst->L, compst->p, size * 2);
return compst->ncode++;
}
@@ -462,6 +472,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) {
}
+/*
+** Add an instruction followed by space for an offset (to be set later)
+*/
static int addoffsetinst (CompileState *compst, Opcode op) {
int i = addinstruction(compst, op, 0); /* instruction */
addinstruction(compst, (Opcode)0, 0); /* open space for offset */
@@ -470,6 +483,9 @@ static int addoffsetinst (CompileState *compst, Opcode op) {
}
+/*
+** Set the offset of an instruction
+*/
static void setoffset (CompileState *compst, int instruction, int offset) {
getinstr(compst, instruction + 1).offset = offset;
}
@@ -478,7 +494,7 @@ static void setoffset (CompileState *compst, int instruction, int offset) {
/*
** Add a capture instruction:
** 'op' is the capture instruction; 'cap' the capture kind;
-** 'key' the key into ktable; 'aux' is optional offset
+** 'key' the key into ktable; 'aux' is the optional capture offset
**
*/
static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
@@ -494,12 +510,18 @@ static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
#define target(code,i) ((i) + code[i + 1].offset)
+/*
+** Patch 'instruction' to jump to 'target'
+*/
static void jumptothere (CompileState *compst, int instruction, int target) {
if (instruction >= 0)
setoffset(compst, instruction, target - instruction);
}
+/*
+** Patch 'instruction' to jump to current position
+*/
static void jumptohere (CompileState *compst, int instruction) {
jumptothere(compst, instruction, gethere(compst));
}
@@ -616,13 +638,13 @@ static void codebehind (CompileState *compst, TTree *tree) {
/*
** Choice; optimizations:
-** - when p1 is headfail
-** - when first(p1) and first(p2) are disjoint; than
+** - when p1 is headfail or
+** when first(p1) and first(p2) are disjoint, then
** a character not in first(p1) cannot go to p1, and a character
** in first(p1) cannot go to p2 (at it is not in first(p2)).
** (The optimization is not valid if p1 accepts the empty string,
** as then there is no character at all...)
-** - when p2 is empty and opt is true; a IPartialCommit can resuse
+** - when p2 is empty and opt is true; a IPartialCommit can reuse
** the Choice already active in the stack.
*/
static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
@@ -649,7 +671,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
}
else {
/* ; jmp L1; L2: */
int jmp;
int test = codetestset(compst, &st, 0);
- codegen(compst, tree, opt, test, fullset);
+ codegen(compst, tree, 0, test, fullset);
jmp = addoffsetinst(compst, IJmp);
jumptohere(compst, test);
jumptothere(compst, jmp, test);
@@ -863,7 +885,8 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
/*
** Main code-generation function: dispatch to auxiliar functions
-** according to kind of tree
+** according to kind of tree. ('needfollow' should return true
+** only for constructions that use 'fl'.)
*/
static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
const Charset *fl) {
@@ -906,6 +929,7 @@ static void peephole (CompileState *compst) {
Instruction *code = compst->p->code;
int i;
for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
+ redo:
switch (code[i].i.code) {
case IChoice: case ICall: case ICommit: case IPartialCommit:
case IBackCommit: case ITestChar: case ITestSet:
@@ -927,8 +951,7 @@ static void peephole (CompileState *compst) {
int fft = finallabel(code, ft);
code[i] = code[ft]; /* jump becomes that instruction... */
jumptothere(compst, i, fft); /* but must correct its offset */
- i--; /* reoptimize its label */
- break;
+ goto redo; /* reoptimize its label */
}
default: {
jumptothere(compst, i, ft); /* optimize label */
@@ -947,13 +970,13 @@ static void peephole (CompileState *compst) {
/*
** Compile a pattern
*/
-Instruction *compile (lua_State *L, Pattern *p) {
+Instruction *lpeg_compile (lua_State *L, Pattern *p) {
CompileState compst;
compst.p = p; compst.ncode = 0; compst.L = L;
- reallocprog(L, p, 2); /* minimum initial size */
+ realloccode(L, p, 2); /* minimum initial size */
codegen(&compst, p->tree, 0, NOINST, fullset);
addinstruction(&compst, IEnd, 0);
- reallocprog(L, p, compst.ncode); /* set final size */
+ realloccode(L, p, compst.ncode); /* set final size */
peephole(&compst);
return p->code;
}
diff --git a/lpcode.h b/lpcode.h
index 5c9d54f..c31ab1e 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -1,5 +1,5 @@
/*
-** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $
+** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
*/
#if !defined(lpcode_h)
@@ -16,15 +16,23 @@ int checkaux (TTree *tree, int pred);
int fixedlenx (TTree *tree, int count, int len);
int hascaptures (TTree *tree);
int lp_gc (lua_State *L);
-Instruction *compile (lua_State *L, Pattern *p);
-void reallocprog (lua_State *L, Pattern *p, int nsize);
+Instruction *lpeg_compile (lua_State *L, Pattern *p);
+void realloccode (lua_State *L, Pattern *p, int nsize);
int sizei (const Instruction *i);
#define PEnullable 0
#define PEnofail 1
+/*
+** nofail(t) implies that 't' cannot fail with any input
+*/
#define nofail(t) checkaux(t, PEnofail)
+
+/*
+** (not nullable(t)) implies 't' cannot match without consuming
+** something
+*/
#define nullable(t) checkaux(t, PEnullable)
#define fixedlen(t) fixedlenx(t, 0, 0)
diff --git a/lpeg.html b/lpeg.html
index 4747e30..c0a7f09 100644
--- a/lpeg.html
+++ b/lpeg.html
@@ -10,7 +10,7 @@
-Sets the maximum size for the backtrack stack used by LPeg to
+Sets a limit for the size of the backtrack stack used by LPeg to
track calls and choices.
+(The default limit is 400.)
Most well-written patterns need little backtrack levels and
-therefore you seldom need to change this maximum;
-but a few useful patterns may need more space.
-Before changing this maximum you should try to rewrite your
+therefore you seldom need to change this limit;
+before changing it you should try to rewrite your
pattern to avoid the need for extra space.
+Nevertheless, a few useful patterns may overflow.
+Also, with recursive grammars,
+subjects with deep recursion may need larger limits.
lpeg.version ()
lpeg.setmaxstack (max)
Creates a back capture.
This pattern matches the empty string and
produces the values produced by the most recent
-group capture named lpeg.Cb (name)
name
.
+group capture named name
+(where name
can be any Lua value).
@@ -762,7 +766,8 @@
lpeg.Cg (patt [, name])
patt
into a single capture.
The group may be anonymous (if no name is given)
-or named with the given name.
+or named with the given name
+(which can be any non-nil Lua value).
@@ -1375,13 +1380,13 @@
LPeg -source code.
+source code.-Copyright © 2013 Lua.org, PUC-Rio. +Copyright © 2007-2015 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge, @@ -1419,7 +1424,7 @@
-$Id: lpeg.html,v 1.71 2013/04/11 19:17:41 roberto Exp $ +$Id: lpeg.html,v 1.75 2015/09/28 17:17:41 roberto Exp $
tag
field telling what non terminal
that table represents.
We can add such a tag using
-named group captures:
+named group captures:
x = re.compile[[ @@ -450,7 +450,7 @@Patterns
License
-Copyright © 2008-2010 Lua.org, PUC-Rio. +Copyright © 2008-2015 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge, @@ -488,7 +488,7 @@
License
diff --git a/test.lua b/test.lua index 1d107ca..017a3ab 100755 --- a/test.lua +++ b/test.lua @@ -1,6 +1,6 @@ -#!/usr/bin/env lua5.1 +#!/usr/bin/env lua --- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $ +-- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $ -- require"strict" -- just to be pedantic @@ -16,9 +16,6 @@ local unpack = rawget(table, "unpack") or unpack local loadstring = rawget(_G, "loadstring") or load --- most tests here do not need much stack space -m.setmaxstack(5) - local any = m.P(1) local space = m.S" \t\n"^0 @@ -170,8 +167,8 @@ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7) a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")} checkeq(a, {"123", "d"}) -a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")} -checkeq(a, {"123"}) +-- bug in LPeg 0.12 (nil value does not create a 'ktable') +assert(m.match(m.Cc(nil), "") == nil) a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")} checkeq(a, {"abcd", "l"}) @@ -194,6 +191,16 @@ checkeq(a, {1, 5}) t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")} checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""}) +-- bug in 0.12 ('hascapture' did not check for captures inside a rule) +do + local pat = m.P{ + 'S'; + S1 = m.C('abc') + 3, + S = #m.V('S1') -- rule has capture, but '#' must ignore it + } + assert(pat:match'abc' == 1) +end + -- test for small capture boundary for i = 250,260 do @@ -201,9 +208,8 @@ for i = 250,260 do assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i) end - -- tests for any*n and any*-n -for n = 1, 550 do +for n = 1, 550, 13 do local x_1 = string.rep('x', n - 1) local x = x_1 .. 'a' assert(not m.P(n):match(x_1)) @@ -282,6 +288,13 @@ assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3) p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1 assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21) + + +-- bug in 0.12.2 +-- p = { ('ab' ('c' 'ef'?)*)? 
} +p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1) +s = "abcefccefc" +assert(s == p:match(s)) pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510" @@ -343,10 +356,16 @@ checkeq(t, {hi = 10, ho = 20}) t = p:match'abc' checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'}) +-- non-string group names +p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io)) +t = p:match('abcdefghij') +assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c') + -- test for error messages -local function checkerr (msg, ...) - assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...)))) +local function checkerr (msg, f, ...) + local st, err = pcall(f, ...) + assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err)) end checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") @@ -370,6 +389,32 @@ p = {'a', } checkerr("rule 'a' may be left recursive", m.match, p, "a") +-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit) +-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1 +-- that is optimized to ICommit L1 + +p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' } +assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc') + + +do + -- large dynamic Cc + local lim = 2^16 - 1 + local c = 0 + local function seq (n) + if n == 1 then c = c + 1; return m.Cc(c) + else + local m = math.floor(n / 2) + return seq(m) * seq(n - m) + end + end + p = m.Ct(seq(lim)) + t = p:match('') + assert(t[lim] == lim) + checkerr("too many", function () p = p / print end) + checkerr("too many", seq, lim + 1) +end + -- tests for non-pattern as arguments to pattern functions @@ -488,7 +533,10 @@ assert(m.match(1 * m.B(1), 'a') == 2) assert(m.match(-m.B(1), 'a') == 1) assert(m.match(m.B(250), string.rep('a', 250)) == nil) assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251) -assert(not pcall(m.B, 260)) + +-- look-behind with an open call +checkerr("pattern may not have fixed length", m.B, m.V'S1') +checkerr("too long to look behind", 
m.B, 260) B = #letter * -m.B(letter) + -letter * m.B(letter) x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) }) @@ -555,18 +603,18 @@ assert(not p:match(string.rep("011", 10001))) -- this grammar does need backtracking info. local lim = 10000 p = m.P{ '0' * m.V(1) + '0' } -assert(not pcall(m.match, p, string.rep("0", lim))) +checkerr("stack overflow", m.match, p, string.rep("0", lim)) m.setmaxstack(2*lim) -assert(not pcall(m.match, p, string.rep("0", lim))) +checkerr("stack overflow", m.match, p, string.rep("0", lim)) m.setmaxstack(2*lim + 4) -assert(pcall(m.match, p, string.rep("0", lim))) +assert(m.match(p, string.rep("0", lim)) == lim + 1) -- this repetition should not need stack space (only the call does) p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' } m.setmaxstack(200) assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362) -m.setmaxstack(5) -- restore original limit +m.setmaxstack(100) -- restore low limit -- tests for optional start position assert(m.match("a", "abc", 1)) @@ -588,10 +636,10 @@ print("+") -- tests for argument captures -assert(not pcall(m.Carg, 0)) -assert(not pcall(m.Carg, -1)) -assert(not pcall(m.Carg, 2^18)) -assert(not pcall(m.match, m.Carg(1), 'a', 1)) +checkerr("invalid argument", m.Carg, 0) +checkerr("invalid argument", m.Carg, -1) +checkerr("invalid argument", m.Carg, 2^18) +checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1) assert(m.match(m.Carg(1), 'a', 1, print) == print) x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)} checkeq(x, {10, 20}) @@ -644,14 +692,16 @@ assert(m.match(p, "aaaa") == 5) assert(m.match(p, "abaa") == 2) assert(not m.match(p, "baaa")) -assert(not pcall(m.match, function () return 2^20 end, s)) -assert(not pcall(m.match, function () return 0 end, s)) -assert(not pcall(m.match, function (s, i) return i - 1 end, s)) -assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s)) +checkerr("invalid position", m.match, function () return 2^20 end, s) +checkerr("invalid 
position", m.match, function () return 0 end, s) +checkerr("invalid position", m.match, function (s, i) return i - 1 end, s) +checkerr("invalid position", m.match, + m.P(1)^0 * function (_, i) return i - 1 end, s) assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s)) -assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s)) +checkerr("invalid position", m.match, + m.P(1)^0 * function (_, i) return i + 1 end, s) assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s)) -assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)) +checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s) assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s)) assert(m.match(m.P(1)^0 * function (_, i) return true end, s) == string.len(s) + 1) @@ -696,6 +746,10 @@ t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")} t1 = {1,1,nil,nil,4,nil,3,nil,nil} for i=1,10 do assert(t[i] == t1[i]) end +-- bug in 0.12.2: ktable with only nil could be eliminated when joining +-- with a pattern without ktable +assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil) + t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")} checkeq(t, {"a", "ax", "b", "bx", "c", "cx"}) @@ -734,9 +788,9 @@ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx") assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") == "411 - abc ") -assert(pcall(m.match, m.P(1)/"%0", "abc")) -assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range -assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range +assert(m.match(m.P(1)/"%0", "abc") == "a") +checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc") +checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc") p = m.C(1) p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1" @@ -754,7 +808,7 @@ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. 
"-3") p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1" assert(p:match'x' == 'alo - x - alo') -assert(not pcall(m.match, m.Cc(true) / "%1", "a")) +checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a") -- long strings for string capture l = 10000 @@ -782,35 +836,37 @@ checkeq(t, {a="b", c="du", xux="yuy"}) -- errors in accumulator capture --- very long match (forces fold to be a pair open-close) producing with -- no initial capture -assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600))) +checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa') +-- no initial capture (very long match forces fold to be a pair open-close) +checkerr("no initial value", m.match, m.Cf(m.P(500), print), + string.rep('a', 600)) -- nested capture produces no initial value -assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo")) +checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo") -- tests for loop checker -local function haveloop (p) - assert(not pcall(function (p) return p^0 end, m.P(p))) +local function isnullable (p) + checkerr("may accept empty string", function (p) return p^0 end, m.P(p)) end -haveloop(m.P("x")^-4) +isnullable(m.P("x")^-4) assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3) assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3) -haveloop("") -haveloop(m.P("x")^0) -haveloop(m.P("x")^-1) -haveloop(m.P("x") + 1 + 2 + m.P("a")^-1) -haveloop(-m.P("ab")) -haveloop(- -m.P("ab")) -haveloop(# #(m.P("ab") + "xy")) -haveloop(- #m.P("ab")^0) -haveloop(# -m.P("ab")^1) -haveloop(#m.V(3)) -haveloop(m.V(3) + m.V(1) + m.P('a')^-1) -haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) +isnullable("") +isnullable(m.P("x")^0) +isnullable(m.P("x")^-1) +isnullable(m.P("x") + 1 + 2 + m.P("a")^-1) +isnullable(-m.P("ab")) +isnullable(- -m.P("ab")) +isnullable(# #(m.P("ab") + "xy")) +isnullable(- #m.P("ab")^0) +isnullable(# -m.P("ab")^1) +isnullable(#m.V(3)) +isnullable(m.V(3) + m.V(1) + m.P('a')^-1) 
+isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)}) assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc") == 3) assert(m.match(m.P""^-3, "a") == 1) @@ -894,13 +950,20 @@ print"+" -- tests for back references -assert(not pcall(m.match, m.Cb('x'), '')) -assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')) +checkerr("back reference 'x' not found", m.match, m.Cb('x'), '') +checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a') p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k")) t = p:match("ab") checkeq(t, {"a", "b"}) +p = m.P(true) +for i = 1, 10 do p = p * m.Cg(1, i) end +for i = 1, 10 do + local p = p * m.Cb(i) + assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i)) +end + t = {} function foo (p) t[#t + 1] = p; return p .. "x" end @@ -1370,8 +1433,7 @@ assert(rev:match"0123456789" == "9876543210") -- testing error messages in re local function errmsg (p, err) - local s, msg = pcall(re.compile, p) - assert(not s and string.find(msg, err)) + checkerr(err, re.compile, p) end errmsg('aaaa', "rule 'aaaa'")-$Id: re.html,v 1.21 2013/03/28 20:43:30 roberto Exp $ +$Id: re.html,v 1.23 2015/09/28 17:17:41 roberto Exp $