From 158cce335dda3aac1d2e7f5aff26df359ca07343 Mon Sep 17 00:00:00 2001
From: Heinrich Hartmann
Date: Thu, 26 May 2016 18:41:50 +0200
Subject: [PATCH 1/4] Upgrade to Version 1.0
as provided by http://www.inf.puc-rio.br/~roberto/lpeg/ on 2016-05-26
`MD5 (lpeg-1.0.0.tar.gz) = 0aec64ccd13996202ad0c099e2877ece`
---
HISTORY | 8 +-
lpcap.c | 6 +-
lpcap.h | 4 +-
lpcode.c | 135 ++++++++++-------
lpcode.h | 12 +-
lpeg.html | 27 ++--
lpprint.c | 8 +-
lpprint.h | 3 +-
lptree.c | 446 +++++++++++++++++++++++++++++++-----------------------
lptypes.h | 38 ++---
lpvm.c | 6 +-
lpvm.h | 7 +-
makefile | 4 +-
re.html | 8 +-
test.lua | 166 +++++++++++++-------
15 files changed, 522 insertions(+), 356 deletions(-)
diff --git a/HISTORY b/HISTORY
index 8ada774..0c10edd 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,4 +1,10 @@
-HISTORY for LPeg 0.12
+HISTORY for LPeg 1.0
+
+* Changes from version 0.12 to 1.0
+ ---------------------------------
+ + group "names" can be any Lua value
+ + some bugs fixed
+ + other small improvements
* Changes from version 0.11 to 0.12
---------------------------------
diff --git a/lpcap.c b/lpcap.c
index d90b935..c9085de 100644
--- a/lpcap.c
+++ b/lpcap.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -126,7 +126,7 @@ static Capture *findback (CapState *cs, Capture *cap) {
continue; /* opening an enclosing capture: skip and get previous */
if (captype(cap) == Cgroup) {
getfromktable(cs, cap->idx); /* get group name */
- if (lua_equal(L, -2, -1)) { /* right group? */
+ if (lp_equal(L, -2, -1)) { /* right group? */
lua_pop(L, 2); /* remove reference name and group name */
return cap;
}
@@ -462,7 +462,7 @@ static int pushcapture (CapState *cs) {
case Carg: {
int arg = (cs->cap++)->idx;
if (arg + FIXEDARGS > cs->ptop)
- return luaL_error(L, "reference to absent argument #%d", arg);
+ return luaL_error(L, "reference to absent extra argument #%d", arg);
lua_pushvalue(L, arg + FIXEDARGS);
return 1;
}
diff --git a/lpcap.h b/lpcap.h
index c0a0e38..d762fdc 100644
--- a/lpcap.h
+++ b/lpcap.h
@@ -1,5 +1,5 @@
/*
-** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
*/
#if !defined(lpcap_h)
@@ -18,7 +18,7 @@ typedef enum CapKind {
typedef struct Capture {
const char *s; /* subject position */
- short idx; /* extra info about capture (group name, arg index, etc.) */
+ unsigned short idx; /* extra info (group name, arg index, etc.) */
byte kind; /* kind of capture */
byte siz; /* size of full capture + 1 (0 = not a full capture) */
} Capture;
diff --git a/lpcode.c b/lpcode.c
index 2cc0e0d..fbf44fe 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $
+** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -33,26 +33,30 @@ static const Charset *fullset = &fullset_;
*/
/*
-** Check whether a charset is empty (IFail), singleton (IChar),
-** full (IAny), or none of those (ISet).
+** Check whether a charset is empty (returns IFail), singleton (IChar),
+** full (IAny), or none of those (ISet). When singleton, '*c' returns
+** which character it is. (When generic set, the set was the input,
+** so there is no need to return it.)
*/
static Opcode charsettype (const byte *cs, int *c) {
- int count = 0;
+ int count = 0; /* number of characters in the set */
int i;
- int candidate = -1; /* candidate position for a char */
- for (i = 0; i < CHARSETSIZE; i++) {
+ int candidate = -1; /* candidate position for the singleton char */
+ for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */
int b = cs[i];
- if (b == 0) {
- if (count > 1) return ISet; /* else set is still empty */
+ if (b == 0) { /* is byte empty? */
+ if (count > 1) /* was set neither empty nor singleton? */
+ return ISet; /* neither full nor empty nor singleton */
+ /* else set is still empty or singleton */
}
- else if (b == 0xFF) {
- if (count < (i * BITSPERCHAR))
- return ISet;
+ else if (b == 0xFF) { /* is byte full? */
+ if (count < (i * BITSPERCHAR)) /* was set not full? */
+ return ISet; /* neither full nor empty nor singleton */
else count += BITSPERCHAR; /* set is still full */
}
- else if ((b & (b - 1)) == 0) { /* byte has only one bit? */
- if (count > 0)
- return ISet; /* set is neither full nor empty */
+ else if ((b & (b - 1)) == 0) { /* has byte only one bit? */
+ if (count > 0) /* was set not empty? */
+ return ISet; /* neither full nor empty nor singleton */
else { /* set has only one char till now; track it */
count++;
candidate = i;
@@ -77,6 +81,7 @@ static Opcode charsettype (const byte *cs, int *c) {
}
}
+
/*
** A few basic operations on Charsets
*/
@@ -84,16 +89,11 @@ static void cs_complement (Charset *cs) {
loopset(i, cs->cs[i] = ~cs->cs[i]);
}
-
static int cs_equal (const byte *cs1, const byte *cs2) {
loopset(i, if (cs1[i] != cs2[i]) return 0);
return 1;
}
-
-/*
-** computes whether sets cs1 and cs2 are disjoint
-*/
static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
return 1;
@@ -101,7 +101,8 @@ static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
/*
-** Convert a 'char' pattern (TSet, TChar, TAny) to a charset
+** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
+** charset and return 1; else return 0.
*/
int tocharset (TTree *tree, Charset *cs) {
switch (tree->tag) {
@@ -116,7 +117,7 @@ int tocharset (TTree *tree, Charset *cs) {
return 1;
}
case TAny: {
- loopset(i, cs->cs[i] = 0xFF); /* add all to the set */
+ loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */
return 1;
}
default: return 0;
@@ -125,13 +126,16 @@ int tocharset (TTree *tree, Charset *cs) {
/*
-** Checks whether a pattern has captures
+** Check whether a pattern tree has captures
*/
int hascaptures (TTree *tree) {
tailcall:
switch (tree->tag) {
case TCapture: case TRunTime:
return 1;
+ case TCall:
+ tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */
+ case TOpenCall: assert(0);
default: {
switch (numsiblings[tree->tag]) {
case 1: /* return hascaptures(sib1(tree)); */
@@ -161,7 +165,7 @@ int hascaptures (TTree *tree) {
** p is nullable => nullable(p)
** nofail(p) => p cannot fail
** The function assumes that TOpenCall is not nullable;
-** this will be checked again when the grammar is fixed.)
+** this will be checked again when the grammar is fixed.
** Run-time captures can do whatever they want, so the result
** is conservative.
*/
@@ -198,7 +202,7 @@ int checkaux (TTree *tree, int pred) {
case TCall: /* return checkaux(sib2(tree), pred); */
tree = sib2(tree); goto tailcall;
default: assert(0); return 0;
- };
+ }
}
@@ -245,16 +249,20 @@ int fixedlenx (TTree *tree, int count, int len) {
/*
** Computes the 'first set' of a pattern.
** The result is a conservative aproximation:
-** match p ax -> x' for some x ==> a in first(p).
+** match p ax -> x (for some x) ==> a belongs to first(p)
+** or
+** a not in first(p) ==> match p ax -> fail (for all x)
+**
** The set 'follow' is the first set of what follows the
** pattern (full set if nothing follows it).
-** The function returns 0 when this set can be used for
-** tests that avoid the pattern altogether.
+**
+** The function returns 0 when this resulting set can be used for
+** test instructions that avoid the pattern altogether.
** A non-zero return can happen for two reasons:
-** 1) match p '' -> '' ==> returns 1.
-** (tests cannot be used because they always fail for an empty input)
-** 2) there is a match-time capture ==> returns 2.
-** (match-time captures should not be avoided by optimizations)
+** 1) match p '' -> '' ==> return has bit 1 set
+** (tests cannot be used because they would always fail for an empty input);
+** 2) there is a match-time capture ==> return has bit 2 set
+** (optimizations should not bypass match-time captures).
*/
static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
tailcall:
@@ -265,7 +273,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
}
case TTrue: {
loopset(i, firstset->cs[i] = follow->cs[i]);
- return 1;
+ return 1; /* accepts the empty string */
}
case TFalse: {
loopset(i, firstset->cs[i] = 0);
@@ -280,7 +288,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
}
case TSeq: {
if (!nullable(sib1(tree))) {
- /* return getfirst(sib1(tree), fullset, firstset); */
+ /* when p1 is not nullable, p2 has nothing to contribute;
+ return getfirst(sib1(tree), fullset, firstset); */
tree = sib1(tree); follow = fullset; goto tailcall;
}
else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
@@ -324,7 +333,7 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
/* else go through */
}
case TBehind: { /* instruction gives no new information */
- /* call 'getfirst' to check for math-time captures */
+ /* call 'getfirst' only to check for match-time captures */
int e = getfirst(sib1(tree), follow, firstset);
loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
return e | 1; /* always can accept the empty string */
@@ -335,8 +344,8 @@ static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
/*
-** If it returns true, then pattern can fail only depending on the next
-** character of the subject
+** If 'headfail(tree)' is true, then 'tree' can fail only depending on
+** the next character of the subject.
*/
static int headfail (TTree *tree) {
tailcall:
@@ -403,9 +412,9 @@ int sizei (const Instruction *i) {
switch((Opcode)i->i.code) {
case ISet: case ISpan: return CHARSETINSTSIZE;
case ITestSet: return CHARSETINSTSIZE + 1;
- case ITestChar: case ITestAny: case IChoice: case IJmp:
- case ICall: case IOpenCall: case ICommit: case IPartialCommit:
- case IBackCommit: return 2;
+ case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
+ case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
+ return 2;
default: return 1;
}
}
@@ -422,16 +431,17 @@ typedef struct CompileState {
/*
-** code generation is recursive; 'opt' indicates that the code is
-** being generated under a 'IChoice' operator jumping to its end.
-** 'tt' points to a previous test protecting this code. 'fl' is
-** the follow set of the pattern.
+** code generation is recursive; 'opt' indicates that the code is being
+** generated as the last thing inside an optional pattern (so, if that
+** code is optional too, it can reuse the 'IChoice' already in place for
+** the outer pattern). 'tt' points to a previous test protecting this
+** code (or NOINST). 'fl' is the follow set of the pattern.
*/
static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
const Charset *fl);
-void reallocprog (lua_State *L, Pattern *p, int nsize) {
+void realloccode (lua_State *L, Pattern *p, int nsize) {
void *ud;
lua_Alloc f = lua_getallocf(L, &ud);
void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
@@ -446,7 +456,7 @@ void reallocprog (lua_State *L, Pattern *p, int nsize) {
static int nextinstruction (CompileState *compst) {
int size = compst->p->codesize;
if (compst->ncode >= size)
- reallocprog(compst->L, compst->p, size * 2);
+ realloccode(compst->L, compst->p, size * 2);
return compst->ncode++;
}
@@ -462,6 +472,9 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) {
}
+/*
+** Add an instruction followed by space for an offset (to be set later)
+*/
static int addoffsetinst (CompileState *compst, Opcode op) {
int i = addinstruction(compst, op, 0); /* instruction */
addinstruction(compst, (Opcode)0, 0); /* open space for offset */
@@ -470,6 +483,9 @@ static int addoffsetinst (CompileState *compst, Opcode op) {
}
+/*
+** Set the offset of an instruction
+*/
static void setoffset (CompileState *compst, int instruction, int offset) {
getinstr(compst, instruction + 1).offset = offset;
}
@@ -478,7 +494,7 @@ static void setoffset (CompileState *compst, int instruction, int offset) {
/*
** Add a capture instruction:
** 'op' is the capture instruction; 'cap' the capture kind;
-** 'key' the key into ktable; 'aux' is optional offset
+** 'key' the key into ktable; 'aux' is the optional capture offset
**
*/
static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
@@ -494,12 +510,18 @@ static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
#define target(code,i) ((i) + code[i + 1].offset)
+/*
+** Patch 'instruction' to jump to 'target'
+*/
static void jumptothere (CompileState *compst, int instruction, int target) {
if (instruction >= 0)
setoffset(compst, instruction, target - instruction);
}
+/*
+** Patch 'instruction' to jump to current position
+*/
static void jumptohere (CompileState *compst, int instruction) {
jumptothere(compst, instruction, gethere(compst));
}
@@ -616,13 +638,13 @@ static void codebehind (CompileState *compst, TTree *tree) {
/*
** Choice; optimizations:
-** - when p1 is headfail
-** - when first(p1) and first(p2) are disjoint; than
+** - when p1 is headfail or
+** when first(p1) and first(p2) are disjoint, then
** a character not in first(p1) cannot go to p1, and a character
** in first(p1) cannot go to p2 (at it is not in first(p2)).
** (The optimization is not valid if p1 accepts the empty string,
** as then there is no character at all...)
-** - when p2 is empty and opt is true; a IPartialCommit can resuse
+** - when p2 is empty and opt is true; a IPartialCommit can reuse
** the Choice already active in the stack.
*/
static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
@@ -649,7 +671,7 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
}
else {
/* ==
- test(fail(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */
+ test(first(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */
int pcommit;
int test = codetestset(compst, &cs1, e1);
int pchoice = addoffsetinst(compst, IChoice);
@@ -737,7 +759,7 @@ static void coderep (CompileState *compst, TTree *tree, int opt,
/* L1: test (fail(p1)) -> L2; ; jmp L1; L2: */
int jmp;
int test = codetestset(compst, &st, 0);
- codegen(compst, tree, opt, test, fullset);
+ codegen(compst, tree, 0, test, fullset);
jmp = addoffsetinst(compst, IJmp);
jumptohere(compst, test);
jumptothere(compst, jmp, test);
@@ -863,7 +885,8 @@ static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
/*
** Main code-generation function: dispatch to auxiliar functions
-** according to kind of tree
+** according to kind of tree. ('needfollow' should return true
+** only for constructions that use 'fl'.)
*/
static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
const Charset *fl) {
@@ -906,6 +929,7 @@ static void peephole (CompileState *compst) {
Instruction *code = compst->p->code;
int i;
for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
+ redo:
switch (code[i].i.code) {
case IChoice: case ICall: case ICommit: case IPartialCommit:
case IBackCommit: case ITestChar: case ITestSet:
@@ -927,8 +951,7 @@ static void peephole (CompileState *compst) {
int fft = finallabel(code, ft);
code[i] = code[ft]; /* jump becomes that instruction... */
jumptothere(compst, i, fft); /* but must correct its offset */
- i--; /* reoptimize its label */
- break;
+ goto redo; /* reoptimize its label */
}
default: {
jumptothere(compst, i, ft); /* optimize label */
@@ -950,10 +973,10 @@ static void peephole (CompileState *compst) {
Instruction *compile (lua_State *L, Pattern *p) {
CompileState compst;
compst.p = p; compst.ncode = 0; compst.L = L;
- reallocprog(L, p, 2); /* minimum initial size */
+ realloccode(L, p, 2); /* minimum initial size */
codegen(&compst, p->tree, 0, NOINST, fullset);
addinstruction(&compst, IEnd, 0);
- reallocprog(L, p, compst.ncode); /* set final size */
+ realloccode(L, p, compst.ncode); /* set final size */
peephole(&compst);
return p->code;
}
diff --git a/lpcode.h b/lpcode.h
index 5c9d54f..896d3c7 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -1,5 +1,5 @@
/*
-** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $
+** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
*/
#if !defined(lpcode_h)
@@ -17,14 +17,22 @@ int fixedlenx (TTree *tree, int count, int len);
int hascaptures (TTree *tree);
int lp_gc (lua_State *L);
Instruction *compile (lua_State *L, Pattern *p);
-void reallocprog (lua_State *L, Pattern *p, int nsize);
+void realloccode (lua_State *L, Pattern *p, int nsize);
int sizei (const Instruction *i);
#define PEnullable 0
#define PEnofail 1
+/*
+** nofail(t) implies that 't' cannot fail with any input
+*/
#define nofail(t) checkaux(t, PEnofail)
+
+/*
+** (not nullable(t)) implies 't' cannot match without consuming
+** something
+*/
#define nullable(t) checkaux(t, PEnullable)
#define fixedlen(t) fixedlenx(t, 0, 0)
diff --git a/lpeg.html b/lpeg.html
index 4747e30..c0a7f09 100644
--- a/lpeg.html
+++ b/lpeg.html
@@ -10,7 +10,7 @@
-
+
@@ -22,7 +22,7 @@
LPeg
- Parsing Expression Grammars For Lua, version 0.12
+ Parsing Expression Grammars For Lua, version 1.0
@@ -195,13 +195,16 @@ lpeg.version ()
lpeg.setmaxstack (max)
-Sets the maximum size for the backtrack stack used by LPeg to
+Sets a limit for the size of the backtrack stack used by LPeg to
track calls and choices.
+(The default limit is 400.)
Most well-written patterns need little backtrack levels and
-therefore you seldom need to change this maximum;
-but a few useful patterns may need more space.
-Before changing this maximum you should try to rewrite your
+therefore you seldom need to change this limit;
+before changing it you should try to rewrite your
pattern to avoid the need for extra space.
+Nevertheless, a few useful patterns may overflow.
+Also, with recursive grammars,
+subjects with deep recursion may also need larger limits.
@@ -682,7 +685,8 @@ lpeg.Cb (name)
Creates a back capture.
This pattern matches the empty string and
produces the values produced by the most recent
-group capture named name
.
+group capture named name
+(where name
can be any Lua value).
@@ -762,7 +766,8 @@
lpeg.Cg (patt [, name])
It groups all values returned by patt
into a single capture.
The group may be anonymous (if no name is given)
-or named with the given name.
+or named with the given name
+(which can be any non-nil Lua value).
@@ -1375,13 +1380,13 @@
Arithmetic expressions
Download
LPeg
-source code.
+source code.
-Copyright © 2013 Lua.org, PUC-Rio.
+Copyright © 2007-2015 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge,
@@ -1419,7 +1424,7 @@
-$Id: lpeg.html,v 1.71 2013/04/11 19:17:41 roberto Exp $
+$Id: lpeg.html,v 1.75 2015/09/28 17:17:41 roberto Exp $
diff --git a/lpprint.c b/lpprint.c
index 05fa648..174d168 100644
--- a/lpprint.c
+++ b/lpprint.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpprint.c,v 1.7 2013/04/12 16:29:49 roberto Exp $
+** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -52,7 +52,7 @@ static void printjmp (const Instruction *op, const Instruction *p) {
}
-static void printinst (const Instruction *op, const Instruction *p) {
+void printinst (const Instruction *op, const Instruction *p) {
const char *const names[] = {
"any", "char", "set",
"testany", "testchar", "testset",
@@ -221,10 +221,10 @@ void printtree (TTree *tree, int ident) {
void printktable (lua_State *L, int idx) {
int n, i;
- lua_getfenv(L, idx);
+ lua_getuservalue(L, idx);
if (lua_isnil(L, -1)) /* no ktable? */
return;
- n = lua_objlen(L, -1);
+ n = lua_rawlen(L, -1);
printf("[");
for (i = 1; i <= n; i++) {
printf("%d = ", i);
diff --git a/lpprint.h b/lpprint.h
index e640f74..6329760 100644
--- a/lpprint.h
+++ b/lpprint.h
@@ -1,5 +1,5 @@
/*
-** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
+** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $
*/
@@ -18,6 +18,7 @@ void printtree (TTree *tree, int ident);
void printktable (lua_State *L, int idx);
void printcharset (const byte *st);
void printcaplist (Capture *cap, Capture *limit);
+void printinst (const Instruction *op, const Instruction *p);
#else
diff --git a/lptree.c b/lptree.c
index a5dfeb4..ac5f515 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1,5 +1,5 @@
/*
-** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $
+** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $
** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -126,6 +126,189 @@ static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
}
+
+/*
+** {===================================================================
+** KTable manipulation
+**
+** - The ktable of a pattern 'p' can be shared by other patterns that
+** contain 'p' and no other constants. Because of this sharing, we
+** should not add elements to a 'ktable' unless it was freshly created
+** for the new pattern.
+**
+** - The maximum index in a ktable is USHRT_MAX, because trees and
+** patterns use unsigned shorts to store those indices.
+** ====================================================================
+*/
+
+/*
+** Create a new 'ktable' for the pattern at the top of the stack.
+*/
+static void newktable (lua_State *L, int n) {
+ lua_createtable(L, n, 0); /* create a fresh table */
+ lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */
+}
+
+
+/*
+** Add element 'idx' to 'ktable' of pattern at the top of the stack;
+** Return index of new element.
+** If new element is nil, does not add it to table (as it would be
+** useless) and returns 0, as ktable[0] is always nil.
+*/
+static int addtoktable (lua_State *L, int idx) {
+ if (lua_isnil(L, idx)) /* nil value? */
+ return 0;
+ else {
+ int n;
+ lua_getuservalue(L, -1); /* get ktable from pattern */
+ n = lua_rawlen(L, -1);
+ if (n >= USHRT_MAX)
+ luaL_error(L, "too many Lua values in pattern");
+ lua_pushvalue(L, idx); /* element to be added */
+ lua_rawseti(L, -2, ++n);
+ lua_pop(L, 1); /* remove 'ktable' */
+ return n;
+ }
+}
+
+
+/*
+** Return the number of elements in the ktable at 'idx'.
+** In Lua 5.2/5.3, default "environment" for patterns is nil, not
+** a table. Treat it as an empty table. In Lua 5.1, assumes that
+** the environment has no numeric indices (len == 0)
+*/
+static int ktablelen (lua_State *L, int idx) {
+ if (!lua_istable(L, idx)) return 0;
+ else return lua_rawlen(L, idx);
+}
+
+
+/*
+** Concatenate the contents of table 'idx1' into table 'idx2'.
+** (Assume that both indices are negative.)
+** Return the original length of table 'idx2' (or 0, if no
+** element was added, as there is no need to correct any index).
+*/
+static int concattable (lua_State *L, int idx1, int idx2) {
+ int i;
+ int n1 = ktablelen(L, idx1);
+ int n2 = ktablelen(L, idx2);
+ if (n1 + n2 > USHRT_MAX)
+ luaL_error(L, "too many Lua values in pattern");
+ if (n1 == 0) return 0; /* nothing to correct */
+ for (i = 1; i <= n1; i++) {
+ lua_rawgeti(L, idx1, i);
+ lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */
+ }
+ return n2;
+}
+
+
+/*
+** When joining 'ktables', constants from one of the subpatterns must
+** be renumbered; 'correctkeys' corrects their indices (adding 'n'
+** to each of them)
+*/
+static void correctkeys (TTree *tree, int n) {
+ if (n == 0) return; /* no correction? */
+ tailcall:
+ switch (tree->tag) {
+ case TOpenCall: case TCall: case TRunTime: case TRule: {
+ if (tree->key > 0)
+ tree->key += n;
+ break;
+ }
+ case TCapture: {
+ if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
+ tree->key += n;
+ break;
+ }
+ default: break;
+ }
+ switch (numsiblings[tree->tag]) {
+ case 1: /* correctkeys(sib1(tree), n); */
+ tree = sib1(tree); goto tailcall;
+ case 2:
+ correctkeys(sib1(tree), n);
+ tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */
+ default: assert(numsiblings[tree->tag] == 0); break;
+ }
+}
+
+
+/*
+** Join the ktables from p1 and p2 into the ktable for the new pattern
+** at the top of the stack, reusing them when possible.
+*/
+static void joinktables (lua_State *L, int p1, TTree *t2, int p2) {
+ int n1, n2;
+ lua_getuservalue(L, p1); /* get ktables */
+ lua_getuservalue(L, p2);
+ n1 = ktablelen(L, -2);
+ n2 = ktablelen(L, -1);
+ if (n1 == 0 && n2 == 0) /* are both tables empty? */
+ lua_pop(L, 2); /* nothing to be done; pop tables */
+ else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */
+ lua_pop(L, 1); /* pop 2nd table */
+ lua_setuservalue(L, -2); /* set 1st ktable into new pattern */
+ }
+ else if (n1 == 0) { /* first table is empty? */
+ lua_setuservalue(L, -3); /* set 2nd table into new pattern */
+ lua_pop(L, 1); /* pop 1st table */
+ }
+ else {
+ lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */
+ /* stack: new p; ktable p1; ktable p2; new ktable */
+ concattable(L, -3, -1); /* from p1 into new ktable */
+ concattable(L, -2, -1); /* from p2 into new ktable */
+ lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */
+ lua_pop(L, 2); /* pop other ktables */
+ correctkeys(t2, n1); /* correction for indices from p2 */
+ }
+}
+
+
+/*
+** copy 'ktable' of element 'idx' to new tree (on top of stack)
+*/
+static void copyktable (lua_State *L, int idx) {
+ lua_getuservalue(L, idx);
+ lua_setuservalue(L, -2);
+}
+
+
+/*
+** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable'
+** from tree at the top of the stack, and correct corresponding
+** tree.
+*/
+static void mergektable (lua_State *L, int idx, TTree *stree) {
+ int n;
+ lua_getuservalue(L, -1); /* get ktables */
+ lua_getuservalue(L, idx);
+ n = concattable(L, -1, -2);
+ lua_pop(L, 2); /* remove both ktables */
+ correctkeys(stree, n);
+}
+
+
+/*
+** Create a new 'ktable' for the pattern at the top of the stack, adding
+** all elements from pattern 'p' (if not 0) plus element 'idx' to it.
+** Return index of new element.
+*/
+static int addtonewktable (lua_State *L, int p, int idx) {
+ newktable(L, 1);
+ if (p)
+ mergektable(L, p, NULL);
+ return addtoktable(L, idx);
+}
+
+/* }====================================================== */
+
+
/*
** {======================================================
** Tree generation
@@ -155,7 +338,7 @@ static Pattern *getpattern (lua_State *L, int idx) {
static int getsize (lua_State *L, int idx) {
- return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
+ return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
}
@@ -168,12 +351,16 @@ static TTree *gettree (lua_State *L, int idx, int *len) {
/*
-** create a pattern
+** create a pattern. Set its uservalue (the 'ktable') equal to its
+** metatable. (It could be any empty sequence; the metatable is at
+** hand here, so we use it.)
*/
static TTree *newtree (lua_State *L, int len) {
size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
Pattern *p = (Pattern *)lua_newuserdata(L, size);
luaL_getmetatable(L, PATTERN_T);
+ lua_pushvalue(L, -1);
+ lua_setuservalue(L, -3);
lua_setmetatable(L, -2);
p->code = NULL; p->codesize = 0;
return p->tree;
@@ -206,29 +393,6 @@ static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
}
-/*
-** Add element 'idx' to 'ktable' of pattern at the top of the stack;
-** create new 'ktable' if necessary. Return index of new element.
-*/
-static int addtoktable (lua_State *L, int idx) {
- if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */
- return 0;
- else {
- int n;
- lua_getfenv(L, -1); /* get ktable from pattern */
- n = lua_objlen(L, -1);
- if (n == 0) { /* is it empty/non-existent? */
- lua_pop(L, 1); /* remove it */
- lua_createtable(L, 1, 0); /* create a fresh table */
- }
- lua_pushvalue(L, idx); /* element to be added */
- lua_rawseti(L, -2, n + 1);
- lua_setfenv(L, -2); /* set it as ktable for pattern */
- return n + 1;
- }
-}
-
-
/*
** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
@@ -304,7 +468,7 @@ static TTree *getpatt (lua_State *L, int idx, int *len) {
case LUA_TFUNCTION: {
tree = newtree(L, 2);
tree->tag = TRunTime;
- tree->key = addtoktable(L, idx);
+ tree->key = addtonewktable(L, 0, idx);
sib1(tree)->tag = TTrue;
break;
}
@@ -319,123 +483,6 @@ static TTree *getpatt (lua_State *L, int idx, int *len) {
}
-/*
-** Return the number of elements in the ktable of pattern at 'idx'.
-** In Lua 5.2, default "environment" for patterns is nil, not
-** a table. Treat it as an empty table. In Lua 5.1, assumes that
-** the environment has no numeric indices (len == 0)
-*/
-static int ktablelen (lua_State *L, int idx) {
- if (!lua_istable(L, idx)) return 0;
- else return lua_objlen(L, idx);
-}
-
-
-/*
-** Concatentate the contents of table 'idx1' into table 'idx2'.
-** (Assume that both indices are negative.)
-** Return the original length of table 'idx2'
-*/
-static int concattable (lua_State *L, int idx1, int idx2) {
- int i;
- int n1 = ktablelen(L, idx1);
- int n2 = ktablelen(L, idx2);
- if (n1 == 0) return 0; /* nothing to correct */
- for (i = 1; i <= n1; i++) {
- lua_rawgeti(L, idx1, i);
- lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */
- }
- return n2;
-}
-
-
-/*
-** Make a merge of ktables from p1 and p2 the ktable for the new
-** pattern at the top of the stack.
-*/
-static int joinktables (lua_State *L, int p1, int p2) {
- int n1, n2;
- lua_getfenv(L, p1); /* get ktables */
- lua_getfenv(L, p2);
- n1 = ktablelen(L, -2);
- n2 = ktablelen(L, -1);
- if (n1 == 0 && n2 == 0) { /* are both tables empty? */
- lua_pop(L, 2); /* nothing to be done; pop tables */
- return 0; /* nothing to correct */
- }
- if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */
- lua_pop(L, 1); /* pop 2nd table */
- lua_setfenv(L, -2); /* set 1st ktable into new pattern */
- return 0; /* nothing to correct */
- }
- if (n1 == 0) { /* first table is empty? */
- lua_setfenv(L, -3); /* set 2nd table into new pattern */
- lua_pop(L, 1); /* pop 1st table */
- return 0; /* nothing to correct */
- }
- else {
- lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */
- /* stack: new p; ktable p1; ktable p2; new ktable */
- concattable(L, -3, -1); /* from p1 into new ktable */
- concattable(L, -2, -1); /* from p2 into new ktable */
- lua_setfenv(L, -4); /* new ktable becomes p env */
- lua_pop(L, 2); /* pop other ktables */
- return n1; /* correction for indices from p2 */
- }
-}
-
-
-static void correctkeys (TTree *tree, int n) {
- if (n == 0) return; /* no correction? */
- tailcall:
- switch (tree->tag) {
- case TOpenCall: case TCall: case TRunTime: case TRule: {
- if (tree->key > 0)
- tree->key += n;
- break;
- }
- case TCapture: {
- if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
- tree->key += n;
- break;
- }
- default: break;
- }
- switch (numsiblings[tree->tag]) {
- case 1: /* correctkeys(sib1(tree), n); */
- tree = sib1(tree); goto tailcall;
- case 2:
- correctkeys(sib1(tree), n);
- tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */
- default: assert(numsiblings[tree->tag] == 0); break;
- }
-}
-
-
-/*
-** copy 'ktable' of element 'idx' to new tree (on top of stack)
-*/
-static void copyktable (lua_State *L, int idx) {
- lua_getfenv(L, idx);
- lua_setfenv(L, -2);
-}
-
-
-/*
-** merge 'ktable' from rule at stack index 'idx' into 'ktable'
-** from tree at the top of the stack, and correct corresponding
-** tree.
-*/
-static void mergektable (lua_State *L, int idx, TTree *rule) {
- int n;
- lua_getfenv(L, -1); /* get ktables */
- lua_getfenv(L, idx);
- n = concattable(L, -1, -2);
- lua_pop(L, 2); /* remove both ktables */
- correctkeys(rule, n);
-}
-
-
/*
** create a new tree, whith a new root and one sibling.
** Sibling must be on the Lua stack, at index 1.
@@ -464,7 +511,7 @@ static TTree *newroot2sib (lua_State *L, int tag) {
tree->u.ps = 1 + s1;
memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
- correctkeys(sib2(tree), joinktables(L, 1, 2));
+ joinktables(L, 1, sib2(tree), 2);
return tree;
}
@@ -524,8 +571,8 @@ static int lp_choice (lua_State *L) {
*/
static int lp_star (lua_State *L) {
int size1;
- int n = luaL_checkint(L, 2);
- TTree *tree1 = gettree(L, 1, &size1);
+ int n = (int)luaL_checkinteger(L, 2);
+ TTree *tree1 = getpatt(L, 1, &size1);
if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
TTree *tree = newtree(L, (n + 1) * (size1 + 1));
if (nullable(tree1))
@@ -593,7 +640,7 @@ static int lp_sub (lua_State *L) {
sib1(tree)->tag = TNot; /* ...not... */
memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */
memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */
- correctkeys(sib1(tree), joinktables(L, 1, 2));
+ joinktables(L, 1, sib1(tree), 2);
}
return 1;
}
@@ -634,8 +681,8 @@ static int lp_behind (lua_State *L) {
TTree *tree;
TTree *tree1 = getpatt(L, 1, NULL);
int n = fixedlen(tree1);
+ luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length");
luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
- luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length");
luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
tree = newroot1sib(L, TBehind);
tree->u.n = n;
@@ -649,7 +696,7 @@ static int lp_behind (lua_State *L) {
static int lp_V (lua_State *L) {
TTree *tree = newleaf(L, TOpenCall);
luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
- tree->key = addtoktable(L, 1);
+ tree->key = addtonewktable(L, 0, 1);
return 1;
}
@@ -662,7 +709,7 @@ static int lp_V (lua_State *L) {
static int capture_aux (lua_State *L, int cap, int labelidx) {
TTree *tree = newroot1sib(L, TCapture);
tree->cap = cap;
- tree->key = addtoktable(L, labelidx);
+ tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx);
return 1;
}
@@ -670,10 +717,9 @@ static int capture_aux (lua_State *L, int cap, int labelidx) {
/*
** Fill a tree with an empty capture, using an empty (TTrue) sibling.
*/
-static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) {
+static TTree *auxemptycap (TTree *tree, int cap) {
tree->tag = TCapture;
tree->cap = cap;
- tree->key = addtoktable(L, idx);
sib1(tree)->tag = TTrue;
return tree;
}
@@ -682,8 +728,18 @@ static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) {
/*
** Create a tree for an empty capture
*/
-static TTree *newemptycap (lua_State *L, int cap, int idx) {
- return auxemptycap(L, newtree(L, 2), cap, idx);
+static TTree *newemptycap (lua_State *L, int cap) {
+ return auxemptycap(newtree(L, 2), cap);
+}
+
+
+/*
+** Create a tree for an empty capture with an associated Lua value
+*/
+static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
+ TTree *tree = auxemptycap(newtree(L, 2), cap);
+ tree->key = addtonewktable(L, 0, idx);
+ return tree;
}
@@ -722,10 +778,8 @@ static int lp_tablecapture (lua_State *L) {
static int lp_groupcapture (lua_State *L) {
if (lua_isnoneornil(L, 2))
return capture_aux(L, Cgroup, 0);
- else {
- luaL_checkstring(L, 2);
+ else
return capture_aux(L, Cgroup, 2);
- }
}
@@ -741,14 +795,14 @@ static int lp_simplecapture (lua_State *L) {
static int lp_poscapture (lua_State *L) {
- newemptycap(L, Cposition, 0);
+ newemptycap(L, Cposition);
return 1;
}
static int lp_argcapture (lua_State *L) {
- int n = luaL_checkint(L, 1);
- TTree *tree = newemptycap(L, Carg, 0);
+ int n = (int)luaL_checkinteger(L, 1);
+ TTree *tree = newemptycap(L, Carg);
tree->key = n;
luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
return 1;
@@ -756,8 +810,8 @@ static int lp_argcapture (lua_State *L) {
static int lp_backref (lua_State *L) {
- luaL_checkstring(L, 1);
- newemptycap(L, Cbackref, 1);
+ luaL_checkany(L, 1);
+ newemptycapkey(L, Cbackref, 1);
return 1;
}
@@ -771,9 +825,10 @@ static int lp_constcapture (lua_State *L) {
if (n == 0) /* no values? */
newleaf(L, TTrue); /* no capture */
else if (n == 1)
- newemptycap(L, Cconst, 1); /* single constant capture */
+ newemptycapkey(L, Cconst, 1); /* single constant capture */
else { /* create a group capture with all values */
TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
+ newktable(L, n); /* create a 'ktable' for new tree */
tree->tag = TCapture;
tree->cap = Cgroup;
tree->key = 0;
@@ -781,10 +836,12 @@ static int lp_constcapture (lua_State *L) {
for (i = 1; i <= n - 1; i++) {
tree->tag = TSeq;
tree->u.ps = 3; /* skip TCapture and its sibling */
- auxemptycap(L, sib1(tree), Cconst, i);
+ auxemptycap(sib1(tree), Cconst);
+ sib1(tree)->key = addtoktable(L, i);
tree = sib2(tree);
}
- auxemptycap(L, tree, Cconst, i);
+ auxemptycap(tree, Cconst);
+ tree->key = addtoktable(L, i);
}
return 1;
}
@@ -794,7 +851,7 @@ static int lp_matchtime (lua_State *L) {
TTree *tree;
luaL_checktype(L, 2, LUA_TFUNCTION);
tree = newroot1sib(L, TRunTime);
- tree->key = addtoktable(L, 2);
+ tree->key = addtonewktable(L, 1, 2);
return 1;
}
@@ -851,7 +908,7 @@ static int collectrules (lua_State *L, int arg, int *totalsize) {
lua_pushnil(L); /* prepare to traverse grammar table */
while (lua_next(L, arg) != 0) {
if (lua_tonumber(L, -2) == 1 ||
- lua_equal(L, -2, postab + 1)) { /* initial rule? */
+ lp_equal(L, -2, postab + 1)) { /* initial rule? */
lua_pop(L, 1); /* remove value (keep key for lua_next) */
continue;
}
@@ -928,36 +985,40 @@ static int verifyerror (lua_State *L, int *passed, int npassed) {
/*
** Check whether a rule can be left recursive; raise an error in that
-** case; otherwise return 1 iff pattern is nullable. Assume ktable at
-** the top of the stack.
+** case; otherwise return 1 iff pattern is nullable.
+** The return value is used to check sequences, where the second pattern
+** is only relevant if the first is nullable.
+** Parameter 'nb' works as an accumulator, to allow tail calls in
+** choices. ('nb' true makes function returns true.)
+** Assume ktable at the top of the stack.
*/
static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
- int nullable) {
+ int nb) {
tailcall:
switch (tree->tag) {
case TChar: case TSet: case TAny:
case TFalse:
- return nullable; /* cannot pass from here */
+ return nb; /* cannot pass from here */
case TTrue:
case TBehind: /* look-behind cannot have calls */
return 1;
case TNot: case TAnd: case TRep:
/* return verifyrule(L, sib1(tree), passed, npassed, 1); */
- tree = sib1(tree); nullable = 1; goto tailcall;
+ tree = sib1(tree); nb = 1; goto tailcall;
case TCapture: case TRunTime:
- /* return verifyrule(L, sib1(tree), passed, npassed); */
+ /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
tree = sib1(tree); goto tailcall;
case TCall:
- /* return verifyrule(L, sib2(tree), passed, npassed); */
+ /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
tree = sib2(tree); goto tailcall;
- case TSeq: /* only check 2nd child if first is nullable */
+ case TSeq: /* only check 2nd child if first is nb */
if (!verifyrule(L, sib1(tree), passed, npassed, 0))
- return nullable;
- /* else return verifyrule(L, sib2(tree), passed, npassed); */
+ return nb;
+ /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
tree = sib2(tree); goto tailcall;
case TChoice: /* must check both children */
- nullable = verifyrule(L, sib1(tree), passed, npassed, nullable);
- /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */
+ nb = verifyrule(L, sib1(tree), passed, npassed, nb);
+ /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
tree = sib2(tree); goto tailcall;
case TRule:
if (npassed >= MAXRULES)
@@ -1000,7 +1061,7 @@ static void verifygrammar (lua_State *L, TTree *grammar) {
*/
static void initialrulename (lua_State *L, TTree *grammar, int frule) {
if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */
- int n = lua_objlen(L, -1) + 1; /* index for name */
+ int n = lua_rawlen(L, -1) + 1; /* index for name */
lua_pushvalue(L, frule); /* rule's name */
lua_rawseti(L, -2, n); /* ktable was on the top of the stack */
sib1(grammar)->key = n;
@@ -1016,9 +1077,9 @@ static TTree *newgrammar (lua_State *L, int arg) {
luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
g->tag = TGrammar; g->u.n = n;
lua_newtable(L); /* create 'ktable' */
- lua_setfenv(L, -2);
+ lua_setuservalue(L, -2);
buildgrammar(L, g, frule, n);
- lua_getfenv(L, -1); /* get 'ktable' for new tree */
+ lua_getuservalue(L, -1); /* get 'ktable' for new tree */
finalfix(L, frule - 1, g, sib1(g));
initialrulename(L, g, frule);
verifygrammar(L, g);
@@ -1032,7 +1093,7 @@ static TTree *newgrammar (lua_State *L, int arg) {
static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
- lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */
+ lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */
finalfix(L, 0, NULL, p->tree);
lua_pop(L, 1); /* remove 'ktable' */
return compile(L, p);
@@ -1043,7 +1104,7 @@ static int lp_printtree (lua_State *L) {
TTree *tree = getpatt(L, 1, NULL);
int c = lua_toboolean(L, 2);
if (c) {
- lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */
+ lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */
finalfix(L, 0, NULL, tree);
lua_pop(L, 1); /* remove 'ktable' */
}
@@ -1096,7 +1157,7 @@ static int lp_match (lua_State *L) {
int ptop = lua_gettop(L);
lua_pushnil(L); /* initialize subscache */
lua_pushlightuserdata(L, capture); /* initialize caplistidx */
- lua_getfenv(L, 1); /* initialize penvidx */
+ lua_getuservalue(L, 1); /* initialize penvidx */
r = match(L, s, s + i, s + l, code, capture, ptop);
if (r == NULL) {
lua_pushnil(L);
@@ -1113,8 +1174,12 @@ static int lp_match (lua_State *L) {
** =======================================================
*/
+/* maximum limit for stack size */
+#define MAXLIM (INT_MAX / 100)
+
static int lp_setmax (lua_State *L) {
- luaL_optinteger(L, 1, -1);
+ lua_Integer lim = luaL_checkinteger(L, 1);
+ luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range");
lua_settop(L, 1);
lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
return 0;
@@ -1138,8 +1203,7 @@ static int lp_type (lua_State *L) {
int lp_gc (lua_State *L) {
Pattern *p = getpattern(L, 1);
- if (p->codesize > 0)
- reallocprog(L, p, 0);
+ realloccode(L, p, 0); /* delete code block */
return 0;
}
@@ -1222,8 +1286,8 @@ int luaopen_lpeg (lua_State *L) {
luaL_newmetatable(L, PATTERN_T);
lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */
lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
- luaL_register(L, NULL, metareg);
- luaL_register(L, "lpeg", pattreg);
+ luaL_setfuncs(L, metareg, 0);
+ luaL_newlib(L, pattreg);
lua_pushvalue(L, -1);
lua_setfield(L, -3, "__index");
return 1;
diff --git a/lptypes.h b/lptypes.h
index 7ace545..5eb7987 100644
--- a/lptypes.h
+++ b/lptypes.h
@@ -1,7 +1,7 @@
/*
-** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $
+** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $
** LPeg - PEG pattern matching for Lua
-** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
+** Copyright 2007-2015, Lua.org & PUC-Rio (see 'lpeg.html' for license)
** written by Roberto Ierusalimschy
*/
@@ -19,7 +19,7 @@
#include "lua.h"
-#define VERSION "0.12"
+#define VERSION "1.0.0"
#define PATTERN_T "lpeg-pattern"
@@ -27,36 +27,38 @@
/*
-** compatibility with Lua 5.2
+** compatibility with Lua 5.1
*/
-#if (LUA_VERSION_NUM == 502)
+#if (LUA_VERSION_NUM == 501)
-#undef lua_equal
-#define lua_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
+#define lp_equal lua_equal
-#undef lua_getfenv
-#define lua_getfenv lua_getuservalue
-#undef lua_setfenv
-#define lua_setfenv lua_setuservalue
+#define lua_getuservalue lua_getfenv
+#define lua_setuservalue lua_setfenv
-#undef lua_objlen
-#define lua_objlen lua_rawlen
+#define lua_rawlen lua_objlen
-#undef luaL_register
-#define luaL_register(L,n,f) \
- { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); }
+#define luaL_setfuncs(L,f,n) luaL_register(L,NULL,f)
+#define luaL_newlib(L,f) luaL_register(L,"lpeg",f)
#endif
+#if !defined(lp_equal)
+#define lp_equal(L,idx1,idx2) lua_compare(L,(idx1),(idx2),LUA_OPEQ)
+#endif
+
+
/* default maximum size for call/backtrack stack */
#if !defined(MAXBACK)
-#define MAXBACK 100
+#define MAXBACK 400
#endif
/* maximum number of rules in a grammar */
-#define MAXRULES 200
+#if !defined(MAXRULES)
+#define MAXRULES 1000
+#endif
diff --git a/lpvm.c b/lpvm.c
index cd893ed..eaf2ebf 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -1,5 +1,5 @@
/*
-** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $
+** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
*/
@@ -18,7 +18,7 @@
/* initial size for call/backtrack stack */
#if !defined(INITBACK)
-#define INITBACK 100
+#define INITBACK MAXBACK
#endif
@@ -70,7 +70,7 @@ static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
max = lua_tointeger(L, -1); /* maximum allowed size */
lua_pop(L, 1);
if (n >= max) /* already at maximum size? */
- luaL_error(L, "too many pending calls/choices");
+ luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
newn = 2 * n; /* new size */
if (newn > max) newn = max;
newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
diff --git a/lpvm.h b/lpvm.h
index 6a2a558..757b9e1 100644
--- a/lpvm.h
+++ b/lpvm.h
@@ -1,5 +1,5 @@
/*
-** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $
+** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
*/
#if !defined(lpvm_h)
@@ -49,14 +49,9 @@ typedef union Instruction {
} Instruction;
-int getposition (lua_State *L, int t, int i);
void printpatt (Instruction *p, int n);
const char *match (lua_State *L, const char *o, const char *s, const char *e,
Instruction *op, Capture *capture, int ptop);
-int verify (lua_State *L, Instruction *op, const Instruction *p,
- Instruction *e, int postable, int rule);
-void checkrule (lua_State *L, Instruction *op, int from, int to,
- int postable, int rule);
#endif
diff --git a/makefile b/makefile
index 57a18fb..7a8463e 100644
--- a/makefile
+++ b/makefile
@@ -1,5 +1,5 @@
LIBNAME = lpeg
-LUADIR = /usr/include/lua5.1/
+LUADIR = ../lua/
COPT = -O2
# COPT = -DLPEG_DEBUG -g
@@ -22,7 +22,7 @@ CWARNS = -Wall -Wextra -pedantic \
# -Wunreachable-code \
-CFLAGS = $(CWARNS) $(COPT) -ansi -I$(LUADIR) -fPIC
+CFLAGS = $(CWARNS) $(COPT) -std=c99 -I$(LUADIR) -fPIC
CC = gcc
FILES = lpvm.o lpcap.o lptree.o lpcode.o lpprint.o
diff --git a/re.html b/re.html
index 4717ec2..d0d9744 100644
--- a/re.html
+++ b/re.html
@@ -10,7 +10,7 @@
-
+
@@ -296,7 +296,7 @@
Abstract Syntax Trees
a
tag
field telling what non terminal
that table represents.
We can add such a tag using
-
named group captures:
+
named group captures:
x = re.compile[[
@@ -450,7 +450,7 @@ Patterns
-Copyright © 2008-2010 Lua.org, PUC-Rio.
+Copyright © 2008-2015 Lua.org, PUC-Rio.
Permission is hereby granted, free of charge,
@@ -488,7 +488,7 @@
-$Id: re.html,v 1.21 2013/03/28 20:43:30 roberto Exp $
+$Id: re.html,v 1.23 2015/09/28 17:17:41 roberto Exp $
diff --git a/test.lua b/test.lua
index 1d107ca..017a3ab 100755
--- a/test.lua
+++ b/test.lua
@@ -1,6 +1,6 @@
-#!/usr/bin/env lua5.1
+#!/usr/bin/env lua
--- $Id: test.lua,v 1.101 2013/04/12 16:30:33 roberto Exp $
+-- $Id: test.lua,v 1.109 2015/09/28 17:01:25 roberto Exp $
-- require"strict" -- just to be pedantic
@@ -16,9 +16,6 @@ local unpack = rawget(table, "unpack") or unpack
local loadstring = rawget(_G, "loadstring") or load
--- most tests here do not need much stack space
-m.setmaxstack(5)
-
local any = m.P(1)
local space = m.S" \t\n"^0
@@ -170,8 +167,8 @@ assert(m.match( basiclookfor((#m.P(b) * 1) * m.Cp()), " ( (a)") == 7)
a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "123")}
checkeq(a, {"123", "d"})
-a = {m.match(m.C(digit^1) * "d" * -1 + m.C(letter^1 * m.Cc"l"), "123d")}
-checkeq(a, {"123"})
+-- bug in LPeg 0.12 (nil value does not create a 'ktable')
+assert(m.match(m.Cc(nil), "") == nil)
a = {m.match(m.C(digit^1 * m.Cc"d") + m.C(letter^1 * m.Cc"l"), "abcd")}
checkeq(a, {"abcd", "l"})
@@ -194,6 +191,16 @@ checkeq(a, {1, 5})
t = {m.match({[1] = m.C(m.C(1) * m.V(1) + -1)}, "abc")}
checkeq(t, {"abc", "a", "bc", "b", "c", "c", ""})
+-- bug in 0.12 ('hascapture' did not check for captures inside a rule)
+do
+ local pat = m.P{
+ 'S';
+ S1 = m.C('abc') + 3,
+ S = #m.V('S1') -- rule has capture, but '#' must ignore it
+ }
+ assert(pat:match'abc' == 1)
+end
+
-- test for small capture boundary
for i = 250,260 do
@@ -201,9 +208,8 @@ for i = 250,260 do
assert(#m.match(m.C(m.C(i)), string.rep('a', i)) == i)
end
-
-- tests for any*n and any*-n
-for n = 1, 550 do
+for n = 1, 550, 13 do
local x_1 = string.rep('x', n - 1)
local x = x_1 .. 'a'
assert(not m.P(n):match(x_1))
@@ -282,6 +288,13 @@ assert(m.match(m.P"ab"^-1 - "c", "abcd") == 3)
p = ('Aa' * ('Bb' * ('Cc' * m.P'Dd'^0)^0)^0)^-1
assert(p:match("AaBbCcDdBbCcDdDdDdBb") == 21)
+
+
+-- bug in 0.12.2
+-- p = { ('ab' ('c' 'ef'?)*)? }
+p = m.C(('ab' * ('c' * m.P'ef'^-1)^0)^-1)
+s = "abcefccefc"
+assert(s == p:match(s))
pi = "3.14159 26535 89793 23846 26433 83279 50288 41971 69399 37510"
@@ -343,10 +356,16 @@ checkeq(t, {hi = 10, ho = 20})
t = p:match'abc'
checkeq(t, {hi = 10, ho = 20, 'a', 'b', 'c'})
+-- non-string group names
+p = m.Ct(m.Cg(1, print) * m.Cg(1, 23.5) * m.Cg(1, io))
+t = p:match('abcdefghij')
+assert(t[print] == 'a' and t[23.5] == 'b' and t[io] == 'c')
+
-- test for error messages
-local function checkerr (msg, ...)
- assert(m.match({ m.P(msg) + 1 * m.V(1) }, select(2, pcall(...))))
+local function checkerr (msg, f, ...)
+ local st, err = pcall(f, ...)
+ assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err))
end
checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a")
@@ -370,6 +389,32 @@ p = {'a',
}
checkerr("rule 'a' may be left recursive", m.match, p, "a")
+-- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit)
+-- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1
+-- that is optimized to ICommit L1
+
+p = m.P { (m.P {m.P'abc'} + 'ayz') * m.V'y'; y = m.P'x' }
+assert(p:match('abcx') == 5 and p:match('ayzx') == 5 and not p:match'abc')
+
+
+do
+ -- large dynamic Cc
+ local lim = 2^16 - 1
+ local c = 0
+ local function seq (n)
+ if n == 1 then c = c + 1; return m.Cc(c)
+ else
+ local m = math.floor(n / 2)
+ return seq(m) * seq(n - m)
+ end
+ end
+ p = m.Ct(seq(lim))
+ t = p:match('')
+ assert(t[lim] == lim)
+ checkerr("too many", function () p = p / print end)
+ checkerr("too many", seq, lim + 1)
+end
+
-- tests for non-pattern as arguments to pattern functions
@@ -488,7 +533,10 @@ assert(m.match(1 * m.B(1), 'a') == 2)
assert(m.match(-m.B(1), 'a') == 1)
assert(m.match(m.B(250), string.rep('a', 250)) == nil)
assert(m.match(250 * m.B(250), string.rep('a', 250)) == 251)
-assert(not pcall(m.B, 260))
+
+-- look-behind with an open call
+checkerr("pattern may not have fixed length", m.B, m.V'S1')
+checkerr("too long to look behind", m.B, 260)
B = #letter * -m.B(letter) + -letter * m.B(letter)
x = m.Ct({ (B * m.Cp())^-1 * (1 * m.V(1) + m.P(true)) })
@@ -555,18 +603,18 @@ assert(not p:match(string.rep("011", 10001)))
-- this grammar does need backtracking info.
local lim = 10000
p = m.P{ '0' * m.V(1) + '0' }
-assert(not pcall(m.match, p, string.rep("0", lim)))
+checkerr("stack overflow", m.match, p, string.rep("0", lim))
m.setmaxstack(2*lim)
-assert(not pcall(m.match, p, string.rep("0", lim)))
+checkerr("stack overflow", m.match, p, string.rep("0", lim))
m.setmaxstack(2*lim + 4)
-assert(pcall(m.match, p, string.rep("0", lim)))
+assert(m.match(p, string.rep("0", lim)) == lim + 1)
-- this repetition should not need stack space (only the call does)
p = m.P{ ('a' * m.V(1))^0 * 'b' + 'c' }
m.setmaxstack(200)
assert(p:match(string.rep('a', 180) .. 'c' .. string.rep('b', 180)) == 362)
-m.setmaxstack(5) -- restore original limit
+m.setmaxstack(100) -- restore low limit
-- tests for optional start position
assert(m.match("a", "abc", 1))
@@ -588,10 +636,10 @@ print("+")
-- tests for argument captures
-assert(not pcall(m.Carg, 0))
-assert(not pcall(m.Carg, -1))
-assert(not pcall(m.Carg, 2^18))
-assert(not pcall(m.match, m.Carg(1), 'a', 1))
+checkerr("invalid argument", m.Carg, 0)
+checkerr("invalid argument", m.Carg, -1)
+checkerr("invalid argument", m.Carg, 2^18)
+checkerr("absent extra argument #1", m.match, m.Carg(1), 'a', 1)
assert(m.match(m.Carg(1), 'a', 1, print) == print)
x = {m.match(m.Carg(1) * m.Carg(2), '', 1, 10, 20)}
checkeq(x, {10, 20})
@@ -644,14 +692,16 @@ assert(m.match(p, "aaaa") == 5)
assert(m.match(p, "abaa") == 2)
assert(not m.match(p, "baaa"))
-assert(not pcall(m.match, function () return 2^20 end, s))
-assert(not pcall(m.match, function () return 0 end, s))
-assert(not pcall(m.match, function (s, i) return i - 1 end, s))
-assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i - 1 end, s))
+checkerr("invalid position", m.match, function () return 2^20 end, s)
+checkerr("invalid position", m.match, function () return 0 end, s)
+checkerr("invalid position", m.match, function (s, i) return i - 1 end, s)
+checkerr("invalid position", m.match,
+ m.P(1)^0 * function (_, i) return i - 1 end, s)
assert(m.match(m.P(1)^0 * function (_, i) return i end * -1, s))
-assert(not pcall(m.match, m.P(1)^0 * function (_, i) return i + 1 end, s))
+checkerr("invalid position", m.match,
+ m.P(1)^0 * function (_, i) return i + 1 end, s)
assert(m.match(m.P(function (s, i) return s:len() + 1 end) * -1, s))
-assert(not pcall(m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s))
+checkerr("invalid position", m.match, m.P(function (s, i) return s:len() + 2 end) * -1, s)
assert(not m.match(m.P(function (s, i) return s:len() end) * -1, s))
assert(m.match(m.P(1)^0 * function (_, i) return true end, s) ==
string.len(s) + 1)
@@ -696,6 +746,10 @@ t = {m.match(m.Cc(nil,nil,4) * m.Cc(nil,3) * m.Cc(nil, nil) / g / g, "")}
t1 = {1,1,nil,nil,4,nil,3,nil,nil}
for i=1,10 do assert(t[i] == t1[i]) end
+-- bug in 0.12.2: ktable with only nil could be eliminated when joining
+-- with a pattern without ktable
+assert((m.P"aaa" * m.Cc(nil)):match"aaa" == nil)
+
t = {m.match((m.C(1) / function (x) return x, x.."x" end)^0, "abc")}
checkeq(t, {"a", "ax", "b", "bx", "c", "cx"})
@@ -734,9 +788,9 @@ assert(m.match(m.Cs((m.P(1) / ".xx")^0), "abcd") == ".xx.xx.xx.xx")
assert(m.match(m.Cp() * m.P(3) * m.Cp()/"%2%1%1 - %0 ", "abcde") ==
"411 - abc ")
-assert(pcall(m.match, m.P(1)/"%0", "abc"))
-assert(not pcall(m.match, m.P(1)/"%1", "abc")) -- out of range
-assert(not pcall(m.match, m.P(1)/"%9", "abc")) -- out of range
+assert(m.match(m.P(1)/"%0", "abc") == "a")
+checkerr("invalid capture index", m.match, m.P(1)/"%1", "abc")
+checkerr("invalid capture index", m.match, m.P(1)/"%9", "abc")
p = m.C(1)
p = p * p; p = p * p; p = p * p * m.C(1) / "%9 - %1"
@@ -754,7 +808,7 @@ assert(m.match(m.C(1)^0 / "%9-%1-%0-%3", s) == "9-1-" .. s .. "-3")
p = m.Cc('alo') * m.C(1) / "%1 - %2 - %1"
assert(p:match'x' == 'alo - x - alo')
-assert(not pcall(m.match, m.Cc(true) / "%1", "a"))
+checkerr("invalid capture value (a boolean)", m.match, m.Cc(true) / "%1", "a")
-- long strings for string capture
l = 10000
@@ -782,35 +836,37 @@ checkeq(t, {a="b", c="du", xux="yuy"})
-- errors in accumulator capture
--- very long match (forces fold to be a pair open-close) producing with
-- no initial capture
-assert(not pcall(m.match, m.Cf(m.P(500), print), string.rep('a', 600)))
+checkerr("no initial value", m.match, m.Cf(m.P(5), print), 'aaaaaa')
+-- no initial capture (very long match forces fold to be a pair open-close)
+checkerr("no initial value", m.match, m.Cf(m.P(500), print),
+ string.rep('a', 600))
-- nested capture produces no initial value
-assert(not pcall(m.match, m.Cf(m.P(1) / {}, print), "alo"))
+checkerr("no initial value", m.match, m.Cf(m.P(1) / {}, print), "alo")
-- tests for loop checker
-local function haveloop (p)
- assert(not pcall(function (p) return p^0 end, m.P(p)))
+local function isnullable (p)
+ checkerr("may accept empty string", function (p) return p^0 end, m.P(p))
end
-haveloop(m.P("x")^-4)
+isnullable(m.P("x")^-4)
assert(m.match(((m.P(0) + 1) * m.S"al")^0, "alo") == 3)
assert(m.match((("x" + #m.P(1))^-4 * m.S"al")^0, "alo") == 3)
-haveloop("")
-haveloop(m.P("x")^0)
-haveloop(m.P("x")^-1)
-haveloop(m.P("x") + 1 + 2 + m.P("a")^-1)
-haveloop(-m.P("ab"))
-haveloop(- -m.P("ab"))
-haveloop(# #(m.P("ab") + "xy"))
-haveloop(- #m.P("ab")^0)
-haveloop(# -m.P("ab")^1)
-haveloop(#m.V(3))
-haveloop(m.V(3) + m.V(1) + m.P('a')^-1)
-haveloop({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
+isnullable("")
+isnullable(m.P("x")^0)
+isnullable(m.P("x")^-1)
+isnullable(m.P("x") + 1 + 2 + m.P("a")^-1)
+isnullable(-m.P("ab"))
+isnullable(- -m.P("ab"))
+isnullable(# #(m.P("ab") + "xy"))
+isnullable(- #m.P("ab")^0)
+isnullable(# -m.P("ab")^1)
+isnullable(#m.V(3))
+isnullable(m.V(3) + m.V(1) + m.P('a')^-1)
+isnullable({[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(0)})
assert(m.match(m.P{[1] = m.V(2) * m.V(3), [2] = m.V(3), [3] = m.P(1)}^0, "abc")
== 3)
assert(m.match(m.P""^-3, "a") == 1)
@@ -894,13 +950,20 @@ print"+"
-- tests for back references
-assert(not pcall(m.match, m.Cb('x'), ''))
-assert(not pcall(m.match, m.Cg(1, 'a') * m.Cb('b'), 'a'))
+checkerr("back reference 'x' not found", m.match, m.Cb('x'), '')
+checkerr("back reference 'b' not found", m.match, m.Cg(1, 'a') * m.Cb('b'), 'a')
p = m.Cg(m.C(1) * m.C(1), "k") * m.Ct(m.Cb("k"))
t = p:match("ab")
checkeq(t, {"a", "b"})
+p = m.P(true)
+for i = 1, 10 do p = p * m.Cg(1, i) end
+for i = 1, 10 do
+ local p = p * m.Cb(i)
+ assert(p:match('abcdefghij') == string.sub('abcdefghij', i, i))
+end
+
t = {}
function foo (p) t[#t + 1] = p; return p .. "x" end
@@ -1370,8 +1433,7 @@ assert(rev:match"0123456789" == "9876543210")
-- testing error messages in re
local function errmsg (p, err)
- local s, msg = pcall(re.compile, p)
- assert(not s and string.find(msg, err))
+ checkerr(err, re.compile, p)
end
errmsg('aaaa', "rule 'aaaa'")
From 6809a250c1821eb2ba02f7f649d474375e6c1672 Mon Sep 17 00:00:00 2001
From: Heinrich Hartmann
Date: Thu, 26 May 2016 18:46:56 +0200
Subject: [PATCH 2/4] Allocate lightuserdata on the heap.
Solaris (OmniOS, Illumos) version of luajit does not support
allocation of lightuserdata on the stack. This gives errors like this:
```
$ luajit test.lua
General tests for LPeg library
version 1.0.0
luajit: bad light userdata pointer
stack traceback:
[C]: in function 'match'
test.lua:58: in main chunk
[C]: at 0x00402d50
```
This patch fixes this behavior by moving the allocations to the
heap.
---
lptree.c | 8 ++++++--
lpvm.c | 7 +++++--
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/lptree.c b/lptree.c
index ac5f515..6ae8583 100644
--- a/lptree.c
+++ b/lptree.c
@@ -6,6 +6,7 @@
#include <ctype.h>
#include <limits.h>
#include <string.h>
+#include <stdlib.h>
#include "lua.h"
@@ -1147,9 +1148,10 @@ static size_t initposition (lua_State *L, size_t len) {
** Main match function
*/
static int lp_match (lua_State *L) {
- Capture capture[INITCAPSIZE];
+ Capture *capture = calloc(INITCAPSIZE, sizeof(Capture));
const char *r;
size_t l;
+ int rv;
Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1);
const char *s = luaL_checklstring(L, SUBJIDX, &l);
@@ -1163,7 +1165,10 @@ static int lp_match (lua_State *L) {
lua_pushnil(L);
+ free(capture); /* failed match: release heap capture buffer before returning */
return 1;
}
- return getcaptures(L, s, r, ptop);
+ rv = getcaptures(L, s, r, ptop);
+ free(capture);
+ return rv;
}
}
diff --git a/lpvm.c b/lpvm.c
index eaf2ebf..ced0ff8 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -5,6 +5,7 @@
#include <limits.h>
#include <string.h>
+#include <stdlib.h>
#include "lua.h"
@@ -146,7 +147,7 @@ static int removedyncap (lua_State *L, Capture *capture,
*/
const char *match (lua_State *L, const char *o, const char *s, const char *e,
Instruction *op, Capture *capture, int ptop) {
- Stack stackbase[INITBACK];
+ Stack *stackbase = calloc(INITBACK, sizeof(Stack));
Stack *stacklimit = stackbase + INITBACK;
Stack *stack = stackbase; /* point to first empty slot in stack */
int capsize = INITCAPSIZE;
@@ -168,10 +169,12 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
assert(stack == getstackbase(L, ptop) + 1);
capture[captop].kind = Cclose;
capture[captop].s = NULL;
+ free(stackbase);
return s;
}
case IGiveup: {
assert(stack == getstackbase(L, ptop));
+ free(stackbase);
return NULL;
}
case IRet: {
@@ -345,7 +348,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e,
p++;
continue;
}
- default: assert(0); return NULL;
+ default: assert(0); free(stackbase); return NULL;
}
}
}
From 76f742a26137bbaedfb169abce06a8464677881e Mon Sep 17 00:00:00 2001
From: Heinrich Hartmann
Date: Wed, 1 Jun 2016 13:33:25 +0200
Subject: [PATCH 3/4] Enforce usage of our compile() implementation by renaming
it
Original Author: Jonas Kunze.
---
lpcode.c | 2 +-
lpcode.h | 2 +-
lptree.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/lpcode.c b/lpcode.c
index fbf44fe..362ec20 100644
--- a/lpcode.c
+++ b/lpcode.c
@@ -970,7 +970,7 @@ static void peephole (CompileState *compst) {
/*
** Compile a pattern
*/
-Instruction *compile (lua_State *L, Pattern *p) {
+Instruction *lpeg_compile (lua_State *L, Pattern *p) {
CompileState compst;
compst.p = p; compst.ncode = 0; compst.L = L;
realloccode(L, p, 2); /* minimum initial size */
diff --git a/lpcode.h b/lpcode.h
index 896d3c7..c31ab1e 100644
--- a/lpcode.h
+++ b/lpcode.h
@@ -16,7 +16,7 @@ int checkaux (TTree *tree, int pred);
int fixedlenx (TTree *tree, int count, int len);
int hascaptures (TTree *tree);
int lp_gc (lua_State *L);
-Instruction *compile (lua_State *L, Pattern *p);
+Instruction *lpeg_compile (lua_State *L, Pattern *p);
void realloccode (lua_State *L, Pattern *p, int nsize);
int sizei (const Instruction *i);
diff --git a/lptree.c b/lptree.c
index 6ae8583..0485ca3 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1097,7 +1097,7 @@ static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */
finalfix(L, 0, NULL, p->tree);
lua_pop(L, 1); /* remove 'ktable' */
- return compile(L, p);
+ return lpeg_compile(L, p);
}
From 316227ea40dbf42466aa10443048a60edc4bce90 Mon Sep 17 00:00:00 2001
From: Heinrich Hartmann
Date: Wed, 1 Jun 2016 20:04:10 +0200
Subject: [PATCH 4/4] Add checks for calloc's return code.
By using the assert macro, the process will exit if calloc fails. There
is little chance to recover from a failed allocation. Moreover, assert()
is already used in other places.
---
lptree.c | 1 +
lpvm.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/lptree.c b/lptree.c
index 0485ca3..f5a16dc 100644
--- a/lptree.c
+++ b/lptree.c
@@ -1149,6 +1149,7 @@ static size_t initposition (lua_State *L, size_t len) {
*/
static int lp_match (lua_State *L) {
Capture *capture = calloc(INITCAPSIZE, sizeof(Capture));
+ assert(capture);
const char *r;
size_t l;
int rv;
diff --git a/lpvm.c b/lpvm.c
index ced0ff8..b29d21f 100644
--- a/lpvm.c
+++ b/lpvm.c
@@ -148,6 +148,7 @@ static int removedyncap (lua_State *L, Capture *capture,
const char *match (lua_State *L, const char *o, const char *s, const char *e,
Instruction *op, Capture *capture, int ptop) {
Stack *stackbase = calloc(INITBACK, sizeof(Stack));
+ assert(stackbase);
Stack *stacklimit = stackbase + INITBACK;
Stack *stack = stackbase; /* point to first empty slot in stack */
int capsize = INITCAPSIZE;