From 4533d31ed827468869350b9d5341d5702157095a Mon Sep 17 00:00:00 2001 From: reshke Date: Thu, 1 Aug 2024 21:16:36 +0000 Subject: [PATCH] Add parser --- pkg/core/parser/.gitignore | 1 + pkg/core/parser/Makefile | 6 + pkg/core/parser/ast.go | 9 + pkg/core/parser/gram.go | 468 ++++++++++++++++++++++ pkg/core/parser/gram.y | 65 ++++ pkg/core/parser/lex.go | 766 +++++++++++++++++++++++++++++++++++++ pkg/core/parser/lex.rl | 114 ++++++ pkg/core/parser/utils.go | 63 +++ 8 files changed, 1492 insertions(+) create mode 100644 pkg/core/parser/.gitignore create mode 100644 pkg/core/parser/Makefile create mode 100644 pkg/core/parser/ast.go create mode 100644 pkg/core/parser/gram.go create mode 100644 pkg/core/parser/gram.y create mode 100644 pkg/core/parser/lex.go create mode 100644 pkg/core/parser/lex.rl create mode 100644 pkg/core/parser/utils.go diff --git a/pkg/core/parser/.gitignore b/pkg/core/parser/.gitignore new file mode 100644 index 0000000..9aeb32b --- /dev/null +++ b/pkg/core/parser/.gitignore @@ -0,0 +1 @@ +y.output \ No newline at end of file diff --git a/pkg/core/parser/Makefile b/pkg/core/parser/Makefile new file mode 100644 index 0000000..3bb5544 --- /dev/null +++ b/pkg/core/parser/Makefile @@ -0,0 +1,6 @@ +yaccgen: + goyacc -o gram.go -p yy gram.y +gen: + ragel -Z -G2 -o lex.go lex.rl + +build: gen yaccgen \ No newline at end of file diff --git a/pkg/core/parser/ast.go b/pkg/core/parser/ast.go new file mode 100644 index 0000000..d24ed55 --- /dev/null +++ b/pkg/core/parser/ast.go @@ -0,0 +1,9 @@ +package parser + +type Node interface { + iNode() +} + +type SayHelloCommand struct { + Node +} diff --git a/pkg/core/parser/gram.go b/pkg/core/parser/gram.go new file mode 100644 index 0000000..8127f57 --- /dev/null +++ b/pkg/core/parser/gram.go @@ -0,0 +1,468 @@ +// Code generated by goyacc -o gram.go -p yy gram.y. DO NOT EDIT. + +//line gram.y:2 +package parser + +import __yyfmt__ "fmt" + +//line gram.y:2 + +type YpParser yyParser + +func NewYpParser() YpParser { + return yyNewParser() +} + +//line gram.y:15 +type yySymType struct { + yys int + str string + int int + node Node +} + +const SAY = 57346 +const HELLO = 57347 +const SCONST = 57348 +const IDENT = 57349 +const ICONST = 57350 +const TEQ = 57351 +const TSEMICOLON = 57352 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "SAY", + "HELLO", + "SCONST", + "IDENT", + "ICONST", + "TEQ", + "TSEMICOLON", +} +var yyStatenames = [...]string{} + +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 + +//line yacctab:1 +var yyExca = [...]int{ + -1, 1, + 1, -1, + -2, 0, +} + +const yyPrivate = 57344 + +const yyLast = 6 + +var yyAct = [...]int{ + + 5, 6, 3, 4, 1, 2, +} +var yyPact = [...]int{ + + -2, -1000, -10, -4, -1000, -1000, -1000, +} +var yyPgo = [...]int{ + + 0, 5, 5, 4, 3, +} +var yyR1 = [...]int{ + + 0, 3, 4, 4, 2, 2, 1, +} +var yyR2 = [...]int{ + + 0, 2, 1, 0, 1, 1, 2, +} +var yyChk = [...]int{ + + -1000, -3, -1, 4, -4, 10, 5, +} +var yyDef = [...]int{ + + 0, -2, 3, 0, 1, 2, 6, +} +var yyTok1 = [...]int{ + + 1, +} +var yyTok2 = [...]int{ + + 2, 3, 4, 5, 6, 7, 8, 9, 10, +} +var yyTok3 = [...]int{ + 0, +} + +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} + +//line yaccpar:1 + +/* parser for yacc output */ + +var ( + yyDebug = 0 + yyErrorVerbose = false +) + +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} + +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} + +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} + +func (p *yyParserImpl) Lookahead() int { + return p.char +} + +func yyNewParser() yyParser { + return &yyParserImpl{} +} + +const yyFlag = -1000 + +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 + + if !yyErrorVerbose { + return "syntax error" + } + + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg + } + } + + res := "syntax error: unexpected " + yyTokname(lookAhead) + + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) + + // Look for shiftable tokens. + base := yyPact[state] + for tok := TOKSTART; tok-1 < len(yyToknames); tok++ { + if n := base + tok; n >= 0 && n < yyLast && yyChk[yyAct[n]] == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + } + + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || yyExca[i+1] != state { + i += 2 + } + + // Look for tokens that we accept or reduce. + for i += 2; yyExca[i] >= 0; i += 2 { + tok := yyExca[i] + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } + } + + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) + } + return res +} + +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = yyTok1[0] + goto out + } + if char < len(yyTok1) { + token = yyTok1[char] + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = yyTok2[char-yyPrivate] + goto out + } + } + for i := 0; i < len(yyTok3); i += 2 { + token = yyTok3[i+0] + if token == char { + token = yyTok3[i+1] + goto out + } + } + +out: + if token == 0 { + token = yyTok2[1] /* unknown char */ + } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token +} + +func yyParse(yylex yyLexer) int { + return yyNewParser().Parse(yylex) +} + +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. + yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) + } + + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate + +yynewstate: + yyn = yyPact[yystate] + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = yyAct[yyn] + if yyChk[yyn] == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- + } + goto yystack + } + +yydefault: + /* default state action */ + yyn = yyDef[yystate] + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = yyExca[xi+0] + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = yyExca[xi+1] + if yyn < 0 { + goto ret0 + } + } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = yyPact[yyS[yyp].yys] + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = yyAct[yyn] /* simulate a shift of "error" */ + if yyChk[yystate] == yyErrCode { + goto yystack + } + } + + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } + } + + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } + + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= yyR2[yyn] + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. + if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = yyR1[yyn] + yyg := yyPgo[yyn] + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = yyAct[yyg] + } else { + yystate = yyAct[yyj] + if yyChk[yystate] != -yyn { + yystate = yyAct[yyg] + } + } + // dummy call; replaced with literal code + switch yynt { + + case 2: + yyDollar = yyS[yypt-1 : yypt+1] +//line gram.y:53 + { + } + case 3: + yyDollar = yyS[yypt-0 : yypt+1] +//line gram.y:54 + { + } + case 4: + yyDollar = yyS[yypt-1 : yypt+1] +//line gram.y:58 + { + yyVAL.str = yyDollar[1].str + } + case 5: + yyDollar = yyS[yypt-1 : yypt+1] +//line gram.y:59 + { + yyVAL.str = yyDollar[1].str + } + case 6: + yyDollar = yyS[yypt-2 : yypt+1] +//line gram.y:64 + { + yyVAL.node = &SayHelloCommand{} + } + } + goto yystack /* stack new state and value */ +} diff --git a/pkg/core/parser/gram.y b/pkg/core/parser/gram.y new file mode 100644 index 0000000..990b23f --- /dev/null +++ b/pkg/core/parser/gram.y @@ -0,0 +1,65 @@ +%{ +package parser + + +type YpParser yyParser + +func NewYpParser() YpParser { + return yyNewParser() +} + +%} + +// fields inside this union end up as the fields in a structure known +// as ${PREFIX}SymType, of which a reference is passed to the lexer. +%union { + str string + int int + node Node +} + +// any non-terminal which returns a value needs a type, which is +// really a field name in the above union struct + +// CMDS +%type command + +%type reversed_keyword + + + +/* basic words */ +%token SAY HELLO + +// same for terminals +%token SCONST IDENT +%token ICONST + +/* '=' */ +%token TEQ + +/* ';' != */ +%token TSEMICOLON + + +%start any_command + +%% + +any_command: + command semicolon_opt + +semicolon_opt: + TSEMICOLON {} + | /*empty*/ {} + ; + +reversed_keyword: + SAY {$$=$1} + | HELLO {$$=$1} + ; + + +command: + SAY HELLO { $$ = &SayHelloCommand{} } + ; \ No newline at end of file diff --git a/pkg/core/parser/lex.go b/pkg/core/parser/lex.go new file mode 100644 index 0000000..51b924a --- /dev/null +++ b/pkg/core/parser/lex.go @@ -0,0 +1,766 @@ + +//line lex.rl:1 +package parser + +import ( + "strconv" +) + + +//line lex.go:11 +const lexer_start int = 9 +const lexer_first_final int = 9 +const lexer_error int = 0 + +const lexer_en_main int = 9 + + +//line lex.rl:13 + + +type Lexer struct { + data []byte + p, pe, cs int + ts, te, act int + + result []string +} + +func NewLexer(data []byte) *Lexer { + lex := &Lexer{ + data: data, + pe: len(data), + } + +//line lex.go:36 + { + lex.cs = lexer_start + lex.ts = 0 + lex.te = 0 + lex.act = 0 + } + +//line lex.rl:29 + return lex +} + +func ResetLexer(lex *Lexer, data []byte) { + lex.pe = len(data) + lex.data = data + +//line lex.go:52 + { + lex.cs = lexer_start + lex.ts = 0 + lex.te = 0 + lex.act = 0 + } + +//line lex.rl:36 +} + +func (l *Lexer) Error(msg string) { + println(msg) +} + + +func (lex *Lexer) Lex(lval *yySymType) int { + eof := lex.pe + var tok int + + +//line lex.go:73 + { + if ( lex.p) == ( lex.pe) { + goto _test_eof + } + switch lex.cs { + case 9: + goto st_case_9 + case 0: + goto st_case_0 + case 10: + goto st_case_10 + case 1: + goto st_case_1 + case 2: + goto st_case_2 + case 3: + goto st_case_3 + case 4: + goto st_case_4 + case 11: + goto st_case_11 + case 5: + goto st_case_5 + case 12: + goto st_case_12 + case 13: + goto st_case_13 + case 6: + goto st_case_6 + case 7: + goto st_case_7 + case 8: + goto st_case_8 + case 14: + goto st_case_14 + case 15: + goto st_case_15 + case 16: + goto st_case_16 + case 17: + goto st_case_17 + case 18: + goto st_case_18 + case 19: + goto st_case_19 + case 20: + goto st_case_20 + case 21: + goto st_case_21 + case 22: + goto st_case_22 + case 23: + goto st_case_23 + } + goto st_out +tr2: +//line lex.rl:102 + lex.te = ( lex.p)+1 +{ lval.str = string(lex.data[lex.ts + 1:lex.te - 1]); tok = IDENT; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 +tr4: +//line lex.rl:104 + lex.te = ( lex.p)+1 +{ lval.str = string(lex.data[lex.ts + 1:lex.te - 1]); tok = SCONST; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 +tr10: +//line NONE:1 + switch lex.act { + case 0: + {{goto st0 }} + case 2: + {( lex.p) = ( lex.te) - 1 +/* nothing */} + case 6: + {( lex.p) = ( lex.te) - 1 + lval.str = string(lex.data[lex.ts:lex.te]); tok = SAY; {( lex.p)++; lex.cs = 9; goto _out }} + case 7: + {( lex.p) = ( lex.te) - 1 + lval.str = string(lex.data[lex.ts:lex.te]); tok = HELLO; {( lex.p)++; lex.cs = 9; goto _out }} + case 9: + {( lex.p) = ( lex.te) - 1 + lval.str = string(lex.data[lex.ts:lex.te]); tok = IDENT; {( lex.p)++; lex.cs = 9; goto _out }} + } + + goto st9 +tr19: +//line lex.rl:106 + lex.te = ( lex.p)+1 +{ lval.str = string(lex.data[lex.ts:lex.te]); tok = TEQ; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 +tr23: +//line lex.rl:90 + lex.te = ( lex.p) +( lex.p)-- +{ /* do nothing */ } + goto st9 +tr24: +//line lex.rl:92 + lex.te = ( lex.p) +( lex.p)-- +{/* nothing */} + goto st9 +tr25: +//line lex.rl:97 + lex.te = ( lex.p) +( lex.p)-- +{ lval.str = string(lex.data[lex.ts:lex.te]); tok = SCONST; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 +tr26: +//line lex.rl:95 + lex.te = ( lex.p) +( lex.p)-- +{ lval.int, _ = strconv.Atoi(string(lex.data[lex.ts:lex.te])); tok = ICONST; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 +tr27: +//line lex.rl:94 + lex.te = ( lex.p) +( lex.p)-- +{ lval.int, _ = strconv.Atoi(string(lex.data[lex.ts:lex.te])); tok = ICONST; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 +tr28: +//line lex.rl:103 + lex.te = ( lex.p) +( lex.p)-- +{ lval.str = string(lex.data[lex.ts:lex.te]); tok = IDENT; {( lex.p)++; lex.cs = 9; goto _out }} + goto st9 + st9: +//line NONE:1 + lex.ts = 0 + +//line NONE:1 + lex.act = 0 + + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof9 + } + st_case_9: +//line NONE:1 + lex.ts = ( lex.p) + +//line lex.go:214 + switch lex.data[( lex.p)] { + case 32: + goto st10 + case 34: + goto st1 + case 39: + goto st3 + case 45: + goto st4 + case 46: + goto st5 + case 47: + goto st6 + case 55: + goto st15 + case 61: + goto tr19 + case 72: + goto st18 + case 83: + goto st22 + case 95: + goto tr20 + case 104: + goto st18 + case 115: + goto st22 + } + switch { + case lex.data[( lex.p)] < 52: + switch { + case lex.data[( lex.p)] > 13: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 51 { + goto st15 + } + case lex.data[( lex.p)] >= 9: + goto st10 + } + case lex.data[( lex.p)] > 57: + switch { + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + case lex.data[( lex.p)] >= 65: + goto tr20 + } + default: + goto st17 + } + goto st0 +st_case_0: + st0: + lex.cs = 0 + goto _out + st10: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof10 + } + st_case_10: + if lex.data[( lex.p)] == 32 { + goto st10 + } + if 9 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 13 { + goto st10 + } + goto tr23 + st1: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof1 + } + st_case_1: + switch lex.data[( lex.p)] { + case 55: + goto st2 + case 95: + goto st2 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 51 { + goto st2 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto st2 + } + default: + goto st2 + } + goto st0 + st2: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof2 + } + st_case_2: + switch lex.data[( lex.p)] { + case 34: + goto tr2 + case 36: + goto st2 + case 95: + goto st2 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st2 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto st2 + } + default: + goto st2 + } + goto st0 + st3: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof3 + } + st_case_3: + if lex.data[( lex.p)] == 39 { + goto tr4 + } + goto st3 + st4: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof4 + } + st_case_4: + switch lex.data[( lex.p)] { + case 45: + goto st11 + case 46: + goto st5 + } + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st13 + } + goto st0 + st11: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof11 + } + st_case_11: + switch lex.data[( lex.p)] { + case 10: + goto tr24 + case 13: + goto tr24 + } + goto st11 + st5: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof5 + } + st_case_5: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st12 + } + goto st0 + st12: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof12 + } + st_case_12: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st12 + } + goto tr25 + st13: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof13 + } + st_case_13: + if lex.data[( lex.p)] == 46 { + goto st12 + } + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st13 + } + goto tr26 + st6: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof6 + } + st_case_6: + if lex.data[( lex.p)] == 42 { + goto st7 + } + goto st0 + st7: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof7 + } + st_case_7: + if lex.data[( lex.p)] == 42 { + goto st8 + } + goto st7 + st8: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof8 + } + st_case_8: + switch lex.data[( lex.p)] { + case 42: + goto st8 + case 47: + goto tr12 + } + goto st7 +tr12: +//line NONE:1 + lex.te = ( lex.p)+1 + +//line lex.rl:92 + lex.act = 2; + goto st14 + st14: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof14 + } + st_case_14: +//line lex.go:440 + if lex.data[( lex.p)] == 42 { + goto st8 + } + goto st7 + st15: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof15 + } + st_case_15: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 46: + goto st12 + case 95: + goto tr20 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st15 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr27 +tr20: +//line NONE:1 + lex.te = ( lex.p)+1 + +//line lex.rl:103 + lex.act = 9; + goto st16 +tr32: +//line NONE:1 + lex.te = ( lex.p)+1 + +//line lex.rl:100 + lex.act = 7; + goto st16 +tr34: +//line NONE:1 + lex.te = ( lex.p)+1 + +//line lex.rl:99 + lex.act = 6; + goto st16 + st16: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof16 + } + st_case_16: +//line lex.go:497 + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 95: + goto tr20 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr10 + st17: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof17 + } + st_case_17: + if lex.data[( lex.p)] == 46 { + goto st12 + } + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto st17 + } + goto tr27 + st18: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof18 + } + st_case_18: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 69: + goto st19 + case 95: + goto tr20 + case 101: + goto st19 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr28 + st19: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof19 + } + st_case_19: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 76: + goto st20 + case 95: + goto tr20 + case 108: + goto st20 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr28 + st20: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof20 + } + st_case_20: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 76: + goto st21 + case 95: + goto tr20 + case 108: + goto st21 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr28 + st21: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof21 + } + st_case_21: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 79: + goto tr32 + case 95: + goto tr20 + case 111: + goto tr32 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr28 + st22: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof22 + } + st_case_22: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 65: + goto st23 + case 95: + goto tr20 + case 97: + goto st23 + } + switch { + case lex.data[( lex.p)] < 66: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 98 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr28 + st23: + if ( lex.p)++; ( lex.p) == ( lex.pe) { + goto _test_eof23 + } + st_case_23: + switch lex.data[( lex.p)] { + case 36: + goto tr20 + case 89: + goto tr34 + case 95: + goto tr20 + case 121: + goto tr34 + } + switch { + case lex.data[( lex.p)] < 65: + if 48 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 57 { + goto tr20 + } + case lex.data[( lex.p)] > 90: + if 97 <= lex.data[( lex.p)] && lex.data[( lex.p)] <= 122 { + goto tr20 + } + default: + goto tr20 + } + goto tr28 + st_out: + _test_eof9: lex.cs = 9; goto _test_eof + _test_eof10: lex.cs = 10; goto _test_eof + _test_eof1: lex.cs = 1; goto _test_eof + _test_eof2: lex.cs = 2; goto _test_eof + _test_eof3: lex.cs = 3; goto _test_eof + _test_eof4: lex.cs = 4; goto _test_eof + _test_eof11: lex.cs = 11; goto _test_eof + _test_eof5: lex.cs = 5; goto _test_eof + _test_eof12: lex.cs = 12; goto _test_eof + _test_eof13: lex.cs = 13; goto _test_eof + _test_eof6: lex.cs = 6; goto _test_eof + _test_eof7: lex.cs = 7; goto _test_eof + _test_eof8: lex.cs = 8; goto _test_eof + _test_eof14: lex.cs = 14; goto _test_eof + _test_eof15: lex.cs = 15; goto _test_eof + _test_eof16: lex.cs = 16; goto _test_eof + _test_eof17: lex.cs = 17; goto _test_eof + _test_eof18: lex.cs = 18; goto _test_eof + _test_eof19: lex.cs = 19; goto _test_eof + _test_eof20: lex.cs = 20; goto _test_eof + _test_eof21: lex.cs = 21; goto _test_eof + _test_eof22: lex.cs = 22; goto _test_eof + _test_eof23: lex.cs = 23; goto _test_eof + + _test_eof: {} + if ( lex.p) == eof { + switch lex.cs { + case 10: + goto tr23 + case 11: + goto tr24 + case 12: + goto tr25 + case 13: + goto tr26 + case 7: + goto tr10 + case 8: + goto tr10 + case 14: + goto tr24 + case 15: + goto tr27 + case 16: + goto tr10 + case 17: + goto tr27 + case 18: + goto tr28 + case 19: + goto tr28 + case 20: + goto tr28 + case 21: + goto tr28 + case 22: + goto tr28 + case 23: + goto tr28 + } + } + + _out: {} + } + +//line lex.rl:111 + + + return int(tok); +} \ No newline at end of file diff --git a/pkg/core/parser/lex.rl b/pkg/core/parser/lex.rl new file mode 100644 index 0000000..02f1354 --- /dev/null +++ b/pkg/core/parser/lex.rl @@ -0,0 +1,114 @@ +package parser + +import ( + "strconv" +) + +%%{ + machine lexer; + write data; + access lex.; + variable p lex.p; + variable pe lex.pe; +}%% + +type Lexer struct { + data []byte + p, pe, cs int + ts, te, act int + + result []string +} + +func NewLexer(data []byte) *Lexer { + lex := &Lexer{ + data: data, + pe: len(data), + } + %% write init; + return lex +} + +func ResetLexer(lex *Lexer, data []byte) { + lex.pe = len(data) + lex.data = data + %% write init; +} + +func (l *Lexer) Error(msg string) { + println(msg) +} + + +func (lex *Lexer) Lex(lval *yySymType) int { + eof := lex.pe + var tok int + + %%{ + # /* digit = [0-9] ; already defined */ + + +# xcstart = \/\*{op_chars}*; +# xcstop = \*+\/; +# xcinside = [^*/]+; + + integer = digit+; + ninteger = '-' integer; + param = '$' integer; + + decimal = ((digit*'.'digit+)|(digit+'.'digit*)); + real = (decimal)|('-'decimal); + + ident_start = [A-Za-z\200-\377_]; + ident_cont = [A-Za-z\200-\377_0-9$]; + + identifier = ident_start ident_cont*; + + qidentifier = '"' ident_start ident_cont* '"' ; + + +# space = [ \t\n\r\f]; + horiz_space = [ \t\f]; + newline = [\n\r]; + non_newline = [^\n\r]; + + sql_comment = '-''-' non_newline*; + c_style_comment = '/''*' (any - '*''/')* '*''/'; + comment = sql_comment | c_style_comment; + + +# whitespace = ({space}+|{comment}); + whitespace = space+; + + + op_chars = ( '~' | '!' | '@' | '#' | '^' | '&' | '|' | '`' | '?' | '+' | '-' | '*' | '\\' | '%' | '<' | '>' | '=' ) ; + operator = op_chars+; + + sconst = '\'' (any-'\'')* '\''; + + main := |* + whitespace => { /* do nothing */ }; + # integer const is string const + comment => {/* nothing */}; + + integer => { lval.int, _ = strconv.Atoi(string(lex.data[lex.ts:lex.te])); tok = ICONST; fbreak;}; + ninteger => { lval.int, _ = strconv.Atoi(string(lex.data[lex.ts:lex.te])); tok = ICONST; fbreak;}; + + real => { lval.str = string(lex.data[lex.ts:lex.te]); tok = SCONST; fbreak;}; + + /SAY/i => { lval.str = string(lex.data[lex.ts:lex.te]); tok = SAY; fbreak;}; + /HELLO/i => { lval.str = string(lex.data[lex.ts:lex.te]); tok = HELLO; fbreak;}; + + qidentifier => { lval.str = string(lex.data[lex.ts + 1:lex.te - 1]); tok = IDENT; fbreak;}; + identifier => { lval.str = string(lex.data[lex.ts:lex.te]); tok = IDENT; fbreak;}; + sconst => { lval.str = string(lex.data[lex.ts + 1:lex.te - 1]); tok = SCONST; fbreak;}; + + '=' => { lval.str = string(lex.data[lex.ts:lex.te]); tok = TEQ; fbreak;}; + + *|; + + write exec; + }%% + + return int(tok); +} \ No newline at end of file diff --git a/pkg/core/parser/utils.go b/pkg/core/parser/utils.go new file mode 100644 index 0000000..c8046c2 --- /dev/null +++ b/pkg/core/parser/utils.go @@ -0,0 +1,63 @@ +package parser + +import ( + "errors" + "fmt" +) + +// Tokenizer is the struct used to generate SQL +// tokens for the parser. +type Tokenizer struct { + s string + pos int + + ParseTree Node + LastError string + l *Lexer +} + +func (t *Tokenizer) Error(s string) { + t.LastError = s +} + +func NewStringTokenizer(sql string) *Tokenizer { + return &Tokenizer{ + s: sql, + l: NewLexer([]byte(sql)), + } +} + +func (t *Tokenizer) Lex(lval *yySymType) int { + return t.l.Lex(lval) +} + +func (t *Tokenizer) LexT() int { + return t.l.Lex(new(yySymType)) +} + +func (t *Tokenizer) Reset(sql string) { + t.s = sql + ResetLexer(t.l, []byte(sql)) +} + +func setParseTree(yylex interface{}, stmt Node) { + yylex.(*Tokenizer).ParseTree = stmt +} + +func Parse(sql string) (Node, error) { + tokenizer := NewStringTokenizer(sql) + if yyParse(tokenizer) != 0 { + return nil, errors.New(tokenizer.LastError + fmt.Sprintf(" on pos %d", tokenizer.l.ts)) + } + ast := tokenizer.ParseTree + return ast, nil +} + +func ParseWithLexerParser(l YpParser, t *Tokenizer, sql string) (Node, error) { + t.Reset(sql) + if l.Parse(t) != 0 { + return nil, errors.New(t.LastError + fmt.Sprintf(" on pos %d", t.l.ts)) + } + ast := t.ParseTree + return ast, nil +}