diff --git a/bibtex.y b/bibtex.y index 2d4569b..887fa5e 100644 --- a/bibtex.y +++ b/bibtex.y @@ -80,7 +80,9 @@ func Parse(r io.Reader) (*BibTex, error) { l := newLexer(r) bibtexParse(l) select { - case err := <-l.Errors: + case err := <-l.Errors: // Non-yacc errors + return nil, err + case err := <-l.ParseErrors: return nil, err default: return bib, nil diff --git a/bibtex.y.go b/bibtex.y.go index 7091167..224ed63 100644 --- a/bibtex.y.go +++ b/bibtex.y.go @@ -65,7 +65,6 @@ var bibtexToknames = [...]string{ "tBAREIDENT", "tIDENT", } - var bibtexStatenames = [...]string{} const bibtexEofCode = 1 @@ -79,15 +78,17 @@ func Parse(r io.Reader) (*BibTex, error) { l := newLexer(r) bibtexParse(l) select { - case err := <-l.Errors: + case err := <-l.Errors: // Non-yacc errors return nil, err + //case err := <-l.ParseErrors: + // return nil, err default: return bib, nil } } //line yacctab:1 -var bibtexExca = [...]int8{ +var bibtexExca = [...]int{ -1, 1, 1, -1, -2, 0, @@ -97,7 +98,8 @@ const bibtexPrivate = 57344 const bibtexLast = 61 -var bibtexAct = [...]int8{ +var bibtexAct = [...]int{ + 22, 39, 40, 41, 9, 10, 11, 24, 23, 44, 43, 27, 48, 26, 21, 20, 25, 8, 50, 28, 29, 33, 33, 49, 18, 16, 38, 19, 17, 14, @@ -106,8 +108,8 @@ var bibtexAct = [...]int8{ 54, 53, 33, 7, 32, 4, 1, 6, 5, 3, 2, } +var bibtexPact = [...]int{ -var bibtexPact = [...]int16{ -1000, -1000, 46, -1000, -1000, -1000, -1000, 0, 19, 17, 13, 12, -2, -3, -10, -10, -4, -6, -10, -10, 25, 20, 41, -1000, -1000, 36, 39, 34, 33, 10, @@ -115,24 +117,24 @@ var bibtexPact = [...]int16{ -1000, 14, 2, -1000, -1000, 28, 27, -1000, -14, -10, -1000, -1000, -1000, -1000, 11, } +var bibtexPgo = [...]int{ -var bibtexPgo = [...]int8{ 0, 60, 59, 2, 58, 1, 0, 57, 56, 55, } +var bibtexR1 = [...]int{ -var bibtexR1 = [...]int8{ 0, 8, 1, 1, 1, 1, 1, 2, 2, 9, 9, 4, 4, 7, 7, 6, 6, 6, 6, 3, 3, 5, 5, } +var bibtexR2 = [...]int{ -var bibtexR2 = [...]int8{ 0, 1, 0, 2, 2, 2, 2, 7, 7, 5, 5, 7, 7, 5, 5, 1, 1, 3, 3, 0, 3, 1, 3, } +var bibtexChk = [...]int{ -var bibtexChk = [...]int16{ -1000, -8, -1, -2, -9, -4, -7, 7, 17, 4, 5, 6, 12, 15, 12, 15, 12, 15, 12, 15, 17, 17, -6, 18, 17, -6, 17, 17, -6, -6, @@ -140,8 +142,8 @@ var bibtexChk = [...]int16{ -3, 17, -5, 18, 17, -6, -6, 13, 10, 9, 16, 13, 13, -3, -6, } +var bibtexDef = [...]int{ -var bibtexDef = [...]int8{ 2, -2, 1, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 16, 0, 0, 0, 0, 0, @@ -149,17 +151,16 @@ var bibtexDef = [...]int8{ 21, 0, 0, 17, 18, 0, 0, 7, 19, 0, 8, 11, 12, 22, 20, } +var bibtexTok1 = [...]int{ -var bibtexTok1 = [...]int8{ 1, } +var bibtexTok2 = [...]int{ -var bibtexTok2 = [...]int8{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, } - -var bibtexTok3 = [...]int8{ +var bibtexTok3 = [...]int{ 0, } @@ -241,9 +242,9 @@ func bibtexErrorMessage(state, lookAhead int) string { expected := make([]int, 0, 4) // Look for shiftable tokens. - base := int(bibtexPact[state]) + base := bibtexPact[state] for tok := TOKSTART; tok-1 < len(bibtexToknames); tok++ { - if n := base + tok; n >= 0 && n < bibtexLast && int(bibtexChk[int(bibtexAct[n])]) == tok { + if n := base + tok; n >= 0 && n < bibtexLast && bibtexChk[bibtexAct[n]] == tok { if len(expected) == cap(expected) { return res } @@ -253,13 +254,13 @@ func bibtexErrorMessage(state, lookAhead int) string { if bibtexDef[state] == -2 { i := 0 - for bibtexExca[i] != -1 || int(bibtexExca[i+1]) != state { + for bibtexExca[i] != -1 || bibtexExca[i+1] != state { i += 2 } // Look for tokens that we accept or reduce. for i += 2; bibtexExca[i] >= 0; i += 2 { - tok := int(bibtexExca[i]) + tok := bibtexExca[i] if tok < TOKSTART || bibtexExca[i+1] == 0 { continue } @@ -290,30 +291,30 @@ func bibtexlex1(lex bibtexLexer, lval *bibtexSymType) (char, token int) { token = 0 char = lex.Lex(lval) if char <= 0 { - token = int(bibtexTok1[0]) + token = bibtexTok1[0] goto out } if char < len(bibtexTok1) { - token = int(bibtexTok1[char]) + token = bibtexTok1[char] goto out } if char >= bibtexPrivate { if char < bibtexPrivate+len(bibtexTok2) { - token = int(bibtexTok2[char-bibtexPrivate]) + token = bibtexTok2[char-bibtexPrivate] goto out } } for i := 0; i < len(bibtexTok3); i += 2 { - token = int(bibtexTok3[i+0]) + token = bibtexTok3[i+0] if token == char { - token = int(bibtexTok3[i+1]) + token = bibtexTok3[i+1] goto out } } out: if token == 0 { - token = int(bibtexTok2[1]) /* unknown char */ + token = bibtexTok2[1] /* unknown char */ } if bibtexDebug >= 3 { __yyfmt__.Printf("lex %s(%d)\n", bibtexTokname(token), uint(char)) @@ -368,7 +369,7 @@ bibtexstack: bibtexS[bibtexp].yys = bibtexstate bibtexnewstate: - bibtexn = int(bibtexPact[bibtexstate]) + bibtexn = bibtexPact[bibtexstate] if bibtexn <= bibtexFlag { goto bibtexdefault /* simple state */ } @@ -379,8 +380,8 @@ bibtexnewstate: if bibtexn < 0 || bibtexn >= bibtexLast { goto bibtexdefault } - bibtexn = int(bibtexAct[bibtexn]) - if int(bibtexChk[bibtexn]) == bibtextoken { /* valid shift */ + bibtexn = bibtexAct[bibtexn] + if bibtexChk[bibtexn] == bibtextoken { /* valid shift */ bibtexrcvr.char = -1 bibtextoken = -1 bibtexVAL = bibtexrcvr.lval @@ -393,7 +394,7 @@ bibtexnewstate: bibtexdefault: /* default state action */ - bibtexn = int(bibtexDef[bibtexstate]) + bibtexn = bibtexDef[bibtexstate] if bibtexn == -2 { if bibtexrcvr.char < 0 { bibtexrcvr.char, bibtextoken = bibtexlex1(bibtexlex, &bibtexrcvr.lval) @@ -402,18 +403,18 @@ bibtexdefault: /* look through exception table */ xi := 0 for { - if bibtexExca[xi+0] == -1 && int(bibtexExca[xi+1]) == bibtexstate { + if bibtexExca[xi+0] == -1 && bibtexExca[xi+1] == bibtexstate { break } xi += 2 } for xi += 2; ; xi += 2 { - bibtexn = int(bibtexExca[xi+0]) + bibtexn = bibtexExca[xi+0] if bibtexn < 0 || bibtexn == bibtextoken { break } } - bibtexn = int(bibtexExca[xi+1]) + bibtexn = bibtexExca[xi+1] if bibtexn < 0 { goto ret0 } @@ -435,10 +436,10 @@ bibtexdefault: /* find a state where "error" is a legal shift action */ for bibtexp >= 0 { - bibtexn = int(bibtexPact[bibtexS[bibtexp].yys]) + bibtexErrCode + bibtexn = bibtexPact[bibtexS[bibtexp].yys] + bibtexErrCode if bibtexn >= 0 && bibtexn < bibtexLast { - bibtexstate = int(bibtexAct[bibtexn]) /* simulate a shift of "error" */ - if int(bibtexChk[bibtexstate]) == bibtexErrCode { + bibtexstate = bibtexAct[bibtexn] /* simulate a shift of "error" */ + if bibtexChk[bibtexstate] == bibtexErrCode { goto bibtexstack } } @@ -474,7 +475,7 @@ bibtexdefault: bibtexpt := bibtexp _ = bibtexpt // guard against "declared and not used" - bibtexp -= int(bibtexR2[bibtexn]) + bibtexp -= bibtexR2[bibtexn] // bibtexp is now the index of $0. Perform the default action. Iff the // reduced production is ε, $1 is possibly out of range. if bibtexp+1 >= len(bibtexS) { @@ -485,16 +486,16 @@ bibtexdefault: bibtexVAL = bibtexS[bibtexp+1] /* consult goto table to find next state */ - bibtexn = int(bibtexR1[bibtexn]) - bibtexg := int(bibtexPgo[bibtexn]) + bibtexn = bibtexR1[bibtexn] + bibtexg := bibtexPgo[bibtexn] bibtexj := bibtexg + bibtexS[bibtexp].yys + 1 if bibtexj >= bibtexLast { - bibtexstate = int(bibtexAct[bibtexg]) + bibtexstate = bibtexAct[bibtexg] } else { - bibtexstate = int(bibtexAct[bibtexj]) - if int(bibtexChk[bibtexstate]) != -bibtexn { - bibtexstate = int(bibtexAct[bibtexg]) + bibtexstate = bibtexAct[bibtexj] + if bibtexChk[bibtexstate] != -bibtexn { + bibtexstate = bibtexAct[bibtexg] } } // dummy call; replaced with literal code diff --git a/bibtex_test.go b/bibtex_test.go index 3e3b05a..a2a6315 100644 --- a/bibtex_test.go +++ b/bibtex_test.go @@ -2,6 +2,7 @@ package bibtex import ( "bytes" + "errors" "fmt" "io/ioutil" "path/filepath" @@ -145,6 +146,21 @@ func TestPrettyStringRoundTrip(t *testing.T) { } } +func TestUnexpectedAtSign(t *testing.T) { + // Tests correct syntax but scanning error + b, err := ioutil.ReadFile("example/unexpected-at-sign.badbib") + if err != nil { + t.Fatal(err) + } + _, err = Parse(bytes.NewReader(b)) + if err == nil { + t.Fatal("Expected error but got none") + } + if !errors.Is(err, ErrUnexpectedAtsign) { + t.Fatalf("expected error %+v but got %+v", ErrUnexpectedAtsign, err) + } +} + func AssertEntryListsEqual(t *testing.T, a, b []*BibEntry) { t.Helper() diff --git a/error.go b/error.go index b854e26..5119d74 100644 --- a/error.go +++ b/error.go @@ -7,9 +7,9 @@ import ( var ( // ErrUnexpectedAtsign is an error for unexpected @ in {}. - ErrUnexpectedAtsign = errors.New("Unexpected @ sign") + ErrUnexpectedAtsign = errors.New("unexpected @ sign") // ErrUnknownStringVar is an error for looking up undefined string var. - ErrUnknownStringVar = errors.New("Unknown string variable") + ErrUnknownStringVar = errors.New("unknown string variable") ) // ErrParse is a parse error. @@ -19,5 +19,5 @@ type ErrParse struct { } func (e *ErrParse) Error() string { - return fmt.Sprintf("Parse failed at %s: %s", e.Pos, e.Err) + return fmt.Sprintf("parse failed at %s: %s", e.Pos, e.Err) } diff --git a/example/unexpected-at-sign.badbib b/example/unexpected-at-sign.badbib new file mode 100644 index 0000000..aefa9eb --- /dev/null +++ b/example/unexpected-at-sign.badbib @@ -0,0 +1,3 @@ +@misc{web, + y = {ab@cd}, +} diff --git a/go.mod b/go.mod index a16c5c7..f11c2d4 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/nickng/bibtex -go 1.13 +go 1.18 require github.com/BurntSushi/toml v0.3.1 diff --git a/lexer.go b/lexer.go index 524a524..65d75ad 100644 --- a/lexer.go +++ b/lexer.go @@ -2,27 +2,39 @@ package bibtex -import "io" +import ( + "fmt" + "io" +) // lexer for bibtex. type lexer struct { - scanner *scanner - Errors chan error + scanner *scanner + ParseErrors chan error // Parse errors from yacc + Errors chan error // Other errors } // newLexer returns a new yacc-compatible lexer. func newLexer(r io.Reader) *lexer { - return &lexer{scanner: newScanner(r), Errors: make(chan error, 1)} + return &lexer{ + scanner: newScanner(r), + ParseErrors: make(chan error, 1), + Errors: make(chan error, 1), + } } // Lex is provided for yacc-compatible parser. func (l *lexer) Lex(yylval *bibtexSymType) int { - token, strval := l.scanner.Scan() + token, strval, err := l.scanner.Scan() + if err != nil { + l.Errors <- fmt.Errorf("%w at %s", err, l.scanner.pos) + return int(0) + } yylval.strval = strval return int(token) } // Error handles error. func (l *lexer) Error(err string) { - l.Errors <- &ErrParse{Err: err, Pos: l.scanner.pos} + l.ParseErrors <- &ErrParse{Err: err, Pos: l.scanner.pos} } diff --git a/scanner.go b/scanner.go index fca3951..b3cc089 100644 --- a/scanner.go +++ b/scanner.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "io" - "log" "strconv" "strings" ) @@ -50,7 +49,7 @@ func (s *scanner) unread() { } // Scan returns the next token and literal value. -func (s *scanner) Scan() (tok token, lit string) { +func (s *scanner) Scan() (tok token, lit string, err error) { ch := s.read() if isWhitespace(ch) { s.ignoreWhitespace() @@ -62,47 +61,50 @@ func (s *scanner) Scan() (tok token, lit string) { } switch ch { case eof: - return 0, "" + return 0, "", nil case '@': - return tATSIGN, string(ch) + return tATSIGN, string(ch), nil case ':': - return tCOLON, string(ch) + return tCOLON, string(ch), nil case ',': parseField = false // reset parseField if reached end of field. - return tCOMMA, string(ch) + return tCOMMA, string(ch), nil case '=': parseField = true // set parseField if = sign outside quoted or ident. - return tEQUAL, string(ch) + return tEQUAL, string(ch), nil case '"': - return s.scanQuoted() + tok, lit := s.scanQuoted() + return tok, lit, nil case '{': if parseField { return s.scanBraced() } - return tLBRACE, string(ch) + return tLBRACE, string(ch), nil case '}': if parseField { // reset parseField if reached end of entry. parseField = false } - return tRBRACE, string(ch) + return tRBRACE, string(ch), nil case '#': - return tPOUND, string(ch) + return tPOUND, string(ch), nil case ' ': s.ignoreWhitespace() } - return tILLEGAL, string(ch) + return tILLEGAL, string(ch), nil } // scanIdent categorises a string to one of three categories. -func (s *scanner) scanIdent() (tok token, lit string) { +func (s *scanner) scanIdent() (tok token, lit string, err error) { switch ch := s.read(); ch { case '"': - return s.scanQuoted() + tok, lit := s.scanQuoted() + return tok, lit, nil case '{': return s.scanBraced() default: s.unread() // Not open quote/brace. - return s.scanBare() + tok, lit := s.scanBare() + return tok, lit, nil } } @@ -132,7 +134,7 @@ func (s *scanner) scanBare() (token, string) { } // scanBraced parses a braced string, like {this}. -func (s *scanner) scanBraced() (token, string) { +func (s *scanner) scanBraced() (token, string, error) { var buf bytes.Buffer var macro bool brace := 1 @@ -149,14 +151,14 @@ func (s *scanner) scanBraced() (token, string) { brace-- macro = false if brace == 0 { // Balances open brace. - return tIDENT, buf.String() + return tIDENT, buf.String(), nil } _, _ = buf.WriteRune(ch) } else if ch == '@' { if macro { _, _ = buf.WriteRune(ch) } else { - log.Fatalf("%s: %s", ErrUnexpectedAtsign, buf.String()) + return token(0), buf.String(), ErrUnexpectedAtsign } } else if isWhitespace(ch) { _, _ = buf.WriteRune(ch) @@ -165,7 +167,7 @@ func (s *scanner) scanBraced() (token, string) { _, _ = buf.WriteRune(ch) } } - return tILLEGAL, buf.String() + return tILLEGAL, buf.String(), nil } // scanQuoted parses a quoted string, like "this".