Skip to content

Commit

Permalink
Update parser to support comment body (#22)
Browse files Browse the repository at this point in the history
This updates the yacc file and parser to recognise the special @comment
entry type.

https://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html#comment
  • Loading branch information
nickng committed Dec 12, 2023
1 parent 443fee2 commit 529c626
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 71 deletions.
5 changes: 2 additions & 3 deletions bibtex.y
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var bib *BibTex // Only for holding current bib

%token tCOMMENT tSTRING tPREAMBLE
%token tATSIGN tCOLON tEQUAL tCOMMA tPOUND tLBRACE tRBRACE tDQUOTE tLPAREN tRPAREN
%token <strval> tBAREIDENT tIDENT
%token <strval> tBAREIDENT tIDENT tCOMMENTBODY
%type <bibtex> bibtex
%type <bibentry> bibentry
%type <bibtag> tag stringentry
Expand All @@ -47,8 +47,7 @@ bibentry : tATSIGN tBAREIDENT tLBRACE tBAREIDENT tCOMMA tags tRBRACE { $$ = NewB
| tATSIGN tBAREIDENT tLPAREN tBAREIDENT tCOMMA tags tRPAREN { $$ = NewBibEntry($2, $4); for _, t := range $6 { $$.AddField(t.key, t.val) } }
;

commententry : tATSIGN tCOMMENT tLBRACE longstring tRBRACE {}
| tATSIGN tCOMMENT tLPAREN longstring tRBRACE {}
commententry : tATSIGN tCOMMENT tCOMMENTBODY { }
;

stringentry : tATSIGN tSTRING tLBRACE tBAREIDENT tEQUAL longstring tRBRACE { $$ = &bibTag{key: $4, val: $6 } }
Expand Down
111 changes: 52 additions & 59 deletions bibtex.y.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 6 additions & 7 deletions docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
// The package contains a simple parser and data structure to represent bibtex
// records.
//
// Supported syntax
// # Supported syntax
//
// The basic syntax is:
//
// @BIBTYPE{IDENT,
// key1 = word,
// key2 = "quoted",
// key3 = {quoted},
// }
// @BIBTYPE{IDENT,
// key1 = word,
// key2 = "quoted",
// key3 = {quoted},
// }
//
// where BIBTYPE is the type of document (e.g. inproceedings, article, etc.)
// and IDENT is a string identifier.
Expand All @@ -20,5 +20,4 @@
// found in the link below. If there are any problems, please file any issues
// with a minimal working example at the GitHub repository.
// http://maverick.inria.fr/~Xavier.Decoret/resources/xdkbibtex/bibtex_summary.html
//
package bibtex // import "github.com/nickng/bibtex"
35 changes: 33 additions & 2 deletions scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ var parseField bool

// scanner is a lexical scanner
type scanner struct {
r *bufio.Reader
pos tokenPos
commentMode bool
r *bufio.Reader
pos tokenPos
}

// newScanner returns a new instance of scanner.
Expand Down Expand Up @@ -79,6 +80,13 @@ func (s *scanner) Scan() (tok token, lit string, err error) {
if parseField {
return s.scanBraced()
}
// If we're reading a comment, return everything after {
// to the next @-sign (exclusive)
if s.commentMode {
s.unread()
commentBodyTok, commentBody := s.scanCommentBody()
return commentBodyTok, commentBody, nil
}
return tLBRACE, string(ch), nil
case '}':
if parseField { // reset parseField if reached end of entry.
Expand Down Expand Up @@ -122,6 +130,7 @@ func (s *scanner) scanBare() (token, string) {
}
str := buf.String()
if strings.ToLower(str) == "comment" {
s.commentMode = true
return tCOMMENT, str
} else if strings.ToLower(str) == "preamble" {
return tPREAMBLE, str
Expand Down Expand Up @@ -193,6 +202,28 @@ func (s *scanner) scanQuoted() (token, string) {
return tILLEGAL, buf.String()
}

// scanCommentBody reads the body of a bibtex @comment entry. Starting at
// the current read position it collects every rune up to, but not
// including, the next '@'; if the input ends first, everything read so far
// is the body. A terminating '@' is handed back with unread rather than
// being consumed. Braces are not matched, so the body may be unbalanced.
//
// For example, with the remaining input
//
//	...anything can go here even if braces are unbalanced@
//
// the returned literal is
//
//	...anything can go here even if braces are unbalanced
//
// commentMode is cleared before returning, and the token is always
// tCOMMENTBODY.
func (s *scanner) scanCommentBody() (token, string) {
	var body bytes.Buffer
	for ch := s.read(); ch != eof; ch = s.read() {
		if ch == '@' {
			// Leave the '@' in the input instead of swallowing it.
			s.unread()
			break
		}
		// The returned values are deliberately discarded, as before.
		_, _ = body.WriteRune(ch)
	}
	s.commentMode = false
	return tCOMMENTBODY, body.String()
}

// ignoreWhitespace consumes the current rune and all contiguous whitespace.
func (s *scanner) ignoreWhitespace() {
for {
Expand Down

0 comments on commit 529c626

Please sign in to comment.