From fcaca240b9db8525e660fc1452a122b847456dbb Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 3 Jan 2021 10:43:49 -0500 Subject: [PATCH] Optional bootstrapping (#175) Before this patch, building 'happy' required a pre-built binary of 'happy'. This was elegant in the same way a self-hosting compiler is elegant. But it also made building purely from source more complicated than needed. Now, we have a subset of the functionality also written with parser combinators. One can build that minimal happy from source, and then bootstrap the regular one from it. This simplifies the build process considerably, along with helping with various (albeit mostly theoretical) trust issues. The parser combinators are bespoke now, but since they need not be performant we will probably just switch to `ReadP` from base, later. Co-authored-by: Vladislav Zavialov --- .appveyor.yml | 2 - .travis.yml | 4 +- Makefile | 22 +-- examples/ErlParser.ly | 2 +- happy.cabal | 24 ++- src/AttrGrammar.lhs | 40 ++--- src/AttrGrammarParser.ly | 7 +- src/Grammar.lhs | 25 ++- src/Lexer.lhs | 100 +++++------ src/Main.lhs | 5 +- src/ParseMonad.hs | 26 +-- src/ParseMonad/Bootstrapped.hs | 30 ++++ src/ParseMonad/Class.hs | 13 ++ src/ParseMonad/Oracle.hs | 99 ++++++++++ src/Parser.hs | 8 + src/{Parser.ly => Parser/Bootstrapped.ly} | 9 +- src/Parser/Oracle.hs | 209 ++++++++++++++++++++++ test.hs | 7 +- tests/Makefile | 5 +- 19 files changed, 512 insertions(+), 125 deletions(-) create mode 100644 src/ParseMonad/Bootstrapped.hs create mode 100644 src/ParseMonad/Class.hs create mode 100644 src/ParseMonad/Oracle.hs create mode 100644 src/Parser.hs rename src/{Parser.ly => Parser/Bootstrapped.ly} (96%) create mode 100644 src/Parser/Oracle.hs diff --git a/.appveyor.yml b/.appveyor.yml index de5d8a51..41b50a4e 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -29,8 +29,6 @@ install: - "cabal %CABOPTS% v2-update -vverbose+nowrap" - "cabal %CABOPTS% v2-install alex --bindir=/hsbin" - "alex --version" - - "cabal %CABOPTS% v2-install happy --bindir=/hsbin" - - "happy --version" build: off diff --git a/.travis.yml b/.travis.yml index 5662e3f3..11370513 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,8 +17,8 @@ env: before_install: - sudo add-apt-repository -y ppa:hvr/ghc - sudo apt-get update - - sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER - - export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH + - sudo apt-get install alex-3.1.7 cabal-install-3.4 ghc-$GHCVER + - export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:$PATH install: - cabal update diff --git a/Makefile b/Makefile index ccf5a732..d9235997 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,5 @@ CABAL = cabal -HAPPY = happy -HAPPY_OPTS = -agc HAPPY_VER = `awk '/^version:/ { print $$2 }' happy.cabal` ALEX = alex @@ -9,13 +7,6 @@ ALEX_OPTS = -g SDIST_DIR=dist-newstyle/sdist -GEN = src/gen/Parser.hs src/gen/AttrGrammarParser.hs - -all : $(GEN) - -src/gen/%.hs : src/boot/%.ly - $(HAPPY) $(HAPPYFLAGS) $< -o $@ - sdist :: @case "`$(CABAL) --numeric-version`" in \ 2.[2-9].* | [3-9].* ) ;; \ @@ -25,10 +16,6 @@ sdist :: echo "Error: Tree is not clean"; \ exit 1; \ fi - $(HAPPY) $(HAPPY_OPTS) src/Parser.ly -o src/Parser.hs - $(HAPPY) $(HAPPY_OPTS) src/AttrGrammarParser.ly -o src/AttrGrammarParser.hs - mv src/Parser.ly src/Parser.ly.boot - mv src/AttrGrammarParser.ly src/AttrGrammarParser.ly.boot $(CABAL) v2-run gen-happy-sdist $(CABAL) v2-sdist @if [ ! -f "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" ]; then \ @@ -49,7 +36,14 @@ sdist-test-only :: rm -rf "${SDIST_DIR}/happy-$(HAPPY_VER)/" tar -xf "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" -C ${SDIST_DIR}/ echo "packages: ." > "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project" - cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" && cabal v2-test --enable-tests all + echo "tests: True" >> "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project" + cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" \ + && cabal v2-build all --flag -bootstrap \ + && cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \ + && cabal v2-test all -j --flag -bootstrap \ + && export PATH=./bootstrap-root:$$PATH \ + && cabal v2-build all --flag +bootstrap \ + && cabal v2-test all -j --flag +bootstrap @echo "" @echo "Success! ${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz is ready for distribution!" @echo "" diff --git a/examples/ErlParser.ly b/examples/ErlParser.ly index 770bfcba..e8227944 100644 --- a/examples/ErlParser.ly +++ b/examples/ErlParser.ly @@ -13,7 +13,7 @@ Author : Simon Marlow > import Lexer > import AbsSyn > import Types -> import ParseMonad +> import ParseMonad.Class > } > %token diff --git a/happy.cabal b/happy.cabal index 8f32f713..f6bacd8a 100644 --- a/happy.cabal +++ b/happy.cabal @@ -155,6 +155,11 @@ extra-source-files: tests/rank2.y tests/shift01.y +flag bootstrap + description: Optimize the implementation of happy using a pre-built happy + manual: True + default: True + source-repository head type: git location: https://github.com/simonmar/happy.git @@ -182,16 +187,32 @@ executable happy LALR Lexer ParseMonad + ParseMonad.Class Parser ProduceCode ProduceGLRCode NameSet Target AttrGrammar - AttrGrammarParser ParamRules PrettyGrammar + if flag(bootstrap) + -- TODO put this back when Cabal can use it's qualified goals to better + -- understand bootstrapping, see + -- https://github.com/haskell/cabal/issues/7189 + --build-tools: happy + cpp-options: -DHAPPY_BOOTSTRAP + other-modules: + ParseMonad.Bootstrapped + Parser.Bootstrapped + AttrGrammarParser + else + other-modules: + ParseMonad.Oracle + Parser.Oracle + + test-suite tests type: exitcode-stdio-1.0 main-is: test.hs @@ -200,4 +221,3 @@ test-suite tests build-depends: base, process default-language: Haskell98 - diff --git a/src/AttrGrammar.lhs b/src/AttrGrammar.lhs index 466ce17c..cf3513d6 100644 --- a/src/AttrGrammar.lhs +++ b/src/AttrGrammar.lhs @@ -1,15 +1,15 @@ > module AttrGrammar > ( AgToken (..) > , AgRule (..) +> , HasLexer (..) > , agLexAll -> , agLexer > , subRefVal > , selfRefVal > , rightRefVal > ) where > import Data.Char -> import ParseMonad +> import ParseMonad.Class > data AgToken > = AgTok_LBrace @@ -62,32 +62,30 @@ -- will wreck column alignment so attribute grammar specifications must -- not rely on layout. -> type Pfunc a = String -> Int -> ParseResult a - -> agLexAll :: P [AgToken] -> agLexAll = mkP $ aux [] +> agLexAll :: String -> Int -> ParseResult [AgToken] +> agLexAll = aux [] > where aux toks [] _ = Right (reverse toks) -> aux toks s l = agLexer' (\t -> aux (t:toks)) s l +> aux toks s l = agLexer (\t -> aux (t:toks)) s l -> agLexer :: (AgToken -> P a) -> P a -> agLexer m = mkP $ agLexer' (\x -> runP (m x)) +> instance HasLexer AgToken where +> lexToken = agLexer -> agLexer' :: (AgToken -> Pfunc a) -> Pfunc a -> agLexer' cont [] = cont AgTok_EOF [] -> agLexer' cont ('{':rest) = cont AgTok_LBrace rest -> agLexer' cont ('}':rest) = cont AgTok_RBrace rest -> agLexer' cont (';':rest) = cont AgTok_Semicolon rest -> agLexer' cont ('=':rest) = cont AgTok_Eq rest -> agLexer' cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest -> agLexer' cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest -> agLexer' cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest -> agLexer' cont s@('$':rest) = +> agLexer :: (AgToken -> Pfunc a) -> Pfunc a +> agLexer cont [] = cont AgTok_EOF [] +> agLexer cont ('{':rest) = cont AgTok_LBrace rest +> agLexer cont ('}':rest) = cont AgTok_RBrace rest +> agLexer cont (';':rest) = cont AgTok_Semicolon rest +> agLexer cont ('=':rest) = cont AgTok_Eq rest +> agLexer cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest +> agLexer cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest +> agLexer cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest +> agLexer cont s@('$':rest) = > let (n,rest') = span isDigit rest > in if null n > then agLexUnknown cont s > else agLexAttribute cont (\a -> AgTok_SubRef (read n,a)) rest' -> agLexer' cont s@(c:rest) -> | isSpace c = agLexer' cont (dropWhile isSpace rest) +> agLexer cont s@(c:rest) +> | isSpace c = agLexer cont (dropWhile isSpace rest) > | otherwise = agLexUnknown cont s > agLexUnknown :: (AgToken -> Pfunc a) -> Pfunc a diff --git a/src/AttrGrammarParser.ly b/src/AttrGrammarParser.ly index fc73b65b..a00df123 100644 --- a/src/AttrGrammarParser.ly +++ b/src/AttrGrammarParser.ly @@ -7,7 +7,8 @@ or a conditional statement. > { > {-# OPTIONS_GHC -w #-} > module AttrGrammarParser (agParser) where -> import ParseMonad +> import ParseMonad.Class +> import ParseMonad.Bootstrapped > import AttrGrammar > } @@ -25,7 +26,7 @@ or a conditional statement. > unknown { AgTok_Unknown _ } > > %monad { P } -> %lexer { agLexer } { AgTok_EOF } +> %lexer { lexTokenP } { AgTok_EOF } > %% @@ -64,5 +65,5 @@ or a conditional statement. > { > happyError :: P a -> happyError = failP ("Parse error\n") +> happyError = failP (\l -> show l ++ ": Parse error\n") > } diff --git a/src/Grammar.lhs b/src/Grammar.lhs index d3ce625e..bf91009d 100644 --- a/src/Grammar.lhs +++ b/src/Grammar.lhs @@ -1,4 +1,4 @@ ------------------------------------------------------------------------------ +/----------------------------------------------------------------------------- The Grammar data type. (c) 1993-2001 Andy Gill, Simon Marlow @@ -20,9 +20,16 @@ Here is our mid-section datatype > import GenUtils > import AbsSyn -> import ParseMonad +#ifdef HAPPY_BOOTSTRAP +> import ParseMonad.Class > import AttrGrammar +#endif + +This is only supported in the bootstrapped version +#ifdef HAPPY_BOOTSTRAP > import AttrGrammarParser +#endif + > import ParamRules > import Data.Array @@ -412,19 +419,28 @@ So is this. > checkCode :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int]) > checkCode arity _ _ code [] = doCheckCode arity code + +#ifdef HAPPY_BOOTSTRAP > checkCode arity lhs nonterm_names code attrs = rewriteAttributeGrammar arity lhs nonterm_names code attrs +#else +> checkCode arity _ _ code (_:_) = do +> addErr "Attribute grammars are not supported in non-bootstrapped build" +> doCheckCode arity code +#endif ------------------------------------------------------------------------------ -- Special processing for attribute grammars. We re-parse the body of the code -- block and output the nasty-looking record manipulation and let binding goop -- +#ifdef HAPPY_BOOTSTRAP + > rewriteAttributeGrammar :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int]) > rewriteAttributeGrammar arity lhs nonterm_names code attrs = first we need to parse the body of the code block -> case runP agParser code 0 of +> case runFromStartP agParser code 0 of > Left msg -> do addErr ("error in attribute grammar rules: "++msg) > return ("",[]) > Right rules -> @@ -470,7 +486,6 @@ So is this. > checkArity x = when (x > arity) $ addErr (show x++" out of range") - ------------------------------------------------------------------------------------ -- Actually emit the code for the record bindings and conditionals -- @@ -539,6 +554,8 @@ So is this. > formatToken (AgTok_Unknown x) = x++" " > formatToken AgTok_EOF = error "formatToken AgTok_EOF" +#endif + ----------------------------------------------------------------------------- -- Check for every $i that i is <= the arity of the rule. diff --git a/src/Lexer.lhs b/src/Lexer.lhs index 602dae31..1e41df43 100644 --- a/src/Lexer.lhs +++ b/src/Lexer.lhs @@ -7,9 +7,9 @@ The lexer. > module Lexer ( > Token(..), > TokenId(..), -> lexer ) where +> HasLexer(..) ) where -> import ParseMonad +> import ParseMonad.Class > import Data.Char ( isSpace, isAlphaNum, isDigit, digitToInt ) @@ -72,34 +72,34 @@ The lexer. ToDo: proper text instance here, for use in parser error messages. -> lexer :: (Token -> P a) -> P a -> lexer cont = mkP lexer' -> where lexer' "" = returnToken cont TokenEOF "" +> instance HasLexer Token where +> lexToken = lexer + +> lexer :: (Token -> Pfunc a) -> Pfunc a +> lexer cont = lexer' +> where lexer' "" = cont TokenEOF "" > lexer' ('-':'-':r) = lexer' (dropWhile (/= '\n') r) > lexer' ('{':'-':r) = \line -> lexNestedComment line lexer' r line > lexer' (c:rest) = nextLex cont c rest -> returnToken :: (t -> P a) -> t -> String -> Int -> ParseResult a -> returnToken cont tok = runP (cont tok) - -> nextLex :: (Token -> P a) -> Char -> String -> Int -> ParseResult a +> nextLex :: (Token -> Pfunc a) -> Char -> String -> Int -> ParseResult a > nextLex cont c = case c of -> '\n' -> \rest line -> returnToken lexer cont rest (line+1) +> '\n' -> \rest line -> lexer cont rest (line+1) > '%' -> lexPercent cont > ':' -> lexColon cont -> ';' -> returnToken cont (TokenKW TokSemiColon) +> ';' -> cont (TokenKW TokSemiColon) -> '|' -> returnToken cont (TokenKW TokBar) +> '|' -> cont (TokenKW TokBar) > '\'' -> lexChar cont > '"'{-"-}-> lexString cont > '{' -> lexCode cont -> '(' -> returnToken cont (TokenKW TokParenL) -> ')' -> returnToken cont (TokenKW TokParenR) -> ',' -> returnToken cont (TokenKW TokComma) +> '(' -> cont (TokenKW TokParenL) +> ')' -> cont (TokenKW TokParenR) +> ',' -> cont (TokenKW TokComma) > _ -> | isSpace c -> runP (lexer cont) +> | isSpace c -> lexer cont > | c >= 'a' && c <= 'z' > || c >= 'A' && c <= 'Z' -> lexId cont c > | isDigit c -> lexNum cont c @@ -108,69 +108,69 @@ ToDo: proper text instance here, for use in parser error messages. Percents come in two forms, in pairs, or followed by a special identifier. -> lexPercent :: (Token -> P a) -> [Char] -> Int -> ParseResult a +> lexPercent :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a > lexPercent cont s = case s of -> '%':rest -> returnToken cont (TokenKW TokDoublePercent) rest +> '%':rest -> cont (TokenKW TokDoublePercent) rest > 't':'o':'k':'e':'n':'t':'y':'p':'e':rest -> -> returnToken cont (TokenKW TokSpecId_TokenType) rest +> cont (TokenKW TokSpecId_TokenType) rest > 't':'o':'k':'e':'n':rest -> -> returnToken cont (TokenKW TokSpecId_Token) rest +> cont (TokenKW TokSpecId_Token) rest > 'n':'a':'m':'e':rest -> -> returnToken cont (TokenKW TokSpecId_Name) rest +> cont (TokenKW TokSpecId_Name) rest > 'p':'a':'r':'t':'i':'a':'l':rest -> -> returnToken cont (TokenKW TokSpecId_Partial) rest +> cont (TokenKW TokSpecId_Partial) rest > 'i':'m':'p':'o':'r':'t':'e':'d':'i':'d':'e':'n':'t':'i':'t':'y':rest -> -> returnToken cont (TokenKW TokSpecId_ImportedIdentity) rest +> cont (TokenKW TokSpecId_ImportedIdentity) rest > 'm':'o':'n':'a':'d':rest -> -> returnToken cont (TokenKW TokSpecId_Monad) rest +> cont (TokenKW TokSpecId_Monad) rest > 'l':'e':'x':'e':'r':rest -> -> returnToken cont (TokenKW TokSpecId_Lexer) rest +> cont (TokenKW TokSpecId_Lexer) rest > 'n':'o':'n':'a':'s':'s':'o':'c':rest -> -> returnToken cont (TokenKW TokSpecId_Nonassoc) rest +> cont (TokenKW TokSpecId_Nonassoc) rest > 'l':'e':'f':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Left) rest +> cont (TokenKW TokSpecId_Left) rest > 'r':'i':'g':'h':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Right) rest +> cont (TokenKW TokSpecId_Right) rest > 'p':'r':'e':'c':rest -> -> returnToken cont (TokenKW TokSpecId_Prec) rest +> cont (TokenKW TokSpecId_Prec) rest > 's':'h':'i':'f':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Shift) rest +> cont (TokenKW TokSpecId_Shift) rest > 'e':'x':'p':'e':'c':'t':rest -> -> returnToken cont (TokenKW TokSpecId_Expect) rest +> cont (TokenKW TokSpecId_Expect) rest > 'e':'r':'r':'o':'r':'h':'a':'n':'d':'l':'e':'r':'t':'y':'p':'e':rest -> -> returnToken cont (TokenKW TokSpecId_ErrorHandlerType) rest +> cont (TokenKW TokSpecId_ErrorHandlerType) rest > 'e':'r':'r':'o':'r':rest -> -> returnToken cont (TokenKW TokSpecId_Error) rest +> cont (TokenKW TokSpecId_Error) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':'t':'y':'p':'e':rest -> -> returnToken cont (TokenKW TokSpecId_Attributetype) rest +> cont (TokenKW TokSpecId_Attributetype) rest > 'a':'t':'t':'r':'i':'b':'u':'t':'e':rest -> -> returnToken cont (TokenKW TokSpecId_Attribute) rest +> cont (TokenKW TokSpecId_Attribute) rest > _ -> lexError ("unrecognised directive: %" ++ > takeWhile (not.isSpace) s) s -> lexColon :: (Token -> P a) -> [Char] -> Int -> ParseResult a -> lexColon cont (':':rest) = returnToken cont (TokenKW TokDoubleColon) rest -> lexColon cont rest = returnToken cont (TokenKW TokColon) rest +> lexColon :: (Token -> Pfunc a) -> [Char] -> Int -> ParseResult a +> lexColon cont (':':rest) = cont (TokenKW TokDoubleColon) rest +> lexColon cont rest = cont (TokenKW TokColon) rest -> lexId :: (Token -> P a) -> Char -> String -> Int -> ParseResult a +> lexId :: (Token -> Pfunc a) -> Char -> String -> Int -> ParseResult a > lexId cont c rest = -> readId rest (\ ident rest' -> returnToken cont (TokenInfo (c:ident) TokId) rest') +> readId rest (\ ident rest' -> cont (TokenInfo (c:ident) TokId) rest') -> lexChar :: (Token -> P a) -> String -> Int -> ParseResult a +> lexChar :: (Token -> Pfunc a) -> String -> Int -> ParseResult a > lexChar cont rest = lexReadChar rest -> (\ ident -> returnToken cont (TokenInfo ("'" ++ ident ++ "'") TokId)) +> (\ ident -> cont (TokenInfo ("'" ++ ident ++ "'") TokId)) -> lexString :: (Token -> P a) -> String -> Int -> ParseResult a +> lexString :: (Token -> Pfunc a) -> String -> Int -> ParseResult a > lexString cont rest = lexReadString rest -> (\ ident -> returnToken cont (TokenInfo ("\"" ++ ident ++ "\"") TokId)) +> (\ ident -> cont (TokenInfo ("\"" ++ ident ++ "\"") TokId)) -> lexCode :: (Token -> P a) -> String -> Int -> ParseResult a +> lexCode :: (Token -> Pfunc a) -> String -> Int -> ParseResult a > lexCode cont rest = lexReadCode rest (0 :: Integer) "" cont -> lexNum :: (Token -> P a) -> Char -> String -> Int -> ParseResult a +> lexNum :: (Token -> Pfunc a) -> Char -> String -> Int -> ParseResult a > lexNum cont c rest = > readNum rest (\ num rest' -> -> returnToken cont (TokenNum (stringToInt (c:num)) TokNum) rest') +> cont (TokenNum (stringToInt (c:num)) TokNum) rest') > where stringToInt = foldl (\n c' -> digitToInt c' + 10*n) 0 > cleanupCode :: String -> String @@ -181,7 +181,7 @@ This has to match for @}@ that are {\em not} in strings. The code here is a bit tricky, but should work in most cases. > lexReadCode :: (Eq a, Num a) -> => String -> a -> String -> (Token -> P b) -> Int +> => String -> a -> String -> (Token -> Pfunc b) -> Int > -> ParseResult b > lexReadCode s n c = case s of > '\n':r -> \cont l -> lexReadCode r n ('\n':c) cont (l+1) @@ -189,7 +189,7 @@ here is a bit tricky, but should work in most cases. > '{' :r -> lexReadCode r (n+1) ('{':c) > > '}' :r -> | n == 0 -> \cont -> returnToken cont (TokenInfo ( +> | n == 0 -> \cont -> cont (TokenInfo ( > cleanupCode (reverse c)) TokCodeQuote) r > | otherwise -> lexReadCode r (n-1) ('}':c) > @@ -243,7 +243,7 @@ Utilities that read the rest of a token. > lexReadString [] fn = fn "" [] > lexError :: String -> String -> Int -> ParseResult a -> lexError err = runP (lineP >>= \l -> failP (show l ++ ": " ++ err ++ "\n")) +> lexError err = \_ l -> Left (show l ++ ": " ++ err ++ "\n") > lexNestedComment :: Int -> ([Char] -> Int -> ParseResult a) -> [Char] -> Int > -> ParseResult a diff --git a/src/Main.lhs b/src/Main.lhs index 2b5809c6..2c15724b 100644 --- a/src/Main.lhs +++ b/src/Main.lhs @@ -11,11 +11,12 @@ Path settings auto-generated by Cabal: > import Paths_happy -> import ParseMonad +> import ParseMonad.Class > import AbsSyn > import Grammar > import PrettyGrammar > import Parser + > import First > import LALR > import ProduceCode (produceParser) @@ -71,7 +72,7 @@ Open the file. Parse, using bootstrapping parser. -> (abssyn, hd, tl) <- case runP ourParser file 1 of +> (abssyn, hd, tl) <- case runFromStartP ourParser file 1 of > Left err -> die (fl_name ++ ':' : err) > Right abssyn@(AbsSyn hd _ _ tl) -> return (abssyn, hd, tl) diff --git a/src/ParseMonad.hs b/src/ParseMonad.hs index b9e7bf0f..634de1bd 100644 --- a/src/ParseMonad.hs +++ b/src/ParseMonad.hs @@ -1,18 +1,8 @@ -module ParseMonad where - -import Control.Monad.Reader - -type ParseResult = Either String -type P a = ReaderT (String, Int) ParseResult a - -failP :: String -> P a -failP str = ReaderT (\_ -> Left str) - -mkP :: (String -> Int -> ParseResult a) -> P a -mkP = ReaderT . uncurry - -runP :: P a -> String -> Int -> ParseResult a -runP f s l = runReaderT f (s, l) - -lineP :: P Int -lineP = asks snd +module ParseMonad (module X) where + +-- We use the bootstrapped version if it is available +#ifdef HAPPY_BOOTSTRAP +import ParseMonad.Bootstrapped as X +#else +import ParseMonad.Oracle as X +#endif diff --git a/src/ParseMonad/Bootstrapped.hs b/src/ParseMonad/Bootstrapped.hs new file mode 100644 index 00000000..50fee187 --- /dev/null +++ b/src/ParseMonad/Bootstrapped.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeSynonymInstances #-} + +#if __GLASGOW_HASKELL__ >= 800 +{-# OPTIONS_GHC -Wno-orphans #-} +#else +{-# OPTIONS_GHC -fno-warn-orphans #-} +#endif + +module ParseMonad.Bootstrapped where + +import Control.Monad.Reader +import ParseMonad.Class + +type P = ReaderT (String, Int) ParseResult + +mkP :: (String -> Int -> ParseResult a) -> P a +mkP = ReaderT . uncurry + +runP :: P a -> String -> Int -> ParseResult a +runP f s l = runReaderT f (s, l) + +instance ParseMonad P where + failP mkStr = ReaderT (\(_, l) -> Left $ mkStr l) + lineP = asks snd + runFromStartP m s l = runP m s l + +lexTokenP :: HasLexer token => (token -> P r) -> P r +lexTokenP k = ReaderT $ uncurry $ lexToken (\t -> runP $ k t) diff --git a/src/ParseMonad/Class.hs b/src/ParseMonad/Class.hs new file mode 100644 index 00000000..6824d768 --- /dev/null +++ b/src/ParseMonad/Class.hs @@ -0,0 +1,13 @@ +module ParseMonad.Class where + +type Pfunc a = String -> Int -> ParseResult a + +class HasLexer token where + lexToken :: (token -> Pfunc r) -> Pfunc r + +type ParseResult = Either String + +class Monad p => ParseMonad p where + failP :: (Int -> String) -> p a + lineP :: p Int + runFromStartP :: p a -> String -> Int -> ParseResult a diff --git a/src/ParseMonad/Oracle.hs b/src/ParseMonad/Oracle.hs new file mode 100644 index 00000000..1e770db3 --- /dev/null +++ b/src/ParseMonad/Oracle.hs @@ -0,0 +1,99 @@ +{-# LANGUAGE RankNTypes #-} + +module ParseMonad.Oracle where + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif +import Control.Monad + +import ParseMonad.Class + +data PState token = + PS !String + !Int + !(Maybe token) + +data Decision token = Consume | PutBack token + +type P' token a = + forall r. (a -> PState token -> ParseResult r) -> Pfunc r + +andThen :: Decision token -> P token a -> P' token a +d `andThen` m = \cont s l -> unP m cont (PS s l mTok) + where mTok = case d of Consume -> Nothing + PutBack tok -> Just tok + +andReturn :: Decision token -> a -> P' token a +d `andReturn` a = d `andThen` return a + +andThenJust :: Decision token -> P token a -> P' token (Maybe a) +d `andThenJust` p = d `andThen` fmap Just p + +infix 0 `andThen` +infix 0 `andReturn` +infix 0 `andThenJust` + +withToken :: HasLexer token => (token -> P' token a) -> P token a +withToken f = + MkP $ \cont (PS s l mTok) -> + case mTok of + Nothing -> lexToken (\tok -> f tok cont) s l + Just tok -> f tok cont s l + +newtype P token a = + MkP { unP :: forall r. (a -> PState token -> ParseResult r) -> + PState token -> ParseResult r } + +instance Functor (P token) where + fmap = liftM + +instance Applicative (P token) where + pure a = MkP ($ a) + MkP f <*> MkP v = MkP $ \cont -> f (\g -> v (cont . g)) + +instance Monad (P token) where +#if !MIN_VERSION_base(4,8,0) + return = pure +#endif + MkP m >>= k = MkP $ \cont -> m (\x -> unP (k x) cont) + +instance ParseMonad (P token) where + failP mkErr = MkP $ \_ (PS _ l _) -> Left (mkErr l) + lineP = MkP $ \cont pstate@(PS _ l _) -> cont l pstate + runFromStartP m s l = runP m (PS s l Nothing) + +runP :: P token a -> PState token -> ParseResult a +runP (MkP p) = p (\a _ -> Right a) + +manyP :: P token (Maybe a) -> P token [a] +manyP p = go [] + where + go acc = do + mX <- p + case mX of + Nothing -> return (reverse acc) + Just x -> go (x : acc) + +manySepByP :: HasLexer token => (token -> Bool) -> P token (Maybe a) -> P token [a] +manySepByP isSep p = go [] where + go acc = do + mX <- p + case mX of + Nothing -> return (reverse acc) + Just x -> do + let acc' = x : acc + withToken $ \tok -> + if isSep tok + then Consume `andThen` go acc' + else PutBack tok `andReturn` reverse acc' + +someSepByP :: HasLexer token => (token -> Bool) -> P token a -> P token [a] +someSepByP isSep p = go [] where + go acc = do + x <- p + let acc' = x : acc + withToken $ \tok -> + if isSep tok + then Consume `andThen` go acc' + else PutBack tok `andReturn` reverse acc' diff --git a/src/Parser.hs b/src/Parser.hs new file mode 100644 index 00000000..c6269df4 --- /dev/null +++ b/src/Parser.hs @@ -0,0 +1,8 @@ +module Parser (module X) where + +-- We use the bootstrapped version if it is available +#ifdef HAPPY_BOOTSTRAP +import Parser.Bootstrapped as X +#else +import Parser.Oracle as X +#endif diff --git a/src/Parser.ly b/src/Parser/Bootstrapped.ly similarity index 96% rename from src/Parser.ly rename to src/Parser/Bootstrapped.ly index 6b6ae7a1..4ba44ff1 100644 --- a/src/Parser.ly +++ b/src/Parser/Bootstrapped.ly @@ -8,8 +8,9 @@ The parser. > { > {-# OPTIONS_GHC -w #-} -> module Parser (ourParser,AbsSyn) where -> import ParseMonad +> module Parser.Bootstrapped (ourParser,AbsSyn) where +> import ParseMonad.Class +> import ParseMonad.Bootstrapped > import AbsSyn > import Lexer > } @@ -47,7 +48,7 @@ The parser. > "," { TokenKW TokComma } > %monad { P } -> %lexer { lexer } { TokenEOF } +> %lexer { lexTokenP } { TokenEOF } > %% @@ -146,5 +147,5 @@ The parser. > { > happyError :: P a -> happyError = lineP >>= \l -> failP (show l ++ ": Parse error\n") +> happyError = failP (\l -> show l ++ ": Parse error\n") > } diff --git a/src/Parser/Oracle.hs b/src/Parser/Oracle.hs new file mode 100644 index 00000000..7dc510ea --- /dev/null +++ b/src/Parser/Oracle.hs @@ -0,0 +1,209 @@ +{-# LANGUAGE RankNTypes #-} + +module Parser.Oracle (ourParser, AbsSyn) where + +#if !MIN_VERSION_base(4,8,0) +import Control.Applicative +#endif + +import Control.Monad (when) +import Data.Maybe (isJust) +import ParseMonad.Class +import ParseMonad.Oracle +import AbsSyn +import Lexer + +type Parser = P Token + +ourParser :: Parser AbsSyn +ourParser = do + headerCode <- optCodeP + tokInfos <- manyP optTokInfoP + expectKW "Expected %%" TokDoublePercent + rules <- rulesP + footerCode <- optCodeP + eofP + return (AbsSyn headerCode tokInfos rules footerCode) + +optCodeP :: Parser (Maybe String) +optCodeP = withToken match where + match (TokenInfo str TokCodeQuote) = Consume `andReturn` Just str + match tok = PutBack tok `andReturn` Nothing + +codeP :: Parser String +codeP = do + mCode <- optCodeP + case mCode of + Nothing -> parseError "Expected a code block" + Just code -> return code + +optTokInfoP :: Parser (Maybe (Directive String)) +optTokInfoP = withToken match where + match (TokenKW TokSpecId_TokenType) = + Consume `andThenJust` + pure TokenType <*> codeP + match (TokenKW TokSpecId_Token) = + Consume `andThenJust` + pure TokenSpec <*> manyP optTokenSpecP + match (TokenKW TokSpecId_Name) = + Consume `andThenJust` + pure TokenName <*> idtP <*> optIdtP <*> pure False + match (TokenKW TokSpecId_Partial) = + Consume `andThenJust` + pure TokenName <*> idtP <*> optIdtP <*> pure True + match (TokenKW TokSpecId_ImportedIdentity) = + Consume `andThenJust` + pure TokenImportedIdentity + match (TokenKW TokSpecId_Lexer) = + Consume `andThenJust` + pure TokenLexer <*> codeP <*> codeP + match (TokenKW TokSpecId_Monad) = + Consume `andThenJust` do + codes <- manyP optCodeP + case codes of + [c1] -> return $ TokenMonad "()" c1 "Prelude.>>=" "Prelude.return" + [c1, c2] -> return $ TokenMonad c1 c2 "Prelude.>>=" "Prelude.return" + [c1, c2, c3] -> return $ TokenMonad "()" c1 c2 c3 + [c1, c2, c3, c4] -> return $ TokenMonad c1 c2 c3 c4 + [] -> parseError "Expected a code block" + _ -> parseError "Too many code blocks" + match (TokenKW TokSpecId_Nonassoc) = + Consume `andThenJust` + pure TokenNonassoc <*> manyP optIdtP + match (TokenKW TokSpecId_Right) = + Consume `andThenJust` + pure TokenRight <*> manyP optIdtP + match (TokenKW TokSpecId_Left) = + Consume `andThenJust` + pure TokenLeft <*> manyP optIdtP + match (TokenKW TokSpecId_Expect) = + Consume `andThenJust` + pure TokenExpect <*> numP + match (TokenKW TokSpecId_Error) = + Consume `andThenJust` + pure TokenError <*> codeP + match (TokenKW TokSpecId_ErrorHandlerType) = + Consume `andThenJust` + pure TokenErrorHandlerType <*> idtP + match (TokenKW TokSpecId_Attributetype) = + Consume `andThenJust` + pure TokenAttributetype <*> codeP + match (TokenKW TokSpecId_Attribute) = + Consume `andThenJust` + pure TokenAttribute <*> idtP <*> codeP + match tok = PutBack tok `andReturn` Nothing + +optIdtP :: Parser (Maybe String) +optIdtP = withToken match where + match (TokenInfo idt TokId) = Consume `andReturn` Just idt + match tok = PutBack tok `andReturn` Nothing + +idtP :: Parser String +idtP = do + mIdt <- optIdtP + case mIdt of + Nothing -> parseError "Expected an identifier" + Just idt -> return idt + +numP :: Parser Int +numP = withToken match where + match (TokenNum n TokNum) = Consume `andReturn` n + match tok = PutBack tok `andThen` parseError "Expected a number" + +optTokenSpecP :: Parser (Maybe (String, String)) +optTokenSpecP = withToken match where + match (TokenInfo idt TokId) = + Consume `andThenJust` do + code <- codeP + return (idt, code) + match tok = PutBack tok `andReturn` Nothing + +rulesP :: Parser [Rule] +rulesP = do + rules <- manyP optRuleP + when (null rules) (parseError "At least one rule required") + return rules + +optRuleP :: Parser (Maybe Rule) +optRuleP = do + mIdt <- optIdtP + case mIdt of + Nothing -> return Nothing + Just idt -> do + params <- paramsP idtP + mSig <- optSigP + mIdt' <- if isJust mSig then optIdtP else return Nothing + case mIdt' of + Just idt' | idt' /= idt -> + parseError "Name mismatch in signature and definition" + _ -> return () + expectKW "Expected ':'" TokColon + prods <- someSepByP (isKW TokBar) prodP + let rule = Rule idt params prods mSig + return (Just rule) + +optSigP :: Parser (Maybe String) +optSigP = withToken match where + match (TokenKW TokDoubleColon) = Consume `andThenJust` codeP + match tok = PutBack tok `andReturn` Nothing + +paramsP :: Parser a -> Parser [a] +paramsP p = withToken match where + match (TokenKW TokParenL) = + Consume `andThen` do + params <- someSepByP (isKW TokComma) p + expectKW "Expected ')'" TokParenR + return params + match tok = PutBack tok `andReturn` [] + +optSemiP :: Parser () +optSemiP = withToken match where + match (TokenKW TokSemiColon) = Consume `andReturn` () + match tok = PutBack tok `andReturn` () + +prodP :: Parser Prod +prodP = do + terms <- manyP optTermP + prec <- precP + code <- codeP + optSemiP + l <- lineP + return (Prod terms code l prec) + +termP :: Parser Term +termP = do + mTerm <- optTermP + case mTerm of + Nothing -> parseError "Expected a term" + Just term -> return term + +optTermP :: Parser (Maybe Term) +optTermP = withToken match where + match (TokenInfo idt TokId) = + Consume `andThenJust` do + termParams <- paramsP termP + return (App idt termParams) + match tok = PutBack tok `andReturn` Nothing + +precP :: Parser Prec +precP = withToken match where + match (TokenKW TokSpecId_Shift) = Consume `andReturn` PrecShift + match (TokenKW TokSpecId_Prec) = Consume `andThen` fmap PrecId idtP + match tok = PutBack tok `andReturn` PrecNone + +eofP :: Parser () +eofP = withToken match where + match (TokenEOF) = Consume `andReturn` () + match tok = PutBack tok `andThen` parseError "Parse error" + +parseError :: String -> Parser a +parseError s = failP $ \l -> show l ++ ": " ++ s ++ "\n" + +isKW :: TokenId -> Token -> Bool +isKW tokId (TokenKW tokId') = tokId == tokId' +isKW _ _ = False + +expectKW :: String -> TokenId -> Parser () +expectKW err_msg kw = withToken match where + match (TokenKW tokId) | tokId == kw = Consume `andReturn` () + match tok = PutBack tok `andThen` parseError err_msg diff --git a/test.hs b/test.hs index e452a17b..816640d3 100644 --- a/test.hs +++ b/test.hs @@ -1,4 +1,9 @@ +import Data.List (intercalate) +import GHC.Conc (numCapabilities) import System.Process (system) import System.Exit (exitWith) -main = system "make -k -C tests clean all" >>= exitWith +main = do + let jFlag = "-j" ++ show numCapabilities + let cmd = ["make", jFlag, "-k", "-C", "tests", "clean", "all"] + system (intercalate " " cmd) >>= exitWith diff --git a/tests/Makefile b/tests/Makefile index 93130715..b839425d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -27,11 +27,14 @@ TESTS = Test.ly TestMulti.ly TestPrecedence.ly bug001.ly \ monad001.y monad002.ly precedence001.ly precedence002.y \ bogus-token.y bug002.y Partial.ly \ issue91.y issue93.y issue94.y issue95.y \ - AttrGrammar001.y AttrGrammar002.y \ test_rules.y monaderror.y monaderror-explist.y \ typeclass_monad001.y typeclass_monad002.ly typeclass_monad_lexer.y \ rank2.y shift01.y +ifdef HAPPY_BOOTSTRAP +TESTS += AttrGrammar001.y AttrGrammar002.y +endif + ERROR_TESTS = error001.y # NOTE: `cabal` will set the `happy_datadir` env-var accordingly before invoking the test-suite