Skip to content

Commit

Permalink
Optional bootstrapping (#175)
Browse files Browse the repository at this point in the history
Before this patch, building 'happy' required a pre-built binary of 'happy'. This was elegant in the same way a self-hosting compiler is elegant. But it also made building purely from source more complicated than needed.

Now, we have a subset of the functionality also written with parser combinators. One can build that minimal happy from source, and then bootstrap the regular one from it. This simplifies the build process considerably, along with helping with various (albeit mostly theoretical) trust issues.

The parser combinators are bespoke now, but since they need not be performant we will probably just switch to `ReadP` from base, later.

Co-authored-by: Vladislav Zavialov <[email protected]>
  • Loading branch information
Ericson2314 and int-index authored Jan 3, 2021
1 parent 75dbfd2 commit fcaca24
Show file tree
Hide file tree
Showing 19 changed files with 512 additions and 125 deletions.
2 changes: 0 additions & 2 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ install:
- "cabal %CABOPTS% v2-update -vverbose+nowrap"
- "cabal %CABOPTS% v2-install alex --bindir=/hsbin"
- "alex --version"
- "cabal %CABOPTS% v2-install happy --bindir=/hsbin"
- "happy --version"

build: off

Expand Down
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ env:
before_install:
- sudo add-apt-repository -y ppa:hvr/ghc
- sudo apt-get update
- sudo apt-get install alex-3.1.7 happy-1.19.5 cabal-install-3.4 ghc-$GHCVER
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:/opt/happy/1.19.5/bin:$PATH
- sudo apt-get install alex-3.1.7 cabal-install-3.4 ghc-$GHCVER
- export PATH=/opt/cabal/3.4/bin:/opt/ghc/$GHCVER/bin:/opt/alex/3.1.7/bin:$PATH

install:
- cabal update
Expand Down
22 changes: 8 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,12 @@
CABAL = cabal

HAPPY = happy
HAPPY_OPTS = -agc
HAPPY_VER = `awk '/^version:/ { print $$2 }' happy.cabal`

ALEX = alex
ALEX_OPTS = -g

SDIST_DIR=dist-newstyle/sdist

GEN = src/gen/Parser.hs src/gen/AttrGrammarParser.hs

all : $(GEN)

src/gen/%.hs : src/boot/%.ly
$(HAPPY) $(HAPPYFLAGS) $< -o $@

sdist ::
@case "`$(CABAL) --numeric-version`" in \
2.[2-9].* | [3-9].* ) ;; \
Expand All @@ -25,10 +16,6 @@ sdist ::
echo "Error: Tree is not clean"; \
exit 1; \
fi
$(HAPPY) $(HAPPY_OPTS) src/Parser.ly -o src/Parser.hs
$(HAPPY) $(HAPPY_OPTS) src/AttrGrammarParser.ly -o src/AttrGrammarParser.hs
mv src/Parser.ly src/Parser.ly.boot
mv src/AttrGrammarParser.ly src/AttrGrammarParser.ly.boot
$(CABAL) v2-run gen-happy-sdist
$(CABAL) v2-sdist
@if [ ! -f "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" ]; then \
Expand All @@ -49,7 +36,14 @@ sdist-test-only ::
rm -rf "${SDIST_DIR}/happy-$(HAPPY_VER)/"
tar -xf "${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz" -C ${SDIST_DIR}/
echo "packages: ." > "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project"
cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" && cabal v2-test --enable-tests all
echo "tests: True" >> "${SDIST_DIR}/happy-$(HAPPY_VER)/cabal.project"
cd "${SDIST_DIR}/happy-$(HAPPY_VER)/" \
&& cabal v2-build all --flag -bootstrap \
&& cabal v2-install --flag -bootstrap --installdir="./bootstrap-root" \
&& cabal v2-test all -j --flag -bootstrap \
&& export PATH=./bootstrap-root:$$PATH \
&& cabal v2-build all --flag +bootstrap \
&& cabal v2-test all -j --flag +bootstrap
@echo ""
@echo "Success! ${SDIST_DIR}/happy-$(HAPPY_VER).tar.gz is ready for distribution!"
@echo ""
2 changes: 1 addition & 1 deletion examples/ErlParser.ly
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Author : Simon Marlow <[email protected]>
> import Lexer
> import AbsSyn
> import Types
> import ParseMonad
> import ParseMonad.Class
> }

> %token
Expand Down
24 changes: 22 additions & 2 deletions happy.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ extra-source-files:
tests/rank2.y
tests/shift01.y

flag bootstrap
description: Optimize the implementation of happy using a pre-built happy
manual: True
default: True

source-repository head
type: git
location: https://github.com/simonmar/happy.git
Expand Down Expand Up @@ -182,16 +187,32 @@ executable happy
LALR
Lexer
ParseMonad
ParseMonad.Class
Parser
ProduceCode
ProduceGLRCode
NameSet
Target
AttrGrammar
AttrGrammarParser
ParamRules
PrettyGrammar

if flag(bootstrap)
-- TODO put this back when Cabal can use it's qualified goals to better
-- understand bootstrapping, see
-- https://github.com/haskell/cabal/issues/7189
--build-tools: happy
cpp-options: -DHAPPY_BOOTSTRAP
other-modules:
ParseMonad.Bootstrapped
Parser.Bootstrapped
AttrGrammarParser
else
other-modules:
ParseMonad.Oracle
Parser.Oracle


test-suite tests
type: exitcode-stdio-1.0
main-is: test.hs
Expand All @@ -200,4 +221,3 @@ test-suite tests

build-depends: base, process
default-language: Haskell98

40 changes: 19 additions & 21 deletions src/AttrGrammar.lhs
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
> module AttrGrammar
> ( AgToken (..)
> , AgRule (..)
> , HasLexer (..)
> , agLexAll
> , agLexer
> , subRefVal
> , selfRefVal
> , rightRefVal
> ) where

> import Data.Char
> import ParseMonad
> import ParseMonad.Class

> data AgToken
> = AgTok_LBrace
Expand Down Expand Up @@ -62,32 +62,30 @@
-- will wreck column alignment so attribute grammar specifications must
-- not rely on layout.
> type Pfunc a = String -> Int -> ParseResult a
> agLexAll :: P [AgToken]
> agLexAll = mkP $ aux []
> agLexAll :: String -> Int -> ParseResult [AgToken]
> agLexAll = aux []
> where aux toks [] _ = Right (reverse toks)
> aux toks s l = agLexer' (\t -> aux (t:toks)) s l
> aux toks s l = agLexer (\t -> aux (t:toks)) s l
> agLexer :: (AgToken -> P a) -> P a
> agLexer m = mkP $ agLexer' (\x -> runP (m x))
> instance HasLexer AgToken where
> lexToken = agLexer
> agLexer' :: (AgToken -> Pfunc a) -> Pfunc a
> agLexer' cont [] = cont AgTok_EOF []
> agLexer' cont ('{':rest) = cont AgTok_LBrace rest
> agLexer' cont ('}':rest) = cont AgTok_RBrace rest
> agLexer' cont (';':rest) = cont AgTok_Semicolon rest
> agLexer' cont ('=':rest) = cont AgTok_Eq rest
> agLexer' cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest
> agLexer' cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest
> agLexer' cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest
> agLexer' cont s@('$':rest) =
> agLexer :: (AgToken -> Pfunc a) -> Pfunc a
> agLexer cont [] = cont AgTok_EOF []
> agLexer cont ('{':rest) = cont AgTok_LBrace rest
> agLexer cont ('}':rest) = cont AgTok_RBrace rest
> agLexer cont (';':rest) = cont AgTok_Semicolon rest
> agLexer cont ('=':rest) = cont AgTok_Eq rest
> agLexer cont ('w':'h':'e':'r':'e':rest) = cont AgTok_Where rest
> agLexer cont ('$':'$':rest) = agLexAttribute cont (\a -> AgTok_SelfRef a) rest
> agLexer cont ('$':'>':rest) = agLexAttribute cont (\a -> AgTok_RightmostRef a) rest
> agLexer cont s@('$':rest) =
> let (n,rest') = span isDigit rest
> in if null n
> then agLexUnknown cont s
> else agLexAttribute cont (\a -> AgTok_SubRef (read n,a)) rest'
> agLexer' cont s@(c:rest)
> | isSpace c = agLexer' cont (dropWhile isSpace rest)
> agLexer cont s@(c:rest)
> | isSpace c = agLexer cont (dropWhile isSpace rest)
> | otherwise = agLexUnknown cont s
> agLexUnknown :: (AgToken -> Pfunc a) -> Pfunc a
Expand Down
7 changes: 4 additions & 3 deletions src/AttrGrammarParser.ly
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ or a conditional statement.
> {
> {-# OPTIONS_GHC -w #-}
> module AttrGrammarParser (agParser) where
> import ParseMonad
> import ParseMonad.Class
> import ParseMonad.Bootstrapped
> import AttrGrammar
> }

Expand All @@ -25,7 +26,7 @@ or a conditional statement.
> unknown { AgTok_Unknown _ }
>
> %monad { P }
> %lexer { agLexer } { AgTok_EOF }
> %lexer { lexTokenP } { AgTok_EOF }

> %%

Expand Down Expand Up @@ -64,5 +65,5 @@ or a conditional statement.

> {
> happyError :: P a
> happyError = failP ("Parse error\n")
> happyError = failP (\l -> show l ++ ": Parse error\n")
> }
25 changes: 21 additions & 4 deletions src/Grammar.lhs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-----------------------------------------------------------------------------
/-----------------------------------------------------------------------------
The Grammar data type.

(c) 1993-2001 Andy Gill, Simon Marlow
Expand All @@ -20,9 +20,16 @@ Here is our mid-section datatype

> import GenUtils
> import AbsSyn
> import ParseMonad
#ifdef HAPPY_BOOTSTRAP
> import ParseMonad.Class
> import AttrGrammar
#endif

This is only supported in the bootstrapped version
#ifdef HAPPY_BOOTSTRAP
> import AttrGrammarParser
#endif

> import ParamRules

> import Data.Array
Expand Down Expand Up @@ -412,19 +419,28 @@ So is this.

> checkCode :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int])
> checkCode arity _ _ code [] = doCheckCode arity code

#ifdef HAPPY_BOOTSTRAP
> checkCode arity lhs nonterm_names code attrs = rewriteAttributeGrammar arity lhs nonterm_names code attrs
#else
> checkCode arity _ _ code (_:_) = do
> addErr "Attribute grammars are not supported in non-bootstrapped build"
> doCheckCode arity code
#endif

------------------------------------------------------------------------------
-- Special processing for attribute grammars. We re-parse the body of the code
-- block and output the nasty-looking record manipulation and let binding goop
--

#ifdef HAPPY_BOOTSTRAP

> rewriteAttributeGrammar :: Int -> [Name] -> [Name] -> String -> [(String,String)] -> M (String,[Int])
> rewriteAttributeGrammar arity lhs nonterm_names code attrs =

first we need to parse the body of the code block

> case runP agParser code 0 of
> case runFromStartP agParser code 0 of
> Left msg -> do addErr ("error in attribute grammar rules: "++msg)
> return ("",[])
> Right rules ->
Expand Down Expand Up @@ -470,7 +486,6 @@ So is this.
> checkArity x = when (x > arity) $ addErr (show x++" out of range")



------------------------------------------------------------------------------------
-- Actually emit the code for the record bindings and conditionals
--
Expand Down Expand Up @@ -539,6 +554,8 @@ So is this.
> formatToken (AgTok_Unknown x) = x++" "
> formatToken AgTok_EOF = error "formatToken AgTok_EOF"

#endif


-----------------------------------------------------------------------------
-- Check for every $i that i is <= the arity of the rule.
Expand Down
Loading

0 comments on commit fcaca24

Please sign in to comment.