diff --git a/Main.hs b/Main.hs
index f9c5006..7163f93 100644
--- a/Main.hs
+++ b/Main.hs
@@ -54,12 +54,14 @@ langTbl = [
   , (ncs , \(ParserProxy p) -> runProgram csharp p)
   , (npy , \(ParserProxy p) -> runProgram python p)
   , (njs , \(ParserProxy p) -> runProgram js p)
+  , (nphp , \(ParserProxy p) -> runProgram php p)
   ]
   where
     ncpp = ["cpp", "c++"]
     ncs = ["c#", "csharp", "cs"]
     npy = ["python", "py"]
     njs = ["js", "javascript"]
+    nphp = ["php"]
 
 data ParserProxy = forall p.
   ( Parser p
diff --git a/alpaca-parser-generator.cabal b/alpaca-parser-generator.cabal
index 1391acc..143c05f 100644
--- a/alpaca-parser-generator.cabal
+++ b/alpaca-parser-generator.cabal
@@ -4,7 +4,7 @@ cabal-version: 1.12
 --
 -- see: https://github.com/sol/hpack
 --
--- hash: cd6c7f5ac97c1d41c7346954b2cae820235b93c832066cd2af66fe0d72657a6f
+-- hash: 04246f6177aff9d5d9783e553b24e2f2d45367528d87e890e8a473ffc316d9c6
 
 name:           alpaca-parser-generator
 version:        0.1.3.0
@@ -29,6 +29,7 @@ executable alpaca
       Lexer.CSharp
       Lexer.FA
       Lexer.JS
+      Lexer.PHP
       Lexer.Python
       Lexer.Types
       MonadTypes
diff --git a/examples/polish-notation-calc/nodejs/.gitignore b/examples/polish-notation-calc/nodejs/.gitignore
index a9dac5a..8ff86ef 100644
--- a/examples/polish-notation-calc/nodejs/.gitignore
+++ b/examples/polish-notation-calc/nodejs/.gitignore
@@ -1,3 +1,2 @@
 lexer.*
 parser.*
-__pycache__
diff --git a/examples/polish-notation-calc/php/.gitignore b/examples/polish-notation-calc/php/.gitignore
new file mode 100644
index 0000000..66f2166
--- /dev/null
+++ b/examples/polish-notation-calc/php/.gitignore
@@ -0,0 +1,3 @@
+Lexer.*
+Parser.*
+*.txt
diff --git a/examples/polish-notation-calc/php/main.php b/examples/polish-notation-calc/php/main.php
new file mode 100644
index 0000000..94d1ae6
--- /dev/null
+++ b/examples/polish-notation-calc/php/main.php
@@ -0,0 +1,12 @@
+getNextToken())[0] !== Lexer::TOKEN_TYPE_EOF) {
+        echo(Lexer::tokenToString($token[0]) . ", " . $token[1] . "\n");
+    }
+}
diff --git a/examples/polish-notation-calc/php/syntax.xy b/examples/polish-notation-calc/php/syntax.xy
new file mode 100644
index 0000000..dae5a37
--- /dev/null
+++ b/examples/polish-notation-calc/php/syntax.xy
@@ -0,0 +1,18 @@
+number /[0-9]+/ +$text
+add /\+/
+sub /\-/
+mul /\*/
+div /\//
+pow /\^/
+/ +/
+%%
+
+S : E %eof { _1 }
+  ;
+E : add E E { _2 + _3 }
+  | sub E E { _2 - _3 }
+  | mul E E { _2 * _3 }
+  | div E E { _2 / _3 }
+  | pow E E { _2 ** _3 }
+  | number { _1 }
+  ;
diff --git a/examples/polish-notation-calc/php/test.sh b/examples/polish-notation-calc/php/test.sh
new file mode 100755
index 0000000..41f1fed
--- /dev/null
+++ b/examples/polish-notation-calc/php/test.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+${alpaca:-alpaca} -l php $args syntax.xy &&\
+  php main.php "$@" <<< "$input"
diff --git a/lib/Lang.hs b/lib/Lang.hs
index 776058a..64a6118 100644
--- a/lib/Lang.hs
+++ b/lib/Lang.hs
@@ -7,11 +7,13 @@ instance Lang CPP
 instance Lang Python
 instance Lang CSharp
 instance Lang JS
+instance Lang PHP
 
 data CPP
 data Python
 data CSharp
 data JS
+data PHP
 
 cpp :: Proxy CPP
 cpp = Proxy
@@ -24,3 +26,6 @@ csharp = Proxy
 
 js :: Proxy JS
 js = Proxy
+
+php :: Proxy PHP
+php = Proxy
diff --git a/lib/Lexer.hs b/lib/Lexer.hs
index 09c2ee0..4fc7601 100644
--- a/lib/Lexer.hs
+++ b/lib/Lexer.hs
@@ -10,4 +10,5 @@ import Lexer.CPP()
 import Lexer.CSharp()
 import Lexer.Python()
 import Lexer.JS()
+import Lexer.PHP()
 import Lang
diff --git a/lib/Lexer/PHP.hs b/lib/Lexer/PHP.hs
new file mode 100644
index 0000000..cb59bde
--- /dev/null
+++ b/lib/Lexer/PHP.hs
@@ -0,0 +1,124 @@
+{-# OPTIONS_GHC -Wno-orphans #-}
+{-# LANGUAGE QuasiQuotes, OverloadedStrings #-}
+module Lexer.PHP() where
+
+import qualified Data.List.NonEmpty as NE
+import Regex.Parse
+import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.IntSet as IS
+import Lexer.Types
+import Lang
+import Utils
+
+-- | PHP backend. Emits a single @Lexer.php@ whose @getNextToken@ walks the
+--   state list as @state_N:@ labels connected by @goto@.
+instance LexerWriter PHP where
+  writeLexer _ accSt tokNames stList =
+    [ ("Lexer.php", [interp|
+input = $input;
+        $this->curChIx = 0;
+        $this->debug = $debug;
+    }
+
+    public static function tokenToString(int $token): string
+    {
+        switch($token) {
+            case 0: return '%eof';
+            #{indent 3 tokToStr}
+            default: throw new \\InvalidArgumentException("Unknown token $token");
+        }
+    }
+
+    public function getNextToken(): array
+    {
+        $lastAccChIx = $this->curChIx;
+        $startChIx = $this->curChIx;
+        $accSt = -1;
+        $len = strlen($this->input);
+        #{indent 2 transTable}
+        end:
+        $lastReadChIx = $this->curChIx;
+        $this->curChIx = $lastAccChIx;
+        $text = substr($this->input, $startChIx, $lastAccChIx - $startChIx);
+        switch($accSt) {
+            #{indent 3 returnResult}
+        }
+
+        if ($this->curChIx >= $len) {
+            if ($this->debug) printf("Got EOF while lexing \\"%s\\"\\n", $text);
+            return [self::TOKEN_TYPE_EOF, null];
+        }
+        // substr() takes (offset, length), so convert the end index to a length.
+        throw new \\InvalidArgumentException("Unexpected input: " . substr($this->input, $startChIx, $lastReadChIx - $startChIx));
+    }
+}
+|])]
+    where
+      indent = indentLang 4
+      returnResult = T.intercalate "\n" (map returnResult1 accSt)
+      -- One case arm per accepting state: named tokens are returned, unnamed ones are skipped.
+      returnResult1 :: (Int, (Maybe Text, Action)) -> Text
+      returnResult1 (st, (Just name, act)) = [interp|
+        case #{st}:
+            if ($this->debug) printf("Lexed token #{name}: \\"%s\\"\\n", $text);
+            return [self::TOKEN_TYPE_#{T.toUpper name}, #{mkAct act}];
+        |]
+      returnResult1 (st, (Nothing, _)) = [interp|
+        case #{st}:
+            if ($this->debug) printf("Skipping state #{tshow st}: \\"%s\\"\\n", $text);
+            return $this->getNextToken();
+        |]
+      accStS = IS.fromList $ map fst accSt
+      -- Emitted at the top of an accepting state: record it as the latest match.
+      checkAccepting st
+        | st `IS.member` accStS = [interp|
+          $lastAccChIx = $this->curChIx;
+          $accSt = #{st};
+          |] :: Text
+        | otherwise = ""
+      -- One labelled block per state: read a byte, take the first matching transition, else stop.
+      checkState :: (Int, (a, [(NE.NonEmpty CharPattern, Int)])) -> Maybe Text
+      checkState (curSt, (_, charTrans)) = Just [interp|
+        state_#{curSt}:
+            #{indent 1 $ checkAccepting curSt}
+            if ($this->curChIx >= $len) goto end;
+            $curCh = ord($this->input[$this->curChIx]);
+            $this->curChIx+=1;
+            #{indent 1 $ T.intercalate "\nelse " $ map checkChars charTrans}
+            goto end;
+        |]
+      transTable = T.intercalate "\n" $ mapMaybe checkState stList
+      -- Token ids start at 1; 0 is reserved for %eof (see tokenToString).
+      tokDefns = T.intercalate "\n" $ zipWith (\x n -> [interp|const TOKEN_TYPE_#{T.toUpper x} = #{n};|] :: Text) tokNames [1::Word ..]
+      tokToStr = T.intercalate "\n" $ zipWith (\x n -> [interp|case #{n}: return '#{x}';|] :: Text) tokNames [1::Word ..]
+      mkAct NoAction = "null"
+      mkAct (Action act) = act
+      checkChars :: (NE.NonEmpty CharPattern, Int) -> Text
+      checkChars (charGroup, newSt) = [interp|
+        if (#{charCond charGroup}) goto state_#{newSt};
+        |]
+      charCond = T.intercalate " || " . map charCond1 . NE.toList
+      -- Conditions compare byte values ($curCh holds ord() of the current byte).
+      -- A quoted literal would be wrong here: PHP single-quoted strings only
+      -- unescape \' and \\, so a rendered '\n' is two characters and never matches.
+      charCond1 :: CharPattern -> Text
+      charCond1 (CChar c) = [interp|$curCh === #{fromEnum c}|]
+      charCond1 (CRange c1 c2) = [interp|($curCh >= #{fromEnum c1} && $curCh <= #{fromEnum c2})|]
+      charCond1 CAny = "true"