diff --git a/Main.hs b/Main.hs
index 6077e7d..bab2d34 100644
--- a/Main.hs
+++ b/Main.hs
@@ -1,4 +1,5 @@
 {-# LANGUAGE FlexibleContexts, OverloadedStrings, ExistentialQuantification #-}
+{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses #-}
 module Main where
 
 import Lexer
@@ -53,12 +54,14 @@ langTbl = [
   , (ncs , \(ParserProxy p) -> runProgram csharp p)
   , (npy , \(ParserProxy p) -> runProgram python p)
   , (njs , \(ParserProxy p) -> runProgram js p)
+  , (nphp , \(ParserProxy p) -> runProgram php p)
   ]
   where
   ncpp = ["cpp", "c++"]
   ncs = ["c#", "csharp", "cs"]
   npy = ["python", "py"]
   njs = ["js", "javascript"]
+  nphp = ["php"]
 
 data ParserProxy = forall p.
   ( Parser p
diff --git a/alpaca-parser-generator.cabal b/alpaca-parser-generator.cabal
index 268f0e4..6f16bef 100644
--- a/alpaca-parser-generator.cabal
+++ b/alpaca-parser-generator.cabal
@@ -4,7 +4,7 @@ cabal-version: 1.12
 --
 -- see: https://github.com/sol/hpack
 --
--- hash: ce63852fddd1891722b3791ee6757ebf29e4809f39fba3da2331dec855a1fdf7
+-- hash: ce90a3b3415136568951b5b98465d1bdc3c45476c3c6df83485e3a17bfd8892a
 
 name:           alpaca-parser-generator
 version:        0.1.2.1
@@ -29,6 +29,7 @@ executable alpaca
       Lexer.CSharp
       Lexer.FA
       Lexer.JS
+      Lexer.PHP
       Lexer.Python
       Lexer.Types
       MonadTypes
diff --git a/examples/polish-notation-calc/nodejs/.gitignore b/examples/polish-notation-calc/nodejs/.gitignore
index a9dac5a..8ff86ef 100644
--- a/examples/polish-notation-calc/nodejs/.gitignore
+++ b/examples/polish-notation-calc/nodejs/.gitignore
@@ -1,3 +1,2 @@
 lexer.*
 parser.*
-__pycache__
diff --git a/examples/polish-notation-calc/php/.gitignore b/examples/polish-notation-calc/php/.gitignore
new file mode 100644
index 0000000..8ff86ef
--- /dev/null
+++ b/examples/polish-notation-calc/php/.gitignore
@@ -0,0 +1,2 @@
+lexer.*
+parser.*
diff --git a/examples/polish-notation-calc/php/syntax.xy b/examples/polish-notation-calc/php/syntax.xy
new file mode 100644
index 0000000..dae5a37
--- /dev/null
+++ b/examples/polish-notation-calc/php/syntax.xy
@@ -0,0 +1,18 @@
+number /[0-9]+/ +$text
+add /\+/
+sub /\-/
+mul /\*/
+div /\//
+pow /\^/
+/ +/
+%%
+
+S : E %eof { _1 }
+  ;
+E : add E E { _2 + _3 }
+  | sub E E { _2 - _3 }
+  | mul E E { _2 * _3 }
+  | div E E { _2 / _3 }
+  | pow E E { _2 ** _3 }
+  | number { _1 }
+  ;
diff --git a/lib/Lang.hs b/lib/Lang.hs
index 776058a..64a6118 100644
--- a/lib/Lang.hs
+++ b/lib/Lang.hs
@@ -7,11 +7,13 @@ instance Lang CPP
 instance Lang Python
 instance Lang CSharp
 instance Lang JS
+instance Lang PHP
 
 data CPP
 data Python
 data CSharp
 data JS
+data PHP
 
 cpp :: Proxy CPP
 cpp = Proxy
@@ -24,3 +26,6 @@ csharp = Proxy
 
 js :: Proxy JS
 js = Proxy
+
+php :: Proxy PHP
+php = Proxy
diff --git a/lib/Lexer.hs b/lib/Lexer.hs
index 09c2ee0..4fc7601 100644
--- a/lib/Lexer.hs
+++ b/lib/Lexer.hs
@@ -10,4 +10,5 @@ import Lexer.CPP()
 import Lexer.CSharp()
 import Lexer.Python()
 import Lexer.JS()
+import Lexer.PHP()
 import Lang
diff --git a/lib/Lexer/PHP.hs b/lib/Lexer/PHP.hs
new file mode 100644
index 0000000..d25a9e4
--- /dev/null
+++ b/lib/Lexer/PHP.hs
@@ -0,0 +1,141 @@
+{-# OPTIONS_GHC -Wno-orphans #-}
+{-# LANGUAGE QuasiQuotes, OverloadedStrings #-}
+-- | PHP backend of the lexer generator: renders the lexer automaton as a
+-- single @Lexer.php@ source file.
+module Lexer.PHP() where
+
+import qualified Data.List.NonEmpty as NE
+import Regex.Parse
+import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import Numeric (showHex)
+import Lexer.Types
+import Lang
+import Utils
+
+-- NOTE(review): the head of the PHP template (opening tag, class header,
+-- token constants, property declarations, constructor signature) was missing
+-- from the patch text and is reconstructed from the names the rest of the
+-- template uses; confirm it against the intended original.
+instance LexerWriter PHP where
+  -- Returns one (file name, contents) pair. accSt lists the accepting states
+  -- with their optional token name and action, tokNames the token names
+  -- (numbered from 1; 0 is %eof) and stList the per-state transitions.
+  writeLexer _ accSt tokNames stList =
+    [ ("Lexer.php", [interp|
+<?php
+
+class Lexer
+{
+    const TOKEN_TYPE_EOF = 0;
+    #{indent 1 tokDefns}
+
+    private $input;
+    private $curChIx;
+    private $debug;
+
+    public function __construct(string $input, int $curChIx = 0, bool $debug = false)
+    {
+        $this->input = $input;
+        $this->curChIx = $curChIx;
+        $this->debug = $debug;
+    }
+
+    public static function tokenToString(int $token): string
+    {
+        switch($token) {
+            case 0: return '%eof';
+            #{indent 3 tokToStr}
+            default: throw new \\InvalidArgumentException("Unknown token $token");
+        }
+    }
+
+    public function getNextToken(): array
+    {
+        $lastAccChIx = $this->curChIx;
+        $startChIx = $this->curChIx;
+        $accSt = -1;
+        $curSt = 0;
+        while ($curSt >= 0) {
+            if (in_array($curSt, [#{T.intercalate "," $ map (tshow . fst) accSt}])) {
+                $lastAccChIx = $this->curChIx;
+                $accSt = $curSt;
+            }
+
+            if ($this->curChIx >= strlen($this->input))
+                break;
+
+            $curCh = $this->input[$this->curChIx];
+            $this->curChIx+=1;
+            switch($curSt) {
+                #{indent 4 transTable}
+            }
+            break;
+        }
+
+        $lastReadChIx = $this->curChIx;
+        $this->curChIx = $lastAccChIx;
+        $text = substr($this->input, $startChIx, $lastAccChIx - $startChIx);
+        switch($accSt) {
+            #{indent 3 returnResult}
+        }
+
+        if ($this->curChIx >= strlen($this->input)) {
+            if ($this->debug) printf('Got EOF while lexing "%s"', $text);
+            return [self::TOKEN_TYPE_EOF, null];
+        }
+        throw new \\InvalidArgumentException("Unexpected input: " . substr($this->input, $startChIx, $lastReadChIx - $startChIx));
+    }
+}
+|])]
+    where
+    indent = indentLang 4
+    -- One `case` per accepting state: return the token, or skip and lex again.
+    returnResult = T.intercalate "\n" (map returnResult1 accSt)
+    returnResult1 :: (Int, (Maybe Text, Action)) -> Text
+    returnResult1 (st, (Just name, act)) = [interp|
+      case #{st}:
+          if ($this->debug) printf('Lexed token #{name}: "%s"', $text);
+          return [self::TOKEN_TYPE_#{T.toUpper name}, #{mkAct act}];
+      |]
+    returnResult1 (st, (Nothing, _)) = [interp|
+      case #{st}:
+          if ($this->debug) printf('Skipping state #{tshow st}: "%s"', $text);
+          return $this->getNextToken();
+      |]
+    -- One `case` per state that has outgoing transitions.
+    checkState :: (Int, (a, [(NE.NonEmpty CharPattern, Int)])) -> Maybe Text
+    checkState (_, (_, [])) = Nothing
+    checkState (curSt, (_, charTrans)) = Just [interp|
+      case #{tshow curSt}:
+          #{indent 1 $ T.intercalate " else " (map checkChars charTrans)}
+          break;
+      |]
+    transTable = T.intercalate "\n" $ mapMaybe checkState stList
+    tokDefns = T.intercalate "\n" $ zipWith (\x n -> [interp|const TOKEN_TYPE_#{T.toUpper x} = #{n};|] :: Text) tokNames [1::Word ..]
+    tokToStr = T.intercalate "\n" $ zipWith (\x n -> [interp|case #{n}: return '#{x}';|] :: Text) tokNames [1::Word ..]
+    mkAct NoAction = "null"
+    mkAct (Action act) = act
+    -- `continue 2`: PHP treats `switch` as a loop for `continue`, so a bare
+    -- `continue` would only leave the switch and hit the loop's `break`.
+    checkChars :: (NE.NonEmpty CharPattern, Int) -> Text
+    checkChars (charGroup, newSt) = [interp|
+      if (#{charCond charGroup}) {
+          $curSt = #{newSt};
+          continue 2;
+      }
+      |]
+    charCond = T.intercalate " || " . map charCond1 . NE.toList
+    charCond1 :: CharPattern -> Text
+    charCond1 (CChar c) = [interp|$curCh === #{phpChar c}|]
+    charCond1 (CRange c1 c2) = [interp|($curCh >= #{phpChar c1} && $curCh <= #{phpChar c2})|]
+    charCond1 CAny = "true"
+    -- PHP literal for one character. Printable ASCII is single-quoted (only
+    -- ' and \ need escaping there); anything else uses "\u{hex}", because
+    -- single-quoted PHP strings do not interpret escapes such as \n.
+    phpChar :: Char -> Text
+    phpChar c
+      | c == '\'' || c == '\\' = T.pack ['\'', '\\', c, '\'']
+      | c >= ' ' && c <= '~' = T.pack ['\'', c, '\'']
+      | otherwise = T.concat ["\"\\u{", T.pack (showHex (fromEnum c) ""), "}\""]