Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added lexer for php lang #1

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ langTbl = [
, (ncs , \(ParserProxy p) -> runProgram csharp p)
, (npy , \(ParserProxy p) -> runProgram python p)
, (njs , \(ParserProxy p) -> runProgram js p)
, (nphp , \(ParserProxy p) -> runProgram php p)
]
where
ncpp = ["cpp", "c++"]
ncs = ["c#", "csharp", "cs"]
npy = ["python", "py"]
njs = ["js", "javascript"]
nphp = ["php"]

data ParserProxy = forall p.
( Parser p
Expand Down
3 changes: 2 additions & 1 deletion alpaca-parser-generator.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cabal-version: 1.12
--
-- see: https://github.com/sol/hpack
--
-- hash: cd6c7f5ac97c1d41c7346954b2cae820235b93c832066cd2af66fe0d72657a6f
-- hash: 04246f6177aff9d5d9783e553b24e2f2d45367528d87e890e8a473ffc316d9c6

name: alpaca-parser-generator
version: 0.1.3.0
Expand All @@ -29,6 +29,7 @@ executable alpaca
Lexer.CSharp
Lexer.FA
Lexer.JS
Lexer.PHP
Lexer.Python
Lexer.Types
MonadTypes
Expand Down
1 change: 0 additions & 1 deletion examples/polish-notation-calc/nodejs/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
lexer.*
parser.*
__pycache__
3 changes: 3 additions & 0 deletions examples/polish-notation-calc/php/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Lexer.*
Parser.*
*.txt
12 changes: 12 additions & 0 deletions examples/polish-notation-calc/php/main.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?php
namespace Alpaca;

// Example driver: lexes every line read from STDIN with the generated Lexer
// and prints one "<token name>, <token value>" line per token.
// Usage: php main.php [-d] < input

require './Lexer.php';

// "-d" as the first argument turns on the lexer's debug tracing. The argument
// is optional, so fall back to null instead of reading a missing array key.
$debug = ($argv[1] ?? null) === '-d';

// Compare against false explicitly: fgets() returns false only on EOF/error,
// whereas a final line consisting of "0" without a trailing newline is a
// falsy string and would otherwise end the loop before being lexed.
while (($line = fgets(STDIN)) !== false) {
    $lexer = new Lexer(rtrim($line), $debug);
    // getNextToken() returns a [type, value] pair; stop at the EOF token.
    while (($token = $lexer->getNextToken())[0] !== Lexer::TOKEN_TYPE_EOF) {
        echo(Lexer::tokenToString($token[0]) . ", " . $token[1] . "\n");
    }
}
18 changes: 18 additions & 0 deletions examples/polish-notation-calc/php/syntax.xy
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
number /[0-9]+/ +$text
add /\+/
sub /\-/
mul /\*/
div /\//
pow /\^/
/ +/
%%

S : E %eof { _1 }
;
E : add E E { _2 + _3 }
| sub E E { _2 - _3 }
| mul E E { _2 * _3 }
| div E E { _2 / _3 }
| pow E E { _2 ** _3 }
| number { _1 }
;
4 changes: 4 additions & 0 deletions examples/polish-notation-calc/php/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# Generate the PHP sources from the grammar with alpaca (binary overridable via
# $alpaca, extra generator flags via $args), bailing out with the generator's
# exit status if that fails.
${alpaca:-alpaca} -l php $args syntax.xy || exit

# Run the example driver, forwarding this script's arguments and feeding it
# the contents of $input on standard input.
php main.php "$@" <<< "$input"
5 changes: 5 additions & 0 deletions lib/Lang.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ instance Lang CPP
instance Lang Python
instance Lang CSharp
instance Lang JS
instance Lang PHP

data CPP
data Python
data CSharp
data JS
data PHP

cpp :: Proxy CPP
cpp = Proxy
Expand All @@ -24,3 +26,6 @@ csharp = Proxy

js :: Proxy JS
js = Proxy

php :: Proxy PHP
php = Proxy
1 change: 1 addition & 0 deletions lib/Lexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ import Lexer.CPP()
import Lexer.CSharp()
import Lexer.Python()
import Lexer.JS()
import Lexer.PHP()
import Lang
114 changes: 114 additions & 0 deletions lib/Lexer/PHP.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{-# OPTIONS_GHC -Wno-orphans #-}
{-# LANGUAGE QuasiQuotes, OverloadedStrings #-}
module Lexer.PHP() where

import qualified Data.List.NonEmpty as NE
import Regex.Parse
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.IntSet as IS
import Numeric (showHex)
import Lexer.Types
import Lang
import Utils

-- | PHP backend of the lexer generator.
--
-- 'writeLexer' produces a single output file, @Lexer.php@, that declares the
-- class @Lexer@ in namespace @Alpaca@. The class exposes:
--
-- * one @TOKEN_TYPE_*@ integer constant per token name (EOF is always 0,
--   user tokens are numbered from 1 in the order of @tokNames@);
-- * @tokenToString@, mapping a token constant back to its name;
-- * @getNextToken@, which walks the state list as a @goto@-based state
--   machine, remembers the last accepting state reached and returns a
--   @[type, value]@ pair.
--
-- Arguments, as used below: @accSt@ pairs accepting state numbers with an
-- optional token name and an action, @tokNames@ lists the token names, and
-- @stList@ pairs every state number with its outgoing character transitions.
instance LexerWriter PHP where
  writeLexer _ accSt tokNames stList =
    [ ("Lexer.php", [interp|
<?php

namespace Alpaca;

class Lexer
{
    const TOKEN_TYPE_EOF = 0;
    #{indent 1 tokDefns}

    private $input;
    private $curChIx;
    private $debug;

    public function __construct(string $input, bool $debug)
    {
        $this->input = $input;
        $this->curChIx = 0;
        $this->debug = $debug;
    }

    public static function tokenToString(int $token): string
    {
        switch($token) {
            case 0: return '%eof';
            #{indent 3 tokToStr}
            default: throw new \\InvalidArgumentException("Unknown token $token");
        }
    }

    public function getNextToken(): array
    {
        $lastAccChIx = $this->curChIx;
        $startChIx = $this->curChIx;
        $accSt = -1;
        $len = strlen($this->input);
        #{indent 2 transTable}
        end:
        $lastReadChIx = $this->curChIx;
        $this->curChIx = $lastAccChIx;
        $text = substr($this->input, $startChIx, $lastAccChIx - $startChIx);
        switch($accSt) {
            #{indent 3 returnResult}
        }

        if ($this->curChIx >= $len) {
            if ($this->debug) printf("Got EOF while lexing \\"%s\\"\\n", $text);
            return [self::TOKEN_TYPE_EOF, null];
        }
        throw new \\InvalidArgumentException("Unexpected input: " . substr($this->input, $startChIx, $lastReadChIx - $startChIx));
    }
}
|])]
    where
    -- Generated PHP is indented in steps of four spaces.
    indent = indentLang 4
    -- One @case@ arm per accepting state, spliced into the final @switch@.
    returnResult = T.intercalate "\n" (map returnResult1 accSt)
    returnResult1 :: (Int, (Maybe Text, Action)) -> Text
    -- Named accepting state: return the token together with its action value.
    returnResult1 (st, (Just name, act)) = [interp|
      case #{st}:
          if ($this->debug) printf("Lexed token #{name}: \\"%s\\"\\n", $text);
          return [self::TOKEN_TYPE_#{T.toUpper name}, #{mkAct act}];
      |]
    -- Unnamed accepting state: discard the match and lex the next token.
    returnResult1 (st, (Nothing, _)) = [interp|
      case #{st}:
          if ($this->debug) printf("Skipping state #{tshow st}: \\"%s\\"\\n", $text);
          return $this->getNextToken();
      |]
    -- Set of accepting state numbers, for the membership test below.
    accStS = IS.fromList $ map fst accSt
    -- On entering an accepting state, record the position and state so that
    -- the @end:@ section can rewind to the last accepted match.
    checkAccepting st
      | st `IS.member` accStS = [interp|
        $lastAccChIx = $this->curChIx;
        $accSt = #{st};
        |] :: Text
      | otherwise = ""
    -- Code for one state: a label, the accepting bookkeeping, reading one
    -- character, and an if/else-if chain over the outgoing transitions.
    checkState :: (Int, (a, [(NE.NonEmpty CharPattern, Int)])) -> Maybe Text
    checkState (curSt, (_, charTrans)) = Just [interp|
      state_#{curSt}:
          #{indent 1 $ checkAccepting curSt}
          if ($this->curChIx >= $len) goto end;
          $curCh = $this->input[$this->curChIx];
          $this->curChIx+=1;
          #{indent 1 $ T.intercalate "\nelse " $ map checkChars charTrans}
          goto end;
      |]
    transTable = T.intercalate "\n" $ mapMaybe checkState stList
    -- Token constants and their reverse mapping; numbering starts at 1
    -- because 0 is reserved for EOF.
    tokDefns = T.intercalate "\n" $ zipWith (\x n -> [interp|const TOKEN_TYPE_#{T.toUpper x} = #{n};|] :: Text) tokNames [1::Word ..]
    tokToStr = T.intercalate "\n" $ zipWith (\x n -> [interp|case #{n}: return '#{x}';|] :: Text) tokNames [1::Word ..]
    -- Tokens without an action carry @null@ as their value.
    mkAct NoAction = "null"
    mkAct (Action act) = act
    checkChars :: (NE.NonEmpty CharPattern, Int) -> Text
    checkChars (charGroup, newSt) = [interp|
      if (#{charCond charGroup}) goto state_#{newSt};
      |]
    charCond = T.intercalate " || " . map charCond1 . NE.toList
    charCond1 :: CharPattern -> Text
    charCond1 (CChar c) = [interp|$curCh === #{phpChar c}|]
    charCond1 (CRange c1 c2) = [interp|($curCh >= #{phpChar c1} && $curCh <= #{phpChar c2})|]
    charCond1 CAny = "true"
    -- Render a character as a PHP string literal. For printable ASCII the
    -- Haskell 'show' form ('a', '\'', '\\') is also a valid PHP single-quoted
    -- string. Anything else would be shown as an escape such as '\n' or
    -- '\1234', which PHP single quotes do NOT interpret, so those are emitted
    -- as a double-quoted "\u{hex}" code point escape instead.
    -- NOTE(review): the generated lexer indexes the input by byte, so code
    -- points above 0x7F still cannot equal a single $curCh — confirm whether
    -- multi-byte input is meant to be supported.
    phpChar :: Char -> Text
    phpChar c
      | c >= ' ' && c <= '~' = tshow c
      | otherwise = T.concat ["\"\\u{", T.pack (showHex (fromEnum c) ""), "}\""]