Skip to content

Commit

Permalink
Fixed #21.
Browse files Browse the repository at this point in the history
  • Loading branch information
mivan committed Feb 13, 2017
1 parent 90c729f commit 61fc53f
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/quex_modules/definitions.qx
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ define {
// Ez a régi: ([a-zµ¿»¶±¼¾¹³áàăâåäãąæćčçďđðéèêěëęíìîïĺľłńňñóòôöőõøºŕřśšşßťţúùûůüűýźžżþ])
// Megj.: union(\P{Lowercase}, \P{Other_Lowercase} is lehet hogy jó
// \G{Lowercase_Letter} helyett -- vajon van különbség?
// UPPER: nagybetűk
// UPPER: nagybetűk
// Megj.: Csaba kódja alapján meg ezek voltak benne [§¡£¥¦©ª«¬®¯],
// most csak a monogrammokhoz kell az UPPER, ugyhogy kivettem.
// Ez a régi: ([A-ZÁÉÍÓÖÕÚÜÛ§¡£¥¦©ª«¬®¯ÀÂÃÄÅÆÇÈÊËÌÎÏÐÑÒÔØÙÝÞ])
Expand Down
69 changes: 45 additions & 24 deletions src/quex_modules/token.qx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ define {

//O P E R A T I O N S ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
OPERAND ([0-9]+[.,])?[0-9]+ // decimal fractions
OPERATION_WS [  ] // space (0020), non-breaking space (00A0)
OPERATION_WS [  ] // space (0020), non-breaking space (00A0)
OPERATOR {OPERATION_WS}?[*+]{OPERATION_WS}?
OPERATION {OPERAND}({OPERATOR}{OPERAND})+

Expand Down Expand Up @@ -159,9 +159,51 @@ mode PROGRAM : COMMON {
self_send1(token_TOKEN, LEX.c_str());
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// enumeration of names joined by hyphens
// Sentence-final variant: a capitalised name followed by at least two more
// "-"-joined capitalised names, then "." and the {SNT_CLOSE_QX} pattern.
(\G{Uppercase_Letter}\G{Lowercase_Letter}+)("-"\G{Uppercase_Letter}\G{Lowercase_Letter}+){2,}"."{SNT_CLOSE_QX} {
    /* std::wcerr << L"enum of names with hyphen (snt end): " << Lexeme << std::endl; */

    // Copy the lexeme without its last two characters.
    // NOTE(review): presumably these are the "." and a one-character
    // sentence-close match -- confirm against the SNT_CLOSE_QX definition.
    const std::wstring names(Lexeme, wcslen(Lexeme)-2);

    // Text substituted for every hyphen: close the current word, emit the
    // hyphen between the punctuation markers, open the next word.
    const std::wstring hyphen_markup = self.WORD_CLOSE_CPP + self.PUNCT_OPEN_CPP + L"-" + self.PUNCT_CLOSE_CPP + self.WORD_OPEN_CPP;

    // Walk the names hyphen by hyphen, copying each name verbatim.
    std::wstring marked;
    std::wstring::size_type start = 0;
    for (std::wstring::size_type dash = names.find(L'-', start);
         dash != std::wstring::npos;
         dash = names.find(L'-', start))
    {
        marked.append(names, start, dash - start);
        marked.append(hyphen_markup);
        start = dash + 1;
    }
    marked.append(names, start, std::wstring::npos);

    // Wrap the whole sequence as a word, then add the "." between the
    // punctuation markers and the sentence-close marker.
    const std::wstring out = self.WORD_OPEN_CPP + marked + self.WORD_CLOSE_CPP + self.PUNCT_OPEN_CPP + L"." + self.PUNCT_CLOSE_CPP + self.SNT_CLOSE_CPP;
    self_send1(token_TOKEN, out.c_str());
}
// Mid-sentence variant: a capitalised name followed by at least two more
// "-"-joined capitalised names, with no trailing sentence end.
(\G{Uppercase_Letter}\G{Lowercase_Letter}+)("-"\G{Uppercase_Letter}\G{Lowercase_Letter}+){2,} {
    /* std::wcerr << L"enum of names with hyphen: " << Lexeme << std::endl; */
    const std::wstring names(Lexeme);

    // Text substituted for every hyphen: close the current word, emit the
    // hyphen between the punctuation markers, open the next word.
    const std::wstring hyphen_markup = self.WORD_CLOSE_CPP + self.PUNCT_OPEN_CPP + L"-" + self.PUNCT_CLOSE_CPP + self.WORD_OPEN_CPP;

    // Walk the names hyphen by hyphen, copying each name verbatim.
    std::wstring marked;
    std::wstring::size_type start = 0;
    for (std::wstring::size_type dash = names.find(L'-', start);
         dash != std::wstring::npos;
         dash = names.find(L'-', start))
    {
        marked.append(names, start, dash - start);
        marked.append(hyphen_markup);
        start = dash + 1;
    }
    marked.append(names, start, std::wstring::npos);

    // Open the first word and close the last one, then emit the result.
    const std::wstring out = self.WORD_OPEN_CPP + marked + self.WORD_CLOSE_CPP;
    self_send1(token_TOKEN, out.c_str());
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// operations with decimal fractions
{OPERATION}("."{SNT_CLOSE_QX})? {
/* std::wcerr << L"operations: " << Lexeme << std::endl; */
std::wstring LEX(Lexeme);
self.operation_processing(LEX);
self_send1(token_TOKEN, LEX.c_str());
Expand Down Expand Up @@ -203,27 +245,6 @@ mode PROGRAM : COMMON {
self_send1(token_PUNCT, LEX.c_str());
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// enumeration of names joined by hyphens
({UPPER}{LOWER}+)("-"{UPPER}{LOWER}+){2,} {
    /* std::wcerr << L"enum of names with hyphen: " << Lexeme << std::endl; */
    const std::wstring names(Lexeme);

    // Text substituted for every hyphen: close the current word, emit the
    // hyphen between the punctuation markers, open the next word.
    const std::wstring hyphen_markup = self.WORD_CLOSE_CPP + self.PUNCT_OPEN_CPP + L"-" + self.PUNCT_CLOSE_CPP + self.WORD_OPEN_CPP;

    // Walk the names hyphen by hyphen, copying each name verbatim.
    std::wstring marked;
    std::wstring::size_type start = 0;
    for (std::wstring::size_type dash = names.find(L'-', start);
         dash != std::wstring::npos;
         dash = names.find(L'-', start))
    {
        marked.append(names, start, dash - start);
        marked.append(hyphen_markup);
        start = dash + 1;
    }
    marked.append(names, start, std::wstring::npos);

    // Open the first word and close the last one, then emit the result.
    const std::wstring out = self.WORD_OPEN_CPP + marked + self.WORD_CLOSE_CPP;
    self_send1(token_TOKEN, out.c_str());
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// basic tokenizer rules:
Expand All @@ -235,14 +256,14 @@ mode PROGRAM : COMMON {
// par(en)thesis, but not (parenthesis)
// version for words with "-e"
((({WORDS_WITH_DOTS}?{WORD_IN_PAR})?{WORDS_WITH_DOTS})|({WORDS_WITH_DOTS}?{WORD_IN_PAR}))"-e"("."+)?{SNT_CLOSE_QX}? {
    /* std::wcerr << L"basic (-e): " << Lexeme << std::endl; */

    // Work on a private copy of the matched text.
    std::wstring token_text(Lexeme);

    // Hand the copy to the "-e" post-processing helper, then emit it.
    // NOTE(review): the helper presumably rewrites its argument in place,
    // since its result is not otherwise used -- confirm in its definition.
    self.particula_token_corrig(token_text);
    self_send1(token_TOKEN, token_text.c_str());
}
// version for other cases:
((({WORD_IN_PAR})?{WORDS_WITH_DOTS})|({WORDS_WITH_DOTS}{WORD_IN_PAR}{WORDS_WITH_DOTS}?))("."+{SNT_CLOSE_QX})? {
/* std::wcerr << L"basic! " << Lexeme << std::endl; */
/* std::wcerr << L"basic: " << Lexeme << std::endl; */
std::wstring LEX(Lexeme);
self.basic_token_corrig(LEX);
self_send1(token_TOKEN, LEX.c_str());
Expand Down
6 changes: 6 additions & 0 deletions test/test_default_token_enumofnames.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,9 @@ OUT: <s><w>Nana</w><c>-</c><w>Nini</w><c>-</c><w>Nene</w></s>
IN : Nana-Nini-Nene-Nono
OUT: <s><w>Nana</w><c>-</c><w>Nini</w><c>-</c><w>Nene</w><c>-</c><w>Nono</w></s>

IN : Nana-Nini-Nene.
OUT: <s><w>Nana</w><c>-</c><w>Nini</w><c>-</c><w>Nene</w><c>.</c></s>

IN : A Nana-Nini-Nene-Nono.
OUT: <s><w>A</w><ws> </ws><w>Nana</w><c>-</c><w>Nini</w><c>-</c><w>Nene</w><c>-</c><w>Nono</w><c>.</c></s>

0 comments on commit 61fc53f

Please sign in to comment.