From 63dcb37a001365cc8078dadfeefd016e1e502bb1 Mon Sep 17 00:00:00 2001 From: Markus Heberling Date: Sun, 9 Aug 2020 13:40:50 +0200 Subject: [PATCH] Added Python3 Antlr Token Maker Demo --- RSyntaxTextAreaDemo/src/main/antlr/Python3.g4 | 1155 +++++++++++++++++ .../ui/rsyntaxtextarea/demo/DemoRootPane.java | 3 + .../demo/antlr/Python3TokenMaker.java | 92 ++ 3 files changed, 1250 insertions(+) create mode 100644 RSyntaxTextAreaDemo/src/main/antlr/Python3.g4 create mode 100644 RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/antlr/Python3TokenMaker.java diff --git a/RSyntaxTextAreaDemo/src/main/antlr/Python3.g4 b/RSyntaxTextAreaDemo/src/main/antlr/Python3.g4 new file mode 100644 index 000000000..d07c49ed5 --- /dev/null +++ b/RSyntaxTextAreaDemo/src/main/antlr/Python3.g4 @@ -0,0 +1,1155 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2014 by Bart Kiers + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Project : python3-parser; an ANTLR4 grammar for Python 3 + * https://github.com/bkiers/python3-parser + * Developed by : Bart Kiers, bart@big-o.nl + */ +grammar Python3; + +// All comments that start with "///" are copy-pasted from +// The Python Language Reference + +tokens { INDENT, DEDENT } + +@lexer::members { + // A queue where extra tokens are pushed on (see the NEWLINE lexer rule). + private java.util.LinkedList tokens = new java.util.LinkedList<>(); + // The stack that keeps track of the indentation level. + private java.util.Stack indents = new java.util.Stack<>(); + // The amount of opened braces, brackets and parenthesis. + private int opened = 0; + // The most recently produced token. + private Token lastToken = null; + @Override + public void emit(Token t) { + super.setToken(t); + tokens.offer(t); + } + + @Override + public Token nextToken() { + // Check if the end-of-file is ahead and there are still some DEDENTS expected. + if (_input.LA(1) == EOF && !this.indents.isEmpty()) { + // Remove any trailing EOF tokens from our buffer. + for (int i = tokens.size() - 1; i >= 0; i--) { + if (tokens.get(i).getType() == EOF) { + tokens.remove(i); + } + } + + // First emit an extra line break that serves as the end of the statement. + this.emit(commonToken(Python3Parser.NEWLINE, "\n")); + + // Now emit as much DEDENT tokens as needed. + while (!indents.isEmpty()) { + this.emit(createDedent()); + indents.pop(); + } + + // Put the EOF back on the token stream. + this.emit(commonToken(Python3Parser.EOF, "")); + } + + Token next = super.nextToken(); + + if (next.getChannel() == Token.DEFAULT_CHANNEL) { + // Keep track of the last token on the default channel. + this.lastToken = next; + } + + return tokens.isEmpty() ? next : tokens.poll(); + } + + private Token createDedent() { + CommonToken dedent = commonToken(Python3Parser.DEDENT, ""); + dedent.setLine(this.lastToken.getLine()); + return dedent; + } + + private CommonToken commonToken(int type, String text) { + int stop = this.getCharIndex() - 1; + int start = text.isEmpty() ? stop : stop - text.length() + 1; + return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop); + } + + // Calculates the indentation of the provided spaces, taking the + // following rules into account: + // + // "Tabs are replaced (from left to right) by one to eight spaces + // such that the total number of characters up to and including + // the replacement is a multiple of eight [...]" + // + // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation + static int getIndentationCount(String spaces) { + int count = 0; + for (char ch : spaces.toCharArray()) { + switch (ch) { + case '\t': + count += 8 - (count % 8); + break; + default: + // A normal space char. + count++; + } + } + + return count; + } + + boolean atStartOfInput() { + return super.getCharPositionInLine() == 0 && super.getLine() == 1; + } +} + +/* + * parser rules + */ + +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE; +file_input: (NEWLINE | stmt)* EOF; +eval_input: testlist NEWLINE* EOF; + +decorator: '@' dotted_name ( '(' (arglist)? ')' )? NEWLINE; +decorators: decorator+; +decorated: decorators (classdef | funcdef | async_funcdef); + +async_funcdef: ASYNC funcdef; +funcdef: 'def' NAME parameters ('->' test)? ':' suite; + +parameters: '(' (typedargslist)? ')'; +typedargslist: (tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (',' ( + '*' (tfpdef)? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef (',')?)?)? + | '**' tfpdef (',')?)?)? + | '*' (tfpdef)? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef (',')?)?)? + | '**' tfpdef (',')?); +tfpdef: NAME (':' test)?; +varargslist: (vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (',' ( + '*' (vfpdef)? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef (',')?)?)? + | '**' vfpdef (',')?)?)? + | '*' (vfpdef)? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef (',')?)?)? + | '**' vfpdef (',')? +); +vfpdef: NAME; + +stmt: simple_stmt | compound_stmt; +simple_stmt: small_stmt (';' small_stmt)* (';')? NEWLINE; +small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | + import_stmt | global_stmt | nonlocal_stmt | assert_stmt); +expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | + ('=' (yield_expr|testlist_star_expr))*); +annassign: ':' test ('=' test)?; +testlist_star_expr: (test|star_expr) (',' (test|star_expr))* (',')?; +augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | + '<<=' | '>>=' | '**=' | '//='); +// For normal and annotated assignments, additional restrictions enforced by the interpreter +del_stmt: 'del' exprlist; +pass_stmt: 'pass'; +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt; +break_stmt: 'break'; +continue_stmt: 'continue'; +return_stmt: 'return' (testlist)?; +yield_stmt: yield_expr; +raise_stmt: 'raise' (test ('from' test)?)?; +import_stmt: import_name | import_from; +import_name: 'import' dotted_as_names; +// note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS +import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)); +import_as_name: NAME ('as' NAME)?; +dotted_as_name: dotted_name ('as' NAME)?; +import_as_names: import_as_name (',' import_as_name)* (',')?; +dotted_as_names: dotted_as_name (',' dotted_as_name)*; +dotted_name: NAME ('.' NAME)*; +global_stmt: 'global' NAME (',' NAME)*; +nonlocal_stmt: 'nonlocal' NAME (',' NAME)*; +assert_stmt: 'assert' test (',' test)?; + +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt; +async_stmt: ASYNC (funcdef | with_stmt | for_stmt); +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ('else' ':' suite)?; +while_stmt: 'while' test ':' suite ('else' ':' suite)?; +for_stmt: 'for' exprlist 'in' testlist ':' suite ('else' ':' suite)?; +try_stmt: ('try' ':' suite + ((except_clause ':' suite)+ + ('else' ':' suite)? + ('finally' ':' suite)? | + 'finally' ':' suite)); +with_stmt: 'with' with_item (',' with_item)* ':' suite; +with_item: test ('as' expr)?; +// NB compile.c makes sure that the default except clause is last +except_clause: 'except' (test ('as' NAME)?)?; +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT; + +test: or_test ('if' or_test 'else' test)? | lambdef; +test_nocond: or_test | lambdef_nocond; +lambdef: 'lambda' (varargslist)? ':' test; +lambdef_nocond: 'lambda' (varargslist)? ':' test_nocond; +or_test: and_test ('or' and_test)*; +and_test: not_test ('and' not_test)*; +not_test: 'not' not_test | comparison; +comparison: expr (comp_op expr)*; +// <> isn't actually a valid comparison operator in Python. It's here for the +// sake of a __future__ import described in PEP 401 (which really works :-) +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'; +star_expr: '*' expr; +expr: xor_expr ('|' xor_expr)*; +xor_expr: and_expr ('^' and_expr)*; +and_expr: shift_expr ('&' shift_expr)*; +shift_expr: arith_expr (('<<'|'>>') arith_expr)*; +arith_expr: term (('+'|'-') term)*; +term: factor (('*'|'@'|'/'|'%'|'//') factor)*; +factor: ('+'|'-'|'~') factor | power; +power: atom_expr ('**' factor)?; +atom_expr: (AWAIT)? atom trailer*; +atom: ('(' (yield_expr|testlist_comp)? ')' | + '[' (testlist_comp)? ']' | + '{' (dictorsetmaker)? '}' | + NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False'); +testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* (',')? ); +trailer: '(' (arglist)? ')' | '[' subscriptlist ']' | '.' NAME; +subscriptlist: subscript (',' subscript)* (',')?; +subscript: test | (test)? ':' (test)? (sliceop)?; +sliceop: ':' (test)?; +exprlist: (expr|star_expr) (',' (expr|star_expr))* (',')?; +testlist: test (',' test)* (',')?; +dictorsetmaker: ( ((test ':' test | '**' expr) + (comp_for | (',' (test ':' test | '**' expr))* (',')?)) | + ((test | star_expr) + (comp_for | (',' (test | star_expr))* (',')?)) ); + +classdef: 'class' NAME ('(' (arglist)? ')')? ':' suite; + +arglist: argument (',' argument)* (',')?; + +// The reason that keywords are test nodes instead of NAME is that using NAME +// results in an ambiguity. ast.c makes sure it's a NAME. +// "test '=' test" is really "keyword '=' test", but we have no such token. +// These need to be in a single rule to avoid grammar that is ambiguous +// to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, +// we explicitly match '*' here, too, to give it proper precedence. +// Illegal combinations and orderings are blocked in ast.c: +// multiple (test comp_for) arguments are blocked; keyword unpackings +// that precede iterable unpackings are blocked; etc. +argument: ( test (comp_for)? | + test '=' test | + '**' test | + '*' test ); + +comp_iter: comp_for | comp_if; +comp_for: (ASYNC)? 'for' exprlist 'in' or_test (comp_iter)?; +comp_if: 'if' test_nocond (comp_iter)?; + +// not used in grammar, but may appear in "node" passed from Parser to Compiler +encoding_decl: NAME; + +yield_expr: 'yield' (yield_arg)?; +yield_arg: 'from' test | testlist; + +/* + * lexer rules + */ + +STRING + : STRING_LITERAL + | BYTES_LITERAL + ; + +NUMBER + : INTEGER + | FLOAT_NUMBER + | IMAG_NUMBER + ; + +INTEGER + : DECIMAL_INTEGER + | OCT_INTEGER + | HEX_INTEGER + | BIN_INTEGER + ; + +DEF : 'def'; +RETURN : 'return'; +RAISE : 'raise'; +FROM : 'from'; +IMPORT : 'import'; +AS : 'as'; +GLOBAL : 'global'; +NONLOCAL : 'nonlocal'; +ASSERT : 'assert'; +IF : 'if'; +ELIF : 'elif'; +ELSE : 'else'; +WHILE : 'while'; +FOR : 'for'; +IN : 'in'; +TRY : 'try'; +FINALLY : 'finally'; +WITH : 'with'; +EXCEPT : 'except'; +LAMBDA : 'lambda'; +OR : 'or'; +AND : 'and'; +NOT : 'not'; +IS : 'is'; +NONE : 'None'; +TRUE : 'True'; +FALSE : 'False'; +CLASS : 'class'; +YIELD : 'yield'; +DEL : 'del'; +PASS : 'pass'; +CONTINUE : 'continue'; +BREAK : 'break'; +ASYNC : 'async'; +AWAIT : 'await'; + +NEWLINE + : ( {atStartOfInput()}? SPACES + | ( '\r'? '\n' | '\r' | '\f' ) SPACES? + ) + { + String newLine = getText().replaceAll("[^\r\n\f]+", ""); + String spaces = getText().replaceAll("[\r\n\f]+", ""); + + // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to + // satisfy the final newline needed by the single_put rule used by the REPL. + int next = _input.LA(1); + int nextnext = _input.LA(2); + if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) { + // If we're inside a list or on a blank line, ignore all indents, + // dedents and line breaks. + skip(); + } + else { + emit(commonToken(NEWLINE, newLine)); + int indent = getIndentationCount(spaces); + int previous = indents.isEmpty() ? 0 : indents.peek(); + if (indent == previous) { + // skip indents of the same size as the present indent-size + skip(); + } + else if (indent > previous) { + indents.push(indent); + emit(commonToken(Python3Parser.INDENT, spaces)); + } + else { + // Possibly emit more than 1 DEDENT token. + while(!indents.isEmpty() && indents.peek() > indent) { + this.emit(createDedent()); + indents.pop(); + } + } + } + } + ; + +/// identifier ::= id_start id_continue* +NAME + : ID_START ID_CONTINUE* + ; + +/// stringliteral ::= [stringprefix](shortstring | longstring) +/// stringprefix ::= "r" | "u" | "R" | "U" | "f" | "F" +/// | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" +STRING_LITERAL + : ( [rR] | [uU] | [fF] | ( [fF] [rR] ) | ( [rR] [fF] ) )? ( SHORT_STRING | LONG_STRING ) + ; + +/// bytesliteral ::= bytesprefix(shortbytes | longbytes) +/// bytesprefix ::= "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" +BYTES_LITERAL + : ( [bB] | ( [bB] [rR] ) | ( [rR] [bB] ) ) ( SHORT_BYTES | LONG_BYTES ) + ; + +/// decimalinteger ::= nonzerodigit digit* | "0"+ +DECIMAL_INTEGER + : NON_ZERO_DIGIT DIGIT* + | '0'+ + ; + +/// octinteger ::= "0" ("o" | "O") octdigit+ +OCT_INTEGER + : '0' [oO] OCT_DIGIT+ + ; + +/// hexinteger ::= "0" ("x" | "X") hexdigit+ +HEX_INTEGER + : '0' [xX] HEX_DIGIT+ + ; + +/// bininteger ::= "0" ("b" | "B") bindigit+ +BIN_INTEGER + : '0' [bB] BIN_DIGIT+ + ; + +/// floatnumber ::= pointfloat | exponentfloat +FLOAT_NUMBER + : POINT_FLOAT + | EXPONENT_FLOAT + ; + +/// imagnumber ::= (floatnumber | intpart) ("j" | "J") +IMAG_NUMBER + : ( FLOAT_NUMBER | INT_PART ) [jJ] + ; + +DOT : '.'; +ELLIPSIS : '...'; +STAR : '*'; +OPEN_PAREN : '(' {opened++;}; +CLOSE_PAREN : ')' {opened--;}; +COMMA : ','; +COLON : ':'; +SEMI_COLON : ';'; +POWER : '**'; +ASSIGN : '='; +OPEN_BRACK : '[' {opened++;}; +CLOSE_BRACK : ']' {opened--;}; +OR_OP : '|'; +XOR : '^'; +AND_OP : '&'; +LEFT_SHIFT : '<<'; +RIGHT_SHIFT : '>>'; +ADD : '+'; +MINUS : '-'; +DIV : '/'; +MOD : '%'; +IDIV : '//'; +NOT_OP : '~'; +OPEN_BRACE : '{' {opened++;}; +CLOSE_BRACE : '}' {opened--;}; +LESS_THAN : '<'; +GREATER_THAN : '>'; +EQUALS : '=='; +GT_EQ : '>='; +LT_EQ : '<='; +NOT_EQ_1 : '<>'; +NOT_EQ_2 : '!='; +AT : '@'; +ARROW : '->'; +ADD_ASSIGN : '+='; +SUB_ASSIGN : '-='; +MULT_ASSIGN : '*='; +AT_ASSIGN : '@='; +DIV_ASSIGN : '/='; +MOD_ASSIGN : '%='; +AND_ASSIGN : '&='; +OR_ASSIGN : '|='; +XOR_ASSIGN : '^='; +LEFT_SHIFT_ASSIGN : '<<='; +RIGHT_SHIFT_ASSIGN : '>>='; +POWER_ASSIGN : '**='; +IDIV_ASSIGN : '//='; + +SKIP_ + : ( SPACES | COMMENT | LINE_JOINING ) -> skip + ; + +UNKNOWN_CHAR + : . + ; + +/* + * fragments + */ + +/// shortstring ::= "'" shortstringitem* "'" | '"' shortstringitem* '"' +/// shortstringitem ::= shortstringchar | stringescapeseq +/// shortstringchar ::= +fragment SHORT_STRING + : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* '\'' + | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"' + ; +/// longstring ::= "'''" longstringitem* "'''" | '"""' longstringitem* '"""' +fragment LONG_STRING + : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' + | '"""' LONG_STRING_ITEM*? '"""' + ; + +/// longstringitem ::= longstringchar | stringescapeseq +fragment LONG_STRING_ITEM + : LONG_STRING_CHAR + | STRING_ESCAPE_SEQ + ; + +/// longstringchar ::= +fragment LONG_STRING_CHAR + : ~'\\' + ; + +/// stringescapeseq ::= "\" +fragment STRING_ESCAPE_SEQ + : '\\' . + | '\\' NEWLINE + ; + +/// nonzerodigit ::= "1"..."9" +fragment NON_ZERO_DIGIT + : [1-9] + ; + +/// digit ::= "0"..."9" +fragment DIGIT + : [0-9] + ; + +/// octdigit ::= "0"..."7" +fragment OCT_DIGIT + : [0-7] + ; + +/// hexdigit ::= digit | "a"..."f" | "A"..."F" +fragment HEX_DIGIT + : [0-9a-fA-F] + ; + +/// bindigit ::= "0" | "1" +fragment BIN_DIGIT + : [01] + ; + +/// pointfloat ::= [intpart] fraction | intpart "." +fragment POINT_FLOAT + : INT_PART? FRACTION + | INT_PART '.' + ; + +/// exponentfloat ::= (intpart | pointfloat) exponent +fragment EXPONENT_FLOAT + : ( INT_PART | POINT_FLOAT ) EXPONENT + ; + +/// intpart ::= digit+ +fragment INT_PART + : DIGIT+ + ; + +/// fraction ::= "." digit+ +fragment FRACTION + : '.' DIGIT+ + ; + +/// exponent ::= ("e" | "E") ["+" | "-"] digit+ +fragment EXPONENT + : [eE] [+-]? DIGIT+ + ; + +/// shortbytes ::= "'" shortbytesitem* "'" | '"' shortbytesitem* '"' +/// shortbytesitem ::= shortbyteschar | bytesescapeseq +fragment SHORT_BYTES + : '\'' ( SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ )* '\'' + | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ )* '"' + ; + +/// longbytes ::= "'''" longbytesitem* "'''" | '"""' longbytesitem* '"""' +fragment LONG_BYTES + : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' + | '"""' LONG_BYTES_ITEM*? '"""' + ; + +/// longbytesitem ::= longbyteschar | bytesescapeseq +fragment LONG_BYTES_ITEM + : LONG_BYTES_CHAR + | BYTES_ESCAPE_SEQ + ; + +/// shortbyteschar ::= +fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE + : [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0026] + | [\u0028-\u005B] + | [\u005D-\u007F] + ; + +fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE + : [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0021] + | [\u0023-\u005B] + | [\u005D-\u007F] + ; + +/// longbyteschar ::= +fragment LONG_BYTES_CHAR + : [\u0000-\u005B] + | [\u005D-\u007F] + ; + +/// bytesescapeseq ::= "\" +fragment BYTES_ESCAPE_SEQ + : '\\' [\u0000-\u007F] + ; + +fragment SPACES + : [ \t]+ + ; + +fragment COMMENT + : '#' ~[\r\n\f]* + ; + +fragment LINE_JOINING + : '\\' SPACES? ( '\r'? '\n' | '\r' | '\f') + ; + +/// id_start ::= +fragment ID_START + : '_' + | [A-Z] + | [a-z] + | '\u00AA' + | '\u00B5' + | '\u00BA' + | [\u00C0-\u00D6] + | [\u00D8-\u00F6] + | [\u00F8-\u01BA] + | '\u01BB' + | [\u01BC-\u01BF] + | [\u01C0-\u01C3] + | [\u01C4-\u0241] + | [\u0250-\u02AF] + | [\u02B0-\u02C1] + | [\u02C6-\u02D1] + | [\u02E0-\u02E4] + | '\u02EE' + | '\u037A' + | '\u0386' + | [\u0388-\u038A] + | '\u038C' + | [\u038E-\u03A1] + | [\u03A3-\u03CE] + | [\u03D0-\u03F5] + | [\u03F7-\u0481] + | [\u048A-\u04CE] + | [\u04D0-\u04F9] + | [\u0500-\u050F] + | [\u0531-\u0556] + | '\u0559' + | [\u0561-\u0587] + | [\u05D0-\u05EA] + | [\u05F0-\u05F2] + | [\u0621-\u063A] + | '\u0640' + | [\u0641-\u064A] + | [\u066E-\u066F] + | [\u0671-\u06D3] + | '\u06D5' + | [\u06E5-\u06E6] + | [\u06EE-\u06EF] + | [\u06FA-\u06FC] + | '\u06FF' + | '\u0710' + | [\u0712-\u072F] + | [\u074D-\u076D] + | [\u0780-\u07A5] + | '\u07B1' + | [\u0904-\u0939] + | '\u093D' + | '\u0950' + | [\u0958-\u0961] + | '\u097D' + | [\u0985-\u098C] + | [\u098F-\u0990] + | [\u0993-\u09A8] + | [\u09AA-\u09B0] + | '\u09B2' + | [\u09B6-\u09B9] + | '\u09BD' + | '\u09CE' + | [\u09DC-\u09DD] + | [\u09DF-\u09E1] + | [\u09F0-\u09F1] + | [\u0A05-\u0A0A] + | [\u0A0F-\u0A10] + | [\u0A13-\u0A28] + | [\u0A2A-\u0A30] + | [\u0A32-\u0A33] + | [\u0A35-\u0A36] + | [\u0A38-\u0A39] + | [\u0A59-\u0A5C] + | '\u0A5E' + | [\u0A72-\u0A74] + | [\u0A85-\u0A8D] + | [\u0A8F-\u0A91] + | [\u0A93-\u0AA8] + | [\u0AAA-\u0AB0] + | [\u0AB2-\u0AB3] + | [\u0AB5-\u0AB9] + | '\u0ABD' + | '\u0AD0' + | [\u0AE0-\u0AE1] + | [\u0B05-\u0B0C] + | [\u0B0F-\u0B10] + | [\u0B13-\u0B28] + | [\u0B2A-\u0B30] + | [\u0B32-\u0B33] + | [\u0B35-\u0B39] + | '\u0B3D' + | [\u0B5C-\u0B5D] + | [\u0B5F-\u0B61] + | '\u0B71' + | '\u0B83' + | [\u0B85-\u0B8A] + | [\u0B8E-\u0B90] + | [\u0B92-\u0B95] + | [\u0B99-\u0B9A] + | '\u0B9C' + | [\u0B9E-\u0B9F] + | [\u0BA3-\u0BA4] + | [\u0BA8-\u0BAA] + | [\u0BAE-\u0BB9] + | [\u0C05-\u0C0C] + | [\u0C0E-\u0C10] + | [\u0C12-\u0C28] + | [\u0C2A-\u0C33] + | [\u0C35-\u0C39] + | [\u0C60-\u0C61] + | [\u0C85-\u0C8C] + | [\u0C8E-\u0C90] + | [\u0C92-\u0CA8] + | [\u0CAA-\u0CB3] + | [\u0CB5-\u0CB9] + | '\u0CBD' + | '\u0CDE' + | [\u0CE0-\u0CE1] + | [\u0D05-\u0D0C] + | [\u0D0E-\u0D10] + | [\u0D12-\u0D28] + | [\u0D2A-\u0D39] + | [\u0D60-\u0D61] + | [\u0D85-\u0D96] + | [\u0D9A-\u0DB1] + | [\u0DB3-\u0DBB] + | '\u0DBD' + | [\u0DC0-\u0DC6] + | [\u0E01-\u0E30] + | [\u0E32-\u0E33] + | [\u0E40-\u0E45] + | '\u0E46' + | [\u0E81-\u0E82] + | '\u0E84' + | [\u0E87-\u0E88] + | '\u0E8A' + | '\u0E8D' + | [\u0E94-\u0E97] + | [\u0E99-\u0E9F] + | [\u0EA1-\u0EA3] + | '\u0EA5' + | '\u0EA7' + | [\u0EAA-\u0EAB] + | [\u0EAD-\u0EB0] + | [\u0EB2-\u0EB3] + | '\u0EBD' + | [\u0EC0-\u0EC4] + | '\u0EC6' + | [\u0EDC-\u0EDD] + | '\u0F00' + | [\u0F40-\u0F47] + | [\u0F49-\u0F6A] + | [\u0F88-\u0F8B] + | [\u1000-\u1021] + | [\u1023-\u1027] + | [\u1029-\u102A] + | [\u1050-\u1055] + | [\u10A0-\u10C5] + | [\u10D0-\u10FA] + | '\u10FC' + | [\u1100-\u1159] + | [\u115F-\u11A2] + | [\u11A8-\u11F9] + | [\u1200-\u1248] + | [\u124A-\u124D] + | [\u1250-\u1256] + | '\u1258' + | [\u125A-\u125D] + | [\u1260-\u1288] + | [\u128A-\u128D] + | [\u1290-\u12B0] + | [\u12B2-\u12B5] + | [\u12B8-\u12BE] + | '\u12C0' + | [\u12C2-\u12C5] + | [\u12C8-\u12D6] + | [\u12D8-\u1310] + | [\u1312-\u1315] + | [\u1318-\u135A] + | [\u1380-\u138F] + | [\u13A0-\u13F4] + | [\u1401-\u166C] + | [\u166F-\u1676] + | [\u1681-\u169A] + | [\u16A0-\u16EA] + | [\u16EE-\u16F0] + | [\u1700-\u170C] + | [\u170E-\u1711] + | [\u1720-\u1731] + | [\u1740-\u1751] + | [\u1760-\u176C] + | [\u176E-\u1770] + | [\u1780-\u17B3] + | '\u17D7' + | '\u17DC' + | [\u1820-\u1842] + | '\u1843' + | [\u1844-\u1877] + | [\u1880-\u18A8] + | [\u1900-\u191C] + | [\u1950-\u196D] + | [\u1970-\u1974] + | [\u1980-\u19A9] + | [\u19C1-\u19C7] + | [\u1A00-\u1A16] + | [\u1D00-\u1D2B] + | [\u1D2C-\u1D61] + | [\u1D62-\u1D77] + | '\u1D78' + | [\u1D79-\u1D9A] + | [\u1D9B-\u1DBF] + | [\u1E00-\u1E9B] + | [\u1EA0-\u1EF9] + | [\u1F00-\u1F15] + | [\u1F18-\u1F1D] + | [\u1F20-\u1F45] + | [\u1F48-\u1F4D] + | [\u1F50-\u1F57] + | '\u1F59' + | '\u1F5B' + | '\u1F5D' + | [\u1F5F-\u1F7D] + | [\u1F80-\u1FB4] + | [\u1FB6-\u1FBC] + | '\u1FBE' + | [\u1FC2-\u1FC4] + | [\u1FC6-\u1FCC] + | [\u1FD0-\u1FD3] + | [\u1FD6-\u1FDB] + | [\u1FE0-\u1FEC] + | [\u1FF2-\u1FF4] + | [\u1FF6-\u1FFC] + | '\u2071' + | '\u207F' + | [\u2090-\u2094] + | '\u2102' + | '\u2107' + | [\u210A-\u2113] + | '\u2115' + | '\u2118' + | [\u2119-\u211D] + | '\u2124' + | '\u2126' + | '\u2128' + | [\u212A-\u212D] + | '\u212E' + | [\u212F-\u2131] + | [\u2133-\u2134] + | [\u2135-\u2138] + | '\u2139' + | [\u213C-\u213F] + | [\u2145-\u2149] + | [\u2160-\u2183] + | [\u2C00-\u2C2E] + | [\u2C30-\u2C5E] + | [\u2C80-\u2CE4] + | [\u2D00-\u2D25] + | [\u2D30-\u2D65] + | '\u2D6F' + | [\u2D80-\u2D96] + | [\u2DA0-\u2DA6] + | [\u2DA8-\u2DAE] + | [\u2DB0-\u2DB6] + | [\u2DB8-\u2DBE] + | [\u2DC0-\u2DC6] + | [\u2DC8-\u2DCE] + | [\u2DD0-\u2DD6] + | [\u2DD8-\u2DDE] + | '\u3005' + | '\u3006' + | '\u3007' + | [\u3021-\u3029] + | [\u3031-\u3035] + | [\u3038-\u303A] + | '\u303B' + | '\u303C' + | [\u3041-\u3096] + | [\u309B-\u309C] + | [\u309D-\u309E] + | '\u309F' + | [\u30A1-\u30FA] + | [\u30FC-\u30FE] + | '\u30FF' + | [\u3105-\u312C] + | [\u3131-\u318E] + | [\u31A0-\u31B7] + | [\u31F0-\u31FF] + | [\u3400-\u4DB5] + | [\u4E00-\u9FBB] + | [\uA000-\uA014] + | '\uA015' + | [\uA016-\uA48C] + | [\uA800-\uA801] + | [\uA803-\uA805] + | [\uA807-\uA80A] + | [\uA80C-\uA822] + | [\uAC00-\uD7A3] + | [\uF900-\uFA2D] + | [\uFA30-\uFA6A] + | [\uFA70-\uFAD9] + | [\uFB00-\uFB06] + | [\uFB13-\uFB17] + | '\uFB1D' + | [\uFB1F-\uFB28] + | [\uFB2A-\uFB36] + | [\uFB38-\uFB3C] + | '\uFB3E' + | [\uFB40-\uFB41] + | [\uFB43-\uFB44] + | [\uFB46-\uFBB1] + | [\uFBD3-\uFD3D] + | [\uFD50-\uFD8F] + | [\uFD92-\uFDC7] + | [\uFDF0-\uFDFB] + | [\uFE70-\uFE74] + | [\uFE76-\uFEFC] + | [\uFF21-\uFF3A] + | [\uFF41-\uFF5A] + | [\uFF66-\uFF6F] + | '\uFF70' + | [\uFF71-\uFF9D] + | [\uFF9E-\uFF9F] + | [\uFFA0-\uFFBE] + | [\uFFC2-\uFFC7] + | [\uFFCA-\uFFCF] + | [\uFFD2-\uFFD7] + | [\uFFDA-\uFFDC] + ; + +/// id_continue ::= +fragment ID_CONTINUE + : ID_START + | [0-9] + | [\u0300-\u036F] + | [\u0483-\u0486] + | [\u0591-\u05B9] + | [\u05BB-\u05BD] + | '\u05BF' + | [\u05C1-\u05C2] + | [\u05C4-\u05C5] + | '\u05C7' + | [\u0610-\u0615] + | [\u064B-\u065E] + | [\u0660-\u0669] + | '\u0670' + | [\u06D6-\u06DC] + | [\u06DF-\u06E4] + | [\u06E7-\u06E8] + | [\u06EA-\u06ED] + | [\u06F0-\u06F9] + | '\u0711' + | [\u0730-\u074A] + | [\u07A6-\u07B0] + | [\u0901-\u0902] + | '\u0903' + | '\u093C' + | [\u093E-\u0940] + | [\u0941-\u0948] + | [\u0949-\u094C] + | '\u094D' + | [\u0951-\u0954] + | [\u0962-\u0963] + | [\u0966-\u096F] + | '\u0981' + | [\u0982-\u0983] + | '\u09BC' + | [\u09BE-\u09C0] + | [\u09C1-\u09C4] + | [\u09C7-\u09C8] + | [\u09CB-\u09CC] + | '\u09CD' + | '\u09D7' + | [\u09E2-\u09E3] + | [\u09E6-\u09EF] + | [\u0A01-\u0A02] + | '\u0A03' + | '\u0A3C' + | [\u0A3E-\u0A40] + | [\u0A41-\u0A42] + | [\u0A47-\u0A48] + | [\u0A4B-\u0A4D] + | [\u0A66-\u0A6F] + | [\u0A70-\u0A71] + | [\u0A81-\u0A82] + | '\u0A83' + | '\u0ABC' + | [\u0ABE-\u0AC0] + | [\u0AC1-\u0AC5] + | [\u0AC7-\u0AC8] + | '\u0AC9' + | [\u0ACB-\u0ACC] + | '\u0ACD' + | [\u0AE2-\u0AE3] + | [\u0AE6-\u0AEF] + | '\u0B01' + | [\u0B02-\u0B03] + | '\u0B3C' + | '\u0B3E' + | '\u0B3F' + | '\u0B40' + | [\u0B41-\u0B43] + | [\u0B47-\u0B48] + | [\u0B4B-\u0B4C] + | '\u0B4D' + | '\u0B56' + | '\u0B57' + | [\u0B66-\u0B6F] + | '\u0B82' + | [\u0BBE-\u0BBF] + | '\u0BC0' + | [\u0BC1-\u0BC2] + | [\u0BC6-\u0BC8] + | [\u0BCA-\u0BCC] + | '\u0BCD' + | '\u0BD7' + | [\u0BE6-\u0BEF] + | [\u0C01-\u0C03] + | [\u0C3E-\u0C40] + | [\u0C41-\u0C44] + | [\u0C46-\u0C48] + | [\u0C4A-\u0C4D] + | [\u0C55-\u0C56] + | [\u0C66-\u0C6F] + | [\u0C82-\u0C83] + | '\u0CBC' + | '\u0CBE' + | '\u0CBF' + | [\u0CC0-\u0CC4] + | '\u0CC6' + | [\u0CC7-\u0CC8] + | [\u0CCA-\u0CCB] + | [\u0CCC-\u0CCD] + | [\u0CD5-\u0CD6] + | [\u0CE6-\u0CEF] + | [\u0D02-\u0D03] + | [\u0D3E-\u0D40] + | [\u0D41-\u0D43] + | [\u0D46-\u0D48] + | [\u0D4A-\u0D4C] + | '\u0D4D' + | '\u0D57' + | [\u0D66-\u0D6F] + | [\u0D82-\u0D83] + | '\u0DCA' + | [\u0DCF-\u0DD1] + | [\u0DD2-\u0DD4] + | '\u0DD6' + | [\u0DD8-\u0DDF] + | [\u0DF2-\u0DF3] + | '\u0E31' + | [\u0E34-\u0E3A] + | [\u0E47-\u0E4E] + | [\u0E50-\u0E59] + | '\u0EB1' + | [\u0EB4-\u0EB9] + | [\u0EBB-\u0EBC] + | [\u0EC8-\u0ECD] + | [\u0ED0-\u0ED9] + | [\u0F18-\u0F19] + | [\u0F20-\u0F29] + | '\u0F35' + | '\u0F37' + | '\u0F39' + | [\u0F3E-\u0F3F] + | [\u0F71-\u0F7E] + | '\u0F7F' + | [\u0F80-\u0F84] + | [\u0F86-\u0F87] + | [\u0F90-\u0F97] + | [\u0F99-\u0FBC] + | '\u0FC6' + | '\u102C' + | [\u102D-\u1030] + | '\u1031' + | '\u1032' + | [\u1036-\u1037] + | '\u1038' + | '\u1039' + | [\u1040-\u1049] + | [\u1056-\u1057] + | [\u1058-\u1059] + | '\u135F' + | [\u1369-\u1371] + | [\u1712-\u1714] + | [\u1732-\u1734] + | [\u1752-\u1753] + | [\u1772-\u1773] + | '\u17B6' + | [\u17B7-\u17BD] + | [\u17BE-\u17C5] + | '\u17C6' + | [\u17C7-\u17C8] + | [\u17C9-\u17D3] + | '\u17DD' + | [\u17E0-\u17E9] + | [\u180B-\u180D] + | [\u1810-\u1819] + | '\u18A9' + | [\u1920-\u1922] + | [\u1923-\u1926] + | [\u1927-\u1928] + | [\u1929-\u192B] + | [\u1930-\u1931] + | '\u1932' + | [\u1933-\u1938] + | [\u1939-\u193B] + | [\u1946-\u194F] + | [\u19B0-\u19C0] + | [\u19C8-\u19C9] + | [\u19D0-\u19D9] + | [\u1A17-\u1A18] + | [\u1A19-\u1A1B] + | [\u1DC0-\u1DC3] + | [\u203F-\u2040] + | '\u2054' + | [\u20D0-\u20DC] + | '\u20E1' + | [\u20E5-\u20EB] + | [\u302A-\u302F] + | [\u3099-\u309A] + | '\uA802' + | '\uA806' + | '\uA80B' + | [\uA823-\uA824] + | [\uA825-\uA826] + | '\uA827' + | '\uFB1E' + | [\uFE00-\uFE0F] + | [\uFE20-\uFE23] + | [\uFE33-\uFE34] + | [\uFE4D-\uFE4F] + | [\uFF10-\uFF19] + | '\uFF3F' + ; \ No newline at end of file diff --git a/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/DemoRootPane.java b/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/DemoRootPane.java index d22f76084..582024795 100755 --- a/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/DemoRootPane.java +++ b/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/DemoRootPane.java @@ -27,6 +27,7 @@ import org.fife.ui.rsyntaxtextarea.demo.antlr.JSONTokenMaker; import org.fife.ui.rsyntaxtextarea.demo.antlr.Java9TokenMaker; import org.fife.ui.rsyntaxtextarea.demo.antlr.MySqlTokenMaker; +import org.fife.ui.rsyntaxtextarea.demo.antlr.Python3TokenMaker; import org.fife.ui.rtextarea.Gutter; import org.fife.ui.rtextarea.RTextScrollPane; @@ -52,6 +53,7 @@ public class DemoRootPane extends JRootPane implements HyperlinkListener, atmf.putMapping("antlr/java9", Java9TokenMaker.class.getName(), Java9TokenMaker.class.getClassLoader()); atmf.putMapping("antlr/json", JSONTokenMaker.class.getName(), JSONTokenMaker.class.getClassLoader()); atmf.putMapping("antlr/mysql", MySqlTokenMaker.class.getName(), MySqlTokenMaker.class.getClassLoader()); + atmf.putMapping("antlr/python", Python3TokenMaker.class.getName(), Python3TokenMaker.class.getClassLoader()); textArea = createTextArea(); @@ -121,6 +123,7 @@ private JMenuBar createMenuBar() { addSyntaxItem("Perl", "PerlExample.txt", SYNTAX_STYLE_PERL, bg, menu); addSyntaxItem("PHP", "PhpExample.txt", SYNTAX_STYLE_PHP, bg, menu); addSyntaxItem("Python", "PythonExample.txt", SYNTAX_STYLE_PYTHON, bg, menu); + addSyntaxItem("Python (ANTLR)", "PythonExample.txt", "antlr/python", bg, menu); addSyntaxItem("Ruby", "RubyExample.txt", SYNTAX_STYLE_RUBY, bg, menu); addSyntaxItem("SQL", "SQLExample.txt", SYNTAX_STYLE_SQL, bg, menu); addSyntaxItem("SQL (ANTLR)", "SQLExample.txt", "antlr/mysql", bg, menu); diff --git a/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/antlr/Python3TokenMaker.java b/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/antlr/Python3TokenMaker.java new file mode 100644 index 000000000..e579256ca --- /dev/null +++ b/RSyntaxTextAreaDemo/src/main/java/org/fife/ui/rsyntaxtextarea/demo/antlr/Python3TokenMaker.java @@ -0,0 +1,92 @@ +package org.fife.ui.rsyntaxtextarea.demo.antlr; + +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.Lexer; +import org.fife.ui.rsyntaxtextarea.Token; +import org.fife.ui.rsyntaxtextarea.modes.antlr.AntlrTokenMaker; + +public class Python3TokenMaker extends AntlrTokenMaker { + @Override + protected Lexer createLexer(String text) { + return new Python3Lexer(CharStreams.fromString(text)) { + @Override + public void skip() { + setChannel(HIDDEN); + } + + @Override + public void emit(org.antlr.v4.runtime.Token t) { + // the Python3 grammar inserts some extra NEWLINE tokens, we need to filter out again + if (t.getType() != Python3Lexer.NEWLINE) { + super.emit(t); + } + } + }; + } + + @Override + protected int convertType(int type) { + switch (type) { + case Python3Lexer.SKIP_: + return Token.COMMENT_EOL; + case Python3Lexer.TRUE: + case Python3Lexer.FALSE: + return Token.LITERAL_BOOLEAN; + + case Python3Lexer.DEF: + case Python3Lexer.RETURN: + case Python3Lexer.RAISE: + case Python3Lexer.FROM: + case Python3Lexer.IMPORT: + case Python3Lexer.AS: + case Python3Lexer.GLOBAL: + case Python3Lexer.NONLOCAL: + case Python3Lexer.ASSERT: + case Python3Lexer.IF: + case Python3Lexer.ELIF: + case Python3Lexer.ELSE: + case Python3Lexer.WHILE: + case Python3Lexer.FOR: + case Python3Lexer.IN: + case Python3Lexer.TRY: + case Python3Lexer.FINALLY: + case Python3Lexer.WITH: + case Python3Lexer.EXCEPT: + case Python3Lexer.LAMBDA: + case Python3Lexer.OR: + case Python3Lexer.AND: + case Python3Lexer.NOT: + case Python3Lexer.IS: + case Python3Lexer.NONE: + case Python3Lexer.CLASS: + case Python3Lexer.YIELD: + case Python3Lexer.DEL: + case Python3Lexer.PASS: + case Python3Lexer.CONTINUE: + case Python3Lexer.BREAK: + case Python3Lexer.ASYNC: + case Python3Lexer.AWAIT: + return Token.RESERVED_WORD; + case Python3Lexer.STRING_LITERAL: + return Token.LITERAL_STRING_DOUBLE_QUOTE; + case Python3Lexer.NUMBER: + return Token.LITERAL_NUMBER_DECIMAL_INT; + case Python3Lexer.FLOAT_NUMBER: + return Token.LITERAL_NUMBER_FLOAT; + case Python3Lexer.OPEN_BRACE: + case Python3Lexer.CLOSE_BRACE: + case Python3Lexer.OPEN_BRACK: + case Python3Lexer.CLOSE_BRACK: + case Python3Lexer.OPEN_PAREN: + case Python3Lexer.CLOSE_PAREN: + return Token.SEPARATOR; + + } + return Token.IDENTIFIER; + } + + @Override + public String[] getLineCommentStartAndEnd(int languageIndex) { + return new String[]{"#", null}; + } +}