diff --git a/corpus/test b/corpus/test new file mode 100644 index 0000000..18dfda9 --- /dev/null +++ b/corpus/test @@ -0,0 +1,243 @@ +================================================================================ +Declarations +================================================================================ + +module foo; + +export { + global i = 42; +} + +global j = 42; +option option_ = T; +const const_ = F; + +type X: record { }; +redef record X += { + x: count &optional; +}; + +type E: enum { eA, eB }; +redef enum E += { eC = 3 }; + +function foo() + { + local x = 1; + } + +-------------------------------------------------------------------------------- + +(source_file + (module_decl + (id)) + (export_decl + (var_decl + (id) + (initializer + (init_kind) + (expr + (constant + (integer)))))) + (var_decl + (id) + (initializer + (init_kind) + (expr + (constant + (integer))))) + (option_decl + (id) + (initializer + (init_kind) + (expr + (constant + (boolean))))) + (const_decl + (id) + (initializer + (init_kind) + (expr + (constant + (boolean))))) + (type_decl + (id) + (type)) + (redef_record_decl + (id) + (type_spec + (id) + (type) + (attr_list + (attr)))) + (type_decl + (id) + (type + (enumerator + (id)) + (enumerator + (id)))) + (redef_enum_decl + (id) + (enumerator + (id) + (constant + (integer)))) + (func_decl + (id) + (func_params) + (stmt_list + (var_decl + (id) + (initializer + (init_kind) + (expr + (constant + (integer)))))))) + +================================================================================ +Record fields +================================================================================ + +global x = r$a; +global y = r?$a; + +-------------------------------------------------------------------------------- + +(source_file + (var_decl + (id) + (initializer + (init_kind) + (expr + (field_access + (expr + (id)) + (id))))) + (var_decl + (id) + (initializer + (init_kind) + (expr + (field_check + (expr + (id)) + (id)))))) + +================================================================================ +Function-likes +================================================================================ + +function foo(x: count) + { + x; + } +hook foo(x: count) + { + x; + } +redef event foo(x: count) + { + x; + } + +-------------------------------------------------------------------------------- + +(source_file + (func_decl + (id) + (func_params + (formal_args + (formal_arg + (id) + (type)))) + (stmt_list + (expr + (id)))) + (hook_decl + (id) + (func_params + (formal_args + (formal_arg + (id) + (type)))) + (stmt_list + (expr + (id)))) + (event_decl + (id) + (func_params + (formal_args + (formal_arg + (id) + (type)))) + (stmt_list + (expr + (id))))) + +================================================================================ +Simple statements +================================================================================ + +function foo(xs: vector of int): int + { + for ( _ in xs ) + { } + + for ( [_] in xs ) + { } + + while ( T ) + { } + + switch ( 1 ) + { + case 1: + fallthrough; + default: + break; + } + + return 0; + } + +-------------------------------------------------------------------------------- + +(source_file + (func_decl + (id) + (func_params + (formal_args + (formal_arg + (id) + (type + (type)))) + (type)) + (stmt_list + (for + (id) + (expr + (id))) + (for + (id) + (expr + (id))) + (while + (expr + (constant + (boolean)))) + (switch + (expr + (expr + (constant + (integer)))) + (case_list + (expr_list + (expr + (constant + (integer)))) + (stmt_list) + (stmt_list))) + (return + (expr + (constant + (integer))))))) diff --git a/grammar.js b/grammar.js index fd07821..8caf817 100644 --- a/grammar.js +++ b/grammar.js @@ -22,14 +22,14 @@ module.exports = grammar({ rules: { source_file: $ => seq( - repeat($.decl), - repeat($.stmt), + repeat($._decl), + repeat($._stmt), ), - decl: $ => choice( + _decl: $ => choice( $.module_decl, $.export_decl, - $.global_decl, + $.var_decl, $.option_decl, $.const_decl, $.redef_decl, @@ -37,45 +37,51 @@ module.exports = grammar({ $.redef_record_decl, $.type_decl, $.func_decl, + $.hook_decl, + $.event_decl, $.preproc_directive, ), module_decl: $ => seq('module', $.id, ';'), - export_decl: $ => seq('export', '{', repeat($.decl), '}'), + export_decl: $ => seq('export', '{', repeat($._decl), '}'), // A change here over Zeek's parser: we make the combo of init class // and initializer jointly optional, instead of individually. Helps // avoid ambiguity. - global_decl: $ => seq('global', $.id, optional(seq(':', $.type)), optional($.initializer), optional($.attr_list), ';'), + var_decl: $ => seq( + field('scope', choice('global', 'local')), + $.id, + optional(seq(':', $.type)), + optional($.initializer), + optional($.attr_list), ';'), option_decl: $ => seq('option', $.id, optional(seq(':', $.type)), optional($.initializer), optional($.attr_list), ';'), const_decl: $ => seq('const', $.id, optional(seq(':', $.type)), optional($.initializer), optional($.attr_list), ';'), redef_decl: $ => seq('redef', $.id, optional(seq(':', $.type)), optional($.initializer), optional($.attr_list), ';'), - redef_enum_decl: $ => seq('redef', 'enum', $.id, '+=', '{', $.enum_body, '}', ';'), + redef_enum_decl: $ => seq('redef', 'enum', $.id, '+=', '{', $._enum_body, '}', ';'), redef_record_decl: $ => seq('redef', 'record', $.id, '+=', '{', repeat($.type_spec), '}', optional($.attr_list), ';'), type_decl: $ => seq('type', $.id, ':', $.type, optional($.attr_list), ';'), - func_decl: $ => seq($.func_hdr, repeat($.preproc_directive), $.func_body), - stmt: $ => choice( + _stmt: $ => choice( // TODO: @no-test support seq('{', optional($.stmt_list), '}'), seq('print', $.expr_list, ';'), seq('event', $.event_hdr, ';'), - prec_r(seq('if', '(', $.expr, ')', $.stmt, optional(seq('else', $.stmt)))), - seq('switch', $.expr, '{', optional($.case_list), '}'), - seq('for', '(', $.id, optional(seq(',', $.id)), 'in', $.expr, ')', $.stmt), - seq('for', '(', '[', list1($.id, ','), ']', optional(seq(',', $.id)), 'in', $.expr, ')', $.stmt), - seq('while', '(', $.expr, ')', $.stmt), + $.switch, + $.if, + $.for, + $.while, + $.return, seq(choice('next', 'break', 'fallthrough'), ';'), - seq('return', optional($.expr), ';'), seq(choice('add', 'delete'), $.expr, ';'), - seq('local', $.id, optional(seq(':', $.type)), optional($.initializer), optional($.attr_list), ';'), + // Precedence works around ambiguity with `var_decl` in `_decl` at `source_file` scope. + prec(-1, $.var_decl), // Precedence here works around ambiguity with similar global declaration: prec(-1, seq('const', $.id, optional(seq(':', $.type)), optional($.initializer), optional($.attr_list), ';')), // Associativity here works around theoretical ambiguity if "when" nested: prec_r(seq( optional('return'), - 'when', optional($.capture_list), '(', $.expr, ')', $.stmt, + 'when', optional($.capture_list), '(', $.expr, ')', $._stmt, optional(seq('timeout', $.expr, '{', optional($.stmt_list), '}')), )), seq($.index_slice, '=', $.expr, ';'), @@ -85,7 +91,20 @@ module.exports = grammar({ ';', ), - stmt_list: $ => repeat1($.stmt), + if: $ => prec_r(seq('if', '(', $.expr, ')', $._stmt, optional(seq('else', $._stmt)))), + + for: $ => choice( + seq('for', '(', $.id, optional(seq(',', $.id)), 'in', $.expr, ')', $._stmt), + seq('for', '(', '[', list1($.id, ','), ']', optional(seq(',', $.id)), 'in', $.expr, ')', $._stmt), + ), + + while: $ => seq('while', '(', $.expr, ')', $._stmt), + + switch: $ => seq('switch', $.expr, '{', optional($.case_list), '}'), + + return: $ => seq('return', optional($.expr), ';'), + + stmt_list: $ => repeat1($._stmt), case_list: $ => repeat1( choice( @@ -116,7 +135,7 @@ module.exports = grammar({ 'timer', seq('record', '{', repeat($.type_spec), '}'), seq('union', '{', list1($.type, ','), '}'), - seq('enum', '{', $.enum_body, '}'), + seq('enum', '{', $._enum_body, '}'), 'list', seq('list', 'of', $.type), seq('vector', 'of', $.type), @@ -129,14 +148,14 @@ module.exports = grammar({ $.id, ), - enum_body: $ => list1($.enum_body_elem, ',', true), + _enum_body: $ => list1($.enumerator, ',', true), - enum_body_elem: $ => choice( + enumerator: $ => choice( seq($.id, '=', $.constant, optional($.deprecated)), seq($.id, optional($.deprecated)), ), - deprecated: $ => choice( + deprecated: _ => choice( '&deprecated', seq('&deprecated', '=', 'const'), ), @@ -151,11 +170,11 @@ module.exports = grammar({ type_spec: $ => seq($.id, ':', $.type, optional($.attr_list), ';'), initializer: $ => seq( - optional($.init_class), + optional($.init_kind), $.expr, ), - init_class: $ => prec_r(choice('=', '+=', '-=')), + init_kind: _ => prec_r(choice('=', '+=', '-=')), attr_list: $ => prec_l(repeat1($.attr)), @@ -190,7 +209,7 @@ module.exports = grammar({ expr: $ => choice( prec_l(9, seq($.expr, '[', $.expr_list, ']')), prec_l(9, seq($.expr, $.index_slice)), - prec_l(9, seq($.expr, '$', $.id)), + prec_l(9, choice($.field_access, $.field_check)), prec_r(8, seq('|', $.expr, '|')), prec_r(8, seq('++', $.expr)), @@ -232,7 +251,7 @@ module.exports = grammar({ prec_r(3, seq($.expr, '+=', $.expr)), prec(2, seq('$', $.id, '=', $.expr)), - prec(2, seq('$', $.id, $.begin_lambda, '=', $.func_body)), + prec(2, seq('$', $.id, $.begin_lambda, '=', $._func_body)), prec_l(1, seq('[', optional($.expr_list), ']')), prec_l(1, seq('{', optional($.expr_list), '}')), @@ -249,14 +268,16 @@ module.exports = grammar({ seq('(', $.expr, ')'), seq('copy', '(', $.expr, ')'), prec_r(seq('hook', $.expr)), - seq($.expr, '?$', $.id), seq('schedule', $.expr, '{', $.event_hdr, '}'), - seq('function', $.begin_lambda, $.func_body), + seq('function', $.begin_lambda, $._func_body), // Lower precedence here to favor local-variable statements prec_r(-1, seq('local', $.id, '=', $.expr)), ), + field_access: $ => prec_l(seq($.expr, '$', $.id)), + field_check: $ => prec_l(seq($.expr, '?$', $.id)), + expr_list: $ => list1($.expr, ',', true), constant: $ => choice( @@ -264,8 +285,7 @@ module.exports = grammar({ prec_l(seq($.ipv4, optional(seq('/', /[0-9]+/)))), prec_l(seq($.ipv6, optional(seq('/', /[0-9]+/)))), $.hostname, - 'T', - 'F', + $.boolean, $.hex, $.port, $.interval, @@ -274,14 +294,12 @@ module.exports = grammar({ prec(-10, $.integer), ), - func_hdr: $ => choice($.func, $.hook, $.event), - // Precedences here are to avoid ambiguity with related expressions - func: $ => prec(1, seq('function', $.id, $.func_params, optional($.attr_list))), - hook: $ => prec(1, seq('hook', $.id, $.func_params, optional($.attr_list))), - event: $ => seq(optional('redef'), 'event', $.id, $.func_params, optional($.attr_list)), + func_decl: $ => prec(1, seq('function', $.id, $.func_params, optional($.attr_list), $._func_body)), + hook_decl: $ => prec(1, seq('hook', $.id, $.func_params, optional($.attr_list), $._func_body)), + event_decl: $ => seq(optional('redef'), 'event', $.id, $.func_params, optional($.attr_list), $._func_body), - func_body: $ => seq('{', optional($.stmt_list), '}'), + _func_body: $ => seq('{', optional($.stmt_list), '}'), // Precedence here is to disambiguate other interpretations of the colon // and type, arising in expressions. @@ -313,16 +331,16 @@ module.exports = grammar({ ), // These directives return strings. - string_directive: $ => choice( + string_directive: _ => choice( '@DIR', '@FILENAME', ), event_hdr: $ => seq($.id, '(', optional($.expr_list), ')'), - id: $ => /[A-Za-z_][A-Za-z_0-9]*(::[A-Za-z_][A-Za-z_0-9]*)*/, - file: $ => /[^ \t\r\n]+/, - pattern: $ => /\/((\\\/)?[^\r\n\/]?)*\/i?/, + id: _ => /[A-Za-z_][A-Za-z_0-9]*(::[A-Za-z_][A-Za-z_0-9]*)*/, + file: _ => /[^ \t\r\n]+/, + pattern: _ => /\/((\\\/)?[^\r\n\/]?)*\/i?/, // https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses // @@ -331,51 +349,49 @@ module.exports = grammar({ // technically invalid strings). Might want to move to Zeek's regex, for // consistency. // - ipv6: $ => /(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))/, - ipv4: $ => /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/, + ipv6: _ => /(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))/, + ipv4: _ => /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/, - port: $ => /[0-9]+\/(tcp|udp|icmp|unknown)/, + port: _ => /[0-9]+\/(tcp|udp|icmp|unknown)/, - integer: $ => /[0-9]+/, - floatp: $ => /(([0-9]*\.?[0-9]+)|([0-9]+\.[0-9]*))([eE][-+]?[0-9]+)?/, - hex: $ => /0x[0-9a-fA-F]+/, + integer: _ => /[0-9]+/, + floatp: _ => /(([0-9]*\.?[0-9]+)|([0-9]+\.[0-9]*))([eE][-+]?[0-9]+)?/, + hex: _ => /0x[0-9a-fA-F]+/, + + boolean: _ => choice('T', 'F'), // For some reason I need to call out integers as a choice here // explicitly -- floatp's ability to parse an integer doesn't trigger. interval: $ => seq(choice($.integer, $.floatp), $.time_unit), - time_unit: $ => /(day|hr|min|sec|msec|usec)s?/, + time_unit: _ => /(day|hr|min|sec|msec|usec)s?/, // We require hostnames to have a dot. This is a departure from Zeek, // but one that avoids several annoying confusions with other constants. - hostname: $ => /([A-Za-z0-9][A-Za-z0-9\-]*\.)+[A-Za-z][A-Za-z0-9\-]*/, + hostname: _ => /([A-Za-z0-9][A-Za-z0-9\-]*\.)+[A-Za-z][A-Za-z0-9\-]*/, // Plain string characters or escape sequences, wrapped in double-quotes. string: $ => choice( /"([^\\\r\n\"]|\\([^\r\n]|[0-7]+|x[0-9a-fA-F]+))*"/, $.string_directive, ), - + // Zeekygen comments come in three flavors: a head one at the beginning // of a script (##!), one that refers to the previous node (##<), and // ones that refer to the subsequent one. Note that we skip the final // newline. - zeekygen_head_comment: $ => /##![^\r\n]*/, - zeekygen_prev_comment: $ => /##<[^\r\n]*/, - zeekygen_next_comment: $ => /##[^\r\n]*/, - - minor_comment: $ => /#[^\r\n]*/, + zeekygen_head_comment: _ => /##![^\r\n]*/, + zeekygen_prev_comment: _ => /##<[^\r\n]*/, + zeekygen_next_comment: _ => /##[^\r\n]*/, - // We track newlines explicitly -- this gives us the ability to honor - // existing formatting in select places. - nl: $ => /\r?\n/, + minor_comment: _ => /#[^\r\n]*/, }, 'extras': $ => [ /[ \t]+/, - $.nl, $.zeekygen_head_comment, $.zeekygen_prev_comment, $.zeekygen_next_comment, $.minor_comment, + /\r?\n/, ], }); diff --git a/queries/highlights.scm b/queries/highlights.scm index 29a601b..7483c03 100644 --- a/queries/highlights.scm +++ b/queries/highlights.scm @@ -1,9 +1,9 @@ ;; Language features ;; ----------------- -(event (id) @function) -(hook (id) @function) -(func (id) @function) +(event_decl (id) @function) +(hook_decl (id) @function) +(func_decl (id) @function) (type) @type (attr) @attribute