Skip to content

Commit

Permalink
Redo computation of keywords
Browse files Browse the repository at this point in the history
Our approach relied too much on peculiarities of the grammar file, which
made this fragile. This patch rewrites keyword extraction to instead
inspect the generated parser, which should be more stable against subtle
formatting changes in the grammar (but might still fail if tree-sitter
changes its parser generator).
  • Loading branch information
bbannier committed Nov 16, 2024
1 parent 6e7cc01 commit 6851539
Show file tree
Hide file tree
Showing 5 changed files with 914 additions and 32 deletions.
2 changes: 1 addition & 1 deletion crates/tree-sitter-zeek/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ tree-sitter = "0.24.4"

[build-dependencies]
cc = "1.1.37"
regex = "1.11.1"
regex = { version = "1.11.1", default-features = false }
tree-sitter = "0.24.4"
tree-sitter-generate = "0.24.4"
20 changes: 10 additions & 10 deletions crates/tree-sitter-zeek/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@ use std::{
path::{Path, PathBuf},
};

fn generate_keywords(grammar: &Path) {
let file = File::open(grammar).unwrap();
fn generate_keywords(parser_c: &Path) {
let file = File::open(parser_c).unwrap();
let mut buf_reader = BufReader::new(file);
let mut contents = String::new();
buf_reader.read_to_string(&mut contents).unwrap();

let mut set: HashSet<String> = HashSet::new();
let re = Regex::new(r"'(@?&?[a-z]+-?_?[a-z]+)'").unwrap();
for cap in re.captures_iter(&contents) {
if cap[1].eq("zeek") || cap[1].eq("extras") {
continue;
}
set.insert(cap[1].to_string());
let re = Regex::new(r#"\[anon_sym_.*\] = "(.*)""#).unwrap();

for cs in re.captures_iter(&contents) {
set.insert(cs[1].replace("\\?", "?"));
}

let out_dir = env::var_os("OUT_DIR").unwrap();
Expand Down Expand Up @@ -62,11 +60,13 @@ fn main() {
// Compile tree-sitter C output.
let src_dir = out_dir.join("src");

let parser_c = src_dir.join("parser.c");

Build::new()
.file(src_dir.join("parser.c"))
.file(&parser_c)
.include(out_dir.join("src"))
.warnings(false)
.compile("tree-sitter-zeek");

generate_keywords(&grammar);
generate_keywords(&parser_c);
}
2 changes: 1 addition & 1 deletion crates/tree-sitter-zeek/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ pub const HIGHLIGHT_QUERY: &str = include_str!("../vendor/tree-sitter-zeek/queri
/// The content of the [`node-types.json`][] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const ZEEK_NODE_TYPES: &str = include_str!("../vendor/tree-sitter-zeek/src/node-types.json");
pub const ZEEK_NODE_TYPES: &str = include_str!(concat!(env!("OUT_DIR"), "/src/node-types.json"));
23 changes: 23 additions & 0 deletions src/snapshots/zeek_language_server__complete__test__keyword.snap
Original file line number Diff line number Diff line change
@@ -1,8 +1,31 @@
---
source: src/complete.rs
expression: result
snapshot_kind: text
---
[
CompletionItem {
label: "F",
label_details: None,
kind: Some(
Keyword,
),
detail: None,
documentation: None,
deprecated: None,
preselect: None,
sort_text: None,
filter_text: None,
insert_text: None,
insert_text_format: None,
insert_text_mode: None,
text_edit: None,
additional_text_edits: None,
command: None,
commit_characters: None,
data: None,
tags: None,
},
CompletionItem {
label: "fallthrough",
label_details: None,
Expand Down
Loading

0 comments on commit 6851539

Please sign in to comment.