Skip to content

Commit

Permalink
Reimplement humanizer using line-index crate
Browse files Browse the repository at this point in the history
  • Loading branch information
hurryabit committed Dec 23, 2023
1 parent 430d5f3 commit 95008b6
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 53 deletions.
23 changes: 23 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ tower-lsp = "0.20"
tokio = { version = "1.35", features = ["full"] }
simple_logger = { version = "4.3", features = ["stderr"] }
time = "0.3"
line-index = "0.1"

[dev-dependencies]
criterion = "0.5"
Expand Down
79 changes: 31 additions & 48 deletions compiler/src/parser/humanizer.rs
Original file line number Diff line number Diff line change
@@ -1,70 +1,53 @@
use crate::location::SourceLocation;
use line_index::{LineIndex, TextSize, WideEncoding};

// NOTE(review): two declarations of `Humanizer` appear back to back here. The
// braced one with a `line_starts` field looks like the pre-commit version and
// the tuple struct wrapping `LineIndex` like its replacement -- confirm
// against the commit before relying on either.
/// Maps byte offsets in a source text to line/column `SourceLocation`s.
#[derive(Debug, Eq, PartialEq)]
pub struct Humanizer {
line_starts: Vec<usize>,
}
pub struct Humanizer(LineIndex);

impl Humanizer {
/// Builds a `Humanizer` for the source text `input`.
// NOTE(review): the statements from `let mut line_starts` through
// `Self { line_starts }` hand-build a table of line start offsets, while
// `Self(LineIndex::new(input))` delegates to the `line-index` crate. Only one
// of the two bodies can match the struct in use -- presumably the hand-built
// table is the removed code; confirm.
pub fn new(input: &str) -> Self {
let mut line_starts = Vec::new();
let mut index = 0;
line_starts.push(index);
for line in input.lines() {
// FIXME(MH): This assumes a newline character is just one byte,
// which is not true on Windows.
index += line.len() + 1;
line_starts.push(index);
}
Self { line_starts }
Self(LineIndex::new(input))
}

/// Translates the offset `loc` into a `SourceLocation` (line and column).
///
/// In the `LineIndex`-based chain below, an offset that cannot be resolved
/// yields `SourceLocation { line: 0, column: 0 }` instead of an error.
pub fn run(&self, loc: usize) -> SourceLocation {
// Table-based lookup: an exact hit is the index of a line start; otherwise
// the insertion point minus one is the last line starting before `loc`.
// The column is the distance from that line start.
let line = self.line_starts.binary_search(&loc).unwrap_or_else(|x| x - 1);
SourceLocation { line: line as u32, column: (loc - self.line_starts[line]) as u32 }
// `LineIndex`-based lookup: every step is fallible (`loc` may not fit into a
// `u32`, and both `try_line_col` and `to_wide` return an `Option`); any
// failure falls through to the `0:0` default at the end of the chain.
u32::try_from(loc)
.ok()
.and_then(|loc| self.0.try_line_col(TextSize::new(loc)))
// NOTE(review): the position is converted with `WideEncoding::Utf32`, yet
// the following closure names it `utf16_pos` -- the name looks stale.
.and_then(|utf8_pos| self.0.to_wide(WideEncoding::Utf32, utf8_pos))
.map(|utf16_pos| SourceLocation { line: utf16_pos.line, column: utf16_pos.col })
.unwrap_or(SourceLocation { line: 0, column: 0 })
}
}

#[cfg(test)]
mod tests {
use super::*;

// Checks the line-start table that `Humanizer::new` builds for several inputs.
// Each expected vector starts at 0 and gains one entry per line of the input,
// so an unterminated last line ("a") yields the same table as a terminated
// one ("a\n").
// NOTE(review): this test reads the `line_starts` field, which only the braced
// `Humanizer` declaration has -- presumably the test is removed together with
// that field; confirm.
#[test]
fn test_line_starts() {
let cases = vec![
("", vec![0]),
("a", vec![0, 2]),
("a\n", vec![0, 2]),
("aa", vec![0, 3]),
("a\nb", vec![0, 2, 4]),
("a\nb\n", vec![0, 2, 4]),
("ab\ncd\n", vec![0, 3, 6]),
("\na", vec![0, 1, 3]),
];
for (input, expected_line_starts) in cases {
let humanizer = Humanizer::new(input);
let expected_line_starts: Vec<_> = expected_line_starts.into_iter().collect();
assert_eq!(humanizer.line_starts, expected_line_starts);
}
}

#[test]
fn test_translation() {
let humanizer = Humanizer::new("ab\nc\nde\n\nf");
let humanizer = Humanizer::new("ab\nc\nde\n\nf\r\ng\näß");
let cases = vec![
(0, 0, 0),
(1, 0, 1),
(2, 0, 2),
(3, 1, 0),
(4, 1, 1),
(5, 2, 0),
(6, 2, 1),
(7, 2, 2),
(8, 3, 0),
(9, 4, 0),
(10, 4, 1),
(11, 5, 0),
(100, 5, 89),
(0, 0, 0), // ^|a
(1, 0, 1), // a|b
(2, 0, 2), // b|\n
(3, 1, 0), // \n|c
(4, 1, 1), // c|\n
(5, 2, 0), // \n|d
(6, 2, 1), // d|e
(7, 2, 2), // e|\n
(8, 3, 0), // \n|\n
(9, 4, 0), // \n|f
(10, 4, 1), // f|\r
(11, 4, 2), // \r|\n
(12, 5, 0), // \n|g
(13, 5, 1), // g|\n
(14, 6, 0), // \n|ä
(15, 0, 0), // in ä
(16, 6, 1), // ä|ß
(17, 0, 0), // in ß
(18, 6, 2), // ß|$
(19, 0, 0), // $|
(100, 0, 0),
];
for (loc, line, column) in cases {
assert_eq!(humanizer.run(loc), SourceLocation { line, column });
Expand Down
10 changes: 5 additions & 5 deletions compiler/src/tests/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,14 @@ fn location_eol_lf() {
"###);
}

// TODO(MH): This should have the same output as `location_eol_lf`.
// Snapshot test: feeds "\r\nx" (a CRLF line break followed by `x`) to
// `parse_error` and compares its output against the inline snapshot.
// NOTE(review): the snapshot text below appears to contain both the old and
// the new expected output, including two different `found at` positions.
// `2:1:2:2` places `x` in the first column of line 2; reconcile against the
// commit before copying this snapshot.
#[test]
fn location_eol_crlf() {
insta::assert_snapshot!(parse_error("\r\nx"), @r###"
---
--------------------------------------------------
2:2-3:1: Unrecognized token `x` found at 2:2:3:1
2 | x
~
Unrecognized token `x` found at 2:1:2:2
Expected one of "fn" or "type"
"###);
}
Expand All @@ -119,15 +120,14 @@ fn location_comment_ascii() {
"###);
}

// TODO(MH): This should have the same output as `location_comment_ascii`.
// Snapshot test: feeds a block comment containing five umlauts followed by
// `x` to `parse_error` and compares its output against the inline snapshot.
// NOTE(review): the snapshot text below appears to contain both the old and
// the new expected output. The two `found at` lines differ in how the umlauts
// are counted: `1:13` counts each umlaut as one column, `1:18` counts each as
// two (its UTF-8 byte length). Reconcile against the commit before copying
// this snapshot.
#[test]
fn location_comment_unlauts() {
insta::assert_snapshot!(parse_error("/* äëïöü */ x"), @r###"
---
--------------------------------------------------
1 | /* äëïöü */ x
~
Unrecognized token `x` found at 1:18:1:19
~
Unrecognized token `x` found at 1:13:1:14
Expected one of "fn" or "type"
"###);
}

0 comments on commit 95008b6

Please sign in to comment.