Skip to content

Commit aaac504

Browse files
committed
coverage-dump: Include filenames hash in covfun line data
1 parent 33de064 commit aaac504

File tree

4 files changed

+102
-26
lines changed

4 files changed

+102
-26
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,7 @@ name = "coverage-dump"
779779
version = "0.1.0"
780780
dependencies = [
781781
"anyhow",
782+
"itertools",
782783
"leb128",
783784
"md-5",
784785
"miniz_oxide 0.7.4",

src/tools/coverage-dump/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77

88
[dependencies]
99
anyhow = "1.0.71"
10+
itertools = "0.12"
1011
leb128 = "0.2.5"
1112
md5 = { package = "md-5" , version = "0.10.5" }
1213
miniz_oxide = "0.7.1"

src/tools/coverage-dump/src/covfun.rs

+47-26
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
use std::collections::HashMap;
22
use std::fmt::{self, Debug, Write as _};
3-
use std::sync::OnceLock;
3+
use std::sync::LazyLock;
44

5-
use anyhow::{Context, anyhow};
5+
use anyhow::{Context, anyhow, ensure};
6+
use itertools::Itertools;
67
use regex::Regex;
78

89
use crate::llvm_junk::unescape_llvm_string_contents;
910
use crate::parser::Parser;
1011

12+
#[cfg(test)]
13+
mod tests;
14+
1115
pub(crate) fn dump_covfun_mappings(
1216
llvm_ir: &str,
1317
function_names: &HashMap<u64, String>,
@@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings(
1620
// each entry with its (demangled) name.
1721
let mut covfun_entries = llvm_ir
1822
.lines()
19-
.filter_map(covfun_line_data)
20-
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
21-
.collect::<Vec<_>>();
23+
.filter(|line| is_covfun_line(line))
24+
.map(parse_covfun_line)
25+
.map_ok(|line_data| {
26+
(function_names.get(&line_data.name_hash).map(String::as_str), line_data)
27+
})
28+
.collect::<Result<Vec<_>, _>>()?;
2229
covfun_entries.sort_by(|a, b| {
2330
// Sort entries primarily by name, to help make the order consistent
2431
// across platforms and relatively insensitive to changes.
@@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings(
108115
Ok(())
109116
}
110117

118+
/// Data extracted from a single `@__covrec_` line of LLVM IR by
/// `parse_covfun_line`.
#[derive(Debug, PartialEq, Eq)]
111119
struct CovfunLineData {
112-
name_hash: u64,
113120
/// True when the `@__covrec_[HASH]` variable name ends with the `u` suffix.
is_used: bool,
121+
/// First `i64` field of the record initializer, stored as unsigned.
name_hash: u64,
122+
/// Last `i64` field of the record initializer (the one just before the
/// byte array), stored as unsigned.
filenames_hash: u64,
114123
/// Contents of the record's `c"..."` byte string, after being passed
/// through `unescape_llvm_string_contents`.
payload: Vec<u8>,
115124
}
116125

117-
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
118-
/// entry, and if so extracts relevant data in a `CovfunLineData`.
119-
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
120-
let re = {
121-
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
122-
// rather than the section name, because the section name is harder to
123-
// extract and differs across Linux/Windows/macOS. We also extract the
124-
// symbol name hash from the variable name rather than the data, since
125-
// it's easier and both should match.
126-
static RE: OnceLock<Regex> = OnceLock::new();
127-
RE.get_or_init(|| {
128-
Regex::new(
129-
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
130-
)
131-
.unwrap()
132-
})
133-
};
126+
/// Returns true if `line` begins with the `@__covrec_` variable-name prefix,
/// which is how this tool picks out covfun records from LLVM IR text.
fn is_covfun_line(line: &str) -> bool {
127+
line.starts_with("@__covrec_")
128+
}
134129

135-
let captures = re.captures(line)?;
136-
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
130+
/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
131+
/// entry, parses it to extract relevant data into a `CovfunLineData`.
///
/// Returns an error if `line` does not start with `@__covrec_`, or if it does
/// not match the record layout described by `RE_STRING`.
132+
fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
133+
// Reject lines that lack the `@__covrec_` prefix before running the regex.
ensure!(is_covfun_line(line));
134+
135+
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
136+
// rather than the section name, because the section name is harder to
137+
// extract and differs across Linux/Windows/macOS.
138+
const RE_STRING: &str = r#"(?x)^
139+
@__covrec_[0-9A-Z]+(?<is_used>u)?
140+
\ = \ # (trailing space)
141+
.*
142+
<\{
143+
\ i64 \ (?<name_hash> -? [0-9]+),
144+
\ i32 \ -? [0-9]+, # (length of payload; currently unused)
145+
\ i64 \ -? [0-9]+, # (source hash; currently unused)
146+
\ i64 \ (?<filenames_hash> -? [0-9]+),
147+
\ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
148+
\ # (trailing space)
149+
}>
150+
.*$
151+
"#;
152+
// Compiled once on first use. The pattern is a constant, so this `unwrap`
// can only fail if `RE_STRING` itself is malformed.
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
153+
154+
// A line that does not match becomes an error that quotes the line.
let captures =
155+
RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;
137156
let is_used = captures.name("is_used").is_some();
157+
// The regex admits an optional `-` followed by decimal digits, so each hash
// is parsed as `i64` and then cast to the `u64` stored in `CovfunLineData`.
// NOTE(review): `unwrap` assumes the digits fit in an `i64`; the regex only
// checks their shape, not their magnitude -- confirm this always holds.
let name_hash = i64::from_str_radix(&captures["name_hash"], 10).unwrap() as u64;
158+
let filenames_hash = i64::from_str_radix(&captures["filenames_hash"], 10).unwrap() as u64;
138159
let payload = unescape_llvm_string_contents(&captures["payload"]);
139160

140-
Some(CovfunLineData { name_hash, is_used, payload })
161+
Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
141162
}
142163

143164
// Extra parser methods only needed when parsing `covfun` payloads.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
use super::{CovfunLineData, parse_covfun_line};
2+
3+
/// Integers in LLVM IR are not inherently signed/unsigned, and the text format tends
4+
/// to emit them in signed form, so this helper function converts `i64` to `u64`.
5+
fn as_u64(x: i64) -> u64 {
6+
// Same-width `as` cast: the bit pattern is kept, so e.g. `-1` becomes `u64::MAX`.
x as u64
7+
}
8+
9+
/// Checks that `parse_covfun_line` turns sample `@__covrec_` lines into the
/// expected `CovfunLineData` values, including records whose hash fields are
/// printed as negative numbers.
#[test]
10+
fn parse_covfun_line_data() {
11+
// One input line paired with the data it is expected to parse into.
struct Case {
12+
line: &'static str,
13+
expected: CovfunLineData,
14+
}
15+
let cases = &[
16+
// Copied from `trivial.ll`:
17+
Case {
18+
line: r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
19+
expected: CovfunLineData {
20+
is_used: true,
21+
name_hash: as_u64(5307978893922758273),
22+
filenames_hash: as_u64(6343436898695299756),
23+
payload: b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D".to_vec(),
24+
},
25+
},
26+
// Copied from `on-off-sandwich.ll`:
27+
Case {
28+
line: r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
29+
expected: CovfunLineData {
30+
is_used: true,
31+
name_hash: as_u64(-3400688559180533350),
32+
filenames_hash: as_u64(892196767019953100),
33+
payload: b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06".to_vec(),
34+
},
35+
},
36+
// Copied from `no-core.ll`:
37+
Case {
38+
line: r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
39+
expected: CovfunLineData {
40+
is_used: true,
41+
name_hash: as_u64(1116917981370409222),
42+
filenames_hash: as_u64(-3625186110715410276),
43+
payload: b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D".to_vec(),
44+
},
45+
},
46+
];
47+
48+
for &Case { line, ref expected } in cases {
49+
// Echo the input line before checking it (presumably so a failing case
// can be identified in the test output).
println!("- {line}");
50+
// Errors are converted to strings so the parse result can be compared
// directly against `Ok(expected)` with `assert_eq!`.
let line_data = parse_covfun_line(line).map_err(|e| e.to_string());
51+
assert_eq!(line_data.as_ref(), Ok(expected));
52+
}
53+
}

0 commit comments

Comments
 (0)