Skip to content

Commit 33de064

Browse files
committed
coverage-dump: Extract a common parser method for maybe-compressed bytes
1 parent a096c33 commit 33de064

File tree

2 files changed

+41
-23
lines changed

2 files changed

+41
-23
lines changed

src/tools/coverage-dump/src/llvm_junk.rs

+39
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
1+
use std::borrow::Cow;
12
use std::sync::OnceLock;
23

4+
use anyhow::{anyhow, ensure};
35
use regex::bytes;
46

7+
use crate::parser::Parser;
8+
59
#[cfg(test)]
610
mod tests;
711

@@ -44,3 +48,38 @@ pub(crate) fn truncated_md5(bytes: &[u8]) -> u64 {
4448
// or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.)
4549
u64::from_le_bytes(hash)
4650
}
51+
52+
impl<'a> Parser<'a> {
53+
/// Reads a sequence of:
54+
/// - Length of uncompressed data in bytes, as ULEB128
55+
/// - Length of compressed data in bytes (or 0), as ULEB128
56+
/// - The indicated number of compressed or uncompressed bytes
57+
///
58+
/// If the number of compressed bytes is 0, the subsequent bytes are
59+
/// uncompressed. Otherwise, the subsequent bytes are compressed, and will
60+
/// be decompressed.
61+
///
62+
/// Returns the uncompressed bytes that were read directly or decompressed.
63+
pub(crate) fn read_chunk_to_uncompressed_bytes(&mut self) -> anyhow::Result<Cow<'a, [u8]>> {
64+
let uncompressed_len = self.read_uleb128_usize()?;
65+
let compressed_len = self.read_uleb128_usize()?;
66+
67+
if compressed_len == 0 {
68+
// The bytes are uncompressed, so read them directly.
69+
let uncompressed_bytes = self.read_n_bytes(uncompressed_len)?;
70+
Ok(Cow::Borrowed(uncompressed_bytes))
71+
} else {
72+
// The bytes are compressed, so read and decompress them.
73+
let compressed_bytes = self.read_n_bytes(compressed_len)?;
74+
75+
let uncompressed_bytes = miniz_oxide::inflate::decompress_to_vec_zlib_with_limit(
76+
compressed_bytes,
77+
uncompressed_len,
78+
)
79+
.map_err(|e| anyhow!("{e:?}"))?;
80+
ensure!(uncompressed_bytes.len() == uncompressed_len);
81+
82+
Ok(Cow::Owned(uncompressed_bytes))
83+
}
84+
}
85+
}

src/tools/coverage-dump/src/prf_names.rs

+2-23
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
use std::collections::HashMap;
22
use std::sync::OnceLock;
33

4-
use anyhow::{anyhow, ensure};
54
use regex::Regex;
65

76
use crate::llvm_junk::{truncated_md5, unescape_llvm_string_contents};
@@ -43,35 +42,15 @@ pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result<HashMap
4342
for payload in llvm_ir.lines().filter_map(prf_names_payload).map(unescape_llvm_string_contents)
4443
{
4544
let mut parser = Parser::new(&payload);
46-
let uncompressed_len = parser.read_uleb128_usize()?;
47-
let compressed_len = parser.read_uleb128_usize()?;
48-
49-
let uncompressed_bytes_vec;
50-
let uncompressed_bytes: &[u8] = if compressed_len == 0 {
51-
// The symbol name bytes are uncompressed, so read them directly.
52-
parser.read_n_bytes(uncompressed_len)?
53-
} else {
54-
// The symbol name bytes are compressed, so read and decompress them.
55-
let compressed_bytes = parser.read_n_bytes(compressed_len)?;
56-
57-
uncompressed_bytes_vec = miniz_oxide::inflate::decompress_to_vec_zlib_with_limit(
58-
compressed_bytes,
59-
uncompressed_len,
60-
)
61-
.map_err(|e| anyhow!("{e:?}"))?;
62-
ensure!(uncompressed_bytes_vec.len() == uncompressed_len);
63-
64-
&uncompressed_bytes_vec
65-
};
45+
let uncompressed_bytes = parser.read_chunk_to_uncompressed_bytes()?;
46+
parser.ensure_empty()?;
6647

6748
// Symbol names in the payload are separated by `0x01` bytes.
6849
for raw_name in uncompressed_bytes.split(|&b| b == 0x01) {
6950
let hash = truncated_md5(raw_name);
7051
let demangled = demangle_if_able(raw_name)?;
7152
map.insert(hash, demangled);
7253
}
73-
74-
parser.ensure_empty()?;
7554
}
7655

7756
Ok(map)

0 commit comments

Comments
 (0)