1
1
use std:: collections:: HashMap ;
2
2
use std:: fmt:: { self , Debug , Write as _} ;
3
- use std:: sync:: OnceLock ;
3
+ use std:: sync:: LazyLock ;
4
4
5
- use anyhow:: { Context , anyhow} ;
5
+ use anyhow:: { Context , anyhow, ensure} ;
6
+ use itertools:: Itertools ;
6
7
use regex:: Regex ;
7
8
8
9
use crate :: llvm_junk:: unescape_llvm_string_contents;
9
10
use crate :: parser:: Parser ;
10
11
12
+ #[ cfg( test) ]
13
+ mod tests;
14
+
11
15
pub ( crate ) fn dump_covfun_mappings (
12
16
llvm_ir : & str ,
13
17
function_names : & HashMap < u64 , String > ,
@@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings(
16
20
// each entry with its (demangled) name.
17
21
let mut covfun_entries = llvm_ir
18
22
. lines ( )
19
- . filter_map ( covfun_line_data)
20
- . map ( |line_data| ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data) )
21
- . collect :: < Vec < _ > > ( ) ;
23
+ . filter ( |line| is_covfun_line ( line) )
24
+ . map ( parse_covfun_line)
25
+ . map_ok ( |line_data| {
26
+ ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data)
27
+ } )
28
+ . collect :: < Result < Vec < _ > , _ > > ( ) ?;
22
29
covfun_entries. sort_by ( |a, b| {
23
30
// Sort entries primarily by name, to help make the order consistent
24
31
// across platforms and relatively insensitive to changes.
@@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings(
108
115
Ok ( ( ) )
109
116
}
110
117
118
/// Data extracted from a single `@__covrec_` line of LLVM IR assembly.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// True when the record's variable name ends with the `u` suffix.
    is_used: bool,
    /// Function name hash; used as the key when looking up the function's name.
    name_hash: u64,
    /// Filenames hash field read from the record.
    filenames_hash: u64,
    /// Payload bytes, after unescaping the LLVM string literal contents.
    payload: Vec<u8>,
}
116
125
117
- /// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
118
- /// entry, and if so extracts relevant data in a `CovfunLineData`.
119
- fn covfun_line_data ( line : & str ) -> Option < CovfunLineData > {
120
- let re = {
121
- // We cheat a little bit and match variable names `@__covrec_[HASH]u`
122
- // rather than the section name, because the section name is harder to
123
- // extract and differs across Linux/Windows/macOS. We also extract the
124
- // symbol name hash from the variable name rather than the data, since
125
- // it's easier and both should match.
126
- static RE : OnceLock < Regex > = OnceLock :: new ( ) ;
127
- RE . get_or_init ( || {
128
- Regex :: new (
129
- r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"# ,
130
- )
131
- . unwrap ( )
132
- } )
133
- } ;
126
/// Returns true if `line` begins with the `@__covrec_` variable-name prefix.
fn is_covfun_line(line: &str) -> bool {
    const COVREC_PREFIX: &str = "@__covrec_";
    line.strip_prefix(COVREC_PREFIX).is_some()
}
134
129
135
- let captures = re. captures ( line) ?;
136
- let name_hash = u64:: from_str_radix ( & captures[ "name_hash" ] , 16 ) . unwrap ( ) ;
130
+ /// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
131
+ /// entry, parses it to extract relevant data in a `CovfunLineData`.
132
+ fn parse_covfun_line ( line : & str ) -> anyhow:: Result < CovfunLineData > {
133
+ ensure ! ( is_covfun_line( line) ) ;
134
+
135
+ // We cheat a little bit and match variable names `@__covrec_[HASH]u`
136
+ // rather than the section name, because the section name is harder to
137
+ // extract and differs across Linux/Windows/macOS.
138
+ const RE_STRING : & str = r#"(?x)^
139
+ @__covrec_[0-9A-Z]+(?<is_used>u)?
140
+ \ = \ # (trailing space)
141
+ .*
142
+ <\{
143
+ \ i64 \ (?<name_hash> -? [0-9]+),
144
+ \ i32 \ -? [0-9]+, # (length of payload; currently unused)
145
+ \ i64 \ -? [0-9]+, # (source hash; currently unused)
146
+ \ i64 \ (?<filenames_hash> -? [0-9]+),
147
+ \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
148
+ \ # (trailing space)
149
+ }>
150
+ .*$
151
+ "# ;
152
+ static RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( RE_STRING ) . unwrap ( ) ) ;
153
+
154
+ let captures =
155
+ RE . captures ( line) . with_context ( || format ! ( "couldn't parse covfun line: {line:?}" ) ) ?;
137
156
let is_used = captures. name ( "is_used" ) . is_some ( ) ;
157
+ let name_hash = i64:: from_str_radix ( & captures[ "name_hash" ] , 10 ) . unwrap ( ) as u64 ;
158
+ let filenames_hash = i64:: from_str_radix ( & captures[ "filenames_hash" ] , 10 ) . unwrap ( ) as u64 ;
138
159
let payload = unescape_llvm_string_contents ( & captures[ "payload" ] ) ;
139
160
140
- Some ( CovfunLineData { name_hash, is_used , payload } )
161
+ Ok ( CovfunLineData { is_used , name_hash, filenames_hash , payload } )
141
162
}
142
163
143
164
// Extra parser methods only needed when parsing `covfun` payloads.
0 commit comments