Skip to content

Commit

Permalink
analyze: script for automatically fixing some compile errors (#1174)
Browse files Browse the repository at this point in the history
This branch adds a Python script that automatically fixes some kinds of
compiler errors by processing the output of `rustc --error-format json`.
It applies all `MachineApplicable` suggestions, plus two other kinds of
fixes that don't come with a `MachineApplicable` suggestion (in our
current nightly) but are important for lighttpd `buffer`:

* "Lifetime may not live long enough": For each function,
`c2rust-analyze` adds a lifetime parameter for each reference in the
arguments and return type, but doesn't add any outlives bounds (e.g.
`'a: 'b`) to relate those lifetimes. On functions that return a borrowed
reference into one of their arguments, this results in a borrow checker
error. The error comes with a human-readable suggestion like "consider
adding the following bound: `'a: 'b`", and also marks the declaration of
the first lifetime (`'a`); the auto-fix script parses this output and
inserts the suggested `: 'b` bound after the declaration.
* "The trait `Copy` may not be implemented for this type": When
`c2rust-analyze` converts a field from raw pointer type to `&mut T` or
`Box<T>`, it doesn't remove `derive(Copy)` from the struct. This causes
a compile error because `&mut T` and `Box<T>` don't implement `Copy`.
The error marks the location of the `Copy` token in `#[derive(Copy)]`;
the auto-fix script removes that token (and the subsequent comma, if
necessary). Note this can produce a trailing comma (`#[derive(Clone,
Copy)]` -> `#[derive(Clone, )]`) or an empty `derive()`
(`#[derive(Copy)]` -> `#[derive()]`), both of which are legal but
unidiomatic.
  • Loading branch information
spernsteiner authored Dec 3, 2024
2 parents ecd1cbc + 7098d9e commit ff0fe1a
Show file tree
Hide file tree
Showing 5 changed files with 416 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,6 @@ marks.*.json
# Outputs of `c2rust-analyze`
inspect/
*.analysis.txt
*.rs.fixed
analysis.txt
polonius_cache/
1 change: 1 addition & 0 deletions c2rust-analyze/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/inspect/
*.rlib
/tests/auto_fix_errors/*.json
283 changes: 283 additions & 0 deletions c2rust-analyze/scripts/auto_fix_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
import argparse
from collections import defaultdict, namedtuple
import json
import re
import sys

def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description='automatically apply rustc-suggested fixes for compile errors')
parser.add_argument('-n', '--dry-run', action='store_true',
help="print fixes that would be applied, but don't modify any files")
parser.add_argument('path', metavar='ERRORS.JSON',
help='output of `rustc --error-format json`')
return parser.parse_args()

Fix = namedtuple('Fix', (
# Path to the file to modify.
'file_path',
# Line number where the fix will be applied. This is used for debug output
# only.
'line_number',
# Start of the byte range to replace.
'start_byte',
# End (exclusive) of the byte range to replace.
'end_byte',
# Replacement text (as a `str`, not `bytes`).
'new_text',
# The original error message. This is used for debug output only.
'message',
))

LifetimeBound = namedtuple('LifetimeBound', (
'file_path',
'line_number',
# Byte offset of the start of the lifetime parameter declaration.
'start_byte',
# Byte offset of the end of the lifetime parameter declaration.
'end_byte',
# The lifetime to use in the new bound. If `'a: 'b` is the suggested
# bound, then `start/end_byte` points to the declaration of `'a`, and
# `bound_lifetime` is the string `"'b"`.
'bound_lifetime',
))

RemoveDeriveCopy = namedtuple('RemoveDeriveCopy', (
'file_path',
'line_number',
# Byte offset of the start of the token `Copy`.
'start_byte',
# Byte offset of the end of the token `Copy`.
'end_byte',
))

MSG_LIFETIME_BOUND = 'lifetime may not live long enough'
MSG_DERIVE_COPY = 'the trait `Copy` may not be implemented for this type'

LIFETIME_DEFINED_RE = re.compile(r'^lifetime `([^`]*)` defined here$')
CONSIDER_ADDING_BOUND_RE = re.compile(r'^consider adding the following bound: `([^`:]*): ([^`]*)`$')
SPACE_COLON_RE = re.compile(rb'\s*:')
SPACE_COMMA_RE = re.compile(rb'\s*,')

def main():
args = parse_args()

fixes = []
def gather_fixes(j, message):
for span in j['spans']:
if span.get('suggestion_applicability') != 'MachineApplicable':
continue
fix = Fix(
file_path=span['file_name'],
line_number=span['line_start'],
start_byte=span['byte_start'],
end_byte=span['byte_end'],
new_text=span['suggested_replacement'],
message=message,
)
fixes.append(fix)

for child in j['children']:
gather_fixes(child, message)

lifetime_bounds = []
def gather_lifetime_bounds(j):
# We look for a particular pattern seen in lifetime errors. First,
# there should be a span with label "lifetime 'a defined here" pointing
# at the declaration of `'a`. Second, there should be a child of type
# `help` with the text "consider adding the following bound: `'a: 'b`".

decl_spans = {}
for span in j['spans']:
m = LIFETIME_DEFINED_RE.match(span['label'])
if m is not None:
lifetime = m.group(1)
if lifetime in decl_spans:
# Duplicate declaration for this lifetime. This shouldn't
# happen, but we can proceed as long as the lifetime isn't
# the target of the bound. We mark the duplicate lifetime
# so it can't be used as the target.
decl_spans[lifetime] = None
continue
decl_spans[lifetime] = span

for child in j['children']:
if child['level'] != 'help':
continue
m = CONSIDER_ADDING_BOUND_RE.match(child['message'])
if m is None:
continue
lifetime_a = m.group(1)
lifetime_b = m.group(2)
span = decl_spans.get(lifetime_a)
if span is None:
# We don't have anywhere to insert the new bound. This can
# also happen if there were duplicate declaration spans for
# this lifetime (we explicitly insert `None` into the map in
# that case).
continue
lifetime_bounds.append(LifetimeBound(
file_path=span['file_name'],
line_number=span['line_start'],
start_byte=span['byte_start'],
end_byte=span['byte_end'],
bound_lifetime=lifetime_b,
))

remove_derive_copies = []
def handle_derive_copy(j):
# Find the "primary span", which covers the `Copy` token.
primary_span = None
for span in j['spans']:
if span.get('is_primary'):
primary_span = span
break

if primary_span is None:
return

remove_derive_copies.append(RemoveDeriveCopy(
file_path=primary_span['file_name'],
line_number=primary_span['line_start'],
start_byte=primary_span['byte_start'],
end_byte=primary_span['byte_end'],
))

with open(args.path, 'r') as f:
for line in f:
j = json.loads(line)

# Only process errors, not warnings.
level = j['level']
if level == 'error':
pass
elif level in ('warning', 'failure-note'):
continue
else:
# `help`, `note`, etc should not appear at top level.
assert False, 'unexpected `level` %r' % (level,)

gather_fixes(j, j['message'])

if j['message'] == MSG_LIFETIME_BOUND:
gather_lifetime_bounds(j)
elif j['message'] == MSG_DERIVE_COPY:
handle_derive_copy(j)

# Convert suggested lifetime bounds to fixes. We have to group the bounds
# first because there may be multiple suggested bounds for a single
# declaration, in which case we want to generate a single `Fix` that adds
# all of them at once.

# Maps the `(file_path, line_number, start_byte, end_byte)` of the
# declaration site to the set of new lifetimes to apply at that site.
grouped_lifetime_bounds = {}
for lb in lifetime_bounds:
key = (lb.file_path, lb.line_number, lb.start_byte, lb.end_byte)
if key not in grouped_lifetime_bounds:
grouped_lifetime_bounds[key] = set()
grouped_lifetime_bounds[key].add(lb.bound_lifetime)

file_content = {}
def read_file(file_path):
if file_path not in file_content:
file_content[file_path] = open(file_path, 'rb').read()
return file_content[file_path]

for key, bound_lifetimes in sorted(grouped_lifetime_bounds.items()):
(file_path, line_number, start_byte, end_byte) = key
content = read_file(file_path)
decl_lifetime = content[start_byte : end_byte].decode('utf-8')
bound_lifetimes = ' + '.join(bound_lifetimes)
m = SPACE_COLON_RE.match(content, end_byte)
if m is None:
fix_end_byte = end_byte
fix_new_text = '%s: %s' % (decl_lifetime, bound_lifetimes)
else:
space_colon = m.group().decode('utf-8')
fix_end_byte = m.end()
fix_new_text = '%s%s %s +' % (decl_lifetime, space_colon, bound_lifetimes)
fixes.append(Fix(
file_path=file_path,
line_number=line_number,
start_byte=start_byte,
end_byte=fix_end_byte,
new_text=fix_new_text,
message=MSG_LIFETIME_BOUND,
))

# Convert errors about `#[derive(Copy)]` to fixes.
for rm in remove_derive_copies:
# The error span points to the `Copy` token, but we actually want to
# remove both `Copy` and the following comma, if one is present.
#
# #[derive(Foo, Copy, Bar)] -> #[derive(Foo, Bar)]
# #[derive(Foo, Copy)] -> #[derive(Foo,)]
# #[derive(Copy, Bar)] -> #[derive(Bar)]
# #[derive(Copy)] -> #[derive()]
#
# Note that `#[derive()]` is legal, though ideally it should be removed
# by a later cleanup pass.
content = read_file(rm.file_path)
m = SPACE_COMMA_RE.match(content, rm.end_byte)
fix_end_byte = m.end() if m is not None else rm.end_byte
fixes.append(Fix(
file_path=rm.file_path,
line_number=rm.line_number,
start_byte=rm.start_byte,
end_byte=fix_end_byte,
new_text='',
message=MSG_DERIVE_COPY,
))

# Group fixes by file
fixes_by_file = defaultdict(list)
for fix in fixes:
fixes_by_file[fix.file_path].append(fix)
fixes_by_file = dict(fixes_by_file)
for file_fixes in fixes_by_file.values():
file_fixes.sort(key=lambda fix: fix.start_byte)

# Apply fixes
for file_path, file_fixes in sorted(fixes_by_file.items()):
content = read_file(file_path)
chunks = []
pos = 0
prev_fix = None
for fix in file_fixes:
old_text = content[fix.start_byte : fix.end_byte].decode('utf-8')
desc = '%s:%d: %r -> %r (%s)' % (
file_path, fix.line_number, old_text, fix.new_text, fix.message)
if prev_fix is not None:
if fix.start_byte < prev_fix.end_byte:
if fix.start_byte == prev_fix.start_byte \
and fix.end_byte == prev_fix.end_byte \
and fix.new_text == prev_fix.new_text:
# `fix` and `prev_fix` suggest the same change, so we
# don't need to apply `fix`.
continue
# We want to apply fix, but can't because it overlaps with
# `prev_fix`.
print('skipping due to overlap: %s' % desc)
continue

prev_fix = fix

print(desc)
if fix.start_byte > pos:
chunks.append(content[pos : fix.start_byte])
chunks.append(fix.new_text.encode('utf-8'))
pos = fix.end_byte

if pos < len(content):
chunks.append(content[pos:])

new_content = b''.join(chunks)
if not args.dry_run:
open(file_path, 'wb').write(new_content)
print('wrote to %r' % file_path)
else:
print('would write to %r' % file_path)

if __name__ == '__main__':
main()
82 changes: 82 additions & 0 deletions c2rust-analyze/tests/auto_fix_errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
pub mod common;

use crate::common::{check_for_missing_tests_for, test_dir_for};
use std::fs::{self, File};
use std::process::Command;

#[test]
fn check_for_missing_tests() {
check_for_missing_tests_for(file!());
}

fn test(file_name: &str) {
let test_dir = test_dir_for(file!(), true);
let path = test_dir.join(file_name);
let fixed_path = path.with_extension("rs.fixed");
let json_path = path.with_extension("json");
let script_path = test_dir.join("../../scripts/auto_fix_errors.py");

fs::copy(&path, &fixed_path).unwrap();

// Run with `--error-format json` to produce JSON input for the script.
let mut cmd = Command::new("rustc");
cmd.arg("-A")
.arg("warnings")
.arg("--crate-name")
.arg(path.file_stem().unwrap())
.arg("--crate-type")
.arg("rlib")
.arg("--error-format")
.arg("json")
.arg(&fixed_path)
.stderr(File::create(&json_path).unwrap());
let status = cmd.status().unwrap();
assert_eq!(
status.code(),
Some(1),
"command {cmd:?} exited with code {status:?}"
);

// Run the script to fix errors.
let mut cmd = Command::new("python3");
cmd.arg(&script_path).arg(&json_path);
let status = cmd.status().unwrap();
assert!(
status.success(),
"command {cmd:?} exited with code {status:?}"
);

// There should be no more compile errors now.
let mut cmd = Command::new("rustc");
cmd.arg("-A")
.arg("warnings")
.arg("--crate-name")
.arg(path.file_stem().unwrap())
.arg("--crate-type")
.arg("rlib")
.arg(&fixed_path);
let status = cmd.status().unwrap();
assert!(
status.success(),
"command {cmd:?} exited with code {status:?}"
);
}

macro_rules! define_test {
($name:ident) => {
#[test]
fn $name() {
test(concat!(stringify!($name), ".rs"));
}
};
}

macro_rules! define_tests {
($($name:ident,)*) => {
$(define_test! { $name })*
}
}

define_tests! {
derive_copy,
}
Loading

0 comments on commit ff0fe1a

Please sign in to comment.