-
Notifications
You must be signed in to change notification settings - Fork 246
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
analyze: script for automatically fixing some compile errors (#1174)
This branch adds a Python script that automatically fixes some kinds of compiler errors by processing the output of `rustc --error-format json`. It applies all `MachineApplicable` suggestions, plus two other kinds of fixes that don't come with a `MachineApplicable` suggestion (in our current nightly) but are important for lighttpd `buffer`: * "Lifetime may not live long enough": For each function, `c2rust-analyze` adds a lifetime parameter for each reference in the arguments and return type, but doesn't add any outlives bounds (e.g. `'a: 'b`) to relate those lifetimes. On functions that return a borrowed reference into one of their arguments, this results in a borrow checker error. The error comes with a human-readable suggestion like "consider adding the following bound: `'a: 'b`", and also marks the declaration of the first lifetime (`'a`); the auto-fix script parses this output and inserts the suggested `: 'b` bound after the declaration. * "The trait `Copy` may not be implemented for this type": When `c2rust-analyze` converts a field from raw pointer type to `&mut T` or `Box<T>`, it doesn't remove `derive(Copy)` from the struct. This causes a compile error because `&mut T` and `Box<T>` don't implement `Copy`. The error marks the location of the `Copy` token in `#[derive(Copy)]`; the auto-fix script removes that token (and the subsequent comma, if necessary). Note this can produce a trailing comma (`#[derive(Clone, Copy)]` -> `#[derive(Clone, )]`) or an empty `derive()` (`#[derive(Copy)]` -> `#[derive()]`), both of which are legal but unidiomatic.
- Loading branch information
Showing
5 changed files
with
416 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,5 +49,6 @@ marks.*.json | |
# Outputs of `c2rust-analyze` | ||
inspect/ | ||
*.analysis.txt | ||
*.rs.fixed | ||
analysis.txt | ||
polonius_cache/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
/inspect/ | ||
*.rlib | ||
/tests/auto_fix_errors/*.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,283 @@ | ||
import argparse | ||
from collections import defaultdict, namedtuple | ||
import json | ||
import re | ||
import sys | ||
|
||
def parse_args() -> argparse.Namespace: | ||
parser = argparse.ArgumentParser( | ||
description='automatically apply rustc-suggested fixes for compile errors') | ||
parser.add_argument('-n', '--dry-run', action='store_true', | ||
help="print fixes that would be applied, but don't modify any files") | ||
parser.add_argument('path', metavar='ERRORS.JSON', | ||
help='output of `rustc --error-format json`') | ||
return parser.parse_args() | ||
|
||
Fix = namedtuple('Fix', ( | ||
# Path to the file to modify. | ||
'file_path', | ||
# Line number where the fix will be applied. This is used for debug output | ||
# only. | ||
'line_number', | ||
# Start of the byte range to replace. | ||
'start_byte', | ||
# End (exclusive) of the byte range to replace. | ||
'end_byte', | ||
# Replacement text (as a `str`, not `bytes`). | ||
'new_text', | ||
# The original error message. This is used for debug output only. | ||
'message', | ||
)) | ||
|
||
LifetimeBound = namedtuple('LifetimeBound', ( | ||
'file_path', | ||
'line_number', | ||
# Byte offset of the start of the lifetime parameter declaration. | ||
'start_byte', | ||
# Byte offset of the end of the lifetime parameter declaration. | ||
'end_byte', | ||
# The lifetime to use in the new bound. If `'a: 'b` is the suggested | ||
# bound, then `start/end_byte` points to the declaration of `'a`, and | ||
# `bound_lifetime` is the string `"'b"`. | ||
'bound_lifetime', | ||
)) | ||
|
||
RemoveDeriveCopy = namedtuple('RemoveDeriveCopy', ( | ||
'file_path', | ||
'line_number', | ||
# Byte offset of the start of the token `Copy`. | ||
'start_byte', | ||
# Byte offset of the end of the token `Copy`. | ||
'end_byte', | ||
)) | ||
|
||
MSG_LIFETIME_BOUND = 'lifetime may not live long enough' | ||
MSG_DERIVE_COPY = 'the trait `Copy` may not be implemented for this type' | ||
|
||
LIFETIME_DEFINED_RE = re.compile(r'^lifetime `([^`]*)` defined here$') | ||
CONSIDER_ADDING_BOUND_RE = re.compile(r'^consider adding the following bound: `([^`:]*): ([^`]*)`$') | ||
SPACE_COLON_RE = re.compile(rb'\s*:') | ||
SPACE_COMMA_RE = re.compile(rb'\s*,') | ||
|
||
def main(): | ||
args = parse_args() | ||
|
||
fixes = [] | ||
def gather_fixes(j, message): | ||
for span in j['spans']: | ||
if span.get('suggestion_applicability') != 'MachineApplicable': | ||
continue | ||
fix = Fix( | ||
file_path=span['file_name'], | ||
line_number=span['line_start'], | ||
start_byte=span['byte_start'], | ||
end_byte=span['byte_end'], | ||
new_text=span['suggested_replacement'], | ||
message=message, | ||
) | ||
fixes.append(fix) | ||
|
||
for child in j['children']: | ||
gather_fixes(child, message) | ||
|
||
lifetime_bounds = [] | ||
def gather_lifetime_bounds(j): | ||
# We look for a particular pattern seen in lifetime errors. First, | ||
# there should be a span with label "lifetime 'a defined here" pointing | ||
# at the declaration of `'a`. Second, there should be a child of type | ||
# `help` with the text "consider adding the following bound: `'a: 'b`". | ||
|
||
decl_spans = {} | ||
for span in j['spans']: | ||
m = LIFETIME_DEFINED_RE.match(span['label']) | ||
if m is not None: | ||
lifetime = m.group(1) | ||
if lifetime in decl_spans: | ||
# Duplicate declaration for this lifetime. This shouldn't | ||
# happen, but we can proceed as long as the lifetime isn't | ||
# the target of the bound. We mark the duplicate lifetime | ||
# so it can't be used as the target. | ||
decl_spans[lifetime] = None | ||
continue | ||
decl_spans[lifetime] = span | ||
|
||
for child in j['children']: | ||
if child['level'] != 'help': | ||
continue | ||
m = CONSIDER_ADDING_BOUND_RE.match(child['message']) | ||
if m is None: | ||
continue | ||
lifetime_a = m.group(1) | ||
lifetime_b = m.group(2) | ||
span = decl_spans.get(lifetime_a) | ||
if span is None: | ||
# We don't have anywhere to insert the new bound. This can | ||
# also happen if there were duplicate declaration spans for | ||
# this lifetime (we explicitly insert `None` into the map in | ||
# that case). | ||
continue | ||
lifetime_bounds.append(LifetimeBound( | ||
file_path=span['file_name'], | ||
line_number=span['line_start'], | ||
start_byte=span['byte_start'], | ||
end_byte=span['byte_end'], | ||
bound_lifetime=lifetime_b, | ||
)) | ||
|
||
remove_derive_copies = [] | ||
def handle_derive_copy(j): | ||
# Find the "primary span", which covers the `Copy` token. | ||
primary_span = None | ||
for span in j['spans']: | ||
if span.get('is_primary'): | ||
primary_span = span | ||
break | ||
|
||
if primary_span is None: | ||
return | ||
|
||
remove_derive_copies.append(RemoveDeriveCopy( | ||
file_path=primary_span['file_name'], | ||
line_number=primary_span['line_start'], | ||
start_byte=primary_span['byte_start'], | ||
end_byte=primary_span['byte_end'], | ||
)) | ||
|
||
with open(args.path, 'r') as f: | ||
for line in f: | ||
j = json.loads(line) | ||
|
||
# Only process errors, not warnings. | ||
level = j['level'] | ||
if level == 'error': | ||
pass | ||
elif level in ('warning', 'failure-note'): | ||
continue | ||
else: | ||
# `help`, `note`, etc should not appear at top level. | ||
assert False, 'unexpected `level` %r' % (level,) | ||
|
||
gather_fixes(j, j['message']) | ||
|
||
if j['message'] == MSG_LIFETIME_BOUND: | ||
gather_lifetime_bounds(j) | ||
elif j['message'] == MSG_DERIVE_COPY: | ||
handle_derive_copy(j) | ||
|
||
# Convert suggested lifetime bounds to fixes. We have to group the bounds | ||
# first because there may be multiple suggested bounds for a single | ||
# declaration, in which case we want to generate a single `Fix` that adds | ||
# all of them at once. | ||
|
||
# Maps the `(file_path, line_number, start_byte, end_byte)` of the | ||
# declaration site to the set of new lifetimes to apply at that site. | ||
grouped_lifetime_bounds = {} | ||
for lb in lifetime_bounds: | ||
key = (lb.file_path, lb.line_number, lb.start_byte, lb.end_byte) | ||
if key not in grouped_lifetime_bounds: | ||
grouped_lifetime_bounds[key] = set() | ||
grouped_lifetime_bounds[key].add(lb.bound_lifetime) | ||
|
||
file_content = {} | ||
def read_file(file_path): | ||
if file_path not in file_content: | ||
file_content[file_path] = open(file_path, 'rb').read() | ||
return file_content[file_path] | ||
|
||
for key, bound_lifetimes in sorted(grouped_lifetime_bounds.items()): | ||
(file_path, line_number, start_byte, end_byte) = key | ||
content = read_file(file_path) | ||
decl_lifetime = content[start_byte : end_byte].decode('utf-8') | ||
bound_lifetimes = ' + '.join(bound_lifetimes) | ||
m = SPACE_COLON_RE.match(content, end_byte) | ||
if m is None: | ||
fix_end_byte = end_byte | ||
fix_new_text = '%s: %s' % (decl_lifetime, bound_lifetimes) | ||
else: | ||
space_colon = m.group().decode('utf-8') | ||
fix_end_byte = m.end() | ||
fix_new_text = '%s%s %s +' % (decl_lifetime, space_colon, bound_lifetimes) | ||
fixes.append(Fix( | ||
file_path=file_path, | ||
line_number=line_number, | ||
start_byte=start_byte, | ||
end_byte=fix_end_byte, | ||
new_text=fix_new_text, | ||
message=MSG_LIFETIME_BOUND, | ||
)) | ||
|
||
# Convert errors about `#[derive(Copy)]` to fixes. | ||
for rm in remove_derive_copies: | ||
# The error span points to the `Copy` token, but we actually want to | ||
# remove both `Copy` and the following comma, if one is present. | ||
# | ||
# #[derive(Foo, Copy, Bar)] -> #[derive(Foo, Bar)] | ||
# #[derive(Foo, Copy)] -> #[derive(Foo,)] | ||
# #[derive(Copy, Bar)] -> #[derive(Bar)] | ||
# #[derive(Copy)] -> #[derive()] | ||
# | ||
# Note that `#[derive()]` is legal, though ideally it should be removed | ||
# by a later cleanup pass. | ||
content = read_file(rm.file_path) | ||
m = SPACE_COMMA_RE.match(content, rm.end_byte) | ||
fix_end_byte = m.end() if m is not None else rm.end_byte | ||
fixes.append(Fix( | ||
file_path=rm.file_path, | ||
line_number=rm.line_number, | ||
start_byte=rm.start_byte, | ||
end_byte=fix_end_byte, | ||
new_text='', | ||
message=MSG_DERIVE_COPY, | ||
)) | ||
|
||
# Group fixes by file | ||
fixes_by_file = defaultdict(list) | ||
for fix in fixes: | ||
fixes_by_file[fix.file_path].append(fix) | ||
fixes_by_file = dict(fixes_by_file) | ||
for file_fixes in fixes_by_file.values(): | ||
file_fixes.sort(key=lambda fix: fix.start_byte) | ||
|
||
# Apply fixes | ||
for file_path, file_fixes in sorted(fixes_by_file.items()): | ||
content = read_file(file_path) | ||
chunks = [] | ||
pos = 0 | ||
prev_fix = None | ||
for fix in file_fixes: | ||
old_text = content[fix.start_byte : fix.end_byte].decode('utf-8') | ||
desc = '%s:%d: %r -> %r (%s)' % ( | ||
file_path, fix.line_number, old_text, fix.new_text, fix.message) | ||
if prev_fix is not None: | ||
if fix.start_byte < prev_fix.end_byte: | ||
if fix.start_byte == prev_fix.start_byte \ | ||
and fix.end_byte == prev_fix.end_byte \ | ||
and fix.new_text == prev_fix.new_text: | ||
# `fix` and `prev_fix` suggest the same change, so we | ||
# don't need to apply `fix`. | ||
continue | ||
# We want to apply fix, but can't because it overlaps with | ||
# `prev_fix`. | ||
print('skipping due to overlap: %s' % desc) | ||
continue | ||
|
||
prev_fix = fix | ||
|
||
print(desc) | ||
if fix.start_byte > pos: | ||
chunks.append(content[pos : fix.start_byte]) | ||
chunks.append(fix.new_text.encode('utf-8')) | ||
pos = fix.end_byte | ||
|
||
if pos < len(content): | ||
chunks.append(content[pos:]) | ||
|
||
new_content = b''.join(chunks) | ||
if not args.dry_run: | ||
open(file_path, 'wb').write(new_content) | ||
print('wrote to %r' % file_path) | ||
else: | ||
print('would write to %r' % file_path) | ||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
pub mod common; | ||
|
||
use crate::common::{check_for_missing_tests_for, test_dir_for}; | ||
use std::fs::{self, File}; | ||
use std::process::Command; | ||
|
||
#[test] | ||
fn check_for_missing_tests() { | ||
check_for_missing_tests_for(file!()); | ||
} | ||
|
||
fn test(file_name: &str) { | ||
let test_dir = test_dir_for(file!(), true); | ||
let path = test_dir.join(file_name); | ||
let fixed_path = path.with_extension("rs.fixed"); | ||
let json_path = path.with_extension("json"); | ||
let script_path = test_dir.join("../../scripts/auto_fix_errors.py"); | ||
|
||
fs::copy(&path, &fixed_path).unwrap(); | ||
|
||
// Run with `--error-format json` to produce JSON input for the script. | ||
let mut cmd = Command::new("rustc"); | ||
cmd.arg("-A") | ||
.arg("warnings") | ||
.arg("--crate-name") | ||
.arg(path.file_stem().unwrap()) | ||
.arg("--crate-type") | ||
.arg("rlib") | ||
.arg("--error-format") | ||
.arg("json") | ||
.arg(&fixed_path) | ||
.stderr(File::create(&json_path).unwrap()); | ||
let status = cmd.status().unwrap(); | ||
assert_eq!( | ||
status.code(), | ||
Some(1), | ||
"command {cmd:?} exited with code {status:?}" | ||
); | ||
|
||
// Run the script to fix errors. | ||
let mut cmd = Command::new("python3"); | ||
cmd.arg(&script_path).arg(&json_path); | ||
let status = cmd.status().unwrap(); | ||
assert!( | ||
status.success(), | ||
"command {cmd:?} exited with code {status:?}" | ||
); | ||
|
||
// There should be no more compile errors now. | ||
let mut cmd = Command::new("rustc"); | ||
cmd.arg("-A") | ||
.arg("warnings") | ||
.arg("--crate-name") | ||
.arg(path.file_stem().unwrap()) | ||
.arg("--crate-type") | ||
.arg("rlib") | ||
.arg(&fixed_path); | ||
let status = cmd.status().unwrap(); | ||
assert!( | ||
status.success(), | ||
"command {cmd:?} exited with code {status:?}" | ||
); | ||
} | ||
|
||
macro_rules! define_test { | ||
($name:ident) => { | ||
#[test] | ||
fn $name() { | ||
test(concat!(stringify!($name), ".rs")); | ||
} | ||
}; | ||
} | ||
|
||
macro_rules! define_tests { | ||
($($name:ident,)*) => { | ||
$(define_test! { $name })* | ||
} | ||
} | ||
|
||
define_tests! { | ||
derive_copy, | ||
} |
Oops, something went wrong.