From d411d44fa0959cd4ab699384fd84554deb05bcd5 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 15 Aug 2024 16:23:29 -0700 Subject: [PATCH 1/5] analyze: add script to automatically fix compile errors --- c2rust-analyze/scripts/auto_fix_errors.py | 113 ++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 c2rust-analyze/scripts/auto_fix_errors.py diff --git a/c2rust-analyze/scripts/auto_fix_errors.py b/c2rust-analyze/scripts/auto_fix_errors.py new file mode 100644 index 0000000000..8a045a8e3b --- /dev/null +++ b/c2rust-analyze/scripts/auto_fix_errors.py @@ -0,0 +1,113 @@ +import argparse +from dataclasses import dataclass +import json +import sys + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description='automatically apply rustc-suggested fixes for compile errors') + parser.add_argument('-n', '--dry-run', action='store_true', + help="print fixes that would be applied, but don't modify any files") + parser.add_argument('path', metavar='ERRORS.JSON', + help='output of `rustc --error-format json`') + return parser.parse_args() + +@dataclass(frozen=True) +class Fix: + file_path: str + line_number: int + start_byte: int + end_byte: int + new_text: str + message: str + +def main(): + args = parse_args() + + fixes = [] + def gather_fixes(j, message): + for span in j['spans']: + if span.get('suggestion_applicability') != 'MachineApplicable': + continue + fix = Fix( + file_path=span['file_name'], + line_number=span['line_start'], + start_byte=span['byte_start'], + end_byte=span['byte_end'], + new_text=span['suggested_replacement'], + message=message, + ) + fixes.append(fix) + + for child in j['children']: + gather_fixes(child, message) + + with open(args.path, 'r') as f: + for line in f: + j = json.loads(line) + + # Only process errors, not warnings. + level = j['level'] + if level == 'error': + pass + elif level in ('warning', 'failure-note'): + continue + else: + # `help`, `note`, etc should not appear at top level. + assert False, 'unexpected `level` %r' % (level,) + + gather_fixes(j, j['message']) + + fixes_by_file = {} + for fix in fixes: + file_fixes = fixes_by_file.get(fix.file_path) + if file_fixes is None: + fixes_by_file[fix.file_path] = [fix] + else: + file_fixes.append(fix) + for file_fixes in fixes_by_file.values(): + file_fixes.sort(key=lambda fix: fix.start_byte) + + # Apply fixes + for file_path, file_fixes in sorted(fixes_by_file.items()): + content = open(file_path, 'rb').read() + chunks = [] + pos = 0 + prev_fix = None + for fix in file_fixes: + old_text = content[fix.start_byte : fix.end_byte].decode('utf-8') + desc = '%s:%d: %r -> %r (%s)' % ( + file_path, fix.line_number, old_text, fix.new_text, fix.message) + if prev_fix is not None: + if fix.start_byte < prev_fix.end_byte: + if fix.start_byte == prev_fix.start_byte \ + and fix.end_byte == prev_fix.end_byte \ + and fix.new_text == prev_fix.new_text: + # `fix` and `prev_fix` suggest the same change, so we + # don't need to apply `fix`. + continue + # We want to apply fix, but can't because it overlaps with + # `prev_fix`. + print('skipping due to overlap: %s' % desc) + continue + + prev_fix = fix + + print(desc) + if fix.start_byte > pos: + chunks.append(content[pos : fix.start_byte]) + chunks.append(fix.new_text.encode('utf-8')) + pos = fix.end_byte + + if pos < len(content): + chunks.append(content[pos:]) + + new_content = b''.join(chunks) + if not args.dry_run: + open(file_path, 'wb').write(new_content) + print('wrote to %r' % file_path) + else: + print('would write to %r' % file_path) + +if __name__ == '__main__': + main() From 39b22cf2f952fa23c422d370a6fbc3383d4dc3a2 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 15 Aug 2024 17:01:54 -0700 Subject: [PATCH 2/5] analyze: auto_fix_errors.py: handle suggested lifetime bounds --- c2rust-analyze/scripts/auto_fix_errors.py | 109 +++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/scripts/auto_fix_errors.py b/c2rust-analyze/scripts/auto_fix_errors.py index 8a045a8e3b..d0370479c2 100644 --- a/c2rust-analyze/scripts/auto_fix_errors.py +++ b/c2rust-analyze/scripts/auto_fix_errors.py @@ -1,6 +1,7 @@ import argparse from dataclasses import dataclass import json +import re import sys def parse_args() -> argparse.Namespace: @@ -21,6 +22,23 @@ class Fix: new_text: str message: str +@dataclass(frozen=True) +class LifetimeBound: + file_path: str + line_number: int + # Byte offset of the start of the lifetime parameter declaration. + start_byte: int + # Byte offset of the end of the lifetime parameter declaration. + end_byte: int + # The lifetime to use in the new bound. If `'a: 'b` is the suggested + # bound, then `start/end_byte` points to the declaration of `'a`, and + # `bound_lifetime` is the string `"'b"`. + bound_lifetime: str + +LIFETIME_DEFINED_RE = re.compile(r'^lifetime `([^`]*)` defined here$') +CONSIDER_ADDING_BOUND_RE = re.compile(r'^consider adding the following bound: `([^`:]*): ([^`]*)`$') +SPACE_COLON_RE = re.compile(rb'\s*:') + def main(): args = parse_args() @@ -42,6 +60,50 @@ def gather_fixes(j, message): for child in j['children']: gather_fixes(child, message) + lifetime_bounds = [] + def gather_lifetime_bounds(j): + # We look for a particular pattern seen in lifetime errors. First, + # there should be a span with label "lifetime 'a defined here" pointing + # at the declaration of `'a`. Second, there should be a child of type + # `help` with the text "consider adding the following bound: `'a: 'b`". + + decl_spans = {} + for span in j['spans']: + m = LIFETIME_DEFINED_RE.match(span['label']) + if m is not None: + lifetime = m.group(1) + if lifetime in decl_spans: + # Duplicate declaration for this lifetime. This shouldn't + # happen, but we can proceed as long as the lifetime isn't + # the target of the bound. We mark the duplicate lifetime + # so it can't be used as the target. + decl_spans[lifetime] = None + continue + decl_spans[lifetime] = span + + for child in j['children']: + if child['level'] != 'help': + continue + m = CONSIDER_ADDING_BOUND_RE.match(child['message']) + if m is None: + continue + lifetime_a = m.group(1) + lifetime_b = m.group(2) + span = decl_spans.get(lifetime_a) + if span is None: + # We don't have anywhere to insert the new bound. This can + # also happen if there were duplicate declaration spans for + # this lifetime (we explicitly insert `None` into the map in + # that case). + continue + lifetime_bounds.append(LifetimeBound( + file_path=span['file_name'], + line_number=span['line_start'], + start_byte=span['byte_start'], + end_byte=span['byte_end'], + bound_lifetime=lifetime_b, + )) + with open(args.path, 'r') as f: for line in f: j = json.loads(line) @@ -58,6 +120,51 @@ def gather_fixes(j, message): gather_fixes(j, j['message']) + if j['message'] == 'lifetime may not live long enough': + gather_lifetime_bounds(j) + + # Convert suggested lifetime bounds to fixes. We have to group the bounds + # first because there may be multiple suggested bounds for a single + # declaration, in which case we want to generate a single `Fix` that adds + # all of them at once. + + # Maps the `(file_path, line_number, start_byte, end_byte)` of the + # declaration site to the set of new lifetimes to apply at that site. + grouped_lifetime_bounds = {} + for lb in lifetime_bounds: + key = (lb.file_path, lb.line_number, lb.start_byte, lb.end_byte) + if key not in grouped_lifetime_bounds: + grouped_lifetime_bounds[key] = set() + grouped_lifetime_bounds[key].add(lb.bound_lifetime) + + file_content = {} + def read_file(file_path): + if file_path not in file_content: + file_content[file_path] = open(file_path, 'rb').read() + return file_content[file_path] + + for key, bound_lifetimes in sorted(grouped_lifetime_bounds.items()): + (file_path, line_number, start_byte, end_byte) = key + content = read_file(file_path) + decl_lifetime = content[start_byte : end_byte].decode('utf-8') + bound_lifetimes = ' + '.join(bound_lifetimes) + m = SPACE_COLON_RE.match(content, end_byte) + if m is None: + fix_end_byte = end_byte + fix_new_text = '%s: %s' % (decl_lifetime, bound_lifetimes) + else: + space_colon = m.group().decode('utf-8') + fix_end_byte = m.end() + fix_new_text = '%s%s %s +' % (decl_lifetime, space_colon, bound_lifetimes) + fixes.append(Fix( + file_path=file_path, + line_number=line_number, + start_byte=start_byte, + end_byte=fix_end_byte, + new_text=fix_new_text, + message='lifetime may not live long enough', + )) + fixes_by_file = {} for fix in fixes: file_fixes = fixes_by_file.get(fix.file_path) @@ -70,7 +177,7 @@ def gather_fixes(j, message): # Apply fixes for file_path, file_fixes in sorted(fixes_by_file.items()): - content = open(file_path, 'rb').read() + content = read_file(file_path) chunks = [] pos = 0 prev_fix = None From 2a24483e74c25d0e6dfb1795fe1f07009868e4d6 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 21 Oct 2024 16:45:21 -0700 Subject: [PATCH 3/5] auto_fix_errors.py: remove `Copy` from types that don't support it --- .gitignore | 1 + c2rust-analyze/.gitignore | 1 + c2rust-analyze/scripts/auto_fix_errors.py | 63 +++++++++++++- c2rust-analyze/tests/auto_fix_errors.rs | 82 +++++++++++++++++++ .../tests/auto_fix_errors/derive_copy.rs | 49 +++++++++++ 5 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 c2rust-analyze/tests/auto_fix_errors.rs create mode 100644 c2rust-analyze/tests/auto_fix_errors/derive_copy.rs diff --git a/.gitignore b/.gitignore index 9a1f3c0bfd..ff89bde6b0 100644 --- a/.gitignore +++ b/.gitignore @@ -49,5 +49,6 @@ marks.*.json # Outputs of `c2rust-analyze` inspect/ *.analysis.txt +*.rs.fixed analysis.txt polonius_cache/ diff --git a/c2rust-analyze/.gitignore b/c2rust-analyze/.gitignore index c0dc9bcc98..6dab7813d9 100644 --- a/c2rust-analyze/.gitignore +++ b/c2rust-analyze/.gitignore @@ -1,2 +1,3 @@ /inspect/ *.rlib +/tests/auto_fix_errors/*.json diff --git a/c2rust-analyze/scripts/auto_fix_errors.py b/c2rust-analyze/scripts/auto_fix_errors.py index d0370479c2..544be27d53 100644 --- a/c2rust-analyze/scripts/auto_fix_errors.py +++ b/c2rust-analyze/scripts/auto_fix_errors.py @@ -35,9 +35,22 @@ class LifetimeBound: # `bound_lifetime` is the string `"'b"`. bound_lifetime: str +@dataclass(frozen=True) +class RemoveDeriveCopy: + file_path: str + line_number: int + # Byte offset of the start of the token `Copy`. + start_byte: int + # Byte offset of the end of the token `Copy`. + end_byte: int + +MSG_LIFETIME_BOUND = 'lifetime may not live long enough' +MSG_DERIVE_COPY = 'the trait `Copy` may not be implemented for this type' + LIFETIME_DEFINED_RE = re.compile(r'^lifetime `([^`]*)` defined here$') CONSIDER_ADDING_BOUND_RE = re.compile(r'^consider adding the following bound: `([^`:]*): ([^`]*)`$') SPACE_COLON_RE = re.compile(rb'\s*:') +SPACE_COMMA_RE = re.compile(rb'\s*,') def main(): args = parse_args() @@ -104,6 +117,25 @@ def gather_lifetime_bounds(j): bound_lifetime=lifetime_b, )) + remove_derive_copies = [] + def handle_derive_copy(j): + # Find the "primary span", which covers the `Copy` token. + primary_span = None + for span in j['spans']: + if span.get('is_primary'): + primary_span = span + break + + if primary_span is None: + return + + remove_derive_copies.append(RemoveDeriveCopy( + file_path=primary_span['file_name'], + line_number=primary_span['line_start'], + start_byte=primary_span['byte_start'], + end_byte=primary_span['byte_end'], + )) + with open(args.path, 'r') as f: for line in f: j = json.loads(line) @@ -120,8 +152,10 @@ def gather_lifetime_bounds(j): gather_fixes(j, j['message']) - if j['message'] == 'lifetime may not live long enough': + if j['message'] == MSG_LIFETIME_BOUND: gather_lifetime_bounds(j) + elif j['message'] == MSG_DERIVE_COPY: + handle_derive_copy(j) # Convert suggested lifetime bounds to fixes. We have to group the bounds # first because there may be multiple suggested bounds for a single @@ -162,9 +196,34 @@ def read_file(file_path): start_byte=start_byte, end_byte=fix_end_byte, new_text=fix_new_text, - message='lifetime may not live long enough', + message=MSG_LIFETIME_BOUND, + )) + + # Convert errors about `#[derive(Copy)]` to fixes. + for rm in remove_derive_copies: + # The error span points to the `Copy` token, but we actually want to + # remove both `Copy` and the following comma, if one is present. + # + # #[derive(Foo, Copy, Bar)] -> #[derive(Foo, Bar)] + # #[derive(Foo, Copy)] -> #[derive(Foo,)] + # #[derive(Copy, Bar)] -> #[derive(Bar)] + # #[derive(Copy)] -> #[derive()] + # + # Note that `#[derive()]` is legal, though ideally it should be removed + # by a later cleanup pass. + content = read_file(rm.file_path) + m = SPACE_COMMA_RE.match(content, rm.end_byte) + fix_end_byte = m.end() if m is not None else rm.end_byte + fixes.append(Fix( + file_path=rm.file_path, + line_number=rm.line_number, + start_byte=rm.start_byte, + end_byte=fix_end_byte, + new_text='', + message=MSG_DERIVE_COPY, )) + # Group fixes by file fixes_by_file = {} for fix in fixes: file_fixes = fixes_by_file.get(fix.file_path) diff --git a/c2rust-analyze/tests/auto_fix_errors.rs b/c2rust-analyze/tests/auto_fix_errors.rs new file mode 100644 index 0000000000..410d9d21c2 --- /dev/null +++ b/c2rust-analyze/tests/auto_fix_errors.rs @@ -0,0 +1,82 @@ +pub mod common; + +use crate::common::{check_for_missing_tests_for, test_dir_for}; +use std::fs::{self, File}; +use std::process::Command; + +#[test] +fn check_for_missing_tests() { + check_for_missing_tests_for(file!()); +} + +fn test(file_name: &str) { + let test_dir = test_dir_for(file!(), true); + let path = test_dir.join(file_name); + let fixed_path = path.with_extension("rs.fixed"); + let json_path = path.with_extension("json"); + let script_path = test_dir.join("../../scripts/auto_fix_errors.py"); + + fs::copy(&path, &fixed_path).unwrap(); + + // Run with `--error-format json` to produce JSON input for the script. + let mut cmd = Command::new("rustc"); + cmd.arg("-A") + .arg("warnings") + .arg("--crate-name") + .arg(path.file_stem().unwrap()) + .arg("--crate-type") + .arg("rlib") + .arg("--error-format") + .arg("json") + .arg(&fixed_path) + .stderr(File::create(&json_path).unwrap()); + let status = cmd.status().unwrap(); + assert_eq!( + status.code(), + Some(1), + "command {cmd:?} exited with code {status:?}" + ); + + // Run the script to fix errors. + let mut cmd = Command::new("python3"); + cmd.arg(&script_path).arg(&json_path); + let status = cmd.status().unwrap(); + assert!( + status.success(), + "command {cmd:?} exited with code {status:?}" + ); + + // There should be no more compile errors now. + let mut cmd = Command::new("rustc"); + cmd.arg("-A") + .arg("warnings") + .arg("--crate-name") + .arg(path.file_stem().unwrap()) + .arg("--crate-type") + .arg("rlib") + .arg(&fixed_path); + let status = cmd.status().unwrap(); + assert!( + status.success(), + "command {cmd:?} exited with code {status:?}" + ); +} + +macro_rules! define_test { + ($name:ident) => { + #[test] + fn $name() { + test(concat!(stringify!($name), ".rs")); + } + }; +} + +macro_rules! define_tests { + ($($name:ident,)*) => { + $(define_test! { $name })* + } +} + +define_tests! { + derive_copy, +} diff --git a/c2rust-analyze/tests/auto_fix_errors/derive_copy.rs b/c2rust-analyze/tests/auto_fix_errors/derive_copy.rs new file mode 100644 index 0000000000..9f2c20c925 --- /dev/null +++ b/c2rust-analyze/tests/auto_fix_errors/derive_copy.rs @@ -0,0 +1,49 @@ +#[derive(Clone, Copy)] +struct S1 { + x: Box, + y: &'static i32, +} + +#[derive(Clone, Copy,)] +struct S2 { + x: Box, + y: &'static i32, +} + + +#[derive(Copy, Clone)] +struct S3 { + x: Box, + y: &'static i32, +} + + +#[derive(Copy, Clone,)] +struct S4 { + x: Box, + y: &'static i32, +} + + +#[derive(Copy)] +struct S5 { + x: Box, + y: &'static i32, +} + +#[derive(Copy,)] +struct S6 { + x: Box, + y: &'static i32, +} + + +#[derive( + Copy + , + Clone +)] +struct S7 { + x: Box, + y: &'static i32, +} From ba9bad90e8b8a1ef12c408de685b47246b5d78ba Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 2 Dec 2024 11:25:24 -0800 Subject: [PATCH 4/5] auto_fix_errors.py: make compatible with python < 3.7 --- c2rust-analyze/scripts/auto_fix_errors.py | 51 +++++++++++++---------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/c2rust-analyze/scripts/auto_fix_errors.py b/c2rust-analyze/scripts/auto_fix_errors.py index 544be27d53..e31a9a2497 100644 --- a/c2rust-analyze/scripts/auto_fix_errors.py +++ b/c2rust-analyze/scripts/auto_fix_errors.py @@ -1,5 +1,5 @@ import argparse -from dataclasses import dataclass +from collections import namedtuple import json import re import sys @@ -13,36 +13,43 @@ def parse_args() -> argparse.Namespace: help='output of `rustc --error-format json`') return parser.parse_args() -@dataclass(frozen=True) -class Fix: - file_path: str - line_number: int - start_byte: int - end_byte: int - new_text: str - message: str +Fix = namedtuple('Fix', ( + # Path to the file to modify. + 'file_path', + # Line number where the fix will be applied. This is used for debug output + # only. + 'line_number', + # Start of the byte range to replace. + 'start_byte', + # End (exclusive) of the byte range to replace. + 'end_byte', + # Replacement text (as a `str`, not `bytes`). + 'new_text', + # The original error message. This is used for debug output only. + 'message', +)) -@dataclass(frozen=True) -class LifetimeBound: - file_path: str - line_number: int +LifetimeBound = namedtuple('LifetimeBound', ( + 'file_path', + 'line_number', # Byte offset of the start of the lifetime parameter declaration. - start_byte: int + 'start_byte', # Byte offset of the end of the lifetime parameter declaration. - end_byte: int + 'end_byte', # The lifetime to use in the new bound. If `'a: 'b` is the suggested # bound, then `start/end_byte` points to the declaration of `'a`, and # `bound_lifetime` is the string `"'b"`. - bound_lifetime: str + 'bound_lifetime', +)) -@dataclass(frozen=True) -class RemoveDeriveCopy: - file_path: str - line_number: int +RemoveDeriveCopy = namedtuple('RemoveDeriveCopy', ( + 'file_path', + 'line_number', # Byte offset of the start of the token `Copy`. - start_byte: int + 'start_byte', # Byte offset of the end of the token `Copy`. - end_byte: int + 'end_byte', +)) MSG_LIFETIME_BOUND = 'lifetime may not live long enough' MSG_DERIVE_COPY = 'the trait `Copy` may not be implemented for this type' From 7098d9e8a732db4cf7020bb48764e16df00c5a07 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 3 Dec 2024 11:32:52 -0800 Subject: [PATCH 5/5] auto_fix_errors.py: simplify code using defaultdict --- c2rust-analyze/scripts/auto_fix_errors.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/c2rust-analyze/scripts/auto_fix_errors.py b/c2rust-analyze/scripts/auto_fix_errors.py index e31a9a2497..c622faf207 100644 --- a/c2rust-analyze/scripts/auto_fix_errors.py +++ b/c2rust-analyze/scripts/auto_fix_errors.py @@ -1,5 +1,5 @@ import argparse -from collections import namedtuple +from collections import defaultdict, namedtuple import json import re import sys @@ -231,13 +231,10 @@ def read_file(file_path): )) # Group fixes by file - fixes_by_file = {} + fixes_by_file = defaultdict(list) for fix in fixes: - file_fixes = fixes_by_file.get(fix.file_path) - if file_fixes is None: - fixes_by_file[fix.file_path] = [fix] - else: - file_fixes.append(fix) + fixes_by_file[fix.file_path].append(fix) + fixes_by_file = dict(fixes_by_file) for file_fixes in fixes_by_file.values(): file_fixes.sort(key=lambda fix: fix.start_byte)