From 859e3fc7faaf293097ca1b929004f02089b6d43d Mon Sep 17 00:00:00 2001 From: Aleksei Latyshev Date: Tue, 2 Apr 2024 21:29:42 +0200 Subject: [PATCH] [`refurb`] Implement `int-on-sliced-str` (`FURB166`) (#10650) ## Summary implement int_on_sliced_str (FURB166) lint - #1348 - [original lint](https://github.com/dosisod/refurb/blob/master/refurb/checks/builtin/use_int_base_zero.py) ## Test Plan cargo test --- .../resources/test/fixtures/refurb/FURB166.py | 29 ++++ .../src/checkers/ast/analyze/expression.rs | 3 + crates/ruff_linter/src/codes.rs | 1 + crates/ruff_linter/src/rules/refurb/mod.rs | 1 + .../rules/refurb/rules/int_on_sliced_str.rs | 134 +++++++++++++++++ .../ruff_linter/src/rules/refurb/rules/mod.rs | 2 + ...es__refurb__tests__FURB166_FURB166.py.snap | 141 ++++++++++++++++++ ruff.schema.json | 1 + 8 files changed, 312 insertions(+) create mode 100644 crates/ruff_linter/resources/test/fixtures/refurb/FURB166.py create mode 100644 crates/ruff_linter/src/rules/refurb/rules/int_on_sliced_str.rs create mode 100644 crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB166_FURB166.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB166.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB166.py new file mode 100644 index 0000000000000..1c4764787fb06 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB166.py @@ -0,0 +1,29 @@ +# Errors + +_ = int("0b1010"[2:], 2) +_ = int("0o777"[2:], 8) +_ = int("0xFFFF"[2:], 16) + +b = "0b11" +_ = int(b[2:], 2) + +_ = int("0xFFFF"[2:], base=16) + +_ = int(b"0xFFFF"[2:], 16) + + +def get_str(): + return "0xFFF" + + +_ = int(get_str()[2:], 16) + +# OK + +_ = int("0b1100", 0) +_ = int("123", 3) +_ = int("123", 10) +_ = int("0b1010"[3:], 2) +_ = int("0b1010"[:2], 2) +_ = int("12345"[2:]) +_ = int("12345"[2:], xyz=1) # type: ignore diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index f7fb21373d889..5a64941417858 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -973,6 +973,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::UnnecessaryIterableAllocationForFirstElement) { ruff::rules::unnecessary_iterable_allocation_for_first_element(checker, expr); } + if checker.enabled(Rule::IntOnSlicedStr) { + refurb::rules::int_on_sliced_str(checker, call); + } } Expr::Dict(dict) => { if checker.any_enabled(&[ diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index a6db70d943302..60950ee89288c 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -1053,6 +1053,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Refurb, "161") => (RuleGroup::Preview, rules::refurb::rules::BitCount), (Refurb, "163") => (RuleGroup::Preview, rules::refurb::rules::RedundantLogBase), (Refurb, "164") => (RuleGroup::Preview, rules::refurb::rules::UnnecessaryFromFloat), + (Refurb, "166") => (RuleGroup::Preview, rules::refurb::rules::IntOnSlicedStr), (Refurb, "167") => (RuleGroup::Preview, rules::refurb::rules::RegexFlagAlias), (Refurb, "168") => (RuleGroup::Preview, rules::refurb::rules::IsinstanceTypeNone), (Refurb, "169") => (RuleGroup::Preview, rules::refurb::rules::TypeNoneComparison), diff --git a/crates/ruff_linter/src/rules/refurb/mod.rs b/crates/ruff_linter/src/rules/refurb/mod.rs index 67e1022600426..f27f2291d1135 100644 --- a/crates/ruff_linter/src/rules/refurb/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/mod.rs @@ -32,6 +32,7 @@ mod tests { #[test_case(Rule::ImplicitCwd, Path::new("FURB177.py"))] #[test_case(Rule::SingleItemMembershipTest, Path::new("FURB171.py"))] #[test_case(Rule::BitCount, Path::new("FURB161.py"))] + #[test_case(Rule::IntOnSlicedStr, Path::new("FURB166.py"))] #[test_case(Rule::RegexFlagAlias, Path::new("FURB167.py"))] #[test_case(Rule::IsinstanceTypeNone, Path::new("FURB168.py"))] #[test_case(Rule::TypeNoneComparison, Path::new("FURB169.py"))] diff --git a/crates/ruff_linter/src/rules/refurb/rules/int_on_sliced_str.rs b/crates/ruff_linter/src/rules/refurb/rules/int_on_sliced_str.rs new file mode 100644 index 0000000000000..33af2c5ae1d9b --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/rules/int_on_sliced_str.rs @@ -0,0 +1,134 @@ +use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{Expr, ExprCall, Identifier}; +use ruff_text_size::Ranged; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for uses of `int` with an explicit base in which a string expression +/// is stripped of its leading prefix (i.e., `0b`, `0o`, or `0x`). +/// +/// ## Why is this bad? +/// Given an integer string with a prefix (e.g., `0xABC`), Python can automatically +/// determine the base of the integer by the prefix without needing to specify +/// it explicitly. +/// +/// Instead of `int(num[2:], 16)`, use `int(num, 0)`, which will automatically +/// deduce the base based on the prefix. +/// +/// ## Example +/// ```python +/// num = "0xABC" +/// +/// if num.startswith("0b"): +/// i = int(num[2:], 2) +/// elif num.startswith("0o"): +/// i = int(num[2:], 8) +/// elif num.startswith("0x"): +/// i = int(num[2:], 16) +/// +/// print(i) +/// ``` +/// +/// Use instead: +/// ```python +/// num = "0xABC" +/// +/// i = int(num, 0) +/// +/// print(i) +/// ``` +/// +/// ## Fix safety +/// The rule's fix is marked as unsafe, as Ruff cannot guarantee that the +/// argument to `int` will remain valid when its base is included in the +/// function call. +/// +/// ## References +/// - [Python documentation: `int`](https://docs.python.org/3/library/functions.html#int) +#[violation] +pub struct IntOnSlicedStr { + base: u8, +} + +impl AlwaysFixableViolation for IntOnSlicedStr { + #[derive_message_formats] + fn message(&self) -> String { + let IntOnSlicedStr { base } = self; + format!("Use of `int` with explicit `base={base}` after removing prefix") + } + + fn fix_title(&self) -> String { + format!("Replace with `base=0`") + } +} + +pub(crate) fn int_on_sliced_str(checker: &mut Checker, call: &ExprCall) { + // Verify that the function is `int`. + let Expr::Name(name) = call.func.as_ref() else { + return; + }; + if name.id.as_str() != "int" { + return; + } + if !checker.semantic().is_builtin("int") { + return; + } + + // There must be exactly two arguments (e.g., `int(num[2:], 16)`). + let (expression, base) = match ( + call.arguments.args.as_ref(), + call.arguments.keywords.as_ref(), + ) { + ([expression], [base]) if base.arg.as_ref().map(Identifier::as_str) == Some("base") => { + (expression, &base.value) + } + ([expression, base], []) => (expression, base), + _ => { + return; + } + }; + + // The base must be a number literal with a value of 2, 8, or 16. + let Some(base_u8) = base + .as_number_literal_expr() + .and_then(|base| base.value.as_int()) + .and_then(ruff_python_ast::Int::as_u8) + else { + return; + }; + if !matches!(base_u8, 2 | 8 | 16) { + return; + } + + // Determine whether the expression is a slice of a string (e.g., `num[2:]`). + let Expr::Subscript(expr_subscript) = expression else { + return; + }; + let Expr::Slice(expr_slice) = expr_subscript.slice.as_ref() else { + return; + }; + if expr_slice.upper.is_some() || expr_slice.step.is_some() { + return; + } + if !expr_slice + .lower + .as_ref() + .and_then(|expr| expr.as_number_literal_expr()) + .and_then(|expr| expr.value.as_int()) + .is_some_and(|expr| expr.as_u8() == Some(2)) + { + return; + } + + let mut diagnostic = Diagnostic::new(IntOnSlicedStr { base: base_u8 }, call.range()); + diagnostic.set_fix(Fix::unsafe_edits( + Edit::range_replacement( + checker.locator().slice(&*expr_subscript.value).to_string(), + expression.range(), + ), + [Edit::range_replacement("0".to_string(), base.range())], + )); + checker.diagnostics.push(diagnostic); +} diff --git a/crates/ruff_linter/src/rules/refurb/rules/mod.rs b/crates/ruff_linter/src/rules/refurb/rules/mod.rs index cbb8d1fc7f0bb..c18f2350ecd4d 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/mod.rs @@ -5,6 +5,7 @@ pub(crate) use for_loop_set_mutations::*; pub(crate) use hashlib_digest_hex::*; pub(crate) use if_expr_min_max::*; pub(crate) use implicit_cwd::*; +pub(crate) use int_on_sliced_str::*; pub(crate) use isinstance_type_none::*; pub(crate) use list_reverse_copy::*; pub(crate) use math_constant::*; @@ -31,6 +32,7 @@ mod for_loop_set_mutations; mod hashlib_digest_hex; mod if_expr_min_max; mod implicit_cwd; +mod int_on_sliced_str; mod isinstance_type_none; mod list_reverse_copy; mod math_constant; diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB166_FURB166.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB166_FURB166.py.snap new file mode 100644 index 0000000000000..c78dbdb22cad2 --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB166_FURB166.py.snap @@ -0,0 +1,141 @@ +--- +source: crates/ruff_linter/src/rules/refurb/mod.rs +--- +FURB166.py:3:5: FURB166 [*] Use of `int` with explicit `base=2` after removing prefix + | +1 | # Errors +2 | +3 | _ = int("0b1010"[2:], 2) + | ^^^^^^^^^^^^^^^^^^^^ FURB166 +4 | _ = int("0o777"[2:], 8) +5 | _ = int("0xFFFF"[2:], 16) + | + = help: Replace with `base=0` + +ℹ Unsafe fix +1 1 | # Errors +2 2 | +3 |-_ = int("0b1010"[2:], 2) + 3 |+_ = int("0b1010", 0) +4 4 | _ = int("0o777"[2:], 8) +5 5 | _ = int("0xFFFF"[2:], 16) +6 6 | + +FURB166.py:4:5: FURB166 [*] Use of `int` with explicit `base=8` after removing prefix + | +3 | _ = int("0b1010"[2:], 2) +4 | _ = int("0o777"[2:], 8) + | ^^^^^^^^^^^^^^^^^^^ FURB166 +5 | _ = int("0xFFFF"[2:], 16) + | + = help: Replace with `base=0` + +ℹ Unsafe fix +1 1 | # Errors +2 2 | +3 3 | _ = int("0b1010"[2:], 2) +4 |-_ = int("0o777"[2:], 8) + 4 |+_ = int("0o777", 0) +5 5 | _ = int("0xFFFF"[2:], 16) +6 6 | +7 7 | b = "0b11" + +FURB166.py:5:5: FURB166 [*] Use of `int` with explicit `base=16` after removing prefix + | +3 | _ = int("0b1010"[2:], 2) +4 | _ = int("0o777"[2:], 8) +5 | _ = int("0xFFFF"[2:], 16) + | ^^^^^^^^^^^^^^^^^^^^^ FURB166 +6 | +7 | b = "0b11" + | + = help: Replace with `base=0` + +ℹ Unsafe fix +2 2 | +3 3 | _ = int("0b1010"[2:], 2) +4 4 | _ = int("0o777"[2:], 8) +5 |-_ = int("0xFFFF"[2:], 16) + 5 |+_ = int("0xFFFF", 0) +6 6 | +7 7 | b = "0b11" +8 8 | _ = int(b[2:], 2) + +FURB166.py:8:5: FURB166 [*] Use of `int` with explicit `base=2` after removing prefix + | + 7 | b = "0b11" + 8 | _ = int(b[2:], 2) + | ^^^^^^^^^^^^^ FURB166 + 9 | +10 | _ = int("0xFFFF"[2:], base=16) + | + = help: Replace with `base=0` + +ℹ Unsafe fix +5 5 | _ = int("0xFFFF"[2:], 16) +6 6 | +7 7 | b = "0b11" +8 |-_ = int(b[2:], 2) + 8 |+_ = int(b, 0) +9 9 | +10 10 | _ = int("0xFFFF"[2:], base=16) +11 11 | + +FURB166.py:10:5: FURB166 [*] Use of `int` with explicit `base=16` after removing prefix + | + 8 | _ = int(b[2:], 2) + 9 | +10 | _ = int("0xFFFF"[2:], base=16) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB166 +11 | +12 | _ = int(b"0xFFFF"[2:], 16) + | + = help: Replace with `base=0` + +ℹ Unsafe fix +7 7 | b = "0b11" +8 8 | _ = int(b[2:], 2) +9 9 | +10 |-_ = int("0xFFFF"[2:], base=16) + 10 |+_ = int("0xFFFF", base=0) +11 11 | +12 12 | _ = int(b"0xFFFF"[2:], 16) +13 13 | + +FURB166.py:12:5: FURB166 [*] Use of `int` with explicit `base=16` after removing prefix + | +10 | _ = int("0xFFFF"[2:], base=16) +11 | +12 | _ = int(b"0xFFFF"[2:], 16) + | ^^^^^^^^^^^^^^^^^^^^^^ FURB166 + | + = help: Replace with `base=0` + +ℹ Unsafe fix +9 9 | +10 10 | _ = int("0xFFFF"[2:], base=16) +11 11 | +12 |-_ = int(b"0xFFFF"[2:], 16) + 12 |+_ = int(b"0xFFFF", 0) +13 13 | +14 14 | +15 15 | def get_str(): + +FURB166.py:19:5: FURB166 [*] Use of `int` with explicit `base=16` after removing prefix + | +19 | _ = int(get_str()[2:], 16) + | ^^^^^^^^^^^^^^^^^^^^^^ FURB166 +20 | +21 | # OK + | + = help: Replace with `base=0` + +ℹ Unsafe fix +16 16 | return "0xFFF" +17 17 | +18 18 | +19 |-_ = int(get_str()[2:], 16) + 19 |+_ = int(get_str(), 0) +20 20 | +21 21 | # OK +22 22 | diff --git a/ruff.schema.json b/ruff.schema.json index 1c26154c2d19f..f6d36e1cfc51a 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3063,6 +3063,7 @@ "FURB161", "FURB163", "FURB164", + "FURB166", "FURB167", "FURB168", "FURB169",