From 1ed5e0337b2936d34970bc5633254cf60bd99fc5 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Thu, 4 Apr 2024 16:01:41 -0400 Subject: [PATCH] Prevent shadowing variables in loops --- .../assignment/array_assignment_analyzer.rs | 1 + .../expr/binop/assignment_analyzer.rs | 24 ++++---- .../expr/call/function_call_analyzer.rs | 2 +- src/analyzer/expr/call_analyzer.rs | 2 +- src/analyzer/expr/pipe_analyzer.rs | 1 + src/analyzer/expr/variable_fetch_analyzer.rs | 57 +++++++++++++++++++ src/analyzer/functionlike_analyzer.rs | 1 + src/analyzer/scope_context/mod.rs | 3 + src/analyzer/stmt/foreach_analyzer.rs | 3 + src/analyzer/stmt/loop_analyzer.rs | 9 +++ src/analyzer/stmt/try_analyzer.rs | 1 + src/code_info/data_flow/node.rs | 7 ++- src/code_info/issue.rs | 9 ++- src/code_info_builder/lib.rs | 3 +- .../Foreach/overwriteExistingVar/input.hack | 9 +++ .../Foreach/overwriteExistingVar/output.txt | 1 + 16 files changed, 115 insertions(+), 18 deletions(-) create mode 100644 tests/inference/Loop/Foreach/overwriteExistingVar/output.txt diff --git a/src/analyzer/expr/assignment/array_assignment_analyzer.rs b/src/analyzer/expr/assignment/array_assignment_analyzer.rs index 774051e1..827fad9a 100644 --- a/src/analyzer/expr/assignment/array_assignment_analyzer.rs +++ b/src/analyzer/expr/assignment/array_assignment_analyzer.rs @@ -118,6 +118,7 @@ pub(crate) fn analyze( false, false, false, + false, )); } } diff --git a/src/analyzer/expr/binop/assignment_analyzer.rs b/src/analyzer/expr/binop/assignment_analyzer.rs index 31357fc0..6e9d9de1 100644 --- a/src/analyzer/expr/binop/assignment_analyzer.rs +++ b/src/analyzer/expr/binop/assignment_analyzer.rs @@ -201,13 +201,16 @@ pub(crate) fn analyze( origin_node_ids.retain(|id| { if let Some(node) = analysis_data.data_flow_graph.get_node(id) { - match &node.kind { - DataFlowNodeKind::ForLoopInit { - var_name, - start_offset, - end_offset, - } => { - var_name == var_id + match (&id, &node.kind) { + ( + DataFlowNodeId::ForInit(start_offset, end_offset), + DataFlowNodeKind::ForLoopInit { + var_id: for_loop_var_id, + .. + }, + ) => { + for_loop_var_id.0 + == statements_analyzer.get_interner().get(var_id).unwrap() && (pos.start_offset() as u32) > *start_offset && (pos.end_offset() as u32) < *end_offset } @@ -525,6 +528,9 @@ fn analyze_assignment_to_variable( }, has_parent_nodes, assign_value_type.has_awaitable_types(), + context.inside_loop + && !context.inside_assignment_op + && context.for_loop_init_bounds.0 > 0, ) } else { DataFlowNode::get_for_lvar( @@ -563,9 +569,7 @@ fn analyze_assignment_to_variable( let for_node = DataFlowNode { id: DataFlowNodeId::ForInit(start_offset, end_offset), kind: DataFlowNodeKind::ForLoopInit { - start_offset, - end_offset, - var_name: var_id.clone(), + var_id: VarId(statements_analyzer.get_interner().get(var_id).unwrap()), }, }; diff --git a/src/analyzer/expr/call/function_call_analyzer.rs b/src/analyzer/expr/call/function_call_analyzer.rs index b0f5db19..acb1a2d8 100644 --- a/src/analyzer/expr/call/function_call_analyzer.rs +++ b/src/analyzer/expr/call/function_call_analyzer.rs @@ -199,7 +199,7 @@ pub(crate) fn analyze( &functionlike_id, ); - if !function_storage.is_production_code { + if !function_storage.is_production_code && function_storage.user_defined { if let Some(calling_context) = context.function_context.calling_functionlike_id { let is_caller_production = match calling_context { FunctionLikeIdentifier::Function(function_id) => { diff --git a/src/analyzer/expr/call_analyzer.rs b/src/analyzer/expr/call_analyzer.rs index d3b55277..d4f7d59d 100644 --- a/src/analyzer/expr/call_analyzer.rs +++ b/src/analyzer/expr/call_analyzer.rs @@ -414,7 +414,7 @@ pub(crate) fn check_method_args( check_template_result(statements_analyzer, template_result, pos, &functionlike_id); } - if !functionlike_storage.is_production_code { + if !functionlike_storage.is_production_code && functionlike_storage.user_defined { if let Some(calling_context) = context.function_context.calling_functionlike_id { let is_caller_production = match calling_context { FunctionLikeIdentifier::Function(function_id) => { diff --git a/src/analyzer/expr/pipe_analyzer.rs b/src/analyzer/expr/pipe_analyzer.rs index 00f25990..43b88bea 100644 --- a/src/analyzer/expr/pipe_analyzer.rs +++ b/src/analyzer/expr/pipe_analyzer.rs @@ -39,6 +39,7 @@ pub(crate) fn analyze( false, true, false, + false, ); pipe_expr_type.parent_nodes.push(parent_node.clone()); diff --git a/src/analyzer/expr/variable_fetch_analyzer.rs b/src/analyzer/expr/variable_fetch_analyzer.rs index 79e97ad7..092e01c2 100644 --- a/src/analyzer/expr/variable_fetch_analyzer.rs +++ b/src/analyzer/expr/variable_fetch_analyzer.rs @@ -4,6 +4,7 @@ use crate::{ stmt_analyzer::AnalysisError, }; use hakana_reflection_info::{ + code_location::HPos, data_flow::{ graph::GraphKind, node::{DataFlowNode, DataFlowNodeId, DataFlowNodeKind}, @@ -64,6 +65,62 @@ pub(crate) fn analyze( EFFECT_READ_GLOBALS, ); } else if let Some(var_type) = context.vars_in_scope.get(&lid.1 .1) { + if var_type.parent_nodes.len() > 1 + && !context.inside_loop_exprs + && context.for_loop_init_bounds.0 == 0 + && !context.inside_assignment_op + && !lid.1 .1.contains(' ') // eliminate temp vars + && analysis_data.data_flow_graph.kind == GraphKind::FunctionBody + { + let mut loop_init_pos: Option = None; + + for parent_node in &var_type.parent_nodes { + if let DataFlowNodeKind::VariableUseSource { + pos: for_loop_init_pos, + from_loop_init: true, + .. + } = parent_node.kind + { + if let Some(loop_init_pos_inner) = loop_init_pos { + if for_loop_init_pos.start_offset < loop_init_pos_inner.start_offset { + loop_init_pos = Some(for_loop_init_pos); + } + } else { + loop_init_pos = Some(for_loop_init_pos); + } + } + } + + if let Some(loop_init_pos) = loop_init_pos { + for parent_node in &var_type.parent_nodes { + if let DataFlowNodeKind::VariableUseSource { + has_parent_nodes: true, + from_loop_init: false, + pos: parent_node_pos, + .. + } = parent_node.kind + { + if parent_node_pos.start_offset < loop_init_pos.start_offset { + analysis_data.maybe_add_issue( + Issue::new( + IssueKind::ShadowedLoopVar, + format!( + "Assignment to {} overwrites a variable defined above and referenced below", + lid.1 .1 + ), + loop_init_pos, + &context.function_context.calling_functionlike_id, + ), + statements_analyzer.get_config(), + statements_analyzer.get_file_path_actual(), + ); + break; + } + } + } + } + } + let mut var_type = (**var_type).clone(); var_type = add_dataflow_to_variable( diff --git a/src/analyzer/functionlike_analyzer.rs b/src/analyzer/functionlike_analyzer.rs index a020c550..e41214e5 100644 --- a/src/analyzer/functionlike_analyzer.rs +++ b/src/analyzer/functionlike_analyzer.rs @@ -1088,6 +1088,7 @@ impl<'a> FunctionLikeAnalyzer<'a> { pure: false, has_awaitable: param_type.has_awaitable_types(), has_parent_nodes: true, + from_loop_init: false, }, } }; diff --git a/src/analyzer/scope_context/mod.rs b/src/analyzer/scope_context/mod.rs index 6fd45514..6ad7155b 100644 --- a/src/analyzer/scope_context/mod.rs +++ b/src/analyzer/scope_context/mod.rs @@ -142,6 +142,8 @@ pub struct ScopeContext { pub inside_loop: bool, + pub inside_loop_exprs: bool, + /// The current case scope, if we're in a switch pub case_scope: Option, @@ -190,6 +192,7 @@ impl ScopeContext { inside_assignment_op: false, inside_try: false, inside_awaitall: false, + inside_loop_exprs: false, inside_negation: false, has_returned: false, diff --git a/src/analyzer/stmt/foreach_analyzer.rs b/src/analyzer/stmt/foreach_analyzer.rs index 5d442429..0499a28f 100644 --- a/src/analyzer/stmt/foreach_analyzer.rs +++ b/src/analyzer/stmt/foreach_analyzer.rs @@ -104,6 +104,8 @@ pub(crate) fn analyze( let mut foreach_context = context.clone(); + foreach_context.inside_loop_exprs = true; + foreach_context.inside_loop = true; foreach_context.break_types.push(BreakContext::Loop); @@ -143,6 +145,7 @@ pub(crate) fn analyze( )?; foreach_context.for_loop_init_bounds = (0, 0); + foreach_context.inside_loop_exprs = false; loop_analyzer::analyze( statements_analyzer, diff --git a/src/analyzer/stmt/loop_analyzer.rs b/src/analyzer/stmt/loop_analyzer.rs index bc0eaa36..247f0813 100644 --- a/src/analyzer/stmt/loop_analyzer.rs +++ b/src/analyzer/stmt/loop_analyzer.rs @@ -132,6 +132,7 @@ pub(crate) fn analyze<'a>( statements_analyzer, ); + loop_context.inside_loop_exprs = true; for post_expression in post_expressions { expression_analyzer::analyze( statements_analyzer, @@ -141,6 +142,7 @@ pub(crate) fn analyze<'a>( &mut None, )?; } + loop_context.inside_loop_exprs = true; } else { let original_parent_context = loop_parent_context.clone(); @@ -199,6 +201,7 @@ pub(crate) fn analyze<'a>( } } + continue_context.inside_loop_exprs = true; for post_expression in &post_expressions { expression_analyzer::analyze( statements_analyzer, @@ -208,6 +211,7 @@ pub(crate) fn analyze<'a>( &mut None, )?; } + continue_context.inside_loop_exprs = false; let mut recorded_issues = analysis_data.clear_currently_recorded_issues(); analysis_data.stop_recording_issues(); @@ -406,6 +410,7 @@ pub(crate) fn analyze<'a>( } } + continue_context.inside_loop_exprs = true; for post_expression in &post_expressions { expression_analyzer::analyze( statements_analyzer, @@ -415,6 +420,7 @@ pub(crate) fn analyze<'a>( &mut None, )?; } + continue_context.inside_loop_exprs = false; recorded_issues = analysis_data.clear_currently_recorded_issues(); analysis_data.stop_recording_issues(); @@ -704,6 +710,8 @@ fn apply_pre_condition_to_loop_context( loop_context.inside_conditional = true; + loop_context.inside_loop_exprs = true; + expression_analyzer::analyze( statements_analyzer, pre_condition, @@ -714,6 +722,7 @@ fn apply_pre_condition_to_loop_context( add_branch_dataflow(statements_analyzer, pre_condition, analysis_data); + loop_context.inside_loop_exprs = false; loop_context.inside_conditional = false; let mut new_referenced_var_ids = loop_context.cond_referenced_var_ids.clone(); diff --git a/src/analyzer/stmt/try_analyzer.rs b/src/analyzer/stmt/try_analyzer.rs index 6af9ecf5..d97d34ef 100644 --- a/src/analyzer/stmt/try_analyzer.rs +++ b/src/analyzer/stmt/try_analyzer.rs @@ -192,6 +192,7 @@ pub(crate) fn analyze( false, true, false, + false, ) } else { DataFlowNode::get_for_lvar( diff --git a/src/code_info/data_flow/node.rs b/src/code_info/data_flow/node.rs index eba3e07d..b8ebcfee 100644 --- a/src/code_info/data_flow/node.rs +++ b/src/code_info/data_flow/node.rs @@ -387,14 +387,13 @@ pub enum DataFlowNodeKind { pure: bool, has_parent_nodes: bool, has_awaitable: bool, + from_loop_init: bool, }, VariableUseSink { pos: HPos, }, ForLoopInit { - var_name: String, - start_offset: u32, - end_offset: u32, + var_id: VarId, }, DataSource { pos: HPos, @@ -761,6 +760,7 @@ impl DataFlowNode { pure: bool, has_parent_nodes: bool, has_awaitable: bool, + from_loop_init: bool, ) -> Self { Self { id: DataFlowNodeId::Var( @@ -775,6 +775,7 @@ impl DataFlowNode { pure, has_awaitable, has_parent_nodes, + from_loop_init, }, } } diff --git a/src/code_info/issue.rs b/src/code_info/issue.rs index 2a79844d..d0bb0092 100644 --- a/src/code_info/issue.rs +++ b/src/code_info/issue.rs @@ -105,6 +105,7 @@ pub enum IssueKind { RedundantNonnullTypeComparison, RedundantTruthinessCheck, RedundantTypeComparison, + ShadowedLoopVar, TaintedData(Box), TestOnlyCall, UndefinedIntArrayOffset, @@ -217,7 +218,7 @@ impl IssueKind { } } -#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Clone, Debug, Eq, Hash, Serialize, Deserialize)] pub struct Issue { pub kind: IssueKind, pub description: String, @@ -228,6 +229,12 @@ pub struct Issue { pub insertion_start: Option, } +impl PartialEq for Issue { + fn eq(&self, other: &Self) -> bool { + self.kind == other.kind && self.pos == other.pos && self.description == other.description + } +} + impl Issue { pub fn new( kind: IssueKind, diff --git a/src/code_info_builder/lib.rs b/src/code_info_builder/lib.rs index a1556a14..09e20b70 100644 --- a/src/code_info_builder/lib.rs +++ b/src/code_info_builder/lib.rs @@ -16,9 +16,8 @@ use hakana_reflection_info::{ use hakana_reflection_info::{FileSource, GenericParent}; use hakana_str::{StrId, ThreadedInterner}; use hakana_type::{get_bool, get_int, get_mixed_any, get_string}; -use naming_special_names_rust::user_attributes; use no_pos_hash::{position_insensitive_hash, Hasher}; -use oxidized::ast::{FunParam, Tparam, TypeHint, UserAttribute}; +use oxidized::ast::{FunParam, Tparam, TypeHint}; use oxidized::ast_defs::Id; use oxidized::{ aast, diff --git a/tests/inference/Loop/Foreach/overwriteExistingVar/input.hack b/tests/inference/Loop/Foreach/overwriteExistingVar/input.hack index 99d2f242..fd69d940 100644 --- a/tests/inference/Loop/Foreach/overwriteExistingVar/input.hack +++ b/tests/inference/Loop/Foreach/overwriteExistingVar/input.hack @@ -12,3 +12,12 @@ function bar(vec $strs): void { } echo $str; } + +function baz(vec $strs): void { + foreach (vec['a', 'b', 'c'] as $str) { + echo $str; + } + foreach ($strs as $str) { // this is also fine + echo $str; + } +} \ No newline at end of file diff --git a/tests/inference/Loop/Foreach/overwriteExistingVar/output.txt b/tests/inference/Loop/Foreach/overwriteExistingVar/output.txt new file mode 100644 index 00000000..f1900543 --- /dev/null +++ b/tests/inference/Loop/Foreach/overwriteExistingVar/output.txt @@ -0,0 +1 @@ +ERROR: ShadowedLoopVar - input.hack:2:23 - Assignment to $str overwrites a variable defined above and referenced below \ No newline at end of file