From f06eae7c4138000ad44a4d94084d3908f6d505e2 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Mon, 25 Mar 2024 01:11:56 -0400 Subject: [PATCH] Improve taint analysis a little more --- src/analyzer/expr/binop/concat_analyzer.rs | 2 +- src/analyzer/expr/call/arguments_analyzer.rs | 24 ++ .../existing_atomic_method_call_analyzer.rs | 48 +-- .../call/function_call_return_type_fetcher.rs | 378 +++++++++++++----- .../expr/fetch/array_fetch_analyzer.rs | 38 +- src/analyzer/stmt_analyzer.rs | 18 +- src/code_info_builder/lib.rs | 15 +- src/str/build.rs | 150 +++++++ 8 files changed, 525 insertions(+), 148 deletions(-) diff --git a/src/analyzer/expr/binop/concat_analyzer.rs b/src/analyzer/expr/binop/concat_analyzer.rs index 83f8ea51..2fec7f9e 100644 --- a/src/analyzer/expr/binop/concat_analyzer.rs +++ b/src/analyzer/expr/binop/concat_analyzer.rs @@ -145,7 +145,7 @@ pub(crate) fn analyze_concat_nodes( result_type } -fn get_concat_nodes(expr: &aast::Expr<(), ()>) -> Vec<&aast::Expr<(), ()>> { +pub(crate) fn get_concat_nodes(expr: &aast::Expr<(), ()>) -> Vec<&aast::Expr<(), ()>> { match &expr.2 { aast::Expr_::Binop(x) => { let (binop, e1, e2) = (&x.bop, &x.lhs, &x.rhs); diff --git a/src/analyzer/expr/call/arguments_analyzer.rs b/src/analyzer/expr/call/arguments_analyzer.rs index ede54664..2b7f55d9 100644 --- a/src/analyzer/expr/call/arguments_analyzer.rs +++ b/src/analyzer/expr/call/arguments_analyzer.rs @@ -183,6 +183,7 @@ pub(crate) fn check_arguments_match( || matches!(functionlike_info.effects, FnEffect::Arg(_)) || functionlike_info.pure_can_throw || functionlike_info.user_defined + || functionlike_info.method_info.is_some() { context.inside_general_use = true; } @@ -1113,6 +1114,29 @@ fn handle_possibly_matching_inout_param( vec![], vec![], ); + } else if matches!( + functionlike_id, + FunctionLikeIdentifier::Function(StrId::JSON_DECODE_WITH_ERROR) + ) && argument_offset == 1 + { + let argument_node = DataFlowNode::get_for_method_argument( + functionlike_id.to_string(statements_analyzer.get_interner()), + 0, + Some(statements_analyzer.get_hpos(all_args[1].1.pos())), + Some(statements_analyzer.get_hpos(function_call_pos)), + ); + + analysis_data + .data_flow_graph + .add_node(argument_node.clone()); + + analysis_data.data_flow_graph.add_path( + &argument_node, + &out_node, + PathKind::Aggregate, + vec![], + vec![], + ); } analysis_data.data_flow_graph.add_node(out_node); diff --git a/src/analyzer/expr/call/existing_atomic_method_call_analyzer.rs b/src/analyzer/expr/call/existing_atomic_method_call_analyzer.rs index 2c5b582d..37cac51a 100644 --- a/src/analyzer/expr/call/existing_atomic_method_call_analyzer.rs +++ b/src/analyzer/expr/call/existing_atomic_method_call_analyzer.rs @@ -23,7 +23,9 @@ use oxidized::{ }; use rustc_hash::FxHashMap; -use crate::expr::fetch::array_fetch_analyzer::add_array_fetch_dataflow; +use crate::expr::fetch::array_fetch_analyzer::{ + add_array_fetch_dataflow, get_array_access_type_given_offset, +}; use crate::stmt_analyzer::AnalysisError; use crate::{ expr::{ @@ -468,31 +470,31 @@ fn handle_shapes_static_method( .get_rc_expr_type(call_expr.1[1].1.pos()) .cloned(); - let mut expr_type = None; - if let (Some(dict_type), Some(dim_type)) = (dict_type, dim_type) { - for atomic_type in &dict_type.types { - if let TAtomic::TDict { .. } = atomic_type { - let expr_type_inner = handle_array_access_on_dict( - statements_analyzer, - pos, - analysis_data, - context, - atomic_type, - &dim_type, - false, - &mut false, - true, - &mut false, - &mut false, - ); - - expr_type = Some(expr_type_inner); - } - } + let mut expr_type_inner = get_array_access_type_given_offset( + statements_analyzer, + analysis_data, + (&call_expr.1[0].1, Some(&call_expr.1[1].1), pos), + &dict_type, + &dim_type, + false, + &None, + context, + ); + + add_array_fetch_dataflow( + statements_analyzer, + call_expr.1[0].1.pos(), + analysis_data, + None, + &mut expr_type_inner, + &mut (*dim_type).clone(), + ); + + return Some(expr_type_inner); } - return Some(expr_type.unwrap_or(get_mixed_any())); + return Some(get_mixed_any()); } } StrId::TO_DICT | StrId::TO_ARRAY => { diff --git a/src/analyzer/expr/call/function_call_return_type_fetcher.rs b/src/analyzer/expr/call/function_call_return_type_fetcher.rs index 19af4adf..e3b462bb 100644 --- a/src/analyzer/expr/call/function_call_return_type_fetcher.rs +++ b/src/analyzer/expr/call/function_call_return_type_fetcher.rs @@ -25,7 +25,7 @@ use std::collections::BTreeMap; use std::path::Path; use std::sync::Arc; -use crate::expr::binop::concat_analyzer::analyze_concat_nodes; +use crate::expr::binop::concat_analyzer::{analyze_concat_nodes, get_concat_nodes}; use crate::expr::fetch::array_fetch_analyzer::handle_array_access_on_dict; use crate::expr::variable_fetch_analyzer; use crate::function_analysis_data::FunctionAnalysisData; @@ -426,73 +426,41 @@ fn handle_special_functions( None } } - &StrId::LIB_STR_FORMAT => { + &StrId::LIB_STR_FORMAT | &StrId::SPRINTF => { if let Some(first_arg) = args.first() { - if let aast::Expr_::String(simple_string) = &first_arg.1 .2 { - let mut escaped = false; - let mut in_format_string = false; - - let mut literals = vec![]; - - let mut cur_literal = "".to_string(); - - for c in simple_string.iter().copied() { - if in_format_string { - in_format_string = false; - continue; - } - - if !escaped { - if c as char == '%' { - in_format_string = true; - literals.push(aast::Expr( - (), - first_arg.1.pos().clone(), - aast::Expr_::String(BString::from(cur_literal)), - )); - cur_literal = "".to_string(); - continue; - } + match &first_arg.1 .2 { + aast::Expr_::String(simple_string) => { + return Some(handle_str_format( + simple_string, + first_arg, + args, + statements_analyzer, + analysis_data, + pos, + )); + } + aast::Expr_::Binop(boxed) => { + let mut concat_nodes = get_concat_nodes(&boxed.lhs); + concat_nodes.push(&boxed.rhs); - if c as char == '\\' { - escaped = true; - } + let mut more_complex_string = BString::new(vec![]); - in_format_string = false; - } else { - if c as char == '\\' { - cur_literal += "\\"; - escaped = false; - continue; + for concat_node in concat_nodes { + if let aast::Expr_::String(simple_string) = &concat_node.2 { + more_complex_string.append(&mut simple_string.clone()); } - - escaped = false; } - cur_literal += (c as char).to_string().as_str(); - } - - literals.push(aast::Expr( - (), - first_arg.1.pos().clone(), - aast::Expr_::String(BString::from(cur_literal)), - )); - - let mut concat_args = vec![]; - - for (i, literal) in literals.iter().enumerate() { - concat_args.push(literal); - if let Some(arg) = args.get(i + 1) { - concat_args.push(&arg.1); - } else { - break; - } + return Some(handle_str_format( + &more_complex_string, + first_arg, + args, + statements_analyzer, + analysis_data, + pos, + )); } - - let result_type = - analyze_concat_nodes(concat_args, statements_analyzer, analysis_data, pos); - - return Some(result_type); + _ => (), } } @@ -631,6 +599,76 @@ fn handle_special_functions( } } +fn handle_str_format( + simple_string: &BString, + first_arg: &(ast_defs::ParamKind, aast::Expr<(), ()>), + args: &Vec<(ast_defs::ParamKind, aast::Expr<(), ()>)>, + statements_analyzer: &StatementsAnalyzer<'_>, + analysis_data: &mut FunctionAnalysisData, + pos: &Pos, +) -> TUnion { + let mut escaped = false; + let mut in_format_string = false; + let mut literals = vec![]; + let mut cur_literal = "".to_string(); + + for c in simple_string.iter().copied() { + if in_format_string { + in_format_string = false; + continue; + } + + if !escaped { + if c as char == '%' { + in_format_string = true; + literals.push(aast::Expr( + (), + first_arg.1.pos().clone(), + aast::Expr_::String(BString::from(cur_literal)), + )); + cur_literal = "".to_string(); + continue; + } + + if c as char == '\\' { + escaped = true; + } + + in_format_string = false; + } else { + if c as char == '\\' { + cur_literal += "\\"; + escaped = false; + continue; + } + + escaped = false; + } + + cur_literal += (c as char).to_string().as_str(); + } + + literals.push(aast::Expr( + (), + first_arg.1.pos().clone(), + aast::Expr_::String(BString::from(cur_literal)), + )); + + let mut concat_args = vec![]; + + for (i, literal) in literals.iter().enumerate() { + concat_args.push(literal); + if let Some(arg) = args.get(i + 1) { + concat_args.push(&arg.1); + } else { + break; + } + } + + let result_type = analyze_concat_nodes(concat_args, statements_analyzer, analysis_data, pos); + result_type +} + fn get_type_structure_type( statements_analyzer: &StatementsAnalyzer, first_expr_type: &TUnion, @@ -755,7 +793,17 @@ fn add_dataflow( data_flow_graph.add_node(function_call_node.clone()); - let (param_offsets, variadic_path) = get_special_argument_nodes(functionlike_id, expr); + let (param_offsets, variadic_path) = + if !functionlike_storage.user_defined && (!expr.2.is_empty() || expr.3.is_some()) { + get_special_argument_nodes( + functionlike_id, + expr, + functionlike_storage, + statements_analyzer.get_interner(), + ) + } else { + (vec![], None) + }; let added_removed_taints = if let GraphKind::WholeProgram(_) = &data_flow_graph.kind { get_special_added_removed_taints(functionlike_id, statements_analyzer.get_interner()) @@ -893,6 +941,8 @@ fn get_special_argument_nodes( &Vec<(ast_defs::ParamKind, aast::Expr<(), ()>)>, &Option>, ), + _functionlike_info: &FunctionLikeInfo, + _interner: &Interner, ) -> (Vec<(usize, PathKind)>, Option) { match functionlike_id { FunctionLikeIdentifier::Function(function_name) => match *function_name { @@ -904,9 +954,6 @@ fn get_special_argument_nodes( | StrId::TRIM | StrId::LTRIM | StrId::RTRIM - | StrId::LIB_STR_TRIM - | StrId::LIB_STR_TRIM_LEFT - | StrId::LIB_STR_TRIM_RIGHT | StrId::LIB_STR_LOWERCASE | StrId::LIB_STR_UPPERCASE | StrId::LIB_STR_CAPITALIZE @@ -945,7 +992,6 @@ fn get_special_argument_nodes( | StrId::CHOP | StrId::CONVERT_UUDECODE | StrId::CONVERT_UUENCODE - | StrId::JSON_DECODE | StrId::BASE64_ENCODE | StrId::BASE64_DECODE | StrId::URLENCODE @@ -982,11 +1028,38 @@ fn get_special_argument_nodes( | StrId::IP2LONG | StrId::BIN2HEX | StrId::HEX2BIN - | StrId::ESCAPESHELLARG => (vec![(0, PathKind::Default)], None), - StrId::LIB_REGEX_FIRST_MATCH => (vec![(0, PathKind::Default)], Some(PathKind::Default)), + | StrId::ESCAPESHELLARG + | StrId::FIXME_UNSAFE_CAST + | StrId::LIB_DICT_COUNT_VALUES + | StrId::LIB_DICT_UNIQUE + | StrId::LIB_STR_REVERSE + | StrId::LIB_VEC_CAST_CLEAR_LEGACY_ARRAY_MARK + | StrId::CLASS_METH_GET_CLASS + | StrId::CLASS_METH_GET_METHOD + | StrId::CHR + | StrId::DECBIN + | StrId::DECHEX + | StrId::FB_SERIALIZE + | StrId::HEXDEC + | StrId::LZ4_COMPRESS + | StrId::LZ4_UNCOMPRESS + | StrId::RAWURLDECODE + | StrId::UTF8_DECODE + | StrId::UTF8_ENCODE + | StrId::STREAM_GET_META_DATA + | StrId::DIRNAME => (vec![(0, PathKind::Default)], None), + StrId::LIB_REGEX_FIRST_MATCH + | StrId::LIB_DICT_MERGE + | StrId::ARRAY_MERGE + | StrId::LIB_VEC_CONCAT + | StrId::LIB_KEYSET_UNION + | StrId::PACK + | StrId::UNPACK + | StrId::JSON_DECODE => (vec![(0, PathKind::Default)], Some(PathKind::Default)), StrId::LIB_DICT_SELECT_KEYS | StrId::LIB_VEC_TAKE | StrId::LIB_DICT_TAKE + | StrId::LIB_KEYSET_TAKE | StrId::LIB_STR_SLICE | StrId::LIB_STR_FORMAT_NUMBER | StrId::LIB_DICT_DIFF_BY_KEY @@ -995,6 +1068,8 @@ fn get_special_argument_nodes( | StrId::LIB_VEC_DIFF | StrId::LIB_KEYSET_DIFF | StrId::LIB_KEYSET_INTERSECT + | StrId::LIB_DICT_DROP + | StrId::LIB_KEYSET_DROP | StrId::LIB_VEC_INTERSECT | StrId::LIB_VEC_SLICE | StrId::LIB_VEC_RANGE @@ -1004,9 +1079,26 @@ fn get_special_argument_nodes( | StrId::LIB_STR_STRIP_SUFFIX | StrId::LIB_STR_REPEAT | StrId::SUBSTR - | StrId::LIB_DICT_ASSOCIATE => { - (vec![(0, PathKind::Default)], Some(PathKind::Aggregate)) - } + | StrId::LIB_DICT_ASSOCIATE + | StrId::GZCOMPRESS + | StrId::GZDECODE + | StrId::GZDEFLATE + | StrId::GZUNCOMPRESS + | StrId::JSON_DECODE_WITH_ERROR + | StrId::LIB__PRIVATE_REGEX_MATCH + | StrId::LIB_STR_TRIM + | StrId::LIB_STR_TRIM_LEFT + | StrId::LIB_STR_TRIM_RIGHT + | StrId::BASENAME => (vec![(0, PathKind::Default)], Some(PathKind::Aggregate)), + StrId::LIB_STR_SLICE_L => ( + vec![ + (0, PathKind::Aggregate), + (1, PathKind::Default), + (1, PathKind::Aggregate), + (2, PathKind::Aggregate), + ], + None, + ), StrId::LIB_C_IS_EMPTY | StrId::LIB_C_COUNT | StrId::COUNT @@ -1017,7 +1109,6 @@ fn get_special_argument_nodes( | StrId::LIB_STR_LENGTH | StrId::LIB_VEC_KEYS | StrId::LIB_STR_TO_INT - | StrId::LIB_MATH_ROUND | StrId::LIB_MATH_SUM | StrId::LIB_MATH_SUM_FLOAT | StrId::LIB_MATH_MIN @@ -1039,9 +1130,56 @@ fn get_special_argument_nodes( | StrId::CTYPE_LOWER | StrId::SHA1 | StrId::MD5 - | StrId::DIRNAME + | StrId::NON_CRYPTO_MD5_LOWER + | StrId::NON_CRYPTO_MD5_UPPER | StrId::CRC32 - | StrId::FILTER_VAR => (vec![(0, PathKind::Aggregate)], None), + | StrId::FILTER_VAR + | StrId::LIB_LOCALE_CREATE + | StrId::IS_A + | StrId::IS_BOOL + | StrId::IS_CALLABLE + | StrId::IS_CALLABLE_WITH_NAME + | StrId::IS_FINITE + | StrId::IS_FLOAT + | StrId::IS_INFINITE + | StrId::IS_INT + | StrId::IS_NAN + | StrId::IS_NULL + | StrId::IS_NUMERIC + | StrId::IS_OBJECT + | StrId::IS_RESOURCE + | StrId::IS_SCALAR + | StrId::IS_STRING + | StrId::CTYPE_ALNUM + | StrId::CTYPE_ALPHA + | StrId::CTYPE_DIGIT + | StrId::CTYPE_PUNCT + | StrId::CTYPE_SPACE + | StrId::CTYPE_UPPER + | StrId::CTYPE_XDIGIT + | StrId::IS_DICT + | StrId::IS_VEC + | StrId::IS_ANY_ARRAY + | StrId::IS_DICT_OR_DARRAY + | StrId::IS_VEC_OR_VARRAY + | StrId::ASIN + | StrId::ATAN2 + | StrId::CEIL + | StrId::ABS + | StrId::DEG2RAD + | StrId::FLOOR + | StrId::CLASS_EXISTS + | StrId::LONG2IP + | StrId::RAD2DEG + | StrId::ROUND + | StrId::GETTYPE + | StrId::IS_FUN + | StrId::IS_PHP_ARRAY + | StrId::FUNCTION_EXISTS + | StrId::GET_PARENT_CLASS + | StrId::GET_RESOURCE_TYPE + | StrId::FLOATVAL + | StrId::TYPE_STRUCTURE_FN => (vec![(0, PathKind::Aggregate)], None), StrId::LIB_MATH_ALMOST_EQUALS | StrId::LIB_MATH_BASE_CONVERT | StrId::LIB_MATH_EXP @@ -1056,6 +1194,10 @@ fn get_special_argument_nodes( | StrId::LIB_STR_ENDS_WITH | StrId::LIB_STR_ENDS_WITH_CI | StrId::LIB_STR_SEARCH + | StrId::LIB_STR_SEARCH_L + | StrId::LIB_STR_SEARCH_LAST + | StrId::LIB_STR_SEARCH_LAST_L + | StrId::LIB_STR_SEARCH_CI | StrId::LIB_STR_CONTAINS | StrId::LIB_STR_CONTAINS_CI | StrId::LIB_STR_COMPARE @@ -1066,7 +1208,29 @@ fn get_special_argument_nodes( | StrId::SUBSTR_COUNT | StrId::STRCMP | StrId::STRNATCASECMP - | StrId::LIB_KEYSET_EQUAL => (vec![], Some(PathKind::Aggregate)), + | StrId::LIB_KEYSET_EQUAL + | StrId::LIB_DICT_EQUAL + | StrId::LIB_LEGACY_FIXME_EQ + | StrId::LIB_LEGACY_FIXME_LT + | StrId::LIB_LEGACY_FIXME_NEQ + | StrId::LIB_STR_LENGTH_L + | StrId::IS_SUBCLASS_OF + | StrId::STRIPOS + | StrId::STRLEN + | StrId::STRNATCMP + | StrId::STRNCMP + | StrId::STRRPOS + | StrId::STRSPN + | StrId::LEVENSHTEIN + | StrId::INTDIV + | StrId::STRCASECMP + | StrId::STRCSPN + | StrId::SUBSTR_COMPARE + | StrId::VERSION_COMPARE + | StrId::FMOD + | StrId::POW + | StrId::LIB_MATH_ROUND + | StrId::MB_DETECT_ENCODING => (vec![], Some(PathKind::Aggregate)), StrId::LIB_C_CONTAINS | StrId::LIB_C_CONTAINS_KEY | StrId::IN_ARRAY @@ -1083,6 +1247,15 @@ fn get_special_argument_nodes( ], None, ), + StrId::PREG_MATCH_WITH_MATCHES_AND_ERROR => ( + vec![ + (0, PathKind::Aggregate), + (1, PathKind::Aggregate), + (4, PathKind::Aggregate), + (5, PathKind::Aggregate), + ], + None, + ), StrId::JSON_ENCODE | StrId::SERIALIZE => (vec![(0, PathKind::Serialize)], None), StrId::VAR_DUMP | StrId::PRINTF => { (vec![(0, PathKind::Serialize)], Some(PathKind::Serialize)) @@ -1110,21 +1283,21 @@ fn get_special_argument_nodes( None, ), StrId::PREG_GREP => (vec![(0, PathKind::Aggregate), (1, PathKind::Default)], None), - StrId::LIB_STR_REPLACE_EVERY => ( + StrId::LIB_STR_REPLACE_EVERY | StrId::VSPRINTF | StrId::IMPLODE | StrId::JOIN => ( vec![ (0, PathKind::Default), (1, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayValue)), ], None, ), - StrId::STR_PAD | StrId::LIB_STR_PAD_LEFT | StrId::LIB_STR_PAD_RIGHT | StrId::CHUNK_SPLIT | StrId::LIB_REGEX_REPLACE | StrId::LIB_STR_REPLACE - | StrId::LIB_STR_REPLACE_CI => ( + | StrId::LIB_STR_REPLACE_CI + | StrId::STRTR => ( vec![ (0, PathKind::Default), (1, PathKind::Aggregate), @@ -1132,10 +1305,12 @@ fn get_special_argument_nodes( ], None, ), - StrId::IMPLODE | StrId::JOIN => ( + StrId::LIB_STR_SPLICE => ( vec![ (0, PathKind::Default), - (1, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayValue)), + (1, PathKind::Default), + (2, PathKind::Aggregate), + (3, PathKind::Aggregate), ], None, ), @@ -1149,15 +1324,29 @@ fn get_special_argument_nodes( ], None, ), + StrId::LIB_VEC_FILL | StrId::EXPLODE | StrId::PREG_SPLIT => ( + vec![ + (0, PathKind::Aggregate), + ( + 1, + PathKind::UnknownArrayAssignment(ArrayDataKind::ArrayValue), + ), + ], + None, + ), StrId::HTTP_BUILD_QUERY => ( vec![(0, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayValue))], None, ), - StrId::EXPLODE | StrId::PREG_SPLIT => ( - vec![( - 1, - PathKind::UnknownArrayAssignment(ArrayDataKind::ArrayValue), - )], + StrId::LIB_REGEX_SPLIT => ( + vec![ + ( + 0, + PathKind::UnknownArrayAssignment(ArrayDataKind::ArrayValue), + ), + (1, PathKind::Aggregate), + (2, PathKind::Aggregate), + ], None, ), StrId::LIB_VEC_ZIP => ( @@ -1260,6 +1449,7 @@ fn get_special_argument_nodes( ), StrId::LIB_C_FIRST | StrId::LIB_C_FIRSTX + | StrId::LIB_C_NFIRST | StrId::LIB_C_LAST | StrId::LIB_C_LASTX | StrId::LIB_C_ONLYX @@ -1306,16 +1496,20 @@ fn get_special_argument_nodes( vec![(0, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayKey))], None, ), - StrId::LIB_DICT_MERGE | StrId::LIB_VEC_CONCAT | StrId::LIB_KEYSET_UNION => { - (vec![(0, PathKind::Default)], Some(PathKind::Default)) - } + // handled separately + StrId::LIB_STR_FORMAT | StrId::SPRINTF => (vec![], None), _ => { - // if function_name.starts_with("HH\\Lib\\") - // && !function_name.starts_with("HH\\Lib\\Math\\") + // if !matches!(functionlike_info.effects, FnEffect::Some(_)) + // && !matches!(functionlike_info.effects, FnEffect::Arg(_)) + // && !functionlike_info.pure_can_throw + // && !functionlike_info.user_defined // { - // println!("no taints through {}", function_name); + // println!("{}", functionlike_id.to_string(interner)); // } - (vec![], None) + + // this is a cop-out, but will guarantee false-positives vs false-negatives + // in taint analysis + (vec![], Some(PathKind::Default)) } }, _ => panic!(), diff --git a/src/analyzer/expr/fetch/array_fetch_analyzer.rs b/src/analyzer/expr/fetch/array_fetch_analyzer.rs index a2ff3d71..fee9a5ff 100644 --- a/src/analyzer/expr/fetch/array_fetch_analyzer.rs +++ b/src/analyzer/expr/fetch/array_fetch_analyzer.rs @@ -100,7 +100,7 @@ pub(crate) fn analyze( if let Some(stmt_var_type) = stmt_var_type { // maybe todo handle access on null - let stmt_type = Some(get_array_access_type_given_offset( + let mut stmt_type_inner = get_array_access_type_given_offset( statements_analyzer, analysis_data, (expr.0, expr.1, pos), @@ -109,30 +109,28 @@ pub(crate) fn analyze( false, &extended_var_id, context, - )); + ); - if let Some(mut stmt_type) = stmt_type.clone() { - if let Some(keyed_array_var_id) = &keyed_array_var_id { - let can_store_result = context.inside_assignment || !stmt_var_type.is_mixed(); + if let Some(keyed_array_var_id) = &keyed_array_var_id { + let can_store_result = context.inside_assignment || !stmt_var_type.is_mixed(); - if !context.inside_isset && can_store_result && keyed_array_var_id.contains("[$") { - context - .vars_in_scope - .insert(keyed_array_var_id.clone(), Rc::new(stmt_type.clone())); - } + if !context.inside_isset && can_store_result && keyed_array_var_id.contains("[$") { + context + .vars_in_scope + .insert(keyed_array_var_id.clone(), Rc::new(stmt_type_inner.clone())); } + } - add_array_fetch_dataflow( - statements_analyzer, - expr.0.pos(), - analysis_data, - keyed_array_var_id.clone(), - &mut stmt_type, - &mut used_key_type, - ); + add_array_fetch_dataflow( + statements_analyzer, + expr.0.pos(), + analysis_data, + keyed_array_var_id.clone(), + &mut stmt_type_inner, + &mut used_key_type, + ); - analysis_data.set_expr_type(pos, stmt_type.clone()); - } + analysis_data.set_expr_type(pos, stmt_type_inner.clone()); } if let Some(dim_expr) = expr.1 { diff --git a/src/analyzer/stmt_analyzer.rs b/src/analyzer/stmt_analyzer.rs index 7a7d15bc..c127abc1 100644 --- a/src/analyzer/stmt_analyzer.rs +++ b/src/analyzer/stmt_analyzer.rs @@ -294,17 +294,13 @@ fn detect_unused_statement_expressions( if let Some(functionlike_id) = functionlike_id { match functionlike_id { FunctionLikeIdentifier::Function(function_id) => { - if function_id == StrId::INVARIANT - || function_id == StrId::INVARIANT_VIOLATION - || function_id == StrId::TRIGGER_ERROR - || function_id == StrId::FUNCTION_EXISTS - || function_id == StrId::CLASS_EXISTS - || function_id == StrId::SET_FRAME_METADATA - || function_id == StrId::LIB_C_FIRSTX - || function_id == StrId::LIB_C_LASTX - || function_id == StrId::LIB_C_ONLYX + let codebase = statements_analyzer.get_codebase(); + + if let Some(functionlike_info) = codebase + .functionlike_infos + .get(&(function_id, StrId::EMPTY)) { - fn_can_throw = true; + fn_can_throw = functionlike_info.pure_can_throw } } FunctionLikeIdentifier::Method(_, method_name_id) => { @@ -316,7 +312,7 @@ fn detect_unused_statement_expressions( fn_can_throw = true; } } - _ => {} + _ => (), } }; diff --git a/src/code_info_builder/lib.rs b/src/code_info_builder/lib.rs index a7b8586f..94701e27 100644 --- a/src/code_info_builder/lib.rs +++ b/src/code_info_builder/lib.rs @@ -638,7 +638,20 @@ impl<'a> Scanner<'a> { functionlike_storage.is_production_code = self.file_source.is_production_code; - if name == Some(StrId::INVARIANT) { + if matches!( + name, + Some( + StrId::INVARIANT + | StrId::INVARIANT_VIOLATION + | StrId::TRIGGER_ERROR + | StrId::FUNCTION_EXISTS + | StrId::CLASS_EXISTS + | StrId::SET_FRAME_METADATA + | StrId::LIB_C_FIRSTX + | StrId::LIB_C_LASTX + | StrId::LIB_C_ONLYX + ) + ) { functionlike_storage.pure_can_throw = true; } diff --git a/src/str/build.rs b/src/str/build.rs index ff9ef38d..ed56d190 100644 --- a/src/str/build.rs +++ b/src/str/build.rs @@ -26,6 +26,8 @@ fn main() -> Result<()> { "HH\\BuiltinEnumClass", "HH\\Container", "HH\\EnumClass\\Label", + "HH\\FIXME\\UNSAFE_CAST", + "HH\\Facts\\enabled", "HH\\FormatString", "HH\\Iterator", "HH\\KeyedContainer", @@ -48,13 +50,17 @@ fn main() -> Result<()> { "HH\\Lib\\C\\last_key", "HH\\Lib\\C\\last_keyx", "HH\\Lib\\C\\lastx", + "HH\\Lib\\C\\nfirst", "HH\\Lib\\C\\onlyx", "HH\\Lib\\C\\search", "HH\\Lib\\Dict\\associate", "HH\\Lib\\Dict\\chunk", "HH\\Lib\\Dict\\contains", "HH\\Lib\\Dict\\contains_key", + "HH\\Lib\\Dict\\count_values", "HH\\Lib\\Dict\\diff_by_key", + "HH\\Lib\\Dict\\drop", + "HH\\Lib\\Dict\\equal", "HH\\Lib\\Dict\\fill_keys", "HH\\Lib\\Dict\\filter", "HH\\Lib\\Dict\\filter_async", @@ -75,8 +81,10 @@ fn main() -> Result<()> { "HH\\Lib\\Dict\\reverse", "HH\\Lib\\Dict\\select_keys", "HH\\Lib\\Dict\\take", + "HH\\Lib\\Dict\\unique", "HH\\Lib\\Keyset\\chunk", "HH\\Lib\\Keyset\\diff", + "HH\\Lib\\Keyset\\drop", "HH\\Lib\\Keyset\\equal", "HH\\Lib\\Keyset\\filter", "HH\\Lib\\Keyset\\filter_async", @@ -89,6 +97,10 @@ fn main() -> Result<()> { "HH\\Lib\\Keyset\\map_with_key", "HH\\Lib\\Keyset\\take", "HH\\Lib\\Keyset\\union", + "HH\\Lib\\Legacy_FIXME\\eq", + "HH\\Lib\\Legacy_FIXME\\lt", + "HH\\Lib\\Legacy_FIXME\\neq", + "HH\\Lib\\Locale\\create", "HH\\Lib\\Math\\INT32_MAX", "HH\\Lib\\Math\\abs", "HH\\Lib\\Math\\almost_equals", @@ -121,6 +133,7 @@ fn main() -> Result<()> { "HH\\Lib\\Regex\\first_match", "HH\\Lib\\Regex\\matches", "HH\\Lib\\Regex\\replace", + "HH\\Lib\\Regex\\split", "HH\\Lib\\Str\\capitalize", "HH\\Lib\\Str\\capitalize_words", "HH\\Lib\\Str\\chunk", @@ -135,6 +148,7 @@ fn main() -> Result<()> { "HH\\Lib\\Str\\is_empty", "HH\\Lib\\Str\\join", "HH\\Lib\\Str\\length", + "HH\\Lib\\Str\\length_l", "HH\\Lib\\Str\\lowercase", "HH\\Lib\\Str\\pad_left", "HH\\Lib\\Str\\pad_right", @@ -142,8 +156,15 @@ fn main() -> Result<()> { "HH\\Lib\\Str\\replace", "HH\\Lib\\Str\\replace_ci", "HH\\Lib\\Str\\replace_every", + "HH\\Lib\\Str\\reverse", "HH\\Lib\\Str\\search", + "HH\\Lib\\Str\\search_ci", + "HH\\Lib\\Str\\search_l", + "HH\\Lib\\Str\\search_last", + "HH\\Lib\\Str\\search_last_l", "HH\\Lib\\Str\\slice", + "HH\\Lib\\Str\\slice_l", + "HH\\Lib\\Str\\splice", "HH\\Lib\\Str\\split", "HH\\Lib\\Str\\starts_with", "HH\\Lib\\Str\\starts_with_ci", @@ -154,10 +175,12 @@ fn main() -> Result<()> { "HH\\Lib\\Str\\trim_left", "HH\\Lib\\Str\\trim_right", "HH\\Lib\\Str\\uppercase", + "HH\\Lib\\Vec\\cast_clear_legacy_array_mark", "HH\\Lib\\Vec\\chunk", "HH\\Lib\\Vec\\concat", "HH\\Lib\\Vec\\diff", "HH\\Lib\\Vec\\drop", + "HH\\Lib\\Vec\\fill", "HH\\Lib\\Vec\\filter", "HH\\Lib\\Vec\\filter_async", "HH\\Lib\\Vec\\filter_nulls", @@ -176,19 +199,41 @@ fn main() -> Result<()> { "HH\\Lib\\Vec\\take", "HH\\Lib\\Vec\\unique", "HH\\Lib\\Vec\\zip", + "HH\\Lib\\_Private\\regex_match", + "HH\\Lib\\_Private\\validate_offset", "HH\\MemberOf", + "HH\\ReifiedGenerics\\get_classname", + "HH\\ReifiedGenerics\\get_type_structure", "HH\\Shapes", "HH\\Traversable", "HH\\TypeStructure", "HH\\Vector", + "HH\\class_meth_get_class", + "HH\\class_meth_get_method", + "HH\\darray", "HH\\dict", + "HH\\ffp_parse_string_native", + "HH\\fun_get_function", "HH\\global_get", "HH\\idx", "HH\\invariant", "HH\\invariant_violation", + "HH\\is_any_array", + "HH\\is_dict", + "HH\\is_dict_or_darray", + "HH\\is_fun", + "HH\\is_php_array", + "HH\\is_vec", + "HH\\is_vec_or_varray", "HH\\keyset", + "HH\\non_crypto_md5_lower", + "HH\\non_crypto_md5_upper", "HH\\set_frame_metadata", + "HH\\str_number_coercible", + "HH\\str_to_numeric", "HH\\type_structure", + "HH\\type_structure_for_alias", + "HH\\varray", "HH\\vec", "Hakana\\FindPaths\\Sanitize", "Hakana\\Immutable", @@ -213,16 +258,33 @@ fn main() -> Result<()> { "__PHP_Incomplete_Class", "__Sealed", "__construct", + "abs", "addcslashes", "addslashes", + "array_combine", + "array_key_exists", + "array_keys", + "array_merge", + "array_push", + "array_reverse", + "array_shift", + "array_slice", + "array_unique", + "array_unshift", + "arsort", + "asin", + "asort", "assert", "assertAll", "at", + "atan2", "base64_decode", "base64_encode", "basename", "bin2hex", + "ceil", "chop", + "chr", "chunk_split", "class_exists", "coerce", @@ -230,28 +292,54 @@ fn main() -> Result<()> { "convert_uuencode", "count", "crc32", + "ctype_alnum", + "ctype_alpha", + "ctype_digit", "ctype_lower", + "ctype_punct", + "ctype_space", + "ctype_upper", + "ctype_xdigit", + "curl_error", "date", "date_format", "debug_backtrace", + "decbin", + "dechex", + "deg2rad", "dirname", "echo", "escapeshellarg", "explode", "extension", + "fb_serialize", "file_get_contents", "filename", "filter_var", + "floatval", + "floor", + "fmod", "fromItems", "function_exists", "get_class", "get_object_vars", + "get_parent_class", + "get_resource_type", + "gethostname", + "getrandmax", + "gettype", + "gzcompress", + "gzdecode", + "gzdeflate", "gzinflate", + "gzuncompress", "hash", "hash_equals", "hash_hmac", "hex2bin", + "hexdec", "highlight_string", + "hphp_to_string", "htmlentities", "htmlentitydecode", "htmlspecialchars", @@ -261,51 +349,98 @@ fn main() -> Result<()> { "implode", "in_array", "include", + "inet_ntop", + "inet_pton", + "intdiv", + "interface_exists", "intval", "ip2long", + "is_a", + "is_bool", + "is_callable", + "is_callable_with_name", + "is_finite", + "is_float", + "is_infinite", + "is_int", + "is_nan", + "is_null", + "is_numeric", + "is_object", + "is_resource", + "is_scalar", + "is_string", + "is_subclass_of", "isset", "join", "json_decode", + "json_decode_with_error", "json_encode", "keyExists", + "krsort", + "ksort", "lcfirst", + "levenshtein", "log", + "long2ip", "ltrim", + "lz4_compress", + "lz4_uncompress", + "max", + "mb_detect_encoding", + "mb_list_encodings", "mb_strlen", "mb_strtolower", "mb_strtoupper", "md5", + "method_exists", "microtime", + "min", "mktime", + "mt_getrandmax", + "mysql_escape_string", "nl2br", "number_format", "ord", + "pack", "parent", "password_hash", "pathinfo", + "pow", "preg_filter", "preg_grep", "preg_match", + "preg_match_all", "preg_match_all_with_matches", + "preg_match_with_error", "preg_match_with_matches", + "preg_match_with_matches_and_error", "preg_quote", "preg_replace", "preg_replace_with_count", "preg_split", "print_r", + "print_r_pure", "printf", "quote_meta", "quoted_printable_decode", "quoted_printable_encode", + "rad2deg", "rand", "range", + "rawurldecode", "rawurlencode", "realpath", "removeKey", + "round", + "rsort", "rtrim", "self", "serialize", "sha1", + "socket_strerror", + "sort", + "sprintf", "sscanf", "static", "stdClass", @@ -316,25 +451,37 @@ fn main() -> Result<()> { "str_rot13", "str_shuffle", "str_split", + "str_word_count", + "strcasecmp", "strchr", "strcmp", + "strcspn", + "stream_get_meta_data", "strgetcsv", "strip_tags", "stripcslashes", + "stripos", "stripslashes", "stristr", + "strlen", "strnatcasecmp", + "strnatcmp", + "strncmp", "strpad", "strpbrk", "strpos", "strrchr", "strrev", + "strrpos", + "strspn", "strstr", "strtolower", "strtotime", "strtoupper", + "strtr", "strval", "substr", + "substr_compare", "substr_count", "substr_replace", "this", @@ -344,12 +491,15 @@ fn main() -> Result<()> { "trim", "ucfirst", "ucwords", + "unpack", "unset", "urldecode", "urlencode", + "utf8_decode", "utf8_encode", "var_dump", "var_export", + "version_compare", "vsprintf", "wordwrap", ];