diff --git a/src/analyzer/expr/call/arguments_analyzer.rs b/src/analyzer/expr/call/arguments_analyzer.rs index cffae284..0850c752 100644 --- a/src/analyzer/expr/call/arguments_analyzer.rs +++ b/src/analyzer/expr/call/arguments_analyzer.rs @@ -179,13 +179,13 @@ pub(crate) fn check_arguments_match( for (_, arg_expr) in args.iter() { let was_inside_call = context.inside_general_use; - // if matches!(functionlike_info.effects, FnEffect::Some(_)) - // || matches!(functionlike_info.effects, FnEffect::Arg(_)) - // || functionlike_info.pure_can_throw - // || functionlike_info.user_defined - // { - context.inside_general_use = true; - // } + if matches!(functionlike_info.effects, FnEffect::Some(_)) + || matches!(functionlike_info.effects, FnEffect::Arg(_)) + || functionlike_info.pure_can_throw + || functionlike_info.user_defined + { + context.inside_general_use = true; + } // don't analyse closures here if !matches!(arg_expr.2, aast::Expr_::Lfun(_) | aast::Expr_::Efun(_)) { @@ -1048,6 +1048,18 @@ fn handle_possibly_matching_inout_param( let arg_type = arg_type.unwrap_or(get_mixed_any()); + if functionlike_id == &FunctionLikeIdentifier::Function(StrId::PREG_MATCH_WITH_MATCHES) + && argument_offset == 2 + { + let function_call_node = DataFlowNode::get_for_method_return( + functionlike_id.to_string(statements_analyzer.get_interner()), + Some(statements_analyzer.get_hpos(function_call_pos)), + Some(statements_analyzer.get_hpos(function_call_pos)), + ); + + inout_type.parent_nodes.push(function_call_node); + } + if let GraphKind::WholeProgram(_) = &analysis_data.data_flow_graph.kind { let out_node = DataFlowNode::get_for_method_argument_out( functionlike_id.to_string(statements_analyzer.get_interner()), diff --git a/src/analyzer/expr/call/function_call_return_type_fetcher.rs b/src/analyzer/expr/call/function_call_return_type_fetcher.rs index dc9d79c7..8341ac83 100644 --- a/src/analyzer/expr/call/function_call_return_type_fetcher.rs +++ b/src/analyzer/expr/call/function_call_return_type_fetcher.rs @@ -731,10 +731,9 @@ fn add_dataflow( // todo conditionally remove taints - let function_call_node; - if let GraphKind::WholeProgram(_) = &data_flow_graph.kind { - function_call_node = DataFlowNode::get_for_method_return( + let function_call_node = if let GraphKind::WholeProgram(_) = &data_flow_graph.kind { + DataFlowNode::get_for_method_return( functionlike_id.to_string(statements_analyzer.get_interner()), if let Some(return_pos) = &functionlike_storage.return_type_location { Some(*return_pos) @@ -746,14 +745,14 @@ fn add_dataflow( } else { None }, - ); + ) } else { - function_call_node = DataFlowNode::get_for_method_return( + DataFlowNode::get_for_method_return( functionlike_id.to_string(statements_analyzer.get_interner()), Some(statements_analyzer.get_hpos(pos)), Some(statements_analyzer.get_hpos(pos)), - ); - } + ) + }; data_flow_graph.add_node(function_call_node.clone()); @@ -912,6 +911,7 @@ fn get_special_argument_nodes( | StrId::LIB_STR_LOWERCASE | StrId::LIB_STR_UPPERCASE | StrId::LIB_STR_CAPITALIZE + | StrId::LIB_STR_CAPITALIZE_WORDS | StrId::ASIO_JOIN | StrId::STRIP_TAGS | StrId::STRIPSLASHES @@ -929,7 +929,6 @@ fn get_special_argument_nodes( | StrId::STRPBRK | StrId::STRRCHR | StrId::STRREV - | StrId::SUBSTR | StrId::PREG_QUOTE | StrId::WORDWRAP | StrId::REALPATH @@ -951,6 +950,7 @@ fn get_special_argument_nodes( | StrId::BASE64_ENCODE | StrId::BASE64_DECODE | StrId::URLENCODE + | StrId::URLDECODE | StrId::LIB_DICT_FILTER | StrId::LIB_DICT_FILTER_ASYNC | StrId::LIB_DICT_FILTER_KEYS @@ -961,7 +961,6 @@ fn get_special_argument_nodes( | StrId::LIB_VEC_FILTER_ASYNC | StrId::LIB_VEC_FILTER_NULLS | StrId::LIB_VEC_FILTER_WITH_KEY - | StrId::LIB_VEC_TAKE | StrId::LIB_VEC_DROP | StrId::LIB_VEC_REVERSE | StrId::LIB_VEC_UNIQUE @@ -970,12 +969,18 @@ fn get_special_argument_nodes( | StrId::LIB_KEYSET_FILTER_ASYNC | StrId::LIB_KEYSET_FLATTEN | StrId::LIB_KEYSET_KEYS - | StrId::LIB_STR_SLICE - | StrId::LIB_REGEX_FIRST_MATCH | StrId::KEYSET | StrId::VEC | StrId::DICT - | StrId::GET_OBJECT_VARS => (vec![(0, PathKind::Default)], None), + | StrId::GET_OBJECT_VARS + | StrId::RAWURLENCODE + | StrId::LIB_DICT_FROM_ASYNC + | StrId::LIB_VEC_FROM_ASYNC + | StrId::ORD + | StrId::LOG + | StrId::IP2LONG + | StrId::BIN2HEX + | StrId::HEX2BIN => (vec![(0, PathKind::Default)], None), StrId::LIB_VEC_DIFF | StrId::LIB_KEYSET_DIFF | StrId::LIB_KEYSET_INTERSECT @@ -983,10 +988,22 @@ fn get_special_argument_nodes( | StrId::LIB_VEC_SLICE | StrId::LIB_VEC_RANGE | StrId::LIB_VEC_CHUNK - | StrId::LIB_STRING_STRIP_PREFIX => { + | StrId::LIB_STR_STRIP_PREFIX + | StrId::LIB_STR_STRIP_SUFFIX + | StrId::LIB_STR_REPEAT + | StrId::SUBSTR + | StrId::LIB_DICT_ASSOCIATE + | StrId::LIB_REGEX_FIRST_MATCH => { + (vec![(0, PathKind::Default)], Some(PathKind::Default)) + } + StrId::LIB_DICT_SELECT_KEYS + | StrId::LIB_VEC_TAKE + | StrId::LIB_DICT_TAKE + | StrId::LIB_STR_SLICE + | StrId::LIB_STR_FORMAT_NUMBER + | StrId::LIB_DICT_DIFF_BY_KEY => { (vec![(0, PathKind::Default)], Some(PathKind::Aggregate)) } - StrId::LIB_DICT_ASSOCIATE => (vec![(0, PathKind::Default)], Some(PathKind::Default)), StrId::LIB_C_IS_EMPTY | StrId::LIB_C_COUNT | StrId::COUNT @@ -994,8 +1011,6 @@ fn get_special_argument_nodes( | StrId::LIB_C_EVERY | StrId::LIB_C_SEARCH | StrId::LIB_STR_IS_EMPTY - | StrId::LIB_STR_COMPARE - | StrId::LIB_STR_COMPARE_CI | StrId::LIB_STR_LENGTH | StrId::LIB_VEC_KEYS | StrId::LIB_STR_TO_INT @@ -1004,7 +1019,6 @@ fn get_special_argument_nodes( | StrId::LIB_MATH_SUM_FLOAT | StrId::LIB_MATH_MIN | StrId::LIB_MATH_MIN_BY - | StrId::LIB_MATH_MINVA | StrId::LIB_MATH_MAX | StrId::LIB_MATH_MEAN | StrId::LIB_MATH_MEDIAN @@ -1017,7 +1031,11 @@ fn get_special_argument_nodes( | StrId::LIB_MATH_SQRT | StrId::LIB_MATH_TAN | StrId::LIB_MATH_ABS - | StrId::INTVAL => (vec![(0, PathKind::Aggregate)], None), + | StrId::INTVAL + | StrId::GET_CLASS + | StrId::CTYPE_LOWER + | StrId::SHA1 + | StrId::MD5 => (vec![(0, PathKind::Aggregate)], None), StrId::LIB_MATH_ALMOST_EQUALS | StrId::LIB_MATH_BASE_CONVERT | StrId::LIB_MATH_EXP @@ -1026,19 +1044,29 @@ fn get_special_argument_nodes( | StrId::LIB_MATH_TO_BASE | StrId::LIB_MATH_MAX_BY | StrId::LIB_MATH_MAXVA + | StrId::LIB_MATH_MINVA | StrId::LIB_STR_STARTS_WITH | StrId::LIB_STR_STARTS_WITH_CI | StrId::LIB_STR_ENDS_WITH | StrId::LIB_STR_ENDS_WITH_CI | StrId::LIB_STR_SEARCH | StrId::LIB_STR_CONTAINS - | StrId::LIB_STR_CONTAINS_CI => (vec![], Some(PathKind::Aggregate)), + | StrId::LIB_STR_CONTAINS_CI + | StrId::LIB_STR_COMPARE + | StrId::LIB_STR_COMPARE_CI + | StrId::HASH_EQUALS + | StrId::RANGE + | StrId::STRPOS + | StrId::SUBSTR_COUNT + | StrId::STRCMP + | StrId::LIB_KEYSET_EQUAL => (vec![], Some(PathKind::Aggregate)), StrId::LIB_C_CONTAINS | StrId::LIB_C_CONTAINS_KEY | StrId::IN_ARRAY | StrId::PREG_MATCH | StrId::LIB_REGEX_MATCHES - | StrId::PREG_MATCH_WITH_MATCHES => ( + | StrId::PREG_MATCH_WITH_MATCHES + | StrId::PREG_MATCH_ALL_WITH_MATCHES => ( vec![(0, PathKind::Aggregate), (1, PathKind::Aggregate)], None, ), @@ -1050,11 +1078,25 @@ fn get_special_argument_nodes( (vec![(0, PathKind::Default), (1, PathKind::Default)], None) } StrId::STR_REPLACE | StrId::STR_IREPLACE | StrId::PREG_FILTER | StrId::PREG_REPLACE => { - (vec![(1, PathKind::Default), (2, PathKind::Default)], None) + ( + vec![ + (0, PathKind::Aggregate), + (1, PathKind::Default), + (2, PathKind::Default), + ], + None, + ) } StrId::LIB_STR_REPLACE | StrId::LIB_STR_REPLACE_CI => { (vec![(0, PathKind::Default), (2, PathKind::Default)], None) } + StrId::LIB_STR_REPLACE_EVERY => ( + vec![ + (0, PathKind::Default), + (1, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayValue)), + ], + None, + ), StrId::LIB_REGEX_REPLACE => ( vec![ (0, PathKind::Default), @@ -1128,10 +1170,13 @@ fn get_special_argument_nodes( | StrId::LIB_STR_SPLIT | StrId::LIB_STR_CHUNK | StrId::LIB_REGEX_EVERY_MATCH => ( - vec![( - 0, - PathKind::UnknownArrayAssignment(ArrayDataKind::ArrayValue), - )], + vec![ + ( + 0, + PathKind::UnknownArrayAssignment(ArrayDataKind::ArrayValue), + ), + (1, PathKind::Aggregate), + ], None, ), StrId::LIB_VEC_SORT => (vec![(0, PathKind::Default)], None), @@ -1175,6 +1220,7 @@ fn get_special_argument_nodes( )], None, ), + StrId::LIB_DICT_CHUNK => (vec![(0, PathKind::Default), (1, PathKind::Aggregate)], None), StrId::LIB_C_FIRST | StrId::LIB_C_FIRSTX | StrId::LIB_C_LAST @@ -1208,7 +1254,10 @@ fn get_special_argument_nodes( } } ( - vec![(0, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayValue))], + vec![ + (0, PathKind::UnknownArrayFetch(ArrayDataKind::ArrayValue)), + (1, PathKind::Aggregate), + ], None, ) } diff --git a/src/str/build.rs b/src/str/build.rs index 001ae7fa..4fe010ab 100644 --- a/src/str/build.rs +++ b/src/str/build.rs @@ -51,8 +51,10 @@ fn main() -> Result<()> { "HH\\Lib\\C\\onlyx", "HH\\Lib\\C\\search", "HH\\Lib\\Dict\\associate", + "HH\\Lib\\Dict\\chunk", "HH\\Lib\\Dict\\contains", "HH\\Lib\\Dict\\contains_key", + "HH\\Lib\\Dict\\diff_by_key", "HH\\Lib\\Dict\\fill_keys", "HH\\Lib\\Dict\\filter", "HH\\Lib\\Dict\\filter_async", @@ -61,6 +63,7 @@ fn main() -> Result<()> { "HH\\Lib\\Dict\\filter_with_key", "HH\\Lib\\Dict\\flatten", "HH\\Lib\\Dict\\flip", + "HH\\Lib\\Dict\\from_async", "HH\\Lib\\Dict\\from_entries", "HH\\Lib\\Dict\\from_keys", "HH\\Lib\\Dict\\from_keys_async", @@ -69,8 +72,10 @@ fn main() -> Result<()> { "HH\\Lib\\Dict\\map_with_key", "HH\\Lib\\Dict\\map_with_key_async", "HH\\Lib\\Dict\\merge", + "HH\\Lib\\Dict\\select_keys", "HH\\Lib\\Dict\\take", "HH\\Lib\\Keyset\\diff", + "HH\\Lib\\Keyset\\equal", "HH\\Lib\\Keyset\\filter", "HH\\Lib\\Keyset\\filter_async", "HH\\Lib\\Keyset\\filter_nulls", @@ -115,6 +120,7 @@ fn main() -> Result<()> { "HH\\Lib\\Regex\\matches", "HH\\Lib\\Regex\\replace", "HH\\Lib\\Str\\capitalize", + "HH\\Lib\\Str\\capitalize_words", "HH\\Lib\\Str\\chunk", "HH\\Lib\\Str\\compare", "HH\\Lib\\Str\\compare_ci", @@ -123,24 +129,27 @@ fn main() -> Result<()> { "HH\\Lib\\Str\\ends_with", "HH\\Lib\\Str\\ends_with_ci", "HH\\Lib\\Str\\format", + "HH\\Lib\\Str\\format_number", "HH\\Lib\\Str\\is_empty", "HH\\Lib\\Str\\join", "HH\\Lib\\Str\\length", "HH\\Lib\\Str\\lowercase", + "HH\\Lib\\Str\\repeat", "HH\\Lib\\Str\\replace", "HH\\Lib\\Str\\replace_ci", + "HH\\Lib\\Str\\replace_every", "HH\\Lib\\Str\\search", "HH\\Lib\\Str\\slice", "HH\\Lib\\Str\\split", "HH\\Lib\\Str\\starts_with", "HH\\Lib\\Str\\starts_with_ci", + "HH\\Lib\\Str\\strip_prefix", "HH\\Lib\\Str\\strip_suffix", "HH\\Lib\\Str\\to_int", "HH\\Lib\\Str\\trim", "HH\\Lib\\Str\\trim_left", "HH\\Lib\\Str\\trim_right", "HH\\Lib\\Str\\uppercase", - "HH\\Lib\\String\\strip_prefix", "HH\\Lib\\Vec\\chunk", "HH\\Lib\\Vec\\concat", "HH\\Lib\\Vec\\diff", @@ -150,6 +159,7 @@ fn main() -> Result<()> { "HH\\Lib\\Vec\\filter_nulls", "HH\\Lib\\Vec\\filter_with_key", "HH\\Lib\\Vec\\flatten", + "HH\\Lib\\Vec\\from_async", "HH\\Lib\\Vec\\intersect", "HH\\Lib\\Vec\\keys", "HH\\Lib\\Vec\\map", @@ -182,8 +192,8 @@ fn main() -> Result<()> { "Hakana\\SecurityAnalysis\\IgnorePathIfTrue", "Hakana\\SecurityAnalysis\\RemoveTaintsWhenReturningTrue", "Hakana\\SecurityAnalysis\\Sanitize", - "Hakana\\SecurityAnalysis\\Sink", "Hakana\\SecurityAnalysis\\ShapeSource", + "Hakana\\SecurityAnalysis\\Sink", "Hakana\\SecurityAnalysis\\Source", "Hakana\\SecurityAnalysis\\SpecializeCall", "Hakana\\SpecialTypes\\LiteralString", @@ -206,6 +216,7 @@ fn main() -> Result<()> { "base64_decode", "base64_encode", "basename", + "bin2hex", "chop", "chunk_split", "class_exists", @@ -213,6 +224,7 @@ fn main() -> Result<()> { "convert_uudecode", "convert_uuencode", "count", + "ctype_lower", "date", "date_format", "debug_backtrace", @@ -224,6 +236,7 @@ fn main() -> Result<()> { "filename", "fromItems", "function_exists", + "get_class", "get_object_vars", "hash_equals", "hash_hmac", @@ -239,12 +252,14 @@ fn main() -> Result<()> { "in_array", "include", "intval", + "ip2long", "isset", "join", "json_decode", "json_encode", "keyExists", "lcfirst", + "log", "ltrim", "mb_strlen", "mb_strtolower", @@ -253,11 +268,13 @@ fn main() -> Result<()> { "microtime", "mktime", "nl2br", + "ord", "parent", "password_hash", "pathinfo", "preg_filter", "preg_match", + "preg_match_all_with_matches", "preg_match_with_matches", "preg_quote", "preg_replace", @@ -269,6 +286,7 @@ fn main() -> Result<()> { "quoted_printable_encode", "rand", "range", + "rawurlencode", "realpath", "removeKey", "rtrim", @@ -286,6 +304,7 @@ fn main() -> Result<()> { "str_shuffle", "str_split", "strchr", + "strcmp", "strgetcsv", "strip_tags", "stripcslashes", @@ -293,6 +312,7 @@ fn main() -> Result<()> { "stristr", "strpad", "strpbrk", + "strpos", "strrchr", "strrev", "strstr", @@ -301,6 +321,7 @@ fn main() -> Result<()> { "strtoupper", "strval", "substr", + "substr_count", "substr_replace", "this", "toArray", @@ -310,6 +331,7 @@ fn main() -> Result<()> { "ucfirst", "ucwords", "unset", + "urldecode", "urlencode", "utf8_encode", "var_dump",