From 1d75dfa6072430682b8d8df1094d5e2af78401d4 Mon Sep 17 00:00:00 2001 From: Maxime Arthaud Date: Thu, 29 Jun 2023 06:46:17 -0700 Subject: [PATCH] Parse `.all_static_fields` in model files Summary: # Context In some cases, users might want to know which specific field or attribute of the sink on an argument accessed. If the whole argument was marked as a sink, then there is no way to know whether a specific field was accessed. If a specific field is marked as a sink using `ParameterPath[_.foo]`, we provide that field in the taint output as `leaf:argument[foo]`. However, one would have to mark all fields with `ParameterPath[]` manually if they want to track all fields. Note that using `ParameterPath[_.all()]` does not solve that problem, the sink is on a special catch-all field `*`, hence the resulting taint will have `leaf:argument[*]`. # Solution In the following diffs, we will introduce a new access path `_.all_static_fields()` which adds sinks on all statically-known fields, inferred from the type annotation. Note that this solution will be quite computationally expensive since it will introduce a different sink for each field. This diff implements parsing the new access path. Reviewed By: tianhan0 Differential Revision: D47097963 fbshipit-source-id: 7710ec532e831a7cbaf1b76bb4ca0834d125aee6 --- .../taint/modelParseResult.ml | 18 ++- .../taint/modelParseResult.mli | 11 +- .../taint/modelParser.ml | 145 +++++++++++------- .../taint/modelParser.mli | 2 +- .../taint/test/modelTest.ml | 87 ++++++++--- 5 files changed, 186 insertions(+), 77 deletions(-) diff --git a/source/interprocedural_analyses/taint/modelParseResult.ml b/source/interprocedural_analyses/taint/modelParseResult.ml index 4738215f670..681e86a4fe1 100644 --- a/source/interprocedural_analyses/taint/modelParseResult.ml +++ b/source/interprocedural_analyses/taint/modelParseResult.ml @@ -39,12 +39,26 @@ module CollapseDepth = struct let show = Format.asprintf "%a" pp end +module TaintPath = struct + type t = + | Regular of AccessPath.Path.t + | AllStaticFields + [@@deriving equal] + + let pp formatter = function + | Regular path -> AccessPath.Path.pp formatter path + | AllStaticFields -> Format.fprintf formatter ".all_static_fields()" + + + let show = Format.asprintf "%a" pp +end + module TaintFeatures = struct type t = { breadcrumbs: Features.Breadcrumb.t list; via_features: Features.ViaFeature.t list; applies_to: AccessPath.Path.t option; - parameter_path: AccessPath.Path.t option; + parameter_path: TaintPath.t option; return_path: AccessPath.Path.t option; update_path: AccessPath.Path.t option; leaf_names: Features.LeafName.t list; @@ -150,7 +164,7 @@ module TaintFeatures = struct in features |> add_path_option ~name:"AppliesTo" applies_to - |> add_path_option ~name:"ParameterPath" parameter_path + |> add_option ~name:"ParameterPath" ~pp:TaintPath.pp parameter_path |> add_path_option ~name:"ReturnPath" return_path |> add_path_option ~name:"UpdatePath" update_path |> add_option ~name:"TraceLength" ~pp:Int.pp trace_length diff --git a/source/interprocedural_analyses/taint/modelParseResult.mli b/source/interprocedural_analyses/taint/modelParseResult.mli index 1313ecdfe51..c4836c35d9a 100644 --- a/source/interprocedural_analyses/taint/modelParseResult.mli +++ b/source/interprocedural_analyses/taint/modelParseResult.mli @@ -26,12 +26,19 @@ module CollapseDepth : sig [@@deriving equal] end +module TaintPath : sig + type t = + | Regular of AccessPath.Path.t + | AllStaticFields + [@@deriving equal, show] +end + module TaintFeatures : sig type t = { breadcrumbs: Features.Breadcrumb.t list; via_features: Features.ViaFeature.t list; applies_to: AccessPath.Path.t option; - parameter_path: AccessPath.Path.t option; + parameter_path: TaintPath.t option; return_path: AccessPath.Path.t option; update_path: AccessPath.Path.t option; leaf_names: Features.LeafName.t list; @@ -68,7 +75,7 @@ module TaintKindsWithFeatures : sig val from_via_feature : Features.ViaFeature.t -> t - val from_parameter_path : AccessPath.Path.t -> t + val from_parameter_path : TaintPath.t -> t val from_return_path : AccessPath.Path.t -> t diff --git a/source/interprocedural_analyses/taint/modelParser.ml b/source/interprocedural_analyses/taint/modelParser.ml index 827b1201416..45f1401bf45 100644 --- a/source/interprocedural_analyses/taint/modelParser.ml +++ b/source/interprocedural_analyses/taint/modelParser.ml @@ -103,7 +103,7 @@ let base_name expression = | _ -> None -let rec parse_access_path ~path ~location expression = +let parse_access_path ~path ~location expression = let module Label = Abstract.TreeDomain.Label in let open Core.Result in let annotation_error reason = @@ -112,52 +112,75 @@ let rec parse_access_path ~path ~location expression = ~location (InvalidAccessPath { access_path = expression; reason }) in - match Node.value expression with - | Expression.Name (Name.Identifier "_") -> Ok [] - | Expression.Name (Name.Identifier _) -> - Error (annotation_error "access path must start with `_`") - | Expression.Name (Name.Attribute { base; attribute; _ }) -> - (* The analysis does not currently distinguish between fields and indices. - * Silently convert fields to indices to prevent confusion. *) - parse_access_path ~path ~location base - >>| fun base -> base @ [Label.create_name_index attribute] - | Expression.Call - { - callee = { Node.value = Name (Name.Attribute { base; attribute = "__getitem__"; _ }); _ }; - arguments = [{ Call.Argument.value = argument; _ }]; - } -> ( - parse_access_path ~path ~location base - >>= fun base -> - match Node.value argument with - | Expression.Constant (Constant.Integer index) -> Ok (base @ [Label.create_int_index index]) - | Expression.Constant (Constant.String { StringLiteral.value = key; _ }) -> - Ok (base @ [Label.create_name_index key]) - | _ -> - Error - (annotation_error - (Format.sprintf - "expected int or string literal argument for index access, got `%s`" - (Expression.show argument)))) - | Expression.Call - { - callee = { Node.value = Name (Name.Attribute { base; attribute = "keys"; _ }); _ }; - arguments = []; - } -> - parse_access_path ~path ~location base >>| fun base -> base @ [AccessPath.dictionary_keys] - | Expression.Call - { - callee = { Node.value = Name (Name.Attribute { base; attribute = "all"; _ }); _ }; - arguments = []; - } -> - parse_access_path ~path ~location base >>| fun base -> base @ [Label.AnyIndex] - | Expression.Call { callee = { Node.value = Name (Name.Attribute { base; attribute; _ }); _ }; _ } - -> - parse_access_path ~path ~location base - >>= fun _ -> - Error - (annotation_error - (Format.sprintf "unexpected method call `%s` (allowed: `keys`, `all`)" attribute)) - | _ -> Error (annotation_error "unexpected expression") + let rec parse_expression expression = + match Node.value expression with + | Expression.Name (Name.Identifier "_") -> Ok (TaintPath.Regular []) + | Expression.Name (Name.Identifier _) -> + Error (annotation_error "access path must start with `_`") + | Expression.Name (Name.Attribute { base; attribute; _ }) -> + (* The analysis does not currently distinguish between fields and indices. + * Silently convert fields to indices to prevent confusion. *) + parse_regular_path base + >>| fun base -> TaintPath.Regular (base @ [Label.create_name_index attribute]) + | Expression.Call + { + callee = { Node.value = Name (Name.Attribute { base; attribute = "__getitem__"; _ }); _ }; + arguments = [{ Call.Argument.value = argument; _ }]; + } -> ( + parse_regular_path base + >>= fun base -> + match Node.value argument with + | Expression.Constant (Constant.Integer index) -> + Ok (TaintPath.Regular (base @ [Label.create_int_index index])) + | Expression.Constant (Constant.String { StringLiteral.value = key; _ }) -> + Ok (TaintPath.Regular (base @ [Label.create_name_index key])) + | _ -> + Error + (annotation_error + (Format.sprintf + "expected int or string literal argument for index access, got `%s`" + (Expression.show argument)))) + | Expression.Call + { + callee = { Node.value = Name (Name.Attribute { base; attribute = "keys"; _ }); _ }; + arguments = []; + } -> + parse_regular_path base + >>| fun base -> TaintPath.Regular (base @ [AccessPath.dictionary_keys]) + | Expression.Call + { + callee = { Node.value = Name (Name.Attribute { base; attribute = "all"; _ }); _ }; + arguments = []; + } -> + parse_regular_path base >>| fun base -> TaintPath.Regular (base @ [Label.AnyIndex]) + | Expression.Call + { + callee = + { Node.value = Name (Name.Attribute { base; attribute = "all_static_fields"; _ }); _ }; + arguments = []; + } -> ( + parse_regular_path base + >>= function + | [] -> Ok TaintPath.AllStaticFields + | _ -> Error (annotation_error "`all_static_fields()` can only be used on `_`")) + | Expression.Call + { callee = { Node.value = Name (Name.Attribute { base; attribute; _ }); _ }; _ } -> + parse_expression base + >>= fun _ -> + Error + (annotation_error + (Format.sprintf + "unexpected method call `%s` (allowed: `keys`, `all`, `all_static_fields`)" + attribute)) + | _ -> Error (annotation_error "unexpected expression") + and parse_regular_path expression = + match parse_expression expression with + | Ok (TaintPath.Regular path) -> Ok path + | Ok TaintPath.AllStaticFields -> + Error (annotation_error "cannot access attributes or methods of `all_static_fields()`") + | Error _ as error -> error + in + parse_expression expression let rec parse_annotations @@ -338,10 +361,20 @@ let rec parse_annotations | Some "ParameterPath" -> parse_access_path ~path ~location expression >>| TaintKindsWithFeatures.from_parameter_path - | Some "ReturnPath" -> - parse_access_path ~path ~location expression >>| TaintKindsWithFeatures.from_return_path - | Some "UpdatePath" -> - parse_access_path ~path ~location expression >>| TaintKindsWithFeatures.from_update_path + | Some "ReturnPath" -> ( + parse_access_path ~path ~location expression + >>= function + | TaintPath.Regular path -> Ok (TaintKindsWithFeatures.from_return_path path) + | TaintPath.AllStaticFields -> + Error + (annotation_error "`all_static_fields()` is not allowed within `ReturnPath[]`")) + | Some "UpdatePath" -> ( + parse_access_path ~path ~location expression + >>= function + | TaintPath.Regular path -> Ok (TaintKindsWithFeatures.from_update_path path) + | TaintPath.AllStaticFields -> + Error + (annotation_error "`all_static_fields()` is not allowed within `UpdatePath[]`")) | Some "CollapseDepth" -> extract_collapse_depth expression >>| fun depth -> TaintKindsWithFeatures.from_collapse_depth (CollapseDepth.Value depth) @@ -832,13 +865,16 @@ let path_for_source_or_sink ~kind ~root ~features = in match features with | { - TaintFeatures.parameter_path = Some parameter_path; + TaintFeatures.parameter_path = Some (TaintPath.Regular parameter_path); applies_to = None; return_path = None; update_path = None; _; } -> Ok parameter_path + | { parameter_path = Some TaintPath.AllStaticFields; _ } -> + (* TODO(T148742302): implement all_static_fields() *) + Error "`all_static_fields()` is not currently implemented" | { return_path = Some _; _ } -> Error (Format.sprintf "Invalid ReturnPath annotation for %s" kind) | { update_path = Some _; _ } -> @@ -990,7 +1026,10 @@ let introduce_taint_in_taint_out let input_path = match features with | { applies_to; parameter_path = None; _ } -> Ok (Option.value ~default:[] applies_to) - | { parameter_path = Some parameter_path; applies_to = None; _ } -> Ok parameter_path + | { parameter_path = Some (TaintPath.Regular parameter_path); applies_to = None; _ } -> + Ok parameter_path + | { parameter_path = Some TaintPath.AllStaticFields; _ } -> + Error "`all_static_fields()` is not allowed within `TaintInTaintOut[]`" | _ -> Error (Format.asprintf diff --git a/source/interprocedural_analyses/taint/modelParser.mli b/source/interprocedural_analyses/taint/modelParser.mli index 60154fbbaa0..343c076080b 100644 --- a/source/interprocedural_analyses/taint/modelParser.mli +++ b/source/interprocedural_analyses/taint/modelParser.mli @@ -39,7 +39,7 @@ val parse_access_path : path:PyrePath.t option -> location:Ast.Location.t -> Ast.Expression.t -> - (AccessPath.Path.t, ModelVerificationError.t) result + (ModelParseResult.TaintPath.t, ModelVerificationError.t) result val parse_decorator_modes : path:PyrePath.t -> diff --git a/source/interprocedural_analyses/taint/test/modelTest.ml b/source/interprocedural_analyses/taint/test/modelTest.ml index ae30123d241..1160895a8c8 100644 --- a/source/interprocedural_analyses/taint/test/modelTest.ml +++ b/source/interprocedural_analyses/taint/test/modelTest.ml @@ -3605,7 +3605,7 @@ let test_invalid_models context = ~model_source:"def test.sink(parameter: TaintSink[Test, ParameterPath[_.unknown()]]): ..." ~expect: "`_.unknown()` is an invalid access path: unexpected method call `unknown` (allowed: `keys`, \ - `all`)" + `all`, `all_static_fields`)" (); assert_invalid_model ~model_source:"def test.sink(parameter: TaintSink[Test, ReturnPath[_[0]]]): ..." @@ -7193,6 +7193,7 @@ let test_query_parsing context = let test_access_path _ = let module Label = Abstract.TreeDomain.Label in + let module TaintPath = ModelParseResult.TaintPath in let parse_access_path source = PyreParser.Parser.parse_exn [source] |> (function @@ -7206,42 +7207,55 @@ let test_access_path _ = assert_bool (Format.asprintf "Unexpected access path error: %a" ModelVerificationError.pp error) false - | Ok path -> assert_equal ~printer:AccessPath.Path.show ~cmp:AccessPath.Path.equal expected path + | Ok path -> assert_equal ~printer:TaintPath.show ~cmp:TaintPath.equal expected path in let assert_invalid_path ~source ~expected = match parse_access_path source with | Error error -> assert_equal ~printer:ident expected (ModelVerificationError.display error) | Ok _ -> assert_bool (Format.sprintf "Unexpected valid access path for: %s" source) false in - assert_valid_path ~source:"_" ~expected:[]; - assert_valid_path ~source:"_.foo" ~expected:[Label.Index "foo"]; - assert_valid_path ~source:"_.foo.bar" ~expected:[Label.Index "foo"; Label.Index "bar"]; - assert_valid_path ~source:"_['foo']" ~expected:[Label.Index "foo"]; - assert_valid_path ~source:"_[\"foo\"]" ~expected:[Label.Index "foo"]; - assert_valid_path ~source:"_[0]" ~expected:[Label.Index "0"]; - assert_valid_path ~source:"_['foo']['bar']" ~expected:[Label.Index "foo"; Label.Index "bar"]; - assert_valid_path ~source:"_['foo'].bar" ~expected:[Label.Index "foo"; Label.Index "bar"]; - assert_valid_path ~source:"_.foo['bar']" ~expected:[Label.Index "foo"; Label.Index "bar"]; - assert_valid_path ~source:"_.foo[0]" ~expected:[Label.Index "foo"; Label.Index "0"]; - assert_valid_path ~source:"_.keys()" ~expected:[AccessPath.dictionary_keys]; - assert_valid_path ~source:"_.all()" ~expected:[Label.AnyIndex]; + assert_valid_path ~source:"_" ~expected:(TaintPath.Regular []); + assert_valid_path ~source:"_.foo" ~expected:(TaintPath.Regular [Label.Index "foo"]); + assert_valid_path + ~source:"_.foo.bar" + ~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]); + assert_valid_path ~source:"_['foo']" ~expected:(TaintPath.Regular [Label.Index "foo"]); + assert_valid_path ~source:"_[\"foo\"]" ~expected:(TaintPath.Regular [Label.Index "foo"]); + assert_valid_path ~source:"_[0]" ~expected:(TaintPath.Regular [Label.Index "0"]); + assert_valid_path + ~source:"_['foo']['bar']" + ~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]); + assert_valid_path + ~source:"_['foo'].bar" + ~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]); + assert_valid_path + ~source:"_.foo['bar']" + ~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]); + assert_valid_path + ~source:"_.foo[0]" + ~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "0"]); + assert_valid_path ~source:"_.keys()" ~expected:(TaintPath.Regular [AccessPath.dictionary_keys]); + assert_valid_path ~source:"_.all()" ~expected:(TaintPath.Regular [Label.AnyIndex]); + assert_valid_path ~source:"_.all_static_fields()" ~expected:TaintPath.AllStaticFields; assert_valid_path ~source:"_[0].keys().foo.all()" - ~expected:[Label.Index "0"; AccessPath.dictionary_keys; Label.Index "foo"; Label.AnyIndex]; + ~expected: + (TaintPath.Regular + [Label.Index "0"; AccessPath.dictionary_keys; Label.Index "foo"; Label.AnyIndex]); assert_valid_path ~source:"_.all()['a'].bar" - ~expected:[Label.AnyIndex; Label.Index "a"; Label.Index "bar"]; + ~expected:(TaintPath.Regular [Label.AnyIndex; Label.Index "a"; Label.Index "bar"]); assert_invalid_path ~source:"foo" ~expected:"`foo` is an invalid access path: access path must start with `_`"; assert_invalid_path ~source:"foo.bar" - ~expected:"`foo` is an invalid access path: access path must start with `_`"; + ~expected:"`foo.bar` is an invalid access path: access path must start with `_`"; assert_invalid_path ~source:"_.a-b" ~expected: "`_.a.__sub__(b)` is an invalid access path: unexpected method call `__sub__` (allowed: \ - `keys`, `all`)"; + `keys`, `all`, `all_static_fields`)"; assert_invalid_path ~source:"_[a]" ~expected: @@ -7256,7 +7270,42 @@ let test_access_path _ = ~source:"_.keys().something()" ~expected: "`_.keys().something()` is an invalid access path: unexpected method call `something` \ - (allowed: `keys`, `all`)"; + (allowed: `keys`, `all`, `all_static_fields`)"; + assert_invalid_path + ~source:"_.foo.all_static_fields()" + ~expected: + "`_.foo.all_static_fields()` is an invalid access path: `all_static_fields()` can only be \ + used on `_`"; + assert_invalid_path + ~source:"_['foo'].all_static_fields()" + ~expected: + "`_[\"foo\"].all_static_fields()` is an invalid access path: `all_static_fields()` can only \ + be used on `_`"; + assert_invalid_path + ~source:"_.all().all_static_fields()" + ~expected: + "`_.all().all_static_fields()` is an invalid access path: `all_static_fields()` can only be \ + used on `_`"; + assert_invalid_path + ~source:"_.keys().all_static_fields()" + ~expected: + "`_.keys().all_static_fields()` is an invalid access path: `all_static_fields()` can only be \ + used on `_`"; + assert_invalid_path + ~source:"_.all_static_fields().foo" + ~expected: + "`_.all_static_fields().foo` is an invalid access path: cannot access attributes or methods \ + of `all_static_fields()`"; + assert_invalid_path + ~source:"_.all_static_fields()['foo']" + ~expected: + "`_.all_static_fields()[\"foo\"]` is an invalid access path: cannot access attributes or \ + methods of `all_static_fields()`"; + assert_invalid_path + ~source:"_.all_static_fields().all_static_fields()" + ~expected: + "`_.all_static_fields().all_static_fields()` is an invalid access path: cannot access \ + attributes or methods of `all_static_fields()`"; ()