Skip to content

Commit

Permalink
Parse .all_static_fields in model files
Browse files Browse the repository at this point in the history
Summary:
# Context
In some cases, users might want to know which specific field or attribute of the sink on an argument accessed.
If the whole argument was marked as a sink, then there is no way to know whether a specific field was accessed.
If a specific field is marked as a sink using `ParameterPath[_.foo]`, we provide that field in the taint output as `leaf:argument[foo]`. However, one would have to mark all fields with `ParameterPath[]` manually if they want to track all fields.
Note that using `ParameterPath[_.all()]` does not solve that problem, the sink is on a special catch-all field `*`, hence the resulting taint will have `leaf:argument[*]`.

# Solution
In the following diffs, we will introduce a new access path `_.all_static_fields()` which adds sinks on all statically-known fields, inferred from the type annotation. Note that this solution will be quite computationally expensive since it will introduce a different sink for each field.

This diff implements parsing the new access path.

Reviewed By: tianhan0

Differential Revision: D47097963

fbshipit-source-id: 7710ec532e831a7cbaf1b76bb4ca0834d125aee6
  • Loading branch information
arthaud authored and facebook-github-bot committed Jun 29, 2023
1 parent 53627dd commit 1d75dfa
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 77 deletions.
18 changes: 16 additions & 2 deletions source/interprocedural_analyses/taint/modelParseResult.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,26 @@ module CollapseDepth = struct
let show = Format.asprintf "%a" pp
end

module TaintPath = struct
type t =
| Regular of AccessPath.Path.t
| AllStaticFields
[@@deriving equal]

let pp formatter = function
| Regular path -> AccessPath.Path.pp formatter path
| AllStaticFields -> Format.fprintf formatter ".all_static_fields()"


let show = Format.asprintf "%a" pp
end

module TaintFeatures = struct
type t = {
breadcrumbs: Features.Breadcrumb.t list;
via_features: Features.ViaFeature.t list;
applies_to: AccessPath.Path.t option;
parameter_path: AccessPath.Path.t option;
parameter_path: TaintPath.t option;
return_path: AccessPath.Path.t option;
update_path: AccessPath.Path.t option;
leaf_names: Features.LeafName.t list;
Expand Down Expand Up @@ -150,7 +164,7 @@ module TaintFeatures = struct
in
features
|> add_path_option ~name:"AppliesTo" applies_to
|> add_path_option ~name:"ParameterPath" parameter_path
|> add_option ~name:"ParameterPath" ~pp:TaintPath.pp parameter_path
|> add_path_option ~name:"ReturnPath" return_path
|> add_path_option ~name:"UpdatePath" update_path
|> add_option ~name:"TraceLength" ~pp:Int.pp trace_length
Expand Down
11 changes: 9 additions & 2 deletions source/interprocedural_analyses/taint/modelParseResult.mli
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,19 @@ module CollapseDepth : sig
[@@deriving equal]
end

module TaintPath : sig
type t =
| Regular of AccessPath.Path.t
| AllStaticFields
[@@deriving equal, show]
end

module TaintFeatures : sig
type t = {
breadcrumbs: Features.Breadcrumb.t list;
via_features: Features.ViaFeature.t list;
applies_to: AccessPath.Path.t option;
parameter_path: AccessPath.Path.t option;
parameter_path: TaintPath.t option;
return_path: AccessPath.Path.t option;
update_path: AccessPath.Path.t option;
leaf_names: Features.LeafName.t list;
Expand Down Expand Up @@ -68,7 +75,7 @@ module TaintKindsWithFeatures : sig

val from_via_feature : Features.ViaFeature.t -> t

val from_parameter_path : AccessPath.Path.t -> t
val from_parameter_path : TaintPath.t -> t

val from_return_path : AccessPath.Path.t -> t

Expand Down
145 changes: 92 additions & 53 deletions source/interprocedural_analyses/taint/modelParser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ let base_name expression =
| _ -> None


let rec parse_access_path ~path ~location expression =
let parse_access_path ~path ~location expression =
let module Label = Abstract.TreeDomain.Label in
let open Core.Result in
let annotation_error reason =
Expand All @@ -112,52 +112,75 @@ let rec parse_access_path ~path ~location expression =
~location
(InvalidAccessPath { access_path = expression; reason })
in
match Node.value expression with
| Expression.Name (Name.Identifier "_") -> Ok []
| Expression.Name (Name.Identifier _) ->
Error (annotation_error "access path must start with `_`")
| Expression.Name (Name.Attribute { base; attribute; _ }) ->
(* The analysis does not currently distinguish between fields and indices.
* Silently convert fields to indices to prevent confusion. *)
parse_access_path ~path ~location base
>>| fun base -> base @ [Label.create_name_index attribute]
| Expression.Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "__getitem__"; _ }); _ };
arguments = [{ Call.Argument.value = argument; _ }];
} -> (
parse_access_path ~path ~location base
>>= fun base ->
match Node.value argument with
| Expression.Constant (Constant.Integer index) -> Ok (base @ [Label.create_int_index index])
| Expression.Constant (Constant.String { StringLiteral.value = key; _ }) ->
Ok (base @ [Label.create_name_index key])
| _ ->
Error
(annotation_error
(Format.sprintf
"expected int or string literal argument for index access, got `%s`"
(Expression.show argument))))
| Expression.Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "keys"; _ }); _ };
arguments = [];
} ->
parse_access_path ~path ~location base >>| fun base -> base @ [AccessPath.dictionary_keys]
| Expression.Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "all"; _ }); _ };
arguments = [];
} ->
parse_access_path ~path ~location base >>| fun base -> base @ [Label.AnyIndex]
| Expression.Call { callee = { Node.value = Name (Name.Attribute { base; attribute; _ }); _ }; _ }
->
parse_access_path ~path ~location base
>>= fun _ ->
Error
(annotation_error
(Format.sprintf "unexpected method call `%s` (allowed: `keys`, `all`)" attribute))
| _ -> Error (annotation_error "unexpected expression")
let rec parse_expression expression =
match Node.value expression with
| Expression.Name (Name.Identifier "_") -> Ok (TaintPath.Regular [])
| Expression.Name (Name.Identifier _) ->
Error (annotation_error "access path must start with `_`")
| Expression.Name (Name.Attribute { base; attribute; _ }) ->
(* The analysis does not currently distinguish between fields and indices.
* Silently convert fields to indices to prevent confusion. *)
parse_regular_path base
>>| fun base -> TaintPath.Regular (base @ [Label.create_name_index attribute])
| Expression.Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "__getitem__"; _ }); _ };
arguments = [{ Call.Argument.value = argument; _ }];
} -> (
parse_regular_path base
>>= fun base ->
match Node.value argument with
| Expression.Constant (Constant.Integer index) ->
Ok (TaintPath.Regular (base @ [Label.create_int_index index]))
| Expression.Constant (Constant.String { StringLiteral.value = key; _ }) ->
Ok (TaintPath.Regular (base @ [Label.create_name_index key]))
| _ ->
Error
(annotation_error
(Format.sprintf
"expected int or string literal argument for index access, got `%s`"
(Expression.show argument))))
| Expression.Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "keys"; _ }); _ };
arguments = [];
} ->
parse_regular_path base
>>| fun base -> TaintPath.Regular (base @ [AccessPath.dictionary_keys])
| Expression.Call
{
callee = { Node.value = Name (Name.Attribute { base; attribute = "all"; _ }); _ };
arguments = [];
} ->
parse_regular_path base >>| fun base -> TaintPath.Regular (base @ [Label.AnyIndex])
| Expression.Call
{
callee =
{ Node.value = Name (Name.Attribute { base; attribute = "all_static_fields"; _ }); _ };
arguments = [];
} -> (
parse_regular_path base
>>= function
| [] -> Ok TaintPath.AllStaticFields
| _ -> Error (annotation_error "`all_static_fields()` can only be used on `_`"))
| Expression.Call
{ callee = { Node.value = Name (Name.Attribute { base; attribute; _ }); _ }; _ } ->
parse_expression base
>>= fun _ ->
Error
(annotation_error
(Format.sprintf
"unexpected method call `%s` (allowed: `keys`, `all`, `all_static_fields`)"
attribute))
| _ -> Error (annotation_error "unexpected expression")
and parse_regular_path expression =
match parse_expression expression with
| Ok (TaintPath.Regular path) -> Ok path
| Ok TaintPath.AllStaticFields ->
Error (annotation_error "cannot access attributes or methods of `all_static_fields()`")
| Error _ as error -> error
in
parse_expression expression


let rec parse_annotations
Expand Down Expand Up @@ -338,10 +361,20 @@ let rec parse_annotations
| Some "ParameterPath" ->
parse_access_path ~path ~location expression
>>| TaintKindsWithFeatures.from_parameter_path
| Some "ReturnPath" ->
parse_access_path ~path ~location expression >>| TaintKindsWithFeatures.from_return_path
| Some "UpdatePath" ->
parse_access_path ~path ~location expression >>| TaintKindsWithFeatures.from_update_path
| Some "ReturnPath" -> (
parse_access_path ~path ~location expression
>>= function
| TaintPath.Regular path -> Ok (TaintKindsWithFeatures.from_return_path path)
| TaintPath.AllStaticFields ->
Error
(annotation_error "`all_static_fields()` is not allowed within `ReturnPath[]`"))
| Some "UpdatePath" -> (
parse_access_path ~path ~location expression
>>= function
| TaintPath.Regular path -> Ok (TaintKindsWithFeatures.from_update_path path)
| TaintPath.AllStaticFields ->
Error
(annotation_error "`all_static_fields()` is not allowed within `UpdatePath[]`"))
| Some "CollapseDepth" ->
extract_collapse_depth expression
>>| fun depth -> TaintKindsWithFeatures.from_collapse_depth (CollapseDepth.Value depth)
Expand Down Expand Up @@ -832,13 +865,16 @@ let path_for_source_or_sink ~kind ~root ~features =
in
match features with
| {
TaintFeatures.parameter_path = Some parameter_path;
TaintFeatures.parameter_path = Some (TaintPath.Regular parameter_path);
applies_to = None;
return_path = None;
update_path = None;
_;
} ->
Ok parameter_path
| { parameter_path = Some TaintPath.AllStaticFields; _ } ->
(* TODO(T148742302): implement all_static_fields() *)
Error "`all_static_fields()` is not currently implemented"
| { return_path = Some _; _ } ->
Error (Format.sprintf "Invalid ReturnPath annotation for %s" kind)
| { update_path = Some _; _ } ->
Expand Down Expand Up @@ -990,7 +1026,10 @@ let introduce_taint_in_taint_out
let input_path =
match features with
| { applies_to; parameter_path = None; _ } -> Ok (Option.value ~default:[] applies_to)
| { parameter_path = Some parameter_path; applies_to = None; _ } -> Ok parameter_path
| { parameter_path = Some (TaintPath.Regular parameter_path); applies_to = None; _ } ->
Ok parameter_path
| { parameter_path = Some TaintPath.AllStaticFields; _ } ->
Error "`all_static_fields()` is not allowed within `TaintInTaintOut[]`"
| _ ->
Error
(Format.asprintf
Expand Down
2 changes: 1 addition & 1 deletion source/interprocedural_analyses/taint/modelParser.mli
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ val parse_access_path
: path:PyrePath.t option ->
location:Ast.Location.t ->
Ast.Expression.t ->
(AccessPath.Path.t, ModelVerificationError.t) result
(ModelParseResult.TaintPath.t, ModelVerificationError.t) result

val parse_decorator_modes
: path:PyrePath.t ->
Expand Down
87 changes: 68 additions & 19 deletions source/interprocedural_analyses/taint/test/modelTest.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3605,7 +3605,7 @@ let test_invalid_models context =
~model_source:"def test.sink(parameter: TaintSink[Test, ParameterPath[_.unknown()]]): ..."
~expect:
"`_.unknown()` is an invalid access path: unexpected method call `unknown` (allowed: `keys`, \
`all`)"
`all`, `all_static_fields`)"
();
assert_invalid_model
~model_source:"def test.sink(parameter: TaintSink[Test, ReturnPath[_[0]]]): ..."
Expand Down Expand Up @@ -7193,6 +7193,7 @@ let test_query_parsing context =

let test_access_path _ =
let module Label = Abstract.TreeDomain.Label in
let module TaintPath = ModelParseResult.TaintPath in
let parse_access_path source =
PyreParser.Parser.parse_exn [source]
|> (function
Expand All @@ -7206,42 +7207,55 @@ let test_access_path _ =
assert_bool
(Format.asprintf "Unexpected access path error: %a" ModelVerificationError.pp error)
false
| Ok path -> assert_equal ~printer:AccessPath.Path.show ~cmp:AccessPath.Path.equal expected path
| Ok path -> assert_equal ~printer:TaintPath.show ~cmp:TaintPath.equal expected path
in
let assert_invalid_path ~source ~expected =
match parse_access_path source with
| Error error -> assert_equal ~printer:ident expected (ModelVerificationError.display error)
| Ok _ -> assert_bool (Format.sprintf "Unexpected valid access path for: %s" source) false
in
assert_valid_path ~source:"_" ~expected:[];
assert_valid_path ~source:"_.foo" ~expected:[Label.Index "foo"];
assert_valid_path ~source:"_.foo.bar" ~expected:[Label.Index "foo"; Label.Index "bar"];
assert_valid_path ~source:"_['foo']" ~expected:[Label.Index "foo"];
assert_valid_path ~source:"_[\"foo\"]" ~expected:[Label.Index "foo"];
assert_valid_path ~source:"_[0]" ~expected:[Label.Index "0"];
assert_valid_path ~source:"_['foo']['bar']" ~expected:[Label.Index "foo"; Label.Index "bar"];
assert_valid_path ~source:"_['foo'].bar" ~expected:[Label.Index "foo"; Label.Index "bar"];
assert_valid_path ~source:"_.foo['bar']" ~expected:[Label.Index "foo"; Label.Index "bar"];
assert_valid_path ~source:"_.foo[0]" ~expected:[Label.Index "foo"; Label.Index "0"];
assert_valid_path ~source:"_.keys()" ~expected:[AccessPath.dictionary_keys];
assert_valid_path ~source:"_.all()" ~expected:[Label.AnyIndex];
assert_valid_path ~source:"_" ~expected:(TaintPath.Regular []);
assert_valid_path ~source:"_.foo" ~expected:(TaintPath.Regular [Label.Index "foo"]);
assert_valid_path
~source:"_.foo.bar"
~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]);
assert_valid_path ~source:"_['foo']" ~expected:(TaintPath.Regular [Label.Index "foo"]);
assert_valid_path ~source:"_[\"foo\"]" ~expected:(TaintPath.Regular [Label.Index "foo"]);
assert_valid_path ~source:"_[0]" ~expected:(TaintPath.Regular [Label.Index "0"]);
assert_valid_path
~source:"_['foo']['bar']"
~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]);
assert_valid_path
~source:"_['foo'].bar"
~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]);
assert_valid_path
~source:"_.foo['bar']"
~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "bar"]);
assert_valid_path
~source:"_.foo[0]"
~expected:(TaintPath.Regular [Label.Index "foo"; Label.Index "0"]);
assert_valid_path ~source:"_.keys()" ~expected:(TaintPath.Regular [AccessPath.dictionary_keys]);
assert_valid_path ~source:"_.all()" ~expected:(TaintPath.Regular [Label.AnyIndex]);
assert_valid_path ~source:"_.all_static_fields()" ~expected:TaintPath.AllStaticFields;
assert_valid_path
~source:"_[0].keys().foo.all()"
~expected:[Label.Index "0"; AccessPath.dictionary_keys; Label.Index "foo"; Label.AnyIndex];
~expected:
(TaintPath.Regular
[Label.Index "0"; AccessPath.dictionary_keys; Label.Index "foo"; Label.AnyIndex]);
assert_valid_path
~source:"_.all()['a'].bar"
~expected:[Label.AnyIndex; Label.Index "a"; Label.Index "bar"];
~expected:(TaintPath.Regular [Label.AnyIndex; Label.Index "a"; Label.Index "bar"]);
assert_invalid_path
~source:"foo"
~expected:"`foo` is an invalid access path: access path must start with `_`";
assert_invalid_path
~source:"foo.bar"
~expected:"`foo` is an invalid access path: access path must start with `_`";
~expected:"`foo.bar` is an invalid access path: access path must start with `_`";
assert_invalid_path
~source:"_.a-b"
~expected:
"`_.a.__sub__(b)` is an invalid access path: unexpected method call `__sub__` (allowed: \
`keys`, `all`)";
`keys`, `all`, `all_static_fields`)";
assert_invalid_path
~source:"_[a]"
~expected:
Expand All @@ -7256,7 +7270,42 @@ let test_access_path _ =
~source:"_.keys().something()"
~expected:
"`_.keys().something()` is an invalid access path: unexpected method call `something` \
(allowed: `keys`, `all`)";
(allowed: `keys`, `all`, `all_static_fields`)";
assert_invalid_path
~source:"_.foo.all_static_fields()"
~expected:
"`_.foo.all_static_fields()` is an invalid access path: `all_static_fields()` can only be \
used on `_`";
assert_invalid_path
~source:"_['foo'].all_static_fields()"
~expected:
"`_[\"foo\"].all_static_fields()` is an invalid access path: `all_static_fields()` can only \
be used on `_`";
assert_invalid_path
~source:"_.all().all_static_fields()"
~expected:
"`_.all().all_static_fields()` is an invalid access path: `all_static_fields()` can only be \
used on `_`";
assert_invalid_path
~source:"_.keys().all_static_fields()"
~expected:
"`_.keys().all_static_fields()` is an invalid access path: `all_static_fields()` can only be \
used on `_`";
assert_invalid_path
~source:"_.all_static_fields().foo"
~expected:
"`_.all_static_fields().foo` is an invalid access path: cannot access attributes or methods \
of `all_static_fields()`";
assert_invalid_path
~source:"_.all_static_fields()['foo']"
~expected:
"`_.all_static_fields()[\"foo\"]` is an invalid access path: cannot access attributes or \
methods of `all_static_fields()`";
assert_invalid_path
~source:"_.all_static_fields().all_static_fields()"
~expected:
"`_.all_static_fields().all_static_fields()` is an invalid access path: cannot access \
attributes or methods of `all_static_fields()`";
()


Expand Down

0 comments on commit 1d75dfa

Please sign in to comment.