validate multipart hash consistency
ferd committed Apr 6, 2024
1 parent 0a3539e commit 3b7c747
Showing 5 changed files with 79 additions and 7 deletions.
6 changes: 4 additions & 2 deletions apps/revault/src/revault_file_disk.erl
@@ -148,8 +148,10 @@ multipart_final({state, Path, _PartsSeen, PartsTotal, Hash,
{Fd, HashState}},
Path, PartsTotal, Hash) ->
ok = file:close(Fd),
-Hash = crypto:hash_final(HashState),
-ok.
+case crypto:hash_final(HashState) of
+    Hash -> ok;
+    _BadHash -> error(invalid_hash)
+end.

%%%%%%%%%%%%%%%%%%%%%
%%% FILE WRAPPERS %%%
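With this change, multipart_final/4 either returns ok or raises error(invalid_hash), instead of crashing with an opaque badmatch when the finalized hash disagrees with the one passed in. A minimal sketch, not part of this commit, of how a caller could turn that error back into a value (finalize/4 is a hypothetical wrapper):

%% Hypothetical wrapper, not in the repository: converts the new
%% error(invalid_hash) crash into a tagged tuple for callers that prefer
%% pattern matching over letting the calling process crash.
finalize(State, Path, PartsTotal, Hash) ->
    try revault_file_disk:multipart_final(State, Path, PartsTotal, Hash) of
        ok -> ok
    catch
        error:invalid_hash -> {error, invalid_hash}
    end.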
2 changes: 0 additions & 2 deletions apps/revault/src/revault_fsm.erl
@@ -1116,8 +1116,6 @@ handle_multipart_file(MPState, F, Hash, PartNum, PartTotal, Bin) when PartNum =:
#{F := State} = MPState,
{ok, NewState} = revault_file:multipart_update(State, F, PartNum, PartTotal, Hash, Bin),
ok = revault_file:multipart_final(NewState, F, PartTotal, Hash),
-%% TODO: validate whether multipart file handling validates the
-%% hash we pass in, and otherwise add an explicit check
{done, maps:remove(F, MPState)}.

delete_file(Name, F, Meta) ->
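The deleted TODO is resolved by moving the comparison into the backends, so the existing match is all the fsm needs. A sketch, not part of the diff, of the behaviour the call site now relies on:

%% Sketch only: on a mismatch, multipart_final/4 raises error(invalid_hash)
%% in the backend, so this match never silently succeeds for corrupted data.
ok = revault_file:multipart_final(NewState, F, PartTotal, Hash)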
5 changes: 4 additions & 1 deletion apps/revault/src/revault_s3.erl
@@ -249,7 +249,10 @@ multipart_final({state, Path, _PartsSeen, PartsTotal, Hash,
{Tmp, UploadId, RollingHash, PartsAcc}},
Path, PartsTotal, Hash) ->
%% Compute our own final hash and the final checksum
-Hash = crypto:hash_final(RollingHash),
+case crypto:hash_final(RollingHash) of
+    Hash -> ok;
+    _ -> error(invalid_hash)
+end,
Chk = base64:encode(Hash),
%% S3 multipart upload does a checksum of checksums with a -N suffix.
%% We'll move the upload to get the final hash, but we still want to do
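For reference, the "checksum of checksums with a -N suffix" mentioned in the comment is the value the mocked test below builds as CM: the base64 of a hash over the concatenated per-part hashes, followed by -<part count>. A minimal sketch of that shape, mirroring the test suite's use of revault_file:hash_bin/1 (SHA-256, per the ChecksumSHA256 fields in the mocks); composite_checksum/1 is a hypothetical helper, not in the repository:

%% Hypothetical helper: reproduces the shape of the multipart checksum S3
%% reports, e.g. <<"...base64...-3">> for a three-part upload.
composite_checksum(Parts) ->
    PartHashes = << <<(revault_file:hash_bin(P))/binary>> || P <- Parts >>,
    N = integer_to_binary(length(Parts)),
    <<(base64:encode(revault_file:hash_bin(PartHashes)))/binary, "-", N/binary>>.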
24 changes: 23 additions & 1 deletion apps/revault/test/revault_file_disk_SUITE.erl
@@ -4,7 +4,7 @@
-include_lib("common_test/include/ct.hrl").

all() ->
-[read_range, multipart].
+[read_range, multipart, multipart_hash].

read_range() ->
[{doc, "general checks on reading subsets of files"}].
@@ -47,3 +47,25 @@ multipart(Config) ->
ok = revault_file_disk:multipart_final(State, File, Parts, Hash),
?assertEqual({ok, Bin}, file:read_file(File)),
ok.

multipart_hash() ->
[{doc, "Multipart API validates the hash when finalizing"}].
multipart_hash(Config) ->
WidthBytes = 100,
WidthBits = 8*WidthBytes,
Parts = 11,
Bin = <<0:WidthBits, 1:WidthBits, 2:WidthBits, 3:WidthBits, 4:WidthBits,
5:WidthBits, 6:WidthBits, 7:WidthBits, 8:WidthBits, 9:WidthBits, 10>>,
Hash = revault_file_disk:hash_bin(<<1, Bin/binary>>),
File = filename:join([?config(priv_dir, Config), "multipart.scratch"]),
{_, State} = lists:foldl(
fun(Part, {N,S}) ->
{ok, NewS} = revault_file_disk:multipart_update(S, File, N, Parts, Hash, Part),
{N+1, NewS}
end,
{1, revault_file_disk:multipart_init(File, Parts, Hash)},
[<<N:WidthBits>> || N <- lists:seq(0,Parts-2)]++[<<10>>]
),
?assertError(invalid_hash,
revault_file_disk:multipart_final(State, File, Parts, Hash)),
ok.
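The failure here is forced by the arguments rather than by the file contents: the parts written add up to Bin, but the Hash handed to the API is computed over <<1, Bin/binary>>, so the rolling hash the backend finalizes can never match it and multipart_final/4 must raise invalid_hash. A sketch of the two values being compared, assuming hash_bin/1 hashes the whole binary as it does above:

%% Not part of the test: why the ?assertError above holds.
ComputedHash = revault_file_disk:hash_bin(Bin),               %% what the backend accumulates
ClaimedHash  = revault_file_disk:hash_bin(<<1, Bin/binary>>), %% what the caller passed in
true = ComputedHash =/= ClaimedHash.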
49 changes: 48 additions & 1 deletion apps/revault/test/revault_s3_SUITE.erl
@@ -9,7 +9,8 @@
all() ->
[client_cache, consult, hash_cache, hash,
copy, list_uncached, list_cached, is_regular,
-write_file, delete, size, multipart, read_range].
+write_file, delete, size, multipart, multipart_hash,
+read_range].

init_per_suite(Config) ->
InitBucket = application:get_env(revault, bucket, undefined),
@@ -396,6 +397,52 @@ multipart(_Config) ->
?assertEqual(ok, revault_s3:multipart_final(S3, <<"path">>, 3, HA)),
ok.

multipart_hash() ->
[{doc, "mocked out call to the multipart functions, based "
"on data from the integration suite, checking checksums."}].
multipart_hash(_Config) ->
UploadId = <<"123456">>,
P1 = <<1>>,
P2 = <<2>>,
P3 = <<3>>,
Whole = <<P1/binary,P2/binary,P3/binary>>,
H1 = revault_file:hash_bin(P1),
H2 = revault_file:hash_bin(P2),
H3 = revault_file:hash_bin(P3),
HA = <<1, (revault_file:hash_bin(Whole))/binary>>,
CM = << (base64:encode(revault_file:hash_bin(<<H1/binary, H2/binary, H3/binary>>)))/binary, "-3" >>,
CA = base64:encode(HA),
meck:expect(aws_s3, create_multipart_upload,
fun(_Cli, _Bucket, _Path, _Query, _Opts) ->
{ok, #{<<"InitiateMultipartUploadResult">> =>
#{<<"UploadId">> => UploadId}},
{200, [], make_ref()}}
end),
meck:expect(aws_s3, upload_part,
fun(_Cli, _Bucket, _Path,
#{<<"PartNumber">> := _, <<"ChecksumSHA256">> := Chk},
_Opts) ->
{ok, #{<<"ETag">> => Chk, <<"ChecksumSHA256">> => Chk}, {200, [], make_ref()}}
end),
meck:expect(aws_s3, complete_multipart_upload,
fun(_Cli, _Bucket, _Path, _Query, _Opts) ->
{ok, #{<<"CompleteMultipartUploadResult">> =>
#{<<"ChecksumSHA256">> => CM}},
{200, [], make_ref()}}
end),
meck:expect(aws_s3, copy_object,
fun(_Cli, _Bucket, _To, _Query) ->
{ok, #{<<"CopyObjectResult">> => #{<<"ChecksumSHA256">> => CA}}, {200, []}}
end),
meck:expect(aws_s3, delete_object,
fun(_Cli, _Bucket, _Path, _Query) -> {ok, #{}, {200, []}} end),
S0 = revault_s3:multipart_init(<<"path">>, 3, HA),
{ok, S1} = revault_s3:multipart_update(S0, <<"path">>, 1, 3, HA, P1),
{ok, S2} = revault_s3:multipart_update(S1, <<"path">>, 2, 3, HA, P2),
{ok, S3} = revault_s3:multipart_update(S2, <<"path">>, 3, 3, HA, P3),
?assertError(invalid_hash, revault_s3:multipart_final(S3, <<"path">>, 3, HA)),
ok.
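As in the disk suite, the mismatch comes from the hash argument rather than from the mocks: HA is <<1, (hash of Whole)/binary>>, while the rolling hash the backend accumulates over P1, P2, and P3 equals the hash of Whole itself, so the new check in revault_s3:multipart_final/4 raises invalid_hash before the checksums returned by the mocked S3 calls are used. A sketch, using hash_bin/1 as above:

%% Not part of the suite: why the ?assertError above holds.
Rolling = revault_file:hash_bin(Whole),  %% what crypto:hash_final/1 yields
true = Rolling =/= HA.                   %% HA carries the extra <<1>> prefix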

read_range() ->
[{doc, "mocked out call to the read object functions, based "
"on data from the integration suite"}].
