diff --git a/apps/revault/src/revault_file_disk.erl b/apps/revault/src/revault_file_disk.erl
index 6f8a84f..480fa24 100644
--- a/apps/revault/src/revault_file_disk.erl
+++ b/apps/revault/src/revault_file_disk.erl
@@ -148,8 +148,10 @@ multipart_final({state, Path, _PartsSeen, PartsTotal, Hash, {Fd, HashState}},
                 Path, PartsTotal, Hash) ->
     ok = file:close(Fd),
-    Hash = crypto:hash_final(HashState),
-    ok.
+    case crypto:hash_final(HashState) of
+        Hash -> ok;
+        _BadHash -> error(invalid_hash)
+    end.
 
 %%%%%%%%%%%%%%%%%%%%%
 %%% FILE WRAPPERS %%%
 %%%%%%%%%%%%%%%%%%%%%
diff --git a/apps/revault/src/revault_fsm.erl b/apps/revault/src/revault_fsm.erl
index 5af2192..821c789 100644
--- a/apps/revault/src/revault_fsm.erl
+++ b/apps/revault/src/revault_fsm.erl
@@ -1116,8 +1116,6 @@ handle_multipart_file(MPState, F, Hash, PartNum, PartTotal, Bin) when PartNum =:
     #{F := State} = MPState,
     {ok, NewState} = revault_file:multipart_update(State, F, PartNum, PartTotal, Hash, Bin),
     ok = revault_file:multipart_final(NewState, F, PartTotal, Hash),
-    %% TODO: validate whether multipart file handling validates the
-    %% hash we pass in, and otherwise add an explicit check
     {done, maps:remove(F, MPState)}.
 
 delete_file(Name, F, Meta) ->
diff --git a/apps/revault/src/revault_s3.erl b/apps/revault/src/revault_s3.erl
index c075754..42cc64e 100644
--- a/apps/revault/src/revault_s3.erl
+++ b/apps/revault/src/revault_s3.erl
@@ -249,7 +249,10 @@ multipart_final({state, Path, _PartsSeen, PartsTotal, Hash,
                  {Tmp, UploadId, RollingHash, PartsAcc}},
                 Path, PartsTotal, Hash) ->
     %% Compute our own final hash and the final checksum
-    Hash = crypto:hash_final(RollingHash),
+    case crypto:hash_final(RollingHash) of
+        Hash -> ok;
+        _ -> error(invalid_hash)
+    end,
     Chk = base64:encode(Hash),
     %% S3 multipart upload does a checksum of checksums with a -N suffix.
     %% We'll move the upload to get the final hash, but we still want to do
diff --git a/apps/revault/test/revault_file_disk_SUITE.erl b/apps/revault/test/revault_file_disk_SUITE.erl
index ae4305c..cc36f06 100644
--- a/apps/revault/test/revault_file_disk_SUITE.erl
+++ b/apps/revault/test/revault_file_disk_SUITE.erl
@@ -4,7 +4,7 @@
 -include_lib("common_test/include/ct.hrl").
 
 all() ->
-    [read_range, multipart].
+    [read_range, multipart, multipart_hash].
 
 read_range() ->
     [{doc, "general checks on reading subsets of files"}].
@@ -47,3 +47,25 @@ multipart(Config) ->
     ok = revault_file_disk:multipart_final(State, File, Parts, Hash),
     ?assertEqual({ok, Bin}, file:read_file(File)),
     ok.
+
+multipart_hash() ->
+    [{doc, "Multipart API validates the hash when finalizing"}].
+multipart_hash(Config) ->
+    WidthBytes = 100,
+    WidthBits = 8*WidthBytes,
+    Parts = 11,
+    Bin = <<0:WidthBits, 1:WidthBits, 2:WidthBits, 3:WidthBits, 4:WidthBits,
+            5:WidthBits, 6:WidthBits, 7:WidthBits, 8:WidthBits, 9:WidthBits, 10>>,
+    Hash = revault_file_disk:hash_bin(<<1, Bin/binary>>),
+    File = filename:join([?config(priv_dir, Config), "multipart.scratch"]),
+    {_, State} = lists:foldl(
+        fun(Part, {N,S}) ->
+            {ok, NewS} = revault_file_disk:multipart_update(S, File, N, Parts, Hash, Part),
+            {N+1, NewS}
+        end,
+        {1, revault_file_disk:multipart_init(File, Parts, Hash)},
+        [<<N:WidthBits>> || N <- lists:seq(0,Parts-2)]++[<<10>>]
+    ),
+    ?assertError(invalid_hash,
+                 revault_file_disk:multipart_final(State, File, Parts, Hash)),
+    ok.
diff --git a/apps/revault/test/revault_s3_SUITE.erl b/apps/revault/test/revault_s3_SUITE.erl
index ddeb159..7676b25 100644
--- a/apps/revault/test/revault_s3_SUITE.erl
+++ b/apps/revault/test/revault_s3_SUITE.erl
@@ -9,7 +9,8 @@
 all() ->
     [client_cache, consult, hash_cache, hash, copy,
      list_uncached, list_cached, is_regular,
-     write_file, delete, size, multipart, read_range].
+     write_file, delete, size, multipart, multipart_hash,
+     read_range].
 
 init_per_suite(Config) ->
     InitBucket = application:get_env(revault, bucket, undefined),
@@ -396,6 +397,52 @@ multipart(_Config) ->
     ?assertEqual(ok, revault_s3:multipart_final(S3, <<"path">>, 3, HA)),
     ok.
 
+multipart_hash() ->
+    [{doc, "mocked out call to the multipart functions, based "
+           "on data from the integration suite, checking checksums."}].
+multipart_hash(_Config) ->
+    UploadId = <<"123456">>,
+    P1 = <<1>>,
+    P2 = <<2>>,
+    P3 = <<3>>,
+    Whole = <<P1/binary, P2/binary, P3/binary>>,
+    H1 = revault_file:hash_bin(P1),
+    H2 = revault_file:hash_bin(P2),
+    H3 = revault_file:hash_bin(P3),
+    HA = <<1, (revault_file:hash_bin(Whole))/binary>>,
+    CM = << (base64:encode(revault_file:hash_bin(<<H1/binary, H2/binary, H3/binary>>)))/binary, "-3" >>,
+    CA = base64:encode(HA),
+    meck:expect(aws_s3, create_multipart_upload,
+        fun(_Cli, _Bucket, _Path, _Query, _Opts) ->
+            {ok, #{<<"InitiateMultipartUploadResult">> =>
+                   #{<<"UploadId">> => UploadId}},
+             {200, [], make_ref()}}
+        end),
+    meck:expect(aws_s3, upload_part,
+        fun(_Cli, _Bucket, _Path,
+            #{<<"PartNumber">> := _, <<"ChecksumSHA256">> := Chk},
+            _Opts) ->
+            {ok, #{<<"ETag">> => Chk, <<"ChecksumSHA256">> => Chk}, {200, [], make_ref()}}
+        end),
+    meck:expect(aws_s3, complete_multipart_upload,
+        fun(_Cli, _Bucket, _Path, _Query, _Opts) ->
+            {ok, #{<<"CompleteMultipartUploadResult">> =>
+                   #{<<"ChecksumSHA256">> => CM}},
+             {200, [], make_ref()}}
+        end),
+    meck:expect(aws_s3, copy_object,
+        fun(_Cli, _Bucket, _To, _Query) ->
+            {ok, #{<<"CopyObjectResult">> => #{<<"ChecksumSHA256">> => CA}}, {200, []}}
+        end),
+    meck:expect(aws_s3, delete_object,
+        fun(_Cli, _Bucket, _Path, _Query) -> {ok, #{}, {200, []}} end),
+    S0 = revault_s3:multipart_init(<<"path">>, 3, HA),
+    {ok, S1} = revault_s3:multipart_update(S0, <<"path">>, 1, 3, HA, P1),
+    {ok, S2} = revault_s3:multipart_update(S1, <<"path">>, 2, 3, HA, P2),
+    {ok, S3} = revault_s3:multipart_update(S2, <<"path">>, 3, 3, HA, P3),
+    ?assertError(invalid_hash, revault_s3:multipart_final(S3, <<"path">>, 3, HA)),
+    ok.
+
 read_range() ->
     [{doc, "mocked out call to the read object functions, based "
            "on data from the integration suite"}].
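
Note: both backends now share the same validation shape. The expected hash is already bound to Hash when multipart_final runs, so matching on the result of crypto:hash_final/1 either succeeds or raises a named error(invalid_hash), instead of the generic badmatch the old assignment produced. The standalone sketch below is illustrative only (module, function, and variable names are not part of this patch); it shows the bound-variable match and the S3-style checksum-of-checksums shape that the mocked test data encodes.

-module(hash_check_sketch).
-export([finalize/2, composite_checksum/1, demo/0]).

%% Feed parts into a rolling SHA-256 state one at a time, the way the
%% multipart implementations do.
roll(Parts) ->
    lists:foldl(fun(Part, S) -> crypto:hash_update(S, Part) end,
                crypto:hash_init(sha256), Parts).

%% Expected is already bound, so the first clause only matches when the
%% digests are byte-identical; anything else raises a named error rather
%% than a badmatch.
finalize(HashState, Expected) ->
    case crypto:hash_final(HashState) of
        Expected -> ok;
        _BadHash -> error(invalid_hash)
    end.

%% S3-style composite checksum: a hash over the concatenated part digests,
%% base64-encoded, with a "-N" part-count suffix -- the shape of the CM
%% value returned by the mocked complete_multipart_upload call above.
composite_checksum(PartHashes) ->
    Digest = crypto:hash(sha256, iolist_to_binary(PartHashes)),
    <<(base64:encode(Digest))/binary, "-",
      (integer_to_binary(length(PartHashes)))/binary>>.

demo() ->
    Parts = [<<1>>, <<2>>, <<3>>],
    Good = crypto:hash(sha256, iolist_to_binary(Parts)),
    ok = finalize(roll(Parts), Good),
    %% A corrupted expected hash takes the error path, which is what the
    %% new multipart_hash cases assert via ?assertError(invalid_hash, ...).
    {'EXIT', {invalid_hash, _}} = (catch finalize(roll(Parts), <<1, Good/binary>>)),
    ok.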