From 04a55cb2c81693a2a978d14516c59953c5320476 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Thu, 20 Feb 2020 13:25:48 -0700 Subject: [PATCH] lib/pull: Skip scratch deltas if normal commit with same bindings found If a normal commit cannot be found in the history of the current ref, try to find one by looking at the collection and ref bindings in all the commits in the repo. This will only work if the remote is using collection IDs since otherwise just looking at the ref bindings in the local commits would be ambiguous about what remote the commit came from. If a normal commit with matching bindings is found, prefer an object pull. With this change it's possible to have a partial local HEAD commit (from a previous metadata only pull, for example) and broken history to a normal ancestor and correctly use the heuristic that an object pull should be preferred. This is a common case since user repos are likely to have missed intervening commits on the same remote ref. --- src/libostree/ostree-repo-pull.c | 113 +++++++++++++++++++++++++++---- tests/pull-test.sh | 81 +++++++++++++++++++++- 2 files changed, 180 insertions(+), 14 deletions(-) diff --git a/src/libostree/ostree-repo-pull.c b/src/libostree/ostree-repo-pull.c index 0a373b681c..822bdd2952 100644 --- a/src/libostree/ostree-repo-pull.c +++ b/src/libostree/ostree-repo-pull.c @@ -2588,6 +2588,84 @@ normal_commit_reachable (OstreeRepo *self, return FALSE; } +static gboolean +normal_bound_commit_exists (OtPullData *pull_data, + const OstreeCollectionRef *ref, + GCancellable *cancellable) +{ + g_return_val_if_fail (ref != NULL, FALSE); + + g_autofree char *collection_id = NULL; + if (ref->collection_id != NULL) + collection_id = g_strdup (ref->collection_id); + else + { + collection_id = get_remote_repo_collection_id (pull_data); + if (collection_id == NULL) + return FALSE; + } + + g_autoptr(GHashTable) objects = NULL; + g_autoptr(GError) list_error = NULL; + if (!ostree_repo_list_objects (pull_data->repo, OSTREE_REPO_LIST_OBJECTS_ALL, + &objects, cancellable, &list_error)) + { + g_debug ("Couldn't list objects: %s", list_error->message); + return FALSE; + } + + GLNX_HASH_TABLE_FOREACH (objects, GVariant*, serialized_key) + { + const char *checksum; + OstreeObjectType objtype; + ostree_object_name_deserialize (serialized_key, &checksum, &objtype); + if (objtype != OSTREE_OBJECT_TYPE_COMMIT) + continue; + + g_autoptr(GVariant) commit = NULL; + OstreeRepoCommitState commitstate; + g_autoptr(GError) local_error = NULL; + if (!ostree_repo_load_commit (pull_data->repo, checksum, &commit, + &commitstate, &local_error)) + { + /* Ignore issues with the commit since we're going to try to pull a + * scratch delta, anyways. + */ + g_debug ("Couldn't load commit %s: %s", checksum, local_error->message); + continue; + } + + /* Is this a partial commit? */ + if ((commitstate & OSTREE_REPO_COMMIT_STATE_PARTIAL) > 0) + continue; + + /* If the commit has the same collection binding and the ref is included + * in the ref bindings, we have a match. + */ + g_autoptr(GVariant) metadata = g_variant_get_child_value (commit, 0); + const char *collection_id_binding; + if (!g_variant_lookup (metadata, + OSTREE_COMMIT_META_KEY_COLLECTION_BINDING, + "&s", + &collection_id_binding)) + continue; + if (!g_str_equal (collection_id_binding, collection_id)) + continue; + + g_autofree const char **ref_bindings = NULL; + if (!g_variant_lookup (metadata, + OSTREE_COMMIT_META_KEY_REF_BINDING, + "^a&s", + &ref_bindings)) + continue; + if (g_strv_contains ((const char *const *) ref_bindings, ref->ref_name)) + return TRUE; + } + + /* No normal commit found with same bindings */ + return FALSE; +} + /* * DELTA_SEARCH_RESULT_UNCHANGED: * We already have the commit. @@ -2618,12 +2696,13 @@ typedef struct { * See the enum in `DeltaSearchResult` for available result types. */ static gboolean -get_best_static_delta_start_for (OtPullData *pull_data, - const char *to_revision, - const char *cur_revision, - DeltaSearchResult *out_result, - GCancellable *cancellable, - GError **error) +get_best_static_delta_start_for (OtPullData *pull_data, + const OstreeCollectionRef *ref, + const char *to_revision, + const char *cur_revision, + DeltaSearchResult *out_result, + GCancellable *cancellable, + GError **error) { /* Array of possible from checksums */ g_autoptr(GPtrArray) candidates = g_ptr_array_new_with_free_func (g_free); @@ -2733,12 +2812,20 @@ get_best_static_delta_start_for (OtPullData *pull_data, * efficient. */ if (out_result->result == DELTA_SEARCH_RESULT_SCRATCH && - !pull_data->require_static_deltas && - cur_revision != NULL && - normal_commit_reachable (pull_data->repo, cur_revision)) - { - g_debug ("Found normal commit in history of %s", cur_revision); - out_result->result = DELTA_SEARCH_RESULT_NO_MATCH; + !pull_data->require_static_deltas) + { + if (cur_revision != NULL && + normal_commit_reachable (pull_data->repo, cur_revision)) + { + g_debug ("Found normal commit in history of %s", cur_revision); + out_result->result = DELTA_SEARCH_RESULT_NO_MATCH; + } + else if (ref != NULL && + normal_bound_commit_exists (pull_data, ref, cancellable)) + { + g_debug ("Found normal commit with same bindings as %s", ref->ref_name); + out_result->result = DELTA_SEARCH_RESULT_NO_MATCH; + } } return TRUE; @@ -3457,7 +3544,7 @@ initiate_request (OtPullData *pull_data, DeltaSearchResult deltares; /* Look for a delta to @to_revision in the summary data */ - if (!get_best_static_delta_start_for (pull_data, to_revision, + if (!get_best_static_delta_start_for (pull_data, ref, to_revision, delta_from_revision, &deltares, pull_data->cancellable, error)) return FALSE; diff --git a/tests/pull-test.sh b/tests/pull-test.sh index 4f2b833130..8d3a345bae 100644 --- a/tests/pull-test.sh +++ b/tests/pull-test.sh @@ -54,7 +54,7 @@ function verify_initial_contents() { assert_file_has_content baz/cow '^moo$' } -num_tests=40 +num_tests=44 # 3 tests needs GPG support num_gpg_tests=3 if has_gpgme; then @@ -721,3 +721,82 @@ echo "ok scratch delta (require static deltas with normal parent)" # Clean up the deltas so we don't leave a corrupt one around rm ostree-srv/gnomerepo/deltas -rf + +# Test using scratch deltas with unreachable commits but matching ref bindings. +# Add a collection ID to the remote to make some commits with appropriate +# bindings. +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo config set core.collection-id org.example.Repo +rm ostree-srv/gnomerepo/deltas -rf +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo refs --delete scratch +rm scratch-files -rf +mkdir scratch-files +echo "stuff for rev1" > scratch-files/rev1 +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo commit ${COMMIT_ARGS} -b scratch -s rev1 scratch-files +rev1=$(${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo rev-parse scratch) +echo "stuff for rev2" > scratch-files/rev2 +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo commit ${COMMIT_ARGS} -b scratch -s rev2 scratch-files +rev2=$(${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo rev-parse scratch) +echo "stuff for rev3" > scratch-files/rev3 +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo commit ${COMMIT_ARGS} -b scratch -s rev3 scratch-files +rev3=$(${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo rev-parse scratch) +rm scratch-files -rf + +# Make a scratch delta and then corrupt it so we can tell when the pull is +# fetching the delta and not falling back to an object pull. +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo static-delta --empty generate scratch +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo summary -u +superblock=$(find ostree-srv/gnomerepo/deltas -name superblock) +echo FAIL > ${superblock} + +# Pull with a partial HEAD ref and partial unreachable commit with matching ref +# bindings. This will search all commits for one bound to the requested ref and +# find the partial grandparent. This should fail because it tries to pull the +# scratch delta. +repo_init --no-gpg-verify +${CMD_PREFIX} ostree --repo=repo pull origin --commit-metadata-only scratch@${rev1} +${CMD_PREFIX} ostree --repo=repo pull origin --commit-metadata-only scratch +if ${CMD_PREFIX} ostree --repo=repo pull origin scratch 2>err.txt; then + assert_not_reached "pull of corrupt scratch delta unexpectedly succeeded" +fi +assert_file_has_content err.txt "Invalid checksum for static delta" +echo "ok scratch delta (partial HEAD and partial bound grandparent)" + +# Pull with a partial HEAD ref and normal unreachable commit with matching ref +# bindings. This will search all commits for one bound to the requested ref and +# find the normal grandparent. This will succeed with an object pull. +repo_init --no-gpg-verify +${CMD_PREFIX} ostree --repo=repo pull origin scratch@${rev1} +${CMD_PREFIX} ostree --repo=repo pull origin --commit-metadata-only scratch +${CMD_PREFIX} ostree --repo=repo pull origin scratch +echo "ok scratch delta (partial HEAD and normal bound grandparent)" + +# Pull the HEAD by revision with a normal unreachable commit with matching ref +# bindings but collection-id not set for the remote. Since the pull is by +# revision, it will not have the collection ID set in the requested ref and the +# search for the bound grandparent will fail. This should fail because it tries +# to pull the scratch delta. +repo_init --no-gpg-verify +${CMD_PREFIX} ostree --repo=repo pull origin scratch@${rev1} +${CMD_PREFIX} ostree --repo=repo pull origin --commit-metadata-only scratch@${rev3} +if ${CMD_PREFIX} ostree --repo=repo pull origin scratch@${rev3} 2>err.txt; then + assert_not_reached "pull of corrupt scratch delta unexpectedly succeeded" +fi +assert_file_has_content err.txt "Invalid checksum for static delta" +echo "ok scratch delta (partial HEAD by revision, normal bound grandparent and no remote collection-id)" + +# Set the remote's collection ID and pull the HEAD by revision with a normal +# unreachable commit with matching ref bindings but collection-id not set for +# the remote. Since the pull is by revision, it will not have the collection ID +# set in the requested ref, but the collection ref configured for the remote +# will be used. This will succeed with an object pull. +repo_init --no-gpg-verify --collection-id=org.example.Repo +${CMD_PREFIX} ostree --repo=repo pull origin scratch@${rev1} +${CMD_PREFIX} ostree --repo=repo pull origin --commit-metadata-only scratch@${rev3} +${CMD_PREFIX} ostree --repo=repo pull origin scratch@${rev3} +echo "ok scratch delta (partial HEAD by revision, normal bound grandparent and remote collection-id)" + +# Remove the remote repo's collection ID +${CMD_PREFIX} ostree --repo=ostree-srv/gnomerepo config unset core.collection-id + +# Clean up the deltas so we don't leave a corrupt one around +rm ostree-srv/gnomerepo/deltas -rf