From 501416a2fb83f4bf551346ab41c8130970161660 Mon Sep 17 00:00:00 2001 From: Chris Dinh Date: Thu, 7 Nov 2024 11:43:38 -0800 Subject: [PATCH] Add simple suffix evaluation Summary: # Context Watchman is a primary vector into the eden globFiles endpoint. How watchman interprets and sends arguments to eden different based on how the query is structured. This change is intended to make a common query pathway more efficient(referred to as SimpleSuffix from here). Query format example: ["query","",{relative_root:"",fields:["content.sha1hex","name"],expression:["allof",["type","f"],["suffix",[""]]]}] This is currently interpreted as [rel_root/**], then post-filtered in watchman. This change is intended to change this to use eden glob's searchRoot field and internal filtering. # This Diff Adds new functions to watchman QueryExprs: evaluateSimpleSuffix - intended to determine if the expression is a valid simple suffix getSuffixQueryGlobPatterns - basically only valid on SuffixExpr. Returns a list of suffixes in glob form. These are intended to allow watchman to determine if a given query is using a simple suffix so we can pass arguments to eden more efficiently. This is primarily targeted at allowing targeted SLAPI offloading, b ut may have benefits for queries that aren't offloaded since we're returning less files from eden. Rollout is controlled by the watchman config value eden_enable_simple_suffix # Technical Details evaluateSimpleSuffix - returns Excluded(Not part of a Simple Suffix), Type, Suffix, or Allof. An Allof result indicates that this is a valis SimpleSuffix. getSuffixQueryGlobPatterns - Takes the suffix expression (eg. ['a', 'b']) and converts it to glob form (**/*.a, **/*.b) Relative root is computed and passed down. Adjustments to filter is made to properly reflect this. Reviewed By: jdelliot Differential Revision: D65302345 fbshipit-source-id: 57c073022452b2ff8ea1c913d45cad2a3eee1484 --- watchman/query/QueryExpr.h | 17 ++++ watchman/query/base.cpp | 53 ++++++++++++ watchman/query/dirname.cpp | 8 ++ watchman/query/empty.cpp | 16 ++++ watchman/query/intcompare.cpp | 8 ++ watchman/query/match.cpp | 8 ++ watchman/query/name.cpp | 8 ++ watchman/query/pcre.cpp | 8 ++ watchman/query/since.cpp | 8 ++ watchman/query/suffix.cpp | 13 +++ watchman/query/type.cpp | 11 +++ watchman/test/SuffixQueryTest.cpp | 133 ++++++++++++++++++++++++++++++ watchman/watcher/eden.cpp | 77 ++++++++++++++--- 13 files changed, 356 insertions(+), 12 deletions(-) create mode 100644 watchman/test/SuffixQueryTest.cpp diff --git a/watchman/query/QueryExpr.h b/watchman/query/QueryExpr.h index 19a8553dc152..28239d815996 100644 --- a/watchman/query/QueryExpr.h +++ b/watchman/query/QueryExpr.h @@ -43,6 +43,11 @@ enum AggregateOp { AllOf, }; +/** + * Describes which part of a simple suffix expression + */ +enum SimpleSuffixType { Excluded, Suffix, IsSimpleSuffix, Type }; + class QueryExpr { public: virtual ~QueryExpr() = default; @@ -78,6 +83,8 @@ class QueryExpr { virtual std::optional> computeGlobUpperBound( CaseSensitivity) const = 0; + virtual std::vector getSuffixQueryGlobPatterns() const = 0; + enum ReturnOnlyFiles { No, Yes, Unrelated }; /** @@ -86,6 +93,16 @@ class QueryExpr { * method to handle this query. */ virtual ReturnOnlyFiles listOnlyFiles() const = 0; + + /** + * Returns whether this expression is a simple suffix expression, or a part + * of a simple suffix expression. A simple suffix expression is an allof + * expression that contains a single suffix expresssion containing one or more + * suffixes, and a type expresssion that wants files only. The intention for + * this is to allow watchman to more accurately determine what arguments to + * pass to eden's globFiles API. + */ + virtual SimpleSuffixType evaluateSimpleSuffix() const = 0; }; } // namespace watchman diff --git a/watchman/query/base.cpp b/watchman/query/base.cpp index 635ee9e2d6d7..8e0b5b4669b2 100644 --- a/watchman/query/base.cpp +++ b/watchman/query/base.cpp @@ -64,6 +64,14 @@ class NotExpr : public QueryExpr { } return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(not, NotExpr::parse); @@ -87,6 +95,14 @@ class TrueExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(true, TrueExpr::parse); @@ -110,6 +126,14 @@ class FalseExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(false, FalseExpr::parse); @@ -312,6 +336,35 @@ class ListExpr : public QueryExpr { } return result; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + if (allof) { + std::vector types; + for (auto& subExpr : exprs) { + types.push_back(subExpr->evaluateSimpleSuffix()); + } + if (types.size() == 2) { + if ((types[0] == SimpleSuffixType::Type && + types[1] == SimpleSuffixType::Suffix) || + (types[1] == SimpleSuffixType::Type && + types[0] == SimpleSuffixType::Suffix)) { + return SimpleSuffixType::IsSimpleSuffix; + } + } + } + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + if (allof) { + for (auto& subExpr : exprs) { + if (subExpr->evaluateSimpleSuffix() == SimpleSuffixType::Suffix) { + return subExpr->getSuffixQueryGlobPatterns(); + } + } + } + return std::vector{}; + } }; W_TERM_PARSER(anyof, ListExpr::parseAnyOf); diff --git a/watchman/query/dirname.cpp b/watchman/query/dirname.cpp index 5c92a1e61830..3f22c45a3756 100644 --- a/watchman/query/dirname.cpp +++ b/watchman/query/dirname.cpp @@ -178,6 +178,14 @@ class DirNameExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(dirname, DirNameExpr::parseDirName); diff --git a/watchman/query/empty.cpp b/watchman/query/empty.cpp index cc5c3833b873..a001b147d8a6 100644 --- a/watchman/query/empty.cpp +++ b/watchman/query/empty.cpp @@ -34,6 +34,14 @@ class ExistsExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(exists, ExistsExpr::parse); @@ -79,6 +87,14 @@ class EmptyExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(empty, EmptyExpr::parse); diff --git a/watchman/query/intcompare.cpp b/watchman/query/intcompare.cpp index c89da8f78657..4b13bb9d3c16 100644 --- a/watchman/query/intcompare.cpp +++ b/watchman/query/intcompare.cpp @@ -132,6 +132,14 @@ class SizeExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(size, SizeExpr::parse); diff --git a/watchman/query/match.cpp b/watchman/query/match.cpp index c3c0297cfec9..8257ff69ade7 100644 --- a/watchman/query/match.cpp +++ b/watchman/query/match.cpp @@ -212,6 +212,14 @@ class WildMatchExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(match, WildMatchExpr::parseMatch); W_TERM_PARSER(imatch, WildMatchExpr::parseIMatch); diff --git a/watchman/query/name.cpp b/watchman/query/name.cpp index d1d3802f0f4f..1b43bf64d0da 100644 --- a/watchman/query/name.cpp +++ b/watchman/query/name.cpp @@ -186,6 +186,14 @@ class NameExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(name, NameExpr::parseName); diff --git a/watchman/query/pcre.cpp b/watchman/query/pcre.cpp index 5f22e7ca0cd7..7cfcce9bb9df 100644 --- a/watchman/query/pcre.cpp +++ b/watchman/query/pcre.cpp @@ -157,6 +157,14 @@ class PcreExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(pcre, PcreExpr::parsePcre); W_TERM_PARSER(ipcre, PcreExpr::parseIPcre); diff --git a/watchman/query/since.cpp b/watchman/query/since.cpp index e290a10ffa7b..da725905d8d2 100644 --- a/watchman/query/since.cpp +++ b/watchman/query/since.cpp @@ -170,6 +170,14 @@ class SinceExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(since, SinceExpr::parse); diff --git a/watchman/query/suffix.cpp b/watchman/query/suffix.cpp index f33230b75fb9..c193c654a22e 100644 --- a/watchman/query/suffix.cpp +++ b/watchman/query/suffix.cpp @@ -100,6 +100,19 @@ class SuffixExpr : public QueryExpr { ReturnOnlyFiles listOnlyFiles() const override { return ReturnOnlyFiles::Unrelated; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + return SimpleSuffixType::Suffix; + } + + std::vector getSuffixQueryGlobPatterns() const override { + std::vector patterns; + for (const auto& suffix : suffixSet_) { + patterns.push_back("**/*." + suffix.string()); + } + + return patterns; + } }; W_TERM_PARSER(suffix, SuffixExpr::parse); W_CAP_REG("suffix-set") diff --git a/watchman/query/type.cpp b/watchman/query/type.cpp index d1b1b68addd8..cf87e9f7d601 100644 --- a/watchman/query/type.cpp +++ b/watchman/query/type.cpp @@ -119,6 +119,17 @@ class TypeExpr : public QueryExpr { } return ReturnOnlyFiles::Yes; } + + SimpleSuffixType evaluateSimpleSuffix() const override { + if (arg == 'f') { + return SimpleSuffixType::Type; + } + return SimpleSuffixType::Excluded; + } + + std::vector getSuffixQueryGlobPatterns() const override { + return std::vector{}; + } }; W_TERM_PARSER(type, TypeExpr::parse); diff --git a/watchman/test/SuffixQueryTest.cpp b/watchman/test/SuffixQueryTest.cpp new file mode 100644 index 000000000000..eff657d0115d --- /dev/null +++ b/watchman/test/SuffixQueryTest.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include "watchman/query/GlobTree.h" +#include "watchman/query/Query.h" +#include "watchman/query/QueryExpr.h" +#include "watchman/query/TermRegistry.h" +#include "watchman/thirdparty/jansson/jansson.h" + +using namespace watchman; +using namespace testing; + +namespace { + +std::optional parse_json(std::string expression_json) { + json_error_t err{}; + auto expression = json_loads(expression_json.c_str(), JSON_DECODE_ANY, &err); + if (!expression.has_value()) { + ADD_FAILURE() << "JSON parse error in fixture: " << err.text << " at " + << err.source << ":" << err.line << ":" << err.column; + return std::nullopt; + } + return expression; +} + +std::optional expr_evaluate_simple_suffix( + std::string expression_json) { + json_error_t err{}; + auto expression = parse_json(expression_json); + if (!expression.has_value()) { + return std::nullopt; + } + Query query; + // Disable automatic parsing of "match" as "imatch", "name" as "iname", etc. + auto expr = watchman::parseQueryExpr(&query, *expression); + return expr->evaluateSimpleSuffix(); +} + +std::optional> expr_get_suffix_glob( + std::string expression_json) { + json_error_t err{}; + auto expression = parse_json(expression_json); + if (!expression.has_value()) { + return std::nullopt; + } + Query query; + // Disable automatic parsing of "match" as "imatch", "name" as "iname", etc. + auto expr = watchman::parseQueryExpr(&query, *expression); + auto rv = expr->getSuffixQueryGlobPatterns(); + std::sort(rv.begin(), rv.end()); + return rv; +} + +} // namespace + +TEST(SuffixQueryTest, false) { + EXPECT_THAT( + expr_evaluate_simple_suffix(R"( ["false"] )"), + Optional(SimpleSuffixType::Excluded)); +} + +TEST(SuffixQueryTest, false_glob) { + EXPECT_THAT( + expr_get_suffix_glob(R"( ["false"] )"), + Optional(std::vector{})); +} + +TEST(SuffixQueryTest, type_d) { + EXPECT_THAT( + expr_evaluate_simple_suffix(R"( ["type", "d"] )"), + Optional(SimpleSuffixType::Excluded)); +} + +TEST(SuffixQueryTest, type_d_glob) { + EXPECT_THAT( + expr_get_suffix_glob(R"( ["type", "d"] )"), + Optional(std::vector{})); +} + +TEST(SuffixQueryTest, type_f) { + EXPECT_THAT( + expr_evaluate_simple_suffix(R"( ["type", "f"] )"), + Optional(SimpleSuffixType::Type)); +} + +TEST(SuffixQueryTest, type_f_glob) { + EXPECT_THAT( + expr_get_suffix_glob(R"( ["type", "f"] )"), + Optional(std::vector{})); +} +TEST(SuffixQueryTest, suffix) { + EXPECT_THAT( + expr_evaluate_simple_suffix(R"( ["suffix", ["a", "f"]] )"), + Optional(SimpleSuffixType::Suffix)); +} + +TEST(SuffixQueryTest, suffix_glob) { + EXPECT_THAT( + expr_get_suffix_glob(R"( ["suffix", ["a", "f"]] )"), + Optional(std::vector{"**/*.a", "**/*.f"})); +} + +TEST(SuffixQueryTest, allof_excl) { + EXPECT_THAT( + expr_evaluate_simple_suffix(R"( ["allof", ["type", "f"], ["exists"]] )"), + Optional(SimpleSuffixType::Excluded)); +} + +TEST(SuffixQueryTest, allof_excl_glob) { + EXPECT_THAT( + expr_get_suffix_glob(R"( ["allof", ["type", "f"], ["exists"]] )"), + Optional(std::vector{})); +} + +TEST(SuffixQueryTest, allof_yes) { + EXPECT_THAT( + expr_evaluate_simple_suffix( + R"( ["allof", ["type", "f"], ["suffix", ["a"]]] )"), + Optional(SimpleSuffixType::IsSimpleSuffix)); +} + +TEST(SuffixQueryTest, allof_yes_glob) { + EXPECT_THAT( + expr_get_suffix_glob(R"( ["allof", ["type", "f"], ["suffix", ["a"]]] )"), + Optional(std::vector{"**/*.a"})); +} diff --git a/watchman/watcher/eden.cpp b/watchman/watcher/eden.cpp index 1352ec161fe9..1b74306c5a27 100644 --- a/watchman/watcher/eden.cpp +++ b/watchman/watcher/eden.cpp @@ -613,13 +613,18 @@ static std::string escapeGlobSpecialChars(w_string_piece str) { * We need to respect the ignore_dirs configuration setting and * also remove anything that doesn't match the relative_root constraint * in the query. */ -void filterOutPaths(std::vector& files, QueryContext* ctx) { +void filterOutPaths( + std::vector& files, + QueryContext* ctx, + const std::string& relative_root = "") { files.erase( std::remove_if( files.begin(), files.end(), - [ctx](const NameAndDType& item) { - auto full = w_string::pathCat({ctx->root->root_path, item.name}); + [ctx, relative_root](const NameAndDType& item) { + w_string full; + full = w_string::pathCat( + {ctx->root->root_path, relative_root, item.name}); if (!ctx->fileMatchesRelativeRoot(full)) { // Not in the desired area, so filter it out @@ -655,7 +660,8 @@ std::vector globNameAndDType( const std::vector& globPatterns, bool includeDotfiles, bool splitGlobPattern = false, - bool listOnlyFiles = false) { + bool listOnlyFiles = false, + const std::string& relative_root = "") { // TODO(xavierd): Once the config: "eden_split_glob_pattern" is rolled out // everywhere, remove this code. if (splitGlobPattern && globPatterns.size() > 1) { @@ -672,6 +678,7 @@ std::vector globNameAndDType( params.wantDtype() = true; params.listOnlyFiles() = listOnlyFiles; params.sync() = getSyncBehavior(); + params.searchRoot() = relative_root; globFutures.emplace_back( client->semifuture_globFiles(params).via(executor)); @@ -691,6 +698,7 @@ std::vector globNameAndDType( params.wantDtype() = true; params.listOnlyFiles() = listOnlyFiles; params.sync() = getSyncBehavior(); + params.searchRoot() = relative_root; Glob glob; try { @@ -867,7 +875,8 @@ class EdenView final : public QueryableView { const std::vector& globStrings, QueryContext* ctx, bool includeDotfiles, - bool includeDir = true) const { + bool includeDir = true, + const std::string& relative_root = "") const { auto client = getEdenClient(thriftChannel_); bool listOnlyFiles = false; @@ -882,12 +891,13 @@ class EdenView final : public QueryableView { globStrings, includeDotfiles, splitGlobPattern_, - listOnlyFiles); + listOnlyFiles, + relative_root); ctx->edenGlobFilesDurationUs.store( timer.elapsed().count(), std::memory_order_relaxed); // Filter out any ignored files - filterOutPaths(fileInfo, ctx); + filterOutPaths(fileInfo, ctx, relative_root); for (auto& item : fileInfo) { auto file = make_unique( @@ -997,8 +1007,23 @@ class EdenView final : public QueryableView { void allFilesGenerator(const Query*, QueryContext* ctx) const override { ctx->generationStarted(); - auto globPatterns = getGlobPatternsForAllFiles(ctx); - executeGlobBasedQuery(globPatterns, ctx, /*includeDotfiles=*/true); + std::string relative_root = ""; + std::vector globPatterns; + bool includeDir = true; + if (isSimpleSuffixQuery(ctx)) { + globPatterns = getSuffixQueryGlobPatterns(ctx); + relative_root = getSuffixQueryRelativeRoot(ctx); + includeDir = false; + } else { + globPatterns = getGlobPatternsForAllFiles(ctx); + } + + executeGlobBasedQuery( + globPatterns, + ctx, + /*includeDotfiles=*/true, + includeDir, + relative_root); } ClockPosition getMostRecentRootNumberAndTickValue() const override { @@ -1191,6 +1216,26 @@ class EdenView final : public QueryableView { return globPatterns; } + bool isSimpleSuffixQuery(QueryContext* ctx) const { + // Checks if this query expression is a simple suffix query. + // A simple suffix query is an allof expression that only contains + // 1. Type = f + // 2. Suffix + if (ctx->query->expr) { + return ctx->query->expr->evaluateSimpleSuffix() == + SimpleSuffixType::IsSimpleSuffix; + } + return false; + } + + std::vector getSuffixQueryGlobPatterns(QueryContext* ctx) const { + return ctx->query->expr->getSuffixQueryGlobPatterns(); + } + + std::string getSuffixQueryRelativeRoot(QueryContext* ctx) const { + return computeRelativePathPiece(ctx).string(); + } + /** * Returns all the files in the watched directory for a fresh instance. * @@ -1203,8 +1248,14 @@ class EdenView final : public QueryableView { // Avoid a full tree walk if we don't need it! return std::vector(); } - - auto globPatterns = getGlobPatternsForAllFiles(ctx); + std::string relative_root = ""; + std::vector globPatterns; + if (isSimpleSuffixQuery(ctx)) { + globPatterns = getSuffixQueryGlobPatterns(ctx); + relative_root = getSuffixQueryRelativeRoot(ctx); + } else { + globPatterns = getGlobPatternsForAllFiles(ctx); + } auto client = getEdenClient(thriftChannel_); return globNameAndDType( @@ -1212,7 +1263,9 @@ class EdenView final : public QueryableView { mountPoint_, std::move(globPatterns), /*includeDotfiles=*/true, - splitGlobPattern_); + splitGlobPattern_, + /*listOnlyFiles=*/false, + relative_root); } struct GetAllChangesSinceResult {