From 08c5e770fcd8d985830c6b34b138fca6b5ce8855 Mon Sep 17 00:00:00 2001 From: ddovod Date: Tue, 28 Aug 2018 09:10:47 +0300 Subject: [PATCH 1/6] adding 3 more options for completion scoring --- .gitmodules | 3 ++ CMakeLists.txt | 3 ++ src/completion_matcher.h | 10 +++++++ src/config.h | 6 +++- src/fts_match.cc | 20 +++++++++++++ src/fts_match.h | 13 ++++++++ src/fuzzy_match.cc | 9 ++++++ src/fuzzy_match.h | 7 +++-- src/messages/text_document_completion.cc | 25 +++++++++++----- src/prefix_match.cc | 38 ++++++++++++++++++++++++ src/prefix_match.h | 14 +++++++++ third_party/lib_fts | 1 + 12 files changed, 138 insertions(+), 11 deletions(-) create mode 100644 src/completion_matcher.h create mode 100644 src/fts_match.cc create mode 100644 src/fts_match.h create mode 100644 src/prefix_match.cc create mode 100644 src/prefix_match.h create mode 160000 third_party/lib_fts diff --git a/.gitmodules b/.gitmodules index 211365bfa..0172f592e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "third_party/reproc"] path = third_party/reproc url = https://github.com/DaanDeMeyer/reproc.git +[submodule "third_party/lib_fts"] + path = third_party/lib_fts + url = https://github.com/forrestthewoods/lib_fts.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c54eae41..5b5c0c3a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,7 @@ target_include_directories(cquery PRIVATE third_party/pugixml/src third_party/rapidjson/include third_party/sparsepp + third_party/lib_fts/code ) ### Install @@ -304,6 +305,8 @@ target_sources(cquery PRIVATE src/file_contents.cc src/file_types.cc src/fuzzy_match.cc + src/fts_match.cc + src/prefix_match.cc src/iindexer.cc src/import_manager.cc src/import_pipeline.cc diff --git a/src/completion_matcher.h b/src/completion_matcher.h new file mode 100644 index 000000000..d773d512e --- /dev/null +++ b/src/completion_matcher.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +class CompletionMatcher { + public: + // virtual 
~CompletionMatcher() = default; // don't know why but it crashed if I uncomment this like, investigating + virtual int Match(std::string_view text) = 0; + virtual int MinScore() const = 0; +}; diff --git a/src/config.h b/src/config.h index 3867ffe5c..99d98aae7 100644 --- a/src/config.h +++ b/src/config.h @@ -156,6 +156,9 @@ struct Config { // For example, to hide all files in a /CACHE/ folder, use ".*/CACHE/.*" std::vector includeBlacklist; std::vector includeWhitelist; + + // + std::string matcherType = "cqueryMatcher"; }; Completion completion; @@ -261,7 +264,8 @@ MAKE_REFLECT_STRUCT(Config::Completion, includeMaxPathSize, includeSuffixWhitelist, includeBlacklist, - includeWhitelist); + includeWhitelist, + matcherType); MAKE_REFLECT_STRUCT(Config::Formatting, enabled) MAKE_REFLECT_STRUCT(Config::Diagnostics, blacklist, diff --git a/src/fts_match.cc b/src/fts_match.cc new file mode 100644 index 000000000..c10c526ba --- /dev/null +++ b/src/fts_match.cc @@ -0,0 +1,20 @@ +#include "fts_match.h" + +#define FTS_FUZZY_MATCH_IMPLEMENTATION 1 +#include + +FtsMatcher::FtsMatcher(std::string_view pattern) { + original_pattern = pattern; +} + +int FtsMatcher::Match(std::string_view text) { + int result = 0; + if (fts::fuzzy_match(original_pattern.data(), text.data(), result)) { + return result; + } + return MinScore(); +} + +int FtsMatcher::MinScore() const { + return -100000; +} diff --git a/src/fts_match.h b/src/fts_match.h new file mode 100644 index 000000000..d98fcad7b --- /dev/null +++ b/src/fts_match.h @@ -0,0 +1,13 @@ +#pragma once + +#include "completion_matcher.h" + +class FtsMatcher : public CompletionMatcher { + public: + FtsMatcher(std::string_view pattern); + int Match(std::string_view text) override; + int MinScore() const override; + + private: + std::string_view original_pattern; +}; diff --git a/src/fuzzy_match.cc b/src/fuzzy_match.cc index 6e271665d..6f8670a57 100644 --- a/src/fuzzy_match.cc +++ b/src/fuzzy_match.cc @@ -6,6 +6,7 @@ #include #include 
#include +#include "lex_utils.h" enum CharClass { Other, Lower, Upper }; enum CharRole { None, Tail, Head }; @@ -73,6 +74,7 @@ int FuzzyMatcher::MatchScore(int i, int j, bool last) { } FuzzyMatcher::FuzzyMatcher(std::string_view pattern) { + original_pattern = pattern; CalculateRoles(pattern, pat_role, &pat_set); size_t n = 0; for (size_t i = 0; i < pattern.size(); i++) @@ -85,6 +87,9 @@ FuzzyMatcher::FuzzyMatcher(std::string_view pattern) { } int FuzzyMatcher::Match(std::string_view text) { + if (!CaseFoldingSubsequenceMatch(original_pattern, text).first) { + return MinScore(); + } int n = int(text.size()); if (n > kMaxText) return kMinScore + 1; @@ -123,6 +128,10 @@ int FuzzyMatcher::Match(std::string_view text) { return ret; } +int FuzzyMatcher::MinScore() const { + return kMinScore; +} + TEST_SUITE("fuzzy_match") { bool Ranks(std::string_view pat, std::vector texts) { FuzzyMatcher fuzzy(pat); diff --git a/src/fuzzy_match.h b/src/fuzzy_match.h index 336815b72..925b5c578 100644 --- a/src/fuzzy_match.h +++ b/src/fuzzy_match.h @@ -4,8 +4,9 @@ #include #include +#include "completion_matcher.h" -class FuzzyMatcher { +class FuzzyMatcher : public CompletionMatcher { public: constexpr static int kMaxPat = 100; constexpr static int kMaxText = 200; @@ -14,9 +15,11 @@ class FuzzyMatcher { constexpr static int kMinScore = INT_MIN / 4; FuzzyMatcher(std::string_view pattern); - int Match(std::string_view text); + int Match(std::string_view text) override; + int MinScore() const override; private: + std::string_view original_pattern; std::string pat; std::string_view text; int pat_set, text_set; diff --git a/src/messages/text_document_completion.cc b/src/messages/text_document_completion.cc index 7cd8a252b..2d7edb2ba 100644 --- a/src/messages/text_document_completion.cc +++ b/src/messages/text_document_completion.cc @@ -1,11 +1,14 @@ #include "clang_complete.h" #include "code_complete_cache.h" #include "fuzzy_match.h" +#include "fts_match.h" +#include "prefix_match.h" #include 
"include_complete.h" #include "message_handler.h" #include "queue_manager.h" #include "timer.h" #include "working_files.h" +#include "config.h" #include "lex_utils.h" @@ -221,17 +224,23 @@ void FilterAndSortCompletionResponse( item.filterText = item.label; } - // Fuzzy match and remove awful candidates. - FuzzyMatcher fuzzy(complete_text); + // Match and remove awful candidates. + std::unique_ptr matcher; + if (g_config->completion.matcherType == "cqueryMatcher") { + matcher = std::make_unique(complete_text); + } else if (g_config->completion.matcherType == "ftsMatcher") { + matcher = std::make_unique(complete_text); + } else if (g_config->completion.matcherType == "caseSensitivePrefixMatcher") { + matcher = std::make_unique(complete_text, true); + } else if (g_config->completion.matcherType == "caseInsensitivePrefixMatcher") { + matcher = std::make_unique(complete_text, false); + } for (auto& item : items) { - item.score_ = - CaseFoldingSubsequenceMatch(complete_text, *item.filterText).first - ? 
fuzzy.Match(*item.filterText) - : FuzzyMatcher::kMinScore; + item.score_ = matcher->Match(*item.filterText); } items.erase(std::remove_if(items.begin(), items.end(), - [](const lsCompletionItem& item) { - return item.score_ <= FuzzyMatcher::kMinScore; + [&matcher](const lsCompletionItem& item) { + return item.score_ <= matcher->MinScore(); }), items.end()); std::sort(items.begin(), items.end(), diff --git a/src/prefix_match.cc b/src/prefix_match.cc new file mode 100644 index 000000000..878b2bc98 --- /dev/null +++ b/src/prefix_match.cc @@ -0,0 +1,38 @@ +#include "prefix_match.h" + +#include + +namespace { + bool StartsWith(std::string_view text, std::string_view prefix) { + return text.find(prefix) == 0; + } + + bool StartsWithIgnoreCase(std::string_view text, std::string_view prefix) { + auto min_length = std::min(text.size(), prefix.size()); + if (min_length == 0) { + return false; + } + return std::equal( + text.begin(), text.begin() + min_length, + prefix.begin(), prefix.begin() + min_length, + [](char ch1, char ch2) { return std::toupper(ch1) == std::toupper(ch2); } + ); + } +} + +PrefixMatcher::PrefixMatcher(std::string_view pattern, bool case_sensitive) { + original_pattern = pattern; + this->case_sensitive = case_sensitive; +} + +int PrefixMatcher::Match(std::string_view text) { + if (case_sensitive) { + return ::StartsWith(text, original_pattern) ? 1 : MinScore(); + } else { + return ::StartsWithIgnoreCase(text, original_pattern) ? 
1 : MinScore(); + } +} + +int PrefixMatcher::MinScore() const { + return -1; +} diff --git a/src/prefix_match.h b/src/prefix_match.h new file mode 100644 index 000000000..b23503e7e --- /dev/null +++ b/src/prefix_match.h @@ -0,0 +1,14 @@ +#pragma once + +#include "completion_matcher.h" + +class PrefixMatcher : public CompletionMatcher { + public: + PrefixMatcher(std::string_view pattern, bool case_sensitive); + int Match(std::string_view text) override; + int MinScore() const override; + + private: + std::string_view original_pattern; + bool case_sensitive; +}; diff --git a/third_party/lib_fts b/third_party/lib_fts new file mode 160000 index 000000000..80f3f8c52 --- /dev/null +++ b/third_party/lib_fts @@ -0,0 +1 @@ +Subproject commit 80f3f8c52db53428247e741b9efe2cde9667050c From db1bf88d262b758771e8d2df8f34bcb2bb3b7302 Mon Sep 17 00:00:00 2001 From: davidd Date: Tue, 28 Aug 2018 10:32:07 +0300 Subject: [PATCH 2/6] fixing compilation issues --- src/fts_match.cc | 3 +++ src/prefix_match.cc | 1 + 2 files changed, 4 insertions(+) diff --git a/src/fts_match.cc b/src/fts_match.cc index c10c526ba..147d3e5db 100644 --- a/src/fts_match.cc +++ b/src/fts_match.cc @@ -1,7 +1,10 @@ #include "fts_match.h" #define FTS_FUZZY_MATCH_IMPLEMENTATION 1 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-function" #include +#pragma clang diagnostic pop FtsMatcher::FtsMatcher(std::string_view pattern) { original_pattern = pattern; diff --git a/src/prefix_match.cc b/src/prefix_match.cc index 878b2bc98..a80a81231 100644 --- a/src/prefix_match.cc +++ b/src/prefix_match.cc @@ -1,6 +1,7 @@ #include "prefix_match.h" #include +#include namespace { bool StartsWith(std::string_view text, std::string_view prefix) { From 40a2f8ace2234ec6a2d6d2af7c263ddde7b113f1 Mon Sep 17 00:00:00 2001 From: ddovod Date: Sat, 1 Sep 2018 21:46:04 +0300 Subject: [PATCH 3/6] adding virtual dtor to CompletionMatcher --- src/completion_matcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/completion_matcher.h b/src/completion_matcher.h index d773d512e..d69c47742 100644 --- a/src/completion_matcher.h +++ b/src/completion_matcher.h @@ -4,7 +4,7 @@ class CompletionMatcher { public: - // virtual ~CompletionMatcher() = default; // don't know why but it crashed if I uncomment this like, investigating + virtual ~CompletionMatcher() = default; virtual int Match(std::string_view text) = 0; virtual int MinScore() const = 0; }; From 4d53c1d06a4fd936a59906cd9eeaf1411086eec6 Mon Sep 17 00:00:00 2001 From: ddovod Date: Sat, 1 Sep 2018 21:48:02 +0300 Subject: [PATCH 4/6] using string utilities from utils.h --- src/prefix_match.cc | 19 +------------------ src/utils.cc | 6 ++++++ src/utils.h | 3 ++- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/src/prefix_match.cc b/src/prefix_match.cc index 878b2bc98..621db8fbe 100644 --- a/src/prefix_match.cc +++ b/src/prefix_match.cc @@ -1,25 +1,8 @@ #include "prefix_match.h" +#include "utils.h" #include -namespace { - bool StartsWith(std::string_view text, std::string_view prefix) { - return text.find(prefix) == 0; - } - - bool StartsWithIgnoreCase(std::string_view text, std::string_view prefix) { - auto min_length = std::min(text.size(), prefix.size()); - if (min_length == 0) { - return false; - } - return std::equal( - text.begin(), text.begin() + min_length, - prefix.begin(), prefix.begin() + min_length, - [](char ch1, char ch2) { return std::toupper(ch1) == std::toupper(ch2); } - ); - } -} - PrefixMatcher::PrefixMatcher(std::string_view pattern, bool case_sensitive) { original_pattern = pattern; this->case_sensitive = case_sensitive; diff --git a/src/utils.cc b/src/utils.cc index 1a07b980a..575a19b8d 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -79,6 +79,12 @@ bool StartsWith(std::string_view value, std::string_view start) { return std::equal(start.begin(), start.end(), value.begin()); } +bool StartsWithIgnoreCase(std::string_view value, std::string_view start) { + if (start.size() > 
value.size()) + return false; + return std::equal(start.begin(), start.end(), value.begin(), [](char ch1, char ch2) { return std::toupper(ch1) == std::toupper(ch2); }); +} + bool AnyStartsWith(const std::vector& values, const std::string& start) { return std::any_of( diff --git a/src/utils.h b/src/utils.h index d0f044f99..62a809292 100644 --- a/src/utils.h +++ b/src/utils.h @@ -24,6 +24,7 @@ uint64_t HashUsr(std::string_view s); // Returns true if |value| starts/ends with |start| or |ending|. bool StartsWith(std::string_view value, std::string_view start); +bool StartsWithIgnoreCase(std::string_view value, std::string_view start); bool EndsWith(std::string_view value, std::string_view ending); bool AnyStartsWith(const std::vector& values, const std::string& start); @@ -146,4 +147,4 @@ bool IsWindowsAbsolutePath(const std::string& path); bool IsDirectory(const std::string& path); -size_t HashArguments(const std::vector& args); \ No newline at end of file +size_t HashArguments(const std::vector& args); From a4ef8032b8ed6ec2b9d04ed6d055211b273ef9d4 Mon Sep 17 00:00:00 2001 From: ddovod Date: Sat, 1 Sep 2018 21:48:34 +0300 Subject: [PATCH 5/6] adding some docs --- src/config.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/config.h b/src/config.h index 99d98aae7..d90a6c331 100644 --- a/src/config.h +++ b/src/config.h @@ -157,7 +157,12 @@ struct Config { std::vector includeBlacklist; std::vector includeWhitelist; - // + // Matcher type to filter completion candidates. 
+ // Available matchers are: + // "cqueryMatcher": default cquery fuzzy matching algorithm + // "ftsMatcher": fuzzy matching algorithm powered by lib_fts + // "caseSensitivePrefixMatcher": simple case sensitive prefix matcher + // "caseInsensitivePrefixMatcher": simple case insensitive prefix matcher std::string matcherType = "cqueryMatcher"; }; Completion completion; From 2ea4bdfb7db0925380d0833bacb613c54c939fcd Mon Sep 17 00:00:00 2001 From: ddovod Date: Sat, 1 Sep 2018 21:48:50 +0300 Subject: [PATCH 6/6] disabling clang-format on third-party libs --- third_party/.clang-format | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 third_party/.clang-format diff --git a/third_party/.clang-format b/third_party/.clang-format new file mode 100644 index 000000000..ef2ae21fa --- /dev/null +++ b/third_party/.clang-format @@ -0,0 +1,4 @@ +--- +DisableFormat: true +SortIncludes: false +...