diff --git a/src/DynamoCore/Utilities/LuceneSearchUtility.cs b/src/DynamoCore/Utilities/LuceneSearchUtility.cs index 94e8b518f79..579fac5fb12 100644 --- a/src/DynamoCore/Utilities/LuceneSearchUtility.cs +++ b/src/DynamoCore/Utilities/LuceneSearchUtility.cs @@ -73,6 +73,8 @@ internal class LuceneSearchUtility /// internal static readonly LuceneStartConfig DefaultPkgIndexStartConfig = new LuceneStartConfig(LuceneSearchUtility.LuceneStorage.FILE_SYSTEM, LuceneConfig.PackagesIndexingDirectory); + private bool hasEmptySpaces { get; set; } + public enum LuceneStorage { //Lucene Storage will be located in RAM and all the info indexed will be lost when Dynamo app is closed @@ -82,6 +84,18 @@ public enum LuceneStorage FILE_SYSTEM } + /// + /// This enum will be used to identify which kind of search should be executed based on the user search criteria + /// + public enum SearchType + { + //Normal search using just one word matching a specific node name + Normal, + + //Search by category using the "." 
character for example "list.re" + ByCategory + } + // Used for creating the StandardAnalyzer internal Analyzer Analyzer; @@ -264,7 +278,14 @@ internal void SetDocumentFieldValue(Document doc, string field, string value, bo /// internal string CreateSearchQuery(string[] fields, string SearchTerm) { + //By default the search will be normal + SearchType searchType = SearchType.Normal; int fuzzyLogicMaxEdits = LuceneConfig.FuzzySearchMinEdits; + hasEmptySpaces = false; + + //Max number of nodes allowed in the search when it is a ByEmptySpace search + const int MaxNodeNamesRepeated = 20; + // Use a larger max edit value - more tolerant with typo when search term is longer than threshold if (SearchTerm.Length > LuceneConfig.FuzzySearchMaxEditsThreshold) { @@ -273,13 +294,21 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) var booleanQuery = new BooleanQuery(); string searchTerm = QueryParser.Escape(SearchTerm); - var bCategoryBasedSearch = searchTerm.Contains('.') ? true : false; + + if (searchTerm.Contains('.')) + searchType = SearchType.ByCategory; + else if (searchTerm.Contains(' ')) + hasEmptySpaces = true; + else + searchType = SearchType.Normal; + + var trimmedSearchTerm = hasEmptySpaces == true ? searchTerm.Replace(" ", "") : searchTerm; foreach (string f in fields) { //Needs to be again due that now a query can contain different values per field (e.g. 
CategorySplitted:list, Name:tr) searchTerm = QueryParser.Escape(SearchTerm); - if (bCategoryBasedSearch == true) + if (searchType == SearchType.ByCategory) { //This code section should be only executed if the search criteria is CategoryBased like "category.nodename" if (f != nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) && @@ -297,26 +326,26 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) } } - FuzzyQuery fuzzyQuery; - if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength) - { - fuzzyQuery = new FuzzyQuery(new Term(f, searchTerm), fuzzyLogicMaxEdits); - booleanQuery.Add(fuzzyQuery, Occur.SHOULD); - } - //For normal search we don't consider the fields NameSplitted and CategorySplitted if ((f == nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) || - f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) && bCategoryBasedSearch == false) + f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) && searchType != SearchType.ByCategory) continue; //This case is for when the user type something like "list.", I mean, not specifying the node name or part of it if (string.IsNullOrEmpty(searchTerm)) continue; - var fieldQuery = CalculateFieldWeight(f, searchTerm); - var wildcardQuery = CalculateFieldWeight(f, searchTerm, true); + FuzzyQuery fuzzyQuery; + if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength) + { + fuzzyQuery = new FuzzyQuery(new Term(f, hasEmptySpaces == true ? trimmedSearchTerm : searchTerm), fuzzyLogicMaxEdits); + booleanQuery.Add(fuzzyQuery, Occur.SHOULD); + } + + var fieldQuery = CalculateFieldWeight(f, hasEmptySpaces == true ? trimmedSearchTerm : searchTerm); + var wildcardQuery = CalculateFieldWeight(f, hasEmptySpaces == true ? 
trimmedSearchTerm : searchTerm, true); - if (bCategoryBasedSearch && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) + if (searchType == SearchType.ByCategory && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) { booleanQuery.Add(fieldQuery, Occur.MUST); booleanQuery.Add(wildcardQuery, Occur.MUST); @@ -331,6 +360,10 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) { foreach (string s in searchTerm.Split(' ', '.')) { + //If it is a ByEmptySpace search and a split word matches more than MaxNodeNamesRepeated nodes, then the word is skipped + int nodesFrequency = dynamoModel.SearchModel.Entries.Where(entry => entry.Name.ToLower().Contains(s) && !string.IsNullOrEmpty(s)).Count(); + if (nodesFrequency > MaxNodeNamesRepeated) continue; + if (string.IsNullOrEmpty(s)) continue; if (s.Length > LuceneConfig.FuzzySearchMinimalTermLength)