Skip to content

Commit

Permalink
SLCORE-819 Add underscore char to the split pattern for better binding suggestion
Browse files Browse the repository at this point in the history
  • Loading branch information
serhat-yenican-sonarsource committed May 28, 2024
1 parent c4600de commit 8ed96ab
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
public class BindingCandidatesFinder {

private static final SonarLintLogger LOG = SonarLintLogger.get();
private static final String SPLIT_PATTERN = "[\\W_]+";
private final ConfigurationRepository configRepository;
private final BindingClueProvider bindingClueProvider;
private final SonarProjectsCache sonarProjectsCache;
Expand Down Expand Up @@ -106,7 +107,7 @@ private boolean isConfigScopeNameCloseEnoughToSonarProject(String configScopeNam
LOG.debug("Unable to find SonarProject with key '{}' on connection '{}' in the cache", projectKey, connectionId);
return false;
}
TextSearchIndex<ServerProject> index = new TextSearchIndex<>();
TextSearchIndex<ServerProject> index = new TextSearchIndex<>(SPLIT_PATTERN);
var p = sonarProjectOpt.get();
index.index(p, p.getKey() + " " + p.getName());
var searchResult = index.search(configScopeName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,17 @@
* Performance of search: O(log N) on the number of indexed terms + O(N) on the number of results
*/
public class TextSearchIndex<T> {
private static final String SPLIT_PATTERN = "\\W";
private static final String DEFAULT_SPLIT_PATTERN = "\\W";
private final String splitPattern;
private TreeMap<String, List<DictEntry>> termToObj;
private Map<T, Integer> objToWordFrequency;

/**
 * Creates an index that tokenizes text with the default split pattern
 * ({@code DEFAULT_SPLIT_PATTERN}, i.e. the regular expression {@code \W}).
 */
public TextSearchIndex() {
this(DEFAULT_SPLIT_PATTERN);
}

/**
 * Creates an index that tokenizes text with the given split pattern.
 *
 * @param splitPattern regular expression handed to {@link String#split(String)} when text is tokenized
 */
public TextSearchIndex(String splitPattern) {
this.splitPattern = splitPattern;
// NOTE(review): clear() is defined outside this excerpt; presumably it initializes
// termToObj and objToWordFrequency - confirm.
clear();
}

Expand Down Expand Up @@ -180,8 +186,8 @@ private void addToDictionary(String token, int tokenIndex, T obj) {
entries.add(new DictEntry(obj, tokenIndex));
}

private static List<String> tokenize(String text) {
var split = text.split(SPLIT_PATTERN);
private List<String> tokenize(String text) {
var split = text.split(splitPattern);
List<String> terms = new ArrayList<>(split.length);

for (String s : split) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* SonarLint Core - Implementation
* Copyright (C) 2016-2024 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonarsource.sonarlint.core;

import org.junit.jupiter.api.Test;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link TextSearchIndex} constructed with a caller-supplied split pattern.
 */
class TextSearchIndexTest {
  @Test
  void splits_strings_based_on_provided_split_pattern() {
    // The pattern treats runs of non-word characters and underscores as separators;
    // the underscore has to be listed explicitly because \W alone does not match it.
    var underscoreAwareIndex = new TextSearchIndex<String>("[\\W_]+");
    underscoreAwareIndex.index("text", "a-b-c_d");

    // Searching for the part that follows the underscore is expected to return a result.
    assertThat(underscoreAwareIndex.search("d")).isNotEmpty();
  }
}

0 comments on commit 8ed96ab

Please sign in to comment.