chore: improve completion ranking

S-furi · S-furi · commit 10f4cf7f4df1 · 2025-10-16T18:00:14.000+02:00
Add tests and improve the ranking as follows:

- improve fuzzy matching by penalizing gaps between matched characters
- prefer tighter match spans, then shorter completions
- fall back to `sortText` as the final tie-breaker
diff --git a/completions/src/main/kotlin/completions/lsp/util/completions/FuzzyCompletionRanking.kt b/completions/src/main/kotlin/completions/lsp/util/completions/FuzzyCompletionRanking.kt
@@ -8,7 +8,12 @@ import org.eclipse.lsp4j.CompletionItem
 internal object FuzzyCompletionRanking {
     private val objectMapper = Json { ignoreUnknownKeys = true }
 
-    private data class RankedItem(val item: CompletionItem, val score: Int)
+    private data class RankedItem( // a completion item plus the metrics used to order it
+        val item: CompletionItem,
+        val score: Int, // fuzzy score; higher ranks first
+        val matchSpan: Int, // width of the window covering all matches; Int.MAX_VALUE if unmatched
+        val candidateLength: Int // length of the string the query was matched against
+    )
 
     /**
      * Extracts the prefix of this [CompletionItem] that triggered the completion.
@@ -23,19 +28,39 @@ internal object FuzzyCompletionRanking {
      * fuzzy scoring is performed on what has been typed by the user so far,
      * then we use [CompletionItem.sortText] to break ties.
      *
+     * Tie-breakers after the fuzzy score:
+     * - smaller match span (the window covering all matched characters)
+     * - shorter candidate length
+     * - sortText (ascending)
+     *
      * @param query the query the user has typed so far
      */
-    fun List<CompletionItem>.rankCompletions(query: String): List<CompletionItem> =
-        map { RankedItem(it, fuzzyScore(query, it.sortingKey())) }
-            .sortedWith(compareByDescending<RankedItem> { it.score }.thenBy { it.item.sortText })
+    fun List<CompletionItem>.rankCompletions(query: String): List<CompletionItem> {
+        if (query.isEmpty()) return sortedBy { it.sortText } // empty query: rank purely by sortText
+        return map { item ->
+            val candidate = item.sortingKey()
+            val (score, span) = fuzzyScoreWithSpan(query, candidate)
+            RankedItem(item, score, span, candidate.length)
+        }
+            .sortedWith(
+                // Gap penalties can push a genuine match to a score <= 0, i.e. not above the 0 of a
+                // non-match, so unmatched items (span == Int.MAX_VALUE) are explicitly sorted last.
+                compareBy<RankedItem> { it.matchSpan == Int.MAX_VALUE }
+                    .thenByDescending { it.score }
+                    .thenBy { it.matchSpan }
+                    .thenBy { it.candidateLength }
+                    .thenBy { it.item.sortText }
+            )
             .map { it.item }
+    }
 
-    private fun fuzzyScore(query: String, candidate: String): Int {
-        if (query.isEmpty()) return 1
+    private fun fuzzyScoreWithSpan(query: String, candidate: String): Pair<Int, Int> {
+        if (query.isEmpty()) return 1 to 0
 
         var score = 0
         var queryIndex = 0
         var lastMatchIndex = -1
+        var firstMatchIndex = -1
 
         for (i in candidate.indices) {
             if (queryIndex >= query.length) break
@@ -44,16 +69,25 @@ internal object FuzzyCompletionRanking {
             val cc = candidate[i].lowercaseChar()
 
             if (cc == qc) {
+                if (firstMatchIndex == -1) firstMatchIndex = i
                 score += 10
-                if (lastMatchIndex == i - 1) score += 5 // consecutive match bonus
-                if (i == 0 || !candidate[i-1].isLetterOrDigit()) score += 3 // bonus if beginning
+                if (lastMatchIndex == i - 1) {
+                    score += 5
+                } else if (lastMatchIndex != -1) {
+                    val gap = i - lastMatchIndex - 1
+                    if (gap > 0) score -= gap
+                }
                 lastMatchIndex = i
                 queryIndex++
             }
         }
-        return if (queryIndex == query.length) score else 0
+        return if (queryIndex == query.length) {
+            val span = lastMatchIndex - firstMatchIndex + 1
+            score to span
+        } else {
+            0 to Int.MAX_VALUE
+        }
     }
 
     private fun CompletionItem.sortingKey(): String = this.filterText ?: this.label
-}
-
+}
diff --git a/completions/src/test/kotlin/lsp/FuzzyCompletionRankingTest.kt b/completions/src/test/kotlin/lsp/FuzzyCompletionRankingTest.kt
@@ -0,0 +1,115 @@
+package lsp
+
+import completions.lsp.util.completions.FuzzyCompletionRanking.rankCompletions
+import org.eclipse.lsp4j.CompletionItem
+import org.junit.jupiter.api.Assertions.assertIterableEquals
+import org.junit.jupiter.api.Test
+
+/** Tests for [rankCompletions]: fuzzy score first, then match span, candidate length and `sortText`. */
+class FuzzyCompletionRankingTest {
+
+    @Test
+    fun `empty query ranks only by sortText`() {
+        val a = completionItem("zeta", sortText = "3")
+        val b = completionItem("alpha", sortText = "1")
+        val c = completionItem("gamma", sortText = "2")
+        val ranked = listOf(a, b, c).rankCompletions("")
+
+        assertIterableEquals(listOf(b, c, a), ranked)
+    }
+
+    @Test
+    fun `Kotlin common API names should be properly ranked`() {
+        val println = completionItem("println", sortText = "2")
+        val print = completionItem("print", sortText = "1")
+        val property = completionItem("property", sortText = "4")
+        val map = completionItem("map", sortText = "3")
+
+        val ranked = listOf(map, println, property, print).rankCompletions("pr")
+
+        assertIterableEquals(listOf(print, println, property, map), ranked)
+    }
+
+    @Test
+    fun `completions are ranked properly`() {
+        val c1 = completionItem("toInt", sortText = "3")
+        val c2 = completionItem("toUInt", sortText = "1")
+        val c3 = completionItem("toInterval", sortText = "2")
+
+        val ranked = listOf(c2, c3, c1).rankCompletions("toIn")
+        assertIterableEquals(listOf(c1, c3, c2), ranked)
+    }
+
+    @Test
+    fun `tie-break by sortText when fuzzy scores equal`() {
+        val a = completionItem("prX", sortText = "1")
+        val b = completionItem("prY", sortText = "2")
+        val ranked = listOf(b, a).rankCompletions("pr")
+
+        assertIterableEquals(listOf(a, b), ranked)
+    }
+
+    @Test
+    fun `exact and consecutive matches outrank sparse matches`() {
+        val exact = completionItem("pr", sortText = "3") // candidate equals the query
+        val consecutive = completionItem("print", sortText = "2") // contiguous prefix match
+        val sparse = completionItem("p...r..", sortText = "1") // matched characters are 3 apart
+        val ranked = listOf(sparse, consecutive, exact).rankCompletions("pr")
+
+        assertIterableEquals(listOf(exact, consecutive, sparse), ranked)
+    }
+
+    @Test
+    fun `case-insensitive matching, tie-break with sortText`() {
+        val a = completionItem("pRiNtLn", sortText = "2")
+        val b = completionItem("println", sortText = "1")
+        val ranked = listOf(a, b).rankCompletions("PrI")
+
+        assertIterableEquals(listOf(b, a), ranked)
+    }
+
+    @Test
+    fun `non-matching candidates are ranked after matching ones`() {
+        val match1 = completionItem("map", sortText = "2")
+        val match2 = completionItem("maybeApply", sortText = "3")
+        val nonMatch = completionItem("println", sortText = "1")
+        val ranked = listOf(nonMatch, match2, match1).rankCompletions("map")
+
+        assertIterableEquals(listOf(match1, match2, nonMatch), ranked)
+    }
+
+    @Test
+    fun `consecutive matches beat non-consecutive`() {
+        val consecutive = completionItem("ioScope", sortText = "2")
+        val sparse = completionItem("iXoScope", sortText = "1")
+        val ranked = listOf(sparse, consecutive).rankCompletions("io")
+
+        assertIterableEquals(listOf(consecutive, sparse), ranked)
+    }
+
+    @Test
+    fun `filterText is matched instead of the label when present`() {
+        val viaFilterText = completionItem("zzz", sortText = "2", filterText = "print")
+        val viaLabel = completionItem("p...r..", sortText = "1")
+        val ranked = listOf(viaLabel, viaFilterText).rankCompletions("pr")
+
+        assertIterableEquals(listOf(viaFilterText, viaLabel), ranked)
+    }
+
+    /**
+     * Builds a [CompletionItem] labelled [label] with the given [sortText].
+     *
+     * [filterText] and [dataJson] are applied only when non-null, so by default
+     * the item carries nothing but its label and sort text.
+     */
+    private fun completionItem(
+        label: String,
+        sortText: String,
+        filterText: String? = null,
+        dataJson: String? = null,
+    ): CompletionItem = CompletionItem(label).apply {
+        this.sortText = sortText
+        if (filterText != null) this.filterText = filterText
+        if (dataJson != null) this.data = dataJson
+    }
+}