Skip to content

ESQL - Add K mandatory param for KNN function #129763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,6 @@ tests:
- class: org.elasticsearch.entitlement.runtime.policy.FileAccessTreeTests
method: testWindowsAbsolutPathAccess
issue: https://github.com/elastic/elasticsearch/issues/129168
- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is removed in this PR because `k` is now passed in all the other tests, so it no longer needs a dedicated test.

method: test {knn-function.KnnSearchWithKOption ASYNC}
issue: https://github.com/elastic/elasticsearch/issues/129447
- class: org.elasticsearch.xpack.ml.integration.ClassificationIT
method: testWithDatastreams
issue: https://github.com/elastic/elasticsearch/issues/129457
Expand All @@ -535,9 +532,6 @@ tests:
- class: org.elasticsearch.xpack.security.PermissionsIT
method: testWhenUserLimitedByOnlyAliasOfIndexCanWriteToIndexWhichWasRolledoverByILMPolicy
issue: https://github.com/elastic/elasticsearch/issues/129481
- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT
method: test {knn-function.KnnSearchWithKOption SYNC}
issue: https://github.com/elastic/elasticsearch/issues/129512
- class: org.elasticsearch.xpack.logsdb.qa.StandardVersusStandardReindexedIntoLogsDbChallengeRestIT
method: testMatchAllQuery
issue: https://github.com/elastic/elasticsearch/issues/129527
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_REMOVE_ERROR_PARSING = def(9_102_0_00);
public static final TransportVersion ML_INFERENCE_CUSTOM_SERVICE_EMBEDDING_BATCH_SIZE = def(9_103_0_00);
public static final TransportVersion STREAMS_LOGS_SUPPORT = def(9_104_0_00);
public static final TransportVersion ESQL_KNN_K_PARAM_MANDATORY = def(9_105_0_00);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
# top-n query at the shard level

knnSearch
required_capability: knn_function
required_capability: knn_function_v2

// tag::knn-function[]
from colors metadata _score
| where knn(rgb_vector, [0, 120, 0])
| where knn(rgb_vector, [0, 120, 0], 10)
| sort _score desc, color asc
// end::knn-function[]
| keep color, rgb_vector
Expand All @@ -29,31 +29,12 @@ chartreuse | [127.0, 255.0, 0.0]
// end::knn-function-result[]
;

knnSearchWithKOption
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed this test: `k` has been added to all the other tests, so a dedicated test for it is redundant.

required_capability: knn_function

// tag::knn-function-options[]
from colors metadata _score
| where knn(rgb_vector, [0,255,255], {"k": 4})
| sort _score desc, color asc
// end::knn-function-options[]
| keep color, rgb_vector
| limit 4
;

color:text | rgb_vector:dense_vector
cyan | [0.0, 255.0, 255.0]
turquoise | [64.0, 224.0, 208.0]
aqua marine | [127.0, 255.0, 212.0]
teal | [0.0, 128.0, 128.0]
;

# https://github.com/elastic/elasticsearch/issues/129550
knnSearchWithSimilarityOption-Ignore
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| where knn(rgb_vector, [255,192,203], {"k": 140, "similarity": 40})
| where knn(rgb_vector, [255,192,203], 140, {"similarity": 40})
| sort _score desc, color asc
| keep color, rgb_vector
;
Expand All @@ -63,14 +44,13 @@ pink | [255.0, 192.0, 203.0]
peach puff | [255.0, 218.0, 185.0]
bisque | [255.0, 228.0, 196.0]
wheat | [245.0, 222.0, 179.0]

;

knnHybridSearch
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| where match(color, "blue") or knn(rgb_vector, [65,105,225], {"k": 140})
| where match(color, "blue") or knn(rgb_vector, [65,105,225], 140)
| where primary == true
| sort _score desc, color asc
| keep color, rgb_vector
Expand All @@ -90,10 +70,10 @@ yellow | [255.0, 255.0, 0.0]
;

knnWithMultipleFunctions
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| where knn(rgb_vector, [128,128,0], {"k": 140}) and match(color, "olive")
| where knn(rgb_vector, [128,128,0], 140) and match(color, "olive")
| sort _score desc, color asc
| keep color, rgb_vector
;
Expand All @@ -103,11 +83,11 @@ olive | [128.0, 128.0, 0.0]
;

knnAfterKeep
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| keep rgb_vector, color, _score
| where knn(rgb_vector, [128,255,0], {"k": 140})
| where knn(rgb_vector, [128,255,0], 140)
| sort _score desc, color asc
| keep rgb_vector
| limit 5
Expand All @@ -122,11 +102,11 @@ rgb_vector:dense_vector
;

knnAfterDrop
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| drop primary
| where knn(rgb_vector, [128,250,0], {"k": 140})
| where knn(rgb_vector, [128,250,0], 140)
| sort _score desc, color asc
| keep color, rgb_vector
| limit 5
Expand All @@ -141,11 +121,11 @@ lime | [0.0, 255.0, 0.0]
;

knnAfterEval
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| eval composed_name = locate(color, " ") > 0
| where knn(rgb_vector, [128,128,0], {"k": 140})
| where knn(rgb_vector, [128,128,0], 140)
| sort _score desc, color asc
| keep color, composed_name
| limit 5
Expand All @@ -160,11 +140,11 @@ golden rod | true
;

knnWithConjunction
required_capability: knn_function
required_capability: knn_function_v2

# TODO We need kNN prefiltering here so we get more candidates that pass the filter
from colors metadata _score
| where knn(rgb_vector, [255,255,238], {"k": 140}) and hex_code like "#FFF*"
| where knn(rgb_vector, [255,255,238], 140) and hex_code like "#FFF*"
| sort _score desc, color asc
| keep color, hex_code, rgb_vector
| limit 10
Expand All @@ -181,11 +161,11 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0]
;

knnWithDisjunctionAndFiltersConjunction
required_capability: knn_function
required_capability: knn_function_v2

# TODO We need kNN prefiltering here so we get more candidates that pass the filter
from colors metadata _score
| where (knn(rgb_vector, [0,255,255], {"k": 140}) or knn(rgb_vector, [128, 0, 255], {"k": 140})) and primary == true
| where (knn(rgb_vector, [0,255,255], 140) or knn(rgb_vector, [128, 0, 255], 140)) and primary == true
| keep color, rgb_vector, _score
| sort _score desc, color asc
| drop _score
Expand All @@ -205,11 +185,11 @@ yellow | [255.0, 255.0, 0.0]
;

knnWithNonPushableConjunction
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| eval composed_name = locate(color, " ") > 0
| where knn(rgb_vector, [128,128,0], {"k": 140}) and composed_name == false
| where knn(rgb_vector, [128,128,0], 140) and composed_name == false
| sort _score desc, color asc
| keep color, composed_name
| limit 10
Expand All @@ -230,10 +210,10 @@ maroon | false

# https://github.com/elastic/elasticsearch/issues/129550
testKnnWithNonPushableDisjunctions-Ignore
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| where knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 30}) or length(color) > 10
| where knn(rgb_vector, [128,128,0], 140, {"similarity": 30}) or length(color) > 10
| sort _score desc, color asc
| keep color
;
Expand All @@ -247,10 +227,10 @@ papaya whip

# https://github.com/elastic/elasticsearch/issues/129550
testKnnWithNonPushableDisjunctionsOnComplexExpressions-Ignore
required_capability: knn_function
required_capability: knn_function_v2

from colors metadata _score
| where (knn(rgb_vector, [128,128,0], {"k": 140, "similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], {"k": 140, "similarity": 60}) and primary == false)
| where (knn(rgb_vector, [128,128,0], 140, {"similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], 140, {"similarity": 60}) and primary == false)
| sort _score desc, color asc
| keep color, primary
;
Expand All @@ -262,24 +242,24 @@ indigo | false
;

testKnnInStatsNonPushable
required_capability: knn_function
required_capability: knn_function_v2

from colors
| where length(color) < 10
| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 140})
| stats c = count(*) where knn(rgb_vector, [128,128,255], 140)
;

c: long
50
;

testKnnInStatsWithGrouping
required_capability: knn_function
required_capability: knn_function_v2
required_capability: full_text_functions_in_stats_where

from colors
| where length(color) < 10
| stats c = count(*) where knn(rgb_vector, [128,128,255], {"k": 140}) by primary
| stats c = count(*) where knn(rgb_vector, [128,128,255], 140) by primary
;

c: long | primary: boolean
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public void testKnnDefaults() {

var query = String.format(Locale.ROOT, """
FROM test METADATA _score
| WHERE knn(vector, %s)
| WHERE knn(vector, %s, 10)
| KEEP id, floats, _score, vector
| SORT _score DESC
""", Arrays.toString(queryVector));
Expand Down Expand Up @@ -73,7 +73,7 @@ public void testKnnOptions() {

var query = String.format(Locale.ROOT, """
FROM test METADATA _score
| WHERE knn(vector, %s, {"k": 5})
| WHERE knn(vector, %s, 5)
| KEEP id, floats, _score, vector
| SORT _score DESC
""", Arrays.toString(queryVector));
Expand All @@ -94,7 +94,7 @@ public void testKnnNonPushedDown() {
// TODO we need to decide what to do when / if user uses k for limit, as no more than k results will be returned from knn query
var query = String.format(Locale.ROOT, """
FROM test METADATA _score
| WHERE knn(vector, %s, {"k": 5}) OR id > 10
| WHERE knn(vector, %s, 5) OR id > 10
| KEEP id, floats, _score, vector
| SORT _score DESC
""", Arrays.toString(queryVector));
Expand All @@ -111,7 +111,7 @@ public void testKnnNonPushedDown() {

@Before
public void setup() throws IOException {
assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled());
assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V2.isEnabled());

var indexName = "test";
var client = client().admin().indices();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1195,7 +1195,7 @@ public enum Cap {
/**
* Support knn function
*/
KNN_FUNCTION(Build.current().isSnapshot()),
KNN_FUNCTION_V2(Build.current().isSnapshot()),

LIKE_WITH_LIST_OF_PATTERNS,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ private static List<NamedWriteableRegistry.Entry> fullText() {
}

private static List<NamedWriteableRegistry.Entry> vector() {
if (EsqlCapabilities.Cap.KNN_FUNCTION.isEnabled()) {
if (EsqlCapabilities.Cap.KNN_FUNCTION_V2.isEnabled()) {
return List.of(Knn.ENTRY);
}
return List.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ private static FunctionDefinition[][] snapshotFunctions() {
def(LastOverTime.class, LastOverTime::withUnresolvedTimestamp, "last_over_time"),
def(FirstOverTime.class, FirstOverTime::withUnresolvedTimestamp, "first_over_time"),
def(Term.class, bi(Term::new), "term"),
def(Knn.class, tri(Knn::new), "knn") } };
def(Knn.class, Knn::new, "knn") } };
}

public EsqlFunctionRegistry snapshotRegistry() {
Expand Down
Loading
Loading