From 62ad1cfbd3ec89c9ca5eeedf0b2d051b42938635 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Thu, 9 Mar 2023 14:47:18 +0000 Subject: [PATCH 01/10] Support searching by value only, e.g. key is not mandatory --- .../api/v1/resources/query_handler.py | 12 +- .../v1/resources/schemas/filter_schema.json | 2 +- .../api/v1/resources/swagger_docs/search.yml | 2 +- omero_search_engine/api/v1/resources/utils.py | 209 ++++++++++-------- 4 files changed, 129 insertions(+), 96 deletions(-) diff --git a/omero_search_engine/api/v1/resources/query_handler.py b/omero_search_engine/api/v1/resources/query_handler.py index 822b6b7f..0c4d3b8c 100644 --- a/omero_search_engine/api/v1/resources/query_handler.py +++ b/omero_search_engine/api/v1/resources/query_handler.py @@ -736,9 +736,15 @@ def simple_search( ): if not operator: operator = "equals" - and_filters = [ - {"name": key, "value": value, "operator": operator, "resource": resource} - ] + if key: + and_filters = [ + {"name": key, "value": value, "operator": operator, "resource": resource} + ] + else: + and_filters = [ + {"value": value, "operator": operator, "resource": resource} + ] + query_details = {"and_filters": and_filters} if bookmark: bookmark = [bookmark] diff --git a/omero_search_engine/api/v1/resources/schemas/filter_schema.json b/omero_search_engine/api/v1/resources/schemas/filter_schema.json index 3f0df36a..5104a595 100644 --- a/omero_search_engine/api/v1/resources/schemas/filter_schema.json +++ b/omero_search_engine/api/v1/resources/schemas/filter_schema.json @@ -5,7 +5,7 @@ "description": "filter_schema", "type": "object", "additionalProperties": true, - "required": ["name","value","operator"], + "required": ["value","operator"], "properties": { "name": { "name": "name", diff --git a/omero_search_engine/api/v1/resources/swagger_docs/search.yml b/omero_search_engine/api/v1/resources/swagger_docs/search.yml index fdb70676..341e1185 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/search.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/search.yml @@ -13,7 +13,7 @@ parameters: description: the resource attribute in: query type: string - required: true + required: False - name: value description: the attribute value in: query diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index 39dcb8f9..8ac31935 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -290,7 +290,9 @@ def elasticsearch_query_builder( for filter in and_filter: search_omero_app.logger.info("FILTER %s" % filter) try: - key = filter["name"].strip() + key = filter.get("name") + if key: + key = key.strip() value = filter["value"].strip() operator = filter["operator"].strip() except Exception as e: @@ -310,10 +312,11 @@ def elasticsearch_query_builder( value=value ) ) - _nested_must_part.append( - case_sensitive_must_name_condition_template.substitute( - name=key - ) # noqa + if key: + _nested_must_part.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) # noqa ) else: @@ -322,10 +325,11 @@ def elasticsearch_query_builder( value=value ) ) - _nested_must_part.append( - case_insensitive_must_name_condition_template.substitute( # noqa - name=key - ) + if key: + _nested_must_part.append( + case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) nested_must_part.append( @@ -342,10 +346,11 @@ def elasticsearch_query_builder( wild_card_value=value ) ) - _nested_must_part.append( - case_sensitive_must_name_condition_template.substitute( - name=key - ) # noqa + if key: + _nested_must_part.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) # noqa ) else: @@ -354,10 +359,11 @@ def elasticsearch_query_builder( wild_card_value=value ) ) - _nested_must_part.append( - case_insensitive_must_name_condition_template.substitute( # noqa - name=key - ) + if key: + _nested_must_part.append( + case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) nested_must_part.append( @@ -370,13 +376,14 @@ def elasticsearch_query_builder( if operator == "not_contains": value = "*{value}*".format(value=adjust_value(value)) if case_sensitive: - nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + nested_must_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_sensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) ) - ) nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_sensitive_wildcard_value_condition_template.substitute( # noqa @@ -385,13 +392,14 @@ def elasticsearch_query_builder( ) ) else: - nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + nested_must_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) ) - ) nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_insensitive_wildcard_value_condition_template.substitute( # noqa @@ -402,13 +410,14 @@ def elasticsearch_query_builder( else: if case_sensitive: - nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + nested_must_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_sensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) ) - ) nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_sensitive_must_value_condition_template.substitute( # noqa @@ -417,13 +426,14 @@ def elasticsearch_query_builder( ) ) else: - nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + nested_must_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) ) - ) nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_insensitive_must_value_condition_template.substitute( # noqa @@ -435,13 +445,14 @@ def elasticsearch_query_builder( elif operator in ["lt", "lte", "gt", "gte"]: # nested_must_part.append(nested_keyvalue_pair_query_template.substitute(nested=must_name_condition_template.substitute(name=key))) # noqa if case_sensitive: - nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + nested_must_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_sensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) ) - ) nested_must_part.append( nested_keyvalue_pair_query_template.substitute( @@ -451,13 +462,14 @@ def elasticsearch_query_builder( ) ) else: - nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + nested_must_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) ) - ) nested_must_part.append( nested_keyvalue_pair_query_template.substitute( @@ -479,7 +491,9 @@ def elasticsearch_query_builder( shoud_not_value = [] # should_names = [] try: - key = or_filter["name"].strip() + key = or_filter.get("name") + if key: + key = key.strip() value = or_filter["value"].strip() operator = or_filter["operator"].strip() except Exception: @@ -488,7 +502,7 @@ def elasticsearch_query_builder( name, value and operator keywords." ) - if key not in added_keys: + if key and key not in added_keys: added_keys.append(key) if operator == "equals": @@ -498,22 +512,24 @@ def elasticsearch_query_builder( value=value ) ) - should_values.append( - case_sensitive_must_name_condition_template.substitute( - name=key + if key: + should_values.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) ) - ) else: should_values.append( case_insensitive_must_value_condition_template.substitute( # noqa value=value ) ) - should_values.append( - case_insensitive_must_name_condition_template.substitute( - name=key + if key: + should_values.append( + case_insensitive_must_name_condition_template.substitute( + name=key + ) ) - ) elif operator == "contains": value = "*{value}*".format(value=value) if case_sensitive: @@ -522,22 +538,24 @@ def elasticsearch_query_builder( wild_card_value=value ) ) - should_values.append( - case_sensitive_must_name_condition_template.substitute( - name=key + if key: + should_values.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) ) - ) else: should_values.append( case_insensitive_wildcard_value_condition_template.substitute( # noqa wild_card_value=value ) ) - should_values.append( - case_insensitive_must_name_condition_template.substitute( - name=key + if key: + should_values.append( + case_insensitive_must_name_condition_template.substitute( + name=key + ) ) - ) elif operator in ["not_equals", "not_contains"]: if operator == "not_contains": value = "*{value}*".format(value=value) @@ -547,22 +565,24 @@ def elasticsearch_query_builder( wild_card_value=value ) ) - shoud_not_value.append( - case_sensitive_must_name_condition_template.substitute( - name=key + if key: + shoud_not_value.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) ) - ) else: shoud_not_value.append( case_insensitive_wildcard_value_condition_template.substitute( # noqa wild_card_value=value ) ) - shoud_not_value.append( - case_insensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + shoud_not_value.append( + case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) - ) else: if case_sensitive: shoud_not_value.append( @@ -570,22 +590,24 @@ def elasticsearch_query_builder( value=value ) ) - shoud_not_value.append( - case_sensitive_must_name_condition_template.substitute( - name=key + if key: + shoud_not_value.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) ) - ) else: shoud_not_value.append( case_insensitive_must_value_condition_template.substitute( # noqa value=value ) ) - shoud_not_value.append( - case_insensitive_must_name_condition_template.substitute( # noqa - name=key + if key: + shoud_not_value.append( + case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ) ) - ) elif operator in ["lt", "lte", "gt", "gte"]: if case_sensitive: should_values.append( @@ -593,22 +615,24 @@ def elasticsearch_query_builder( operator=operator, value=value ) ) - should_values.append( - case_sensitive_must_name_condition_template.substitute( - name=key + if key: + should_values.append( + case_sensitive_must_name_condition_template.substitute( + name=key + ) ) - ) else: should_values.append( case_insensitive_range_value_condition_template.substitute( # noqa operator=operator, value=value ) ) - should_values.append( - case_insensitive_must_name_condition_template.substitute( - name=key + if key: + should_values.append( + case_insensitive_must_name_condition_template.substitute( + name=key + ) ) - ) # must_value_condition ss = ",".join(should_values) ff = nested_keyvalue_pair_query_template.substitute(nested=ss) @@ -660,11 +684,14 @@ def elasticsearch_query_builder( def check_single_filter(res_table, filter, names, organism_converter): - key = filter["name"] + key = filter.get("name") value = filter["value"] operator = filter["operator"] if operator != "contains" and operator != "not_contains": - key_ = [name for name in names if name.casefold() == key.casefold()] + if key: + key_ = [name for name in names if name.casefold() == key.casefold()] + else: + key_=[] if len(key_) == 1: filter["name"] = key_[0] if filter["name"] == "Organism": From adae7ae15025136b79f397f7b02ee0130ae95a45 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 12 Mar 2023 20:12:30 +0000 Subject: [PATCH 02/10] add test for search by value only --- .../api/v1/resources/swagger_docs/search.yml | 4 +-- .../validation/psql_templates.py | 11 +++++++ .../validation/results_validator.py | 32 +++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/omero_search_engine/api/v1/resources/swagger_docs/search.yml b/omero_search_engine/api/v1/resources/swagger_docs/search.yml index 341e1185..c83497d6 100644 --- a/omero_search_engine/api/v1/resources/swagger_docs/search.yml +++ b/omero_search_engine/api/v1/resources/swagger_docs/search.yml @@ -20,8 +20,8 @@ parameters: type: string required: true - name: study - description: filter by study name - in: query + description: fi lter by study name + in: querygit push type: string required: False - name: operator diff --git a/omero_search_engine/validation/psql_templates.py b/omero_search_engine/validation/psql_templates.py index cabec123..3f09429c 100644 --- a/omero_search_engine/validation/psql_templates.py +++ b/omero_search_engine/validation/psql_templates.py @@ -52,6 +52,17 @@ def substitute(self, **kwargs): lower(annotation_mapvalue.value)=lower('$value')""" ) +# get images satisfy image key-value query +query_images_value_only = Template( + """ +Select DISTINCT image.id from image +inner join imageannotationlink on image.id =imageannotationlink.parent +inner join annotation_mapvalue on +annotation_mapvalue.annotation_id=imageannotationlink.child +where lower(annotation_mapvalue.value) like '%$value%'""" +) + + # Get number of images which satisfy project key-value query query_image_project_meta_data = Template( """ diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index eabb2130..575afc2c 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -33,6 +33,7 @@ query_image_or, screens_count, projects_count, + query_images_value_only ) import os @@ -755,3 +756,34 @@ def get_no_images_sql_containers(): report = "\n".join(messages) # noqa with open(report_file, "w") as f: f.write(report) + + +def validate_search_by_value(): + value="cancer" + query = { + "and_filters": [ + {"value": value, "operator": "contains", "resource": "image"}, + ], + "or_filters": [ + + ] + } + + query_data = {"query_details": query} + # validate the query syntex + size=0 + query_validation_res = query_validator(query_data) + if query_validation_res == "OK": + search_omero_app.logger.info("Getting results from search engine") + searchengine_results = determine_search_results_(query_data) + if searchengine_results.get("results"): + size = searchengine_results.get("results").get("size") + search_omero_app.logger.info ("Results from the searchengine: %s"%size) + search_omero_app.logger.info("Getting results from postgresql databse") + conn = search_omero_app.config["database_connector"] + postgres_results = conn.execute_query(query_images_value_only.substitute(value=value.lower())) + results = [item["id"] for item in postgres_results] + search_omero_app.logger.info("Results from the postgresql databse: %s"%(len(results))) + if len(results)==size: + search_omero_app.logger.info("Results from both searchengine and postgresql are identical") + return True From b5daaa7d32fcf3f733efb382c9038a5064dee418 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 12 Mar 2023 20:15:23 +0000 Subject: [PATCH 03/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../api/v1/resources/query_handler.py | 4 +-- omero_search_engine/api/v1/resources/utils.py | 10 +++---- .../validation/results_validator.py | 26 +++++++++++-------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/omero_search_engine/api/v1/resources/query_handler.py b/omero_search_engine/api/v1/resources/query_handler.py index 0c4d3b8c..e1f8612f 100644 --- a/omero_search_engine/api/v1/resources/query_handler.py +++ b/omero_search_engine/api/v1/resources/query_handler.py @@ -741,9 +741,7 @@ def simple_search( {"name": key, "value": value, "operator": operator, "resource": resource} ] else: - and_filters = [ - {"value": value, "operator": operator, "resource": resource} - ] + and_filters = [{"value": value, "operator": operator, "resource": resource}] query_details = {"and_filters": and_filters} if bookmark: diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index 8ac31935..c7d65b52 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -317,7 +317,7 @@ def elasticsearch_query_builder( case_sensitive_must_name_condition_template.substitute( name=key ) # noqa - ) + ) else: _nested_must_part.append( @@ -330,7 +330,7 @@ def elasticsearch_query_builder( case_insensitive_must_name_condition_template.substitute( # noqa name=key ) - ) + ) nested_must_part.append( nested_keyvalue_pair_query_template.substitute( @@ -351,7 +351,7 @@ def elasticsearch_query_builder( case_sensitive_must_name_condition_template.substitute( name=key ) # noqa - ) + ) else: _nested_must_part.append( @@ -364,7 +364,7 @@ def elasticsearch_query_builder( case_insensitive_must_name_condition_template.substitute( # noqa name=key ) - ) + ) nested_must_part.append( nested_keyvalue_pair_query_template.substitute( @@ -691,7 +691,7 @@ def check_single_filter(res_table, filter, names, organism_converter): if key: key_ = [name for name in names if name.casefold() == key.casefold()] else: - key_=[] + key_ = [] if len(key_) == 1: filter["name"] = key_[0] if filter["name"] == "Organism": diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 575afc2c..960f8cbe 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -33,7 +33,7 @@ query_image_or, screens_count, projects_count, - query_images_value_only + query_images_value_only, ) import os @@ -759,31 +759,35 @@ def get_no_images_sql_containers(): def validate_search_by_value(): - value="cancer" + value = "cancer" query = { "and_filters": [ {"value": value, "operator": "contains", "resource": "image"}, ], - "or_filters": [ - - ] + "or_filters": [], } query_data = {"query_details": query} # validate the query syntex - size=0 + size = 0 query_validation_res = query_validator(query_data) if query_validation_res == "OK": search_omero_app.logger.info("Getting results from search engine") searchengine_results = determine_search_results_(query_data) if searchengine_results.get("results"): size = searchengine_results.get("results").get("size") - search_omero_app.logger.info ("Results from the searchengine: %s"%size) + search_omero_app.logger.info("Results from the searchengine: %s" % size) search_omero_app.logger.info("Getting results from postgresql databse") conn = search_omero_app.config["database_connector"] - postgres_results = conn.execute_query(query_images_value_only.substitute(value=value.lower())) + postgres_results = conn.execute_query( + query_images_value_only.substitute(value=value.lower()) + ) results = [item["id"] for item in postgres_results] - search_omero_app.logger.info("Results from the postgresql databse: %s"%(len(results))) - if len(results)==size: - search_omero_app.logger.info("Results from both searchengine and postgresql are identical") + search_omero_app.logger.info( + "Results from the postgresql databse: %s" % (len(results)) + ) + if len(results) == size: + search_omero_app.logger.info( + "Results from both searchengine and postgresql are identical" + ) return True From de377411a43ae2272f623110613cc4ee5e8ec775 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Sun, 12 Mar 2023 20:21:34 +0000 Subject: [PATCH 04/10] Add examplefor search by value --- examples/search_by_value.py | 39 +++++++++++++++++++ omero_search_engine/api/v1/resources/utils.py | 8 ++-- 2 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 examples/search_by_value.py diff --git a/examples/search_by_value.py b/examples/search_by_value.py new file mode 100644 index 00000000..16d95d63 --- /dev/null +++ b/examples/search_by_value.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2023 University of Dundee & Open Microscopy Environment. +# All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import sys +from utils import query_the_search_ending, logging + +logging.basicConfig(stream=sys.stdout, level=logging.INFO) + +""" +This example will use search by value only without key +It will search for thr images which have key/value +pairs which the vlaue contains cancer""" + +query = { + "and_filters": [ + {"value": "cancer", "operator": "contains"}, + ], + "or_filters": [], +} + +main_attributes = [] +logging.info("Sending the query:") +results_1 = query_the_search_ending(query, main_attributes) diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index c7d65b52..38aaee60 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -526,7 +526,7 @@ def elasticsearch_query_builder( ) if key: should_values.append( - case_insensitive_must_name_condition_template.substitute( + case_insensitive_must_name_condition_template.substitute( # noqa name=key ) ) @@ -552,7 +552,7 @@ def elasticsearch_query_builder( ) if key: should_values.append( - case_insensitive_must_name_condition_template.substitute( + case_insensitive_must_name_condition_template.substitute( # noqa name=key ) ) @@ -567,7 +567,7 @@ def elasticsearch_query_builder( ) if key: shoud_not_value.append( - case_sensitive_must_name_condition_template.substitute( + case_sensitive_must_name_condition_template.substitute( # noqa name=key ) ) @@ -592,7 +592,7 @@ def elasticsearch_query_builder( ) if key: shoud_not_value.append( - case_sensitive_must_name_condition_template.substitute( + case_sensitive_must_name_condition_template.substitute( # noqa name=key ) ) From a5d27e1cc1fd274b30645ff666001a38a85a4705 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Wed, 15 Mar 2023 15:43:47 +0000 Subject: [PATCH 05/10] add test for searching more than one conditions and integrate a test --- app_data/test_index_data.json | 12 +++ examples/search_by_value.py | 9 ++ .../validation/results_validator.py | 90 ++++++++++++++++++- 3 files changed, 107 insertions(+), 4 deletions(-) diff --git a/app_data/test_index_data.json b/app_data/test_index_data.json index 658d43a4..512623a1 100644 --- a/app_data/test_index_data.json +++ b/app_data/test_index_data.json @@ -1,5 +1,17 @@ { "complex_test_cases":{ + "query_image_and_value_only":[ + [ + "hela", + "kif1" + ] + ], + "query_image_or_value_only":[ + [ + "pdxdc1", + "pdx1" + ] + ], "query_image_and":[ [ [ diff --git a/examples/search_by_value.py b/examples/search_by_value.py index 16d95d63..9ca7d944 100644 --- a/examples/search_by_value.py +++ b/examples/search_by_value.py @@ -37,3 +37,12 @@ main_attributes = [] logging.info("Sending the query:") results_1 = query_the_search_ending(query, main_attributes) + +and_filters = [ + {"name": "cell line", "value": "hela", "operator": "equals"}, + {"value": "kif", "operator": "contains"}, +] +query = {"and_filters": and_filters, "or_filters": []} + +logging.info("Sending the second query:") +results_2 = query_the_search_ending(query, main_attributes) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 960f8cbe..34776210 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -39,6 +39,7 @@ query_methods = { "image": query_images_key_value, + "image_value_only": query_images_value_only, "project": query_image_project_meta_data, "screen": query_images_screen_key_value, "project_name": query_images_in_project_name, @@ -101,6 +102,26 @@ def get_or_sql(self, clauses, name="query_image_or"): ) # noqa return results + def get_sql_value_only(self, clauses): + if "or" in self.name: + operator = "or" + else: + operator = "and" + conn = search_omero_app.config["database_connector"] + all_res = [] + for val in clauses: + sql = query_methods["image_value_only"].substitute(value=val) + postgres_results = conn.execute_query(sql) + ress = [item["id"] for item in postgres_results] + if len(all_res) == 0: + all_res = ress + else: + if operator == "or": + all_res = list(set(ress).union(set(all_res))) + elif operator == "and": + all_res = list(set(all_res) & set(ress)) + return all_res + def get_and_sql(self, clauses): results = [] co = 0 @@ -127,7 +148,9 @@ def get_results_postgres(self): """ search_omero_app.logger.info("Getting results from postgres") if self.type == "complex": - if self.name == "query_image_or": + if "_value_only" in self.name: + self.postgres_results = self.get_sql_value_only(self.clauses) + elif self.name == "query_image_or": self.postgres_results = self.get_or_sql(self.clauses) elif self.name == "query_image_and": self.postgres_results = self.get_and_sql(self.clauses) @@ -162,7 +185,20 @@ def get_results_searchengine(self): """ if self.type == "complex": filters = [] - if self.name != "query_image_and_or": + if "_value_only" in self.name: + for claus in self.clauses: + filters.append( + { + "value": claus, + "operator": "contains", + "resource": self.resource, + } + ) + if "or" in self.name: + query = {"and_filters": [], "or_filters": [filters]} + else: + query = {"and_filters": filters, "or_filters": []} + elif self.name != "query_image_and_or": for claus in self.clauses: filters.append( { @@ -758,8 +794,14 @@ def get_no_images_sql_containers(): f.write(report) -def validate_search_by_value(): - value = "cancer" +def validate_search_by_value(value=None): + """ + test searching by value + compare the results from database server and searchengine + + """ + if not value: + value = "cancer" query = { "and_filters": [ {"value": value, "operator": "contains", "resource": "image"}, @@ -791,3 +833,43 @@ def validate_search_by_value(): "Results from both searchengine and postgresql are identical" ) return True + + +def validate_search_by_value_conds(vals=None, operator=None): + """ + can be used to test more than one conditions + for search by values only + """ + if not vals: + vals = ["hel", "kif1"] + if not operator: + operator = "and" + conn = search_omero_app.config["database_connector"] + all_res = [] + for val in vals: + sql = query_images_value_only.substitute(value=val) + postgres_results = conn.execute_query(sql) + ress = [item["id"] for item in postgres_results] + if len(all_res) == 0: + all_res = ress + else: + if operator == "or": + all_res = list(set(ress).union(set(all_res))) + elif operator == "and": + all_res = list(set(all_res) & set(ress)) + + sql_1 = query_images_value_only.substitute(value="he") + sql_2 = query_images_value_only.substitute(value="ki") + conn = search_omero_app.config["database_connector"] + postgres_results2 = conn.execute_query(sql_2) + postgres_results1 = conn.execute_query(sql_1) + print(len(postgres_results1)) + print(len(postgres_results2)) + s1 = [item["id"] for item in postgres_results1] + s2 = [item["id"] for item in postgres_results2] + postgres_results = list(set(s1) & set(s2)) + + z = set(s1).union(set(s2)) + + print(len(postgres_results)) + print(len(z)) From 7358f00778dd23b64c56b07b54d7ab0717cfa5cf Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Aug 2024 10:42:43 +0000 Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- omero_search_engine/api/v1/resources/utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index 9b28d591..da258e0b 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -479,7 +479,6 @@ def elasticsearch_query_builder( nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_sensitive_wildcard_value_condition_template.substitute( # noqa - wild_card_value=value ), must_part=case_sensitive_must_name_condition_template.substitute( # noqa @@ -500,7 +499,6 @@ def elasticsearch_query_builder( nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_insensitive_wildcard_value_condition_template.substitute( # noqa - wild_card_value=value ), must_part=case_insensitive_must_name_condition_template.substitute( # noqa @@ -522,7 +520,6 @@ def elasticsearch_query_builder( nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_sensitive_must_value_condition_template.substitute( # noqa - value=value ), must_part=case_sensitive_must_name_condition_template.substitute( # noqa @@ -543,7 +540,6 @@ def elasticsearch_query_builder( nested_must_not_part.append( nested_keyvalue_pair_query_template.substitute( nested=case_insensitive_must_value_condition_template.substitute( # noqa - value=value ), must_part=case_insensitive_must_name_condition_template.substitute( # noqa From f4e494e5dbaed3e68745b8c093378d5f3d2840c7 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Thu, 1 Aug 2024 12:50:11 +0100 Subject: [PATCH 07/10] fix syntax error --- omero_search_engine/validation/results_validator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index 48199d0b..b486874c 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -231,8 +231,8 @@ def get_results_db(self, operator=None): self.postgres_results = self.get_in_sql(self.clauses, self.type) return elif self.type == "complex": - if "_value_only" in self.name: - self.postgres_results = self.get_sql_value_only(self.clauses) + if "_value_only" in self.name: + self.postgres_results = self.get_sql_value_only(self.clauses) elif self.name == "query_image_or": self.postgres_results = self.get_or_sql(self.clauses) elif self.name == "query_image_and": @@ -1259,4 +1259,3 @@ def check_container_keys_vakues(): search_omero_app.logger.info( "No results returned from searchengine" ) - From 5f8a992ea3a9e5dcd6c0b7bfd6eb9c3b3dc6d198 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 Aug 2024 11:50:50 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- omero_search_engine/validation/results_validator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/omero_search_engine/validation/results_validator.py b/omero_search_engine/validation/results_validator.py index b486874c..e9a9108d 100644 --- a/omero_search_engine/validation/results_validator.py +++ b/omero_search_engine/validation/results_validator.py @@ -232,9 +232,9 @@ def get_results_db(self, operator=None): return elif self.type == "complex": if "_value_only" in self.name: - self.postgres_results = self.get_sql_value_only(self.clauses) + self.postgres_results = self.get_sql_value_only(self.clauses) elif self.name == "query_image_or": - self.postgres_results = self.get_or_sql(self.clauses) + self.postgres_results = self.get_or_sql(self.clauses) elif self.name == "query_image_and": self.postgres_results = self.get_and_sql(self.clauses) else: @@ -1157,7 +1157,7 @@ def validate_search_by_value_conds(vals=None, operator=None): print(len(postgres_results)) print(len(z)) - + """ def set_ownership(resource , name, value, owner_id=None, group_id=None): From e68b822b352f6a0fa671c85b955ba6f14d207835 Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 5 Aug 2024 12:02:09 +0100 Subject: [PATCH 09/10] fix merger confilict --- omero_search_engine/api/v1/resources/utils.py | 343 ++++++++++-------- 1 file changed, 184 insertions(+), 159 deletions(-) diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index da258e0b..b88516dd 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -161,7 +161,7 @@ def get_resource_annotation_table(resource_table): # Used for contains and not contains case_insensitive_wildcard_value_condition_template = Template( """ -{"wildcard": {"key_values.value.keyvaluenormalize":"$wild_card_value"}}""" +{"wildcard": {"key_values.value.keyvaluenormalize":"$wild_card_value" }}""" ) case_sensitive_range_value_condition_template = Template( """ @@ -316,7 +316,7 @@ def elasticsearch_query_builder( key = filter.get("name") if key: key = key.strip() - value = filter["value"].strip() + #value = filter["value"].strip() operator = filter["operator"].strip() if operator in operators_required_list_data_type: @@ -406,28 +406,46 @@ def elasticsearch_query_builder( if operator == "not_in": if case_sensitive: - nested_must_part.append( - nested_query_template_must_must_not.substitute( - must_not_part=case_sensitive_must_in_value_condition_template.substitute( # noqa - value=value - ), - must_part=case_sensitive_must_name_condition_template.substitute( # noqa - name=key - ), + if key: + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_sensitive_must_in_value_condition_template.substitute( # noqa + value=value + ), + must_part=case_sensitive_must_name_condition_template.substitute( # noqa + name=key + ), + ) + ) + else: + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_sensitive_must_in_value_condition_template.substitute( # noqa + value=value + ), + ) ) - ) else: - nested_must_part.append( - nested_query_template_must_must_not.substitute( - must_not_part=case_insensitive_must_in_value_condition_template.substitute( # noqa - value=value - ), - must_part=case_insensitive_must_name_condition_template.substitute( # noqa - name=key - ), + if key: + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_insensitive_must_in_value_condition_template.substitute( # noqa + value=value + ), + must_part=case_insensitive_must_name_condition_template.substitute( # noqa + name=key + ), + ) + ) + else: + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_insensitive_must_in_value_condition_template.substitute( # noqa + value=value + ), + ) ) - ) if operator == "contains": value = "*{value}*".format(value=adjust_value(value)) @@ -471,82 +489,89 @@ def elasticsearch_query_builder( if key: nested_must_part.append( nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_must_name_condition_template.substitute( # noqa + nested=case_sensitive_wildcard_value_condition_template.substitute( # noqa + wild_card_value=value + ), + must_part=case_sensitive_must_name_condition_template.substitute( # noqa name=key - ) + ), ) ) - nested_must_not_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_wildcard_value_condition_template.substitute( # noqa - wild_card_value=value - ), - must_part=case_sensitive_must_name_condition_template.substitute( # noqa - name=key - ), + else: + nested_must_not_part.append( + nested_keyvalue_pair_query_template.substitute( + nested=case_sensitive_wildcard_value_condition_template.substitute( # noqa + wild_card_value=value + ), + ) ) - ) + else: if key: nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_must_name_condition_template.substitute( # noqa + nested_query_template_must_must_not.substitute( + must_not_part=case_insensitive_wildcard_value_condition_template.substitute( # noqa + wild_card_value=value + ), + must_part=case_insensitive_must_name_condition_template.substitute( # noqa name=key - ) + ), ) ) - nested_must_not_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_wildcard_value_condition_template.substitute( # noqa - wild_card_value=value - ), - must_part=case_insensitive_must_name_condition_template.substitute( # noqa - name=key - ), + else: + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_insensitive_wildcard_value_condition_template.substitute( # noqa + wild_card_value=value + ), + must_part=[] + ) ) - ) - else: if case_sensitive: if key: nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_must_name_condition_template.substitute( # noqa + nested_query_template_must_must_not.substitute( + must_not_part=case_sensitive_must_value_condition_template.substitute( # noqa + value=value + ), + must_part=case_sensitive_must_name_condition_template.substitute( # noqa name=key - ) + ), ) ) - nested_must_not_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_sensitive_must_value_condition_template.substitute( # noqa - value=value - ), - must_part=case_sensitive_must_name_condition_template.substitute( # noqa - name=key - ), + else: + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_sensitive_wildcard_value_condition_template.substitute( # noqa + wild_card_value=value + ), + must_part='' + ) ) - ) - else: if key: nested_must_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_must_name_condition_template.substitute( # noqa + nested_query_template_must_must_not.substitute( + must_not_part=case_insensitive_must_value_condition_template.substitute( # noqa + value=value + ), + must_part=case_insensitive_must_name_condition_template.substitute( # noqa name=key - ) + ), ) ) - nested_must_not_part.append( - nested_keyvalue_pair_query_template.substitute( - nested=case_insensitive_must_value_condition_template.substitute( # noqa - value=value - ), - must_part=case_insensitive_must_name_condition_template.substitute( # noqa - name=key - ), + else: + + nested_must_part.append( + nested_query_template_must_must_not.substitute( + must_not_part=case_insensitive_wildcard_value_condition_template.substitute( # noqa + wild_card_value=value + ), + must_part='' + ) ) - ) elif operator in ["lt", "lte", "gt", "gte"]: # nested_must_part.append(nested_keyvalue_pair_query_template.substitute(nested=must_name_condition_template.substitute(name=key))) # noqa @@ -1055,100 +1080,100 @@ def search_resource_annotation( @query: the a dict contains the three filters (or, and and not) items @raw_elasticsearch_query: raw query sending directly to elasticsearch """ - try: - res_index = resource_elasticsearchindex.get(table_) - if not res_index: - return build_error_message( - "{table_} is not a valid resurce".format(table_=table_) - ) - query_details = query.get("query_details") + #try: + res_index = resource_elasticsearchindex.get(table_) + if not res_index: + return build_error_message( + "{table_} is not a valid resurce".format(table_=table_) + ) + query_details = query.get("query_details") - start_time = time.time() - if not raw_elasticsearch_query: - query_details = query.get("query_details") - main_attributes = query.get("main_attributes") - if not query_details and main_attributes and len(main_attributes) > 0: - pass - - elif ( - not query - or len(query) == 0 - or not query_details - or len(query_details) == 0 - or isinstance(query_details, str) - ): - print("Error ") - return build_error_message( - "{query} is not a valid query".format(query=query) - ) - and_filters = query_details.get("and_filters") - or_filters = query_details.get("or_filters") - case_sensitive = query_details.get("case_sensitive") - # check and fid if possible names and values inside - # filters conditions - check_filters(table_, [and_filters, or_filters], case_sensitive) - query_string = elasticsearch_query_builder( - and_filters, or_filters, case_sensitive, main_attributes - ) - # query_string has to be string, if it is a dict, - # something went wrong and the message inside the dict - # which will be returned to the sender: - if isinstance(query_string, dict): - return query_string - - search_omero_app.logger.info("Query %s" % query_string) - query = json.loads(query_string, strict=False) - raw_query_to_send_back = json.loads(query_string, strict=False) - else: - query = raw_elasticsearch_query - raw_query_to_send_back = copy.copy(raw_elasticsearch_query) - if return_containers: - # code to return the containers only - # It will call the projects container first then - # search within screens - query["aggs"] = json.loads( - count_attr_template.substitute(field="project_name.keyvalue") - ) - query["_source"] = {"includes": [""]} - res = search_index_using_search_after( - res_index, - query, - bookmark, - pagination_dict, - return_containers, - "project", - ) - query["aggs"] = json.loads( - count_attr_template.substitute(field="screen_name.keyvalue") + start_time = time.time() + if not raw_elasticsearch_query: + query_details = query.get("query_details") + main_attributes = query.get("main_attributes") + if not query_details and main_attributes and len(main_attributes) > 0: + pass + + elif ( + not query + or len(query) == 0 + or not query_details + or len(query_details) == 0 + or isinstance(query_details, str) + ): + print("Error ") + return build_error_message( + "{query} is not a valid query".format(query=query) ) + and_filters = query_details.get("and_filters") + or_filters = query_details.get("or_filters") + case_sensitive = query_details.get("case_sensitive") + # check and fid if possible names and values inside + # filters conditions + check_filters(table_, [and_filters, or_filters], case_sensitive) + query_string = elasticsearch_query_builder( + and_filters, or_filters, case_sensitive, main_attributes + ) + # query_string has to be string, if it is a dict, + # something went wrong and the message inside the dict + # which will be returned to the sender: + if isinstance(query_string, dict): + return query_string + + search_omero_app.logger.info("Query %s" % query_string) + query = json.loads(query_string, strict=False) + raw_query_to_send_back = json.loads(query_string, strict=False) + else: + query = raw_elasticsearch_query + raw_query_to_send_back = copy.copy(raw_elasticsearch_query) + if return_containers: + # code to return the containers only + # It will call the projects container first then + # search within screens + query["aggs"] = json.loads( + count_attr_template.substitute(field="project_name.keyvalue") + ) + query["_source"] = {"includes": [""]} + res = search_index_using_search_after( + res_index, + query, + bookmark, + pagination_dict, + return_containers, + "project", + ) + query["aggs"] = json.loads( + count_attr_template.substitute(field="screen_name.keyvalue") + ) - res_2 = search_index_using_search_after( - res_index, query, bookmark, pagination_dict, return_containers, "screen" - ) - # Combines the containers results - studies = res + res_2 - res = {"results": studies} - else: - res = search_index_using_search_after( - res_index, query, bookmark, pagination_dict, return_containers - ) - notice = "" - end_time = time.time() - query_time = "%.2f" % (end_time - start_time) - return { - "results": res, - "query_details": query_details, - "resource": table_, - "server_query_time": query_time, - "raw_elasticsearch_query": raw_query_to_send_back, - "notice": notice, - } - except Exception as e: - search_omero_app.logger.info("Query %s" % str(query)) - search_omero_app.logger.info("==>>>Error: %s" % str(e)) - return build_error_message( - "Something went wrong, please check your query and try again later." + res_2 = search_index_using_search_after( + res_index, query, bookmark, pagination_dict, return_containers, "screen" ) + # Combines the containers results + studies = res + res_2 + res = {"results": studies} + else: + res = search_index_using_search_after( + res_index, query, bookmark, pagination_dict, return_containers + ) + notice = "" + end_time = time.time() + query_time = "%.2f" % (end_time - start_time) + return { + "results": res, + "query_details": query_details, + "resource": table_, + "server_query_time": query_time, + "raw_elasticsearch_query": raw_query_to_send_back, + "notice": notice, + } + #except Exception as e: + # search_omero_app.logger.info("Query %s" % str(query)) + # search_omero_app.logger.info("==>>>Error: %s" % str(e)) + # return build_error_message( + # "Something went wrong, please check your query and try again later." + # ) def get_studies_titles(idr_name, resource): From 2e083ad876dca6f97911036a4e5b7eadef8ef75e Mon Sep 17 00:00:00 2001 From: khaledk2 Date: Mon, 5 Aug 2024 12:17:33 +0100 Subject: [PATCH 10/10] Fix pre-commit --- omero_search_engine/api/v1/resources/utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/omero_search_engine/api/v1/resources/utils.py b/omero_search_engine/api/v1/resources/utils.py index b88516dd..c5f89266 100644 --- a/omero_search_engine/api/v1/resources/utils.py +++ b/omero_search_engine/api/v1/resources/utils.py @@ -316,7 +316,7 @@ def elasticsearch_query_builder( key = filter.get("name") if key: key = key.strip() - #value = filter["value"].strip() + # value = filter["value"].strip() operator = filter["operator"].strip() if operator in operators_required_list_data_type: @@ -506,7 +506,6 @@ def elasticsearch_query_builder( ) ) - else: if key: nested_must_part.append( @@ -525,7 +524,7 @@ def elasticsearch_query_builder( must_not_part=case_insensitive_wildcard_value_condition_template.substitute( # noqa wild_card_value=value ), - must_part=[] + must_part=[], ) ) else: @@ -547,7 +546,7 @@ def elasticsearch_query_builder( must_not_part=case_sensitive_wildcard_value_condition_template.substitute( # noqa wild_card_value=value ), - must_part='' + must_part="", ) ) else: @@ -569,7 +568,7 @@ def elasticsearch_query_builder( must_not_part=case_insensitive_wildcard_value_condition_template.substitute( # noqa wild_card_value=value ), - must_part='' + must_part="", ) ) @@ -1080,7 +1079,7 @@ def search_resource_annotation( @query: the a dict contains the three filters (or, and and not) items @raw_elasticsearch_query: raw query sending directly to elasticsearch """ - #try: + # try: res_index = resource_elasticsearchindex.get(table_) if not res_index: return build_error_message( @@ -1168,7 +1167,7 @@ def search_resource_annotation( "raw_elasticsearch_query": raw_query_to_send_back, "notice": notice, } - #except Exception as e: + # except Exception as e: # search_omero_app.logger.info("Query %s" % str(query)) # search_omero_app.logger.info("==>>>Error: %s" % str(e)) # return build_error_message(