From dac3a6009a400c8b6db7ae0ad1f2ae15692a6e3f Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Mon, 13 May 2024 13:04:28 +0200 Subject: [PATCH] [535] Implement basic support for GenQuery2 --- README.md | 28 +++++++++++ irods/api_number.py | 1 + irods/genquery2.py | 55 ++++++++++++++++++++++ irods/message/__init__.py | 6 +++ irods/session.py | 15 ++++++ irods/test/genquery2_test.py | 90 ++++++++++++++++++++++++++++++++++++ 6 files changed, 195 insertions(+) create mode 100644 irods/genquery2.py create mode 100644 irods/test/genquery2_test.py diff --git a/README.md b/README.md index 5e2b3728..4edd3a39 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ Currently supported: - iRODS connection over SSL - Implement basic GenQueries (select columns and filtering) - Support more advanced GenQueries with limits, offsets, and aggregations +- Support for queries using the GenQuery2 interface - Query the collections and data objects within a collection - Execute direct SQL queries - Execute iRODS rules @@ -1252,6 +1253,33 @@ As stated, this type of object discovery requires some extra study and effort, but the ability to search arbitrary iRODS zones (to which we are federated and have the user permissions) is powerful indeed. + +GenQuery2 queries +------- + +GenQuery2 is a successor to the regular GenQuery interface. It is available +by default on iRODS 4.3.2 and higher. GenQuery2 currently has an experimental status, +and is subject to change. + +Queries can be executed using the `genquery2` function. For example: + +``` +>>> session.genquery2("SELECT COLL_NAME WHERE COLL_NAME = '/tempZone/home' OR COLL_NAME LIKE '%/genquery2_dummy_doesnotexist'") +[['/tempZone/home']] +``` + +Alternatively, create a GenQuery2 object and use it to execute queries. For example: + +``` +>>> q = session.genquery2_object() +>>> q.execute("SELECT COLL_NAME WHERE COLL_NAME = '/tempZone/home' OR COLL_NAME LIKE '%/genquery2_dummy_doesnotexist'", zone="tempZone") +[['/tempZone/home']] +``` + +GenQuery2 objects also support retrieving the SQL generated by a GenQuery2 query using the +`get_sql` function and retrieving column mappings using the `get_column_mappings` function. + + Tickets ------- diff --git a/irods/api_number.py b/irods/api_number.py index e8e856b8..f0ecf5ba 100644 --- a/irods/api_number.py +++ b/irods/api_number.py @@ -178,6 +178,7 @@ "SSL_END_AN": 1101, "CLIENT_HINTS_AN": 10215, "GET_RESOURCE_INFO_FOR_OPERATION_AN": 10220, + "GENQUERY2_AN": 10221, "ATOMIC_APPLY_METADATA_OPERATIONS_APN": 20002, "GET_FILE_DESCRIPTOR_INFO_APN": 20000, "REPLICA_CLOSE_APN": 20004, diff --git a/irods/genquery2.py b/irods/genquery2.py new file mode 100644 index 00000000..2ef06c2a --- /dev/null +++ b/irods/genquery2.py @@ -0,0 +1,55 @@ +import json + +from irods.api_number import api_number +from irods.exception import OperationNotSupported +from irods.message import GenQuery2Request, STR_PI, iRODSMessage + + +class GenQuery2(object): + """Interface to the GenQuery2 API + + This class provides an interface to the GenQuery2 API, an experimental + iRODS API for querying iRODS. GenQuery2 is an improved version of the + traditional GenQuery interface. The GenQuery2 interface may be subject + to change. + """ + + def __init__(self, session): + self.session = session + if not self._is_supported(): + raise OperationNotSupported( + "GenQuery2 is not supported by default on this iRODS version.") + + def execute(self, query, zone=None): + """Execute this GenQuery2 query, and return the results.""" + effective_zone = self.session.zone if zone is None else zone + return json.loads(self._exec_genquery2(query, effective_zone)) + + def get_sql(self, query, zone=None): + """Return the SQL query that this GenQuery2 query will be translated to.""" + effective_zone = self.session.zone if zone is None else zone + return self._exec_genquery2(query, effective_zone, sql_flag=True) + + def get_column_mappings(self, zone=None): + effective_zone = self.session.zone if zone is None else zone + return json.loads(self._exec_genquery2( + "", effective_zone, column_mappings_flag=True)) + + def _exec_genquery2(self, query, zone, sql_flag=False, + column_mappings_flag=False): + msg = GenQuery2Request() + msg.query_string = query + msg.zone = zone + msg.sql_only = 1 if sql_flag else 0 + msg.column_mappings = 1 if column_mappings_flag else 0 + message = iRODSMessage('RODS_API_REQ', + msg=msg, + int_info=api_number['GENQUERY2_AN']) + with self.session.pool.get_connection() as conn: + conn.send(message) + response = conn.recv() + return response.get_main_message(STR_PI).myStr + + def _is_supported(self): + """Checks whether this iRODS server supports GenQuery2.""" + return self.session.server_version >= (4, 3, 2) diff --git a/irods/message/__init__.py b/irods/message/__init__.py index e96b900d..3761e4a6 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -628,6 +628,12 @@ class GenQueryResponse(Message): # openFlags; double offset; double dataSize; int numThreads; int oprType; # struct *SpecColl_PI; struct KeyValPair_PI;" +class GenQuery2Request(Message): + _name = 'Genquery2Input_PI' + query_string = StringProperty() + zone = StringProperty() + sql_only = IntegerProperty() + column_mappings = IntegerProperty() class FileOpenRequest(Message): _name = 'DataObjInp_PI' diff --git a/irods/session.py b/irods/session.py index 802605c3..cdcd194e 100644 --- a/irods/session.py +++ b/irods/session.py @@ -9,6 +9,7 @@ import threading import weakref from irods.query import Query +from irods.genquery2 import GenQuery2 from irods.pool import Pool from irods.account import iRODSAccount from irods.api_number import api_number @@ -269,6 +270,20 @@ def configure(self, **kwargs): def query(self, *args, **kwargs): return Query(self, *args, **kwargs) + def genquery2_object(self, **kwargs): + """ Returns GenQuery2 object + + Returns GenQuery2 object that can be used to execute GenQuery2 queries, + to retrieve the SQL query for a particular GenQuery2 query, and to + get GenQuery2 column mappings. + """ + return GenQuery2(self, **kwargs) + + def genquery2(self, query, **kwargs): + """Shorthand for executing a single GenQuery2 query.""" + q = GenQuery2(self) + return q.execute(query, **kwargs) + @property def username(self): return self.pool.account.client_user diff --git a/irods/test/genquery2_test.py b/irods/test/genquery2_test.py new file mode 100644 index 00000000..54535997 --- /dev/null +++ b/irods/test/genquery2_test.py @@ -0,0 +1,90 @@ +import unittest + +import irods.test.helpers as helpers + + +class TestGenQuery2(unittest.TestCase): + + def setUp(self): + self.sess = helpers.make_session() + + if self.sess.server_version < (4, 3, 2): + self.skipTest( + 'GenQuery2 is not available by default in iRODS before v4.3.2.') + + self.coll_path_a = '/{}/home/{}/test_query2_coll_a'.format( + self.sess.zone, self.sess.username) + self.coll_path_b = '/{}/home/{}/test_query2_coll_b'.format( + self.sess.zone, self.sess.username) + self.sess.collections.create(self.coll_path_a) + self.sess.collections.create(self.coll_path_b) + + def tearDown(self): + '''Remove test data and close connections + ''' + self.sess.collections.remove(self.coll_path_a, force=True) + self.sess.collections.remove(self.coll_path_b, force=True) + self.sess.cleanup() + + def test_select(self): + query = "SELECT COLL_NAME WHERE COLL_NAME = '{}'".format( + self.coll_path_a) + q = self.sess.genquery2_object() + query_result = q.execute(query) + query_sql = q.get_sql(query) + self.assertIn([self.coll_path_a], query_result) + self.assertEqual(len(query_result), 1) + self.assertEqual(query_sql, "select distinct t0.coll_name from R_COLL_MAIN t0 inner join R_OBJT_ACCESS pcoa on t0.coll_id = pcoa.object_id inner join R_TOKN_MAIN pct on pcoa.access_type_id = pct.token_id inner join R_USER_MAIN pcu on pcoa.user_id = pcu.user_id where t0.coll_name = ? and pcoa.access_type_id >= 1000 fetch first 256 rows only") + + def test_select_with_explicit_zone(self): + query = "SELECT COLL_NAME WHERE COLL_NAME = '{}'".format( + self.coll_path_a) + q = self.sess.genquery2_object() + query_result = q.execute(query, zone=self.sess.zone) + query_sql = q.get_sql(query, zone=self.sess.zone) + self.assertIn([self.coll_path_a], query_result) + self.assertEqual(len(query_result), 1) + self.assertEqual(query_sql, "select distinct t0.coll_name from R_COLL_MAIN t0 inner join R_OBJT_ACCESS pcoa on t0.coll_id = pcoa.object_id inner join R_TOKN_MAIN pct on pcoa.access_type_id = pct.token_id inner join R_USER_MAIN pcu on pcoa.user_id = pcu.user_id where t0.coll_name = ? and pcoa.access_type_id >= 1000 fetch first 256 rows only") + + def test_select_with_shorthand(self): + query = "SELECT COLL_NAME WHERE COLL_NAME = '{}'".format( + self.coll_path_a) + query_result = self.sess.genquery2(query) + self.assertIn([self.coll_path_a], query_result) + self.assertEqual(len(query_result), 1) + + def test_select_with_shorthand_and_explicit_zone(self): + query = "SELECT COLL_NAME WHERE COLL_NAME = '{}'".format( + self.coll_path_a) + query_result = self.sess.genquery2(query, zone=self.sess.zone) + self.assertIn([self.coll_path_a], query_result) + self.assertEqual(len(query_result), 1) + + def test_select_or(self): + query = "SELECT COLL_NAME WHERE COLL_NAME = '{}' OR COLL_NAME = '{}'".format( + self.coll_path_a, self.coll_path_b) + q = self.sess.genquery2_object() + query_result = q.execute(query) + query_sql = q.get_sql(query) + self.assertIn([self.coll_path_a], query_result) + self.assertIn([self.coll_path_b], query_result) + self.assertEqual(len(query_result), 2) + self.assertEqual(query_sql, "select distinct t0.coll_name from R_COLL_MAIN t0 inner join R_OBJT_ACCESS pcoa on t0.coll_id = pcoa.object_id inner join R_TOKN_MAIN pct on pcoa.access_type_id = pct.token_id inner join R_USER_MAIN pcu on pcoa.user_id = pcu.user_id where t0.coll_name = ? or t0.coll_name = ? and pcoa.access_type_id >= 1000 fetch first 256 rows only") + + def test_select_and(self): + query = "SELECT COLL_NAME WHERE COLL_NAME LIKE '{}' AND COLL_NAME LIKE '{}'".format( + "%test_query2_coll%", "%query2_coll_a%") + q = self.sess.genquery2_object() + query_result = q.execute(query) + query_sql = q.get_sql(query) + self.assertIn([self.coll_path_a], query_result) + self.assertEqual(len(query_result), 1) + self.assertEqual(query_sql, "select distinct t0.coll_name from R_COLL_MAIN t0 inner join R_OBJT_ACCESS pcoa on t0.coll_id = pcoa.object_id inner join R_TOKN_MAIN pct on pcoa.access_type_id = pct.token_id inner join R_USER_MAIN pcu on pcoa.user_id = pcu.user_id where t0.coll_name like ? and t0.coll_name like ? and pcoa.access_type_id >= 1000 fetch first 256 rows only") + + def test_column_mappings(self): + q = self.sess.genquery2_object() + result = q.get_column_mappings() + self.assertIn("COLL_ID", result.keys()) + self.assertIn("DATA_ID", result.keys()) + self.assertIn("RESC_ID", result.keys()) + self.assertIn("USER_ID", result.keys())