feat(pyclient): add support for GraphQL API to 'get' method #4558

Open · wants to merge 31 commits into base: master

Commits (31)
66a8841
started creating parse function for GraphQL query from schema metadata
YpeZ Dec 11, 2024
337b378
* moved 'parse_query' to Client class
YpeZ Dec 17, 2024
641bfbe
* created function 'parse_nested_pkeys'
YpeZ Dec 18, 2024
feac320
* implemented 'columns' filter in `get`
YpeZ Dec 18, 2024
d49ac6b
* added check for None columns
YpeZ Jan 7, 2025
7ad5a7d
* split filter method results for CSV and GraphQL API
YpeZ Jan 7, 2025
2f0c016
* fixed logging issues
YpeZ Jan 8, 2025
2062b56
* added dtype conversion to DataFrame output
YpeZ Jan 8, 2025
941ab62
* fixed column type float to decimal
YpeZ Jan 9, 2025
bdaa8bc
* fixed column type float to decimal
YpeZ Jan 9, 2025
bc34491
* fixed server URL ending with '/'
YpeZ Jan 9, 2025
6b4579b
* updated dev script for catalogue model
YpeZ Jan 9, 2025
1464260
* implemented truncate
YpeZ Jan 9, 2025
dd34880
* fixed imports
YpeZ Jan 14, 2025
d4cda90
* fixed truncate GraphQL url
YpeZ Jan 14, 2025
1e5236e
* created `ReferenceException`
YpeZ Jan 14, 2025
e40cba4
* refactored table names 'Collections', 'Cohorts' to 'Resources'
YpeZ Jan 14, 2025
ed5387e
* small fixes
YpeZ Jan 14, 2025
06977ba
* fixed examples in dev.py
YpeZ Jan 14, 2025
59070bf
* updated README.md
YpeZ Jan 14, 2025
7416741
* updated README.md
YpeZ Jan 14, 2025
1d7acaa
Merge branch 'master' into feat/pyclient-get-graphql
YpeZ Jan 14, 2025
35ba62d
* removed redundant script
YpeZ Jan 14, 2025
aaba741
Merge branch 'master' into feat/pyclient-get-graphql
YpeZ Jan 23, 2025
8738703
* fixed GraphQL query for column type FILE
YpeZ Jan 23, 2025
33572b4
* added parser for ontology columns, top-level only
YpeZ Jan 23, 2025
55d6754
Merge branch 'master' into feat/pyclient-get-graphql
YpeZ Jan 23, 2025
831d0f8
Merge branch 'master' into feat/pyclient-get-graphql
YpeZ Jan 23, 2025
855c68d
Merge remote-tracking branch 'origin/feat/pyclient-get-graphql' into …
YpeZ Jan 23, 2025
b65b78e
* implemented parser for ontology columns nested in ref/ref_array/ref…
YpeZ Jan 23, 2025
c20b715
* fixed as_df=False return empty list instead of None in case of empt…
YpeZ Jan 23, 2025
71 changes: 42 additions & 29 deletions docs/molgenis/use_usingpyclient.md
@@ -124,50 +124,54 @@ Raises the `TokenSigninException` when the client is already signed in with a us
```python
def get(self,
table: str,
columns: list[str] = None,
query_filter: str = None,
schema: str = None,
as_df: bool = False) -> list | pandas.DataFrame:
...
```
Retrieves data from a table on a schema and returns the result either as a list of dictionaries or as a pandas DataFrame.
Use the `columns` parameter to specify which columns to retrieve. Note that when `as_df=True`, the column _names_ should be supplied; otherwise, the column _ids_.
Use the `query_filter` parameter to filter the results based on conditions applied to the columns.
This query requires a special syntax.
Columns can be filtered on equality `==`, inequality `!=`, greater `>` and smaller `<` than.
Values in columns can be filtered on equality `==`, inequality `!=`, greater than `>`, and less than `<`.
Values within an interval can also be filtered by using the operand `between`, followed by a list of the lower and upper bound.
The values of reference and ontology columns can also be filtered by joining the column id in the table with a column id in the referenced or ontology table using a dot, as in `countries.name`, where `countries` is a reference column in the table and `name` is the column in the referenced table that specifies the country names.
It is possible to add filters on multiple columns by separating the filter statements with _' and '_.
It is recommended to supply the values that the filters compare against as variables passed in an f-string.

Throws the `NoSuchSchemaException` if the user does not have at least _viewer_ permissions or if the schema does not exist.
Throws the `NoSuchColumnException` if the query filter contains a column id that is not present in the table.
Throws the `NoSuchColumnException` if the `columns` argument or query filter contains a column that is not present in the table.


| parameter | type | description | required | default |
|----------------|------|--------------------------------------------------------------------------------|----------|---------|
| `table` | str | the name of a table | True | None |
| `columns` | list | a list of column names or ids to retrieve | False | None |
| `schema` | str | the name of a schema | False | None |
| `query_filter` | str | a string to filter the results on | False | None |
| `as_df` | bool | if true: returns data as pandas DataFrame <br/> else as a list of dictionaries | False | False |

##### examples

```python
# Get all entries for the table 'Collections' on the schema 'MySchema'
table_data = client.get(table='Collections', schema='MySchema')
# Get the 'name' and 'collectionEvents' columns for all entries in the table 'Resources' on the schema 'MySchema'
table_data = client.get(table='Resources', schema='MySchema', columns=['name', 'collectionEvents'])

# Set the default schema to 'MySchema'
client.set_schema('MySchema')
# Get the same entries (using column names instead of ids) and return them as a pandas DataFrame
table_data = client.get(table='Collections', as_df=True)
table_data = client.get(table='Resources', columns=['name', 'collection events'], as_df=True)

# Get the entries where the value of a particular column 'number of participants' is greater than 10000
table_data = client.get(table='Collections', query_filter='numberOfParticipants > 10000')
table_data = client.get(table='Resources', query_filter='numberOfParticipants > 10000')

# Get the entries where 'number of participants' is greater than 10000 and the cohort type is a 'Population cohort'
# Get the entries where 'number of participants' is greater than 10000 and the resource type is a 'Population cohort'
# Store the information in variables, first
min_subcohorts = 10000
min_subpop = 10000
cohort_type = 'Population cohort'
table_data = client.get(table='Collections', query_filter=f'numberOfParticipants > {min_subcohorts}'
f'and cohortType == {cohort_type}')
table_data = client.get(table='Resources', query_filter=f'numberOfParticipants > {min_subpop} '
                                                         f'and cohortType == {cohort_type}')
```
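
The `between` operand and dotted reference filters follow the same pattern. Below is a minimal sketch, assuming the table 'Resources' has a `numberOfParticipants` column and a `countries` reference column (these column names are illustrative):

```python
# Get the entries whose number of participants lies between 10000 and 20000
participant_range = [10_000, 20_000]
table_data = client.get(table='Resources',
                        query_filter=f'numberOfParticipants between {participant_range}')

# Filter on a column of a referenced table by joining the column ids with a dot,
# and combine multiple filter statements with ' and '
country = 'Netherlands'
table_data = client.get(table='Resources',
                        query_filter=f'countries.name == {country} '
                                     f'and numberOfParticipants > 10000',
                        as_df=True)
```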

### get_schema_metadata
@@ -212,11 +216,11 @@ Throws the `NoSuchSchemaException` if the user does not have at least _viewer_ p
##### examples
```python

# Export the table 'Collections' on the schema 'MySchema' from the CSV API to a BytesIO object
collections_raw: BytesIO = await client.export(schema='MySchema', table='Collections')
# Export the table 'Resources' on the schema 'MySchema' from the CSV API to a BytesIO object
resources_raw: BytesIO = await client.export(schema='MySchema', table='Resources')

# Export 'Collections' from the Excel API to the file 'Collections-export.xlsx'
await client.export(schema='MySchema', table='Collections', filename='Collections-export.xlsx')
# Export 'Resources' from the Excel API to the file 'Resources-export.xlsx'
await client.export(schema='MySchema', table='Resources', filename='Resources-export.xlsx')
```


@@ -243,12 +247,12 @@ Throws the `NoSuchSchemaException` if the schema is not found on the server.

##### examples
```python
# Save an edited table with Collections data from a CSV file to the Collections table
client.save_schema(table='Collections', file='Collections-edited.csv')
# Save an edited table with Resources data from a CSV file to the Resources table
client.save_schema(table='Resources', file='Resources-edited.csv')

# Save an edited table with Collections data from memory to the Collections table
collections: pandas.DataFrame = ...
client.save_schema(table='Collections', data=collections)
# Save an edited table with Resources data from memory to the Resources table
resources: pandas.DataFrame = ...
client.save_schema(table='Resources', data=resources)
```

### upload_file
@@ -269,8 +273,8 @@ Throws the `NoSuchSchemaException` if the schema is not found on the server.

##### examples
```python
# Upload a file containing Collections data to a schema
await client.upload_file(file_path='data/Collections.csv')
# Upload a file containing Resources data to a schema
await client.upload_file(file_path='data/Resources.csv')

# Upload a file containing members information to a schema
await client.upload_file(file_path='molgenis_members.csv', schema='MySchema')
@@ -306,18 +310,27 @@ Throws the `NoSuchSchemaException` if the schema is not found on the server.

##### examples
```python
# Delete cohorts from a list of ids
cohorts = [{'name': 'Cohort 1', 'name': 'Cohort 2'}]
client.delete_records(schema='MySchema', table='Cohorts', data=cohorts)
# Delete resources from a list of ids
resources = [{'name': 'Resource 1'}, {'name': 'Resource 2'}]
client.delete_records(schema='MySchema', table='Resources', data=resources)

# Delete cohorts from pandas DataFrame
cohorts_df = pandas.DataFrame(data=cohorts)
client.delete_records(schema='MySchema', table='Cohorts', data=cohorts_df)
# Delete resources from pandas DataFrame
resources_df = pandas.DataFrame(data=resources)
client.delete_records(schema='MySchema', table='Resources', data=resources_df)

# Delete cohorts from entries in a CSV file
client.delete_records(schema='MySchema', table='Cohorts', file='Cohorts-to-delete.csv')
# Delete resources from entries in a CSV file
client.delete_records(schema='MySchema', table='Resources', file='Resources-to-delete.csv')
```

### truncate
```python
client.truncate(table='My table', schema='My Schema')
```
Truncates the table and removes all its contents.
This will fail if entries in the table are referenced from other tables.

Throws the `ReferenceException` if entries in the table are referenced in other tables.
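
A minimal sketch of handling this exception, assuming `ReferenceException` can be imported from `molgenis_emx2_pyclient.exceptions` (as done in the dev script in this PR):

```python
from molgenis_emx2_pyclient.exceptions import ReferenceException

try:
    client.truncate(table='My table', schema='My Schema')
except ReferenceException as e:
    # Entries in 'My table' are still referenced from other tables
    print(e)
```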

### create_schema
```python
async def create_schema(self,
7 changes: 7 additions & 0 deletions tools/pyclient/README.md
@@ -13,6 +13,13 @@ pip install molgenis-emx2-pyclient
Releases of the Molgenis EMX2 Pyclient follow the release number of the accompanying release of the Molgenis EMX2 software.
Therefore, releases of the Pyclient are less frequent than those of EMX2 and the latest version of the Pyclient may differ from the latest version of Molgenis EMX2.

#### 11.42.3
- Added: feature 'truncate' to remove all entries from a table
- Added: option to filter results of `get` method by columns
- Improved: results returned from `get` with `as_df=False` are now retrieved through the GraphQL API
- Improved: added additional parsing of data returned from the CSV API into a pandas DataFrame in the `get` method
- Fixed: the log level was hard-coded to `DEBUG`; users can now set the log level to their preferred level again

#### 11.23.0
Added: an optional `job` argument to the `Client` initialization, allowing the Pyclient to run asynchronous methods within a job in EMX2.

89 changes: 49 additions & 40 deletions tools/pyclient/dev/dev.py
@@ -2,7 +2,7 @@
# FILE: dev.py
# AUTHOR: David Ruvolo, Ype Zijlstra
# CREATED: 2023-05-22
# MODIFIED: 2024-09-11
# MODIFIED: 2025-01-14
# PURPOSE: development script for initial testing of the py-client
# STATUS: ongoing
# PACKAGES: pandas, python-dotenv
@@ -24,7 +24,8 @@

from tools.pyclient.src.molgenis_emx2_pyclient import Client
from tools.pyclient.src.molgenis_emx2_pyclient.exceptions import (NoSuchSchemaException, NoSuchTableException,
GraphQLException, PermissionDeniedException)
GraphQLException, PermissionDeniedException,
ReferenceException)


async def main():
@@ -39,16 +40,18 @@

async with Client('https://emx2.dev.molgenis.org/', schema='catalogue') as client:

participant_range = [10_000, 20_000.5]
big_data = client.get(table='Collection subcohorts',
query_filter=f'`numberOfParticipants` between {participant_range}', as_df=True)
print(big_data.head().to_string())
participant_range = [10_000, 20_000]
subpopulations = client.get(table='Subpopulations',
query_filter=f'`numberOfParticipants` between {participant_range}',
columns=['name', 'description', 'numberOfParticipants'],
as_df=False)
print(subpopulations)

excluded_countries = ["Denmark", "France"]
collections = client.get(table='Collections',
query_filter=f'subcohorts.countries.name != {excluded_countries}',
resources = client.get(table='Resources',
query_filter=f'subpopulations.countries.name != {excluded_countries}',
as_df=True)
print(collections.head().to_string())
print(resources.head().to_string())

var_values = client.get(table='Variable values',
query_filter='label != No and value != 1', as_df=True)
@@ -77,7 +80,6 @@

# Export the entire 'pet store' schema to memory in Excel format,
# print its table names and the contents of the 'Pet' table.
# Export the 'Collections' table from schema 'catalogue' to memory and print a sample of its contents
pet_store_excel = await client.export(schema='pet store', as_excel=True)

pet_store = openpyxl.load_workbook(pet_store_excel, data_only=True)
@@ -86,9 +88,10 @@
pet_sheet = pd.DataFrame((ps := pd.DataFrame(pet_store['Pet'].values)).values[1:], columns=ps.iloc[0].values)
print(pet_sheet.to_string())

raw_collections = await client.export(schema='catalogue', table='Collections')
collections = pd.read_csv(raw_collections)
print(collections.sample(5).to_string())
# Export the 'Resources' table from schema 'catalogue' to memory and print a sample of its contents
raw_resources = await client.export(schema='catalogue', table='Resources')
resources = pd.read_csv(raw_resources)
print(resources.sample(5).to_string())

# Connect to server with a default schema specified
with Client('https://emx2.dev.molgenis.org/', schema='pet store', token=token) as client:
@@ -232,63 +235,69 @@
try:
schema_create = asyncio.create_task(client.create_schema(name='myNewSchema'))
print(client.schema_names)

# Import the pet store data, downloaded earlier
await schema_create
upload_task = asyncio.create_task(client.upload_file(schema='myNewSchema', file_path='pet store.zip'))

# Try to truncate the 'Pet' and 'User' tables
await upload_task
try:
client.truncate(table='Pet', schema='myNewSchema')
except ReferenceException:
print("Could not truncate table 'Pet', as it is referenced to in another table.")

try:
client.truncate(table='User', schema='myNewSchema')
except ReferenceException:
print("This cannot happen, as table 'User' is not referenced to by other tables.")

except (GraphQLException, PermissionDeniedException) as e:
print(e)

# Update the description
try:
await schema_create
client.update_schema(name='myNewSchema', description='I forgot the description')
print(client.schema_names)
print(client.schemas)
except (GraphQLException, NoSuchSchemaException) as e:
print(e)

# Recreate the schema: delete and create
try:
await client.recreate_schema(name='myNewSchema')
print(client.schema_names)
except (GraphQLException, NoSuchSchemaException) as e:
print(e)

# Delete the schema
try:
await schema_create
await asyncio.create_task(client.delete_schema(name='myNewSchema'))
print(client.schema_names)
except (GraphQLException, NoSuchSchemaException) as e:
print(e)

print("\n\n")

# Use the Schema, Table, and Column classes
# //////////////////////////////////////////////////////////////////////////////////////////
# Examples for using the Schema, Table, and Column classes
# Get the metadata for the 'catalogue' schema
catalogue_schema = Client('https://emx2.dev.molgenis.org/').get_schema_metadata('catalogue')

# Find the tables inheriting from the 'Collections' table
resource_children = catalogue_schema.get_tables(by='inheritName', value='Collections')

print("Tables in the schema inheriting from the 'Collections' table.")
for res_chi in resource_children:
print(f"{res_chi!s}\n{res_chi!r}")
print("\n")

# Find the table
collections_meta = catalogue_schema.get_table(by='name', value='Collections')
print(collections_meta)
# Get the metadata for the Resources table
resources_meta = catalogue_schema.get_table(by='name', value='Resources')
print(resources_meta)

# Find the columns in the Collections table referencing the Organisations table
orgs_refs = collections_meta.get_columns(by='refTableName', value='Organisations')
print(orgs_refs)
# Find the columns in the Resources table referencing entries in the Resources table
resources_refs = resources_meta.get_columns(by='refTableName', value='Resources')
print(resources_refs)

# Find the columns in the Collections table referencing the Organisations table in a reference array
orgs_array_refs = collections_meta.get_columns(by=['columnType', 'refTableName'],
value=['REF_ARRAY', 'Collection organisations'])
print(orgs_array_refs)
# Find the columns in the Resources table referencing the Resources table in a reference array
res_arrays_refs = resources_meta.get_columns(by=['columnType', 'refTableName'],
value=['REF_ARRAY', 'Resources'])
print(res_arrays_refs)

# Print the __str__ and __repr__ representations of these columns
print("Columns in the Collections table referencing the Collection organisations table in an array.")
for orgs_ref in orgs_array_refs:
print(f"{orgs_ref!s}\n{orgs_ref!r}\n")
print("Columns in the Resources table referencing the Resources table in an array.")
for res_ref in res_arrays_refs:
print(f"{res_ref!s}\n{res_ref!r}\n")

if __name__ == '__main__':
asyncio.run(main())