glencoesoftware · DavidStirling · Aug 26, 2024 · Jul 30, 2024 · Jul 31, 2024 · Aug 2, 2024
diff --git a/README.md b/README.md
@@ -82,7 +82,7 @@ Otherwise a CLI interface will be provided.
 
 This behaviour can be disabled by supplying `interactive=False` to the connect call.
 
-### Reading data
+## Reading data
 
 Several utility methods are provided for working with OMERO.tables. These all support the full range of connection modes.
 
@@ -113,30 +113,72 @@ my_dataframe.head()
 
 Returned dataframes also come with a pandas index column, representing the original row numbers from the OMERO.table.
 
-### Writing data
+## Writing data
 
 Pandas dataframes can also be written back as new OMERO.tables.
 N.b. It is currently not possible to modify a table on the server.
 
 Connection handling works just as it does with downloading, you can 
 provide credentials, a token or a connection object.
 
-To upload data, the user needs to specify which OMERO object the table
-will be associated with. To do this, the third and fourth arguments 
-should be the object ID and object type. Supported objects are Dataset, 
+To upload data, the user needs to specify which OMERO object(s) the table
+will be associated with. This can be achieved with the `parent_id` and 
+`parent_type` arguments. Supported objects are Roi, Dataset, 
 Well, Plate, Project, Screen and Image.
 
 ```python
 import pandas
 import omero2pandas
 my_data = pandas.read_csv("/path/to/my_data.csv")
-ann_id = omero2pandas.upload_table(my_data, "Name for table", 142, "Image")
-# Returns the annotation ID of the uploaded file object
+ann_id = omero2pandas.upload_table(my_data, "Name for table", 
+                                   parent_id=142, parent_type="Image")
+# Returns the annotation ID of the uploaded FileAnnotation object
 ```
 
 Once uploaded, the table will be accessible on OMERO.web under the file 
 annotations panel of the parent object. Using unique table names is advised.
 
+### Linking to multiple objects
+
+To link to multiple objects, you can supply a list of `(<type>, <id>)`
+tuples to the `links` parameter. The resulting table's FileAnnotation 
+will be linked to all objects in the `links` parameter (plus 
+`parent_type`:`parent_id` if provided).
+
+
+```python
+import omero2pandas
+ann_id = omero2pandas.upload_table(
+    "/path/to/my.csv", "My table", 
+    links=[("Image", 101), ("Dataset", 2), ("Roi", 1923)])
+# Uploads with Annotation links to Image 101, Dataset 2 and ROI 1923 
+```
+
+Links allow OMERO.web to display the resulting table as 
+an annotation associated with those objects.
+
+
+### Large Tables
+The first argument to `upload_table` can be a pandas dataframe or a path to a 
+.csv file containing the table data. In the latter case the table will be read 
+in chunks corresponding to the `chunk_size` argument. This will allow you to 
+upload tables which are too large to load into system memory.
+
+```python
+import omero2pandas
+ann_id = omero2pandas.upload_table("/path/to/my.csv", "My table", 
+                                   142, chunk_size=100)
+# Reads and uploads the file to Image 142, loading 100 lines at a time 
+```
+
+The `chunk_size` argument sets how many rows to send with each call to the server. 
+If not specified, omero2pandas will attempt to automatically optimise chunk 
+size to send ~2 million table cells per call (up to a max of 50,000 
+rows per message for narrow tables).
+
+
+
+
 # Advanced Usage
 
 This package also contains utility functions for managing an OMERO connection.

diff --git a/omero2pandas/__init__.py b/omero2pandas/__init__.py
@@ -10,6 +10,7 @@
 import logging
 import os
 import sys
+from typing import Iterable
 
 import pandas
 import omero
@@ -182,20 +183,23 @@ def read_table(file_id=None, annotation_id=None, column_names=(), rows=None,
     return df
 
 
-def upload_table(dataframe, table_name, parent_id, parent_type='Image',
-                 chunk_size=1000, omero_connector=None, server=None,
-                 port=4064, username=None, password=None):
+def upload_table(source, table_name, parent_id=None, parent_type='Image',
+                 links=None, chunk_size=None, omero_connector=None,
+                 server=None, port=4064, username=None, password=None):
     """
     Upload a pandas dataframe to a new OMERO table.
     For the connection, supply either an active client object or server
     credentials (not both!). If neither are provided the program will search
     for an OMERO user token on the system.
-    :param dataframe: Pandas dataframe to upload to OMERO
+    :param source: Pandas dataframe or CSV file path to upload to OMERO
     :param table_name: Name for the table on OMERO
     :param parent_id: Object ID to attach the table to as an annotation.
     :param parent_type: Object type to attach to.
-    One of: Image, Dataset, Plate, Well
-    :param chunk_size: Rows to transmit to the server in a single operation
+    One of: Image, Dataset, Project, Well, Plate, Screen, Roi
+    :param links: List of (Type, ID) tuples specifying objects to
+    link the table to.
+    :param chunk_size: Rows to transmit to the server in a single operation.
+    Default: Automatically choose a size
     :param omero_connector: OMERO.client object which is already connected
     to a server. Supersedes any other connection details.
     :param server: Address of the server
@@ -204,12 +208,28 @@ def upload_table(dataframe, table_name, parent_id, parent_type='Image',
     :param password: Password for server login
     :return: File Annotation ID of the new table
     """
+    # Coerce inputs to the links list input format
+    links = links or []
+    if (len(links) == 2 and
+            isinstance(links[0], str) and isinstance(links[1], int)):
+        # Someone forgot to nest their tuples, let's fix that
+        links = [links]
+    elif isinstance(links, tuple):
+        # Make sure it's mutable
+        links = list(links)
+    if parent_id is not None:
+        if (parent_type, parent_id) not in links:
+            links.append((parent_type, parent_id))
+    if not links:
+        raise ValueError("No OMERO objects to link the table to")
+    elif not isinstance(links, Iterable):
+        raise ValueError(f"Links should be an iterable list of "
+                         f"type/id pairs, not {type(links)}")
     with OMEROConnection(server=server, username=username, password=password,
                          port=port, client=omero_connector) as connector:
         conn = connector.get_gateway()
         conn.SERVICE_OPTS.setOmeroGroup('-1')
-        ann_id = create_table(dataframe, table_name, parent_id, parent_type,
-                              conn, chunk_size)
+        ann_id = create_table(source, table_name, links, conn, chunk_size)
         if ann_id is None:
             LOGGER.warning("Failed to create OMERO table")
         return ann_id
@@ -349,7 +369,7 @@ def _get_table(conn, object_type, object_id):
 
     # Load the table
     resources = conn.c.sf.sharedResources()
-    data_table = resources.openTable(orig_file, _ctx=conn.SERVICE_OPTS)
+    data_table = resources.openTable(orig_file, conn.SERVICE_OPTS)
     conn.SERVICE_OPTS.setOmeroGroup(orig_group)
     return data_table