Skip to content

Commit

Permalink
Merge pull request #584 from kbase/dev_heatmap_filtering
Browse files Browse the repository at this point in the history
RE2022-275: refactor filtering handler
  • Loading branch information
Tianhao-Gu authored Dec 22, 2023
2 parents e37d29c + e912e22 commit b4f2907
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 129 deletions.
54 changes: 42 additions & 12 deletions src/common/product_models/heatmap_common_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pydantic import BaseModel, Field

from src.common.product_models.columnar_attribs_common_models import ColumnType as AttribsColumnType
from src.common.product_models import columnar_attribs_common_models as attribs_models
from src.common.product_models.common_models import SubsetProcessStates

# these fields need to match the fields in the models below.
Expand Down Expand Up @@ -55,14 +55,14 @@ class ColumnType(str, Enum):

# Maps a heatmap column type to a genome attributes column type
_HEATMAP_TO_ATTRIBS_MAPPING = {
ColumnType.FLOAT: AttribsColumnType.FLOAT,
ColumnType.INT: AttribsColumnType.INT,
ColumnType.COUNT: AttribsColumnType.INT,
ColumnType.BOOL: AttribsColumnType.BOOL
ColumnType.FLOAT: attribs_models.ColumnType.FLOAT,
ColumnType.INT: attribs_models.ColumnType.INT,
ColumnType.COUNT: attribs_models.ColumnType.INT,
ColumnType.BOOL: attribs_models.ColumnType.BOOL
}


def trans_column_type_heatmap_to_attribs(col_type: ColumnType) -> AttribsColumnType:
def trans_column_type_heatmap_to_attribs(col_type: ColumnType) -> attribs_models.ColumnType:
"""
Translate a heatmap column type to an attributes column type.
This method is suitable for models like filters that exclusively operate on attributes column types
Expand Down Expand Up @@ -99,6 +99,20 @@ class ColumnInformation(BaseModel):
)


def transfer_col_heatmap_to_attribs(col: ColumnInformation) -> attribs_models.AttributesColumn:
    """
    Convert a heatmap column into its attributes-column equivalent.

    The heatmap `col_id` field becomes the attribs `key` field, and the heatmap
    `type` is translated to the corresponding attribs column type. All remaining
    attribs fields (filter_strategy, min/max_value, etc.) are left as None.
    """
    attribs_type = trans_column_type_heatmap_to_attribs(col.type)
    return attribs_models.AttributesColumn(key=col.col_id, type=attribs_type)


class ColumnCategory(BaseModel):
"""
A set of columns grouped into a disjoint, non-hierarchical category.
Expand Down Expand Up @@ -225,6 +239,26 @@ class CellDetail(BaseModel):
values: list[CellDetailEntry]


def form_heatmap_cell_val_key(col_id: str) -> str:
    """
    Form a key for a heatmap cell value from a column ID.

    Key format: <HEATMAP_COL_PREFIX><sep><col_id><sep><FIELD_HEATMAP_CELL_VALUE>,
    where <sep> is HEATMAP_COL_SEPARATOR.

    col_id: the column ID
    """
    parts = (HEATMAP_COL_PREFIX, col_id, FIELD_HEATMAP_CELL_VALUE)
    return HEATMAP_COL_SEPARATOR.join(parts)


def form_heatmap_cell_id_key(col_id: str) -> str:
    """
    Form a key for a heatmap cell ID from a column ID.

    Key format: <HEATMAP_COL_PREFIX><sep><col_id><sep><FIELD_HEATMAP_CELL_ID>,
    where <sep> is HEATMAP_COL_SEPARATOR.

    col_id: the column ID
    """
    parts = (HEATMAP_COL_PREFIX, col_id, FIELD_HEATMAP_CELL_ID)
    return HEATMAP_COL_SEPARATOR.join(parts)


def transform_heatmap_row_cells(data: dict[str, Any]):
"""
Transform, in place, the cells structure in a heatmap row to a new structure.
Expand Down Expand Up @@ -260,13 +294,9 @@ def transform_heatmap_row_cells(data: dict[str, Any]):
for cell in data.pop(FIELD_HEATMAP_ROW_CELLS):
col_id = cell[FIELD_HEATMAP_COL_ID]

# Construct keys and values for the new structure
cell_id_key = f"{HEATMAP_COL_PREFIX}{HEATMAP_COL_SEPARATOR}{col_id}{HEATMAP_COL_SEPARATOR}{FIELD_HEATMAP_CELL_ID}"
cell_val_key = f"{HEATMAP_COL_PREFIX}{HEATMAP_COL_SEPARATOR}{col_id}{HEATMAP_COL_SEPARATOR}{FIELD_HEATMAP_CELL_VALUE}"

# Add the new keys and values to the data structure
data[cell_id_key] = cell[FIELD_HEATMAP_CELL_ID]
data[cell_val_key] = cell[FIELD_HEATMAP_CELL_VALUE]
data[form_heatmap_cell_id_key(col_id)] = cell[FIELD_HEATMAP_CELL_ID]
data[form_heatmap_cell_val_key(col_id)] = cell[FIELD_HEATMAP_CELL_VALUE]


def revert_transformed_heatmap_row_cells(data: dict[str, Any]):
Expand Down
131 changes: 14 additions & 117 deletions src/service/data_products/genome_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
SELECTION_ID_PREFIX,
)
from src.service.data_products.table_models import TableAttributes
from src.service.filtering import analyzers
from src.service.filtering.filters import FilterSet
from src.service.filtering.filtering_processing import get_filters
from src.service.http_bearer import KBaseHTTPBearer
from src.service.processing import SubsetSpecification
from src.service.routes_common import PATH_VALIDATOR_COLLECTION_ID
Expand Down Expand Up @@ -275,118 +274,6 @@ def _remove_keys(doc):
return doc


##########################
# Filter handling code
##########################

# might want to move some of this into a shared file at some point, might be reusable for other
# DPs
# Not sure about the meta call, may need to abstract that out somehow

# Query parameter prefix marking a filter specification, e.g. `filter_<field>=<querystring>`
_FILTER_PREFIX = "filter_"


def _get_filter_map(r: Request) -> dict[str, str]:
    """
    Extract filter specifications from the request's query parameters.

    Every query parameter of the form `filter_<field>=<querystring>` is collected
    into a `<field> -> <querystring>` mapping. Raises IllegalParameterError if the
    same filter field is supplied more than once.
    """
    filters = {}
    for param in r.query_params.keys():
        if not param.startswith(_FILTER_PREFIX):
            continue
        field = param.removeprefix(_FILTER_PREFIX)
        if len(r.query_params.getlist(param)) > 1:
            raise errors.IllegalParameterError(
                f"More than one filter specification provided for field {field}")
        filters[field] = r.query_params[param]
    return filters


async def _get_filters(
    r: Request,
    arango_coll: str,
    coll_id: str,
    load_ver: str,
    load_ver_override: bool,
    view_name: str = None,
    count: bool = False,
    sort_on: str = None,
    sort_desc: bool = False,
    filter_conjunction: bool = True,
    match_spec: SubsetSpecification = None,
    selection_spec: SubsetSpecification = None,
    keep: dict[str, set[col_models.ColumnType]] = None,
    keep_filter_nulls: bool = False,
    skip: int = 0,
    limit: int = 1000,
) -> FilterSet:
    """
    Build a FilterSet for a genome attributes query from the request's
    `filter_*` query parameters.

    Fetches the collection's column metadata, validates the requested sort
    and `keep` fields against the known columns, and appends one filter per
    `filter_<field>` query parameter.

    r - the incoming request; filter specifications are read from its query params.
    arango_coll - the ArangoDB collection to query.
    coll_id - the KBase collection ID.
    load_ver - the load version of the collection data.
    load_ver_override - whether the load version was overridden by an admin.
        Filtering with an override requires the admin to supply view_name.
    view_name - the ArangoSearch view to use for filtering; required if any
        filters are specified.
    count - whether the query should only count matching documents.
    sort_on - the column key to sort on; must be one of the known columns.
    sort_desc - whether to sort in descending order.
    filter_conjunction - whether multiple filters are ANDed (True) or ORed (False).
    match_spec - the match subset to apply, if any.
    selection_spec - the selection subset to apply, if any.
    keep - columns to return, mapped to the set of acceptable column types for
        each (an empty set means any type is acceptable).
    keep_filter_nulls - whether to filter out documents with null values in the
        kept columns.
    skip - the number of documents to skip.
    limit - the maximum number of documents to return.

    Raises ValueError if filters are specified but no view name is available,
    and IllegalParameterError for unknown or wrongly-typed fields.
    """
    filter_query = _get_filter_map(r)
    appstate = app_state.get_app_state(r)
    # column metadata is needed even without filters, for sort_on / keep validation
    column_meta = await _get_genome_attributes_meta_internal(
        appstate.arangostorage, coll_id, load_ver, load_ver_override)
    if filter_query and not view_name:
        if load_ver_override:
            # If we need this feature then the admin needs to supply the view name to use
            # via the API
            raise ValueError("Filtering is not supported with a load version override.")
        raise ValueError(f"No search view name configured for collection {coll_id}, "
            + f"data product {ID}. Cannot perform filtering operation")
    columns = {c.key: c for c in column_meta.columns}
    if sort_on and sort_on not in columns:
        raise errors.IllegalParameterError(
            f"No such field for collection {coll_id} load version {load_ver}: {sort_on}")
    if keep:
        for col in keep:
            if col not in columns:
                raise errors.IllegalParameterError(
                    f"No such field for collection {coll_id} load version {load_ver}: {col}")
            # an empty type set means any column type is acceptable for this operation
            if keep[col] and columns[col].type not in keep[col]:
                raise errors.IllegalParameterError(
                    f"Column {col} is type '{columns[col].type}', which is not one of the "
                    + f"acceptable types for this operation: {[t.value for t in keep[col]]}")
    fs = FilterSet(
        coll_id,
        load_ver,
        collection=arango_coll,
        view=view_name,
        count=count,
        sort_on=sort_on,
        sort_descending=sort_desc,
        conjunction=filter_conjunction,
        match_spec=match_spec,
        selection_spec=selection_spec,
        keep=list(keep.keys()) if keep else None,
        keep_filter_nulls=keep_filter_nulls,
        skip=skip,
        limit=limit
    )
    return _append_filters(fs, filter_query, columns)


def _append_filters(
    fs: FilterSet,
    filter_query: dict[str, str],
    columns: dict[str, col_models.AttributesColumn]
) -> FilterSet:
    """
    Append each user-supplied filter to the filter set.

    Validates that every filter field exists in the known columns and that the
    query string meets the minimum length required by the column's filter
    strategy. Returns the same FilterSet for chaining.
    """
    for field, querystring in filter_query.items():
        column = columns.get(field)
        if column is None:
            raise errors.IllegalParameterError(f"No such filter field: {field}")
        strategy = column.filter_strategy
        minlen = analyzers.get_minimum_query_length(strategy)
        if minlen and len(querystring) < minlen:
            raise errors.IllegalParameterError(
                f"Filter field '{field}' requires a minimum query length of {minlen}")
        fs.append(
            field,
            column.type,
            querystring,
            analyzers.get_analyzer(strategy),
            strategy,
        )
    return fs


##########################
# End filter handling code
##########################


_FLD_COL_ID = "colid"
_FLD_COL_NAME = "colname"
_FLD_COL_LV = "colload"
Expand Down Expand Up @@ -477,12 +364,15 @@ async def get_genome_attributes(
coll, load_ver = await get_load_version(appstate.arangostorage, collection_id, ID, lvo, user)
match_spec = await _get_match_spec(appstate, user, coll, match_id, match_mark)
sel_spec = await _get_selection_spec(appstate, coll, selection_id, selection_mark)
filters = await _get_filters(
filters = await get_filters(
r,
names.COLL_GENOME_ATTRIBS,
collection_id,
load_ver,
load_ver_override,
ID,
(await _get_genome_attributes_meta_internal(
appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns,
view_name=coll.get_data_product(ID).search_view if coll else None,
count=count,
sort_on=sort_on,
Expand Down Expand Up @@ -552,12 +442,16 @@ async def get_histogram(
coll, load_ver = await get_load_version(appstate.arangostorage, collection_id, ID, lvo, user)
match_spec = await _get_match_spec(appstate, user, coll, match_id)
sel_spec = await _get_selection_spec(appstate, coll, selection_id)
filters = await _get_filters(

filters = await get_filters(
r,
names.COLL_GENOME_ATTRIBS,
collection_id,
load_ver,
load_ver_override,
ID,
(await _get_genome_attributes_meta_internal(
appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns,
view_name=coll.get_data_product(ID).search_view if coll else None,
filter_conjunction=conjunction,
match_spec=match_spec,
Expand Down Expand Up @@ -632,12 +526,15 @@ async def get_xy_scatter(
coll, load_ver = await get_load_version(appstate.arangostorage, collection_id, ID, lvo, user)
match_spec = await _get_match_spec(appstate, user, coll, match_id)
sel_spec = await _get_selection_spec(appstate, coll, selection_id)
filters = await _get_filters(
filters = await get_filters(
r,
names.COLL_GENOME_ATTRIBS,
collection_id,
load_ver,
load_ver_override,
ID,
(await _get_genome_attributes_meta_internal(
appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns,
view_name=coll.get_data_product(ID).search_view if coll else None,
filter_conjunction=conjunction,
match_spec=match_spec,
Expand Down
Loading

0 comments on commit b4f2907

Please sign in to comment.