Skip to content

Commit

Permalink
Merge pull request #584 from kbase/dev_heatmap_filtering
Browse files Browse the repository at this point in the history
RE2022-275: refactor filtering handler
  • Loading branch information
Tianhao-Gu authored Dec 22, 2023
2 parents e37d29c + e912e22 commit b4f2907
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 129 deletions.
54 changes: 42 additions & 12 deletions src/common/product_models/heatmap_common_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pydantic import BaseModel, Field

from src.common.product_models.columnar_attribs_common_models import ColumnType as AttribsColumnType
from src.common.product_models import columnar_attribs_common_models as attribs_models
from src.common.product_models.common_models import SubsetProcessStates

# these fields need to match the fields in the models below.
Expand Down Expand Up @@ -55,14 +55,14 @@ class ColumnType(str, Enum):

# Maps a heatmap column type to a genome attributes column type
_HEATMAP_TO_ATTRIBS_MAPPING = {
ColumnType.FLOAT: AttribsColumnType.FLOAT,
ColumnType.INT: AttribsColumnType.INT,
ColumnType.COUNT: AttribsColumnType.INT,
ColumnType.BOOL: AttribsColumnType.BOOL
ColumnType.FLOAT: attribs_models.ColumnType.FLOAT,
ColumnType.INT: attribs_models.ColumnType.INT,
ColumnType.COUNT: attribs_models.ColumnType.INT,
ColumnType.BOOL: attribs_models.ColumnType.BOOL
}


def trans_column_type_heatmap_to_attribs(col_type: ColumnType) -> AttribsColumnType:
def trans_column_type_heatmap_to_attribs(col_type: ColumnType) -> attribs_models.ColumnType:
"""
Translate a heatmap column type to an attributes column type.
This method is suitable for models like filters that exclusively operate on attributes column types
Expand Down Expand Up @@ -99,6 +99,20 @@ class ColumnInformation(BaseModel):
)


def transfer_col_heatmap_to_attribs(col: ColumnInformation) -> attribs_models.AttributesColumn:
    """
    Convert a heatmap column into its attributes-column equivalent.

    The heatmap `col_id` field becomes the attribs `key` field, and the heatmap
    `type` is translated to the corresponding attribs column type. All remaining
    attribs fields (filter_strategy, min/max_value, etc.) are left as None.
    """
    attribs_type = trans_column_type_heatmap_to_attribs(col.type)
    return attribs_models.AttributesColumn(key=col.col_id, type=attribs_type)


class ColumnCategory(BaseModel):
"""
A set of columns grouped into a disjoint, non-hierarchical category.
Expand Down Expand Up @@ -225,6 +239,26 @@ class CellDetail(BaseModel):
values: list[CellDetailEntry]


def form_heatmap_cell_val_key(col_id: str) -> str:
    """
    Form a key for a heatmap cell value from a column ID.

    Key format: <HEATMAP_COL_PREFIX><sep><col_id><sep><FIELD_HEATMAP_CELL_VALUE>,
    where <sep> is HEATMAP_COL_SEPARATOR.

    col_id: the column ID
    """
    parts = (HEATMAP_COL_PREFIX, col_id, FIELD_HEATMAP_CELL_VALUE)
    return HEATMAP_COL_SEPARATOR.join(parts)


def form_heatmap_cell_id_key(col_id: str) -> str:
    """
    Form a key for a heatmap cell ID from a column ID.

    Key format: <HEATMAP_COL_PREFIX><sep><col_id><sep><FIELD_HEATMAP_CELL_ID>,
    where <sep> is HEATMAP_COL_SEPARATOR.

    col_id: the column ID
    """
    parts = (HEATMAP_COL_PREFIX, col_id, FIELD_HEATMAP_CELL_ID)
    return HEATMAP_COL_SEPARATOR.join(parts)


def transform_heatmap_row_cells(data: dict[str, Any]):
"""
Transform, in place, the cells structure in a heatmap row to a new structure.
Expand Down Expand Up @@ -260,13 +294,9 @@ def transform_heatmap_row_cells(data: dict[str, Any]):
for cell in data.pop(FIELD_HEATMAP_ROW_CELLS):
col_id = cell[FIELD_HEATMAP_COL_ID]

# Construct keys and values for the new structure
cell_id_key = f"{HEATMAP_COL_PREFIX}{HEATMAP_COL_SEPARATOR}{col_id}{HEATMAP_COL_SEPARATOR}{FIELD_HEATMAP_CELL_ID}"
cell_val_key = f"{HEATMAP_COL_PREFIX}{HEATMAP_COL_SEPARATOR}{col_id}{HEATMAP_COL_SEPARATOR}{FIELD_HEATMAP_CELL_VALUE}"

# Add the new keys and values to the data structure
data[cell_id_key] = cell[FIELD_HEATMAP_CELL_ID]
data[cell_val_key] = cell[FIELD_HEATMAP_CELL_VALUE]
data[form_heatmap_cell_id_key(col_id)] = cell[FIELD_HEATMAP_CELL_ID]
data[form_heatmap_cell_val_key(col_id)] = cell[FIELD_HEATMAP_CELL_VALUE]


def revert_transformed_heatmap_row_cells(data: dict[str, Any]):
Expand Down
131 changes: 14 additions & 117 deletions src/service/data_products/genome_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
SELECTION_ID_PREFIX,
)
from src.service.data_products.table_models import TableAttributes
from src.service.filtering import analyzers
from src.service.filtering.filters import FilterSet
from src.service.filtering.filtering_processing import get_filters
from src.service.http_bearer import KBaseHTTPBearer
from src.service.processing import SubsetSpecification
from src.service.routes_common import PATH_VALIDATOR_COLLECTION_ID
Expand Down Expand Up @@ -275,118 +274,6 @@ def _remove_keys(doc):
return doc


##########################
# Filter handling code
##########################

# might want to move some of this into a shared file at some point, might be reusable for other
# DPs
# Not sure about the meta call, may need to abstract that out somehow

# Query parameter prefix marking a filter specification, e.g. `filter_<field>=<querystring>`
_FILTER_PREFIX = "filter_"


def _get_filter_map(r: Request) -> dict[str, str]:
    """
    Extract filter specifications from the request's query parameters.

    Every query parameter of the form `filter_<field>=<querystring>` is collected
    into a `<field> -> <querystring>` mapping. Raises IllegalParameterError if the
    same filter field is supplied more than once.
    """
    filters = {}
    for param in r.query_params.keys():
        if not param.startswith(_FILTER_PREFIX):
            continue
        field = param.removeprefix(_FILTER_PREFIX)
        if len(r.query_params.getlist(param)) > 1:
            raise errors.IllegalParameterError(
                f"More than one filter specification provided for field {field}")
        filters[field] = r.query_params[param]
    return filters


async def _get_filters(
    r: Request,
    arango_coll: str,
    coll_id: str,
    load_ver: str,
    load_ver_override: bool,
    view_name: str = None,
    count: bool = False,
    sort_on: str = None,
    sort_desc: bool = False,
    filter_conjunction: bool = True,
    match_spec: SubsetSpecification = None,
    selection_spec: SubsetSpecification = None,
    keep: dict[str, set[col_models.ColumnType]] = None,
    keep_filter_nulls: bool = False,
    skip: int = 0,
    limit: int = 1000,
) -> FilterSet:
    """
    Build a FilterSet for a genome attributes query from the request's
    `filter_*` query parameters.

    Fetches the collection's column metadata, validates the requested sort
    and `keep` fields against the known columns, and appends one filter per
    `filter_<field>` query parameter.

    r - the incoming request; filter specifications are read from its query params.
    arango_coll - the ArangoDB collection to query.
    coll_id - the KBase collection ID.
    load_ver - the load version of the collection data.
    load_ver_override - whether the load version was overridden by an admin.
        Filtering with an override requires the admin to supply view_name.
    view_name - the ArangoSearch view to use for filtering; required if any
        filters are specified.
    count - whether the query should only count matching documents.
    sort_on - the column key to sort on; must be one of the known columns.
    sort_desc - whether to sort in descending order.
    filter_conjunction - whether multiple filters are ANDed (True) or ORed (False).
    match_spec - the match subset to apply, if any.
    selection_spec - the selection subset to apply, if any.
    keep - columns to return, mapped to the set of acceptable column types for
        each (an empty set means any type is acceptable).
    keep_filter_nulls - whether to filter out documents with null values in the
        kept columns.
    skip - the number of documents to skip.
    limit - the maximum number of documents to return.

    Raises ValueError if filters are specified but no view name is available,
    and IllegalParameterError for unknown or wrongly-typed fields.
    """
    filter_query = _get_filter_map(r)
    appstate = app_state.get_app_state(r)
    # column metadata is needed even without filters, for sort_on / keep validation
    column_meta = await _get_genome_attributes_meta_internal(
        appstate.arangostorage, coll_id, load_ver, load_ver_override)
    if filter_query and not view_name:
        if load_ver_override:
            # If we need this feature then the admin needs to supply the view name to use
            # via the API
            raise ValueError("Filtering is not supported with a load version override.")
        raise ValueError(f"No search view name configured for collection {coll_id}, "
            + f"data product {ID}. Cannot perform filtering operation")
    columns = {c.key: c for c in column_meta.columns}
    if sort_on and sort_on not in columns:
        raise errors.IllegalParameterError(
            f"No such field for collection {coll_id} load version {load_ver}: {sort_on}")
    if keep:
        for col in keep:
            if col not in columns:
                raise errors.IllegalParameterError(
                    f"No such field for collection {coll_id} load version {load_ver}: {col}")
            # an empty type set means any column type is acceptable for this operation
            if keep[col] and columns[col].type not in keep[col]:
                raise errors.IllegalParameterError(
                    f"Column {col} is type '{columns[col].type}', which is not one of the "
                    + f"acceptable types for this operation: {[t.value for t in keep[col]]}")
    fs = FilterSet(
        coll_id,
        load_ver,
        collection=arango_coll,
        view=view_name,
        count=count,
        sort_on=sort_on,
        sort_descending=sort_desc,
        conjunction=filter_conjunction,
        match_spec=match_spec,
        selection_spec=selection_spec,
        keep=list(keep.keys()) if keep else None,
        keep_filter_nulls=keep_filter_nulls,
        skip=skip,
        limit=limit
    )
    return _append_filters(fs, filter_query, columns)


def _append_filters(
    fs: FilterSet,
    filter_query: dict[str, str],
    columns: dict[str, col_models.AttributesColumn]
) -> FilterSet:
    """
    Append each user-supplied filter to the filter set.

    Validates that every filter field exists in the known columns and that the
    query string meets the minimum length required by the column's filter
    strategy. Returns the same FilterSet for chaining.
    """
    for field, querystring in filter_query.items():
        column = columns.get(field)
        if column is None:
            raise errors.IllegalParameterError(f"No such filter field: {field}")
        strategy = column.filter_strategy
        minlen = analyzers.get_minimum_query_length(strategy)
        if minlen and len(querystring) < minlen:
            raise errors.IllegalParameterError(
                f"Filter field '{field}' requires a minimum query length of {minlen}")
        fs.append(
            field,
            column.type,
            querystring,
            analyzers.get_analyzer(strategy),
            strategy,
        )
    return fs


##########################
# End filter handling code
##########################


_FLD_COL_ID = "colid"
_FLD_COL_NAME = "colname"
_FLD_COL_LV = "colload"
Expand Down Expand Up @@ -477,12 +364,15 @@ async def get_genome_attributes(
coll, load_ver = await get_load_version(appstate.arangostorage, collection_id, ID, lvo, user)
match_spec = await _get_match_spec(appstate, user, coll, match_id, match_mark)
sel_spec = await _get_selection_spec(appstate, coll, selection_id, selection_mark)
filters = await _get_filters(
filters = await get_filters(
r,
names.COLL_GENOME_ATTRIBS,
collection_id,
load_ver,
load_ver_override,
ID,
(await _get_genome_attributes_meta_internal(
appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns,
view_name=coll.get_data_product(ID).search_view if coll else None,
count=count,
sort_on=sort_on,
Expand Down Expand Up @@ -552,12 +442,16 @@ async def get_histogram(
coll, load_ver = await get_load_version(appstate.arangostorage, collection_id, ID, lvo, user)
match_spec = await _get_match_spec(appstate, user, coll, match_id)
sel_spec = await _get_selection_spec(appstate, coll, selection_id)
filters = await _get_filters(

filters = await get_filters(
r,
names.COLL_GENOME_ATTRIBS,
collection_id,
load_ver,
load_ver_override,
ID,
(await _get_genome_attributes_meta_internal(
appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns,
view_name=coll.get_data_product(ID).search_view if coll else None,
filter_conjunction=conjunction,
match_spec=match_spec,
Expand Down Expand Up @@ -632,12 +526,15 @@ async def get_xy_scatter(
coll, load_ver = await get_load_version(appstate.arangostorage, collection_id, ID, lvo, user)
match_spec = await _get_match_spec(appstate, user, coll, match_id)
sel_spec = await _get_selection_spec(appstate, coll, selection_id)
filters = await _get_filters(
filters = await get_filters(
r,
names.COLL_GENOME_ATTRIBS,
collection_id,
load_ver,
load_ver_override,
ID,
(await _get_genome_attributes_meta_internal(
appstate.arangostorage, collection_id, load_ver, load_ver_override)).columns,
view_name=coll.get_data_product(ID).search_view if coll else None,
filter_conjunction=conjunction,
match_spec=match_spec,
Expand Down
Loading

0 comments on commit b4f2907

Please sign in to comment.