Skip to content

Commit

Permalink
Merge changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Pushkar-Bhuse committed Aug 15, 2022
1 parent 1a4e3f2 commit 4dfcff7
Show file tree
Hide file tree
Showing 11 changed files with 777 additions and 258 deletions.
10 changes: 9 additions & 1 deletion forte/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,15 @@

# Name of the key to access the attribute dict of an entry type from
# ``_type_attributes`` of ``DataStore``.
TYPE_ATTR_KEY = "attributes"
ATTR_INFO_KEY = "attributes"

# Name of the key to access the type of an attribute from
# ``_type_attributes`` of ``DataStore``.
ATTR_TYPE_KEY = "type"

# Name of the key to access the index of an attribute from
# ``_type_attributes`` of ``DataStore``.
ATTR_INDEX_KEY = "index"

# Name of the key to access a set of parent names of an entry type from
# ``_type_attributes`` of ``DataStore``.
Expand Down
92 changes: 65 additions & 27 deletions forte/data/base_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
Iterable,
)
from functools import partial
from typing_inspect import get_origin
from inspect import isclass
from typing_inspect import is_forward_ref
from packaging.version import Version
import jsonpickle

Expand All @@ -47,6 +48,7 @@
LinkType,
FList,
FDict,
ENTRY_TYPE_DATA_STRUCTURES,
)
from forte.version import (
PACK_VERSION,
Expand Down Expand Up @@ -482,32 +484,71 @@ def on_entry_creation(
# Use the auto-inferred control component.
c = self.__control_component

def entry_getter(cls: Entry, attr_name: str, field_type):
"""A getter function for `dataclass` fields of entry object.
When the field contains ``tid``s, we will convert them to entry
object on the fly.
def entry_getter(cls: Entry, attr_name: str):
"""A getter function for dataclass fields of entry object.
Depending on the value stored in the data store and the type
of the attribute, the method decides how to process the value.
- Attributes repersented as ``FList`` and ``FDict`` objects are stored
as list and dictionary respectively in the dtaa store entry. These
values are converted to ``FList`` and ``FDict`` objects on the fly.
- When the field contains ``tid``s, we will convert them to entry
object on the fly. This is done by checking the type
information of the attribute in the entry object. If the
attribute is of type ``Entry`` or a ``ForwardRef``, we can
assume that that value stored in the data store entry represents
the entry's ``tid``.
- When values are stored as a tuple, we assume the value represents
a `subentry` stored in a `MultiPack`.
- In all other cases, the values are returned in the forms that they
are stored in the data store entry.
Args:
cls: An ``Entry`` class object.
attr_name: The name of the attribute.
field_type: The type of the attribute.
Returns:
The value of the required attribute in the form specified
by the corresponding ``Entry`` class object.
"""

data_store_ref = (
cls.pack._data_store # pylint: disable=protected-access
)
attr_val = data_store_ref.get_attribute(
tid=cls.tid, attr_name=attr_name
)
if field_type in (FList, FDict):
attr_type = data_store_ref.get_attr_type(
cls.entry_type(), attr_name
)

if attr_type[0] in ENTRY_TYPE_DATA_STRUCTURES:
# Generate FList/FDict object on the fly
return field_type(parent_entry=cls, data=attr_val)
return attr_type[0](parent_entry=cls, data=attr_val)
try:
# TODO: Find a better solution to determine if a field is Entry
# Will be addressed by https://github.com/asyml/forte/issues/835
# Convert tid to entry object on the fly
if isinstance(attr_val, int):
# Single pack entry
# Check dataclass attribute value type
# If the attribute was an Entry object, only its tid
# is stored in the DataStore and hence its needs to be converted.

# Entry objects are stored in data stores by their tid (which is
# of type int). Thus, if we enounter an int value, we check the
# type information which is stored as a tuple. if any entry in this
# tuple is a subclass of Entry or is a ForwardRef to another entry,
# we can infer that this int value represents the tid of an Entry
# object and thus must be converted to an object using get_entry
# before returning.
if (
isinstance(attr_val, int)
and attr_type[1]
and any(
issubclass(entry, Entry)
if isclass(entry)
else is_forward_ref(entry)
for entry in list(attr_type[1])
)
):
return cls.pack.get_entry(tid=attr_val)

# The condition below is to check whether the attribute's value
# is a pair of integers - `(pack_id, tid)`. If so we may have
# encountered a `tid` that can only be resolved by
Expand All @@ -524,8 +565,8 @@ def entry_getter(cls: Entry, attr_name: str, field_type):
pass
return attr_val

def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
"""A setter function for `dataclass` fields of entry object.
def entry_setter(cls: Entry, value: Any, attr_name: str):
"""A setter function for dataclass fields of entry object.
When the value contains entry objects, we will convert them into
``tid``s before storing to ``DataStore``. Additionally, if the entry
setter method is called on an attribute that does not have a pack
Expand All @@ -538,7 +579,6 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
cls: An ``Entry`` class object.
value: The value to be assigned to the attribute.
attr_name: The name of the attribute.
field_type: The type of the attribute.
"""
attr_value: Any

Expand All @@ -558,18 +598,22 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
data_store_ref = (
pack._data_store # pylint: disable=protected-access
)

attr_type = data_store_ref.get_attr_type(
cls.entry_type(), attr_name
)
# Assumption: Users will not assign value to a FList/FDict field.
# Only internal methods can set the FList/FDict field, and value's
# type has to be Iterator[Entry]/Dict[Any, Entry].
if field_type is FList:
if attr_type[0] is FList:
try:
attr_value = [entry.tid for entry in value]
except AttributeError as e:
raise ValueError(
"You are trying to assign value to a `FList` field, "
"which can only accept an iterator of `Entry` objects."
) from e
elif field_type is FDict:
elif attr_type[0] is FDict:
try:
attr_value = {
key: entry.tid for key, entry in value.items()
Expand Down Expand Up @@ -612,11 +656,9 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
self._save_entry_to_data_store(entry=entry)

# Register property functions for all dataclass fields.
for name, field in entry.__dataclass_fields__.items():

for name in entry.__dataclass_fields__:
# Convert the typing annotation to the original class.
# This will be used to determine if a field is FList/FDict.
field_type = get_origin(field.type)
setattr(
type(entry),
name,
Expand All @@ -625,12 +667,8 @@ def entry_setter(cls: Entry, value: Any, attr_name: str, field_type):
property(
# We need to bound the attribute name and field type here
# for the getter and setter of each field.
fget=partial(
entry_getter, attr_name=name, field_type=field_type
),
fset=partial(
entry_setter, attr_name=name, field_type=field_type
),
fget=partial(entry_getter, attr_name=name),
fset=partial(entry_setter, attr_name=name),
),
)

Expand Down
Loading

0 comments on commit 4dfcff7

Please sign in to comment.