Skip to content

Commit

Permalink
Merge pull request #282 from dkpro/bugfix/280-JSON-CAS-parsing-does-not-handle-DocumentAnnotation-properly
Browse files Browse the repository at this point in the history

#280 - JSON CAS parsing does not handle DocumentAnnotation properly
  • Loading branch information
reckart authored Mar 21, 2023
2 parents 97f4fe0 + 4721cf5 commit 3b4a75b
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 4 deletions.
6 changes: 4 additions & 2 deletions cassis/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] =
self._max_sofa_num = 0
self._post_processors = []

embedded_typesystem = TypeSystem()
json_typesystem = data.get(TYPES_FIELD)
embedded_typesystem = TypeSystem(
add_document_annotation_type=not (json_typesystem.get(FLAG_DOCUMENT_ANNOTATION))
)

# First, build a dependency graph to support cases where a child type is defined before its super type
type_dependencies = defaultdict(set)
Expand All @@ -84,7 +86,7 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] =

# Second, load all the types but no features since features of a type X might be of a later loaded type Y
for type_name in toposort_flatten(type_dependencies):
if is_predefined(type_name):
if is_predefined(type_name) or embedded_typesystem.contains_type(type_name):
continue

self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name])
Expand Down
4 changes: 2 additions & 2 deletions cassis/typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1102,8 +1102,8 @@ def _defines_predefined_type(self, type_name):
self._predefined_types.add(type_name)

def _add_document_annotation_type(self):
t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName="uima.tcas.Annotation")
self.create_feature(t, name="language", rangeType="uima.cas.String")
t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName=TYPE_NAME_ANNOTATION)
self.create_feature(t, name="language", rangeType=TYPE_NAME_STRING)

def transitive_closure(self, seed_types: Set[Type], built_in: bool = False) -> Set[Type]:
# Build transitive closure of used types by following parents, features, etc.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"%TYPES" : {
"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" : {
"%NAME" : "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData",
"%SUPER_TYPE" : "uima.tcas.DocumentAnnotation",
"documentTitle" : {
"%NAME" : "documentTitle",
"%RANGE" : "uima.cas.String"
},
"documentId" : {
"%NAME" : "documentId",
"%RANGE" : "uima.cas.String"
},
"documentUri" : {
"%NAME" : "documentUri",
"%RANGE" : "uima.cas.String"
},
"collectionId" : {
"%NAME" : "collectionId",
"%RANGE" : "uima.cas.String"
},
"documentBaseUri" : {
"%NAME" : "documentBaseUri",
"%RANGE" : "uima.cas.String"
},
"isLastSegment" : {
"%NAME" : "isLastSegment",
"%RANGE" : "uima.cas.Boolean"
}
}
},
"%FEATURE_STRUCTURES" : [ {
"%ID" : 2,
"%TYPE" : "uima.cas.Sofa",
"sofaNum" : 1,
"sofaID" : "_InitialView",
"mimeType" : "text",
"sofaString" : "This is a test ."
}, {
"%ID" : 1,
"%TYPE" : "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData",
"@sofa" : 2,
"begin" : 0,
"end" : 16,
"documentId" : "doc",
"isLastSegment" : false
} ],
"%VIEWS" : {
"_InitialView" : {
"%SOFA" : 2,
"%MEMBERS" : [ 1 ]
}
}
}
4 changes: 4 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
["uima.tcas.DocumentAnnotation", 0, 6, "這是一個測試"],
],
),
(
os.path.join(SER_REF_DIR, "casExtendingDocumentAnnotation"),
[["de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", 0, 16, "This is a test ."]],
),
]


Expand Down

0 comments on commit 3b4a75b

Please sign in to comment.