diff --git a/cassis/json.py b/cassis/json.py index e832518..befcfda 100644 --- a/cassis/json.py +++ b/cassis/json.py @@ -74,8 +74,10 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = self._max_sofa_num = 0 self._post_processors = [] - embedded_typesystem = TypeSystem() json_typesystem = data.get(TYPES_FIELD) + embedded_typesystem = TypeSystem( + add_document_annotation_type=not (json_typesystem.get(FLAG_DOCUMENT_ANNOTATION)) + ) # First, build a dependency graph to support cases where a child type is defined before its super type type_dependencies = defaultdict(set) @@ -84,7 +86,7 @@ def deserialize(self, source: Union[IO, str], typesystem: Optional[TypeSystem] = # Second, load all the types but no features since features of a type X might be of a later loaded type Y for type_name in toposort_flatten(type_dependencies): - if is_predefined(type_name): + if is_predefined(type_name) or embedded_typesystem.contains_type(type_name): continue self._parse_type(embedded_typesystem, type_name, json_typesystem[type_name]) diff --git a/cassis/typesystem.py b/cassis/typesystem.py index f6b9240..dcf904e 100644 --- a/cassis/typesystem.py +++ b/cassis/typesystem.py @@ -1102,8 +1102,8 @@ def _defines_predefined_type(self, type_name): self._predefined_types.add(type_name) def _add_document_annotation_type(self): - t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName="uima.tcas.Annotation") - self.create_feature(t, name="language", rangeType="uima.cas.String") + t = self.create_type(name=_DOCUMENT_ANNOTATION_TYPE, supertypeName=TYPE_NAME_ANNOTATION) + self.create_feature(t, name="language", rangeType=TYPE_NAME_STRING) def transitive_closure(self, seed_types: Set[Type], built_in: bool = False) -> Set[Type]: # Build transitive closure of used types by following parents, features, etc. diff --git a/tests/test_files/json/fs_as_array/ser-ref/casExtendingDocumentAnnotation/data.json b/tests/test_files/json/fs_as_array/ser-ref/casExtendingDocumentAnnotation/data.json new file mode 100644 index 0000000..65a99ed --- /dev/null +++ b/tests/test_files/json/fs_as_array/ser-ref/casExtendingDocumentAnnotation/data.json @@ -0,0 +1,54 @@ +{ + "%TYPES" : { + "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" : { + "%NAME" : "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", + "%SUPER_TYPE" : "uima.tcas.DocumentAnnotation", + "documentTitle" : { + "%NAME" : "documentTitle", + "%RANGE" : "uima.cas.String" + }, + "documentId" : { + "%NAME" : "documentId", + "%RANGE" : "uima.cas.String" + }, + "documentUri" : { + "%NAME" : "documentUri", + "%RANGE" : "uima.cas.String" + }, + "collectionId" : { + "%NAME" : "collectionId", + "%RANGE" : "uima.cas.String" + }, + "documentBaseUri" : { + "%NAME" : "documentBaseUri", + "%RANGE" : "uima.cas.String" + }, + "isLastSegment" : { + "%NAME" : "isLastSegment", + "%RANGE" : "uima.cas.Boolean" + } + } + }, + "%FEATURE_STRUCTURES" : [ { + "%ID" : 2, + "%TYPE" : "uima.cas.Sofa", + "sofaNum" : 1, + "sofaID" : "_InitialView", + "mimeType" : "text", + "sofaString" : "This is a test ." + }, { + "%ID" : 1, + "%TYPE" : "de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", + "@sofa" : 2, + "begin" : 0, + "end" : 16, + "documentId" : "doc", + "isLastSegment" : false + } ], + "%VIEWS" : { + "_InitialView" : { + "%SOFA" : 2, + "%MEMBERS" : [ 1 ] + } + } +} diff --git a/tests/test_json.py b/tests/test_json.py index c7ed910..7b3c98d 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -77,6 +77,10 @@ ["uima.tcas.DocumentAnnotation", 0, 6, "這是一個測試"], ], ), + ( + os.path.join(SER_REF_DIR, "casExtendingDocumentAnnotation"), + [["de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData", 0, 16, "This is a test ."]], + ), ]