Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#318 - Address linter issues #319

Merged
merged 1 commit into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from cassis.typesystem import (
FEATURE_BASE_NAME_HEAD,
FEATURE_BASE_NAME_LANGUAGE,
TYPE_NAME_DOCUMENT_ANNOTATION,
TYPE_NAME_FS_ARRAY,
TYPE_NAME_FS_LIST,
TYPE_NAME_SOFA,
Expand All @@ -21,8 +23,6 @@
TypeCheckError,
TypeSystem,
TypeSystemMode,
TYPE_NAME_DOCUMENT_ANNOTATION,
FEATURE_BASE_NAME_LANGUAGE,
)

_validator_optional_string = validators.optional(validators.instance_of(str))
Expand Down
34 changes: 32 additions & 2 deletions cassis/json.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,42 @@
import base64
import json
import math
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from io import TextIOBase, TextIOWrapper
from math import isnan
from typing import Union, IO, Optional, Dict, List
from toposort import toposort_flatten

from cassis.cas import NAME_DEFAULT_SOFA, Cas, IdGenerator, Sofa, View
from cassis.typesystem import *
from cassis.typesystem import (
TYPE_NAME_ANNOTATION,
TypeSystem,
is_predefined,
merge_typesystems,
TYPE_NAME_SOFA,
FEATURE_BASE_NAME_SOFAARRAY,
array_type_name_for_type,
FEATURE_BASE_NAME_SOFASTRING,
FEATURE_BASE_NAME_SOFAID,
FEATURE_BASE_NAME_SOFAMIME,
FEATURE_BASE_NAME_SOFANUM,
FEATURE_BASE_NAME_SOFAURI,
TYPE_NAME_FS_ARRAY,
TYPE_NAME_BYTE_ARRAY,
TYPE_NAME_FLOAT_ARRAY,
TYPE_NAME_DOUBLE_ARRAY,
TypeSystemMode,
TYPE_NAME_DOCUMENT_ANNOTATION,
Type,
Feature,
TYPE_NAME_TOP,
is_primitive_array,
TYPE_NAME_FLOAT,
TYPE_NAME_DOUBLE,
element_type_name_for_array_type,
is_primitive,
is_array,
)

RESERVED_FIELD_PREFIX = "%"
REF_FEATURE_PREFIX = "@"
Expand Down
4 changes: 2 additions & 2 deletions cassis/xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from cassis.typesystem import (
_LIST_TYPES,
_PRIMITIVE_ARRAY_TYPES,
_PRIMITIVE_LIST_TYPES,
FEATURE_BASE_NAME_BEGIN,
FEATURE_BASE_NAME_END,
FEATURE_BASE_NAME_HEAD,
Expand Down Expand Up @@ -295,7 +294,8 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
else:
view = cas.create_view(sofa.sofaID, xmiID=sofa.xmiID, sofaNum=sofa.sofaNum)

# Directly set the sofaString and offsetConverter for the sofa to avoid recomputing the offset conversion (slow!) when using the setter
# Directly set the sofaString and offsetConverter for the sofa to avoid recomputing the offset conversion
# (slow!) when using the setter
view.get_sofa()._sofaString = sofa.sofaString
view.get_sofa()._offset_converter = sofa._offset_converter
view.sofa_mime = sofa.mimeType
Expand Down
8 changes: 4 additions & 4 deletions tests/test_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ def test_default_typesystem_is_not_shared():
cas1 = Cas()
cas2 = Cas()

t1 = cas1.typesystem.create_type(name="test.Type")
t2 = cas2.typesystem.create_type(name="test.Type")
cas1.typesystem.create_type(name="test.Type")
cas2.typesystem.create_type(name="test.Type")


def test_default_typesystem_is_not_shared_load_from_xmi(empty_cas_xmi):
# https://github.com/dkpro/dkpro-cassis/issues/67
cas1 = load_cas_from_xmi(empty_cas_xmi)
cas2 = load_cas_from_xmi(empty_cas_xmi)

t1 = cas1.typesystem.create_type(name="test.Type")
t2 = cas2.typesystem.create_type(name="test.Type")
cas1.typesystem.create_type(name="test.Type")
cas2.typesystem.create_type(name="test.Type")


# View
Expand Down
2 changes: 1 addition & 1 deletion tests/test_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode, TYPE_NAME_DOCUMENT_ANNOTATION
from cassis.typesystem import TYPE_NAME_ANNOTATION, TYPE_NAME_DOCUMENT_ANNOTATION, TypeSystemMode
from tests.fixtures import *
from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
from tests.util import assert_json_equal
Expand Down
11 changes: 5 additions & 6 deletions tests/test_typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
TYPE_NAME_ANNOTATION_BASE,
TYPE_NAME_ARRAY_BASE,
TYPE_NAME_BOOLEAN,
TYPE_NAME_DOCUMENT_ANNOTATION,
TYPE_NAME_INTEGER,
TYPE_NAME_SOFA,
TYPE_NAME_STRING,
TYPE_NAME_STRING_ARRAY,
TYPE_NAME_TOP,
TypeCheckError,
is_predefined,
TYPE_NAME_DOCUMENT_ANNOTATION,
)
from tests.fixtures import *
from tests.util import assert_xml_equal
Expand Down Expand Up @@ -315,7 +315,6 @@ def test_type_inherits_from_annotation():
],
)
def test_is_predefined(type_name: str, expected: bool):

assert is_predefined(type_name) == expected


Expand Down Expand Up @@ -498,7 +497,7 @@ def test_is_primitive_collection(type_name: str, expected: bool):
("uima.cas.DoubleArray", True),
],
)
def test_is_primitive_collection(type_name: str, expected: bool):
def test_is_primitive_array(type_name: str, expected: bool):
typesystem = TypeSystem()

assert typesystem.is_primitive_array(type_name) == expected
Expand Down Expand Up @@ -883,9 +882,9 @@ def test_that_merging_types_creates_self_contained_type_system():
def test_that_dkpro_core_typeystem_can_be_loaded():
ts = load_dkpro_core_typesystem()

POS = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS")
NamedEntity = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity")
CoreferenceLink = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink")
assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS") is not None
assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity") is not None
assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink") is not None


# Type checking
Expand Down
2 changes: 0 additions & 2 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from pathlib import Path

from cassis.typesystem import TYPE_NAME_ANNOTATION, TYPE_NAME_FS_ARRAY
from tests.fixtures import *
from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
Expand Down
33 changes: 17 additions & 16 deletions tests/test_xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_deserializing_from_string(small_typesystem_xml):
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="mySofa" mimeType="text/plain"
sofaString="Joe waited for the train . The train was late ."/>
<cas:View sofa="1" members="8 79 84"/>
</xmi:XMI>
</xmi:XMI>
"""
load_cas_from_xmi(cas_xmi, typesystem=typesystem)

Expand Down Expand Up @@ -102,7 +102,7 @@ def test_views_are_parsed(small_xmi, small_typesystem_xml):
<cas:Sofa xmi:id="2" sofaNum="2" sofaID="sofa2" mimeType="text/plain"
sofaString="The train was late ."/>
<cas:View sofa="2" members="84"/>
</xmi:XMI>
</xmi:XMI>
"""
cas = load_cas_from_xmi(cas_xmi, typesystem=typesystem)

Expand Down Expand Up @@ -302,11 +302,11 @@ def test_offsets_are_recomputed_when_sofa_string_changes(cas_with_smileys_xmi, d

def test_offsets_work_for_empty_sofastring():
xmi = """<?xml version="1.0" encoding="UTF-8"?>
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="0" language="en" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="" />
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="0" language="en" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="" />
<cas:View sofa="1" members="2" />
</xmi:XMI>"""

Expand All @@ -316,14 +316,15 @@ def test_offsets_work_for_empty_sofastring():

def test_that_invalid_offsets_remain_unmapped_on_import():
xmi = """<?xml version="1.0" encoding="UTF-8"?>
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="4" language="en" />
<tcas:Annotation xmi:id="3" sofa="1" begin="100" end="200" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Test" />
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="4" language="en" />
<tcas:Annotation xmi:id="3" sofa="1" begin="100" end="200" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Test" />
<cas:View sofa="1" members="2 3" />
</xmi:XMI>"""
</xmi:XMI>
"""

# assert no exception
with warnings.catch_warnings(record=True) as ws:
Expand Down Expand Up @@ -361,14 +362,14 @@ def test_leniency_type_not_in_typesystem_lenient(cas_with_leniency_xmi, small_ty
typesystem = load_typesystem(small_typesystem_xml)

with pytest.warns(UserWarning):
cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True)
load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True)


def test_leniency_type_not_in_typesystem_not_lenient(cas_with_leniency_xmi, small_typesystem_xml):
typesystem = load_typesystem(small_typesystem_xml)

with pytest.raises(TypeNotFoundError):
cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=False)
load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=False)


def test_multiple_references_allowed_true():
Expand Down