Merge branch 'develop' into feat/add_label

explosion · Sep 5, 2023 · 7d03f3e · 7d03f3e
2 parents a080510 + 99adb49
commit 7d03f3e
Show file tree

Hide file tree

Showing 13 changed files with 99 additions and 137 deletions.
diff --git a/spacy_llm/tasks/__init__.py b/spacy_llm/tasks/__init__.py
@@ -12,10 +12,10 @@
 from .textcat import TextCatTask, make_textcat_task
 
 _LATEST_TASKS = (
-    "spacy.NER.v2",
+    "spacy.NER.v3",
     "spacy.REL.v1",
     "spacy.Sentiment.v1",
-    "spacy.SpanCat.v2",
+    "spacy.SpanCat.v3",
     "spacy.Summarization.v1",
     "spacy.TextCat.v3",
 )

diff --git a/spacy_llm/tasks/spancat/registry.py b/spacy_llm/tasks/spancat/registry.py
@@ -6,7 +6,6 @@
 from ...util import split_labels
 from ..span import parse_responses as parse_span_responses
 from ..span import parse_responses_cot as parse_span_responses_cot
-from ..span.task import SpanTaskLabelCheck
 from ..span.util import check_label_consistency as check_labels
 from ..span.util import check_label_consistency_cot as check_labels_cot
 from .task import DEFAULT_SPANCAT_TEMPLATE_V1, DEFAULT_SPANCAT_TEMPLATE_V2
@@ -145,7 +144,6 @@ def make_spancat_task_v3(
     case_sensitive_matching: bool = False,
     spans_key: str = "sc",
     scorer: Optional[Scorer] = None,
-    check_label_consistency: Optional[SpanTaskLabelCheck[SpanCatTask]] = None,
 ):
     """SpanCat.v3 task factory for SpanCat with chain-of-thought prompting.
 
@@ -168,7 +166,6 @@ def make_spancat_task_v3(
     case_sensitive_matching (bool): Whether to search without case sensitivity.
     spans_key (str): Key of the `Doc.spans` dict to save under.
     scorer (Optional[Scorer]): Scorer function.
-    check_label_consistency (SpanTaskLabelCheck): Callable to check label consistency.
     """
     labels_list = split_labels(labels)
     raw_examples = examples() if callable(examples) else examples
@@ -191,5 +188,5 @@ def make_spancat_task_v3(
         spans_key=spans_key,
         scorer=scorer or score,
         description=description,
-        check_label_consistency=check_label_consistency or check_labels_cot,
+        check_label_consistency=check_labels_cot,
     )
diff --git a/spacy_llm/tests/tasks/examples/ner.json b/spacy_llm/tests/tasks/examples/ner.json
@@ -25,12 +25,6 @@
         "is_entity": true,
         "label": "LOC",
         "reason": "is a location"
-      },
-      {
-        "text": "hill",
-        "is_entity": true,
-        "label": "DESTINATION",
-        "reason": "is a destination"
       }
     ]
   }

diff --git a/spacy_llm/tests/tasks/examples/ner.jsonl b/spacy_llm/tests/tasks/examples/ner.jsonl
@@ -1 +1 @@
-{"text":"Jack and Jill went up the hill.","spans":[{"text":"Jack","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"Jill","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"went up","is_entity":false,"label":"==NONE==","reason":"is a verb"},{"text":"hill","is_entity":true,"label":"LOC","reason":"is a location"},{"text":"hill","is_entity":true,"label":"DESTINATION","reason":"is a destination"}]}
+{"text":"Jack and Jill went up the hill.","spans":[{"text":"Jack","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"Jill","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"went up","is_entity":false,"label":"==NONE==","reason":"is a verb"},{"text":"hill","is_entity":true,"label":"LOC","reason":"is a location"}]}
diff --git a/spacy_llm/tests/tasks/examples/ner.yml b/spacy_llm/tests/tasks/examples/ner.yml
@@ -16,7 +16,3 @@
       is_entity: true
       label: LOC
       reason: is a location
-    - text: hill
-      is_entity: true
-      label: DESTINATION
-      reason: is a destination
diff --git a/spacy_llm/tests/tasks/legacy/test_ner.py b/spacy_llm/tests/tasks/legacy/test_ner.py
@@ -325,7 +325,7 @@ def test_ner_zero_shot_task(text, response, gold_ents):
     labels = "PER,ORG,LOC"
     llm_ner = make_ner_task_v2(labels=labels)
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list so we get what's inside
@@ -384,7 +384,7 @@ def test_ner_labels(response, normalizer, gold_ents):
     labels = "PER,ORG,LOC"
     llm_ner = make_ner_task_v2(labels=labels, normalizer=normalizer)
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list
@@ -433,7 +433,7 @@ def test_ner_alignment(response, alignment_mode, gold_ents):
     labels = "PER,ORG,LOC"
     llm_ner = make_ner_task_v2(labels=labels, alignment_mode=alignment_mode)  # type: ignore
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list
@@ -484,7 +484,7 @@ def test_ner_matching(response, case_sensitive, single_match, gold_ents):
         labels=labels, case_sensitive_matching=case_sensitive, single_match=single_match
     )
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list
@@ -500,7 +500,7 @@ def test_jinja_template_rendering_without_examples():
     with annoying newlines and spaces at the edge of the text.
     """
     labels = "PER,ORG,LOC"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc("Alice and Bob went to the supermarket")
 
     llm_ner = make_ner_task_v2(labels=labels, examples=None)
@@ -542,7 +542,7 @@ def test_jinja_template_rendering_with_examples(examples_path):
     with annoying newlines and spaces at the edge of the text.
     """
     labels = "PER,ORG,LOC"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc("Alice and Bob went to the supermarket")
 
     examples = fewshot_reader(examples_path)
@@ -601,7 +601,7 @@ def test_jinja_template_rendering_with_label_definitions():
     with annoying newlines and spaces at the edge of the text.
     """
     labels = "PER,ORG,LOC"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc("Alice and Bob went to the supermarket")
     llm_ner = make_ner_task_v2(
         labels=labels,
@@ -660,7 +660,7 @@ def test_external_template_actually_loads():
     template_path = str(TEMPLATES_DIR / "ner.jinja2")
     template = file_reader(template_path)
     labels = "PER,ORG,LOC"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc("Alice and Bob went to the supermarket")
 
     llm_ner = make_ner_task_v2(labels=labels, template=template)

diff --git a/spacy_llm/tests/tasks/legacy/test_spancat.py b/spacy_llm/tests/tasks/legacy/test_spancat.py
@@ -197,7 +197,7 @@ def test_spancat_zero_shot_task(text, response, gold_spans):
     labels = "PER,ORG,LOC"
     llm_spancat = make_spancat_task_v2(labels=labels)
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list so we get what's inside
@@ -256,7 +256,7 @@ def test_spancat_labels(response, normalizer, gold_spans):
     labels = "PER,ORG,LOC"
     llm_spancat = make_spancat_task_v2(labels=labels, normalizer=normalizer)
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list
@@ -305,7 +305,7 @@ def test_spancat_alignment(response, alignment_mode, gold_spans):
     labels = "PER,ORG,LOC"
     llm_spancat = make_spancat_task_v2(labels=labels, alignment_mode=alignment_mode)  # type: ignore
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list
@@ -356,7 +356,7 @@ def test_spancat_matching(response, case_sensitive, single_match, gold_spans):
         labels=labels, case_sensitive_matching=case_sensitive, single_match=single_match
     )
     # Prepare doc
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc_in = nlp.make_doc(text)
     # Pass to the parser
     # Note: parser() returns a list
@@ -372,7 +372,7 @@ def test_jinja_template_rendering_without_examples():
     with annoying newlines and spaces at the edge of the text.
     """
     labels = "PER,ORG,LOC"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc("Alice and Bob went to the supermarket")
 
     llm_spancat = make_spancat_task_v2(labels=labels, examples=None)
@@ -415,7 +415,7 @@ def test_jinja_template_rendering_with_examples(examples_path):
     with annoying newlines and spaces at the edge of the text.
     """
     labels = "PER,ORG,LOC"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc("Alice and Bob went to the supermarket")
 
     examples = fewshot_reader(examples_path)

diff --git a/spacy_llm/tests/tasks/test_lemma.py b/spacy_llm/tests/tasks/test_lemma.py
@@ -197,7 +197,7 @@ def test_jinja_template_rendering_without_examples():
     We apply the .strip() method for each prompt so that we don't have to deal
     with annoying newlines and spaces at the edge of the text.
     """
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     text = "Alice and Bob went to the supermarket"
     doc = nlp.make_doc(text)
 
@@ -240,7 +240,7 @@ def test_jinja_template_rendering_with_examples(examples_path):
     We apply the .strip() method for each prompt so that we don't have to deal
     with annoying newlines and spaces at the edge of the text.
     """
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     text = "Alice and Bob went to the supermarket."
     doc = nlp.make_doc(text)
 
@@ -333,7 +333,7 @@ def test_external_template_actually_loads():
     template_path = str(TEMPLATES_DIR / "lemma.jinja2")
     template = file_reader(template_path)
     text = "Alice and Bob went to the supermarket"
-    nlp = spacy.blank("xx")
+    nlp = spacy.blank("en")
     doc = nlp.make_doc(text)
 
     lemma_task = make_lemma_task(template=template)
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-{"text":"Jack and Jill went up the hill.","spans":[{"text":"Jack","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"Jill","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"went up","is_entity":false,"label":"==NONE==","reason":"is a verb"},{"text":"hill","is_entity":true,"label":"LOC","reason":"is a location"},{"text":"hill","is_entity":true,"label":"DESTINATION","reason":"is a destination"}]}
		+{"text":"Jack and Jill went up the hill.","spans":[{"text":"Jack","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"Jill","is_entity":true,"label":"PER","reason":"is the name of a person"},{"text":"went up","is_entity":false,"label":"==NONE==","reason":"is a verb"},{"text":"hill","is_entity":true,"label":"LOC","reason":"is a location"}]}