forked from explosion/spacy-course
-
Notifications
You must be signed in to change notification settings - Fork 0
/
exc_04_03.py
26 lines (21 loc) · 807 Bytes
/
exc_04_03.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import json
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span
# Load the example texts from the exercise data file. The parsed JSON is later
# passed to nlp.pipe(), so it is presumably a list of strings.
with open("exercises/en/iphone.json", encoding="utf8") as f:
    TEXTS = json.load(f)
# Blank English pipeline (tokenizer only) and a rule-based matcher sharing its vocab
nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)
# NOTE(review): the `____` names below are undefined; they look like exercise
# blanks meant to be replaced with token attribute/value pairs - confirm.
# Two tokens whose lowercase forms match "iphone" and "x"
pattern1 = [{____: ____}, {____: ____}]
# A token whose lowercase form matches "iphone", followed by a digit
pattern2 = [{____: ____}, {____: ____}]
# Add the patterns to the matcher and create docs with the matched entities
matcher.add("GADGET", [pattern1, pattern2])
# Run every text through the pipeline and store the matcher hits as entities.
docs = []
for doc in nlp.pipe(TEXTS):
    matches = matcher(doc)
    # Each match is a (match_id, start, end) triple; the match ID is reused
    # as the label of the span covering tokens start..end.
    spans = [Span(doc, start, end, label=match_id) for match_id, start, end in matches]
    print(spans)
    # Overwrite the doc's entities with the matched spans.
    doc.ents = spans
    docs.append(doc)