properly fix intent_name by removing it (#6962)
* properly fix intent_name by removing it

* fix training_data filtering

* return disappeared nlu_examples

* fix creating message from events

* remove unused import

* fix a mess created by empty string

* pass only nlu examples

* fix is_core_message

* fix is_core_message

* fix pattern utils tests

* fix test_training_data

* fix test_regex_entity_extractor

* fix test_test

* use only NLU examples if needed

* fix test_bilou_utils

* increase timeout for pipeline test

* update examples for tests

* update check if training is possible

Co-authored-by: Tanja Bergmann <[email protected]>
Co-authored-by: Daksh <[email protected]>
3 people authored Oct 8, 2020
1 parent 985a2fb commit cc8a264
Showing 17 changed files with 119 additions and 149 deletions.
18 changes: 18 additions & 0 deletions data/examples/wit/demo-flights.json
@@ -34,6 +34,12 @@
{
"text" : "i'm looking for a flight from london to amsterdam next monday",
"entities" : [
{
"entity" : "intent",
"value" : "\"flight_booking\"",
"start" : 0,
"end" : 42
},
{
"entity" : "location",
"value" : "\"london\"",
@@ -59,6 +65,12 @@
{
"text" : "i want to fly to berlin",
"entities" : [
{
"entity" : "intent",
"value" : "\"flight_booking\"",
"start" : 0,
"end" : 42
},
{
"entity" : "location",
"value" : "\"berlin\"",
@@ -71,6 +83,12 @@
{
"text" : "i want to fly from london",
"entities" : [
{
"entity" : "intent",
"value" : "\"flight_booking\"",
"start" : 0,
"end" : 42
},
{
"entity" : "location",
"value" : "\"london\"",
9 changes: 6 additions & 3 deletions data/test/wit_converted_to_rasa.json
@@ -28,7 +28,8 @@
]
},
{
"text": "i'm looking for a flight from london to amsterdam next monday",
"text": "i'm looking for a flight from london to amsterdam next monday",
"intent": "flight_booking",
"entities": [
{
"start": 30,
@@ -53,7 +54,8 @@
]
},
{
"text": "i want to fly to berlin",
"text": "i want to fly to berlin",
"intent": "flight_booking",
"entities": [
{
"start": 17,
@@ -65,7 +67,8 @@
]
},
{
"text": "i want to fly from london",
"text": "i want to fly from london",
"intent": "flight_booking",
"entities": [
{
"start": 19,
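The two fixture changes mirror each other: the wit.ai export marks the intent as a pseudo-entity named "intent" spanning the utterance, and the converted Rasa fixture now carries it as a proper top-level "intent" key. A minimal sketch of checking the result, assuming a Rasa 2.x environment where load_data auto-detects the wit.ai format (the wit reader itself is among the files not shown in this excerpt):

from rasa.shared.nlu.training_data.loading import load_data

# assumption: load_data recognises the wit.ai export and converts the
# whole-utterance "intent" entity into the message's intent
training_data = load_data("data/examples/wit/demo-flights.json")
for message in training_data.nlu_examples:
    print(message.get("text"), "->", message.get("intent"))
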
4 changes: 2 additions & 2 deletions rasa/nlu/test.py
@@ -1306,7 +1306,7 @@ def get_eval_data(

should_eval_entities = is_entity_extractor_present(interpreter)

for example in tqdm(test_data.training_examples):
for example in tqdm(test_data.nlu_examples):
result = interpreter.parse(example.get(TEXT), only_output_properties=False)

if should_eval_intents:
@@ -1861,7 +1861,7 @@ def compare_nlu(
_, train_included = train.train_test_split(percentage / 100)
# only count for the first run and ignore the others
if run == 0:
training_examples_per_run.append(len(train_included.training_examples))
training_examples_per_run.append(len(train_included.nlu_examples))

model_output_path = os.path.join(run_path, percent_string)
train_split_path = os.path.join(model_output_path, "train")
4 changes: 2 additions & 2 deletions rasa/nlu/utils/bilou_utils.py
@@ -130,7 +130,7 @@ def build_tag_id_dict(
distinct_tags = set(
[
tag_without_prefix(e)
for example in training_data.training_examples
for example in training_data.nlu_examples
if example.get(bilou_key)
for e in example.get(bilou_key)
]
@@ -157,7 +157,7 @@ def apply_bilou_schema(training_data: TrainingData) -> None:
Args:
training_data: the training data
"""
for message in training_data.training_examples:
for message in training_data.nlu_examples:
entities = message.get(ENTITIES)

if not entities:
2 changes: 1 addition & 1 deletion rasa/shared/core/events.py
@@ -1039,7 +1039,7 @@ class ActionExecuted(Event):

def __init__(
self,
action_name: Text,
action_name: Optional[Text] = None,
policy: Optional[Text] = None,
confidence: Optional[float] = None,
timestamp: Optional[float] = None,
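Making action_name optional means an ActionExecuted event can now be constructed without a name, presumably for end-to-end events that only carry the bot's literal action_text (an attribute the importer change below reads). A minimal sketch of the signature change:

from rasa.shared.core.events import ActionExecuted

named = ActionExecuted("action_listen")  # unchanged: name given positionally
unnamed = ActionExecuted()               # now valid: action_name defaults to None
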
17 changes: 11 additions & 6 deletions rasa/shared/importers/importer.py
@@ -13,7 +13,7 @@
from rasa.shared.core.training_data.structures import StoryGraph
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.constants import INTENT_NAME, TEXT
from rasa.shared.nlu.constants import ENTITIES, ACTION_NAME
from rasa.shared.importers.autoconfig import TrainingType
from rasa.shared.core.domain import IS_RETRIEVAL_INTENT_KEY

@@ -533,18 +533,23 @@ def _unique_events_from_stories(


def _messages_from_user_utterance(event: UserUttered) -> Message:
return Message(data={TEXT: event.text, INTENT_NAME: event.intent_name})
# sub state correctly encodes intent vs text
data = event.as_sub_state()
# sub state stores entities differently
if data.get(ENTITIES) and event.entities:
data[ENTITIES] = event.entities

return Message(data=data)


def _messages_from_action(event: ActionExecuted) -> Message:
return Message.build_from_action(
action_name=event.action_name, action_text=event.action_text or ""
)
# sub state correctly encodes action_name vs action_text
return Message(data=event.as_sub_state())


def _additional_training_data_from_default_actions() -> TrainingData:
additional_messages_from_default_actions = [
Message.build_from_action(action_name=action_name)
Message(data={ACTION_NAME: action_name})
for action_name in rasa.shared.core.constants.DEFAULT_ACTION_NAMES
]

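The importer no longer rebuilds messages by hand from INTENT_NAME or the removed Message.build_from_action helper; it reuses the event's own sub state, which already distinguishes intent from text and action_name from action_text. A rough illustration of the user-utterance path; the exact keys returned by as_sub_state() and its reduced entity encoding are assumptions inferred from how the new code uses them:

from rasa.shared.core.events import UserUttered
from rasa.shared.nlu.constants import ENTITIES
from rasa.shared.nlu.training_data.message import Message

event = UserUttered(
    text="i want to fly to berlin",
    intent={"name": "flight_booking"},
    entities=[{"entity": "location", "value": "berlin", "start": 17, "end": 23}],
)

data = event.as_sub_state()
# the sub state is assumed to keep only the entity types, so the full entity
# dicts (with start/end/value) are copied back in before building the Message
if data.get(ENTITIES) and event.entities:
    data[ENTITIES] = event.entities

message = Message(data=data)
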
1 change: 0 additions & 1 deletion rasa/shared/nlu/constants.py
@@ -4,7 +4,6 @@
INTENT_RESPONSE_KEY = "intent_response_key"
ACTION_TEXT = "action_text"
ACTION_NAME = "action_name"
INTENT_NAME = "intent_name"
INTENT_NAME_KEY = "name"
METADATA = "metadata"
METADATA_INTENT = "intent"
24 changes: 2 additions & 22 deletions rasa/shared/nlu/training_data/message.py
@@ -19,7 +19,6 @@
FEATURE_TYPE_SEQUENCE,
ACTION_TEXT,
ACTION_NAME,
INTENT_NAME,
)

if typing.TYPE_CHECKING:
@@ -132,24 +131,6 @@ def build(
# pytype: enable=unsupported-operands
return cls(data, **kwargs)

@classmethod
def build_from_action(
cls,
action_text: Optional[Text] = "",
action_name: Optional[Text] = "",
**kwargs: Any,
) -> "Message":
"""
Build a `Message` from `ActionExecuted` data.
Args:
action_text: text of a bot's utterance
action_name: name of an action executed
Returns:
Message
"""
action_data = {ACTION_TEXT: action_text, ACTION_NAME: action_name}
return cls(data=action_data, **kwargs)

def get_full_intent(self) -> Text:
"""Get intent as it appears in training data"""

@@ -330,9 +311,8 @@ def is_core_message(self) -> bool:
Returns:
True, if message is a core message, false otherwise.
"""
return (
self.data.get(ACTION_NAME) is not None
or self.data.get(INTENT_NAME) is not None
return bool(
self.data.get(ACTION_NAME)
or self.data.get(ACTION_TEXT)
or (
(self.data.get(INTENT) or self.data.get(RESPONSE))
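The rewritten is_core_message switches from is-not-None checks to plain truthiness, so empty strings (which the removed build_from_action used as defaults) no longer count as a set action name. A minimal sketch of the difference, using a made-up message:

from rasa.shared.nlu.training_data.message import Message

# made-up example: real NLU text plus an empty action_name left over from a default
msg = Message(data={"text": "hello", "action_name": ""})

# old check: self.data.get(ACTION_NAME) is not None  -> True  (treated as core-only)
# new check: bool(self.data.get(ACTION_NAME) or ...)  -> False (kept as an NLU example)
assert not msg.is_core_message()
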
49 changes: 17 additions & 32 deletions rasa/shared/nlu/training_data/training_data.py
@@ -21,7 +21,7 @@
ENTITIES,
TEXT,
ACTION_NAME,
INTENT_NAME,
ACTION_TEXT,
)
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.training_data import util
@@ -130,21 +130,21 @@ def sanitize_examples(examples: List[Message]) -> List[Message]:

return list(OrderedDict.fromkeys(examples))

@lazy_property
def nlu_examples(self) -> List[Message]:
return [ex for ex in self.training_examples if not ex.is_core_message()]

@lazy_property
def intent_examples(self) -> List[Message]:
return [ex for ex in self.training_examples if ex.get(INTENT)]
return [ex for ex in self.nlu_examples if ex.get(INTENT)]

@lazy_property
def response_examples(self) -> List[Message]:
return [ex for ex in self.training_examples if ex.get(INTENT_RESPONSE_KEY)]
return [ex for ex in self.nlu_examples if ex.get(INTENT_RESPONSE_KEY)]

@lazy_property
def entity_examples(self) -> List[Message]:
return [ex for ex in self.training_examples if ex.get(ENTITIES)]

@lazy_property
def nlu_examples(self) -> List[Message]:
return [ex for ex in self.training_examples if not ex.is_core_message()]
return [ex for ex in self.nlu_examples if ex.get(ENTITIES)]

@lazy_property
def intents(self) -> Set[Text]:
@@ -577,38 +577,23 @@ def is_empty(self) -> bool:
"""Checks if any training data was loaded."""

lists_to_check = [
self._training_examples_without_empty_e2e_examples(),
self.training_examples,
self.entity_synonyms,
self.regex_features,
self.lookup_tables,
]
return not any([len(lst) > 0 for lst in lists_to_check])

def without_empty_e2e_examples(self) -> "TrainingData":
"""Removes training data examples from intent labels and action names which
were added for end-to-end training.
Returns:
Itself but without training examples which don't have a text or intent.
"""
training_examples = copy.deepcopy(self.training_examples)
entity_synonyms = self.entity_synonyms.copy()
regex_features = copy.deepcopy(self.regex_features)
lookup_tables = copy.deepcopy(self.lookup_tables)
responses = copy.deepcopy(self.responses)
copied = TrainingData(
training_examples, entity_synonyms, regex_features, lookup_tables, responses
)
copied.training_examples = self._training_examples_without_empty_e2e_examples()

return copied
def can_train_nlu_model(self) -> bool:
"""Checks if any NLU training data was loaded."""

def _training_examples_without_empty_e2e_examples(self) -> List[Message]:
return [
example
for example in self.training_examples
if not example.get(ACTION_NAME) and not example.get(INTENT_NAME)
lists_to_check = [
self.nlu_examples,
self.entity_synonyms,
self.regex_features,
self.lookup_tables,
]
return not any([len(lst) > 0 for lst in lists_to_check])


def list_to_str(lst: List[Text], delim: Text = ", ", quote: Text = "'") -> Text:
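With the *_examples properties routed through nlu_examples, messages that exist only for Core (action names, or intents without text, pulled in from stories) no longer leak into intent, response, or entity examples. A rough sketch with made-up messages:

from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.training_data.training_data import TrainingData

training_data = TrainingData(
    training_examples=[
        Message(data={"text": "i want to fly to berlin", "intent": "flight_booking"}),
        Message(data={"action_name": "utter_greet"}),  # core-only message
    ]
)

assert len(training_data.training_examples) == 2
assert len(training_data.nlu_examples) == 1     # core-only message filtered out
assert len(training_data.intent_examples) == 1  # now derived from nlu_examples
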
6 changes: 3 additions & 3 deletions rasa/train.py
@@ -157,7 +157,7 @@ async def _train_async_internal(
file_importer.get_stories(), file_importer.get_nlu_data()
)

if stories.is_empty() and nlu_data.is_empty():
if stories.is_empty() and nlu_data.can_train_nlu_model():
print_error(
"No training data given. Please provide stories and NLU data in "
"order to train a Rasa model using the '--data' argument."
@@ -174,7 +174,7 @@
additional_arguments=nlu_additional_arguments,
)

if nlu_data.is_empty():
if nlu_data.can_train_nlu_model():
print_warning("No NLU data present. Just a Rasa Core model will be trained.")
return await _train_core_with_validated_data(
file_importer,
@@ -495,7 +495,7 @@ async def _train_nlu_async(
)

training_data = await file_importer.get_nlu_data()
if training_data.is_empty():
if training_data.can_train_nlu_model():
print_error(
f"Path '{nlu_data}' doesn't contain valid NLU data in it. "
f"Please verify the data format. "
31 changes: 23 additions & 8 deletions tests/nlu/extractors/test_regex_entity_extractor.py
@@ -4,7 +4,7 @@

from rasa.shared.nlu.training_data.training_data import TrainingData
from rasa.shared.nlu.training_data.message import Message
from rasa.shared.nlu.constants import ENTITIES, TEXT
from rasa.shared.nlu.constants import ENTITIES, TEXT, INTENT
from rasa.nlu.extractors.regex_entity_extractor import RegexEntityExtractor


@@ -86,12 +86,17 @@ def test_process(
training_data.lookup_tables = lookup
training_data.training_examples = [
Message(
data={TEXT: "Hi Max!", "entities": [{"entity": "person", "value": "Max"}]}
data={
TEXT: "Hi Max!",
INTENT: "greet",
ENTITIES: [{"entity": "person", "value": "Max"}],
}
),
Message(
data={
TEXT: "I live in Berlin",
"entities": [{"entity": "city", "value": "Berlin"}],
INTENT: "inform",
ENTITIES: [{"entity": "city", "value": "Berlin"}],
}
),
]
@@ -165,12 +170,17 @@ def test_lowercase(
training_data.lookup_tables = lookup
training_data.training_examples = [
Message(
data={TEXT: "Hi Max!", "entities": [{"entity": "person", "value": "Max"}]}
data={
TEXT: "Hi Max!",
INTENT: "greet",
ENTITIES: [{"entity": "person", "value": "Max"}],
}
),
Message(
data={
TEXT: "I live in Berlin",
"entities": [{"entity": "city", "value": "Berlin"}],
INTENT: "inform",
ENTITIES: [{"entity": "city", "value": "Berlin"}],
}
),
]
@@ -184,18 +194,23 @@


def test_do_not_overwrite_any_entities():
message = Message(data={TEXT: "Max lives in Berlin."})
message = Message(data={TEXT: "Max lives in Berlin.", INTENT: "inform"})
message.set(ENTITIES, [{"entity": "person", "value": "Max", "start": 0, "end": 3}])

training_data = TrainingData()
training_data.training_examples = [
Message(
data={TEXT: "Hi Max!", "entities": [{"entity": "person", "value": "Max"}]}
data={
TEXT: "Hi Max!",
INTENT: "greet",
ENTITIES: [{"entity": "person", "value": "Max"}],
}
),
Message(
data={
TEXT: "I live in Berlin",
"entities": [{"entity": "city", "value": "Berlin"}],
INTENT: "inform",
ENTITIES: [{"entity": "city", "value": "Berlin"}],
}
),
]