Skip to content

Commit

Permalink
fix: chat auto, gracefully handle when str not found (#781)
Browse files Browse the repository at this point in the history
  • Loading branch information
Elliott authored Oct 24, 2023
1 parent 1b059c8 commit 5c4cc7e
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
2 changes: 1 addition & 1 deletion dataquality/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"""


__version__ = "1.1.5"
__version__ = "1.1.6"

import sys
from typing import Any, List, Optional
Expand Down
7 changes: 6 additions & 1 deletion dataquality/integrations/seq2seq/formatters/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,12 @@ def format_sample(
# instance of the user or assistant role and start there
first_user_index = parsed_history.find(f"{self.user}: ")
first_assistant_index = parsed_history.find(f"{self.assistant}: ")
start_index = min(first_user_index, first_assistant_index)
# str.find() returns -1 when the substring is absent; drop those sentinel values
non_negative = [
val for val in [first_user_index, first_assistant_index] if val >= 0
]
# If both are -1, fall back to the last max_input_tokens characters (a str slice, not tokens)
start_index = min(non_negative) if non_negative else -self.max_input_tokens
user_inputs[i] = parsed_history[start_index:]

return formatted_sample

0 comments on commit 5c4cc7e

Please sign in to comment.