Skip to content

Commit

Permalink
Pass kwargs to tokenizer when creating preprocessor from a tokenizer.json
Browse files Browse the repository at this point in the history
  • Loading branch information
SamanehSaadat committed May 17, 2024
1 parent 778ccd7 commit 323abb9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion keras_nlp/src/models/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def from_preset(

tokenizer = load_serialized_object(preset, TOKENIZER_CONFIG_FILE)
tokenizer.load_preset_assets(preset)
- preprocessor = cls(tokenizer=tokenizer)
+ preprocessor = cls(tokenizer=tokenizer, **kwargs)

return preprocessor

Expand Down
7 changes: 7 additions & 0 deletions keras_nlp/src/models/preprocessor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ def test_from_preset(self):
BertMaskedLMPreprocessor,
)

@pytest.mark.large
def test_from_preset_with_sequence_length(self):
    """Check that a `sequence_length` given to `from_preset` is kept.

    Builds a `BertPreprocessor` from the "bert_tiny_en_uncased" preset
    with an explicit `sequence_length` and asserts the resulting object
    reports that same value.
    """
    expected_length = 16
    loaded = BertPreprocessor.from_preset(
        "bert_tiny_en_uncased",
        sequence_length=expected_length,
    )
    self.assertEqual(loaded.sequence_length, expected_length)

@pytest.mark.large
def test_from_preset_errors(self):
with self.assertRaises(ValueError):
Expand Down

0 comments on commit 323abb9

Please sign in to comment.