
Rename preset IDs for consistency (#612)
And start to add rules we would like to follow to our style guide.
mattdangerw authored Dec 27, 2022
1 parent f7d816f commit 729815b
Showing 26 changed files with 140 additions and 111 deletions.
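In practical terms, the rename drops the redundant `"cased"` tag (cased is now the unmarked default, so only uncased presets keep a suffix) and adds an explicit language code to presets that lacked one. A minimal before/after sketch, assuming the `keras_nlp.models` classes referenced in the diffs below:

```python
import keras_nlp

# Before this commit:
#   "bert_base_en_cased", "deberta_v3_base", "gpt2_base"
# After this commit:
#   "bert_base_en", "deberta_v3_base_en", "gpt2_base_en"

# DeBERTaV3 presets now carry an explicit language code.
model = keras_nlp.models.DebertaV3Backbone.from_preset("deberta_v3_base_en")

# Uncased presets are still tagged explicitly; cased is the default,
# e.g. "bert_base_en_uncased" is unchanged.
```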
23 changes: 23 additions & 0 deletions STYLE_GUIDE.md
@@ -18,6 +18,29 @@ When a specific abbreviation is very common and is pronounceable (acronym),
consider it as a standalone word, e.g. Bert, Deberta, etc. In this case, "Bert"
is considered as a common noun and not an abbreviation anymore.

## Naming of Models and Presets

Naming of models and presets is a difficult and important element of our
library's usability. In general we try to follow the branding of "upstream"
model naming, subject to the consistency constraints laid out here.

- The model and preset names should be recognizable to users familiar with the
original release. E.g. the model that goes with the "DeBERTaV3" paper should
be called `DebertaV3`. A release of a [toxic-bert](https://huggingface.co/unitary/toxic-bert)
  checkpoint for `keras_nlp.models.Bert` should include the string
`"toxic_bert"`.
- All preset names should include the language of the pretraining data. If three
  or more languages are supported, the preset name should include `"multi"` (not
  the single letter "m").
- If a preset lowercases input for a case-sensitive language, the preset name
  should be marked with `"uncased"`.
- Don't abbreviate size names. E.g. "xsmall" or "XL" in an original checkpoint
  release should map to `"extra_small"` or `"extra_large"` in preset names.
- No configuration in names. E.g. use "bert_base" instead of
"bert_L-12_H-768_A-12".

When in doubt, readability should win out!

## File names

When possible, keep publicly documented classes in their own files, and make
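The naming rules added above amount to a small normalization scheme. Purely as an illustration (this helper is not part of the commit or of keras_nlp; the function name and mappings are assumptions), the rules could be expressed as:

```python
# Illustrative only -- not part of this commit or of keras_nlp.
SIZE_NAMES = {
    "xsmall": "extra_small",
    "xs": "extra_small",
    "xl": "extra_large",
    "xxl": "extra_extra_large",
}

def preset_name(model, size, language, uncased=False):
    """Build a preset ID: model, unabbreviated size, language, optional flag."""
    size = SIZE_NAMES.get(size.lower(), size.lower())
    parts = [model, size, language]
    if uncased:
        parts.append("uncased")  # cased is the unmarked default
    return "_".join(parts)

print(preset_name("deberta_v3", "xsmall", "en"))  # deberta_v3_extra_small_en
print(preset_name("bert", "base", "multi"))       # bert_base_multi
print(preset_name("bert", "large", "en", uncased=True))  # bert_large_en_uncased
```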
18 changes: 9 additions & 9 deletions keras_nlp/models/bert/bert_presets.py
@@ -108,7 +108,7 @@
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_uncased/v1/vocab.txt",
"vocabulary_hash": "64800d5d8528ce344256daf115d4965e",
},
"bert_base_en_cased": {
"bert_base_en": {
"config": {
"vocabulary_size": 28996,
"num_layers": 12,
@@ -126,9 +126,9 @@
"Base size of BERT where case is maintained. "
"Trained on English Wikipedia + BooksCorpus."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_cased/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/model.h5",
"weights_hash": "f94a6cb012e18f4fb8ec92abb91864e9",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_cased/v1/vocab.txt",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/vocab.txt",
"vocabulary_hash": "bb6ca9b42e790e5cd986bbb16444d0e0",
},
"bert_base_zh": {
@@ -151,7 +151,7 @@
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_zh/v1/vocab.txt",
"vocabulary_hash": "3b5b76c4aef48ecf8cb3abaafe960f09",
},
"bert_base_multi_cased": {
"bert_base_multi": {
"config": {
"vocabulary_size": 119547,
"num_layers": 12,
@@ -169,9 +169,9 @@
"Base size of BERT. Trained on trained on Wikipedias of 104 "
"languages."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi_cased/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi/v1/model.h5",
"weights_hash": "b0631cec0a1f2513c6cfd75ba29c33aa",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi_cased/v1/vocab.txt",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi/v1/vocab.txt",
"vocabulary_hash": "d9d865138d17f1958502ed060ecfeeb6",
},
"bert_large_en_uncased": {
@@ -197,7 +197,7 @@
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_uncased/v1/vocab.txt",
"vocabulary_hash": "64800d5d8528ce344256daf115d4965e",
},
"bert_large_en_cased": {
"bert_large_en": {
"config": {
"vocabulary_size": 28996,
"num_layers": 24,
@@ -215,9 +215,9 @@
"Base size of BERT where case is maintained. "
"Trained on English Wikipedia + BooksCorpus."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_cased/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en/v1/model.h5",
"weights_hash": "8b8ab82290bbf4f8db87d4f100648890",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_cased/v1/vocab.txt",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en/v1/vocab.txt",
"vocabulary_hash": "bb6ca9b42e790e5cd986bbb16444d0e0",
},
}
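Note that each renamed entry keeps its config and hashes; only the registry key and the storage paths change. As a sketch of how a preset's URL/hash pair can be consumed (an assumption for illustration, not the actual keras_nlp loading code), `keras.utils.get_file` downloads a file and verifies it against the hash:

```python
from tensorflow import keras

# Hypothetical loader sketch; not the actual keras_nlp implementation.
preset = {
    "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/model.h5",
    "weights_hash": "f94a6cb012e18f4fb8ec92abb91864e9",
}

weights_path = keras.utils.get_file(
    "model.h5",
    origin=preset["weights_url"],
    file_hash=preset["weights_hash"],  # download is checked against this hash
)
```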
6 changes: 4 additions & 2 deletions keras_nlp/models/deberta_v3/deberta_v3_backbone.py
@@ -227,12 +227,14 @@ def from_preset(
}
# Load architecture and weights from preset
-    model = keras_nlp.models.DebertaV3Backbone.from_preset("deberta_base")
+    model = keras_nlp.models.DebertaV3Backbone.from_preset(
+        "deberta_v3_base_en",
+    )
output = model(input_data)
# Load randomly initialized model from preset architecture
model = keras_nlp.models.DebertaV3Backbone.from_preset(
"deberta_base", load_weights=False
"deberta_base_en", load_weights=False
)
output = model(input_data)
```
8 changes: 4 additions & 4 deletions keras_nlp/models/deberta_v3/deberta_v3_classifier.py
@@ -203,7 +203,7 @@ def from_preset(
# Create a DebertaV3Classifier and fit your data.
classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
"deberta_base",
"deberta_v3_base_en",
num_classes=4,
)
classifier.compile(
@@ -220,13 +220,13 @@ def from_preset(
# Use a shorter sequence length.
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
"deberta_base",
"deberta_v3_base_en",
sequence_length=128,
)
# Create a DebertaV3Classifier and fit your data.
classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
"deberta_base",
"deberta_v3_base_en",
num_classes=4,
preprocessor=preprocessor,
)
@@ -249,7 +249,7 @@ def from_preset(
# Create a DebertaV3Classifier and fit your data.
classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
"deberta_base",
"deberta_v3_base_en",
num_classes=4,
preprocessor=None,
)
4 changes: 2 additions & 2 deletions keras_nlp/models/deberta_v3/deberta_v3_preprocessor.py
@@ -226,13 +226,13 @@ def from_preset(
```python
# Load preprocessor from preset
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
"deberta_base",
"deberta_v3_base_en",
)
preprocessor("The quick brown fox jumped.")
# Override sequence_length
preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
"deberta_base",
"deberta_v3_base_en",
sequence_length=64
)
preprocessor("The quick brown fox jumped.")
24 changes: 12 additions & 12 deletions keras_nlp/models/deberta_v3/deberta_v3_presets.py
@@ -14,7 +14,7 @@
"""DeBERTa model preset configurations."""

backbone_presets = {
"deberta_v3_extra_small": {
"deberta_v3_extra_small_en": {
"config": {
"vocabulary_size": 128100,
"num_layers": 12,
@@ -30,12 +30,12 @@
"Extra small size of DeBERTaV3. "
"Trained on English Wikipedia, BookCorpus and OpenWebText."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/model.h5",
"weights_hash": "d8e10327107e5c5e20b45548a5028619",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small/v1/vocab.spm",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/vocab.spm",
"spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
},
"deberta_v3_small": {
"deberta_v3_small_en": {
"config": {
"vocabulary_size": 128100,
"num_layers": 6,
@@ -51,12 +51,12 @@
"Small size of DeBERTaV3. "
"Trained on English Wikipedia, BookCorpus and OpenWebText."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/model.h5",
"weights_hash": "84118eb7c5a735f2061ecccaf71bb888",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small/v1/vocab.spm",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/vocab.spm",
"spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
},
"deberta_v3_base": {
"deberta_v3_base_en": {
"config": {
"vocabulary_size": 128100,
"num_layers": 12,
@@ -72,12 +72,12 @@
"Base size of DeBERTaV3. "
"Trained on English Wikipedia, BookCorpus and OpenWebText."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/model.h5",
"weights_hash": "cebce044aeed36aec9b94e3b8a255430",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base/v1/vocab.spm",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/vocab.spm",
"spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
},
"deberta_v3_large": {
"deberta_v3_large_en": {
"config": {
"vocabulary_size": 128100,
"num_layers": 24,
@@ -93,9 +93,9 @@
"Base size of DeBERTaV3. "
"Trained on English Wikipedia, BookCorpus and OpenWebText."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large_en/v1/model.h5",
"weights_hash": "bce7690f358a9e39304f8c0ebc71a745",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large/v1/vocab.spm",
"spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large_en/v1/vocab.spm",
"spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
},
"deberta_v3_base_multi": {
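Since every preset ID here must follow the style guide, a small consistency check over the registry keys can guard against regressions. A hypothetical test sketch, not part of this commit:

```python
# Hypothetical consistency check over preset IDs; not part of this commit.
LANG_SUFFIXES = ("_en", "_zh", "_multi")
BANNED_FRAGMENTS = ("xsmall", "_xl", "_cased")  # abbreviations / redundant tags

def check_preset_ids(preset_ids):
    for name in preset_ids:
        # Strip an explicit "uncased" marker, then require a language code.
        base = name[: -len("_uncased")] if name.endswith("_uncased") else name
        assert base.endswith(LANG_SUFFIXES), f"missing language suffix: {name}"
        for fragment in BANNED_FRAGMENTS:
            assert fragment not in name, f"banned fragment {fragment!r}: {name}"

check_preset_ids([
    "deberta_v3_extra_small_en",
    "deberta_v3_base_en",
    "bert_base_multi",
    "bert_large_en_uncased",
])
```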
12 changes: 6 additions & 6 deletions keras_nlp/models/deberta_v3/deberta_v3_presets_test.py
@@ -38,15 +38,15 @@ class DebertaV3PresetSmokeTest(tf.test.TestCase, parameterized.TestCase):

def test_tokenizer_output(self):
tokenizer = DebertaV3Tokenizer.from_preset(
"deberta_v3_extra_small",
"deberta_v3_extra_small_en",
)
outputs = tokenizer("The quick brown fox.")
expected_outputs = [279, 1538, 3258, 16123, 260]
self.assertAllEqual(outputs, expected_outputs)

def test_preprocessor_output(self):
preprocessor = DebertaV3Preprocessor.from_preset(
"deberta_v3_extra_small",
"deberta_v3_extra_small_en",
sequence_length=4,
)
outputs = preprocessor("The quick brown fox.")["token_ids"]
@@ -62,7 +62,7 @@ def test_backbone_output(self, load_weights):
"padding_mask": tf.constant([[1, 1, 1, 1]]),
}
model = DebertaV3Backbone.from_preset(
"deberta_v3_extra_small", load_weights=load_weights
"deberta_v3_extra_small_en", load_weights=load_weights
)
outputs = model(input_data)
if load_weights:
@@ -76,7 +76,7 @@ def test_backbone_output(self, load_weights):
def test_classifier_output(self, load_weights):
input_data = tf.constant(["The quick brown fox."])
model = DebertaV3Classifier.from_preset(
"deberta_v3_extra_small", load_weights=load_weights
"deberta_v3_extra_small_en", load_weights=load_weights
)
# Never assert output values, as the head weights are random.
model.predict(input_data)
@@ -90,7 +90,7 @@ def test_classifier_output_without_preprocessing(self, load_weights):
"padding_mask": tf.constant([[1, 1, 1, 1]]),
}
model = DebertaV3Classifier.from_preset(
"deberta_v3_extra_small",
"deberta_v3_extra_small_en",
load_weights=load_weights,
preprocessor=None,
)
@@ -117,7 +117,7 @@ def test_preset_docstring(self, cls):
def test_unknown_preset_error(self, cls):
# Not a preset name
with self.assertRaises(ValueError):
cls.from_preset("deberta_v3_extra_small_clowntown")
cls.from_preset("deberta_v3_extra_small_en_clowntown")


@pytest.mark.extra_large
2 changes: 1 addition & 1 deletion keras_nlp/models/deberta_v3/deberta_v3_tokenizer.py
@@ -106,7 +106,7 @@ def from_preset(
```python
# Load a preset tokenizer.
tokenizer = keras_nlp.models.DebertaV3Tokenizer.from_preset(
"deberta_base",
"deberta_v3_base_en",
)
# Tokenize some input.
6 changes: 3 additions & 3 deletions keras_nlp/models/distil_bert/distil_bert_classifier.py
@@ -219,12 +219,12 @@ def from_preset(
# Use a shorter sequence length.
    preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset(
"bert_base_en_uncased",
"distil_bert_base_en_uncased",
sequence_length=128,
)
# Create a DistilBertClassifier and fit your data.
classifier = keras_nlp.models.DistilBertClassifier.from_preset(
"bert_base_en_uncased",
"distil_bert_base_en_uncased",
num_classes=4,
preprocessor=preprocessor,
)
@@ -250,7 +250,7 @@ def from_preset(
# Create a DistilBERT classifier and fit your data.
classifier = keras_nlp.models.DistilBertClassifier.from_preset(
"bert_base_en_uncased",
"distil_bert_base_en_uncased",
num_classes=4,
preprocessor=None,
)
12 changes: 6 additions & 6 deletions keras_nlp/models/distil_bert/distil_bert_presets.py
@@ -37,7 +37,7 @@
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en_uncased/v1/vocab.txt",
"vocabulary_hash": "64800d5d8528ce344256daf115d4965e",
},
"distil_bert_base_en_cased": {
"distil_bert_base_en": {
"config": {
"vocabulary_size": 28996,
"num_layers": 6,
@@ -55,12 +55,12 @@
"Trained on English Wikipedia + BooksCorpus using BERT as the "
"teacher model."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en_cased/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en/v1/model.h5",
"weights_hash": "fa36aa6865978efbf85a5c8264e5eb57",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en_cased/v1/vocab.txt",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en/v1/vocab.txt",
"vocabulary_hash": "bb6ca9b42e790e5cd986bbb16444d0e0",
},
"distil_bert_base_multi_cased": {
"distil_bert_base_multi": {
"config": {
"vocabulary_size": 119547,
"num_layers": 6,
@@ -77,9 +77,9 @@
"Base size of DistilBERT. Trained on Wikipedias of 104 languages "
"using BERT the teacher model."
),
"weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi_cased/v1/model.h5",
"weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi/v1/model.h5",
"weights_hash": "c0f11095e2a6455bd3b1a6d14800a7fa",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi_cased/v1/vocab.txt",
"vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi/v1/vocab.txt",
"vocabulary_hash": "d9d865138d17f1958502ed060ecfeeb6",
},
}
2 changes: 1 addition & 1 deletion keras_nlp/models/gpt2/gpt2_backbone.py
@@ -215,7 +215,7 @@ def from_preset(
}
# Load architecture and weights from preset
model = GPT2Backbone.from_preset("gpt2_base")
model = GPT2Backbone.from_preset("gpt2_base_en")
output = model(input_data)
# Load randomly initialized model from preset architecture
(Diffs for the remaining changed files not shown.)
