diff --git a/README.md b/README.md
index 685d974..96e2cdd 100644
--- a/README.md
+++ b/README.md
@@ -151,7 +151,7 @@ Please install other required packages via `pip install -r requirements.txt`.
 
 ```
 cd experiment
-python run_param_tuner.py --config config/DCN_tiny_h5_tuner_config.yaml --gpu 0 1 2 3 0 1 2 3
+python run_param_tuner.py --config config/DCN_tiny_parquet_tuner_config.yaml --gpu 0 1 2 3 0 1 2 3
 ```
 
 ## 🔥 Citation
diff --git a/experiment/config/DCN_tiny_npz_tuner_config.yaml b/experiment/config/DCN_tiny_parquet_tuner_config.yaml
similarity index 64%
rename from experiment/config/DCN_tiny_npz_tuner_config.yaml
rename to experiment/config/DCN_tiny_parquet_tuner_config.yaml
index 1eb5092..be72383 100644
--- a/experiment/config/DCN_tiny_npz_tuner_config.yaml
+++ b/experiment/config/DCN_tiny_parquet_tuner_config.yaml
@@ -1,6 +1,13 @@
 base_config: ../model_zoo/DCN/DCN_torch/config/
 base_expid: DCN_default
-dataset_id: tiny_npz
+dataset_id: tiny_parquet
+
+tiny_parquet:
+    data_root: ../data/
+    data_format: parquet
+    train_data: ../data/tiny_parquet/train.parquet
+    valid_data: ../data/tiny_parquet/valid.parquet
+    test_data: ../data/tiny_parquet/test.parquet
 
 tuner_space:
     model_root: './checkpoints/'
diff --git a/fuxictr/preprocess/tokenizer.py b/fuxictr/preprocess/tokenizer.py
index 21a574c..51aa961 100644
--- a/fuxictr/preprocess/tokenizer.py
+++ b/fuxictr/preprocess/tokenizer.py
@@ -86,7 +86,8 @@ def merge_vocab(self, shared_tokenizer):
         else:
             shared_tokenizer.vocab.update(self.vocab)
         vocab_size = shared_tokenizer.vocab_size()
-        if shared_tokenizer.vocab["__OOV__"] != vocab_size - 1:
+        if (shared_tokenizer.vocab["__OOV__"] != vocab_size - 1 or
+            shared_tokenizer.vocab["__OOV__"] != len(shared_tokenizer.vocab) - 1):
             shared_tokenizer.vocab["__OOV__"] = vocab_size
         self.vocab = shared_tokenizer.vocab
         return shared_tokenizer
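
For reviewers, a minimal standalone sketch of the invariant the tokenizer hunk enforces: after per-field vocabularies are merged, `__OOV__` must hold the last index. The helper name `fix_oov` and the `max(vocab.values()) + 1` reading of `vocab_size()` are assumptions made for illustration, not part of this change.

```
# Sketch of the __OOV__ invariant enforced by the tokenizer hunk above.
# Assumption (not shown in this diff): vocab_size() behaves like
# max(vocab.values()) + 1, so it can exceed len(vocab) when remapping
# leaves holes in the index space.

def fix_oov(vocab: dict) -> dict:
    vocab_size = max(vocab.values()) + 1  # assumed vocab_size() behavior
    # By De Morgan, this reassigns unless __OOV__ is last under BOTH
    # measures: it holds the max index AND the index space is contiguous.
    if (vocab["__OOV__"] != vocab_size - 1 or
            vocab["__OOV__"] != len(vocab) - 1):
        vocab["__OOV__"] = vocab_size
    return vocab

# After merging two per-field vocabs, __OOV__ may no longer be last:
merged = {"__PAD__": 0, "a": 1, "__OOV__": 2, "b": 3, "c": 4}
print(fix_oov(merged)["__OOV__"])  # -> 5 (pushed past the current max index)
```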