diff --git a/backup/dictionary.pkl b/backup/dictionary.pkl index df44aea..25310c2 100644 Binary files a/backup/dictionary.pkl and b/backup/dictionary.pkl differ diff --git a/backup/processing_progress.txt b/backup/processing_progress.txt index 433979e..00af69a 100644 Binary files a/backup/processing_progress.txt and b/backup/processing_progress.txt differ diff --git a/dictionary.msgpack b/dictionary.msgpack index 6392ca2..94773eb 100644 Binary files a/dictionary.msgpack and b/dictionary.msgpack differ diff --git a/tokens.msgpack b/tokens.msgpack index e11293f..b00bb06 100644 Binary files a/tokens.msgpack and b/tokens.msgpack differ diff --git a/train.py b/train.py index 1743679..7326e56 100644 --- a/train.py +++ b/train.py @@ -131,8 +131,8 @@ async def main(retain=False): # Silencing for now. Creating too many problems. # Merge batches periodically - # if (word_count + 1) % (PRUNE_FREQUENCY * 100) == 0: - # await merge_batches() + if (word_count + 1) % (PRUNE_FREQUENCY * 50) == 0: + await merge_batches() # Final batch creation after processing is complete await create_batch(tree_store, TARGET_DICTIONARY_COUNT)