From 54dd0f1656973b581d6e04a0b1b171380dd95692 Mon Sep 17 00:00:00 2001 From: fonhorst Date: Tue, 2 Jan 2024 15:06:17 +0300 Subject: [PATCH] run of bigartm cooc dict build from the image --- examples/preparation_pipeline.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/examples/preparation_pipeline.py b/examples/preparation_pipeline.py index cbd3f88..b7eb4da 100644 --- a/examples/preparation_pipeline.py +++ b/examples/preparation_pipeline.py @@ -51,6 +51,22 @@ def prepare_all_artifacts_debug(save_path: str): if __name__ == "__main__": prepare_all_artifacts_debug(SAVE_PATH) +# Run bigartm from the CLI (-c: raw corpus in Vowpal Wabbit format; -v: vocabulary file in UCI format): +# bigartm \ +# -c vw \ +# -v vocab \ +# --cooc-window 10 \ +# --cooc-min-tf 200 \ +# --write-cooc-tf cooc_tf_ \ +# --cooc-min-df 200 \ +# --write-cooc-df cooc_df_ \ +# --write-ppmi-tf ppmi_tf_ \ +# --write-ppmi-df ppmi_df_ + + +# run container: docker run -it -v /home/nikolay/wspace/AutoTM/tmp/train-00000-of-00001-processed-corpora:/dataset artm:3.9 /bin/bash +# bigartm -c /dataset/test_set_data_voc.txt -v /dataset/vocab.txt --cooc-window 10 --cooc-min-tf 200 --write-cooc-tf cooc_tf_ --cooc-min-df 200 --write-cooc-df cooc_df_ --write-ppmi-tf ppmi_tf_ --write-ppmi-df ppmi_df_ + # Normal version. DO NOT DELETE!!! # if __name__ == "__main__":