Skip to content

Commit

Permalink
run of bigartm cooc dict build from the image
Browse files Browse the repository at this point in the history
  • Loading branch information
fonhorst committed Jan 2, 2024
1 parent e409b5d commit 54dd0f1
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions examples/preparation_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,22 @@ def prepare_all_artifacts_debug(save_path: str):
if __name__ == "__main__":
prepare_all_artifacts_debug(SAVE_PATH)

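# Run bigartm from the CLI to build the co-occurrence dictionaries.
# Flag notes (kept off the command lines: a comment after a trailing
# backslash would break the shell line continuation):
#   -c  raw corpus in Vowpal Wabbit format
#   -v  vocab file in UCI format
#
# bigartm \
# -c vw \
# -v vocab \
# --cooc-window 10 \
# --cooc-min-tf 200 \
# --write-cooc-tf cooc_tf_ \
# --cooc-min-df 200 \
# --write-cooc-df cooc_df_ \
# --write-ppmi-tf ppmi_tf_ \
# --write-ppmi-df ppmi_df_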


# Run the container, mounting the processed corpora directory at /dataset:
# docker run -it -v /home/nikolay/wspace/AutoTM/tmp/train-00000-of-00001-processed-corpora:/dataset artm:3.9 /bin/bash
# Then, inside the container, run bigartm against the mounted files:
# bigartm -c /dataset/test_set_data_voc.txt -v /dataset/vocab.txt --cooc-window 10 --cooc-min-tf 200 --write-cooc-tf cooc_tf_ --cooc-min-df 200 --write-cooc-df cooc_df_ --write-ppmi-tf ppmi_tf_ --write-ppmi-df ppmi_df_


# Normal version. DO NOT DELETE!!!
# if __name__ == "__main__":
Expand Down

0 comments on commit 54dd0f1

Please sign in to comment.