-
Notifications
You must be signed in to change notification settings - Fork 9
/
Makefile
executable file
·69 lines (62 loc) · 2.19 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
############################## Open-domain Search & Query-side Fine-tuning ###################################
# All of the source comes from Densephrases/Makefile.
# Here, we only include query-side fine-tuning part.
# pre-requisite: set path by `./config.sh`
# Required arguments
model-name:
ifeq ($(MODEL_NAME),)
echo "Please set MODEL_NAME before training (e.g., MODEL_NAME=test)"; exit 2;
endif
dump-dir:
ifeq ($(DUMP_DIR),)
echo "Please set DUMP_DIR before dumping/indexing (e.g., DUMP_DIR=test)"; exit 2;
endif
# Choose index size
small-index:
$(eval NUM_CLUSTERS=256)
$(eval INDEX_TYPE=OPQ96)
medium1-index:
$(eval NUM_CLUSTERS=16384)
$(eval INDEX_TYPE=OPQ96)
medium2-index:
$(eval NUM_CLUSTERS=131072)
$(eval INDEX_TYPE=OPQ96)
large-index:
$(eval NUM_CLUSTERS=1048576)
$(eval INDEX_TYPE=OPQ96)
large-index-sq:
$(eval NUM_CLUSTERS=1048576)
$(eval INDEX_TYPE=SQ4)
# Dataset paths
nq-open-data:
$(eval TRAIN_DATA=open-qa/nq-open/train_preprocessed.json)
$(eval DEV_DATA=open-qa/nq-open/dev_preprocessed.json)
$(eval TEST_DATA=open-qa/nq-open/test_preprocessed.json)
$(eval OPTIONS=--truecase)
all-open-data:
$(eval TEST_DATA=$(DATA_DIR)/open-qa/nq-open/test_preprocessed.json)
$(eval TEST_DATA=$(TEST_DATA),$(DATA_DIR)/open-qa/webq/WebQuestions-test_preprocessed.json)
$(eval TEST_DATA=$(TEST_DATA),$(DATA_DIR)/open-qa/trec/CuratedTrec-test_preprocessed.json)
$(eval TEST_DATA=$(TEST_DATA),$(DATA_DIR)/open-qa/triviaqa-unfiltered/test_preprocessed.json)
$(eval TEST_DATA=$(TEST_DATA),$(DATA_DIR)/open-qa/squad/test_preprocessed.json)
$(eval OPTIONS=--truecase)
# Query-side fine-tuning
train-query: dump-dir model-name nq-open-data large-index
python $(BASE_DIR)/train_query.py \
--run_mode train_query \
--cache_dir $(CACHE_DIR) \
--train_path $(DATA_DIR)/$(TRAIN_DATA) \
--dev_path $(DATA_DIR)/$(DEV_DATA) \
--test_path $(DATA_DIR)/$(TEST_DATA) \
--per_device_train_batch_size 12 \
--eval_batch_size 12 \
--learning_rate 3e-5 \
--num_train_epochs 5 \
--dump_dir $(DUMP_DIR) \
--index_name start/$(NUM_CLUSTERS)_flat_$(INDEX_TYPE) \
--load_dir $(LOAD_DIR_OR_PRETRAINED_HF_NAME) \
--output_dir $(SAVE_DIR)/$(MODEL_NAME) \
--top_k 100 \
--cuda \
--save_pred \
$(OPTIONS)