-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_test.sh
70 lines (57 loc) · 3.35 KB
/
run_test.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
################################################################################
#
# Copyright (c) 2019 Baidu.com, Inc. All Rights Reserved
#
################################################################################
# GPU id to use.
CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES

# When set to 1, target_a/target_b of the goal are generalized in all outputs,
# i.e. replaced with a slot mark.
TOPIC_GENERALIZATION="1"

# Python interpreter; adjust it to your actual environment.
pythonpath="python"

# Prefix of the file names used by the model. It must be consistent with the
# configuration in network.py.
prefix="demo"

# Folder that holds every data set used or generated during testing.
# See the data processing notes below for details.
datapath="./data"

# Which data to evaluate in the test stage: "dev" or "test".
#   - dev.txt and test.txt are the original data provided by the organizer and
#     must be placed in ${datapath}/resource/.
#   - The preprocessing steps further down generate the actual data needed for
#     model testing.
#   - dev.txt is session data (it contains the utterances of both bot and
#     user), so after testing it you can run eval.py to get the final score.
#   - After testing test.txt, upload the prediction to the competition website
#     to get the result.
datapart="dev"

# Expected file layout:
#   1. Data provided by the organizers goes under ${datapath}/resource/.
#      - It consists of three parts: train.txt, dev.txt and test.txt.
#      - train.txt and dev.txt are session data, test.txt is sample data.
#      - Only dev.txt or test.txt is used in the test stage.
#   2. Sample data extracted from session data is written to
#      ${datapath}/resource/.
#   3. The text file required by the model lives in ${datapath}.
#   4. The topic file used to generalize data lives in ${datapath}.
corpus_file="${datapath}/resource/${datapart}.txt"
sample_file="${datapath}/resource/sample.${datapart}.txt"
text_file="${datapath}/${prefix}.test"
topic_file="${datapath}/${prefix}.test.topic"
# Test pipeline: convert the chosen data set, run the model on it, restore the
# generalized topics and, for dev data, score the result.
#
# Reads:  pythonpath, datapart, corpus_file, sample_file, text_file,
#         topic_file, TOPIC_GENERALIZATION (all set earlier in this script).
# Writes: ${sample_file} (dev only), ./output/test.result,
#         ./output/test.result.final, ./output/test.result.eval (dev only),
#         log.txt.
# Exits with status 1 as soon as one step fails, so that later steps never run
# on missing or stale files.
#
# NOTE(review): only POSIX constructs are used here ("[" instead of "[[") so
# the steps keep working if the script is started with "sh" - confirm how the
# script is normally invoked.

# Print an error message to stderr and stop the script with status 1.
# Arguments: $* - message text
die() {
  printf 'run_test.sh: %s\n' "$*" >&2
  exit 1
}

# Prediction files shared by steps 3-5.
output_dir=./output
result_file=${output_dir}/test.result
final_file=${output_dir}/test.result.final
eval_file=${output_dir}/test.result.eval

# step 1: when evaluating dev.txt, the session data has to be converted to
#         sample data first; test.txt provided by the organizer is already
#         sample data and is used directly.
if [ "${datapart}" = "test" ]; then
  sample_file=${corpus_file}
else
  "${pythonpath}" ./tools/convert_session_to_sample.py "${corpus_file}" "${sample_file}" \
    || die "step 1 failed: cannot convert ${corpus_file} to sample data"
fi

# step 2: convert sample data to the text data required by the model
"${pythonpath}" ./tools/convert_conversation_corpus_to_model_text.py "${sample_file}" "${text_file}" "${topic_file}" "${TOPIC_GENERALIZATION}" \
  || die "step 2 failed: cannot convert ${sample_file} to model text"

# step 3: predict by model
# Make sure the output folder exists before anything is written into it;
# this is a no-op when the folder is already there.
mkdir -p "${output_dir}" || die "cannot create ${output_dir}"
# stdout and stderr of the model run are both captured in log.txt.
"${pythonpath}" ./network.py --test --ckpt models/best.model --gen_file "${result_file}" --use_posterior False --gpu 0 > log.txt 2>&1 \
  || die "step 3 failed: prediction did not succeed, see log.txt"

# step 4: replace the slot marks generated during topic generalization with
#         the real text
"${pythonpath}" ./tools/topic_materialization.py "${result_file}" "${final_file}" "${topic_file}" \
  || die "step 4 failed: cannot materialize topics in ${result_file}"

# step 5: when evaluating dev.txt, compute the result locally;
#         when evaluating test.txt, upload ./output/test.result.final to the
#         competition website to get the result instead.
if [ "${datapart}" != "test" ]; then
  "${pythonpath}" ./tools/convert_result_for_eval.py "${sample_file}" "${final_file}" "${eval_file}" \
    || die "step 5 failed: cannot convert ${final_file} for evaluation"
  "${pythonpath}" ./tools/eval.py "${eval_file}" \
    || die "step 5 failed: evaluation of ${eval_file} did not succeed"
fi