-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexperiment.py
62 lines (48 loc) · 2.16 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# -*- coding:utf-8 -*-
# Training-script module setup: stdlib/third-party imports, logging, and
# Theano compilation settings. (Python 2 / Theano project code.)
import os
import time
import logging
import numpy as np
import theano
from theano import tensor as T
# Project-local modules: data pipeline, RL agent, and hyper-parameter config.
from dataProcess.stream import preprocess
from agent import Agent
import config
# Module-level logger; INFO level so per-step progress is visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Debug aid: enable for verbose Theano error traces when diagnosing graph issues.
# theano.config.exception_verbosity = 'high'
# 'fast_compile' trades runtime speed for quick graph compilation — appropriate
# while the model is still being debugged (see TODO at end of file).
theano.config.optimizer = 'fast_compile'
# Commented-out scratch code: eagerly materializing the whole dataset into
# lists. Superseded by streaming directly in the __main__ loop below.
# config = getattr(config, 'get_config')()
# data_class = preprocess(*config['train_file'])
# data = {'facts': [], 'question': [], 'label': []}
# for facts, question, label in data_class.data_stream():
#     data['facts'].append(facts)
#     data['question'].append(question)
#     data['label'].append(label)
if __name__ == '__main__':
config = getattr(config, 'get_config')()
# collect n_itr dataset epochs
n_itr = config['n_itr']
# collect n_eps episodes
n_eps = config['n_eps']
agent = Agent(**config)
data_class = preprocess(*config['train_file'])
for _ in xrange(n_itr):
cnt = 0
for facts, question, label in data_class.data_stream():
observations = []
actions = []
rewards = []
# there should fullfill an episode, takes in facts, questions , labels
# return answer results, and rewards. The episode including interaction
# with env is all done in f_train, which will intricically call reasoner.apply
rl_cost, sl_cost, decoder_cost = agent.f_train(facts[0].T, facts[1].T, question[0].T, question[1].T, label)
print 'the costs are: ',rl_cost,sl_cost,decoder_cost
cnt += 1
print 'cnt:',cnt
# facts[0].T.shape, facts[1].T.shape, question[0].T.shape, question[1].T.shape, label.shape layout: (10, 5) (10, 5) (13, 1) (13, 1) (1,)
# 实际上这是个不好的写法,我们应该尽量减少theano内部内容?比如与环境交互的部分移出去?
# this episode finishes, compute all cost, train backward
# for facts, question, label in data_class.data_stream():
# sums = agent.f_train(facts[0].T, facts[1].T, question[0].T, question[1].T, label)[1]
# TODO: 先把前传调通,一个一个看变量,再调反传