diff --git a/training_for_git/batch.lua b/training_for_git/batch.lua
new file mode 100644
index 0000000..9c37596
--- /dev/null
+++ b/training_for_git/batch.lua
@@ -0,0 +1,104 @@
+require 'torch'   -- torch
+require 'optim'
+require 'nn'      -- provides the neural-network modules
+local train_file_path = 'train.th7'
+local test_file_path = 'test.th7'
+local train_data = torch.load(train_file_path)
+local test_data = torch.load(test_file_path)
+local Y = train_data[{{},{2,5}}]
+local X = train_data[{{},{6,-1}}]
+local test_labels = test_data[{{},{2,5}}]
+local test_X = test_data[{{},{6,-1}}]
+local batch_size = 30
+epochs = 3
+
+model = nn.Sequential()   -- define the container
+ninputs = 350; noutputs = 4; nhiddens1 = 1024; nhiddens2 = 512; nhiddens3 = 256
+model:add(nn.Linear(ninputs,nhiddens1))
+model:add(nn.Sigmoid())
+model:add(nn.Linear(nhiddens1,nhiddens2))
+model:add(nn.Sigmoid())
+model:add(nn.Linear(nhiddens2,nhiddens3))
+model:add(nn.Sigmoid())
+model:add(nn.Linear(nhiddens3,noutputs))
+criterion = nn.AbsCriterion()   -- alternative: nn.MSECriterion()
+x, dl_dx = model:getParameters()
+sgd_params = {
+   learningRate = 0.01,
+   learningRateDecay = 1e-08,
+   weightDecay = 0,
+   momentum = 0
+}
+
+function train(X,Y)
+
+   current_loss = 0
+   for batch = 1, (#train_data)[1], batch_size do
+
+      local inputs = {}
+      local targets = {}
+      local x_start = batch
+      local x_end = math.min(batch + batch_size - 1, (#train_data)[1])
+      for i = x_start, x_end do
+         local target = Y[i]
+         local input = X[i]
+         table.insert(inputs, input)
+         table.insert(targets, target)
+      end
+      local feval = function(x_new)
+         if x ~= x_new then
+            x:copy(x_new)
+         end
+         dl_dx:zero()
+         local f = 0
+         for i = 1, #inputs do
+            local loss_x = criterion:forward(model:forward(inputs[i]), targets[i])
+            model:backward(inputs[i], criterion:backward(model.output, targets[i]))
+            f = f + loss_x
+         end
+         return f/#inputs, dl_dx:div(#inputs)
+      end
+      _, fs = optim.adagrad(feval, x, sgd_params)
+      current_loss = current_loss + fs[1]
+   end
+   current_loss = current_loss/((#train_data)[1]/batch_size)
+   print('train loss = ' .. current_loss)
+   return current_loss
+end
+
+time = sys.clock()
+local cumm_loss = 0.
+for j = 1, epochs do
+   print(j)
+   cumm_loss = train(X, Y)
+   print('Final loss = ' .. cumm_loss)
+   if j % 10 == 0 or j == epochs then   -- evaluate on the test set periodically and after the last epoch
+      print('id approx text')
+      loss1 = 0.0
+      loss2 = 0.0
+      loss3 = 0.0
+      loss4 = 0.0
+      for i = 1, (#test_data)[1] do
+         local myPrediction = model:forward(test_X[i])
+         loss1 = loss1 + math.abs(myPrediction[1] - test_labels[i][1])
+         loss2 = loss2 + math.abs(myPrediction[2] - test_labels[i][2])
+         loss3 = loss3 + math.abs(myPrediction[3] - test_labels[i][3])
+         loss4 = loss4 + math.abs(myPrediction[4] - test_labels[i][4])
+      end
+
+      loss1 = loss1/(#test_data)[1]
+      loss2 = loss2/(#test_data)[1]
+      loss3 = loss3/(#test_data)[1]
+      loss4 = loss4/(#test_data)[1]
+   end
+end
+
+
+-- time taken
+time = sys.clock() - time
+print("Time per epoch = " .. (time / epochs) .. '[s]')
+
+
+
+print(loss1, loss2, loss3, loss4)
+torch.save('estimation_model.dat', model)
diff --git a/training_for_git/gpu_rec.lua b/training_for_git/gpu_rec.lua
new file mode 100644
index 0000000..4e6f270
--- /dev/null
+++ b/training_for_git/gpu_rec.lua
@@ -0,0 +1,130 @@
+require 'rnn'
+require 'optim'
+
+batchSize = 30
+rho = 10
+hiddenSize = 512
+hiddenSize1 = 256
+inputSize = 400
+outputSize = 3
+epochs = 100
+xStart = 6
+yStart = 2
+yEnd = 4
+
+
+local train_file_path = 'recurrent_train.th7'
+local train_data = torch.load(train_file_path)
+local Y = train_data[{{},{yStart,yEnd}}]
+local X = train_data[{{},{xStart,-1}}]
+seriesSize = (#train_data)[1]
+print(seriesSize)
+local test_file_path = 'recurrent_test.th7'
+local test_data = torch.load(test_file_path)
+local test_labels = test_data[{{},{yStart,yEnd}}]
+local test_X = test_data[{{},{xStart,-1}}]
+
+model = nn.Sequential()
+model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize, rho)))
+model:add(nn.Sequencer(nn.FastLSTM(hiddenSize, hiddenSize1, rho)))
+model:add(nn.Sequencer(nn.Linear(hiddenSize1, outputSize)))
+
+criterion = nn.SequencerCriterion(nn.AbsCriterion())
+
+-- dummy dataset (task: predict the next item)
+--dataset = torch.randn(seriesSize, inputSize)
+
+-- define the starting indices of the batch elements
+offsets = {}
+for i = 1, batchSize do
+   table.insert(offsets, i)   --math.ceil(math.random() * batchSize))
+end
+offsets = torch.LongTensor(offsets)
+
+function nextBatch()
+   local inputs, targets = {}, {}
+   for step = 1, rho do
+      -- get a batch of inputs
+      table.insert(inputs, X:index(1, offsets))
+      -- shift the batch indices by one
+      offsets:add(1)
+      for j = 1, batchSize do
+         if offsets[j] > seriesSize then
+            offsets[j] = 1
+         end
+      end
+      -- a batch of targets
+      table.insert(targets, Y[{{},{1,3}}]:index(1, offsets))
+   end
+   return inputs, targets
+end
+
+-- get the flattened parameters and gradients from the model
+x, dl_dx = model:getParameters()
+
+feval = function(x_new)
+   -- copy the weights if they have changed
+   if x ~= x_new then
+      x:copy(x_new)
+   end
+
+   -- select a training batch
+   local inputs, targets = nextBatch()
+
+   -- reset gradients (gradients are always accumulated, to accommodate
+   -- batch methods)
+   dl_dx:zero()
+
+   -- evaluate the loss function and its derivative wrt x, given the mini-batch
+   local prediction = model:forward(inputs)
+   local loss_x = criterion:forward(prediction, targets)
+   model:backward(inputs, criterion:backward(prediction, targets))
+
+   return loss_x, dl_dx
+end
+
+sgd_params = {
+   learningRate = 0.01,
+   learningRateDecay = 1e-08,
+   weightDecay = 0,
+   momentum = 0
+}
+
+time = sys.clock()
+for j = 1, epochs do
+   -- train a mini-batch of batchSize sequences in parallel
+   _, fs = optim.adagrad(feval, x, sgd_params)
+   print('error for iteration ' .. sgd_params.evalCounter .. ' is ' .. fs[1])
+
+end
+
+
+print('id approx text')
+local loss1 = 0.0
+local loss2 = 0.0
+local loss3 = 0.0
+local loss4 = 0.0
+for i = 1, (#test_data)[1], 1 do
+   local inputs = {}
+   for step = 1, 1 do
+      -- get a batch of inputs
+      table.insert(inputs, test_X[i])
+   end
+   local myPrediction = model:forward(inputs)
+   loss1 = loss1 + math.abs(myPrediction[1][1] - test_labels[i][1])
+   loss2 = loss2 + math.abs(myPrediction[1][2] - test_labels[i][2])
+   loss3 = loss3 + math.abs(myPrediction[1][3] - test_labels[i][3])
+   --loss4 = loss4+math.abs(myPrediction[4] - test_labels[i][4])
+end
+
+loss1 = loss1/(#test_data)[1]
+loss2 = loss2/(#test_data)[1]
+loss3 = loss3/(#test_data)[1]
+--loss4 = loss4/(#test_data)[1]
+
+-- time taken
+time = sys.clock() - time
+print("Time per epoch = " .. (time / epochs) .. '[s]')
+
+print(loss1, loss2, loss3, loss4)
+torch.save('recurrent.dat', model)
diff --git a/training_for_git/no_validation.lua b/training_for_git/no_validation.lua
new file mode 100644
index 0000000..30a1ede
--- /dev/null
+++ b/training_for_git/no_validation.lua
@@ -0,0 +1,129 @@
+require 'torch'   -- torch
+require 'optim'
+require 'nn'      -- provides the neural-network modules
+local train_file_path = 'train.th7'
+local test_file_path = 'test.th7'
+local train_data = torch.load(train_file_path)
+local test_data = torch.load(test_file_path)
+local train_labels = train_data[{{},{2,5}}]
+local train_X = train_data[{{},{6,-1}}]
+local test_labels = test_data[{{},{2,5}}]
+local test_X = test_data[{{},{6,-1}}]
+local batch_size = 30
+model = nn.Sequential()   -- define the container
+ninputs = 350; noutputs = 4; nhiddens1 = 1024; nhiddens2 = 512; nhiddens3 = 256
+--model:add(nn.Linear(ninputs, noutputs))   -- define the only module
+model:add(nn.Linear(ninputs,nhiddens1))
+model:add(nn.Sigmoid())
+model:add(nn.Linear(nhiddens1,nhiddens2))
+model:add(nn.Sigmoid())
+model:add(nn.Linear(nhiddens2,nhiddens3))
+model:add(nn.Sigmoid())
+model:add(nn.Linear(nhiddens3,noutputs))
+criterion = nn.AbsCriterion()   -- alternative: nn.MSECriterion()
+x, dl_dx = model:getParameters()
+
+feval = function(x_new)
+   if x ~= x_new then
+      x:copy(x_new)
+   end
+   -- select a new training sample
+   _nidx_ = (_nidx_ or 0) + 1
+   if _nidx_ > (#train_data)[1] then _nidx_ = 1 end
+   --local sample = data[_nidx_]
+   local target = train_labels[_nidx_]   -- this funny looking syntax allows
+   local inputs = train_X[_nidx_]        -- slicing of arrays.
+   -- reset gradients (gradients are always accumulated, to accommodate
+   -- batch methods)
+   dl_dx:zero()
+   -- evaluate the loss function and its derivative wrt x, for that sample
+   --print(inputs)
+   --print(target)
+   for i = 1, 350 do
+      if type(inputs[i]) ~= 'number' then
+         print(i)
+         print(inputs[i])
+         print(type(inputs[i])) end
+   end
+   --io.write("continue with this operation (y/n)?")
+   --answer=io.read()
+   local loss_x = criterion:forward(model:forward(inputs), target)
+   model:backward(inputs, criterion:backward(model.output, target))
+   -- return loss(x) and dloss/dx
+   return loss_x, dl_dx
+end
+-- Given the function above, we can now easily train the model with a stochastic optimizer.
+-- For that, we need to define four key parameters:
+--   + a learning rate: the size of the step taken at each stochastic
+--     estimate of the gradient
+--   + a weight decay, to regularize the solution (L2 regularization)
+--   + a momentum term, to average steps over time
+--   + a learning rate decay, to let the algorithm converge more precisely
+sgd_params = {
+   learningRate = 0.01,
+   learningRateDecay = 1e-08,
+   weightDecay = 0,
+   momentum = 0
+}
+-- We're now good to go... all we have left to do is run over the dataset
+-- for a certain number of iterations, and perform a stochastic update
+-- at each iteration. The number of iterations is found empirically here,
+-- but should typically be determined using cross-validation.
+-- here we cycle only once over our training data
+for i = 1,1 do
+   print(i)
+   -- this variable is used to estimate the average loss
+   current_loss = 0
+   -- an epoch is a full loop over our training data
+   for i = 1, (#train_data)[1] do
+      -- optim contains several optimization algorithms.
+      -- All of these algorithms assume the same parameters:
+      --   + a closure that computes the loss, and its gradient wrt x,
+      --     given a point x
+      --   + a point x
+      --   + some parameters, which are algorithm-specific
+      _, fs = optim.adagrad(feval, x, sgd_params)
+      -- Functions in optim all return two things:
+      --   + the new x, found by the optimization method (here Adagrad)
+      --   + the value of the loss function at all points that were used by
+      --     the algorithm. Adagrad only evaluates the function once, so
+      --     that list just contains one value.
+      current_loss = current_loss + fs[1]
+   end
+   -- report average error on epoch
+   current_loss = current_loss / (#train_data)[1]
+   print('train loss = ' .. current_loss)
+
+end
+----------------------------------------------------------------------
+-- 5. Test the trained model.
+
+-- Now that the model is trained, one can test it by evaluating it
+-- on new samples.
+
+-- The original tutorial solved its model exactly using matrix techniques
+-- and compared against that closed-form solution; here we instead report
+-- the mean absolute error of each of the four outputs on the test set.
+
+-- We compare our approximate results with the test labels.
+
+print('id approx text')
+local loss1 = 0.0
+local loss2 = 0.0
+local loss3 = 0.0
+local loss4 = 0.0
+for i = 1, (#test_data)[1] do
+   local myPrediction = model:forward(test_X[i])
+   loss1 = loss1 + math.abs(myPrediction[1] - test_labels[i][1])
+   loss2 = loss2 + math.abs(myPrediction[2] - test_labels[i][2])
+   loss3 = loss3 + math.abs(myPrediction[3] - test_labels[i][3])
+   loss4 = loss4 + math.abs(myPrediction[4] - test_labels[i][4])
+end
+
+loss1 = loss1/(#test_data)[1]
+loss2 = loss2/(#test_data)[1]
+loss3 = loss3/(#test_data)[1]
+loss4 = loss4/(#test_data)[1]
+
+print(loss1, loss2, loss3, loss4)
+torch.save('save.dat', model)
diff --git a/training_for_git/recurrent.lua b/training_for_git/recurrent.lua
new file mode 100644
index 0000000..2033a31
--- /dev/null
+++ b/training_for_git/recurrent.lua
@@ -0,0 +1,109 @@
+require 'rnn'
+require 'optim'
+
+function range(from, to, step)
+   step = step or 1
+   return function(_, lastvalue)
+      local nextvalue = lastvalue + step
+      if step > 0 and nextvalue <= to or step < 0 and nextvalue >= to or
+         step == 0
+      then
+         return nextvalue
+      end
+   end, nil, from - step
+end
+
+local train_file_path = 'recurrent_train.th7'
+local test_file_path = 'recurrent_test.th7'
+local train_data = torch.load(train_file_path)
+local test_data = torch.load(test_file_path)
+local Y = train_data[{{},{2,5}}]
+local X = train_data[{{},{6,-1}}]
+local test_labels = test_data[{{},{2,5}}]
+local test_X = test_data[{{},{6,-1}}]
+
+batchSize = 5
+rho = 10
+hiddenSize1 = 1024
+hiddenSize2 = 512
+hiddenSize3 = 256
+inputSize = 1
+outputSize = 1
+seriesSize = 100
+
+model = nn.Sequential()
+model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize2, rho)))
+model:add(nn.Sequencer(nn.FastLSTM(hiddenSize2, hiddenSize3, rho)))
+--model:add(nn.Sequencer(nn.Linear(hiddenSize2, hiddenSize3, rho)))
+--model:add(nn.Sequencer(nn.Sigmoid()))
+model:add(nn.Sequencer(nn.Linear(hiddenSize3, outputSize)))
+
+criterion = nn.SequencerCriterion(nn.MSECriterion())
+
+-- dummy dataset (task: predict the next item)
+--dataset = torch.randn(seriesSize, inputSize)
+
+-- define the starting indices of the batch elements
+offsets = {}
+for i = 1, batchSize do
+   table.insert(offsets, i)
+end
+offsets = torch.LongTensor(offsets)
+print(offsets)
+function nextBatch()
+   local inputs, targets = {}, {}
+   for step = 1, rho do
+      -- get a batch of inputs
+      table.insert(inputs, X:index(1, offsets))
+      -- shift the batch indices by one
+      offsets:add(1)
+      for j = 1, batchSize do
+         if offsets[j] > seriesSize then
+            offsets[j] = 1
+         end
+      end
+      -- a batch of targets
+      table.insert(targets, Y:index(1, offsets))
+   end
+   return inputs, targets
+end
+
+-- get the flattened parameters and gradients from the model
+x, dl_dx = model:getParameters()
+
+feval = function(x_new)
+   -- copy the weights if they have changed
+   if x ~= x_new then
+      x:copy(x_new)
+   end
+
+   -- select a training batch
+   local inputs, targets = nextBatch()
+
+   -- reset gradients (gradients are always accumulated, to accommodate
+   -- batch methods)
+   dl_dx:zero()
+
+   -- evaluate the loss function and its derivative wrt x, given the mini-batch
+   local prediction = model:forward(inputs)
+   local loss_x = criterion:forward(prediction, targets)
+   model:backward(inputs, criterion:backward(prediction, targets))
+
+   return loss_x, dl_dx
+end
+
+sgd_params = {
+   learningRate = 0.01,
+   learningRateDecay = 1e-08,
+   weightDecay = 0,
+   momentum = 0
+}
+
+for i = 1, 2 do
+   -- train a mini-batch of batchSize sequences in parallel
+   _, fs = optim.adagrad(feval, x, sgd_params)
+
+   if sgd_params.evalCounter % 100 == 0 then
+      print('error for iteration ' .. sgd_params.evalCounter .. ' is ' .. fs[1] / rho)
+   end
+end
diff --git a/training_for_git/recurrent_training.lua b/training_for_git/recurrent_training.lua
new file mode 100644
index 0000000..27bd5f3
--- /dev/null
+++ b/training_for_git/recurrent_training.lua
@@ -0,0 +1,144 @@
+require 'rnn'
+require 'optim'
+
+batchSize = 30
+rho = 20
+hiddenSize = 512
+hiddenSize1 = 256
+inputSize = 400
+outputSize = 4
+epochs = 10000
+xStart = 6
+yStart = 2
+yEnd = 5
+
+
+local train_file_path = 'recurrent_train.th7'
+local train_data = torch.load(train_file_path)
+local Y = train_data[{{},{yStart,yEnd}}]
+local X = train_data[{{},{xStart,-1}}]
+local place = train_data[{{},{1}}]
+seriesSize = (#train_data)[1]
+print(seriesSize)
+local test_file_path = 'recurrent_test.th7'
+local test_data = torch.load(test_file_path)
+local test_labels = test_data[{{},{yStart,yEnd}}]
+local test_X = test_data[{{},{xStart,-1}}]
+
+model = nn.Sequential()
+model:add(nn.Sequencer(nn.FastLSTM(inputSize, hiddenSize, rho)))
+model:add(nn.Sequencer(nn.FastLSTM(hiddenSize, hiddenSize1, rho)))
+model:add(nn.Sequencer(nn.Linear(hiddenSize1, outputSize)))
+
+criterion = nn.SequencerCriterion(nn.AbsCriterion())
+--local method = 'xavier'
+--local model_new = require('weight-init')(model, method)
+
+-- define the starting indices of the batch elements
+offsets = {}
+function offset_(seed)
+   offsets = {}
+   math.randomseed(seed)
+   for i = 1, batchSize do
+      table.insert(offsets, math.ceil(math.random() * batchSize))
+   end
+   offsets = torch.LongTensor(offsets)
+end
+function nextBatch()
+   local inputs, targets = {}, {}
+   local nums = {}
+   for step = 1, rho do
+      -- get a batch of inputs
+      table.insert(inputs, X:index(1, offsets))
+      -- shift the batch indices by one
+      offsets:add(1)
+      for j = 1, batchSize do
+         if offsets[j] > seriesSize then
+            offsets[j] = 1
+         end
+      end
+      -- a batch of targets
+      table.insert(targets, Y[{{},{1,4}}]:index(1, offsets))
+      table.insert(nums, place:index(1, offsets))
+   end
+   return inputs, targets
+end
+
+-- get the flattened parameters and gradients from the model
+x, dl_dx = model:getParameters()
+
+feval = function(x_new)
+   -- copy the weights if they have changed
+   if x ~= x_new then
+      x:copy(x_new)
+   end
+
+   -- select a training batch
+   local inputs, targets = nextBatch()
+
+   -- reset gradients (gradients are always accumulated, to accommodate
+   -- batch methods)
+   dl_dx:zero()
+
+   -- evaluate the loss function and its derivative wrt x, given the mini-batch
+   local prediction = model:forward(inputs)
+   local loss_x = criterion:forward(prediction, targets)
+   model:backward(inputs, criterion:backward(prediction, targets))
+
+   return loss_x, dl_dx
+end
+
+adagrad_params = {
+   learningRate = 0.01,
+   learningRateDecay = 1e-08,
+   weightDecay = 0,
+   momentum = 0
+}
+seed = 1
+offset_(seed)
+time = sys.clock()
+for j = 1, epochs do
+   if j % 1000 == 0 then
+      seed = seed + 1
+      offset_(seed)
+   end
+   -- train a mini-batch of batchSize sequences in parallel
+   _, fs = optim.adagrad(feval, x, adagrad_params)
+   print('error for iteration ' .. adagrad_params.evalCounter .. ' is ' .. fs[1]/rho)
+
+end
+
+
+print('id approx text')
+local loss1 = 0.0
+local loss2 = 0.0
+local loss3 = 0.0
+local loss4 = 0.0
+predict_batch = 100
+for i = 1, (#test_data)[1], predict_batch do
+   local inputs = {}
+   for step = 0, predict_batch-1 do
+      -- get a batch of inputs
+      table.insert(inputs, test_X[i+step])
+   end
+   local myPrediction = model:forward(inputs)
+   for step = 1, predict_batch do
+      loss1 = loss1 + math.abs(myPrediction[step][1] - test_labels[i+step-1][1])
+      loss2 = loss2 + math.abs(myPrediction[step][2] - test_labels[i+step-1][2])
+      loss3 = loss3 + math.abs(myPrediction[step][3] - test_labels[i+step-1][3])
+      loss4 = loss4 + math.abs(myPrediction[step][4] - test_labels[i+step-1][4])
+   end
+
+end
+
+loss1 = loss1/(#test_data)[1]
+loss2 = loss2/(#test_data)[1]
+loss3 = loss3/(#test_data)[1]
+loss4 = loss4/(#test_data)[1]
+
+-- time taken
+time = sys.clock() - time
+print("Time per epoch = " .. (time / epochs) .. '[s]')
+
+print(loss1, loss2, loss3, loss4)
+torch.save('recurrent3.dat', model)