diff --git a/changelog.txt b/changelog.txt
index 20ca12f..7ce9ec6 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,11 @@
+v106
+ -Change mixer so that there is a minimum limit for updates.
+ -Add more StationaryMap contexts to jpegModel
+ -Use a larger update limit for some mixer contexts in jpegModel
+ -Update lstm to v3
+v105
+ -Fix CD detection.
+ -Fix MRB detection.
 v104
  -in textModel also use regular word in stemmer
  -in wordModel, textModel use upper char flag from wrt
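(Reviewer's note: the "minimum limit for updates" entry refers to the Mixer::update() change further down: when the scaled prediction error falls inside [-merr[i], +merr[i]], the weight update is skipped entirely. A minimal standalone sketch of the idea, with hypothetical names; the real code operates on paq8pxd's Mixer state:

    #include <cstddef>
    #include <vector>

    // Sketch: skip the mixer weight update when the prediction error is
    // inside a small deadzone. 'err' is the 12-bit scaled error and
    // 'min_err' plays the role of merr[i] in the patch below.
    void train_with_deadzone(const std::vector<int>& in, std::vector<int>& w,
                             int err, int min_err) {
      if (err >= -min_err && err <= min_err) return; // treat tiny errors as zero
      for (std::size_t j = 0; j < w.size(); ++j)
        w[j] += (in[j] * err) >> 16;                 // gradient step (shift sets the rate)
    }

Zeroing near-zero errors keeps well-trained contexts from jittering on noise, and the new 'me' argument to Mixer::set() makes the threshold tunable per context.)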
diff --git a/lstm1.inc b/lstm1.inc
index 27fdf94..0f4a155 100644
--- a/lstm1.inc
+++ b/lstm1.inc
@@ -1,191 +1,289 @@
 //original source https://github.com/byronknoll/lstm-compress
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+// v3
 #include <vector>
-#include <string.h>
 #include <valarray>
 #include <memory>
-#include <numeric>
-#include <algorithm>
+
+float lstmerr=0.000021;
+
 namespace LSTM {
-class Layer {
+
+class Sigmoid {
  public:
-  Layer(unsigned int input_size, unsigned int auxiliary_input_size,
+  Sigmoid(int logit_size);
+  float Logit(float p) const;
+  static float Logistic(float p);
+
+ private:
+  float SlowLogit(float p);
+  int logit_size_;
+  std::vector<float> logit_table_;
+};
+Sigmoid::Sigmoid(int logit_size) : logit_size_(logit_size),
+    logit_table_(logit_size, 0) {
+  for (int i = 0; i < logit_size_; ++i) {
+    logit_table_[i] = SlowLogit((i + 0.5) / logit_size_);
+  }
+}
+
+float Sigmoid::Logit(float p) const {
+  int index = p * logit_size_;
+  if (index >= logit_size_) index = logit_size_ - 1;
+  else if (index < 0) index = 0;
+  return logit_table_[index];
+}
+
+float Sigmoid::Logistic(float p) {
+  return 1 / (1 + exp(-p));
+}
+
+float Sigmoid::SlowLogit(float p) {
+  return log(p / (1 - p));
+}
+
+
+
+struct NeuronLayer {
+  NeuronLayer(unsigned int input_size, unsigned int num_cells, int horizon,
+      int offset) : error_(num_cells), ivar_(horizon), gamma_(1.0, num_cells),
+      gamma_u_(num_cells), gamma_m_(num_cells), gamma_v_(num_cells),
+      beta_(num_cells), beta_u_(num_cells), beta_m_(num_cells),
+      beta_v_(num_cells), weights_(std::valarray<float>(input_size), num_cells),
+      state_(std::valarray<float>(num_cells), horizon),
+      update_(std::valarray<float>(input_size), num_cells),
+      m_(std::valarray<float>(input_size), num_cells),
+      v_(std::valarray<float>(input_size), num_cells),
+      transpose_(std::valarray<float>(num_cells), input_size - offset),
+      norm_(std::valarray<float>(num_cells), horizon) {};
+
+  std::valarray<float> error_, ivar_, gamma_, gamma_u_, gamma_m_, gamma_v_,
+      beta_, beta_u_, beta_m_, beta_v_;
+  std::valarray<std::valarray<float>> weights_, state_, update_, m_, v_,
+      transpose_, norm_;
+};
+
+class LstmLayer {
+ public:
+  LstmLayer(unsigned int input_size, unsigned int auxiliary_input_size,
       unsigned int output_size, unsigned int num_cells, int horizon,
-      float learning_rate);
-  const std::valarray<float>& ForwardPass(const std::valarray<float>& input);
-  const std::valarray<float>& BackwardPass(const std::valarray<float>& input,
-      const std::valarray<float>& hidden_error, int epoch);
+      float gradient_clip, float learning_rate);
+  void ForwardPass(const std::valarray<float>& input, int input_symbol,
+      std::valarray<float>* hidden, int hidden_start);
+  void BackwardPass(const std::valarray<float>& input, int epoch,
+      int layer, int input_symbol, std::valarray<float>* hidden_error);
   static inline float Rand() {
     return static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
   }
-  static inline float Logistic(float val) { return 1 / (1 + exp(-val)); }
+  std::vector<std::valarray<std::valarray<float>>*> Weights();
 
  private:
-  std::valarray<float> state_, hidden_, hidden_error_, output_gate_error_,
-      state_error_, input_node_error_, input_gate_error_, forget_gate_error_,
-      stored_error_;
-  std::valarray<std::valarray<float>> tanh_state_, output_gate_state_,
-      input_node_state_, input_gate_state_, forget_gate_state_, last_state_,
-      forget_gate_, input_node_, input_gate_, output_gate_, forget_gate_update_,
-      input_node_update_, input_gate_update_, output_gate_update_;
-  float learning_rate_;
+  std::valarray<float> state_, state_error_, stored_error_;
+  std::valarray<std::valarray<float>> tanh_state_, input_gate_state_,
+      last_state_;
+  float gradient_clip_, learning_rate_;
   unsigned int num_cells_, epoch_, horizon_, input_size_, output_size_;
-};
+  unsigned long long update_steps_ = 0;
+  NeuronLayer forget_gate_, input_node_, output_gate_;
 
-Layer::Layer(unsigned int input_size, unsigned int auxiliary_input_size,
+  void ClipGradients(std::valarray<float>* arr);
+  void ForwardPass(NeuronLayer& neurons, const std::valarray<float>& input,
+      int input_symbol);
+  void BackwardPass(NeuronLayer& neurons, const std::valarray<float>& input,
+      int epoch, int layer, int input_symbol,
+      std::valarray<float>* hidden_error);
+  void Adam(std::valarray<float>* g, std::valarray<float>* m,
+      std::valarray<float>* v, std::valarray<float>* w, float learning_rate,
+      float t) {
+    float beta1 = 0.025, beta2 = 0.9999, alpha = learning_rate * 0.1 /
+        sqrt(5e-5 * t + 1), eps = 1e-6;
+    (*m) *= beta1;
+    (*m) += (1 - beta1) * (*g);
+    (*v) *= beta2;
+    (*v) += (1 - beta2) * (*g) * (*g);
+    (*w) -= alpha * (((*m) / (float)(1 - pow(beta1, t))) /
+        (sqrt((*v) / (float)(1 - pow(beta2, t)) + eps)));
+}
+};
+LstmLayer::LstmLayer(unsigned int input_size, unsigned int auxiliary_input_size,
     unsigned int output_size, unsigned int num_cells, int horizon,
-    float learning_rate) : state_(num_cells), hidden_(num_cells),
-    hidden_error_(num_cells), output_gate_error_(num_cells),
-    state_error_(num_cells), input_node_error_(num_cells),
-    input_gate_error_(num_cells), forget_gate_error_(num_cells),
-    stored_error_(num_cells),
+    float gradient_clip, float learning_rate) :
+    state_(num_cells), state_error_(num_cells), stored_error_(num_cells),
     tanh_state_(std::valarray<float>(num_cells), horizon),
-    output_gate_state_(std::valarray<float>(num_cells), horizon),
-    input_node_state_(std::valarray<float>(num_cells), horizon),
     input_gate_state_(std::valarray<float>(num_cells), horizon),
-    forget_gate_state_(std::valarray<float>(num_cells), horizon),
     last_state_(std::valarray<float>(num_cells), horizon),
-    forget_gate_(std::valarray<float>(input_size), num_cells),
-    input_node_(std::valarray<float>(input_size), num_cells),
-    input_gate_(std::valarray<float>(input_size), num_cells),
-    output_gate_(std::valarray<float>(input_size), num_cells),
-    forget_gate_update_(std::valarray<float>(input_size), num_cells),
-    input_node_update_(std::valarray<float>(input_size), num_cells),
-    input_gate_update_(std::valarray<float>(input_size), num_cells),
-    output_gate_update_(std::valarray<float>(input_size), num_cells),
-    learning_rate_(learning_rate), num_cells_(num_cells), epoch_(0),
-    horizon_(horizon), input_size_(auxiliary_input_size),
-    output_size_(output_size) {
+    gradient_clip_(gradient_clip), learning_rate_(learning_rate),
+    num_cells_(num_cells), epoch_(0), horizon_(horizon),
+    input_size_(auxiliary_input_size), output_size_(output_size),
+    forget_gate_(input_size, num_cells, horizon, output_size_ + input_size_),
+    input_node_(input_size, num_cells, horizon, output_size_ + input_size_),
+    output_gate_(input_size, num_cells, horizon, output_size_ + input_size_) {
   float low = -0.2;
   float range = 0.4;
-  for (unsigned int i = 0; i < forget_gate_.size(); ++i) {
-    for (unsigned int j = 0; j < forget_gate_[i].size(); ++j) {
-      forget_gate_[i][j] = low + Rand() * range;
-      input_node_[i][j] = low + Rand() * range;
-      input_gate_[i][j] = low + Rand() * range;
-      output_gate_[i][j] = low + Rand() * range;
+  for (unsigned int i = 0; i < num_cells_; ++i) {
+    for (unsigned int j = 0; j < forget_gate_.weights_[i].size(); ++j) {
+      forget_gate_.weights_[i][j] = low + Rand() * range;
+      input_node_.weights_[i][j] = low + Rand() * range;
+      output_gate_.weights_[i][j] = low + Rand() * range;
     }
-    forget_gate_[i][forget_gate_[i].size() - 1] = 1;
+    forget_gate_.weights_[i][forget_gate_.weights_[i].size() - 1] = 1;
   }
 }
 
-const std::valarray<float>& Layer::ForwardPass(const std::valarray<float>&
-    input) {
+void LstmLayer::ForwardPass(const std::valarray<float>& input, int input_symbol,
+    std::valarray<float>* hidden, int hidden_start) {
   last_state_[epoch_] = state_;
-  for (unsigned int i = 0; i < state_.size(); ++i) {
-    forget_gate_state_[epoch_][i] = Logistic(std::inner_product(&input[0],
-        &input[input.size()], &forget_gate_[i][0], 0.0));
-    state_[i] *= forget_gate_state_[epoch_][i];
-    input_node_state_[epoch_][i] = tanh(std::inner_product(&input[0],
-        &input[input.size()], &input_node_[i][0], 0.0));
-    input_gate_state_[epoch_][i] = Logistic(std::inner_product(&input[0],
-        &input[input.size()], &input_gate_[i][0], 0.0));
-    state_[i] += input_node_state_[epoch_][i] * input_gate_state_[epoch_][i];
-    tanh_state_[epoch_][i] = tanh(state_[i]);
-    output_gate_state_[epoch_][i] = Logistic(std::inner_product(&input[0],
-        &input[input.size()], &output_gate_[i][0], 0.0));
-    hidden_[i] = output_gate_state_[epoch_][i] * tanh_state_[epoch_][i];
+  ForwardPass(forget_gate_, input, input_symbol);
+  ForwardPass(input_node_, input, input_symbol);
+  ForwardPass(output_gate_, input, input_symbol);
+  for (unsigned int i = 0; i < num_cells_; ++i) {
+    forget_gate_.state_[epoch_][i] = Sigmoid::Logistic(
+        forget_gate_.state_[epoch_][i]);
+    input_node_.state_[epoch_][i] = tanh(input_node_.state_[epoch_][i]);
+    output_gate_.state_[epoch_][i] = Sigmoid::Logistic(
+        output_gate_.state_[epoch_][i]);
   }
+  input_gate_state_[epoch_] = 1.0f - forget_gate_.state_[epoch_];
+  state_ *= forget_gate_.state_[epoch_];
+  state_ += input_node_.state_[epoch_] * input_gate_state_[epoch_];
+  tanh_state_[epoch_] = tanh(state_);
+  std::slice slice = std::slice(hidden_start, num_cells_, 1);
+  (*hidden)[slice] = output_gate_.state_[epoch_] * tanh_state_[epoch_];
   ++epoch_;
   if (epoch_ == horizon_) epoch_ = 0;
-  return hidden_;
 }
 
-void ClipGradients(std::valarray<float>* arr) {
+void LstmLayer::ForwardPass(NeuronLayer& neurons,
+    const std::valarray<float>& input, int input_symbol) {
+  for (unsigned int i = 0; i < num_cells_; ++i) {
+    float f = neurons.weights_[i][input_symbol];
+    for (unsigned int j = 0; j < input.size(); ++j) {
+      f += input[j] * neurons.weights_[i][output_size_ + j];
+    }
+    neurons.norm_[epoch_][i] = f;
+  }
+  neurons.ivar_[epoch_] = 1.0 / sqrt(((neurons.norm_[epoch_] *
+      neurons.norm_[epoch_]).sum() / num_cells_) + 1e-5);
+  neurons.norm_[epoch_] *= neurons.ivar_[epoch_];
+  neurons.state_[epoch_] = neurons.norm_[epoch_] * neurons.gamma_ +
+      neurons.beta_;
+}
+
+void LstmLayer::ClipGradients(std::valarray<float>* arr) {
   for (unsigned int i = 0; i < arr->size(); ++i) {
-    if ((*arr)[i] < -2) (*arr)[i] = -2;
-    else if ((*arr)[i] > 2) (*arr)[i] = 2;
+    if ((*arr)[i] < -gradient_clip_) (*arr)[i] = -gradient_clip_;
+    else if ((*arr)[i] > gradient_clip_) (*arr)[i] = gradient_clip_;
   }
 }
 
-const std::valarray<float>& Layer::BackwardPass(const std::valarray<float>&
-    input, const std::valarray<float>& hidden_error, int epoch) {
+void LstmLayer::BackwardPass(const std::valarray<float>& input, int epoch,
+    int layer, int input_symbol, std::valarray<float>* hidden_error) {
   if (epoch == (int)horizon_ - 1) {
-    stored_error_ = hidden_error;
+    stored_error_ = *hidden_error;
     state_error_ = 0;
-    for (unsigned int i = 0; i < input_node_.size(); ++i) {
-      forget_gate_update_[i] = 0;
-      input_node_update_[i] = 0;
-      input_gate_update_[i] = 0;
-      output_gate_update_[i] = 0;
-    }
   } else {
-    stored_error_ += hidden_error;
+    stored_error_ += *hidden_error;
   }
-  output_gate_error_ = tanh_state_[epoch] * stored_error_ *
-      output_gate_state_[epoch] * (1.0f - output_gate_state_[epoch]);
-  state_error_ += stored_error_ * output_gate_state_[epoch] * (1.0f -
+  output_gate_.error_ = tanh_state_[epoch] * stored_error_ *
+      output_gate_.state_[epoch] * (1.0f - output_gate_.state_[epoch]);
+  state_error_ += stored_error_ * output_gate_.state_[epoch] * (1.0f -
       (tanh_state_[epoch] * tanh_state_[epoch]));
-  input_node_error_ = state_error_ * input_gate_state_[epoch] * (1.0f -
-      (input_node_state_[epoch] * input_node_state_[epoch]));
-  input_gate_error_ = state_error_ * input_node_state_[epoch] *
-      input_gate_state_[epoch] * (1.0f - input_gate_state_[epoch]);
-  forget_gate_error_ = state_error_ * last_state_[epoch] *
-      forget_gate_state_[epoch] * (1.0f - forget_gate_state_[epoch]);
-
-  hidden_error_ = 0;
-  if (input.size() > output_size_ + 1 + num_cells_ + input_size_) {
-    int offset = output_size_ + num_cells_ + input_size_;
-    for (unsigned int i = 0; i < input_node_.size(); ++i) {
-      for (unsigned int j = offset; j < input.size() - 1; ++j) {
-        hidden_error_[j-offset] += input_node_[i][j] * input_node_error_[i];
-        hidden_error_[j-offset] += input_gate_[i][j] * input_gate_error_[i];
-        hidden_error_[j-offset] += forget_gate_[i][j] * forget_gate_error_[i];
-        hidden_error_[j-offset] += output_gate_[i][j] * output_gate_error_[i];
-      }
-    }
-  }
+  input_node_.error_ = state_error_ * input_gate_state_[epoch] * (1.0f -
+      (input_node_.state_[epoch] * input_node_.state_[epoch]));
+  forget_gate_.error_ = (last_state_[epoch] - input_node_.state_[epoch]) *
+      state_error_ * forget_gate_.state_[epoch] * input_gate_state_[epoch];
+  *hidden_error = 0;
   if (epoch > 0) {
-    state_error_ *= forget_gate_state_[epoch];
+    state_error_ *= forget_gate_.state_[epoch];
     stored_error_ = 0;
-    for (unsigned int i = 0; i < input_node_.size(); ++i) {
-      int offset = output_size_ + input_size_;
-      for (unsigned int j = offset; j < offset + num_cells_; ++j) {
-        stored_error_[j-offset] += input_node_[i][j] * input_node_error_[i];
-        stored_error_[j-offset] += input_gate_[i][j] * input_gate_error_[i];
-        stored_error_[j-offset] += forget_gate_[i][j] * forget_gate_error_[i];
-        stored_error_[j-offset] += output_gate_[i][j] * output_gate_error_[i];
-      }
-    }
+  } else {
+    ++update_steps_;
   }
+  BackwardPass(forget_gate_, input, epoch, layer, input_symbol, hidden_error);
+  BackwardPass(input_node_, input, epoch, layer, input_symbol, hidden_error);
+  BackwardPass(output_gate_, input, epoch, layer, input_symbol, hidden_error);
+
   ClipGradients(&state_error_);
   ClipGradients(&stored_error_);
-  ClipGradients(&hidden_error_);
+  ClipGradients(hidden_error);
+}
 
-  for (unsigned int i = 0; i < input_node_.size(); ++i) {
-    forget_gate_update_[i] += (learning_rate_ * forget_gate_error_[i]) * input;
-    input_node_update_[i] += (learning_rate_ * input_node_error_[i]) * input;
-    input_gate_update_[i] += (learning_rate_ * input_gate_error_[i]) * input;
-    output_gate_update_[i] += (learning_rate_ * output_gate_error_[i]) * input;
+void LstmLayer::BackwardPass(NeuronLayer& neurons,
+    const std::valarray<float>& input, int epoch, int layer, int input_symbol,
+    std::valarray<float>* hidden_error) {
+  if (epoch == (int)horizon_ - 1) {
+    neurons.gamma_u_ = 0;
+    neurons.beta_u_ = 0;
+    for (unsigned int i = 0; i < num_cells_; ++i) {
+      neurons.update_[i] = 0;
+      int offset = output_size_ + input_size_;
+      for (unsigned int j = 0; j < neurons.transpose_.size(); ++j) {
+        neurons.transpose_[j][i] = neurons.weights_[i][j + offset];
+      }
+    }
+  }
+  neurons.beta_u_ += neurons.error_;
+  neurons.gamma_u_ += neurons.error_ * neurons.norm_[epoch];
+  neurons.error_ *= neurons.gamma_ * neurons.ivar_[epoch];
+  neurons.error_ -= ((neurons.error_ * neurons.norm_[epoch]).sum() /
+      num_cells_) * neurons.norm_[epoch];
+  if (layer > 0) {
+    for (unsigned int i = 0; i < num_cells_; ++i) {
+      float f = 0;
+      for (unsigned int j = 0; j < num_cells_; ++j) {
+        f += neurons.error_[j] * neurons.transpose_[num_cells_ + i][j];
+      }
+      (*hidden_error)[i] += f;
+    }
+  }
+  if (epoch > 0) {
+    for (unsigned int i = 0; i < num_cells_; ++i) {
+      float f = 0;
+      for (unsigned int j = 0; j < num_cells_; ++j) {
+        f += neurons.error_[j] * neurons.transpose_[i][j];
+      }
+      stored_error_[i] += f;
+    }
+  }
+  std::slice slice = std::slice(output_size_, input.size(), 1);
+  for (unsigned int i = 0; i < num_cells_; ++i) {
+    neurons.update_[i][slice] += neurons.error_[i] * input;
+    neurons.update_[i][input_symbol] += neurons.error_[i];
   }
   if (epoch == 0) {
-    for (unsigned int i = 0; i < input_node_.size(); ++i) {
-      forget_gate_[i] += forget_gate_update_[i];
-      input_node_[i] += input_node_update_[i];
-      input_gate_[i] += input_gate_update_[i];
-      output_gate_[i] += output_gate_update_[i];
+    for (unsigned int i = 0; i < num_cells_; ++i) {
+      Adam(&neurons.update_[i], &neurons.m_[i], &neurons.v_[i],
+          &neurons.weights_[i], learning_rate_, update_steps_);
     }
+    Adam(&neurons.gamma_u_, &neurons.gamma_m_, &neurons.gamma_v_,
+        &neurons.gamma_, learning_rate_, update_steps_);
+    Adam(&neurons.beta_u_, &neurons.beta_m_, &neurons.beta_v_,
+        &neurons.beta_, learning_rate_, update_steps_);
   }
-  return hidden_error_;
 }
-
+std::vector<std::valarray<std::valarray<float>>*> LstmLayer::Weights() {
+  std::vector<std::valarray<std::valarray<float>>*> weights;
+  weights.push_back(&forget_gate_.weights_);
+  weights.push_back(&input_node_.weights_);
+  weights.push_back(&output_gate_.weights_);
+  return weights;
+}
+
+
 class Lstm {
  public:
   Lstm(unsigned int input_size, unsigned int output_size, unsigned int
-      num_cells, unsigned int num_layers, int horizon, float learning_rate);
+      num_cells, unsigned int num_layers, int horizon, float learning_rate,
+      float gradient_clip);
   std::valarray<float>& Perceive(unsigned int input);
   std::valarray<float>& Predict(unsigned int input);
-  void SetInput(int index, float val);
-int ep();
+  int ep();
+
  private:
-  std::vector<std::unique_ptr<Layer>> layers_;
+  std::vector<std::unique_ptr<LstmLayer>> layers_;
   std::vector<unsigned int> input_history_;
   std::valarray<float> hidden_, hidden_error_;
   std::valarray<std::valarray<std::valarray<float>>> layer_input_,
@@ -194,12 +292,13 @@ int ep();
   float learning_rate_;
   unsigned int num_cells_, epoch_, horizon_, input_size_, output_size_;
 };
+
 Lstm::Lstm(unsigned int input_size, unsigned int output_size, unsigned int
-    num_cells, unsigned int num_layers, int horizon, float learning_rate) :
-    input_history_(horizon), hidden_(num_cells * num_layers + 1),
-    hidden_error_(num_cells),
+    num_cells, unsigned int num_layers, int horizon, float learning_rate,
+    float gradient_clip) : input_history_(horizon),
+    hidden_(num_cells * num_layers + 1), hidden_error_(num_cells),
     layer_input_(std::valarray<std::valarray<float>>(std::valarray<float>
-        (input_size + output_size + 1 + num_cells * 2), num_layers), horizon),
+        (input_size + 1 + num_cells * 2), num_layers), horizon),
     output_layer_(std::valarray<std::valarray<float>>(std::valarray<float>
         (num_cells * num_layers + 1), output_size), horizon),
     output_(std::valarray<float>(1.0 / output_size, output_size), horizon),
@@ -207,26 +306,23 @@ Lstm::Lstm(unsigned int input_size, unsigned int output_size, unsigned int
     horizon_(horizon), input_size_(input_size), output_size_(output_size) {
   hidden_[hidden_.size() - 1] = 1;
   for (int epoch = 0; epoch < horizon; ++epoch) {
-    layer_input_[epoch][0].resize(output_size + 1 + num_cells + input_size);
+    layer_input_[epoch][0].resize(1 + num_cells + input_size);
     for (unsigned int i = 0; i < num_layers; ++i) {
       layer_input_[epoch][i][layer_input_[epoch][i].size() - 1] = 1;
     }
   }
   for (unsigned int i = 0; i < num_layers; ++i) {
-    layers_.push_back(std::unique_ptr<Layer>(new Layer(layer_input_[0][i].
-        size(), input_size_, output_size_, num_cells, horizon, learning_rate)));
+    layers_.push_back(std::unique_ptr<LstmLayer>(new LstmLayer(
+        layer_input_[0][i].size() + output_size, input_size_, output_size_,
+        num_cells, horizon, gradient_clip, learning_rate)));
   }
 }
 
-void Lstm::SetInput(int index, float val) {
-  for (unsigned int i = 0; i < layers_.size(); ++i) {
-    layer_input_[epoch_][i][output_size_ + index] = val;
-  }
-}
 std::valarray<float>& Lstm::Perceive(unsigned int input) {
   int last_epoch = epoch_ - 1;
   if (last_epoch == -1) last_epoch = horizon_ - 1;
+  int old_input = input_history_[last_epoch];
   input_history_[last_epoch] = input;
   if (epoch_ == 0) {
     for (int epoch = horizon_ - 1; epoch >= 0; --epoch) {
@@ -234,57 +330,62 @@ std::valarray<float>& Lstm::Perceive(unsigned int input) {
         int offset = layer * num_cells_;
         for (unsigned int i = 0; i < output_size_; ++i) {
           float error = 0;
-          if (i == input_history_[epoch]) error = (1 - output_[epoch][i]);
-          else error = -output_[epoch][i];
+          if (i == input_history_[epoch]) error = output_[epoch][i] - 1;
+          else error = output_[epoch][i];
+          if(error<-lstmerr || error>lstmerr){
           for (unsigned int j = 0; j < hidden_error_.size(); ++j) {
             hidden_error_[j] += output_layer_[epoch][i][j + offset] * error;
           }
+          }
         }
-        hidden_error_ = layers_[layer]->BackwardPass(layer_input_[epoch][layer],
-            hidden_error_, epoch);
+        int prev_epoch = epoch - 1;
+        if (prev_epoch == -1) prev_epoch = horizon_ - 1;
+        int input_symbol = input_history_[prev_epoch];
+        if (epoch == 0) input_symbol = old_input;
+        layers_[layer]->BackwardPass(layer_input_[epoch][layer], epoch, layer,
+            input_symbol, &hidden_error_);
       }
     }
   }
-  output_layer_[epoch_] = output_layer_[last_epoch];
   for (unsigned int i = 0; i < output_size_; ++i) {
     float error = 0;
-    if (i == input) error = (1 - output_[last_epoch][i]);
-    else error = -output_[last_epoch][i];
-    output_layer_[epoch_][i] += learning_rate_ * error * hidden_;
+    if (i == input) error = output_[last_epoch][i] - 1;
+    else error = output_[last_epoch][i];
+    output_layer_[epoch_][i] = output_layer_[last_epoch][i];
+    output_layer_[epoch_][i] -= learning_rate_ * error * hidden_;
   }
   return Predict(input);
 }
 
 std::valarray<float>& Lstm::Predict(unsigned int input) {
   for (unsigned int i = 0; i < layers_.size(); ++i) {
-    std::fill_n(begin(layer_input_[epoch_][i]), output_size_, 0);
-    layer_input_[epoch_][i][input] = 1;
     auto start = begin(hidden_) + i * num_cells_;
     std::copy(start, start + num_cells_, begin(layer_input_[epoch_][i]) +
-        output_size_ + input_size_);
-    const auto& hidden = layers_[i]->ForwardPass(layer_input_[epoch_][i]);
-    std::copy(begin(hidden), end(hidden), start);
+        input_size_);
+    layers_[i]->ForwardPass(layer_input_[epoch_][i], input, &hidden_, i *
+        num_cells_);
     if (i < layers_.size() - 1) {
-      start = begin(layer_input_[epoch_][i + 1]) + output_size_ + num_cells_ +
+      auto start2 = begin(layer_input_[epoch_][i + 1]) + num_cells_ +
          input_size_;
-      std::copy(begin(hidden), end(hidden), start);
+      std::copy(start, start + num_cells_, start2);
     }
   }
   for (unsigned int i = 0; i < output_size_; ++i) {
-    output_[epoch_][i] = exp(std::inner_product(&hidden_[0],
-        &hidden_[hidden_.size()], &output_layer_[epoch_][i][0], 0.0));
-  }
-  double sum = 0;
-  for (unsigned int i = 0; i < output_size_; ++i) {
-    sum += output_[epoch_][i];
+    float sum = 0;
+    for (unsigned int j = 0; j < hidden_.size(); ++j) {
+      sum += hidden_[j] * output_layer_[epoch_][i][j];
+    }
+    output_[epoch_][i] = exp(sum);
   }
-  output_[epoch_] /= sum;
+  output_[epoch_] /= output_[epoch_].sum();
   int epoch = epoch_;
   ++epoch_;
   if (epoch_ == horizon_) epoch_ = 0;
   return output_[epoch];
 }
+
+
+
 int Lstm::ep() {
   return epoch_;
 }
@@ -308,7 +409,7 @@ int expected();
 ByteModel::ByteModel(unsigned int num_cells, unsigned int num_layers,
     int horizon, float learning_rate) : top_(255), mid_(0), bot_(0),
     probs_(1.0 / 256, 256), bit_context_(1),ex(0), lstm_(0, 256, num_cells,
-    num_layers, horizon, learning_rate) {}
+    num_layers, horizon, learning_rate,2) {}
 unsigned int ByteModel::Discretize(float p) {
   return 1 + 4094 * p;
 }
@@ -349,4 +450,4 @@ void ByteModel::Perceive(int bit) {
 }
 int ByteModel::epoch() {return lstm_.ep();}
 int ByteModel::expected() {return ex;}
-}
\ No newline at end of file
+}
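(Reviewer's note: the v3 rewrite above swaps plain SGD for layer-normalized gates trained with an Adam-style optimizer. The rule implemented by LstmLayer::Adam() is ordinary Adam with unusual constants: beta1 = 0.025, beta2 = 0.9999, eps = 1e-6, and a step size decaying as learning_rate*0.1/sqrt(5e-5*t+1). A standalone sketch of the same rule on plain valarrays:

    #include <cmath>
    #include <valarray>

    // One Adam step: m and v are running first/second-moment estimates of the
    // gradient g, t is the 1-based update count, and w is adjusted in place.
    void adam_step(const std::valarray<float>& g, std::valarray<float>& m,
                   std::valarray<float>& v, std::valarray<float>& w,
                   float learning_rate, float t) {
      const float beta1 = 0.025f, beta2 = 0.9999f, eps = 1e-6f;
      const float alpha = learning_rate * 0.1f / std::sqrt(5e-5f * t + 1.0f);
      m *= beta1; m += (1.0f - beta1) * g;      // first moment (mean)
      v *= beta2; v += (1.0f - beta2) * g * g;  // second moment (variance)
      std::valarray<float> mh = m / (1.0f - std::pow(beta1, t));  // bias correction
      std::valarray<float> vh = v / (1.0f - std::pow(beta2, t));
      w -= alpha * mh / std::sqrt(vh + eps);
    }

The very small beta1 means the first moment stays close to the raw gradient; most of the smoothing comes from the second-moment normalization and the decaying step size.)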
diff --git a/paq8pxd.cpp b/paq8pxd.cpp
index f3037ec..201d6c5 100644
--- a/paq8pxd.cpp
+++ b/paq8pxd.cpp
@@ -547,7 +547,7 @@ which computes 8 elements at a time, is not any faster).
 */
 
-#define PROGNAME "paq8pxd105" // Please change this if you change the program.
+#define PROGNAME "paq8pxd106" // Please change this if you change the program.
 #define SIMD_GET_SSE //uncomment to use SSE2 in ContexMap
 //#define MT //uncomment for multithreading, compression only. Handled by CMake and gcc when -DMT is passed.
 #define SIMD_CM_R // SIMD ContextMap byterun
@@ -1923,7 +1923,8 @@ class Mixer {
   Array<short, 32> tx; // N inputs from add()
   Array<short, 32> wx; // N*M weights
   Array<int> cxt;      // S contexts
-  Array<int> mrate;    // S contexts
+  Array<int> mrate;    // S contexts
+  Array<int> merr;
   int ncxt;            // number of contexts (0 to S)
   int base;            // offset of next context
 public:
@@ -2078,6 +2079,7 @@ void train(short *t, short *w, int n, int err) {
       if (ncxt>1) err=((x.y<<12)-pr[i])*mrate[i]/2;
       else err=((x.y<<12)-pr[i])*lrate/lshift;
       assert(err>=-32768 && err<32768);
+      if(err>=-merr[i] && err<=merr[i]) err=0;
       train(&tx[0], &wx[cxt[i]*N], nx, err);
     }
     reset();
@@ -2141,13 +2143,14 @@ void train(short *t, short *w, int n, int err) {
     tx[nx++]=p;
   }
   // Set a context (call S times, sum of ranges <= M)
-  void set(int cx, int range,int mr=14) {
+  void set(int cx, int range,int mr=14,int me=7*2) {
     assert(range>=0);
     assert(ncxt<S);
     assert(cx>=0);
     assert(cx<range);
     mrate[ncxt]=mr;
+    merr[ncxt]=me;
     cxt[ncxt++]=base+cx;
     base+=range;
   }
   Mixer(int n, int m, BlockData& bd, int s=1, int w=0,int g=7,int h=1):
-    N((n+15)&-16), M(m), S(s), tx(N), wx(N*M), cxt(S), mrate(S),
+    N((n+15)&-16), M(m), S(s), tx(N), wx(N*M), cxt(S), mrate(S), merr(S),
     ncxt(0), base(0), nx(0), pr(S), x(bd),lrate(S>1?7:g),lshift(S>1?1:h){
     assert(n>0 && N>0 && (N&15)==0 && M>0);
     int i;
     for (i=0; i<S; ++i)
       pr[i]=2048;
@@ ... @@ class jpegModelx: public Model {
-  Array<int> adv_pred,adv_pred1,adv_pred2, run_pred, sumu, sumv ;
+  Array<int> adv_pred,adv_pred1,adv_pred2,adv_pred3, run_pred, sumu, sumv ;
   Array<int> ls;  // block -> distance to previous block
   Array<int> lcp, zpos;
   Array<U32> blockW, blockN,/* nBlocks,*/ SamplingFactors;
@@ -9802,11 +9806,12 @@
   U32 skip;
   StateMap smx;
   U32 jmiss,zux,ccount,lma,ama;
-  StationaryMap Map1[34-5+6+1+1+1+1+1+1+3+1+2+1+2+2+3+3] = {
+  StationaryMap Map1[66] = {
     {16,3},{16,3},{16,3},{16,3}, {16,3},{16,3},{16,3},
     {15,3},{15,3},{15,3},{15,3},{15,3}, {15,3},{15,3},{15,3},{15,3},{15,3} ,{15,3},{15,3},{15,3} ,{15,3} ,{15,3},{15,3} ,{15,3},{15,3},
     {15,3},{15,3},{15,3},{14,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},
-    {15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3}
+    {15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},
+    {15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3},{15,3}
   };
 
 public:
@@ -9814,16 +9819,16 @@ class jpegModelx: public Model {
   lastPos(0), jpeg(0),app(0),sof(0),sos(0),data(0),ht(8),htsize(0),huffcode(0),
   huffbits(0),huffsize(0),rs(-1), mcupos(0), huf(128), mcusize(0),linesize(0),
   hbuf(2048),color(10), pred(4), dc(0),width(0), row(0),column(0),cbuf(0x20000),
-  cpos(0), rs1(0), ssum(0), ssum1(0), ssum2(0), ssum3(0),cbuf2(0x20000),adv_pred(4),adv_pred1(3),adv_pred2(3), run_pred(6),
+  cpos(0), rs1(0), ssum(0), ssum1(0), ssum2(0), ssum3(0),cbuf2(0x20000),adv_pred(4),adv_pred1(3),adv_pred2(3),adv_pred3(3), run_pred(6),
   sumu(8), sumv(8), ls(10),lcp(7), zpos(64), blockW(10), blockN(10), SamplingFactors(4),dqt_state(-1),dqt_end(0),qnum(0),pr0(0),
-  qtab(256),qmap(10),N(41),M(58),cxt(N),m1(32+32+3+4+1+1+1+1+N*3+1+3*M+6,2050+3+1024+64+1024 +256+16+64,bd, 3+1+1+1+1+1,0,3,2),
+  qtab(256),qmap(10),N(41),M(66),cxt(N),m1(32+32+3+4+1+1+1+1+N*3+1+3*M+6,2050+3+1024+64+1024 +256+16+64,bd, 8,0,3,2),
   apm{{0x40000,20-4},{0x40000,20-4},{0x20000,20-4},
   {0x20000,20-4},{0x20000,20-4},{0x20000,20-4},{0x20000,20-4},{0x20000,27},{0x20000,20-4}},
   x(bd),buf(bd.buf),MJPEGMap( {21, 3, 128, 127}),
   hbcount(2),prev_coef(0),prev_coef2(0), prev_coef_rs(0), rstpos(0),rstlen(0),
   hmap(level>10?0x8000000:(CMlimit(MEM()*2)),9,N,bd),skip(0),
   smx(256*256),jmiss(0),zux(0),ccount(1),lma(0),ama(0) { }
-  int inputs() {return 7+3*N+1+1+2+2+2+2+2+2+M+2+1;}
+  int inputs() {return 2*N+24+M;}
   int nets() {return 9 + 1025 + 1024 + 512 + 4096 + 64 + 4096 + 1024;}
   int netcount() {return 8;}
   int p(Mixer& m,int val1=0,int val2=0){
@@ -10209,6 +10214,7 @@ class jpegModelx: public Model {
         p=(p<0?-1:+1)*ilog(abs(p)+1);
         if (st==1) adv_pred1[i]=p;
         if (st==2) adv_pred2[i]=p;
+        if (st==3) adv_pred3[i]=p;
         if (st==0) {
           adv_pred[i]=p;
         }
@@ -10326,9 +10332,9 @@
       cxt[0]=hash(++n, coef, adv_pred[2]/12+(run_pred[2]<<8), ssum2>>6, prev_coef/72);
       cxt[1]=hash(++n, coef, adv_pred[0]/12+(run_pred[0]<<8), ssum2>>6, prev_coef/72);
       cxt[2]=hash(++n, coef, adv_pred[1]/11+(run_pred[1]<<8), ssum2>>6);
-      cxt[3]=hash(++n, rs1, adv_pred[2]/7,adv_pred1[2]/11, adv_pred2[2]/11,run_pred[5]/2, prev_coef/10);
-      cxt[4]=hash(++n, rs1, adv_pred[0]/7, adv_pred1[0]/11,adv_pred2[0]/11,run_pred[3]/2, prev_coef/10);
-      cxt[5]=hash(++n, rs1, adv_pred[1]/11,adv_pred1[1]/11,adv_pred2[1]/11, run_pred[4]);
+      cxt[3]=hash(++n, rs1, adv_pred[2]/7,adv_pred1[2]/11, adv_pred2[2]/11, adv_pred3[2]/11,run_pred[5]/2, prev_coef/10);
+      cxt[4]=hash(++n, rs1, adv_pred[0]/7, adv_pred1[0]/11,adv_pred2[0]/11,adv_pred3[0]/11,run_pred[3]/2, prev_coef/10);
+      cxt[5]=hash(++n, rs1, adv_pred[1]/11,adv_pred1[1]/11,adv_pred2[1]/11,adv_pred3[1]/11, run_pred[4]);
       cxt[6]=hash(++n, adv_pred[2]/14, run_pred[2], adv_pred[0]/14, run_pred[0]);
       cxt[7]=hash(++n, cbuf[cpos-blockN[mcupos>>6]]>>4, adv_pred[3]/17, run_pred[1], run_pred[5]);
      cxt[8]=hash(++n, cbuf[cpos-blockW[mcupos>>6]]>>4, adv_pred[3]/17, run_pred[1], run_pred[3]);
@@ -10381,7 +10387,6 @@ if (slow==true) x.count=0;
       m1.add((p-2048)>>3);
       m1.add(p=stretch(p));
       m.add(p>>(1+hmap.siy()));
-      m.add((p>>2)*(n1-n0));
       int p0=4095-p1;
       m.add((((p1&n0)-(p0&n1))*1)/(4*4));
       m1.add((((p1&n1)-(p0&n0))*1)/(4*4));
@@ -10395,7 +10400,6 @@ if (slow==true) x.count=0;
       m1.add((p-2048)>>3);
       m1.add(p=stretch(p));
       m.add(p>>(1+hmap.siy()));
-      m.add((p>>2)*(n1-n0));
       int p0=4095-p1;
       m.add((((p1&n0)-(p0&n1))*1)/(4*4));
       m1.add((((p1&n1)-(p0&n0))*1)/(4*4));
@@ -10409,7 +10413,6 @@ if (slow==true) x.count=0;
       m1.add((p-2048)>>3);
       m1.add(p=stretch(p));
       m.add(p>>(1+hmap.siy()));
-      m.add((p>>2)*(n1-n0));
       int p0=4095-p1;
       m.add((((p1&n0)-(p0&n1))*1)/(4*4));
       m1.add((((p1&n1)-(p0&n0))*1)/(4*4));
@@ -10491,6 +10494,15 @@ if (slow==true) x.count=0;
       Map1[i++].set(hash(hc>> 2, adv_pred1[1]/16,prev_coef/42));
       Map1[i++].set(hash(hc>> 2, adv_pred1[0]/16,prev_coef/42));
       Map1[i++].set(hash(hc>> 2, adv_pred2[2]/16,prev_coef/42));
+
+      Map1[i++].set(hash(hc>> 2, adv_pred1[2]/16,prev_coef/42));
+      Map1[i++].set(hash(hc>> 2, adv_pred2[2]/16,prev_coef/42));
+      Map1[i++].set(hash(hc>> 2, adv_pred2[1]/16,prev_coef/42));
+      Map1[i++].set(hash(hc>> 2, adv_pred2[0]/16,prev_coef/42));
+      Map1[i++].set(hash(hc>> 2, adv_pred2[1]/7,rs1));
+      Map1[i++].set(hash(hc>> 2, adv_pred3[2]/16,prev_coef/42));
+      Map1[i++].set(hash(hc>> 2, adv_pred3[1]/16,prev_coef/42));
+      Map1[i++].set(hash(hc>> 2, adv_pred3[0]/16,prev_coef/42));
       // etc
       MJPEGMap.set(hash(mcupos, column, row, hc >> 2));
@@ -10508,9 +10520,9 @@ if (slow==true) x.count=0;
       m1.add(sd=stretch(sd));
       m.add(sd);
-      m1.set(firstcol, 2);
-      m1.set( coef+256*min(3,huffbits), 1024,13 );
-      m1.set( (hc&0x3FE)*2+min(3,ilog2(zu+zv)), 2048,13 );
+      m1.set(firstcol, 2,14,250);
+      m1.set( coef+256*min(3,huffbits), 1024,13,250*2 );
+      m1.set( (hc&0x3FE)*2+min(3,ilog2(zu+zv)), 2048,13,250*2 );
       m1.set(mcupos&63, 64 );
       int colCtx=(width>1024)?(min(1023, column/max(1, width/1024))):column;
       m1.set(colCtx, 1024);
@@ -10553,8 +10565,8 @@ if (slow==true) x.count=0;
       m.add(stretch(pr)>>1);
       m.add((pr-2048)>>3);
-      m.set( 1 + (zu+zv<5)+(huffbits>8)*2+firstcol*4, 9 ,15);
-      m.set( 1 + (hc&0xFF) + 256*min(3,(zu+zv)/3), 1025,9 );
+      m.set( 1 + (zu+zv<5)+(huffbits>8)*2+firstcol*4, 9 ,15,250);
+      m.set( 1 + (hc&0xFF) + 256*min(3,(zu+zv)/3), 1025,9 ,250);
       m.set( coef+256*min(3,huffbits/2), 1024 ,13);
       m.set((hc)&511, 512);
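(Reviewer's note: the eight Map1 additions and the widened cxt hashes above follow the file's existing pattern: each StationaryMap is keyed by a hash of the coefficient context hc plus one quantized predictor. As a rough standalone sketch of what a {bits, rate} stationary map does; this is a hypothetical class, much simpler than paq8pxd's real StationaryMap:

    #include <cstdint>
    #include <vector>

    // Sketch: a direct-lookup context map with slow, stationary adaptation.
    // Each slot holds a 16-bit probability that the next bit is 1.
    class StationaryMapSketch {
      std::vector<uint16_t> t;
      uint32_t mask;
      int rate, cx = 0;
    public:
      StationaryMapSketch(int bits, int rate)
          : t(size_t(1) << bits, 1 << 15), mask((1u << bits) - 1), rate(rate) {}
      void set(uint64_t h) { cx = int(h & mask); }   // select slot by hash
      int p() const { return t[cx] >> 4; }           // 12-bit prediction
      void update(int bit) {                         // bit is 0 or 1
        int target = bit ? 65535 : 0;
        t[cx] += (target - t[cx]) >> rate;           // move 1/2^rate toward it
      }
    };

With {15,3} as in the patch, that is a 32K-entry table updated by 1/8 of the error per bit; the added maps give the mixer extra quantized views of adv_pred1..adv_pred3 at low cost.)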