From 3b7842bd845a74f039aa3076bf1ee86db4d56cbf Mon Sep 17 00:00:00 2001
From: woodjohndavid <57116722+woodjohndavid@users.noreply.github.com>
Date: Sat, 26 Jun 2021 14:01:11 -0700
Subject: [PATCH 1/8] recodebeam changes

---
 src/lstm/jdwcrap.cpp    | 1469 +++++++++++++++++++++++++++++++++++++++
 src/lstm/recodebeam.cpp |    3 +
 src/lstm/recodebeam.h   |    8 +
 3 files changed, 1480 insertions(+)
 create mode 100644 src/lstm/jdwcrap.cpp
diff --git a/src/lstm/jdwcrap.cpp b/src/lstm/jdwcrap.cpp
new file mode 100644
index 0000000000..d3df8e7f0d
--- /dev/null
+++ b/src/lstm/jdwcrap.cpp
@@ -0,0 +1,1469 @@
+///////////////////////////////////////////////////////////////////////
+// File:        jdwcrap.cpp (debug-instrumented copy of recodebeam.cpp)
+// Description: Beam search to decode from the re-encoded CJK as a sequence of
+//              smaller numbers in place of a single large code.
+// Author:      Ray Smith
+//
+// (C) Copyright 2015, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "recodebeam.h"
+
+#include "networkio.h"
+#include "pageres.h"
+#include "unicharcompress.h"
+
+#include <algorithm> // for std::reverse
+#include <deque>
+#include <map>
+#include <set>
+#include <tuple>
+#include <unordered_set>
+#include <vector>
+
+namespace tesseract {
+
+// The beam width at each code position; kBeamWidths[0] is also the top-n size handed to ComputeTopN.
+const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
+    5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
+};
+
+static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"}; // Debug names, indexed by NodeContinuation.
+
+// Prints debug details of the node, then of up to depth predecessors via prev.
+void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const {
+  if (code != null_char) {
+    tprintf("label=%d, uid=%d=%s", code, unichar_id, unicharset.debug_str(unichar_id).c_str());
+  } else {
+    tprintf("null_char");
+  }
+  tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%" PRIx64, score, certainty,
+          start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
+          end_of_word ? " End" : "", permuter, code_hash);
+  if (depth <= 0 || prev == nullptr) {
+    tprintf("\n"); // End of the chain: terminate the output line.
+    return;
+  }
+  tprintf(" prev:");
+  prev->Print(null_char, unicharset, depth - 1);
+}
+
+// Borrows the pointer, which is expected to survive until *this is deleted.
+// dict may be nullptr. The search counts as space-delimited unless a dict is
+// given whose IsSpaceDelimitedLang() is false.
+// The *_whammy_ members (JDWTODO) start out cleared; ComputeTopN maintains them.
+RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text,
+                                   Dict *dict)
+    : recoder_(recoder)
+    , beam_size_(0)
+    , top_code_(-1)
+    , second_code_(-1)
+    , in_double_whammy_(false)  // JDWTODO
+    , first_whammy_(-1)  // JDWTODO
+    , second_whammy_(-1)  // JDWTODO
+    , dict_(dict)
+    , space_delimited_(dict == nullptr || dict->IsSpaceDelimitedLang())
+    , is_simple_text_(simple_text)
+    , null_char_(null_char) {
+}
+
+// Releases every RecodeBeam owned by the primary and by the secondary beam.
+RecodeBeamSearch::~RecodeBeamSearch() {
+  for (const auto *owned : {&beam_, &secondary_beam_}) {
+    for (RecodeBeam *step : *owned) {
+      delete step;
+    }
+  }
+}
+
+// Decodes the set of network outputs one timestep at a time, storing the lattice internally.
+void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double cert_offset,
+                              double worst_dict_cert, const UNICHARSET *charset,
+                              int lstm_choice_mode) {
+  beam_size_ = 0; // DecodeStep sets this to t + 1 as each timestep is added.
+  int width = output.Width();
+  fprintf(stderr, "recodebeam decode #1 outputwidth= %i lstmchoice= %i \n", width, lstm_choice_mode);  // JDWDEBUG
+  if (lstm_choice_mode) { // Non-zero mode: the per-timestep choice lists are rebuilt from scratch.
+    timesteps.clear();
+  }
+  for (int t = 0; t < width; ++t) {
+    fprintf(stderr, "recodebeam decode #1 unicharid,code= timestep# %i \n", t);  // JDWDEBUG
+    ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]); // Flag the top outputs of this timestep first.
+    DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
+    if (lstm_choice_mode) {
+      SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t); // Appends one entry to timesteps.
+    }
+  }
+}
+    
+void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output, double dict_ratio,
+                              double cert_offset, double worst_dict_cert,
+                              const UNICHARSET *charset) { // Array variant of Decode: no choice lists are saved.
+  fprintf(stderr, "recodebeam decode #2 \n");  // JDWDEBUG
+  beam_size_ = 0; // DecodeStep sets this to t + 1 as each timestep is added.
+  int width = output.dim1(); // dim1 counts the timesteps, dim2 the outputs per timestep.
+  for (int t = 0; t < width; ++t) {
+    fprintf(stderr, "recodebeam decode #2 unicharid,code= timestep# %i \n", t);  // JDWDEBUG (tag was "#@")
+    ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
+    DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
+  }
+}
+
+// Rebuilds secondary_beam_ by decoding output again while excluding, for each
+// character segment, the codes recorded in excludedUnichars.
+void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio,
+                                            double cert_offset, double worst_dict_cert,
+                                            const UNICHARSET *charset, int lstm_choice_mode) {
+  fprintf(stderr, "recodebeam decode secondary \n");  // JDWDEBUG
+  for (RecodeBeam *stale : secondary_beam_) {
+    delete stale;
+  }
+  secondary_beam_.clear();
+  if (character_boundaries_.size() < 2) {
+    return;
+  }
+  const int width = output.Width();
+  for (int t = 0, bucket = 0; t < width; ++t) {
+    // Move on to the character segment that contains timestep t.
+    while ((bucket + 1) < character_boundaries_.size() && t >= character_boundaries_[bucket + 1]) {
+      ++bucket;
+    }
+    ComputeSecTopN(&excludedUnichars[bucket], output.f(t), output.NumFeatures(), kBeamWidths[0]);
+    DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
+  }
+}
+
+// Stores, as one new entry of timesteps, every output of this timestep with a
+// probability of at least 0.01, sorted from most to least likely.
+void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outputs,
+                                              const UNICHARSET *charset, int xCoord) {
+  fprintf(stderr, "recodebeam savemostcertainchoices \n");  // JDWDEBUG
+  using Choice = std::pair<const char *, float>;
+  std::vector<Choice> choices;
+  for (int i = 0; i < num_outputs; ++i) {
+    const float prob = outputs[i];
+    if (!(prob >= 0.01f)) {
+      continue;
+    }
+    // Output 0 maps to unichar-id 0 and any later output i to id i + 2; the
+    // last two outputs have no id and are stored as an empty string.
+    const char *character = "";
+    if (i + 2 < num_outputs) {
+      character = charset->id_to_unichar_ext(i > 0 ? i + 2 : i);
+    }
+    // Keep the list ordered: insert behind every strictly more likely choice.
+    auto pos = std::find_if(choices.begin(), choices.end(),
+                            [prob](const Choice &c) { return !(c.second > prob); });
+    choices.insert(pos, Choice(character, prob));
+  }
+  timesteps.push_back(choices);
+}
+
+// Cuts timesteps into per-character segments delimited by character_boundaries_.
+void RecodeBeamSearch::segmentTimestepsByCharacters() {
+  for (int i = 1; i < character_boundaries_.size(); ++i) {
+    segmentedTimesteps.emplace_back();
+    for (int t = character_boundaries_[i - 1]; t < character_boundaries_[i]; ++t) {
+      segmentedTimesteps.back().push_back(timesteps[t]);
+    }
+  }
+}
+// Flattens the per-character segments back into a single list of timesteps,
+// preserving their order.
+std::vector<std::vector<std::pair<const char *, float>>>
+RecodeBeamSearch::combineSegmentedTimesteps(
+    std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *segmentedTimesteps) {
+  std::vector<std::vector<std::pair<const char *, float>>> flattened;
+  for (const auto &segment : *segmentedTimesteps) {
+    flattened.insert(flattened.end(), segment.begin(), segment.end());
+  }
+  return flattened;
+}
+
+void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
+                                               std::vector<int> *char_bounds_, int maxWidth) { // Appends the boundaries to char_bounds_.
+  fprintf(stderr, "recodebeam calculatecharboundaries maxwidth= %i \n", maxWidth);  // JDWDEBUG
+  // JDWTODO: the first boundary used to be 0 (char_bounds_->push_back(0)); it is now the first start.
+  char_bounds_->push_back((*starts)[0]);   // JDWTODO
+  for (int i = 0; i < ends->size(); ++i) { // Reads (*starts)[i + 1]: starts needs one more entry than ends.
+    int middle = ((*starts)[i + 1] - (*ends)[i]) / 2; // Half the gap between this end and the next start.
+    fprintf(stderr, "%s %i %i %i \n", "calculatecharboundaries start&end&middle=", (*starts)[i + 1], (*ends)[i], middle);
+    char_bounds_->push_back((*ends)[i] + middle);
+  }
+  char_bounds_->pop_back(); // The last boundary pushed so far is replaced by maxWidth.
+  char_bounds_->push_back(maxWidth);
+}
+
+// Returns the best path as labels/xcoords similar to simple CTC; xcoords gets a final entry equal to the path length.
+void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector<int> *labels,
+                                               std::vector<int> *xcoords) const {
+  fprintf(stderr, "recodebeam extractbestpathaslabels \n");  // JDWDEBUG
+  labels->clear();
+  xcoords->clear();
+  std::vector<const RecodeNode *> best_nodes;
+  ExtractBestPaths(&best_nodes, nullptr);
+  // CTC over the best nodes: drop nulls and, unless is_simple_text_, collapse repeats.
+  const int width = best_nodes.size();
+  for (int t = 0; t < width;) {
+    const int label = best_nodes[t]->code;
+    if (label != null_char_) {
+      labels->push_back(label);
+      xcoords->push_back(t);
+    }
+    do {
+      ++t;
+    } while (t < width && !is_simple_text_ && best_nodes[t]->code == label);
+  }
+  xcoords->push_back(width);
+}
+
+// Returns the best path as unichar-ids/certs/ratings/xcoords skipping
+// duplicates, nulls and intermediate parts; prints the path when debug is set.
+void RecodeBeamSearch::ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset,
+                                                   std::vector<int> *unichar_ids,
+                                                   std::vector<float> *certs,
+                                                   std::vector<float> *ratings,
+                                                   std::vector<int> *xcoords) const {
+  fprintf(stderr, "recodebeam extractbestpathasunicharids \n");  // JDWDEBUG
+  std::vector<const RecodeNode *> best_nodes;
+  ExtractBestPaths(&best_nodes, nullptr); // nullptr: no second-best path is requested.
+  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords); // Clears, then refills, all four outputs.
+  if (debug) {
+    DebugPath(unicharset, best_nodes);
+    DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, *xcoords);
+  }
+}
+
+// Returns the best path as a set of WERD_RES in words (emptied first); also rebuilds character_boundaries_.
+void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug,
+                                              const UNICHARSET *unicharset,
+                                              PointerVector<WERD_RES> *words,
+                                              int lstm_choice_mode) {
+fprintf(stderr, "recodebeam extractbestpathaswords \n");  // JDWDEBUG
+words->truncate(0);
+  std::vector<int> unichar_ids;
+  std::vector<float> certs;
+  std::vector<float> ratings;
+  std::vector<int> xcoords;
+  std::vector<const RecodeNode *> best_nodes;
+  std::vector<const RecodeNode *> second_nodes;
+  character_boundaries_.clear(); // Refilled below from the best path.
+  ExtractBestPaths(&best_nodes, &second_nodes);
+  if (debug) {
+    DebugPath(unicharset, best_nodes);
+    ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, &xcoords); // Debug only; overwritten below.
+    tprintf("\nSecond choice path:\n");
+    DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, xcoords);
+  }
+  // If lstm choice mode is required in granularity level 2, it stores the x
+  // Coordinates of every chosen character, to match the alternative choices to
+  // it.
+  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords,
+                          &character_boundaries_);
+  int num_ids = unichar_ids.size();
+  // JDWDEBUG START: dump every unichar of the best path with its id.
+  for (int i = 0; i < num_ids; i++){
+    const char *c = unicharset->id_to_unichar_ext(unichar_ids[i]);
+    fprintf(stderr, "recodebeam extractbestpathaswords unichar,unicharid= %s %i \n", c, unichar_ids[i]);
+  }
+  // JDWDEBUG END
+  if (debug) {
+    DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, xcoords);
+  }
+  // Split the unichar-ids into words and build a WERD_RES for each of them.
+  int word_end = 0;
+  float prev_space_cert = 0.0f; // Certainty of the space that ended the previous word (0 if none).
+  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
+    for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
+      // A word is terminated when a space character or start_of_word flag is
+      // hit. We also want to force a separate word for every non
+      // space-delimited character when not in a dictionary context.
+      if (unichar_ids[word_end] == UNICHAR_SPACE) {
+        break;
+      }
+      int index = xcoords[word_end]; // Timestep of the node that produced this unichar-id.
+      if (best_nodes[index]->start_of_word) {
+        break;
+      }
+      if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
+          (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
+           !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1]))) {
+        break;
+      }
+    }
+    float space_cert = 0.0f;
+    if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
+      space_cert = certs[word_end];
+    }
+    bool leading_space = word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
+    // Create a WERD_RES for the output word; its space certainty is the lower of the two surrounding spaces.
+    WERD_RES *word_res =
+        InitializeWord(leading_space, line_box, word_start, word_end,
+                       std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
+    for (int i = word_start; i < word_end; ++i) {
+      auto *choices = new BLOB_CHOICE_LIST;
+      BLOB_CHOICE_IT bc_it(choices);
+      auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
+                                     static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
+      int col = i - word_start; // One diagonal cell of the ratings matrix per character.
+      choice->set_matrix_cell(col, col);
+      bc_it.add_after_then_move(choice);
+      word_res->ratings->put(col, col, choices);
+    }
+    int index = xcoords[word_end - 1]; // Timestep of the last character of the word.
+    word_res->FakeWordFromRatings(best_nodes[index]->permuter);
+    words->push_back(word_res);
+    prev_space_cert = space_cert;
+    if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
+      ++word_end; // Step over the space that ended the word.
+    }
+  }
+}
+
+struct greater_than { // Orders RecodeNode pointers by descending score; usable with const objects and rvalues.
+  inline bool operator()(const RecodeNode *node1, const RecodeNode *node2) const {
+    return (node1->score > node2->score);
+  }
+};
+
+// Debug print of the beam (secondary selects secondary_beam_), one layer per
+// timestep, each node shown as prev(|)prev-score(>)node(|)node-score.
+void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset,
+                                  bool secondary) const {
+  std::unordered_set<const RecodeNode *> visited;
+  const std::vector<RecodeBeam *> &beam = !secondary ? beam_ : secondary_beam_;
+  // create the topology: one (initially empty) layer per timestep
+  std::vector<std::vector<const RecodeNode *>> topology(beam.size());
+  // fill the topology with depths first
+  for (int step = beam.size() - 1; step >= 0; --step) {
+    std::vector<tesseract::RecodePair> &heaps = beam.at(step)->beams_->heap();
+    // Iterate by reference: the pointers kept in visited/topology must refer
+    // to the nodes stored in the heap, not to a copy that dies every iteration.
+    for (const auto &node : heaps) {
+      int backtracker = 0;
+      const RecodeNode *curr = &node.data();
+      while (curr != nullptr && !visited.count(curr)) {
+        visited.insert(curr);
+        topology[step - backtracker].push_back(curr);
+        curr = curr->prev;
+        ++backtracker;
+      }
+    }
+  }
+  int ct = 0;
+  int cb = 1;
+  for (const auto &layer : topology) {
+    if (cb >= character_boundaries_.size()) {
+      break;
+    }
+    if (ct == character_boundaries_[cb]) {
+      tprintf("***\n");
+      ++cb;
+    }
+    for (const RecodeNode *node : layer) {
+      const char *code;
+      int intCode;
+      if (node->unichar_id != INVALID_UNICHAR_ID) {
+        code = charset->id_to_unichar(node->unichar_id);
+        intCode = node->unichar_id;
+      } else if (node->code == null_char_) {
+        intCode = 0;
+        code = " ";
+      } else {
+        intCode = 666; // Placeholder shown for a code that has no unichar-id.
+        code = "*";
+      }
+      int intPrevCode = 0;
+      const char *prevCode;
+      float prevScore = 0;
+      if (node->prev != nullptr) {
+        prevScore = node->prev->score;
+        if (node->prev->unichar_id != INVALID_UNICHAR_ID) {
+          prevCode = charset->id_to_unichar(node->prev->unichar_id);
+          intPrevCode = node->prev->unichar_id;
+        } else if (node->prev->code == null_char_) { // Classify prev by its own code, not by node's.
+          intPrevCode = 0;
+          prevCode = " ";
+        } else {
+          prevCode = "*";
+          intPrevCode = 666;
+        }
+      } else {
+        prevCode = " ";
+      }
+      if (uids) {
+        tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode, node->score);
+      } else {
+        tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
+      }
+    }
+    tprintf("-\n");
+    ++ct;
+  }
+  tprintf("***\n");
+}
+
+// Extracts, for every character segment between two consecutive character
+// boundaries, the best-scoring choice found in the current beam. The choice
+// is appended to ctc_choices and the codes of its path are added to
+// excludedUnichars for the follow-up decoding. The secondary beam, if any,
+// is deleted at the end.
+void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
+  if (character_boundaries_.size() < 2) {
+    return;
+  }
+  fprintf(stderr, "recodebeam extractsymbolchoices \n");  // JDWDEBUG
+  // For the first iteration the original beam is analyzed. After that a
+  // new beam is calculated based on the results from the original beam.
+  std::vector<RecodeBeam *> &currentBeam = secondary_beam_.empty() ? beam_ : secondary_beam_;
+  character_boundaries_[0] = 0;
+  for (int j = 1; j < character_boundaries_.size(); ++j) {
+    std::vector<int> unichar_ids;
+    std::vector<float> certs;
+    std::vector<float> ratings;
+    std::vector<int> xcoords;
+    int backpath = character_boundaries_[j] - character_boundaries_[j - 1];
+    std::vector<tesseract::RecodePair> &heaps =
+      currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
+    std::vector<const RecodeNode *> best_nodes;
+    std::vector<const RecodeNode *> best;
+    // Scan the segmented node chain for valid unichar ids.
+    // Iterate by reference: best keeps pointers to the nodes, so they must
+    // point into the heap and not at a loop-local copy of the entry.
+    for (const auto &entry : heaps) {
+      bool validChar = false;
+      int backcounter = 0;
+      const RecodeNode *node = &entry.data();
+      while (node != nullptr && backcounter < backpath) {
+        if (node->code != null_char_ && node->unichar_id != INVALID_UNICHAR_ID) {
+          validChar = true;
+          break;
+        }
+        node = node->prev;
+        ++backcounter;
+      }
+      if (validChar) {
+        best.push_back(&entry.data());
+      }
+    }
+    // find the best rated segmented node chain and extract the unichar id.
+    if (!best.empty()) {
+      std::sort(best.begin(), best.end(), greater_than());
+      ExtractPath(best[0], &best_nodes, backpath);
+      ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords);
+    }
+    if (!unichar_ids.empty()) {
+      int bestPos = 0;
+      for (int i = 1; i < unichar_ids.size(); ++i) {
+        if (ratings[i] < ratings[bestPos]) {
+          bestPos = i;
+        }
+      }
+      // Exclude the best choice for the followup decoding.
+      std::unordered_set<int> excludeCodeList;
+      for (auto &best_node : best_nodes) {
+        if (best_node->code != null_char_) {
+          excludeCodeList.insert(best_node->code);
+        }
+      }
+      if (j - 1 < excludedUnichars.size()) {
+        for (auto elem : excludeCodeList) {
+          excludedUnichars[j - 1].insert(elem);
+        }
+      } else {
+        excludedUnichars.push_back(excludeCodeList);
+      }
+      // Save the best choice for the choice iterator.
+      if (j - 1 < ctc_choices.size()) {
+        int id = unichar_ids[bestPos];
+        const char *result = unicharset->id_to_unichar_ext(id);
+        float rating = ratings[bestPos];
+        ctc_choices[j - 1].push_back(std::pair<const char *, float>(result, rating));
+      } else {
+        std::vector<std::pair<const char *, float>> choice;
+        int id = unichar_ids[bestPos];
+        const char *result = unicharset->id_to_unichar_ext(id);
+        float rating = ratings[bestPos];
+        choice.emplace_back(result, rating);
+        ctc_choices.push_back(choice);
+      }
+      // fill the blank spot with an empty array
+    } else {
+      if (j - 1 >= excludedUnichars.size()) {
+        std::unordered_set<int> excludeCodeList;
+        excludedUnichars.push_back(excludeCodeList);
+      }
+      if (j - 1 >= ctc_choices.size()) {
+        std::vector<std::pair<const char *, float>> choice;
+        ctc_choices.push_back(choice);
+      }
+    }
+  }
+  for (auto data : secondary_beam_) {
+    delete data;
+  }
+  secondary_beam_.clear();
+}
+
+// Generates debug output of the content of the beams after a Decode: every
+// non-empty dict/non-dict x continuation heap of each decoded position.
+void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const {
+  fprintf(stderr, "recodebeam debugbeams \n");  // JDWDEBUG
+  for (int p = 0; p < beam_size_; ++p) {
+    for (int d = 0; d < 2; ++d) {
+      for (int c = 0; c < NC_COUNT; ++c) {
+        const int index = BeamIndex(d, static_cast<NodeContinuation>(c), 0);
+        const RecodeHeap &heap = beam_[p]->beams_[index];
+        if (!heap.empty()) {
+          // Print all the best scoring nodes for each unichar found.
+          tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", kNodeContNames[c]);
+          DebugBeamPos(unicharset, heap);
+        }
+      }
+    }
+  }
+}
+
+// Generates debug output of the content of a single beam position: for each
+// unichar-id the best scoring node of the heap is printed (in id order),
+// followed by the best scoring node that has no valid unichar-id.
+void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHeap &heap) const {
+  // Best node seen so far per unichar-id; nullptr until one is found.
+  std::vector<const RecodeNode *> unichar_bests(unicharset.size());
+  const RecodeNode *null_best = nullptr;
+  const int heap_size = heap.size();
+  for (int i = 0; i < heap_size; ++i) {
+    const RecodeNode *node = &heap.get(i).data();
+    // The node competes either for the slot of its unichar-id or, without a
+    // valid id, for the shared null_best slot.
+    const RecodeNode *&slot =
+        node->unichar_id == INVALID_UNICHAR_ID ? null_best : unichar_bests[node->unichar_id];
+    if (slot == nullptr || slot->score < node->score) {
+      slot = node;
+    }
+  }
+  // Depth 1: each winner is printed together with its predecessor.
+  for (const RecodeNode *best : unichar_bests) {
+    if (best != nullptr) {
+      best->Print(null_char_, unicharset, 1);
+    }
+  }
+  if (null_best != nullptr) {
+    null_best->Print(null_char_, unicharset, 1);
+  }
+}
+
+// Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
+// duplicates, nulls and intermediate parts.
+/* static */
+void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNode *> &best_nodes,
+                                               std::vector<int> *unichar_ids,
+                                               std::vector<float> *certs,
+                                               std::vector<float> *ratings,
+                                               std::vector<int> *xcoords,
+                                               std::vector<int> *character_boundaries) {
+  unichar_ids->clear();
+  certs->clear();
+  ratings->clear();
+  xcoords->clear();
+  std::vector<int> starts;
+  std::vector<int> ends;
+  // Backtrack extracting only valid, non-duplicate unichar-ids.
+  fprintf(stderr, "recodebeam extractpathasunicharids \n");  // JDWDEBUG
+  int t = 0;
+  int width = best_nodes.size();
+  fprintf(stderr, "%s %i \n", "extractpathasunicharids width=", width);
+  while (t < width) {
+    double certainty = 0.0;
+    double rating = 0.0;
+    bool foundInvalid = false;    // JDWDEBUG
+    while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
+      fprintf(stderr, "%s %i \n", "extractpathasunicharids bypass invalid unicharid code=", best_nodes[t]->code);   // JDWDEBUG
+      foundInvalid = true;    // JDWDEBUG
+      double cert = best_nodes[t++]->certainty;
+      if (cert < certainty) {
+        certainty = cert;
+      }
+      rating -= cert;
+    }
+    // starts.push_back(t);    // JDWTODO
+    if (t < width) {
+      starts.push_back(t);    // JDWTODO
+      fprintf(stderr, "%s %i %i %f \n", "extractpathasunicharids valid unicharid,code,score=", best_nodes[t]->unichar_id, best_nodes[t]->code, best_nodes[t]->score);   // JDWDEBUG
+      // JDWDEBUG START
+      if (!foundInvalid)
+        fprintf(stderr, "%s \n", "extractpathasunicharids foundvalid with no invalid");
+      // JDWDEBUG END
+      int unichar_id = best_nodes[t]->unichar_id;
+      if (unichar_id == UNICHAR_SPACE && !certs->empty() && best_nodes[t]->permuter != NO_PERM) {
+        // All the rating and certainty go on the previous character except
+        // for the space itself.
+        fprintf(stderr, "%s %i \n", "extractpathasunicharids unicharid space", best_nodes[t]->code);  // JDWDEBUG
+        if (certainty < certs->back()) {
+          certs->back() = certainty;
+        }
+        ratings->back() += rating;
+        certainty = 0.0;
+        rating = 0.0;
+      }
+      xcoords->push_back(t);
+      unichar_ids->push_back(unichar_id);
+      // Fold in the certainty of the character's own node first and then of
+      // every duplicate that follows it. (Starting at t + 1 would leave the
+      // character's own certainty out of its rating.)   // JDWTODO
+      const int char_t = t;
+      do {
+        double cert = best_nodes[t]->certainty;
+        // Special-case NO-PERM space to forget the certainty of the previous
+        // nulls. See long comment in ContinueContext.
+        if (cert < certainty ||
+            (unichar_id == UNICHAR_SPACE && best_nodes[t]->permuter == NO_PERM)) {
+          certainty = cert;
+        }
+        rating -= cert;
+        // JDWDEBUG START
+        if (t > char_t)
+          fprintf(stderr, "%s %i %i \n", "extractpathasunicharids duplicate removed unicharid,code=", best_nodes[t]->unichar_id, best_nodes[t]->code);  // JDWDEBUG
+        // JDWDEBUG END
+        ++t;
+      } while (t < width && best_nodes[t]->duplicate);
+      ends.push_back(t);
+      certs->push_back(certainty);
+      ratings->push_back(rating);
+    } else if (!certs->empty()) {
+      if (certainty < certs->back()) {
+        certs->back() = certainty;
+      }
+      ratings->back() += rating;
+    }
+  }
+  starts.push_back(width);
+  if (character_boundaries != nullptr) {
+    calculateCharBoundaries(&starts, &ends, character_boundaries, width);
+  }
+  xcoords->push_back(width);
+}
+
+// Sets up a word with the ratings matrix and fake blobs with boxes in the
+// right places. Boxes span line_box vertically; x comes from character_boundaries_ * scale_factor (xcoords is not read).
+WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_box, int word_start,
+                                           int word_end, float space_certainty,
+                                           const UNICHARSET *unicharset,
+                                           const std::vector<int> &xcoords, float scale_factor) {
+  // Make a fake blob for each non-zero label.
+  fprintf(stderr, "recodebeam initializeword scalefactor= %f \n", scale_factor);  // JDWDEBUG
+  fprintf(stderr, "recodebeam initializeword start,end= %i %i \n", word_start, word_end);  // JDWDEBUG
+  C_BLOB_LIST blobs;
+  C_BLOB_IT b_it(&blobs);
+  for (int i = word_start; i < word_end; ++i) {
+    if (character_boundaries_.size() > (i + 1)) { // A character without a right-hand boundary gets no blob.
+      TBOX box(static_cast<int16_t>(std::floor(character_boundaries_[i] * scale_factor)) +
+                   line_box.left(),
+               line_box.bottom(),
+               static_cast<int16_t>(std::ceil(character_boundaries_[i + 1] * scale_factor)) +
+                   line_box.left(),
+               line_box.top());
+      // JDWDEBUG START: print the box together with the boundaries it was built from.
+      std::string debug_str;
+      debug_str = "fake boxblob for werd being built in recodebeam ";
+      box.print_to_str(debug_str);
+      fprintf(stderr, "%s %i %i %i \n", debug_str.c_str(), i, character_boundaries_[i], character_boundaries_[i + 1]);
+      // JDWDEBUG END
+      b_it.add_after_then_move(C_BLOB::FakeBlob(box));
+    }
+  }
+  // Make a fake word from the blobs.
+  WERD *word = new WERD(&blobs, leading_space, nullptr);
+  // Make a WERD_RES from the word.
+  auto *word_res = new WERD_RES(word);
+  word_res->end = word_end - word_start + leading_space;
+  word_res->uch_set = unicharset;
+  word_res->combination = true; // Give it ownership of the word.
+  word_res->space_certainty = space_certainty;
+  word_res->ratings = new MATRIX(word_end - word_start, 1); // Sized by character count; the caller fills cells (col, col).
+  return word_res;
+}
+
+// Flags each output in top_n_flags_: TN_TOP2 for the two best (and always for null_char_), TN_TOPN for the rest
+// of the top_n, TN_ALSO_RAN otherwise. Also sets top_code_/second_code_ and the JDWTODO double-whammy state.
+void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) {
+  fprintf(stderr, "recodebeam computetopn \n");  // JDWDEBUG
+  top_n_flags_.clear(); // resize() alone keeps the flags set for the previous timestep.
+  top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
+  top_code_ = -1;
+  second_code_ = -1;
+  top_heap_.clear();
+  for (int i = 0; i < num_outputs; ++i) {
+    if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) {
+      TopPair entry(outputs[i], i);
+      top_heap_.Push(&entry);
+      if (top_heap_.size() > top_n) {
+        top_heap_.Pop(&entry);
+      }
+    }
+  }
+
+  float top_key = 0.0F;   // JDWTODO
+  float second_key = 0.0F;   // JDWTODO
+  bool found_first_whammy = false;    // JDWTODO
+  bool found_second_whammy = false;    // JDWTODO
+  while (!top_heap_.empty()) {
+    TopPair entry;
+    top_heap_.Pop(&entry);
+    if (in_double_whammy_ && entry.data() == first_whammy_)    // JDWTODO
+      found_first_whammy = true;    // JDWTODO
+    if (in_double_whammy_ && entry.data() == second_whammy_)    // JDWTODO
+      found_second_whammy = true;    // JDWTODO
+    if (top_heap_.size() > 1) {
+      top_n_flags_[entry.data()] = TN_TOPN;
+      fprintf(stderr, "recodebeam computetopn topn code,key= %i %f \n", entry.data(), entry.key());  // JDWDEBUG
+    } else {
+      top_n_flags_[entry.data()] = TN_TOP2;
+      fprintf(stderr, "recodebeam computetopn top2 code,key= %i %f \n", entry.data(), entry.key());  // JDWDEBUG
+      if (top_heap_.empty()) {
+        top_code_ = entry.data();
+        top_key = entry.key();   // JDWTODO
+      } else {
+        second_code_ = entry.data();
+        second_key = entry.key();   // JDWTODO
+      }
+    }
+  }
+
+  // JDWTODO START: leave the double whammy once neither of its two codes is in the top_n any more.
+  if (in_double_whammy_) {
+    if (!found_first_whammy && !found_second_whammy){
+      in_double_whammy_ = false;
+      first_whammy_ = -1;
+      second_whammy_ = -1;
+      fprintf(stderr, "recodebeam computetopn double whammy cleared unicharid,code= \n");
+    }
+  }
+  // JDWTODO END
+  // JDWTODO START: enter a double whammy when the two best codes are both non-null and both above 0.25.
+  if (!in_double_whammy_) {
+    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > 0.25F && second_key > 0.25F){
+      in_double_whammy_ = true;
+      first_whammy_ = top_code_;
+      second_whammy_ = second_code_;
+      fprintf(stderr, "recodebeam computetopn double whammy found unicharid,code= %f %f \n", top_key, second_key);
+    }
+  }
+  // JDWTODO END
+
+  fprintf(stderr, "recodebeam computetopn unicharid,code= top_code,second_code= %i %i \n", top_code_, second_code_);  // JDWDEBUG
+  top_n_flags_[null_char_] = TN_TOP2;
+}
+
+// Same flagging as ComputeTopN, except that outputs listed in exList are never
+// candidates and no double-whammy state is tracked.
+void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs,
+                                      int num_outputs, int top_n) {
+  fprintf(stderr, "recodebeam computesectopn \n");  // JDWDEBUG
+  top_n_flags_.clear(); // resize() alone keeps the flags set for the previous timestep.
+  top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
+  top_code_ = -1;
+  second_code_ = -1;
+  top_heap_.clear();
+  for (int i = 0; i < num_outputs; ++i) {
+    if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) && !exList->count(i)) {
+      TopPair entry(outputs[i], i);
+      top_heap_.Push(&entry);
+      if (top_heap_.size() > top_n) {
+        top_heap_.Pop(&entry);
+      }
+    }
+  }
+  while (!top_heap_.empty()) {
+    TopPair entry;
+    top_heap_.Pop(&entry);
+    if (top_heap_.size() > 1) {
+      top_n_flags_[entry.data()] = TN_TOPN;
+    } else {
+      top_n_flags_[entry.data()] = TN_TOP2;
+      // The last entry popped is the top code, the one before it the second.
+      (top_heap_.empty() ? top_code_ : second_code_) = entry.data();
+    }
+  }
+  top_n_flags_[null_char_] = TN_TOP2;
+}
+
+// Adds the computation for time-step t to beam_, allocating beam_[t] on first
+// use. Call at each time-step in sequence from left to right. outputs is the
+// activation vector for the current time-step.
+void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio,
+                                  double cert_offset, double worst_dict_cert,
+                                  const UNICHARSET *charset, bool debug) {
+  fprintf(stderr, "recodebeam decodestep timestep= %i \n", t);  // JDWDEBUG
+  if (t == beam_.size()) {
+    beam_.push_back(new RecodeBeam);
+  }
+  RecodeBeam *step = beam_[t];
+  beam_size_ = t + 1;  // Step t is now the last one read by ExtractBestPaths.
+  step->Clear();
+  if (t == 0) {
+    // The first step can only use singles and initials.
+    ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
+                    dict_ratio, cert_offset, worst_dict_cert, step);
+    if (dict_ != nullptr) {
+      ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
+                      dict_ratio, cert_offset, worst_dict_cert, step);
+    }
+  } else {
+    RecodeBeam *prev = beam_[t - 1];
+    if (debug) {  // Print every path held in prev's dawg and non-dawg NC_ANYTHING beams.
+      int beam_index = BeamIndex(true, NC_ANYTHING, 0);
+      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
+        std::vector<const RecodeNode *> path;
+        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
+        tprintf("Step %d: Dawg beam %d:\n", t, i);
+        DebugPath(charset, path);
+      }
+      beam_index = BeamIndex(false, NC_ANYTHING, 0);
+      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
+        std::vector<const RecodeNode *> path;
+        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
+        tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
+        DebugPath(charset, path);
+      }
+    }
+    int total_beam = 0;
+    // Work through the scores by group (top-2, top-n, the rest) while the beam
+    // is empty. This enables extending the context using only the top-n results
+    // first, which may have an empty intersection with the valid codes, so we
+    // fall back to the rest if the beam is empty.
+    for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
+      auto top_n = static_cast<TopNState>(tn);
+      for (int index = 0; index < kNumBeams; ++index) {
+        // Working backwards through the heaps doesn't guarantee that we see the
+        // best first, but it comes before a lot of the worst, so it is slightly
+        // more efficient than going forwards.
+        for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
+          ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
+                          dict_ratio, cert_offset, worst_dict_cert, step);
+        }
+      }
+      // Only the NC_ANYTHING beams count towards "the beam is no longer empty".
+      for (int index = 0; index < kNumBeams; ++index) {
+        if (ContinuationFromBeamsIndex(index) == NC_ANYTHING) {
+          total_beam += step->beams_[index].size();
+        }
+      }
+    }
+    // Special case for the best initial dawg. Push it on the heap if good
+    // enough, but there is only one, so it doesn't blow up the beam.
+    for (int c = 0; c < NC_COUNT; ++c) {
+      if (step->best_initial_dawgs_[c].code >= 0) {
+        fprintf(stderr, "recodebeam decodestep special case initial dawg %i \n", t);  // JDWDEBUG
+        int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
+        RecodeHeap *dawg_heap = &step->beams_[index];
+        PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
+      }
+    }
+  }
+}
+// Counterpart of DecodeStep that works on secondary_beam_ and leaves beam_size_ unchanged.
+void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double dict_ratio,
+                                           double cert_offset, double worst_dict_cert,
+                                           const UNICHARSET *charset, bool debug) {
+  fprintf(stderr, "recodebeam decodesecondarystep \n");  // JDWDEBUG
+  if (t == secondary_beam_.size()) {
+    secondary_beam_.push_back(new RecodeBeam);
+  }
+  RecodeBeam *step = secondary_beam_[t];
+  step->Clear();
+  if (t == 0) {
+    // The first step can only use singles and initials.
+    ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
+                    dict_ratio, cert_offset, worst_dict_cert, step);
+    if (dict_ != nullptr) {
+      ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
+                      dict_ratio, cert_offset, worst_dict_cert, step);
+    }
+  } else {
+    RecodeBeam *prev = secondary_beam_[t - 1];
+    if (debug) {  // Print every path held in prev's dawg and non-dawg NC_ANYTHING beams.
+      int beam_index = BeamIndex(true, NC_ANYTHING, 0);
+      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
+        std::vector<const RecodeNode *> path;
+        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
+        tprintf("Step %d: Dawg beam %d:\n", t, i);
+        DebugPath(charset, path);
+      }
+      beam_index = BeamIndex(false, NC_ANYTHING, 0);
+      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
+        std::vector<const RecodeNode *> path;
+        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
+        tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
+        DebugPath(charset, path);
+      }
+    }
+    int total_beam = 0;
+    // Work through the scores by group (top-2, top-n, the rest) while the beam
+    // is empty. This enables extending the context using only the top-n results
+    // first, which may have an empty intersection with the valid codes, so we
+    // fall back to the rest if the beam is empty.
+    for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
+      auto top_n = static_cast<TopNState>(tn);
+      for (int index = 0; index < kNumBeams; ++index) {
+        // Working backwards through the heaps doesn't guarantee that we see the
+        // best first, but it comes before a lot of the worst, so it is slightly
+        // more efficient than going forwards.
+        for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
+          ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
+                          dict_ratio, cert_offset, worst_dict_cert, step);
+        }
+      }
+      // Only the NC_ANYTHING beams count towards "the beam is no longer empty".
+      for (int index = 0; index < kNumBeams; ++index) {
+        if (ContinuationFromBeamsIndex(index) == NC_ANYTHING) {
+          total_beam += step->beams_[index].size();
+        }
+      }
+    }
+    // Special case for the best initial dawg. Push it on the heap if good
+    // enough, but there is only one, so it doesn't blow up the beam.
+    for (int c = 0; c < NC_COUNT; ++c) {
+      if (step->best_initial_dawgs_[c].code >= 0) {
+        int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
+        RecodeHeap *dawg_heap = &step->beams_[index];
+        PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
+      }
+    }
+  }
+}
+
+// Adds to the appropriate beams of step the legal (according to the recoder)
+// continuations of context prev, using the given network outputs to score the
+// choices. index selects the beam that prev came from; only codes for which
+// top_n_flags_[code] == top_n_flag are used. charset may be nullptr.
+void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const float *outputs,
+                                       TopNState top_n_flag, const UNICHARSET *charset,
+                                       double dict_ratio, double cert_offset,
+                                       double worst_dict_cert, RecodeBeam *step) {
+  // JDWDEBUG START (charset may be nullptr, so the name lookup is guarded)
+  if (prev != nullptr) {
+    const char *ucc = charset != nullptr ? charset->id_to_unichar_ext(prev->unichar_id) : "";
+    fprintf(stderr, "recodebeam continuecontext unicharid,code,unichar,index,topn = %i %i %s %i %i \n", prev->unichar_id, prev->code, ucc, index, top_n_flag);
+  }
+  else {
+    fprintf(stderr, "recodebeam continuecontext top prev null index,topn = %i %i \n", index, top_n_flag);
+  }
+  // JDWDEBUG END
+  RecodedCharID prefix;
+  RecodedCharID full_code;
+  const RecodeNode *previous = prev;
+  int length = LengthFromBeamsIndex(index);
+  bool use_dawgs = IsDawgFromBeamsIndex(index);
+  NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
+  // Rebuild the partial code (prefix) from the last `length` real codes of prev.
+  for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
+    while (previous != nullptr && (previous->duplicate || previous->code == null_char_)) {
+      fprintf(stderr, "recodebeam continuecontext stepping back code= %i \n", previous->code);
+      previous = previous->prev;
+    }
+    if (previous != nullptr) {
+      prefix.Set(p, previous->code);
+      full_code.Set(p, previous->code);
+    }
+  }
+  if (prev != nullptr && !is_simple_text_) {
+    if (top_n_flags_[prev->code] == top_n_flag) {
+      if (prev_cont != NC_NO_DUP) {
+        float cert = NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
+        fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", prev->unichar_id, 1);
+        PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
+                                dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
+      }
+      if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && prev->code != null_char_) {
+        float cert =
+            NetworkIO::ProbToCertainty(outputs[prev->code] + outputs[null_char_]) + cert_offset;
+        fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", prev->unichar_id, 1);
+        PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
+                                dict_ratio, use_dawgs, NC_NO_DUP, prev, step);
+      }
+    }
+    if (prev_cont == NC_ONLY_DUP) {
+      return;
+    }
+    if (prev->code != null_char_ && length > 0 && top_n_flags_[null_char_] == top_n_flag) {
+      // Allow nulls within multi code sequences, as the nulls within are not
+      // explicitly included in the code sequence.
+      float cert = NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
+      fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", INVALID_UNICHAR_ID, 0);
+      PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, cert, worst_dict_cert,
+                              dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
+    }
+  }
+  const std::vector<int> *final_codes = recoder_.GetFinalCodes(prefix);
+  if (final_codes != nullptr) {
+    for (int code : *final_codes) {
+      if (top_n_flags_[code] != top_n_flag) {
+        continue;
+      }
+      if (prev != nullptr && prev->code == code && !is_simple_text_) {
+        continue;
+      }
+      float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
+      if (cert < kMinCertainty && code != null_char_) {
+        continue;
+      }
+      full_code.Set(length, code);
+      int unichar_id = recoder_.DecodeUnichar(full_code);
+      // Map the null char to INVALID.
+      if (length == 0 && code == null_char_) {
+        unichar_id = INVALID_UNICHAR_ID;
+      }
+      if (unichar_id != INVALID_UNICHAR_ID && charset != nullptr &&
+          !charset->get_enabled(unichar_id)) {
+        continue; // disabled by whitelist/blacklist
+      }
+      ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ANYTHING,
+                      prev, step);
+      if (top_n_flag == TN_TOP2 && code != null_char_) {
+        float prob = outputs[code] + outputs[null_char_];
+        if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
+            ((prev->code == top_code_ && code == second_code_) ||
+             (code == top_code_ && prev->code == second_code_))) {
+          prob += outputs[prev->code];
+        }
+        float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
+        ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ONLY_DUP,
+                        prev, step);
+      }
+    }
+  }
+  const std::vector<int> *next_codes = recoder_.GetNextCodes(prefix);
+  if (next_codes != nullptr) {
+    for (int code : *next_codes) {
+      if (top_n_flags_[code] != top_n_flag) {
+        continue;
+      }
+      if (prev != nullptr && prev->code == code && !is_simple_text_) {
+        continue;
+      }
+      float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
+      fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", INVALID_UNICHAR_ID, 0);
+      PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
+                              dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
+      if (top_n_flag == TN_TOP2 && code != null_char_) {
+        float prob = outputs[code] + outputs[null_char_];
+        if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
+            ((prev->code == top_code_ && code == second_code_) ||
+             (code == top_code_ && prev->code == second_code_))) {
+          prob += outputs[prev->code];
+        }
+        float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
+        fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", INVALID_UNICHAR_ID, 0);
+        PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
+                                dict_ratio, use_dawgs, NC_ONLY_DUP, prev, step);
+      }
+    }
+  }
+}
+
+// Continues prev with a completed unichar: through the dawg search when
+// use_dawgs is set, otherwise through the non-dawg (top choice) beam.
+void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, float worst_dict_cert,
+                                       float dict_ratio, bool use_dawgs, NodeContinuation cont,
+                                       const RecodeNode *prev, RecodeBeam *step) {
+  fprintf(stderr, "recodebeam ContinueUnichar unicharid,code,cont= %i %i %i \n", unichar_id, code, cont);  // JDWDEBUG
+  if (use_dawgs) {
+    if (cert > worst_dict_cert) {
+      ContinueDawg(code, unichar_id, cert, cont, prev, step);
+    }
+    return;
+  }
+  RecodeHeap *top_choice_heap = &step->beams_[BeamIndex(false, cont, 0)];
+  fprintf(stderr, "recodebeam ContinueUnichar before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
+  PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, false, false, false,
+                   cert * dict_ratio, prev, nullptr, top_choice_heap);
+  if (dict_ == nullptr) {
+    return;
+  }
+  // Any top choice position that can start a new word, ie a space or any
+  // non-space-delimited character, should also be considered by the dawg
+  // search, so it is offered as an initial dawg as well.
+  const bool is_space = unichar_id == UNICHAR_SPACE;
+  if (!((is_space && cert > worst_dict_cert) ||
+        !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
+    return;
+  }
+  // The space either side of a dictionary word is used in the certainty of
+  // the word (to properly handle weak spaces). As this space comes from a
+  // non-dict word, the dict word that follows must not be degraded by it:
+  // a space keeps its certainty unscaled by dict_ratio and is flagged with
+  // NO_PERM to indicate that the predecessor nulls should not be used to
+  // generate the confidence for the space, as they have already been
+  // multiplied by dict_ratio and no more entries can be inserted in any
+  // previous heaps. Anything else is scaled by dict_ratio as usual.
+  const PermuterType initial_perm = is_space ? NO_PERM : TOP_CHOICE_PERM;
+  const float initial_cert = is_space ? cert : cert * dict_ratio;
+  fprintf(stderr, "recodebeam ContinueUnichar before pushinitialdawgifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
+  PushInitialDawgIfBetter(code, unichar_id, initial_perm, false, false, initial_cert, cont, prev,
+                          step);
+}
+
+// Adds a RecodeNode for (code, unichar_id, cert, prev) plus the appropriate
+// dawg state to the dawg beam of step if unichar_id is a valid dictionary
+// continuation of whatever is in prev.
+void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont,
+                                    const RecodeNode *prev, RecodeBeam *step) {
+  fprintf(stderr, "recodebeam ContinueDawg unicharid,code,cont= %i %i %i \n", unichar_id, code, cont);  // JDWDEBUG
+  RecodeHeap *dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
+  RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
+  if (unichar_id == INVALID_UNICHAR_ID) {
+    fprintf(stderr, "recodebeam ContinueDawg before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
+    PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, false, false, cert,
+                     prev, nullptr, dawg_heap);
+    return;
+  }
+  // Avoid the dictionary probe if the score is a total loss.
+  float score = cert;
+  if (prev != nullptr) {
+    score += prev->score;
+  }
+  if (dawg_heap->size() >= kBeamWidths[0] && score <= dawg_heap->PeekTop().data().score &&
+      nodawg_heap->size() >= kBeamWidths[0] && score <= nodawg_heap->PeekTop().data().score) {
+    return;
+  }
+  const RecodeNode *uni_prev = prev;
+  // Prev may be a partial code, null_char, or duplicate: scan back to the last valid unichar_id.
+  while (uni_prev != nullptr &&
+         (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate)) {
+    uni_prev = uni_prev->prev;
+  }
+  if (unichar_id == UNICHAR_SPACE) {
+    if (uni_prev != nullptr && uni_prev->end_of_word) {
+      // Space is good. Push initial state, to the dawg beam and a regular
+      // space to the top choice beam.
+      fprintf(stderr, "recodebeam ContinueDawg before PushInitialDawgIfBetter unicharid= %i \n", unichar_id);  // JDWDEBUG
+      PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, false, cert, cont, prev,
+                              step);
+      fprintf(stderr, "recodebeam ContinueDawg before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
+      PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, false, false, false,
+                       false, cert, prev, nullptr, nodawg_heap);
+    }
+    return;
+  } else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
+             uni_prev->unichar_id != UNICHAR_SPACE &&
+             dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) &&
+             dict_->getUnicharset().IsSpaceDelimited(unichar_id)) {
+    return; // Can't break words between space delimited chars.
+  }
+  DawgPositionVector initial_dawgs;
+  auto *updated_dawgs = new DawgPositionVector;
+  DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM);
+  bool word_start = false;
+  if (uni_prev == nullptr) {
+    // Starting from beginning of line.
+    dict_->default_dawgs(&initial_dawgs, false);
+    word_start = true;
+  } else if (uni_prev->dawgs != nullptr) {
+    // Continuing a previous dict word.
+    dawg_args.active_dawgs = uni_prev->dawgs;
+    word_start = uni_prev->start_of_dawg;
+  } else {
+    delete updated_dawgs; // Nothing took ownership of it on this path.
+    return; // Can't continue if not a dict word.
+  }
+  auto permuter = static_cast<PermuterType>(
+      dict_->def_letter_is_okay(&dawg_args, dict_->getUnicharset(), unichar_id, false));
+  if (permuter != NO_PERM) {
+    PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start,
+                     dawg_args.valid_end, false, cert, prev, dawg_args.updated_dawgs, dawg_heap);
+    if (dawg_args.valid_end && !space_delimited_) {
+      // We can start another word right away, so push initial state as well,
+      // to the dawg beam, and the regular character to the top choice beam,
+      // since non-dict words can start here too.
+      fprintf(stderr, "recodebeam ContinueDawg before PushInitialDawgIfBetter unicharid= %i \n", unichar_id);  // JDWDEBUG
+      PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, cert, cont, prev, step);
+      fprintf(stderr, "recodebeam ContinueDawg before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
+      PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start, true, false,
+                       cert, prev, nullptr, nodawg_heap);
+    }
+  } else {
+    delete updated_dawgs;
+  }
+}
+
+// Replaces step->best_initial_dawgs_[cont] with a node built from (code,
+// unichar_id, initial-dawg-state, prev, cert) if that slot is unset (code < 0)
+// or the new total score is higher than the one stored there.
+void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter,
+                                               bool start, bool end, float cert,
+                                               NodeContinuation cont, const RecodeNode *prev,
+                                               RecodeBeam *step) {
+  fprintf(stderr, "recodebeam PushInitialDawgIfBetter unicharid,code= %i %i \n", unichar_id, code);  // JDWDEBUG
+  // Total path score: this step's cert on top of whatever prev accumulated.
+  const float score = prev == nullptr ? cert : cert + prev->score;
+  RecodeNode *slot = &step->best_initial_dawgs_[cont];
+  if (slot->code >= 0 && !(score > slot->score)) {
+    return; // The stored candidate is at least as good.
+  }
+  // The replacement starts from the positions filled in by default_dawgs.
+  auto *fresh_dawgs = new DawgPositionVector;
+  dict_->default_dawgs(fresh_dawgs, false);
+  fprintf(stderr, "recodebeam PushInitialDawgIfBetter adding new node unicharid,code= %i %i \n", unichar_id, code);  // JDWDEBUG
+  RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, score, prev,
+                  fresh_dawgs, ComputeCodeHash(code, false, prev));
+  *slot = node;
+}
+
+// Pushes a node (code, unichar_id, prev's permuter or a default, cert, prev)
+// flagged with dup onto the beam chosen by (use_dawgs, cont, length), provided
+// cert passes the dawg or (dict_ratio-scaled) non-dawg certainty threshold.
+void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id,
+                                               float cert, float worst_dict_cert, float dict_ratio,
+                                               bool use_dawgs, NodeContinuation cont,
+                                               const RecodeNode *prev, RecodeBeam *step) {
+  fprintf(stderr, "recodebeam PushDupOrNoDawgIfBetter %i \n", unichar_id);  // JDWDEBUG
+  const int index = BeamIndex(use_dawgs, cont, length);
+  PermuterType fallback_perm = NO_PERM;
+  if (use_dawgs) {
+    if (!(cert > worst_dict_cert)) {
+      return;
+    }
+  } else {
+    cert *= dict_ratio;
+    if (!(cert >= kMinCertainty || code == null_char_)) {
+      return;
+    }
+    fallback_perm = TOP_CHOICE_PERM;
+  }
+  PushHeapIfBetter(kBeamWidths[length], code, unichar_id, prev ? prev->permuter : fallback_perm,
+                   false, false, false, dup, cert, prev, nullptr, &step->beams_[index]);
+}
+
+// Builds a RecodeNode from (code, unichar_id, permuter, dawg_start, word_start,
+// end, dup, cert, prev, d) and adds it to heap if there is room, or if it
+// scores above heap->PeekTop() when the heap is already at max_size.
+void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
+                                        PermuterType permuter, bool dawg_start, bool word_start,
+                                        bool end, bool dup, float cert, const RecodeNode *prev,
+                                        DawgPositionVector *d, RecodeHeap *heap) {
+  fprintf(stderr, "recodebeam PushHeapIfBetter #1 %i \n", unichar_id);  // JDWDEBUG
+  const float score = prev == nullptr ? cert : cert + prev->score;
+  const bool worth_adding = heap->size() < max_size || score > heap->PeekTop().data().score;
+  if (!worth_adding) {
+    delete d; // No node is built on this path, so d is released here.
+    return;
+  }
+  RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, dup, cert, score, prev,
+                  d, ComputeCodeHash(code, dup, prev));
+  // A matching entry already in the heap is updated in place instead.
+  if (UpdateHeapIfMatched(&node, heap)) {
+    return;
+  }
+  // JDWTODO START
+  if (!AddToHeapIsAllowed(&node)) {
+    return;
+  }
+  // JDWTODO END
+  fprintf(stderr, "recodebeam PushHeapIfBetter #1 adding node unicharid,code= %i %i \n", unichar_id, code);  // JDWDEBUG
+  RecodePair entry(score, node);
+  heap->Push(&entry);
+  // NOTE(review): presumably Push takes over the node's dawgs pointer - confirm.
+  ASSERT_HOST(entry.data().dawgs == nullptr);
+  // Over capacity: remove the entry at the top of the heap.
+  if (heap->size() > max_size) {
+    heap->Pop(&entry);
+  }
+}
+
+// Adds the given RecodeNode to heap if there is room, or if its score beats
+// heap->PeekTop() when the heap already holds max_size entries.
+void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHeap *heap) {
+  fprintf(stderr, "recodebeam PushHeapIfBetter #2 %i \n", node->unichar_id);  // JDWDEBUG
+  if (heap->size() < max_size || node->score > heap->PeekTop().data().score) {
+    if (UpdateHeapIfMatched(node, heap)) {
+      return;
+    }
+    fprintf(stderr, "recodebeam PushHeapIfBetter #2 adding node unicharid,code= %i %i \n", node->unichar_id, node->code);  // JDWDEBUG
+    RecodePair entry(node->score, *node);
+    heap->Push(&entry);
+    ASSERT_HOST(entry.data().dawgs == nullptr);
+    if (heap->size() > max_size) {
+      heap->Pop(&entry);
+    }
+  }
+}
+
+// Looks for a heap entry with the same code, code_hash, permuter and
+// start_of_dawg as new_node. If one exists it is overwritten (with reshuffle)
+// only when new_node scores higher. Returns true if there was a match.
+bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap) {
+  // TODO(rays) consider hash map instead of linear search.
+  // It might not be faster because the hash map would have to be updated
+  // every time a heap reshuffle happens, and that would be a lot of overhead.
+  fprintf(stderr, "recodebeam UpdateHeapIfMatched %i \n", new_node->unichar_id);  // JDWDEBUG
+  for (auto &slot : heap->heap()) {
+    RecodeNode &existing = slot.data();
+    if (existing.code != new_node->code || existing.code_hash != new_node->code_hash ||
+        existing.permuter != new_node->permuter ||
+        existing.start_of_dawg != new_node->start_of_dawg) {
+      continue;
+    }
+    if (new_node->score > existing.score) {
+      fprintf(stderr, "recodebeam UpdateHeapIfMatched doing update unicharid,code= %i %i \n", new_node->unichar_id, new_node->code);  // JDWDEBUG
+      existing = *new_node;
+      slot.key() = existing.score;
+      heap->Reshuffle(&slot);
+    }
+    return true;
+  }
+  return false;
+}
+
+// JDWTODO START: vetoes pushes that would put the two whammy codes side by side.
+bool RecodeBeamSearch::AddToHeapIsAllowed(RecodeNode *new_node) {
+  if (!in_double_whammy_) {
+    return true;
+  }
+  const RecodeNode *before = new_node->prev;
+  if (before == nullptr) {
+    fprintf(stderr, "recodebeam AddToHeapIsAllowed allowed unicharid,code= + prevcode %i %i null \n", new_node->unichar_id, new_node->code);
+    return true;
+  }
+  if (before->code == first_whammy_ && new_node->code == second_whammy_) {
+    fprintf(stderr, "recodebeam AddToHeapIsAllowed second whammy not allowed unicharid,code= + prevcode %i %i %i \n", new_node->unichar_id, new_node->code, before->code);
+    return false;
+  }
+  if (before->code == second_whammy_ && new_node->code == first_whammy_) {
+    fprintf(stderr, "recodebeam AddToHeapIsAllowed first whammy not allowed unicharid,code= + prevcode %i %i %i \n", new_node->unichar_id, new_node->code, before->code);
+    return false;
+  }
+  fprintf(stderr, "recodebeam AddToHeapIsAllowed allowed unicharid,code= + prevcode %i %i %i \n", new_node->unichar_id, new_node->code, before->code);
+  return true;
+}
+// JDWTODO END
+
+// Computes and returns the code-hash for the given code and prev. Duplicates
+// and null_char_ leave the hash of prev (0 if there is no prev) unchanged.
+uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const {
+  const uint64_t prev_hash = prev == nullptr ? 0 : prev->code_hash;
+  if (dup || code == null_char_) {
+    return prev_hash;
+  }
+  const int num_classes = recoder_.code_range();
+  // carry ~ the bits that prev_hash * num_classes pushes out past bit 63.
+  const uint64_t carry = ((prev_hash >> 32) * num_classes) >> 32;
+  return prev_hash * num_classes + carry + code;
+}
+
+// Backtracks to extract the best path through the lattice that was built
+// during Decode. On return best_nodes holds the nodes of the highest-scoring
+// path ending in the last beam; if second_nodes is not nullptr it receives the
+// runner-up. Dawg nodes are skipped unless their last unichar ends a word or is a space.
+void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_nodes,
+                                        std::vector<const RecodeNode *> *second_nodes) const {
+  // Scan both beams to extract the best and second best paths.
+  fprintf(stderr, "recodebeam extractbestpaths \n");  // JDWDEBUG
+  const RecodeNode *best_node = nullptr;
+  const RecodeNode *second_best_node = nullptr;
+  const RecodeBeam *last_beam = beam_[beam_size_ - 1];  // NOTE(review): assumes beam_size_ > 0 - confirm callers.
+  for (int c = 0; c < NC_COUNT; ++c) {
+    if (c == NC_ONLY_DUP) {  // NC_ONLY_DUP beams are never candidates for the final path.
+      continue;
+    }
+    auto cont = static_cast<NodeContinuation>(c);
+    for (int is_dawg = 0; is_dawg < 2; ++is_dawg) {
+      int beam_index = BeamIndex(is_dawg, cont, 0);
+      int heap_size = last_beam->beams_[beam_index].size();
+      for (int h = 0; h < heap_size; ++h) {
+        const RecodeNode *node = &last_beam->beams_[beam_index].get(h).data();
+        if (is_dawg) {
+          // dawg_node may be a null_char, or duplicate, so scan back to the
+          // last valid unichar_id.
+          const RecodeNode *dawg_node = node;
+          while (dawg_node != nullptr &&
+                 (dawg_node->unichar_id == INVALID_UNICHAR_ID || dawg_node->duplicate)) {
+            dawg_node = dawg_node->prev;
+          }
+          if (dawg_node == nullptr ||
+              (!dawg_node->end_of_word && dawg_node->unichar_id != UNICHAR_SPACE)) {
+            // Dawg node is not valid.
+            continue;
+          }
+        }
+        if (best_node == nullptr || node->score > best_node->score) {
+          second_best_node = best_node;  // The previous best becomes the runner-up.
+          best_node = node;
+          fprintf(stderr, "recodebeam extractbestpaths bestnodebeam= %i \n", beam_index);  // JDWDEBUG
+        } else if (second_best_node == nullptr || node->score > second_best_node->score) {
+          second_best_node = node;
+          fprintf(stderr, "recodebeam extractbestpaths secondbestnodebeam= %i \n", beam_index);  // JDWDEBUG
+        }
+      }
+    }
+  }
+  if (second_nodes != nullptr) {
+    fprintf(stderr, "recodebeam extractbestpaths extract second best \n");  // JDWDEBUG
+    ExtractPath(second_best_node, second_nodes);
+  }
+  fprintf(stderr, "recodebeam extractbestpaths extract best \n");  // JDWDEBUG
+  ExtractPath(best_node, best_nodes);
+}
+
+// Walks the prev links back from node, collecting every node visited, then
+// reverses the result so that path runs from the first node to node.
+void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
+                                   std::vector<const RecodeNode *> *path) const {
+  path->clear();
+  for (const RecodeNode *cur = node; cur != nullptr; cur = cur->prev) {
+    fprintf(stderr, "recodebeam extractpath unicharid,code,cert,score= %i %i %f %f %i \n", cur->unichar_id, cur->code, cur->certainty, cur->score, cur->duplicate);  // JDWDEBUG
+    path->push_back(cur);
+  }
+  // Nodes were collected back-to-front.
+  std::reverse(path->begin(), path->end());
+}
+
+// Like the two-argument ExtractPath, but collects at most limiter nodes
+// (counted back from node) before reversing the path, and prints nothing.
+void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector<const RecodeNode *> *path,
+                                   int limiter) const {
+  path->clear();
+  for (int taken = 0; node != nullptr && taken < limiter; ++taken) {
+    path->push_back(node);
+    node = node->prev;
+  }
+  std::reverse(path->begin(), path->end());
+}
+
+// Helper prints each node of the given lattice path, preceded by its index in the path.
+void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset,
+                                 const std::vector<const RecodeNode *> &path) const {
+  int position = 0;
+  for (const RecodeNode *node : path) {
+    tprintf("%d ", position++);
+    node->Print(null_char_, *unicharset, 1);
+  }
+}
+
+// Helper prints one line per decoded unichar of the path, then the summed rating.
+void RecodeBeamSearch::DebugUnicharPath(const UNICHARSET *unicharset,
+                                        const std::vector<const RecodeNode *> &path,
+                                        const std::vector<int> &unichar_ids,
+                                        const std::vector<float> &certs,
+                                        const std::vector<float> &ratings,
+                                        const std::vector<int> &xcoords) const {
+  double rating_sum = 0.0;
+  const int count = unichar_ids.size();
+  for (int i = 0; i < count; ++i) {
+    const RecodeNode *node = path[xcoords[i]];
+    tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", xcoords[i], unichar_ids[i],
+            unicharset->debug_str(unichar_ids[i]).c_str(), ratings[i], certs[i],
+            node->start_of_word, node->end_of_word, node->permuter);
+    rating_sum += ratings[i];
+  }
+  tprintf("Path total rating = %g\n", rating_sum);
+}
+
+} // namespace tesseract.
diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 485723ea45..816198d43d 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -65,6 +65,9 @@ RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char
     , beam_size_(0)
     , top_code_(-1)
     , second_code_(-1)
+    , in_possible_diplopia_(false)
+    , first_diplopia_code_(-1)
+    , second_diplopia_code_(-1)
     , dict_(dict)
     , space_delimited_(true)
     , is_simple_text_(simple_text)
diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h
index 62703e36c6..78ff911bd4 100644
--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@@ -377,6 +377,9 @@ class TESS_API RecodeBeamSearch {
   // Searches the heap for an entry matching new_node, and updates the entry
   // with reshuffle if needed. Returns true if there was a match.
   bool UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap);
+  // Determines if new node can be added to the heap for the current beam.
+  // Returns false if we are in possible diplopia situation
+  bool AddToHeapIsAllowed(RecodeNode *new_node);
   // Computes and returns the code-hash for the given code and prev.
   uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const;
   // Backtracks to extract the best path through the lattice that was built
@@ -425,6 +428,11 @@ class TESS_API RecodeBeamSearch {
   bool is_simple_text_;
   // The encoded (class label) of the null/reject character.
   int null_char_;
+  // Variables used in tracking possible diplopia case
+  // Refer to ComputeTopN routine for more information
+  bool in_possible_diplopia_;
+  int first_diplopia_code_;
+  int second_diplopia_code_;
 };
 
 } // namespace tesseract.

From b29668135e26669486430add7f4cddb9d4934366 Mon Sep 17 00:00:00 2001
From: woodjohndavid <57116722+woodjohndavid@users.noreply.github.com>
Date: Sat, 26 Jun 2021 14:19:01 -0700
Subject: [PATCH 2/8] Update recodebeam.cpp

---
 src/lstm/recodebeam.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 816198d43d..96e5fbb276 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -185,7 +185,7 @@ RecodeBeamSearch::combineSegmentedTimesteps(
 
 void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
                                                std::vector<int> *char_bounds_, int maxWidth) {
-  char_bounds_->push_back(0);
+  char_bounds_->push_back((*starts)[0]); 
   for (int i = 0; i < ends->size(); ++i) {
     int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
     char_bounds_->push_back((*ends)[i] + middle);
@@ -570,8 +570,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
       }
       rating -= cert;
     }
-    starts.push_back(t);
     if (t < width) {
+      starts.push_back(t);
       int unichar_id = best_nodes[t]->unichar_id;
       if (unichar_id == UNICHAR_SPACE && !certs->empty() && best_nodes[t]->permuter != NO_PERM) {
         // All the rating and certainty go on the previous character except
@@ -585,8 +585,9 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
       }
       unichar_ids->push_back(unichar_id);
       xcoords->push_back(t);
-      do {
-        double cert = best_nodes[t++]->certainty;
+      t++;
+      while (t < width && best_nodes[t]->duplicate) {
+        double cert = best_nodes[t]->certainty;
         // Special-case NO-PERM space to forget the certainty of the previous
         // nulls. See long comment in ContinueContext.
         if (cert < certainty ||
@@ -594,7 +595,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNod
           certainty = cert;
         }
         rating -= cert;
-      } while (t < width && best_nodes[t]->duplicate);
+        t++;
+      }
       ends.push_back(t);
       certs->push_back(certainty);
       ratings->push_back(rating);

From d9244fc063be8f435e49eca64e1bbd9632b98463 Mon Sep 17 00:00:00 2001
From: woodjohndavid <57116722+woodjohndavid@users.noreply.github.com>
Date: Sun, 27 Jun 2021 10:07:24 -0700
Subject: [PATCH 3/8] diplopia interim solution

---
 src/lstm/jdwcrap.cpp    | 1469 ---------------------------------------
 src/lstm/recodebeam.cpp |   44 ++
 2 files changed, 44 insertions(+), 1469 deletions(-)
 delete mode 100644 src/lstm/jdwcrap.cpp

diff --git a/src/lstm/jdwcrap.cpp b/src/lstm/jdwcrap.cpp
deleted file mode 100644
index d3df8e7f0d..0000000000
--- a/src/lstm/jdwcrap.cpp
+++ /dev/null
@@ -1,1469 +0,0 @@
-///////////////////////////////////////////////////////////////////////
-// File:        recodebeam.cpp
-// Description: Beam search to decode from the re-encoded CJK as a sequence of
-//              smaller numbers in place of a single large code.
-// Author:      Ray Smith
-//
-// (C) Copyright 2015, Google Inc.
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-///////////////////////////////////////////////////////////////////////
-
-#include "recodebeam.h"
-
-#include "networkio.h"
-#include "pageres.h"
-#include "unicharcompress.h"
-
-#include <algorithm> // for std::reverse
-#include <deque>
-#include <map>
-#include <set>
-#include <tuple>
-#include <unordered_set>
-#include <vector>
-
-namespace tesseract {
-
-// The beam width at each code position.
-const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
-    5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
-};
-
-static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
-
-// Prints debug details of the node.
-void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const {
-  if (code == null_char) {
-    tprintf("null_char");
-  } else {
-    tprintf("label=%d, uid=%d=%s", code, unichar_id, unicharset.debug_str(unichar_id).c_str());
-  }
-  tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%" PRIx64, score, certainty,
-          start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
-          end_of_word ? " End" : "", permuter, code_hash);
-  if (depth > 0 && prev != nullptr) {
-    tprintf(" prev:");
-    prev->Print(null_char, unicharset, depth - 1);
-  } else {
-    tprintf("\n");
-  }
-}
-
-// Borrows the pointer, which is expected to survive until *this is deleted.
-RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text,
-                                   Dict *dict)
-    : recoder_(recoder)
-    , beam_size_(0)
-    , top_code_(-1)
-    , second_code_(-1)
-    , in_double_whammy_(false)  // JDWTODO
-    , first_whammy_(-1)  // JDWTODO
-    , second_whammy_(-1)  // JDWTODO
-    , dict_(dict)
-    , space_delimited_(true)
-    , is_simple_text_(simple_text)
-    , null_char_(null_char) {
-  if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) {
-    space_delimited_ = false;
-  }
-}
-
-RecodeBeamSearch::~RecodeBeamSearch() {
-  for (auto data : beam_) {
-    delete data;
-  }
-  for (auto data : secondary_beam_) {
-    delete data;
-  }
-}
-
-// Decodes the set of network outputs, storing the lattice internally.
-void RecodeBeamSearch::Decode(const NetworkIO &output, double dict_ratio, double cert_offset,
-                              double worst_dict_cert, const UNICHARSET *charset,
-                              int lstm_choice_mode) {
-  beam_size_ = 0;
-  int width = output.Width();
-  fprintf(stderr, "recodebeam decode #1 outputwidth= %i lstmchoice= %i \n", width, lstm_choice_mode);  // JDWDEBUG
-  if (lstm_choice_mode) {
-    timesteps.clear();
-  }
-  for (int t = 0; t < width; ++t) {
-    fprintf(stderr, "recodebeam decode #1 unicharid,code= timestep# %i \n", t);  // JDWDEBUG
-    ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
-    DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
-    if (lstm_choice_mode) {
-      SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
-    }
-  }
-}
-    
-void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY<float> &output, double dict_ratio,
-                              double cert_offset, double worst_dict_cert,
-                              const UNICHARSET *charset) {
-  fprintf(stderr, "recodebeam decode #2 \n");  // JDWDEBUG
-  beam_size_ = 0;
-  int width = output.dim1();
-  for (int t = 0; t < width; ++t) {
-    fprintf(stderr, "recodebeam decode #@ unicharid,code= timestep# %i \n", t);  // JDWDEBUG
-    ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
-    DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
-  }
-}
-
-void RecodeBeamSearch::DecodeSecondaryBeams(const NetworkIO &output, double dict_ratio,
-                                            double cert_offset, double worst_dict_cert,
-                                            const UNICHARSET *charset, int lstm_choice_mode) {
-  fprintf(stderr, "recodebeam decode secondary \n");  // JDWDEBUG
-  for (auto data : secondary_beam_) {
-    delete data;
-  }
-  secondary_beam_.clear();
-  if (character_boundaries_.size() < 2) {
-    return;
-  }
-  int width = output.Width();
-  int bucketNumber = 0;
-  for (int t = 0; t < width; ++t) {
-    while ((bucketNumber + 1) < character_boundaries_.size() &&
-           t >= character_boundaries_[bucketNumber + 1]) {
-      ++bucketNumber;
-    }
-    ComputeSecTopN(&(excludedUnichars)[bucketNumber], output.f(t), output.NumFeatures(),
-                   kBeamWidths[0]);
-    DecodeSecondaryStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset);
-  }
-}
-
-void RecodeBeamSearch::SaveMostCertainChoices(const float *outputs, int num_outputs,
-                                              const UNICHARSET *charset, int xCoord) {
-  fprintf(stderr, "recodebeam savemostcertainchoices \n");  // JDWDEBUG
-  std::vector<std::pair<const char *, float>> choices;
-  for (int i = 0; i < num_outputs; ++i) {
-    if (outputs[i] >= 0.01f) {
-      const char *character;
-      if (i + 2 >= num_outputs) {
-        character = "";
-      } else if (i > 0) {
-        character = charset->id_to_unichar_ext(i + 2);
-      } else {
-        character = charset->id_to_unichar_ext(i);
-      }
-      size_t pos = 0;
-      // order the possible choices within one timestep
-      // beginning with the most likely
-      while (choices.size() > pos && choices[pos].second > outputs[i]) {
-        pos++;
-      }
-      choices.insert(choices.begin() + pos, std::pair<const char *, float>(character, outputs[i]));
-    }
-  }
-  timesteps.push_back(choices);
-}
-
-void RecodeBeamSearch::segmentTimestepsByCharacters() {
-  for (int i = 1; i < character_boundaries_.size(); ++i) {
-    std::vector<std::vector<std::pair<const char *, float>>> segment;
-    for (int j = character_boundaries_[i - 1]; j < character_boundaries_[i]; ++j) {
-      segment.push_back(timesteps[j]);
-    }
-    segmentedTimesteps.push_back(segment);
-  }
-}
-std::vector<std::vector<std::pair<const char *, float>>>
-RecodeBeamSearch::combineSegmentedTimesteps(
-    std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *segmentedTimesteps) {
-  std::vector<std::vector<std::pair<const char *, float>>> combined_timesteps;
-  for (auto &segmentedTimestep : *segmentedTimesteps) {
-    for (auto &j : segmentedTimestep) {
-      combined_timesteps.push_back(j);
-    }
-  }
-  return combined_timesteps;
-}
-
-void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
-                                               std::vector<int> *char_bounds_, int maxWidth) {
-  fprintf(stderr, "recodebeam calculatecharboundaries maxwidth= %i \n", maxWidth);  // JDWDEBUG
-  // char_bounds_->push_back(0);   // JDWTODO
-  char_bounds_->push_back((*starts)[0]);   // JDWTODO
-  for (int i = 0; i < ends->size(); ++i) {
-    int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
-    fprintf(stderr, "%s %i %i %i \n", "calculatecharboundaries start&end&middle=", (*starts)[i + 1], (*ends)[i], middle);
-    char_bounds_->push_back((*ends)[i] + middle);
-  }
-  char_bounds_->pop_back();
-  char_bounds_->push_back(maxWidth);
-}
-
-// Returns the best path as labels/scores/xcoords similar to simple CTC.
-void RecodeBeamSearch::ExtractBestPathAsLabels(std::vector<int> *labels,
-                                               std::vector<int> *xcoords) const {
-  fprintf(stderr, "recodebeam extractbestpathaslabels \n");  // JDWDEBUG
-  labels->clear();
-  xcoords->clear();
-  std::vector<const RecodeNode *> best_nodes;
-  ExtractBestPaths(&best_nodes, nullptr);
-  // Now just run CTC on the best nodes.
-  int t = 0;
-  int width = best_nodes.size();
-  while (t < width) {
-    int label = best_nodes[t]->code;
-    if (label != null_char_) {
-      labels->push_back(label);
-      xcoords->push_back(t);
-    }
-    while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
-    }
-  }
-  xcoords->push_back(width);
-}
-
-// Returns the best path as unichar-ids/certs/ratings/xcoords skipping
-// duplicates, nulls and intermediate parts.
-void RecodeBeamSearch::ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset,
-                                                   std::vector<int> *unichar_ids,
-                                                   std::vector<float> *certs,
-                                                   std::vector<float> *ratings,
-                                                   std::vector<int> *xcoords) const {
-  fprintf(stderr, "recodebeam extractbestpathasunicharids \n");  // JDWDEBUG
-  std::vector<const RecodeNode *> best_nodes;
-  ExtractBestPaths(&best_nodes, nullptr);
-  ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
-  if (debug) {
-    DebugPath(unicharset, best_nodes);
-    DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings, *xcoords);
-  }
-}
-
-// Returns the best path as a set of WERD_RES.
-void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug,
-                                              const UNICHARSET *unicharset,
-                                              PointerVector<WERD_RES> *words,
-                                              int lstm_choice_mode) {
-fprintf(stderr, "recodebeam extractbestpathaswords \n");  // JDWDEBUG
-words->truncate(0);
-  std::vector<int> unichar_ids;
-  std::vector<float> certs;
-  std::vector<float> ratings;
-  std::vector<int> xcoords;
-  std::vector<const RecodeNode *> best_nodes;
-  std::vector<const RecodeNode *> second_nodes;
-  character_boundaries_.clear();
-  ExtractBestPaths(&best_nodes, &second_nodes);
-  if (debug) {
-    DebugPath(unicharset, best_nodes);
-    ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings, &xcoords);
-    tprintf("\nSecond choice path:\n");
-    DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings, xcoords);
-  }
-  // If lstm choice mode is required in granularity level 2, it stores the x
-  // Coordinates of every chosen character, to match the alternative choices to
-  // it.
-  ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords,
-                          &character_boundaries_);
-  int num_ids = unichar_ids.size();
-  // JDWDEBUG START
-  for (int i = 0; i < num_ids; i++){
-    const char *c = unicharset->id_to_unichar_ext(unichar_ids[i]);
-    fprintf(stderr, "recodebeam extractbestpathaswords unichar,unicharid= %s %i \n", c, unichar_ids[i]);
-  }
-  // JDWDEBUG END
-  if (debug) {
-    DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings, xcoords);
-  }
-  // Convert labels to unichar-ids.
-  int word_end = 0;
-  float prev_space_cert = 0.0f;
-  for (int word_start = 0; word_start < num_ids; word_start = word_end) {
-    for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
-      // A word is terminated when a space character or start_of_word flag is
-      // hit. We also want to force a separate word for every non
-      // space-delimited character when not in a dictionary context.
-      if (unichar_ids[word_end] == UNICHAR_SPACE) {
-        break;
-      }
-      int index = xcoords[word_end];
-      if (best_nodes[index]->start_of_word) {
-        break;
-      }
-      if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
-          (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
-           !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1]))) {
-        break;
-      }
-    }
-    float space_cert = 0.0f;
-    if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
-      space_cert = certs[word_end];
-    }
-    bool leading_space = word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
-    // Create a WERD_RES for the output word.
-    WERD_RES *word_res =
-        InitializeWord(leading_space, line_box, word_start, word_end,
-                       std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
-    for (int i = word_start; i < word_end; ++i) {
-      auto *choices = new BLOB_CHOICE_LIST;
-      BLOB_CHOICE_IT bc_it(choices);
-      auto *choice = new BLOB_CHOICE(unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
-                                     static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
-      int col = i - word_start;
-      choice->set_matrix_cell(col, col);
-      bc_it.add_after_then_move(choice);
-      word_res->ratings->put(col, col, choices);
-    }
-    int index = xcoords[word_end - 1];
-    word_res->FakeWordFromRatings(best_nodes[index]->permuter);
-    words->push_back(word_res);
-    prev_space_cert = space_cert;
-    if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE) {
-      ++word_end;
-    }
-  }
-}
-
-struct greater_than {
-  inline bool operator()(const RecodeNode *&node1, const RecodeNode *&node2) {
-    return (node1->score > node2->score);
-  }
-};
-
-void RecodeBeamSearch::PrintBeam2(bool uids, int num_outputs, const UNICHARSET *charset,
-                                  bool secondary) const {
-  std::vector<std::vector<const RecodeNode *>> topology;
-  std::unordered_set<const RecodeNode *> visited;
-  const std::vector<RecodeBeam *> &beam = !secondary ? beam_ : secondary_beam_;
-  // create the topology
-  for (int step = beam.size() - 1; step >= 0; --step) {
-    std::vector<const RecodeNode *> layer;
-    topology.push_back(layer);
-  }
-  // fill the topology with depths first
-  for (int step = beam.size() - 1; step >= 0; --step) {
-    std::vector<tesseract::RecodePair> &heaps = beam.at(step)->beams_->heap();
-    for (auto node : heaps) {
-      int backtracker = 0;
-      const RecodeNode *curr = &node.data();
-      while (curr != nullptr && !visited.count(curr)) {
-        visited.insert(curr);
-        topology[step - backtracker].push_back(curr);
-        curr = curr->prev;
-        ++backtracker;
-      }
-    }
-  }
-  int ct = 0;
-  int cb = 1;
-  for (std::vector<const RecodeNode *> layer : topology) {
-    if (cb >= character_boundaries_.size()) {
-      break;
-    }
-    if (ct == character_boundaries_[cb]) {
-      tprintf("***\n");
-      ++cb;
-    }
-    for (const RecodeNode *node : layer) {
-      const char *code;
-      int intCode;
-      if (node->unichar_id != INVALID_UNICHAR_ID) {
-        code = charset->id_to_unichar(node->unichar_id);
-        intCode = node->unichar_id;
-      } else if (node->code == null_char_) {
-        intCode = 0;
-        code = " ";
-      } else {
-        intCode = 666;
-        code = "*";
-      }
-      int intPrevCode = 0;
-      const char *prevCode;
-      float prevScore = 0;
-      if (node->prev != nullptr) {
-        prevScore = node->prev->score;
-        if (node->prev->unichar_id != INVALID_UNICHAR_ID) {
-          prevCode = charset->id_to_unichar(node->prev->unichar_id);
-          intPrevCode = node->prev->unichar_id;
-        } else if (node->code == null_char_) {
-          intPrevCode = 0;
-          prevCode = " ";
-        } else {
-          prevCode = "*";
-          intPrevCode = 666;
-        }
-      } else {
-        prevCode = " ";
-      }
-      if (uids) {
-        tprintf("%x(|)%f(>)%x(|)%f\n", intPrevCode, prevScore, intCode, node->score);
-      } else {
-        tprintf("%s(|)%f(>)%s(|)%f\n", prevCode, prevScore, code, node->score);
-      }
-    }
-    tprintf("-\n");
-    ++ct;
-  }
-  tprintf("***\n");
-}
-
-void RecodeBeamSearch::extractSymbolChoices(const UNICHARSET *unicharset) {
-  if (character_boundaries_.size() < 2) {
-    return;
-  }
-  fprintf(stderr, "recodebeam extractsymbolchoices \n");  // JDWDEBUG
-  // For the first iteration the original beam is analyzed. After that a
-  // new beam is calculated based on the results from the original beam.
-  std::vector<RecodeBeam *> &currentBeam = secondary_beam_.empty() ? beam_ : secondary_beam_;
-  character_boundaries_[0] = 0;
-  for (int j = 1; j < character_boundaries_.size(); ++j) {
-    std::vector<int> unichar_ids;
-    std::vector<float> certs;
-    std::vector<float> ratings;
-    std::vector<int> xcoords;
-    int backpath = character_boundaries_[j] - character_boundaries_[j - 1];
-    std::vector<tesseract::RecodePair> &heaps =
-      currentBeam.at(character_boundaries_[j] - 1)->beams_->heap();
-    std::vector<const RecodeNode *> best_nodes;
-    std::vector<const RecodeNode *> best;
-    // Scan the segmented node chain for valid unichar ids.
-    for (auto entry : heaps) {
-      bool validChar = false;
-      int backcounter = 0;
-      const RecodeNode *node = &entry.data();
-      while (node != nullptr && backcounter < backpath) {
-        if (node->code != null_char_ && node->unichar_id != INVALID_UNICHAR_ID) {
-          validChar = true;
-          break;
-        }
-        node = node->prev;
-        ++backcounter;
-      }
-      if (validChar) {
-        best.push_back(&entry.data());
-      }
-    }
-    // find the best rated segmented node chain and extract the unichar id.
-    if (!best.empty()) {
-      std::sort(best.begin(), best.end(), greater_than());
-      ExtractPath(best[0], &best_nodes, backpath);
-      ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, &xcoords);
-    }
-    if (!unichar_ids.empty()) {
-      int bestPos = 0;
-      for (int i = 1; i < unichar_ids.size(); ++i) {
-        if (ratings[i] < ratings[bestPos]) {
-          bestPos = i;
-        }
-      }
-      // TODO: bestCode is currently unused (see commit 2dd5d0d60).
-      int bestCode = -10;
-      for (auto &node : best_nodes) {
-        if (node->unichar_id == unichar_ids[bestPos]) {
-          bestCode = node->code;
-        }
-      }
-      // Exclude the best choice for the followup decoding.
-      std::unordered_set<int> excludeCodeList;
-      for (auto &best_node : best_nodes) {
-        if (best_node->code != null_char_) {
-          excludeCodeList.insert(best_node->code);
-        }
-      }
-      if (j - 1 < excludedUnichars.size()) {
-        for (auto elem : excludeCodeList) {
-          excludedUnichars[j - 1].insert(elem);
-        }
-      } else {
-        excludedUnichars.push_back(excludeCodeList);
-      }
-      // Save the best choice for the choice iterator.
-      if (j - 1 < ctc_choices.size()) {
-        int id = unichar_ids[bestPos];
-        const char *result = unicharset->id_to_unichar_ext(id);
-        float rating = ratings[bestPos];
-        ctc_choices[j - 1].push_back(std::pair<const char *, float>(result, rating));
-      } else {
-        std::vector<std::pair<const char *, float>> choice;
-        int id = unichar_ids[bestPos];
-        const char *result = unicharset->id_to_unichar_ext(id);
-        float rating = ratings[bestPos];
-        choice.emplace_back(result, rating);
-        ctc_choices.push_back(choice);
-      }
-      // fill the blank spot with an empty array
-    } else {
-      if (j - 1 >= excludedUnichars.size()) {
-        std::unordered_set<int> excludeCodeList;
-        excludedUnichars.push_back(excludeCodeList);
-      }
-      if (j - 1 >= ctc_choices.size()) {
-        std::vector<std::pair<const char *, float>> choice;
-        ctc_choices.push_back(choice);
-      }
-    }
-  }
-  for (auto data : secondary_beam_) {
-    delete data;
-  }
-  secondary_beam_.clear();
-}
-
-// Generates debug output of the content of the beams after a Decode.
-void RecodeBeamSearch::DebugBeams(const UNICHARSET &unicharset) const {
-  fprintf(stderr, "recodebeam debugbeams \n");  // JDWDEBUG
-  for (int p = 0; p < beam_size_; ++p) {
-    for (int d = 0; d < 2; ++d) {
-      for (int c = 0; c < NC_COUNT; ++c) {
-        auto cont = static_cast<NodeContinuation>(c);
-        int index = BeamIndex(d, cont, 0);
-        if (beam_[p]->beams_[index].empty()) {
-          continue;
-        }
-        // Print all the best scoring nodes for each unichar found.
-        tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict", kNodeContNames[c]);
-        DebugBeamPos(unicharset, beam_[p]->beams_[index]);
-      }
-    }
-  }
-}
-
-// Generates debug output of the content of a single beam position.
-void RecodeBeamSearch::DebugBeamPos(const UNICHARSET &unicharset, const RecodeHeap &heap) const {
-  std::vector<const RecodeNode *> unichar_bests(unicharset.size());
-  const RecodeNode *null_best = nullptr;
-  int heap_size = heap.size();
-  for (int i = 0; i < heap_size; ++i) {
-    const RecodeNode *node = &heap.get(i).data();
-    if (node->unichar_id == INVALID_UNICHAR_ID) {
-      if (null_best == nullptr || null_best->score < node->score) {
-        null_best = node;
-      }
-    } else {
-      if (unichar_bests[node->unichar_id] == nullptr ||
-          unichar_bests[node->unichar_id]->score < node->score) {
-        unichar_bests[node->unichar_id] = node;
-      }
-    }
-  }
-  for (auto &unichar_best : unichar_bests) {
-    if (unichar_best != nullptr) {
-      const RecodeNode &node = *unichar_best;
-      node.Print(null_char_, unicharset, 1);
-    }
-  }
-  if (null_best != nullptr) {
-    null_best->Print(null_char_, unicharset, 1);
-  }
-}
-
-// Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
-// duplicates, nulls and intermediate parts.
-/* static */
-void RecodeBeamSearch::ExtractPathAsUnicharIds(const std::vector<const RecodeNode *> &best_nodes,
-                                               std::vector<int> *unichar_ids,
-                                               std::vector<float> *certs,
-                                               std::vector<float> *ratings,
-                                               std::vector<int> *xcoords,
-                                               std::vector<int> *character_boundaries) {
-  unichar_ids->clear();
-  certs->clear();
-  ratings->clear();
-  xcoords->clear();
-  std::vector<int> starts;
-  std::vector<int> ends;
-  // Backtrack extracting only valid, non-duplicate unichar-ids.
-  fprintf(stderr, "recodebeam extractpathasunicharids \n");  // JDWDEBUG
-  int t = 0;
-  int width = best_nodes.size();
-  fprintf(stderr, "%s %i \n", "extractpathasunicharids width=", width);
-  while (t < width) {
-    double certainty = 0.0;
-    double rating = 0.0;
-    bool foundInvalid = false;    // JDWDEBUG
-    while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
-      fprintf(stderr, "%s %i \n", "extractpathasunicharids bypass invalid unicharid code=", best_nodes[t]->code);   // JDWDEBUG
-      foundInvalid = true;    // JDWDEBUG
-      double cert = best_nodes[t++]->certainty;
-      if (cert < certainty) {
-        certainty = cert;
-      }
-      rating -= cert;
-    }
-    // starts.push_back(t);    // JDWTODO
-    if (t < width) {
-      starts.push_back(t);    // JDWTODO
-      fprintf(stderr, "%s %i %i %f \n", "extractpathasunicharids valid unicharid,code,score=", best_nodes[t]->unichar_id, best_nodes[t]->code, best_nodes[t]->score);   // JDWDEBUG
-      // JDWDEBUG START
-      if (!foundInvalid)
-        fprintf(stderr, "%s \n", "extractpathasunicharids foundvalid with no invalid");
-      // JDWDEBUG END
-      int unichar_id = best_nodes[t]->unichar_id;
-      if (unichar_id == UNICHAR_SPACE && !certs->empty() && best_nodes[t]->permuter != NO_PERM) {
-        // All the rating and certainty go on the previous character except
-        // for the space itself.
-        fprintf(stderr, "%s %i \n", "extractpathasunicharids unicharid space", best_nodes[t]->code);  // JDWDEBUG
-        if (certainty < certs->back()) {
-          certs->back() = certainty;
-        }
-        ratings->back() += rating;
-        certainty = 0.0;
-        rating = 0.0;
-      }
-      xcoords->push_back(t);
-      unichar_ids->push_back(unichar_id);
-      t++;  // JDWTODO
-      // do {   // JDWTODO
-      while (t < width && best_nodes[t]->duplicate) {   // JDWTODO
-        // double cert = best_nodes[t++]->certainty;    // JDWTODO
-        double cert = best_nodes[t]->certainty;    // JDWTODO
-        // Special-case NO-PERM space to forget the certainty of the previous
-        // nulls. See long comment in ContinueContext.
-        if (cert < certainty ||
-            (unichar_id == UNICHAR_SPACE && best_nodes[t - 1]->permuter == NO_PERM)) {
-          certainty = cert;
-        }
-        rating -= cert;
-        // JDWDEBUG START
-        if (t < width && best_nodes[t]->duplicate)
-          fprintf(stderr, "%s %i %i \n", "extractpathasunicharids duplicate removed unicharid,code=", best_nodes[t]->unichar_id, best_nodes[t]->code);  // JDWDEBUG
-        // JDWDEBUG END
-        t++;    // JDWTODO
-      }   // JDWTODO
-      // } while (t < width && best_nodes[t]->duplicate);   // JDWTODO
-      ends.push_back(t);
-      certs->push_back(certainty);
-      ratings->push_back(rating);
-    } else if (!certs->empty()) {
-      if (certainty < certs->back()) {
-        certs->back() = certainty;
-      }
-      ratings->back() += rating;
-    }
-  }
-  starts.push_back(width);
-  if (character_boundaries != nullptr) {
-    calculateCharBoundaries(&starts, &ends, character_boundaries, width);
-  }
-  xcoords->push_back(width);
-}
-
-// Sets up a word with the ratings matrix and fake blobs with boxes in the
-// right places.
-WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_box, int word_start,
-                                           int word_end, float space_certainty,
-                                           const UNICHARSET *unicharset,
-                                           const std::vector<int> &xcoords, float scale_factor) {
-  // Make a fake blob for each non-zero label.
-  fprintf(stderr, "recodebeam initializeword scalefactor= %f \n", scale_factor);  // JDWDEBUG
-  fprintf(stderr, "recodebeam initializeword start,end= %i %i \n", word_start, word_end);  // JDWDEBUG
-  C_BLOB_LIST blobs;
-  C_BLOB_IT b_it(&blobs);
-  for (int i = word_start; i < word_end; ++i) {
-    if (character_boundaries_.size() > (i + 1)) {
-      TBOX box(static_cast<int16_t>(std::floor(character_boundaries_[i] * scale_factor)) +
-                   line_box.left(),
-               line_box.bottom(),
-               static_cast<int16_t>(std::ceil(character_boundaries_[i + 1] * scale_factor)) +
-                   line_box.left(),
-               line_box.top());
-      // JDWDEBUG START
-      std::string debug_str;
-      debug_str = "fake boxblob for werd being built in recodebeam ";
-      box.print_to_str(debug_str);
-      fprintf(stderr, "%s %i %i %i \n", debug_str.c_str(), i, character_boundaries_[i], character_boundaries_[i + 1]);
-      // JDWDEBUG END
-      b_it.add_after_then_move(C_BLOB::FakeBlob(box));
-    }
-  }
-  // Make a fake word from the blobs.
-  WERD *word = new WERD(&blobs, leading_space, nullptr);
-  // Make a WERD_RES from the word.
-  auto *word_res = new WERD_RES(word);
-  word_res->end = word_end - word_start + leading_space;
-  word_res->uch_set = unicharset;
-  word_res->combination = true; // Give it ownership of the word.
-  word_res->space_certainty = space_certainty;
-  word_res->ratings = new MATRIX(word_end - word_start, 1);
-  return word_res;
-}
-
-// Fills top_n_flags_ with bools that are true iff the corresponding output
-// is one of the top_n.
-void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) {
-  fprintf(stderr, "recodebeam computetopn \n");  // JDWDEBUG
-  top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
-  top_code_ = -1;
-  second_code_ = -1;
-  top_heap_.clear();
-  for (int i = 0; i < num_outputs; ++i) {
-    if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) {
-      TopPair entry(outputs[i], i);
-      top_heap_.Push(&entry);
-      if (top_heap_.size() > top_n) {
-        top_heap_.Pop(&entry);
-      }
-    }
-  }
-
-  float top_key = 0.0F;   // JDWTODO
-  float second_key = 0.0F;   // JDWTODO
-  bool found_first_whammy = false;    // JDWTODO
-  bool found_second_whammy = false;    // JDWTODO
-  while (!top_heap_.empty()) {
-    TopPair entry;
-    top_heap_.Pop(&entry);
-    if (in_double_whammy_ && entry.data() == first_whammy_)    // JDWTODO
-      found_first_whammy = true;    // JDWTODO
-    if (in_double_whammy_ && entry.data() == second_whammy_)    // JDWTODO
-      found_second_whammy = true;    // JDWTODO
-    if (top_heap_.size() > 1) {
-      top_n_flags_[entry.data()] = TN_TOPN;
-      fprintf(stderr, "recodebeam computetopn topn code,key= %i %f \n", entry.data(), entry.key());  // JDWDEBUG
-    } else {
-      top_n_flags_[entry.data()] = TN_TOP2;
-      fprintf(stderr, "recodebeam computetopn top2 code,key= %i %f \n", entry.data(), entry.key());  // JDWDEBUG
-      if (top_heap_.empty()) {
-        top_code_ = entry.data();
-        top_key = entry.key();   // JDWTODO
-      } else {
-        second_code_ = entry.data();
-        second_key = entry.key();   // JDWTODO
-      }
-    }
-  }
-
-  // JDWTODO START
-  if (in_double_whammy_) {
-    if (!found_first_whammy && !found_second_whammy){
-      in_double_whammy_ = false;
-      first_whammy_ = -1;
-      second_whammy_ = -1;
-      fprintf(stderr, "recodebeam computetopn double whammy cleared unicharid,code= \n");
-    }
-  }
-  // JDWTODO END
-
-  // JDWTODO START
-  if (!in_double_whammy_) {
-    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > 0.25F && second_key > 0.25F){
-      in_double_whammy_ = true;
-      first_whammy_ = top_code_;
-      second_whammy_ = second_code_;
-      fprintf(stderr, "recodebeam computetopn double whammy found unicharid,code= %f %f \n", top_key, second_key);
-    }
-  }
-  // JDWTODO END
-
-  fprintf(stderr, "recodebeam computetopn unicharid,code= top_code,second_code= %i %i \n", top_code_, second_code_);  // JDWDEBUG
-  top_n_flags_[null_char_] = TN_TOP2;
-}
-
-void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs,
-                                      int num_outputs, int top_n) {
-  fprintf(stderr, "recodebeam computesectopn \n");  // JDWDEBUG
-  top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
-  top_code_ = -1;
-  second_code_ = -1;
-  top_heap_.clear();
-  for (int i = 0; i < num_outputs; ++i) {
-    if ((top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key()) && !exList->count(i)) {
-      TopPair entry(outputs[i], i);
-      top_heap_.Push(&entry);
-      if (top_heap_.size() > top_n) {
-        top_heap_.Pop(&entry);
-      }
-    }
-  }
-  while (!top_heap_.empty()) {
-    TopPair entry;
-    top_heap_.Pop(&entry);
-    if (top_heap_.size() > 1) {
-      top_n_flags_[entry.data()] = TN_TOPN;
-    } else {
-      top_n_flags_[entry.data()] = TN_TOP2;
-      if (top_heap_.empty()) {
-        top_code_ = entry.data();
-      } else {
-        second_code_ = entry.data();
-      }
-    }
-  }
-  top_n_flags_[null_char_] = TN_TOP2;
-}
-
-// Adds the computation for the current time-step to the beam. Call at each
-// time-step in sequence from left to right. outputs is the activation vector
-// for the current timestep.
-void RecodeBeamSearch::DecodeStep(const float *outputs, int t, double dict_ratio,
-                                  double cert_offset, double worst_dict_cert,
-                                  const UNICHARSET *charset, bool debug) {
-  fprintf(stderr, "recodebeam decodestep timestep= %i \n", t);  // JDWDEBUG
-  if (t == beam_.size()) {
-    beam_.push_back(new RecodeBeam);
-  }
-  RecodeBeam *step = beam_[t];
-  beam_size_ = t + 1;
-  step->Clear();
-  if (t == 0) {
-    // The first step can only use singles and initials.
-    ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
-                    dict_ratio, cert_offset, worst_dict_cert, step);
-    if (dict_ != nullptr) {
-      ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
-                      dict_ratio, cert_offset, worst_dict_cert, step);
-    }
-  } else {
-    RecodeBeam *prev = beam_[t - 1];
-    if (debug) {
-      int beam_index = BeamIndex(true, NC_ANYTHING, 0);
-      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
-        std::vector<const RecodeNode *> path;
-        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
-        tprintf("Step %d: Dawg beam %d:\n", t, i);
-        DebugPath(charset, path);
-      }
-      beam_index = BeamIndex(false, NC_ANYTHING, 0);
-      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
-        std::vector<const RecodeNode *> path;
-        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
-        tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
-        DebugPath(charset, path);
-      }
-    }
-    int total_beam = 0;
-    // Work through the scores by group (top-2, top-n, the rest) while the beam
-    // is empty. This enables extending the context using only the top-n results
-    // first, which may have an empty intersection with the valid codes, so we
-    // fall back to the rest if the beam is empty.
-    for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
-      auto top_n = static_cast<TopNState>(tn);
-      for (int index = 0; index < kNumBeams; ++index) {
-        // Working backwards through the heaps doesn't guarantee that we see the
-        // best first, but it comes before a lot of the worst, so it is slightly
-        // more efficient than going forwards.
-        for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
-          ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
-                          dict_ratio, cert_offset, worst_dict_cert, step);
-        }
-      }
-      for (int index = 0; index < kNumBeams; ++index) {
-        if (ContinuationFromBeamsIndex(index) == NC_ANYTHING) {
-          total_beam += step->beams_[index].size();
-        }
-      }
-    }
-    // Special case for the best initial dawg. Push it on the heap if good
-    // enough, but there is only one, so it doesn't blow up the beam.
-    for (int c = 0; c < NC_COUNT; ++c) {
-      if (step->best_initial_dawgs_[c].code >= 0) {
-        fprintf(stderr, "recodebeam decodestep special case initial dawg %i \n", t);  // JDWDEBUG
-        int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
-        RecodeHeap *dawg_heap = &step->beams_[index];
-        PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
-      }
-    }
-  }
-}
-
-void RecodeBeamSearch::DecodeSecondaryStep(const float *outputs, int t, double dict_ratio,
-                                           double cert_offset, double worst_dict_cert,
-                                           const UNICHARSET *charset, bool debug) {
-  fprintf(stderr, "recodebeam decodesecondarystep \n");  // JDWDEBUG
-  if (t == secondary_beam_.size()) {
-    secondary_beam_.push_back(new RecodeBeam);
-  }
-  RecodeBeam *step = secondary_beam_[t];
-  step->Clear();
-  if (t == 0) {
-    // The first step can only use singles and initials.
-    ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
-                    dict_ratio, cert_offset, worst_dict_cert, step);
-    if (dict_ != nullptr) {
-      ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2, charset,
-                      dict_ratio, cert_offset, worst_dict_cert, step);
-    }
-  } else {
-    RecodeBeam *prev = secondary_beam_[t - 1];
-    if (debug) {
-      int beam_index = BeamIndex(true, NC_ANYTHING, 0);
-      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
-        std::vector<const RecodeNode *> path;
-        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
-        tprintf("Step %d: Dawg beam %d:\n", t, i);
-        DebugPath(charset, path);
-      }
-      beam_index = BeamIndex(false, NC_ANYTHING, 0);
-      for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
-        std::vector<const RecodeNode *> path;
-        ExtractPath(&prev->beams_[beam_index].get(i).data(), &path);
-        tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
-        DebugPath(charset, path);
-      }
-    }
-    int total_beam = 0;
-    // Work through the scores by group (top-2, top-n, the rest) while the beam
-    // is empty. This enables extending the context using only the top-n results
-    // first, which may have an empty intersection with the valid codes, so we
-    // fall back to the rest if the beam is empty.
-    for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
-      auto top_n = static_cast<TopNState>(tn);
-      for (int index = 0; index < kNumBeams; ++index) {
-        // Working backwards through the heaps doesn't guarantee that we see the
-        // best first, but it comes before a lot of the worst, so it is slightly
-        // more efficient than going forwards.
-        for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
-          ContinueContext(&prev->beams_[index].get(i).data(), index, outputs, top_n, charset,
-                          dict_ratio, cert_offset, worst_dict_cert, step);
-        }
-      }
-      for (int index = 0; index < kNumBeams; ++index) {
-        if (ContinuationFromBeamsIndex(index) == NC_ANYTHING) {
-          total_beam += step->beams_[index].size();
-        }
-      }
-    }
-    // Special case for the best initial dawg. Push it on the heap if good
-    // enough, but there is only one, so it doesn't blow up the beam.
-    for (int c = 0; c < NC_COUNT; ++c) {
-      if (step->best_initial_dawgs_[c].code >= 0) {
-        int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
-        RecodeHeap *dawg_heap = &step->beams_[index];
-        PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c], dawg_heap);
-      }
-    }
-  }
-}
-
-// Adds to the appropriate beams the legal (according to recoder)
-// continuations of context prev, which is of the given length, using the
-// given network outputs to provide scores to the choices. Uses only those
-// choices for which top_n_flags[index] == top_n_flag.
-void RecodeBeamSearch::ContinueContext(const RecodeNode *prev, int index, const float *outputs,
-                                       TopNState top_n_flag, const UNICHARSET *charset,
-                                       double dict_ratio, double cert_offset,
-                                       double worst_dict_cert, RecodeBeam *step) {
-  // JDWDEBUG START
-  if (prev != nullptr) {
-    const char *ucc = charset->id_to_unichar_ext(prev->unichar_id);
-    fprintf(stderr, "recodebeam continuecontext unicharid,code,unichar,index,topn = %i %i %s %i %i \n", prev->unichar_id, prev->code, ucc, index, top_n_flag);
-  }
-  else {
-    fprintf(stderr, "recodebeam continuecontext top prev null index,topn = %i %i \n", index, top_n_flag);
-  }
-  // JDWDEBUG END
-  RecodedCharID prefix;
-  RecodedCharID full_code;
-  const RecodeNode *previous = prev;
-  int length = LengthFromBeamsIndex(index);
-  bool use_dawgs = IsDawgFromBeamsIndex(index);
-  NodeContinuation prev_cont = ContinuationFromBeamsIndex(index);
-  for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
-    while (previous != nullptr && (previous->duplicate || previous->code == null_char_)) {
-      fprintf(stderr, "recodebeam continuecontext stepping back code= %i \n", previous->code);
-      previous = previous->prev;
-    }
-    if (previous != nullptr) {
-      prefix.Set(p, previous->code);
-      full_code.Set(p, previous->code);
-    }
-  }
-  if (prev != nullptr && !is_simple_text_) {
-    if (top_n_flags_[prev->code] == top_n_flag) {
-      if (prev_cont != NC_NO_DUP) {
-        float cert = NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
-        fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", prev->unichar_id, 1);
-        PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
-                                dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
-      }
-      if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 && prev->code != null_char_) {
-        float cert =
-            NetworkIO::ProbToCertainty(outputs[prev->code] + outputs[null_char_]) + cert_offset;
-        fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", prev->unichar_id, 1);
-        PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id, cert, worst_dict_cert,
-                                dict_ratio, use_dawgs, NC_NO_DUP, prev, step);
-      }
-    }
-    if (prev_cont == NC_ONLY_DUP) {
-      return;
-    }
-    if (prev->code != null_char_ && length > 0 && top_n_flags_[null_char_] == top_n_flag) {
-      // Allow nulls within multi code sequences, as the nulls within are not
-      // explicitly included in the code sequence.
-      float cert = NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
-      fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", INVALID_UNICHAR_ID, 0);
-      PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID, cert, worst_dict_cert,
-                              dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
-    }
-  }
-  const std::vector<int> *final_codes = recoder_.GetFinalCodes(prefix);
-  if (final_codes != nullptr) {
-    for (int code : *final_codes) {
-      if (top_n_flags_[code] != top_n_flag) {
-        continue;
-      }
-      if (prev != nullptr && prev->code == code && !is_simple_text_) {
-        continue;
-      }
-      float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
-      if (cert < kMinCertainty && code != null_char_) {
-        continue;
-      }
-      full_code.Set(length, code);
-      int unichar_id = recoder_.DecodeUnichar(full_code);
-      // Map the null char to INVALID.
-      if (length == 0 && code == null_char_) {
-        unichar_id = INVALID_UNICHAR_ID;
-      }
-      if (unichar_id != INVALID_UNICHAR_ID && charset != nullptr &&
-          !charset->get_enabled(unichar_id)) {
-        continue; // disabled by whitelist/blacklist
-      }
-      ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ANYTHING,
-                      prev, step);
-      if (top_n_flag == TN_TOP2 && code != null_char_) {
-        float prob = outputs[code] + outputs[null_char_];
-        if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
-            ((prev->code == top_code_ && code == second_code_) ||
-             (code == top_code_ && prev->code == second_code_))) {
-          prob += outputs[prev->code];
-        }
-        float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
-        ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio, use_dawgs, NC_ONLY_DUP,
-                        prev, step);
-      }
-    }
-  }
-  const std::vector<int> *next_codes = recoder_.GetNextCodes(prefix);
-  if (next_codes != nullptr) {
-    for (int code : *next_codes) {
-      if (top_n_flags_[code] != top_n_flag) {
-        continue;
-      }
-      if (prev != nullptr && prev->code == code && !is_simple_text_) {
-        continue;
-      }
-      float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
-      fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", INVALID_UNICHAR_ID, 0);
-      PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
-                              dict_ratio, use_dawgs, NC_ANYTHING, prev, step);
-      if (top_n_flag == TN_TOP2 && code != null_char_) {
-        float prob = outputs[code] + outputs[null_char_];
-        if (prev != nullptr && prev_cont == NC_ANYTHING && prev->code != null_char_ &&
-            ((prev->code == top_code_ && code == second_code_) ||
-             (code == top_code_ && prev->code == second_code_))) {
-          prob += outputs[prev->code];
-        }
-        float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
-        fprintf(stderr, "recodebeam continuecontext before pushdupornodawgifbetter unicharid,dup= %i %i \n", INVALID_UNICHAR_ID, 0);
-        PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert, worst_dict_cert,
-                                dict_ratio, use_dawgs, NC_ONLY_DUP, prev, step);
-      }
-    }
-  }
-}
-
-// Continues for a new unichar, using dawg or non-dawg as per flag.
-void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert, float worst_dict_cert,
-                                       float dict_ratio, bool use_dawgs, NodeContinuation cont,
-                                       const RecodeNode *prev, RecodeBeam *step) {
-  fprintf(stderr, "recodebeam ContinueUnichar unicharid,code,cont= %i %i %i \n", unichar_id, code, cont);  // JDWDEBUG
-  if (use_dawgs) {
-    if (cert > worst_dict_cert) {
-      ContinueDawg(code, unichar_id, cert, cont, prev, step);
-    }
-  } else {
-    RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
-    fprintf(stderr, "recodebeam ContinueUnichar before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
-    PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false, false, false, false,
-                     cert * dict_ratio, prev, nullptr, nodawg_heap);
-    if (dict_ != nullptr && ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
-                             !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
-      // Any top choice position that can start a new word, ie a space or
-      // any non-space-delimited character, should also be considered
-      // by the dawg search, so push initial dawg to the dawg heap.
-      float dawg_cert = cert;
-      PermuterType permuter = TOP_CHOICE_PERM;
-      // Since we use the space either side of a dictionary word in the
-      // certainty of the word, (to properly handle weak spaces) and the
-      // space is coming from a non-dict word, we need special conditions
-      // to avoid degrading the certainty of the dict word that follows.
-      // With a space we don't multiply the certainty by dict_ratio, and we
-      // flag the space with NO_PERM to indicate that we should not use the
-      // predecessor nulls to generate the confidence for the space, as they
-      // have already been multiplied by dict_ratio, and we can't go back to
-      // insert more entries in any previous heaps.
-      if (unichar_id == UNICHAR_SPACE) {
-        permuter = NO_PERM;
-      } else {
-        dawg_cert *= dict_ratio;
-      }
-      fprintf(stderr, "recodebeam ContinueUnichar before pushinitialdawgifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
-      PushInitialDawgIfBetter(code, unichar_id, permuter, false, false, dawg_cert, cont, prev,
-                              step);
-    }
-  }
-}
-
-// Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
-// appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
-// is a valid continuation of whatever is in prev.
-void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert, NodeContinuation cont,
-                                    const RecodeNode *prev, RecodeBeam *step) {
-  fprintf(stderr, "recodebeam ContinueDawg unicharid,code,cont= %i %i %i \n", unichar_id, code, cont);  // JDWDEBUG
-  RecodeHeap *dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
-  RecodeHeap *nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
-  if (unichar_id == INVALID_UNICHAR_ID) {
-    fprintf(stderr, "recodebeam ContinueDawg before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
-    PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false, false, false, cert,
-                     prev, nullptr, dawg_heap);
-    return;
-  }
-  // Avoid dictionary probe if score a total loss.
-  float score = cert;
-  if (prev != nullptr) {
-    score += prev->score;
-  }
-  if (dawg_heap->size() >= kBeamWidths[0] && score <= dawg_heap->PeekTop().data().score &&
-      nodawg_heap->size() >= kBeamWidths[0] && score <= nodawg_heap->PeekTop().data().score) {
-    return;
-  }
-  const RecodeNode *uni_prev = prev;
-  // Prev may be a partial code, null_char, or duplicate, so scan back to the
-  // last valid unichar_id.
-  while (uni_prev != nullptr &&
-         (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate)) {
-    uni_prev = uni_prev->prev;
-  }
-  if (unichar_id == UNICHAR_SPACE) {
-    if (uni_prev != nullptr && uni_prev->end_of_word) {
-      // Space is good. Push initial state, to the dawg beam and a regular
-      // space to the top choice beam.
-      fprintf(stderr, "recodebeam ContinueDawg before PushInitialDawgIfBetter unicharid= %i \n", unichar_id);  // JDWDEBUG
-      PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false, false, cert, cont, prev,
-                              step);
-      fprintf(stderr, "recodebeam ContinueDawg before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
-      PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter, false, false, false,
-                       false, cert, prev, nullptr, nodawg_heap);
-    }
-    return;
-  } else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
-             uni_prev->unichar_id != UNICHAR_SPACE &&
-             dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) &&
-             dict_->getUnicharset().IsSpaceDelimited(unichar_id)) {
-    return; // Can't break words between space delimited chars.
-  }
-  DawgPositionVector initial_dawgs;
-  auto *updated_dawgs = new DawgPositionVector;
-  DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM);
-  bool word_start = false;
-  if (uni_prev == nullptr) {
-    // Starting from beginning of line.
-    dict_->default_dawgs(&initial_dawgs, false);
-    word_start = true;
-  } else if (uni_prev->dawgs != nullptr) {
-    // Continuing a previous dict word.
-    dawg_args.active_dawgs = uni_prev->dawgs;
-    word_start = uni_prev->start_of_dawg;
-  } else {
-    return; // Can't continue if not a dict word.
-  }
-  auto permuter = static_cast<PermuterType>(
-      dict_->def_letter_is_okay(&dawg_args, dict_->getUnicharset(), unichar_id, false));
-  if (permuter != NO_PERM) {
-    PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start,
-                     dawg_args.valid_end, false, cert, prev, dawg_args.updated_dawgs, dawg_heap);
-    if (dawg_args.valid_end && !space_delimited_) {
-      // We can start another word right away, so push initial state as well,
-      // to the dawg beam, and the regular character to the top choice beam,
-      // since non-dict words can start here too.
-      fprintf(stderr, "recodebeam ContinueDawg before PushInitialDawgIfBetter unicharid= %i \n", unichar_id);  // JDWDEBUG
-      PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true, cert, cont, prev, step);
-      fprintf(stderr, "recodebeam ContinueDawg before pushheapifbetter unicharid,dup= %i %i \n", unichar_id, 0 );  // JDWDEBUG
-      PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false, word_start, true, false,
-                       cert, prev, nullptr, nodawg_heap);
-    }
-  } else {
-    delete updated_dawgs;
-  }
-}
-
-// Adds a RecodeNode composed of the tuple (code, unichar_id,
-// initial-dawg-state, prev, cert) to the given heap if/ there is room or if
-// better than the current worst element if already full.
-void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id, PermuterType permuter,
-                                               bool start, bool end, float cert,
-                                               NodeContinuation cont, const RecodeNode *prev,
-                                               RecodeBeam *step) {
-  fprintf(stderr, "recodebeam PushInitialDawgIfBetter unicharid,code= %i %i \n", unichar_id, code);  // JDWDEBUG
-  RecodeNode *best_initial_dawg = &step->best_initial_dawgs_[cont];
-  float score = cert;
-  if (prev != nullptr) {
-    score += prev->score;
-  }
-  if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
-    auto *initial_dawgs = new DawgPositionVector;
-    dict_->default_dawgs(initial_dawgs, false);
-    fprintf(stderr, "recodebeam PushInitialDawgIfBetter adding new node unicharid,code= %i %i \n", unichar_id, code);  // JDWDEBUG
-    RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert, score, prev,
-                    initial_dawgs, ComputeCodeHash(code, false, prev));
-    *best_initial_dawg = node;
-  }
-}
-
-// Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
-// false, false, false, false, cert, prev, nullptr) to heap if there is room
-// or if better than the current worst element if already full.
-/* static */
-void RecodeBeamSearch::PushDupOrNoDawgIfBetter(int length, bool dup, int code, int unichar_id,
-                                               float cert, float worst_dict_cert, float dict_ratio,
-                                               bool use_dawgs, NodeContinuation cont,
-                                               const RecodeNode *prev, RecodeBeam *step) {
-  fprintf(stderr, "recodebeam PushDupOrNoDawgIfBetter %i \n", unichar_id);  // JDWDEBUG
-  int index = BeamIndex(use_dawgs, cont, length);
-  if (use_dawgs) {
-    if (cert > worst_dict_cert) {
-      PushHeapIfBetter(kBeamWidths[length], code, unichar_id, prev ? prev->permuter : NO_PERM,
-                       false, false, false, dup, cert, prev, nullptr, &step->beams_[index]);
-    }
-  } else {
-    cert *= dict_ratio;
-    if (cert >= kMinCertainty || code == null_char_) {
-      PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
-                       prev ? prev->permuter : TOP_CHOICE_PERM, false, false, false, dup, cert,
-                       prev, nullptr, &step->beams_[index]);
-    }
-  }
-}
-
-// Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
-// dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
-// or if better than the current worst element if already full.
-void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
-                                        PermuterType permuter, bool dawg_start, bool word_start,
-                                        bool end, bool dup, float cert, const RecodeNode *prev,
-                                        DawgPositionVector *d, RecodeHeap *heap) {
-  fprintf(stderr, "recodebeam PushHeapIfBetter #1 %i \n", unichar_id);  // JDWDEBUG
-  float score = cert;
-  if (prev != nullptr) {
-    score += prev->score;
-  }
-  if (heap->size() < max_size || score > heap->PeekTop().data().score) {
-    uint64_t hash = ComputeCodeHash(code, dup, prev);
-    RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end, dup, cert, score, prev,
-                    d, hash);
-    if (UpdateHeapIfMatched(&node, heap)) {
-      return;
-    }
-    // JDWTODO START
-    if (!AddToHeapIsAllowed(&node)) {
-      return;
-    }
-    // JDWTODO END
-    fprintf(stderr, "recodebeam PushHeapIfBetter #1 adding node unicharid,code= %i %i \n", unichar_id, code);  // JDWDEBUG
-    RecodePair entry(score, node);
-    heap->Push(&entry);
-    ASSERT_HOST(entry.data().dawgs == nullptr);
-    if (heap->size() > max_size) {
-      heap->Pop(&entry);
-    }
-  } else {
-    delete d;
-  }
-}
-
-// Adds a RecodeNode to heap if there is room
-// or if better than the current worst element if already full.
-void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode *node, RecodeHeap *heap) {
-  fprintf(stderr, "recodebeam PushHeapIfBetter #1 %i \n", node->unichar_id);  // JDWDEBUG
-  if (heap->size() < max_size || node->score > heap->PeekTop().data().score) {
-    if (UpdateHeapIfMatched(node, heap)) {
-      return;
-    }
-    fprintf(stderr, "recodebeam PushHeapIfBetter #2 adding node unicharid,code= %i %i \n", node->unichar_id, node->code);  // JDWDEBUG
-    RecodePair entry(node->score, *node);
-    heap->Push(&entry);
-    ASSERT_HOST(entry.data().dawgs == nullptr);
-    if (heap->size() > max_size) {
-      heap->Pop(&entry);
-    }
-  }
-}
-
-// Searches the heap for a matching entry, and updates the score with
-// reshuffle if needed. Returns true if there was a match.
-bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap) {
-  // TODO(rays) consider hash map instead of linear search.
-  // It might not be faster because the hash map would have to be updated
-  // every time a heap reshuffle happens, and that would be a lot of overhead.
-  fprintf(stderr, "recodebeam UpdateHeapIfMatched %i \n", new_node->unichar_id);  // JDWDEBUG
-  std::vector<RecodePair> &nodes = heap->heap();
-  for (auto &i : nodes) {
-    RecodeNode &node = i.data();
-    if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
-        node.permuter == new_node->permuter && node.start_of_dawg == new_node->start_of_dawg) {
-      if (new_node->score > node.score) {
-        // The new one is better. Update the entire node in the heap and
-        // reshuffle.
-        fprintf(stderr, "recodebeam UpdateHeapIfMatched doing update unicharid,code= %i %i \n", new_node->unichar_id, new_node->code);  // JDWDEBUG
-        node = *new_node;
-        i.key() = node.score;
-        heap->Reshuffle(&i);
-      }
-      return true;
-    }
-  }
-  return false;
-}
-
-// JDWTODO START
-bool RecodeBeamSearch::AddToHeapIsAllowed(RecodeNode *new_node) {
-  if (!in_double_whammy_)
-    return true;
-  const RecodeNode *prev_node = new_node->prev;
-  if (prev_node != nullptr && prev_node->code == first_whammy_ && new_node->code == second_whammy_) {
-    fprintf(stderr, "recodebeam AddToHeapIsAllowed second whammy not allowed unicharid,code= + prevcode %i %i %i \n", new_node->unichar_id, new_node->code, prev_node->code);
-    return false;
-  }
-  if (prev_node != nullptr && prev_node->code == second_whammy_ && new_node->code == first_whammy_) {
-    fprintf(stderr, "recodebeam AddToHeapIsAllowed first whammy not allowed unicharid,code= + prevcode %i %i %i \n", new_node->unichar_id, new_node->code, prev_node->code);
-    return false;
-  }
-if (prev_node != nullptr){
-  fprintf(stderr, "recodebeam AddToHeapIsAllowed allowed unicharid,code= + prevcode %i %i %i \n", new_node->unichar_id, new_node->code, prev_node->code);
-}
-else {
-  fprintf(stderr, "recodebeam AddToHeapIsAllowed allowed unicharid,code= + prevcode %i %i null \n", new_node->unichar_id, new_node->code);
-}
-  return true;
-}
-// JDWTODO END
-
-// Computes and returns the code-hash for the given code and prev.
-uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const {
-  uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
-  if (!dup && code != null_char_) {
-    int num_classes = recoder_.code_range();
-    uint64_t carry = (((hash >> 32) * num_classes) >> 32);
-    hash *= num_classes;
-    hash += carry;
-    hash += code;
-  }
-  return hash;
-}
-
-// Backtracks to extract the best path through the lattice that was built
-// during Decode. On return the best_nodes vector essentially contains the set
-// of code, score pairs that make the optimal path with the constraint that
-// the recoder can decode the code sequence back to a sequence of unichar-ids.
-void RecodeBeamSearch::ExtractBestPaths(std::vector<const RecodeNode *> *best_nodes,
-                                        std::vector<const RecodeNode *> *second_nodes) const {
-  // Scan both beams to extract the best and second best paths.
-  fprintf(stderr, "recodebeam extractbestpaths \n");  // JDWDEBUG
-  const RecodeNode *best_node = nullptr;
-  const RecodeNode *second_best_node = nullptr;
-  const RecodeBeam *last_beam = beam_[beam_size_ - 1];
-  for (int c = 0; c < NC_COUNT; ++c) {
-    if (c == NC_ONLY_DUP) {
-      continue;
-    }
-    auto cont = static_cast<NodeContinuation>(c);
-    for (int is_dawg = 0; is_dawg < 2; ++is_dawg) {
-      int beam_index = BeamIndex(is_dawg, cont, 0);
-      int heap_size = last_beam->beams_[beam_index].size();
-      for (int h = 0; h < heap_size; ++h) {
-        const RecodeNode *node = &last_beam->beams_[beam_index].get(h).data();
-        if (is_dawg) {
-          // dawg_node may be a null_char, or duplicate, so scan back to the
-          // last valid unichar_id.
-          const RecodeNode *dawg_node = node;
-          while (dawg_node != nullptr &&
-                 (dawg_node->unichar_id == INVALID_UNICHAR_ID || dawg_node->duplicate)) {
-            dawg_node = dawg_node->prev;
-          }
-          if (dawg_node == nullptr ||
-              (!dawg_node->end_of_word && dawg_node->unichar_id != UNICHAR_SPACE)) {
-            // Dawg node is not valid.
-            continue;
-          }
-        }
-        if (best_node == nullptr || node->score > best_node->score) {
-          second_best_node = best_node;
-          best_node = node;
-          fprintf(stderr, "recodebeam extractbestpaths bestnodebeam= %i \n", beam_index);  // JDWDEBUG
-        } else if (second_best_node == nullptr || node->score > second_best_node->score) {
-          second_best_node = node;
-          fprintf(stderr, "recodebeam extractbestpaths secondbestnodebeam= %i \n", beam_index);  // JDWDEBUG
-        }
-      }
-    }
-  }
-  if (second_nodes != nullptr) {
-    fprintf(stderr, "recodebeam extractbestpaths extract second best \n");  // JDWDEBUG
-    ExtractPath(second_best_node, second_nodes);
-  }
-  fprintf(stderr, "recodebeam extractbestpaths extract best \n");  // JDWDEBUG
-  ExtractPath(best_node, best_nodes);
-}
-
-// Helper backtracks through the lattice from the given node, storing the
-// path and reversing it.
-void RecodeBeamSearch::ExtractPath(const RecodeNode *node,
-                                   std::vector<const RecodeNode *> *path) const {
-  path->clear();
-  while (node != nullptr) {
-    fprintf(stderr, "recodebeam extractpath unicharid,code,cert,score= %i %i %f %f %i \n", node->unichar_id, node->code, node->certainty, node->score, node->duplicate);  // JDWDEBUG
-    path->push_back(node);
-    node = node->prev;
-  }
-  std::reverse(path->begin(), path->end());
-}
-
-void RecodeBeamSearch::ExtractPath(const RecodeNode *node, std::vector<const RecodeNode *> *path,
-                                   int limiter) const {
-  int pathcounter = 0;
-  path->clear();
-  while (node != nullptr && pathcounter < limiter) {
-    path->push_back(node);
-    node = node->prev;
-    ++pathcounter;
-  }
-  std::reverse(path->begin(), path->end());
-}
-
-// Helper prints debug information on the given lattice path.
-void RecodeBeamSearch::DebugPath(const UNICHARSET *unicharset,
-                                 const std::vector<const RecodeNode *> &path) const {
-  for (int c = 0; c < path.size(); ++c) {
-    const RecodeNode &node = *path[c];
-    tprintf("%d ", c);
-    node.Print(null_char_, *unicharset, 1);
-  }
-}
-
-// Helper prints debug information on the given unichar path.
-void RecodeBeamSearch::DebugUnicharPath(const UNICHARSET *unicharset,
-                                        const std::vector<const RecodeNode *> &path,
-                                        const std::vector<int> &unichar_ids,
-                                        const std::vector<float> &certs,
-                                        const std::vector<float> &ratings,
-                                        const std::vector<int> &xcoords) const {
-  int num_ids = unichar_ids.size();
-  double total_rating = 0.0;
-  for (int c = 0; c < num_ids; ++c) {
-    int coord = xcoords[c];
-    tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
-            unicharset->debug_str(unichar_ids[c]).c_str(), ratings[c], certs[c],
-            path[coord]->start_of_word, path[coord]->end_of_word, path[coord]->permuter);
-    total_rating += ratings[c];
-  }
-  tprintf("Path total rating = %g\n", total_rating);
-}
-
-} // namespace tesseract.
diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 96e5fbb276..40976bafc2 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -662,20 +662,46 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
       }
     }
   }
+  float top_key = 0.0F;
+  float second_key = 0.0F;
+  bool found_first_code = false;
+  bool found_second_code = false;
   while (!top_heap_.empty()) {
     TopPair entry;
     top_heap_.Pop(&entry);
+    if (in_possible_diplopia_ && entry.data() == first_diplopia_code_)
+      found_first_code = true;
+    if (in_possible_diplopia_ && entry.data() == second_diplopia_code_)
+      found_second_code = true;
     if (top_heap_.size() > 1) {
       top_n_flags_[entry.data()] = TN_TOPN;
     } else {
       top_n_flags_[entry.data()] = TN_TOP2;
       if (top_heap_.empty()) {
         top_code_ = entry.data();
+        top_key = entry.key();
       } else {
         second_code_ = entry.data();
+        second_key = entry.key();
       }
     }
   }
+  // need to identify if we are in a potential diplopia situation
+  // or if we already are, then determine if it is ended
+  if (in_possible_diplopia_) {
+    if (!found_first_code && !found_second_code){
+      in_possible_diplopia_ = false;
+      first_diplopia_code_ = -1;
+      second_diplopia_code_ = -1;
+    }
+  }
+  if (!in_possible_diplopia_) {
+    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > 0.25F && second_key > 0.25F){
+      in_possible_diplopia_ = true;
+      first_diplopia_code_ = top_code_;
+      second_diplopia_code_ = second_code_;
+    }
+  }
   top_n_flags_[null_char_] = TN_TOP2;
 }
 
@@ -1143,6 +1169,10 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
     if (UpdateHeapIfMatched(&node, heap)) {
       return;
     }
+    // check to see if node is possible diplopia
+    if (!AddToHeapIsAllowed(&node)) {
+      return;
+    }
     RecodePair entry(score, node);
     heap->Push(&entry);
     ASSERT_HOST(entry.data().dawgs == nullptr);
@@ -1194,6 +1224,20 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
   return false;
 }
 
+// Determines if node can be added to heap based on possible diplopia status
+bool RecodeBeamSearch::AddToHeapIsAllowed(RecodeNode *new_node) {
+  if (!in_possible_diplopia_)
+    return true;
+  const RecodeNode *prev_node = new_node->prev;
+  if (prev_node != nullptr && prev_node->code == first_diplopia_code_ && new_node->code == second_diplopia_code_) {
+    return false;
+  }
+  if (prev_node != nullptr && prev_node->code == second_diplopia_code_ && new_node->code == first_diplopia_code_) {
+    return false;
+  }
+  return true;
+}
+
 // Computes and returns the code-hash for the given code and prev.
 uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const {
   uint64_t hash = prev == nullptr ? 0 : prev->code_hash;

From ae85bc4aab2b9df86da332014af6641994ffef1c Mon Sep 17 00:00:00 2001
From: woodjohndavid <57116722+woodjohndavid@users.noreply.github.com>
Date: Sun, 27 Jun 2021 10:19:27 -0700
Subject: [PATCH 4/8] diplopia interim solution final

---
 src/lstm/recodebeam.cpp | 7 ++++++-
 src/lstm/recodebeam.h   | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 40976bafc2..45e5efd5cd 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -40,6 +40,11 @@ const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
 
 static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
 
+// the minimum diplopia key is the minimum score (key) from
+// the network output to qualify as a likely 'real' character
+// for the purposes of identifying possible diplopia
+static const float kMinDiplopiaKey = 0.25;
+
 // Prints debug details of the node.
 void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const {
   if (code == null_char) {
@@ -696,7 +701,7 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
     }
   }
   if (!in_possible_diplopia_) {
-    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > 0.25F && second_key > 0.25F){
+    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > kMinDiplopiaKey && second_key > kMinDiplopiaKey){
       in_possible_diplopia_ = true;
       first_diplopia_code_ = top_code_;
       second_diplopia_code_ = second_code_;
diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h
index 78ff911bd4..14076ea772 100644
--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@@ -429,7 +429,7 @@ class TESS_API RecodeBeamSearch {
   // The encoded (class label) of the null/reject character.
   int null_char_;
   // Variables used in tracking possible diplopia case
-  // Refer to ComputeTopN routine for more information
+  // Refer to ComputeTopN routine for use of these variables
   bool in_possible_diplopia_;
   int first_diplopia_code_;
   int second_diplopia_code_;

From a8af23f0f0a350ce3eff31eeab40aed5f7320ac9 Mon Sep 17 00:00:00 2001
From: woodjohndavid <57116722+woodjohndavid@users.noreply.github.com>
Date: Tue, 29 Jun 2021 13:51:04 -0700
Subject: [PATCH 5/8] Style Changes Per stweil comments

---
 src/lstm/recodebeam.cpp | 9 ++++++---
 src/lstm/recodebeam.h   | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 45e5efd5cd..8667841047 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -674,10 +674,12 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
   while (!top_heap_.empty()) {
     TopPair entry;
     top_heap_.Pop(&entry);
-    if (in_possible_diplopia_ && entry.data() == first_diplopia_code_)
+    if (in_possible_diplopia_ && entry.data() == first_diplopia_code_) {
       found_first_code = true;
-    if (in_possible_diplopia_ && entry.data() == second_diplopia_code_)
+    }
+    if (in_possible_diplopia_ && entry.data() == second_diplopia_code_) {
       found_second_code = true;
+    }
     if (top_heap_.size() > 1) {
       top_n_flags_[entry.data()] = TN_TOPN;
     } else {
@@ -1231,8 +1233,9 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
 
 // Determines if node can be added to heap based on possible diplopia status
 bool RecodeBeamSearch::AddToHeapIsAllowed(RecodeNode *new_node) {
-  if (!in_possible_diplopia_)
+  if (!in_possible_diplopia_) {
     return true;
+  }
   const RecodeNode *prev_node = new_node->prev;
   if (prev_node != nullptr && prev_node->code == first_diplopia_code_ && new_node->code == second_diplopia_code_) {
     return false;
diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h
index 14076ea772..ed1daf21ca 100644
--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@@ -426,13 +426,13 @@ class TESS_API RecodeBeamSearch {
   // True if the input is simple text, ie adjacent equal chars are not to be
   // eliminated.
   bool is_simple_text_;
-  // The encoded (class label) of the null/reject character.
-  int null_char_;
   // Variables used in tracking possible diplopia case
   // Refer to ComputeTopN routine for use of these variables
   bool in_possible_diplopia_;
   int first_diplopia_code_;
   int second_diplopia_code_;
+  // The encoded (class label) of the null/reject character.
+  int null_char_;
 };
 
 } // namespace tesseract.

From bf6d32ef847b6ba96d7f345173f4853775fd5ab7 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Sat, 22 Jan 2022 13:06:06 +0100
Subject: [PATCH 6/8] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Suggested-by: Robert Pösel
Signed-off-by: Stefan Weil <sw@weilnetz.de>

Co-authored-by: Robert Pösel <robyer@seznam.cz>
---
 src/lstm/recodebeam.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 8667841047..bc0e5e7a69 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -43,7 +43,7 @@ static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
 // the minimum diplopia key is the minimum score (key) from
 // the network output to qualify as a likely 'real' character
 // for the purposes of identifying possible diplopia
-static const float kMinDiplopiaKey = 0.25;
+static const float kMinDiplopiaKey = 0.25f;
 
 // Prints debug details of the node.
 void RecodeNode::Print(int null_char, const UNICHARSET &unicharset, int depth) const {
@@ -190,7 +190,7 @@ RecodeBeamSearch::combineSegmentedTimesteps(
 
 void RecodeBeamSearch::calculateCharBoundaries(std::vector<int> *starts, std::vector<int> *ends,
                                                std::vector<int> *char_bounds_, int maxWidth) {
-  char_bounds_->push_back((*starts)[0]); 
+  char_bounds_->push_back((*starts)[0]);
   for (int i = 0; i < ends->size(); ++i) {
     int middle = ((*starts)[i + 1] - (*ends)[i]) / 2;
     char_bounds_->push_back((*ends)[i] + middle);
@@ -667,8 +667,8 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
       }
     }
   }
-  float top_key = 0.0F;
-  float second_key = 0.0F;
+  float top_key = 0.0f;
+  float second_key = 0.0f;
   bool found_first_code = false;
   bool found_second_code = false;
   while (!top_heap_.empty()) {
@@ -696,14 +696,14 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
   // need to identify if we are in a potential diplopia situation
   // or if we already are, then determine if it is ended
   if (in_possible_diplopia_) {
-    if (!found_first_code && !found_second_code){
+    if (!found_first_code && !found_second_code) {
       in_possible_diplopia_ = false;
       first_diplopia_code_ = -1;
       second_diplopia_code_ = -1;
     }
   }
   if (!in_possible_diplopia_) {
-    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > kMinDiplopiaKey && second_key > kMinDiplopiaKey){
+    if (top_code_ != null_char_ && second_code_ != null_char_ && top_key > kMinDiplopiaKey && second_key > kMinDiplopiaKey) {
       in_possible_diplopia_ = true;
       first_diplopia_code_ = top_code_;
       second_diplopia_code_ = second_code_;

From 5b445e103c0d2fa0718a7487fc486846deb59d9e Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Sat, 22 Jan 2022 13:42:57 +0100
Subject: [PATCH 7/8] Apply suggestions from code review

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 src/lstm/recodebeam.cpp | 12 ++++++------
 src/lstm/recodebeam.h   |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index bc0e5e7a69..13c2962ff1 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -40,9 +40,9 @@ const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
 
 static const char *kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
 
-// the minimum diplopia key is the minimum score (key) from
+// The minimum diplopia key is the minimum score (key) from
 // the network output to qualify as a likely 'real' character
-// for the purposes of identifying possible diplopia
+// for the purposes of identifying possible diplopia.
 static const float kMinDiplopiaKey = 0.25f;
 
 // Prints debug details of the node.
@@ -693,8 +693,8 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to
       }
     }
   }
-  // need to identify if we are in a potential diplopia situation
-  // or if we already are, then determine if it is ended
+  // End a tracked possible diplopia once neither of its codes is in the top-n,
+  // then start tracking a new one if none is active and the top two qualify.
   if (in_possible_diplopia_) {
     if (!found_first_code && !found_second_code) {
       in_possible_diplopia_ = false;
@@ -1176,7 +1176,7 @@ void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
     if (UpdateHeapIfMatched(&node, heap)) {
       return;
     }
-    // check to see if node is possible diplopia
+    // Check to see if node is possible diplopia.
     if (!AddToHeapIsAllowed(&node)) {
       return;
     }
@@ -1231,7 +1231,7 @@ bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *hea
   return false;
 }
 
-// Determines if node can be added to heap based on possible diplopia status
+// Determines if node can be added to heap based on possible diplopia status.
 bool RecodeBeamSearch::AddToHeapIsAllowed(RecodeNode *new_node) {
   if (!in_possible_diplopia_) {
     return true;
diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h
index ed1daf21ca..2dfb18a74b 100644
--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@@ -378,7 +378,7 @@ class TESS_API RecodeBeamSearch {
   // with reshuffle if needed. Returns true if there was a match.
   bool UpdateHeapIfMatched(RecodeNode *new_node, RecodeHeap *heap);
   // Determines if new node can be added to the heap for the current beam.
-  // Returns false if we are in possible diplopia situation
+  // Returns false if the node and its predecessor form the diplopia code pair.
   bool AddToHeapIsAllowed(RecodeNode *new_node);
   // Computes and returns the code-hash for the given code and prev.
   uint64_t ComputeCodeHash(int code, bool dup, const RecodeNode *prev) const;
@@ -426,8 +426,8 @@ class TESS_API RecodeBeamSearch {
   // True if the input is simple text, ie adjacent equal chars are not to be
   // eliminated.
   bool is_simple_text_;
-  // Variables used in tracking possible diplopia case
-  // Refer to ComputeTopN routine for use of these variables
+  // Variables used in tracking possible diplopia case.
+  // Refer to ComputeTopN routine for use of these variables.
   bool in_possible_diplopia_;
   int first_diplopia_code_;
   int second_diplopia_code_;

From f8f7a3f6862c95de7a56deeac0e533f0023f43cc Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Sat, 22 Jan 2022 13:45:04 +0100
Subject: [PATCH 8/8] Apply suggestions from code review

Signed-off-by: Stefan Weil <sw@weilnetz.de>
---
 src/lstm/recodebeam.cpp | 3 ---
 src/lstm/recodebeam.h   | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 13c2962ff1..4413181a8d 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -70,9 +70,6 @@ RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress &recoder, int null_char
     , beam_size_(0)
     , top_code_(-1)
     , second_code_(-1)
-    , in_possible_diplopia_(false)
-    , first_diplopia_code_(-1)
-    , second_diplopia_code_(-1)
     , dict_(dict)
     , space_delimited_(true)
     , is_simple_text_(simple_text)
diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h
index 2dfb18a74b..8ae8e4f52e 100644
--- a/src/lstm/recodebeam.h
+++ b/src/lstm/recodebeam.h
@@ -428,9 +428,9 @@ class TESS_API RecodeBeamSearch {
   bool is_simple_text_;
   // Variables used in tracking possible diplopia case.
   // Refer to ComputeTopN routine for use of these variables.
-  bool in_possible_diplopia_;
-  int first_diplopia_code_;
-  int second_diplopia_code_;
+  bool in_possible_diplopia_ = false;
+  int first_diplopia_code_ = -1;
+  int second_diplopia_code_ = -1;
   // The encoded (class label) of the null/reject character.
   int null_char_;
 };