diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h
index 103ca7b1c9..927191caaa 100644
--- a/include/tesseract/baseapi.h
+++ b/include/tesseract/baseapi.h
@@ -326,6 +326,19 @@ class TESS_API TessBaseAPI {
    */
   void SetImage(Pix *pix);
 
+  /**
+   * Preprocess the input image with a grayscale normalization based on
+   * nlbin (Thomas Breuel). Call this after SetImage().
+   * Current modes:
+   *  - 0 = No normalization
+   *  - 1 = Thresholding+Recognition
+   *  - 2 = Thresholding
+   *  - 3 = Recognition
+   * @return true if the image was normalized; false if no image is set,
+   * the mode is not one of 1-3, or the normalization itself failed.
+   */
+  bool NormalizeImage(int mode);
+
   /**
    * Set the resolution of the source image in pixels per inch so font size
    * information can be calculated in results. Call this after SetImage().
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index a21798429a..f573f5f492 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -926,6 +926,40 @@ Pix *TessBaseAPI::GetInputImage() {
   return tesseract_->pix_original();
 }
 
+// Grayscale normalization (preprocessing) of the current input image.
+// Mode 1 hands the normalized image to the input image and the thresholder,
+// mode 2 only to the thresholder and mode 3 only to the input image.
+// Returns false if no image is set, the mode is not 1-3 or the
+// normalization fails (e.g. for images with less than 8 bpp).
+bool TessBaseAPI::NormalizeImage(int mode) {
+  if (!GetInputImage() || thresholder_ == nullptr) {
+    tprintf("Please use SetImage before applying the image pre-processing steps.\n");
+    return false;
+  }
+  if (mode < 1 || mode > 3) {
+    return false;
+  }
+  // Null if the normalization failed; never install a null image.
+  Pix *normalized = thresholder_->GetPixNormRectGrey();
+  if (normalized == nullptr) {
+    tprintf("Grayscale normalization failed, image is left unchanged.\n");
+    return false;
+  }
+  if (mode == 1) {
+    SetInputImage(normalized);
+    thresholder_->SetImage(GetInputImage());
+  } else if (mode == 2) {
+    thresholder_->SetImage(normalized);
+    // GetPixNormRectGrey() documents that its result must be destroyed.
+    // Mode 1 already relies on SetImage() not taking over the Pix it is
+    // given, so release the temporary here instead of leaking it.
+    pixDestroy(&normalized);
+  } else {
+    SetInputImage(normalized);
+  }
+  return true;
+}
+
 const char *TessBaseAPI::GetInputName() {
   if (!input_file_.empty()) {
     return input_file_.c_str();
@@ -1265,8 +1299,33 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c
 bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
                               const char *retry_config, int timeout_millisec,
                               TessResultRenderer *renderer) {
   SetInputName(filename);
   SetImage(pix);
+
+  // Image preprocessing: optional grayscale normalization.
+  // Read the parameter directly so the mode can never be left uninitialized.
+  const int graynorm_mode = tesseract_->preprocess_graynorm_mode;
+  if (graynorm_mode > 0 && NormalizeImage(graynorm_mode) && tesseract_->tessedit_write_images) {
+    // Write the normalized image for inspection.
+    std::string output_filename = output_file_ + ".preprocessed";
+    if (page_index > 0) {
+      output_filename += std::to_string(page_index);
+    }
+    output_filename += ".tif";
+    // The normalized image is grayscale, so use a compression that supports
+    // more than 1 bpp (G4 is a bilevel-only codec).
+    if (graynorm_mode == 2) {
+      // Mode 2 only replaces the thresholder image; the Pix obtained from
+      // GetPixRect() is destroyed again after writing.
+      Pix *thresholder_pix = thresholder_->GetPixRect();
+      pixWrite(output_filename.c_str(), thresholder_pix, IFF_TIFF_ZIP);
+      pixDestroy(&thresholder_pix);
+    } else {
+      pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_ZIP);
+    }
+  }
+
+  // Recognition
   bool failed = false;
 
   if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
@@ -1313,6 +1372,12 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename,
     // Switch to alternate mode for retry.
     ReadConfigFile(retry_config);
     SetImage(pix);
+
+    // Re-apply the image preprocessing to the freshly set image.
+    if (graynorm_mode > 0) {
+      NormalizeImage(graynorm_mode);
+    }
+
     Recognize(nullptr);
     // Restore saved config variables.
     ReadConfigFile(kOldVarsFile);
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
index fd58ac8746..89056f179c 100644
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -81,6 +81,11 @@ Tesseract::Tesseract()
                  "11=sparse_text, 12=sparse_text+osd, 13=raw_line"
                  " (Values from PageSegMode enum in tesseract/publictypes.h)",
                  this->params())
+    , INT_MEMBER(preprocess_graynorm_mode, 0,
+                 "Grayscale normalization mode: 0=no normalization, 1=thresholding+recognition, "
+                 "2=thresholding_only, 3=recognition_only. "
+                 "The modes 1-3 are applied on the full image",
+                 this->params())
     , INT_MEMBER(thresholding_method,
                  static_cast<int>(ThresholdMethod::Otsu),
                  "Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = "
diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
index 732bb9e62e..12d7bf188b 100644
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -759,6 +759,7 @@ class TESS_API Tesseract : public Wordrec {
   BOOL_VAR_H(tessedit_do_invert);
   double_VAR_H(invert_threshold);
   INT_VAR_H(tessedit_pageseg_mode);
+  INT_VAR_H(preprocess_graynorm_mode);
   INT_VAR_H(thresholding_method);
   BOOL_VAR_H(thresholding_debug);
   double_VAR_H(thresholding_window_size);
diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp
index fa8e568849..2a2b8f4d02 100644
--- a/src/ccmain/thresholder.cpp
+++ b/src/ccmain/thresholder.cpp
@@ -183,6 +183,128 @@ void ImageThresholder::SetImage(const Image pix) {
   Init();
 }
 
+/*----------------------------------------------------------------------*
+ *                  Non-linear contrast normalization                   *
+ *----------------------------------------------------------------------*/
+/*!
+ * \brief   pixNLNorm()
+ *
+ * \param[in]    pixs          8 or 32 bpp
+ * \param[out]   pthresh       [optional] l_int32 global threshold value
+ * \return       pixd          8 bpp grayscale, or NULL on error
+ *
+ * Notes:
+ *      (1) This composite operation is good for adaptively removing
+ *          dark background. Adaptation of Thomas Breuel's nlbin version
+ *          from ocropus.
+ *      (2) A good thresholder to use together with NLNorm is WAN.
+ *      (3) The returned pix is a new image that the caller must destroy.
+ */
+Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) {
+  if (pthresh) {
+    *pthresh = 0;
+  }
+  // Fail with a plain null instead of PROCNAME()/procName.
+  if (!pixs) {
+    return nullptr;
+  }
+  const l_int32 d = pixGetDepth(pixs);
+  if (d < 8) {
+    return nullptr;
+  }
+
+  PIX *pixg = nullptr;
+  if (d == 32) {
+    // ITU-R 601-2 luma
+    pixg = pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114);
+  } else {
+    pixg = pixConvertTo8(pixs, 0);
+  }
+  if (!pixg) {
+    return nullptr;
+  }
+
+  /// Normalize contrast
+  PIX *pixd = pixMaxDynamicRange(pixg, L_LINEAR_SCALE);
+  pixDestroy(&pixg);
+  if (!pixd) {
+    return nullptr;
+  }
+  pixg = pixd;
+
+  /// Calculate flat version
+  l_int32 w1 = 0, h1 = 0, w2 = 0, h2 = 0;
+  pixGetDimensions(pixg, &w1, &h1, nullptr);
+  pixd = pixScaleGeneral(pixg, 0.5, 0.5, 0.0, 0);
+  PIX *pixd2 = pixRankFilter(pixd, 20, 2, 0.8);
+  pixDestroy(&pixd);
+  pixd = pixRankFilter(pixd2, 2, 20, 0.8);
+  pixDestroy(&pixd2);
+  if (!pixd) {
+    // Scaling or rank filtering failed, so there is no flat field.
+    pixDestroy(&pixg);
+    return nullptr;
+  }
+  pixGetDimensions(pixd, &w2, &h2, nullptr);
+  pixd2 = pixScaleGrayLI(pixd, static_cast<l_float32>(w1) / w2,
+                         static_cast<l_float32>(h1) / h2);
+  pixDestroy(&pixd);
+  if (!pixd2) {
+    pixDestroy(&pixg);
+    return nullptr;
+  }
+  pixInvert(pixd2, pixd2);
+  pixAddGray(pixg, pixg, pixd2);
+  pixDestroy(&pixd2);
+
+  /// Local contrast enhancement
+  //  Ignore a border of 10 % on every side and get a mean threshold,
+  //  background and foreground value. boxCreate() takes x, y, width and
+  //  height, so the box spans 80 % of each dimension.
+  l_int32 thresh = 0;
+  l_float32 avefg = 0.0f;
+  l_float32 avebg = 0.0f;
+  BOX *pixbox = boxCreate(static_cast<l_int32>(w1 * 0.1), static_cast<l_int32>(h1 * 0.1),
+                          static_cast<l_int32>(w1 * 0.8), static_cast<l_int32>(h1 * 0.8));
+  NUMA *na = pixGetGrayHistogramInRect(pixg, pixbox, 1);
+  numaSplitDistribution(na, 0.1, &thresh, &avefg, &avebg, nullptr, nullptr, nullptr);
+  boxDestroy(&pixbox);
+  numaDestroy(&na);
+
+  /// Subtract by a foreground value and multiply by factor to
+  //  set a background value to 255
+  const l_int32 fgval = static_cast<l_int32>(avefg + 0.5);
+  l_int32 bgval = static_cast<l_int32>(avebg + 0.5);
+  if (bgval <= fgval) {
+    // No usable foreground/background separation (e.g. a uniform image or
+    // a failed histogram). Stretching would divide by zero, so return the
+    // flattened image as it is.
+    if (pthresh) {
+      *pthresh = thresh;
+    }
+    return pixg;
+  }
+  const l_float32 threshpos = static_cast<l_float32>(thresh - fgval) / (bgval - fgval);
+  // Todo: fgval or fgval + slightly offset,
+  //       e.g. fgval + (l_int32)((thresh - fgval) * .25)
+  bgval += std::min(static_cast<l_int32>((bgval - thresh) * .5), 255 - bgval);
+  const l_float32 factor = static_cast<l_float32>(255. / (bgval - fgval));
+  if (pthresh) {
+    // The cast must cover the whole expression: casting threshpos alone
+    // truncated it to 0 for any threshold between fgval and bgval.
+    // NOTE(review): confirm that this is the intended threshold scale.
+    *pthresh = static_cast<l_int32>(threshpos * factor - threshpos * .1);
+  }
+  pixAddConstantGray(pixg, -1 * fgval);
+  pixMultConstantGray(pixg, factor);
+
+  return pixg;
+}
+
+/*----------------------------------------------------------------------*
+ *                  Thresholding                                        *
+ *----------------------------------------------------------------------*/
+
 std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
                                                       TessBaseAPI *api,
                                                       ThresholdMethod method) {
@@ -203,7 +325,7 @@ std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
   int r;
 
   l_int32 pix_w, pix_h;
-  pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
+  pixGetDimensions(pix_, &pix_w, &pix_h, nullptr);
 
   bool thresholding_debug;
   api->GetBoolVariable("thresholding_debug", &thresholding_debug);
@@ -381,6 +503,18 @@ Image ImageThresholder::GetPixRectGrey() {
   return pix;
 }
 
+// Get a clone/copy of the source image rectangle, reduced to normalized greyscale,
+// and at the same resolution as the output binary.
+// The returned Pix must be pixDestroyed. It is null if the source image has
+// less than 8 bpp or the normalization fails.
+// Provided to the classifier to extract features from the greyscale image.
+Image ImageThresholder::GetPixNormRectGrey() {
+  auto pix = GetPixRect();
+  auto result = ImageThresholder::pixNLNorm(pix, nullptr);
+  pix.destroy();
+  return result;
+}
+
 // Otsu thresholds the rectangle, taking the rectangle from *this.
 void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
   std::vector<int> thresholds;
diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h
index e20c065bc8..279adcdad2 100644
--- a/src/ccmain/thresholder.h
+++ b/src/ccmain/thresholder.h
@@ -154,6 +154,13 @@ class TESS_API ImageThresholder {
   // Provided to the classifier to extract features from the greyscale image.
   virtual Image GetPixRectGrey();
 
+  // Get a clone/copy of the source image rectangle, reduced to normalized greyscale,
+  // and at the same resolution as the output binary.
+  // The returned Pix must be pixDestroyed. It is null if the source image has
+  // less than 8 bpp or the normalization fails.
+  // Provided to the classifier to extract features from the greyscale image.
+  virtual Image GetPixNormRectGrey();
+
 protected:
   // ----------------------------------------------------------------------
   // Utility functions that may be useful components for other thresholders.
@@ -170,6 +177,11 @@ class TESS_API ImageThresholder {
   // Otsu thresholds the rectangle, taking the rectangle from *this.
   void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const;
 
+  // Return a new non-linear normalized 8 bpp grayscale version of pixs
+  // (8 or 32 bpp), or nullptr on error. The caller must destroy the result.
+  // If pthresh is not null, it receives a global threshold estimate.
+  Pix *pixNLNorm(Pix *pixs, int *pthresh);
+
   /// Threshold the rectangle, taking everything except the src_pix
   /// from the class, using thresholds/hi_values to the output pix.
   /// NOTE that num_channels is the size of the thresholds and hi_values