From ea5c9b77ef14239f4e66006b9d9c1c05c92afb7b Mon Sep 17 00:00:00 2001 From: JKamlah Date: Mon, 4 Apr 2022 17:41:40 +0200 Subject: [PATCH 1/6] Add: Non-linear grayscale normalization as preprocessing based on nlbin (Thomas Breuel). --- src/api/baseapi.cpp | 35 ++++++++- src/ccmain/tesseractclass.cpp | 3 + src/ccmain/tesseractclass.h | 1 + src/ccmain/thresholder.cpp | 102 ++++++++++++++++++++++++++- src/ccmain/thresholder.h | 9 +++ tessdata/configs/Makefile.am | 2 +- tessdata/configs/normalize_grayscale | 1 + 7 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 tessdata/configs/normalize_grayscale diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index f78894ce74..11246cc486 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1253,8 +1253,37 @@ bool TessBaseAPI::ProcessPagesInternal(const char *filename, const char *retry_c bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer) { + + auto pixs = pixCopy(nullptr, pix); + SetInputName(filename); - SetImage(pix); + SetImage(pixs); + + // Image preprocessing + + // Process input image to a normalized grayscale + // atm it uses a non-linear algorithm + bool normalize_grayscale=false; + GetBoolVariable("normalize_grayscale", &normalize_grayscale); + if (normalize_grayscale) { + Pix *pixg = thresholder_->GetPixNormRectGrey(); + pixDestroy(&pixs); + pixs = pixCopy(nullptr, pixg); + pixDestroy(&pixg); + thresholder_->SetImage(pixs); + SetInputImage(pixs); + if (tesseract_->tessedit_write_images) { + std::string output_filename = output_file_ + ".norm_gray"; + if (page_index > 0) { + output_filename += std::to_string(page_index); + } + output_filename += ".tif"; + pixWrite(output_filename.c_str(), pixs, IFF_TIFF_G4); + } + } + + // Recognition + bool failed = false; if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { @@ -1300,7 +1329,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int 
page_index, const char *filename, } // Switch to alternate mode for retry. ReadConfigFile(retry_config); - SetImage(pix); + SetImage(pixs); Recognize(nullptr); // Restore saved config variables. ReadConfigFile(kOldVarsFile); @@ -1309,7 +1338,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, if (renderer && !failed) { failed = !renderer->AddImage(this); } - + pixDestroy(&pixs); return !failed; } diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 1eabcc512d..b675e166c2 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -75,6 +75,9 @@ Tesseract::Tesseract() "11=sparse_text, 12=sparse_text+osd, 13=raw_line" " (Values from PageSegMode enum in tesseract/publictypes.h)", this->params()) + , BOOL_MEMBER(normalize_grayscale, false, + "Applys non-linear normalization (nlnorm) on a grayscale version of the input image", + this->params()) , INT_MEMBER(thresholding_method, static_cast(ThresholdMethod::Otsu), "Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = " diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 94681ab61d..c023e620b0 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -756,6 +756,7 @@ class TESS_API Tesseract : public Wordrec { BOOL_VAR_H(tessedit_train_line_recognizer); BOOL_VAR_H(tessedit_dump_pageseg_images); BOOL_VAR_H(tessedit_do_invert); + BOOL_VAR_H(normalize_grayscale); INT_VAR_H(tessedit_pageseg_mode); INT_VAR_H(thresholding_method); BOOL_VAR_H(thresholding_debug); diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index f8da90ccf4..465eea14c0 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -187,6 +187,94 @@ void ImageThresholder::SetImage(const Image pix) { Init(); } +/*----------------------------------------------------------------------* + * Non-linear contrast normalization * + *----------------------------------------------------------------------*/ +/*! 
+ * \brief pixNLNorm() + * + * \param[in] pixs 8 or 32 bpp + * \param[out] ptresh l_int32 global threshold value + * \return pixd 8 bpp grayscale, or NULL on error + * + *
+ * Notes:
+ *      (1) This composite operation is good for adaptively removing
+ *          dark background. Adaption of Thomas Breuel's nlbin version from ocropus.
+ *      (2) A good thresholder together NLNorm is WAN
+ * 
+ */ +Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) +{ +l_int32 d, thresh, w1, h1, w2, h2, fgval, bgval; +l_uint32 black_val, white_val; +l_float32 factor, threshpos, avefg, avebg; +PIX *pixg, *pixd, *pixd2; +BOX *pixbox; +NUMA *na; + + PROCNAME("pixNLNorm"); + + if (!pixs || (d = pixGetDepth(pixs)) < 8) + return (PIX *)ERROR_PTR("pixs undefined or d < 8 bpp", procName, NULL); + if (d == 32) + // ITU-R 601-2 luma + pixg = pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114); + //pixg = pixConvertRGBToGray(pixs, 0.3, 0.4, 0.3); + else + pixg = pixConvertTo8(pixs, 0); + + /* Normalize contrast */ + /*pixGetBlackOrWhiteVal(pixg, L_GET_BLACK_VAL, &black_val); + if (black_val>0) pixAddConstantGray(pixg, -1 * black_val); + pixGetBlackOrWhiteVal(pixg, L_GET_WHITE_VAL, &white_val); + if (white_val<255) pixMultConstantGray(pixg, (255. / white_val));*/ + pixd = pixMaxDynamicRange(pixg, L_LINEAR_SCALE); + pixDestroy(&pixg); + pixg = pixCopy(nullptr, pixd); + pixDestroy(&pixd); + + /* Calculate flat version */ + pixGetDimensions(pixg, &w1, &h1, NULL); + pixd = pixScaleGeneral(pixg, 0.5, 0.5, 0.0, 0); + pixd2 = pixRankFilter(pixd, 20, 2, 0.8); + pixDestroy(&pixd); + pixd = pixRankFilter(pixd2, 2, 20, 0.8); + pixDestroy(&pixd2); + pixGetDimensions(pixd, &w2, &h2, NULL); + pixd2 = pixScaleGrayLI(pixd, (l_float32)w1 / (l_float32)w2, (l_float32)h1 / (l_float32)h2); + pixInvert(pixd2, pixd2); + pixAddGray(pixg, pixg, pixd2); + pixDestroy(&pixd); + pixDestroy(&pixd2); + + /* Local contrast enhancement */ + /* Ignore a border of 10 % and get a mean threshold, background and foreground value */ + pixbox = boxCreate(w1*0.1, h1*0.1, w1*0.9, h1*0.9); + na = pixGetGrayHistogramInRect(pixg, pixbox, 1); + numaSplitDistribution(na, 0.1, &thresh, &avefg, &avebg, NULL, NULL, NULL); + boxDestroy(&pixbox); + numaDestroy(&na); + + /* Subtract by a foreground value and multiply by factor to set a background value to 255 */ + fgval = (l_int32)(avefg + 0.5); + bgval = (l_int32)(avebg + 0.5); + 
threshpos = (l_float32) (thresh-fgval)/(bgval-fgval); + // Todo: fgval or fgval + slightly offset + fgval = fgval;// + (l_int32) ((thresh - fgval)*.25); + bgval = bgval + (l_int32) std::min((l_int32) ((bgval - thresh)*.5),(255 - bgval)); + factor = 255. / (bgval-fgval) ; + if (pthresh) *pthresh = (l_int32) threshpos*factor - threshpos*.1; + pixAddConstantGray(pixg, -1 * fgval); + pixMultConstantGray(pixg, factor); + return pixg; +} + + +/*----------------------------------------------------------------------* + * Thresholding * + *----------------------------------------------------------------------*/ + std::tuple ImageThresholder::Threshold( TessBaseAPI *api, ThresholdMethod method) { @@ -207,7 +295,7 @@ std::tuple ImageThresholder::Threshold( int r; l_int32 pix_w, pix_h; - pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr); + pixGetDimensions(pix_, &pix_w, &pix_h, nullptr); bool thresholding_debug; api->GetBoolVariable("thresholding_debug", &thresholding_debug); @@ -370,6 +458,18 @@ Image ImageThresholder::GetPixRectGrey() { return pix; } +// Get a clone/copy of the source image rectangle, reduced to normalized greyscale, +// and at the same resolution as the output binary. +// The returned Pix must be pixDestroyed. +// Provided to the classifier to extract features from the greyscale image. +Image ImageThresholder::GetPixNormRectGrey() { + auto pix = GetPixRect(); + auto result = ImageThresholder::pixNLNorm(pix, nullptr); + pix.destroy(); + return result; +} + + // Otsu thresholds the rectangle, taking the rectangle from *this. void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const { std::vector thresholds; diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h index e20c065bc8..279adcdad2 100644 --- a/src/ccmain/thresholder.h +++ b/src/ccmain/thresholder.h @@ -154,6 +154,12 @@ class TESS_API ImageThresholder { // Provided to the classifier to extract features from the greyscale image. 
virtual Image GetPixRectGrey(); + // Get a clone/copy of the source image rectangle, reduced to normalized greyscale, + // and at the same resolution as the output binary. + // The returned Pix must be pixDestroyed. + // Provided to the classifier to extract features from the greyscale image. + virtual Image GetPixNormRectGrey(); + protected: // ---------------------------------------------------------------------- // Utility functions that may be useful components for other thresholders. @@ -170,6 +176,9 @@ class TESS_API ImageThresholder { // Otsu thresholds the rectangle, taking the rectangle from *this. void OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const; + // Return non-linear normalized grayscale + Pix *pixNLNorm(Pix *pixs, int *pthresh); + /// Threshold the rectangle, taking everything except the src_pix /// from the class, using thresholds/hi_values to the output pix. /// NOTE that num_channels is the size of the thresholds and hi_values diff --git a/tessdata/configs/Makefile.am b/tessdata/configs/Makefile.am index 90619378f8..6f0cf16f84 100644 --- a/tessdata/configs/Makefile.am +++ b/tessdata/configs/Makefile.am @@ -4,5 +4,5 @@ data_DATA += api_config kannada box.train.stderr quiet logfile digits get.images data_DATA += lstmbox wordstrbox # Configurations for OCR output. data_DATA += alto hocr pdf tsv txt -data_DATA += linebox rebox strokewidth bigram +data_DATA += linebox rebox strokewidth bigram normalize_grayscale EXTRA_DIST = $(data_DATA) diff --git a/tessdata/configs/normalize_grayscale b/tessdata/configs/normalize_grayscale new file mode 100644 index 0000000000..80fe569289 --- /dev/null +++ b/tessdata/configs/normalize_grayscale @@ -0,0 +1 @@ +normalize_grayscale 1 From 37462ac5f4aca3a7083742465317a86dd99af487 Mon Sep 17 00:00:00 2001 From: JKamlah Date: Wed, 20 Apr 2022 17:44:52 +0200 Subject: [PATCH 2/6] Add three normalization modi: Only for thresholding, only for recognition and for both tasks. 
--- src/api/baseapi.cpp | 41 ++++++++++++++++++++++------------- src/ccmain/tesseractclass.cpp | 11 +++++++++- src/ccmain/tesseractclass.h | 2 ++ src/ccmain/thresholder.cpp | 5 +++-- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 11246cc486..8d0a2ee967 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1254,31 +1254,32 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer) { - auto pixs = pixCopy(nullptr, pix); - SetInputName(filename); - SetImage(pixs); + + SetImage(pix); // Image preprocessing // Process input image to a normalized grayscale // atm it uses a non-linear algorithm - bool normalize_grayscale=false; - GetBoolVariable("normalize_grayscale", &normalize_grayscale); - if (normalize_grayscale) { - Pix *pixg = thresholder_->GetPixNormRectGrey(); - pixDestroy(&pixs); - pixs = pixCopy(nullptr, pixg); - pixDestroy(&pixg); - thresholder_->SetImage(pixs); - SetInputImage(pixs); + bool nlnorm, nlth, nlrec; + GetBoolVariable("normalize_grayscale", &nlnorm); + GetBoolVariable("normalize_thresholding", &nlth); + GetBoolVariable("normalize_recognition", &nlrec); + if (nlnorm || nlth || nlrec) { + if (nlnorm || (nlth && nlrec)) { + SetInputImage(thresholder_->GetPixNormRectGrey()); + thresholder_->SetImage(GetInputImage()); + } else if (nlth) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); + else if (nlrec) SetInputImage(thresholder_->GetPixNormRectGrey()); + if (tesseract_->tessedit_write_images) { std::string output_filename = output_file_ + ".norm_gray"; if (page_index > 0) { output_filename += std::to_string(page_index); } output_filename += ".tif"; - pixWrite(output_filename.c_str(), pixs, IFF_TIFF_G4); + pixWrite(output_filename.c_str(), pix, IFF_TIFF_G4); } } @@ -1329,7 +1330,17 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, } // Switch 
to alternate mode for retry. ReadConfigFile(retry_config); - SetImage(pixs); + SetImage(pix); + + // Apply image preprocessing + if (nlnorm || nlth || nlrec) { + if (nlnorm || (nlth && nlrec)) { + SetInputImage(thresholder_->GetPixNormRectGrey()); + thresholder_->SetImage(GetInputImage()); + } else if (nlth) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); + else if (nlrec) SetInputImage(thresholder_->GetPixNormRectGrey()); + } + //if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); Recognize(nullptr); // Restore saved config variables. ReadConfigFile(kOldVarsFile); @@ -1338,7 +1349,7 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, if (renderer && !failed) { failed = !renderer->AddImage(this); } - pixDestroy(&pixs); + //pixDestroy(&pixs); return !failed; } diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index b675e166c2..b0b4bda164 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -76,7 +76,16 @@ Tesseract::Tesseract() " (Values from PageSegMode enum in tesseract/publictypes.h)", this->params()) , BOOL_MEMBER(normalize_grayscale, false, - "Applys non-linear normalization (nlnorm) on a grayscale version of the input image", + "Applys non-linear normalization (nlnorm) on a grayscale version " + "of the input image and replace it for all tasks", + this->params()) + , BOOL_MEMBER(normalize_thresholding, false, + "Applys non-linear normalization (nlnorm) on a grayscale version " + "of the input image only for thresholding tasks (layout analysis)", + this->params()) + , BOOL_MEMBER(normalize_recognition, false, + "Applys non-linear normalization (nlnorm) on a grayscale version " + "of the input image only for the character recognition task", this->params()) , INT_MEMBER(thresholding_method, static_cast(ThresholdMethod::Otsu), diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index c023e620b0..534e5ad601 100644 --- 
a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -757,6 +757,8 @@ class TESS_API Tesseract : public Wordrec { BOOL_VAR_H(tessedit_dump_pageseg_images); BOOL_VAR_H(tessedit_do_invert); BOOL_VAR_H(normalize_grayscale); + BOOL_VAR_H(normalize_thresholding); + BOOL_VAR_H(normalize_recognition); INT_VAR_H(tessedit_pageseg_mode); INT_VAR_H(thresholding_method); BOOL_VAR_H(thresholding_debug); diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index 465eea14c0..f21a1d8d43 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -220,7 +220,8 @@ NUMA *na; if (d == 32) // ITU-R 601-2 luma pixg = pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114); - //pixg = pixConvertRGBToGray(pixs, 0.3, 0.4, 0.3); + // Legacy converting + // pixg = pixConvertRGBToGray(pixs, 0.3, 0.4, 0.3); else pixg = pixConvertTo8(pixs, 0); @@ -243,9 +244,9 @@ NUMA *na; pixDestroy(&pixd2); pixGetDimensions(pixd, &w2, &h2, NULL); pixd2 = pixScaleGrayLI(pixd, (l_float32)w1 / (l_float32)w2, (l_float32)h1 / (l_float32)h2); + pixDestroy(&pixd); pixInvert(pixd2, pixd2); pixAddGray(pixg, pixg, pixd2); - pixDestroy(&pixd); pixDestroy(&pixd2); /* Local contrast enhancement */ From a09b6b43c34500b0f8cdf0250d8c5fa12ee448a4 Mon Sep 17 00:00:00 2001 From: JKamlah Date: Mon, 25 Apr 2022 19:19:50 +0200 Subject: [PATCH 3/6] Reformat code. 
--- src/api/baseapi.cpp | 17 +++++--- src/ccmain/thresholder.cpp | 81 +++++++++++++++++++++----------------- 2 files changed, 56 insertions(+), 42 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 8d0a2ee967..00614f30ce 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1270,16 +1270,18 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, if (nlnorm || (nlth && nlrec)) { SetInputImage(thresholder_->GetPixNormRectGrey()); thresholder_->SetImage(GetInputImage()); - } else if (nlth) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); - else if (nlrec) SetInputImage(thresholder_->GetPixNormRectGrey()); - + } else if (nlth) { + thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); + } else if (nlrec) { + SetInputImage(thresholder_->GetPixNormRectGrey()); + } if (tesseract_->tessedit_write_images) { std::string output_filename = output_file_ + ".norm_gray"; if (page_index > 0) { output_filename += std::to_string(page_index); } output_filename += ".tif"; - pixWrite(output_filename.c_str(), pix, IFF_TIFF_G4); + pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4); } } @@ -1337,8 +1339,11 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, if (nlnorm || (nlth && nlrec)) { SetInputImage(thresholder_->GetPixNormRectGrey()); thresholder_->SetImage(GetInputImage()); - } else if (nlth) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); - else if (nlrec) SetInputImage(thresholder_->GetPixNormRectGrey()); + } else if (nlth) { + thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); + } else if (nlrec) { + SetInputImage(thresholder_->GetPixNormRectGrey()); + } } //if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); Recognize(nullptr); diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index f21a1d8d43..a0207b6adf 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -200,42 
+200,44 @@ void ImageThresholder::SetImage(const Image pix) { *
  * Notes:
  *      (1) This composite operation is good for adaptively removing
- *          dark background. Adaption of Thomas Breuel's nlbin version from ocropus.
+ *          dark background. Adaptation of Thomas Breuel's nlbin version
+ *          from ocropus.
  *      (2) A good thresholder together NLNorm is WAN
  * 
*/ -Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) -{ -l_int32 d, thresh, w1, h1, w2, h2, fgval, bgval; -l_uint32 black_val, white_val; -l_float32 factor, threshpos, avefg, avebg; -PIX *pixg, *pixd, *pixd2; -BOX *pixbox; -NUMA *na; +Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) { + l_int32 d, thresh, w1, h1, w2, h2, fgval, bgval; + l_uint32 black_val, white_val; + l_float32 factor, threshpos, avefg, avebg; + PIX *pixg, *pixd, *pixd2; + BOX *pixbox; + NUMA *na; PROCNAME("pixNLNorm"); - - if (!pixs || (d = pixGetDepth(pixs)) < 8) - return (PIX *)ERROR_PTR("pixs undefined or d < 8 bpp", procName, NULL); - if (d == 32) - // ITU-R 601-2 luma - pixg = pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114); - // Legacy converting - // pixg = pixConvertRGBToGray(pixs, 0.3, 0.4, 0.3); - else - pixg = pixConvertTo8(pixs, 0); - - /* Normalize contrast */ - /*pixGetBlackOrWhiteVal(pixg, L_GET_BLACK_VAL, &black_val); - if (black_val>0) pixAddConstantGray(pixg, -1 * black_val); - pixGetBlackOrWhiteVal(pixg, L_GET_WHITE_VAL, &white_val); - if (white_val<255) pixMultConstantGray(pixg, (255. / white_val));*/ + + if (!pixs || (d = pixGetDepth(pixs)) < 8) { + return (PIX *)ERROR_PTR("pixs undefined or d < 8 bpp", procName, NULL); + } + if (d == 32) { + // ITU-R 601-2 luma + pixg = pixConvertRGBToGray(pixs, 0.299, 0.587, 0.114); + // Legacy converting + // pixg = pixConvertRGBToGray(pixs, 0.3, 0.4, 0.3); + } else { + pixg = pixConvertTo8(pixs, 0); + } + + /// Normalize contrast + // pixGetBlackOrWhiteVal(pixg, L_GET_BLACK_VAL, &black_val); + // if (black_val>0) pixAddConstantGray(pixg, -1 * black_val); + // pixGetBlackOrWhiteVal(pixg, L_GET_WHITE_VAL, &white_val); + // if (white_val<255) pixMultConstantGray(pixg, (255. 
/ white_val)); pixd = pixMaxDynamicRange(pixg, L_LINEAR_SCALE); pixDestroy(&pixg); pixg = pixCopy(nullptr, pixd); pixDestroy(&pixd); - /* Calculate flat version */ + /// Calculate flat version pixGetDimensions(pixg, &w1, &h1, NULL); pixd = pixScaleGeneral(pixg, 0.5, 0.5, 0.0, 0); pixd2 = pixRankFilter(pixd, 20, 2, 0.8); @@ -243,31 +245,38 @@ NUMA *na; pixd = pixRankFilter(pixd2, 2, 20, 0.8); pixDestroy(&pixd2); pixGetDimensions(pixd, &w2, &h2, NULL); - pixd2 = pixScaleGrayLI(pixd, (l_float32)w1 / (l_float32)w2, (l_float32)h1 / (l_float32)h2); + pixd2 = pixScaleGrayLI(pixd, (l_float32)w1 / (l_float32)w2, + (l_float32)h1 / (l_float32)h2); pixDestroy(&pixd); pixInvert(pixd2, pixd2); pixAddGray(pixg, pixg, pixd2); pixDestroy(&pixd2); - /* Local contrast enhancement */ - /* Ignore a border of 10 % and get a mean threshold, background and foreground value */ - pixbox = boxCreate(w1*0.1, h1*0.1, w1*0.9, h1*0.9); + /// Local contrast enhancement + // Ignore a border of 10 % and get a mean threshold, + // background and foreground value + pixbox = boxCreate(w1 * 0.1, h1 * 0.1, w1 * 0.9, h1 * 0.9); na = pixGetGrayHistogramInRect(pixg, pixbox, 1); numaSplitDistribution(na, 0.1, &thresh, &avefg, &avebg, NULL, NULL, NULL); boxDestroy(&pixbox); numaDestroy(&na); - /* Subtract by a foreground value and multiply by factor to set a background value to 255 */ + /// Subtract by a foreground value and multiply by factor to + // set a background value to 255 fgval = (l_int32)(avefg + 0.5); bgval = (l_int32)(avebg + 0.5); - threshpos = (l_float32) (thresh-fgval)/(bgval-fgval); + threshpos = (l_float32)(thresh - fgval) / (bgval - fgval); // Todo: fgval or fgval + slightly offset - fgval = fgval;// + (l_int32) ((thresh - fgval)*.25); - bgval = bgval + (l_int32) std::min((l_int32) ((bgval - thresh)*.5),(255 - bgval)); - factor = 255. 
/ (bgval-fgval) ; - if (pthresh) *pthresh = (l_int32) threshpos*factor - threshpos*.1; + fgval = fgval; // + (l_int32) ((thresh - fgval)*.25); + bgval = bgval + + (l_int32)std::min((l_int32)((bgval - thresh) * .5), (255 - bgval)); + factor = 255. / (bgval - fgval); + if (pthresh) { + *pthresh = (l_int32)threshpos * factor - threshpos * .1; + } pixAddConstantGray(pixg, -1 * fgval); pixMultConstantGray(pixg, factor); + return pixg; } From 6dfb216e23757dd9098aa589c721e414a8d51be9 Mon Sep 17 00:00:00 2001 From: JKamlah Date: Fri, 1 Jul 2022 15:59:49 +0200 Subject: [PATCH 4/6] Add preprocessing parameter for grayscale normalization (preprocess_graynorm_mode). There are 4 modes 0 - no normalization, 1 - thresholding+recognition, 2 - thresholding (only), 3 - recognition (only). --- include/tesseract/baseapi.h | 11 ++++++ src/api/baseapi.cpp | 68 ++++++++++++++++++----------------- src/ccmain/tesseractclass.cpp | 17 +++------ src/ccmain/tesseractclass.h | 4 +-- 4 files changed, 52 insertions(+), 48 deletions(-) diff --git a/include/tesseract/baseapi.h b/include/tesseract/baseapi.h index dd9fe4a299..c5576a1af0 100644 --- a/include/tesseract/baseapi.h +++ b/include/tesseract/baseapi.h @@ -326,6 +326,17 @@ class TESS_API TessBaseAPI { */ void SetImage(Pix *pix); + /** + * Preprocessing the InputImage + * Grayscale normalization based on nlbin (Thomas Breuel) + * Current modes: + * - 0 = No normalization + * - 1 = Thresholding+Recognition + * - 2 = Thresholding + * - 3 = Recognition + */ + bool NormalizeImage(int mode); + /** * Set the resolution of the source image in pixels per inch so font size * information can be calculated in results. Call this after SetImage(). 
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 00614f30ce..bea0cd3db7 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -925,6 +925,25 @@ Pix *TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } +// Grayscale normalization (preprocessing) +bool TessBaseAPI::NormalizeImage(int mode){ + if (!GetInputImage()){ + tprintf("Please use SetImage() befor using applying image preprocessing steps."); + return false; + } + if (mode == 1) { + SetInputImage(thresholder_->GetPixNormRectGrey()); + thresholder_->SetImage(GetInputImage()); + } else if (mode == 2) { + thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); + } else if (mode == 3) { + SetInputImage(thresholder_->GetPixNormRectGrey()); + } else { + return false; + } + return true; +} + const char *TessBaseAPI::GetInputName() { if (!input_file_.empty()) { return input_file_.c_str(); @@ -1258,30 +1277,21 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, SetImage(pix); - // Image preprocessing - - // Process input image to a normalized grayscale - // atm it uses a non-linear algorithm - bool nlnorm, nlth, nlrec; - GetBoolVariable("normalize_grayscale", &nlnorm); - GetBoolVariable("normalize_thresholding", &nlth); - GetBoolVariable("normalize_recognition", &nlrec); - if (nlnorm || nlth || nlrec) { - if (nlnorm || (nlth && nlrec)) { - SetInputImage(thresholder_->GetPixNormRectGrey()); - thresholder_->SetImage(GetInputImage()); - } else if (nlth) { - thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); - } else if (nlrec) { - SetInputImage(thresholder_->GetPixNormRectGrey()); - } - if (tesseract_->tessedit_write_images) { - std::string output_filename = output_file_ + ".norm_gray"; - if (page_index > 0) { - output_filename += std::to_string(page_index); - } - output_filename += ".tif"; + // Image preprocessing on image + // Grayscale normalization + int graynorm_mode; + GetIntVariable("preprocess_graynorm_mode", &graynorm_mode); + if 
(graynorm_mode > 0 && NormalizeImage(graynorm_mode) && tesseract_->tessedit_write_images) { + // Write normalized image + std::string output_filename = output_file_ + ".preprocessed"; + if (page_index > 0) { + output_filename += std::to_string(page_index); + } + output_filename += ".tif"; + if (graynorm_mode == 2 ) { pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4); + } else { + pixWrite(output_filename.c_str(), thresholder_->GetPixRect(), IFF_TIFF_G4); } } @@ -1335,16 +1345,8 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, SetImage(pix); // Apply image preprocessing - if (nlnorm || nlth || nlrec) { - if (nlnorm || (nlth && nlrec)) { - SetInputImage(thresholder_->GetPixNormRectGrey()); - thresholder_->SetImage(GetInputImage()); - } else if (nlth) { - thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); - } else if (nlrec) { - SetInputImage(thresholder_->GetPixNormRectGrey()); - } - } + NormalizeImage(graynorm_mode); + //if (normalize_grayscale) thresholder_->SetImage(thresholder_->GetPixNormRectGrey()); Recognize(nullptr); // Restore saved config variables. 
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index b0b4bda164..1607bb7d63 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -75,18 +75,11 @@ Tesseract::Tesseract() "11=sparse_text, 12=sparse_text+osd, 13=raw_line" " (Values from PageSegMode enum in tesseract/publictypes.h)", this->params()) - , BOOL_MEMBER(normalize_grayscale, false, - "Applys non-linear normalization (nlnorm) on a grayscale version " - "of the input image and replace it for all tasks", - this->params()) - , BOOL_MEMBER(normalize_thresholding, false, - "Applys non-linear normalization (nlnorm) on a grayscale version " - "of the input image only for thresholding tasks (layout analysis)", - this->params()) - , BOOL_MEMBER(normalize_recognition, false, - "Applys non-linear normalization (nlnorm) on a grayscale version " - "of the input image only for the character recognition task", - this->params()) + , INT_MEMBER(preprocess_graynorm_mode, 0, + "Grayscale normalization mode: 0=no normalization, 1=tresholding+recognition, " + "2=tresholding_only, 3=recognition_only " + "The modes 1–3 are applied on the fullimage", + this->params()) , INT_MEMBER(thresholding_method, static_cast(ThresholdMethod::Otsu), "Thresholding method: 0 = Otsu, 1 = LeptonicaOtsu, 2 = " diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 534e5ad601..be077d7cde 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -756,10 +756,8 @@ class TESS_API Tesseract : public Wordrec { BOOL_VAR_H(tessedit_train_line_recognizer); BOOL_VAR_H(tessedit_dump_pageseg_images); BOOL_VAR_H(tessedit_do_invert); - BOOL_VAR_H(normalize_grayscale); - BOOL_VAR_H(normalize_thresholding); - BOOL_VAR_H(normalize_recognition); INT_VAR_H(tessedit_pageseg_mode); + INT_VAR_H(preprocess_graynorm_mode); INT_VAR_H(thresholding_method); BOOL_VAR_H(thresholding_debug); double_VAR_H(thresholding_window_size); From 18517a5173eaa2aa072d0231bc8ab0925e75029c Mon 
Sep 17 00:00:00 2001 From: JKamlah Date: Fri, 1 Jul 2022 17:11:40 +0200 Subject: [PATCH 5/6] Fix write preprocess image with tessedit_write_images. --- src/api/baseapi.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index bea0cd3db7..a3ac86bbe3 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1288,10 +1288,10 @@ bool TessBaseAPI::ProcessPage(Pix *pix, int page_index, const char *filename, output_filename += std::to_string(page_index); } output_filename += ".tif"; - if (graynorm_mode == 2 ) { - pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4); - } else { + if (graynorm_mode == 2) { pixWrite(output_filename.c_str(), thresholder_->GetPixRect(), IFF_TIFF_G4); + } else { + pixWrite(output_filename.c_str(), GetInputImage(), IFF_TIFF_G4); } } From c049002d3fc7331ce41ba9631cdb50bb9c35e688 Mon Sep 17 00:00:00 2001 From: JKamlah Date: Mon, 4 Jul 2022 15:35:19 +0200 Subject: [PATCH 6/6] Fix error warning text, delete empty lines and old parameter config. 
--- src/api/baseapi.cpp | 2 +- src/ccmain/thresholder.cpp | 2 -- tessdata/configs/Makefile.am | 2 +- tessdata/configs/normalize_grayscale | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) delete mode 100644 tessdata/configs/normalize_grayscale diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index a3ac86bbe3..ec3de40161 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -928,7 +928,7 @@ Pix *TessBaseAPI::GetInputImage() { // Grayscale normalization (preprocessing) bool TessBaseAPI::NormalizeImage(int mode){ if (!GetInputImage()){ - tprintf("Please use SetImage() befor using applying image preprocessing steps."); + tprintf("Please use SetImage before applying the image pre-processing steps."); return false; } if (mode == 1) { diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp index a0207b6adf..121f56c0cd 100644 --- a/src/ccmain/thresholder.cpp +++ b/src/ccmain/thresholder.cpp @@ -280,7 +280,6 @@ Pix *ImageThresholder::pixNLNorm(Pix *pixs, int *pthresh) { return pixg; } - /*----------------------------------------------------------------------* * Thresholding * *----------------------------------------------------------------------*/ @@ -479,7 +478,6 @@ Image ImageThresholder::GetPixNormRectGrey() { return result; } - // Otsu thresholds the rectangle, taking the rectangle from *this. void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const { std::vector thresholds; diff --git a/tessdata/configs/Makefile.am b/tessdata/configs/Makefile.am index 6f0cf16f84..90619378f8 100644 --- a/tessdata/configs/Makefile.am +++ b/tessdata/configs/Makefile.am @@ -4,5 +4,5 @@ data_DATA += api_config kannada box.train.stderr quiet logfile digits get.images data_DATA += lstmbox wordstrbox # Configurations for OCR output. 
data_DATA += alto hocr pdf tsv txt -data_DATA += linebox rebox strokewidth bigram normalize_grayscale +data_DATA += linebox rebox strokewidth bigram EXTRA_DIST = $(data_DATA) diff --git a/tessdata/configs/normalize_grayscale b/tessdata/configs/normalize_grayscale deleted file mode 100644 index 80fe569289..0000000000 --- a/tessdata/configs/normalize_grayscale +++ /dev/null @@ -1 +0,0 @@ -normalize_grayscale 1