Skip to content

Commit

Permalink
Merge pull request #1 from g4-api/development
Browse files Browse the repository at this point in the history
OCR improvements
  • Loading branch information
gravity-api committed Jun 3, 2024
2 parents 4fb72d5 + 00a94af commit 84bdc0a
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 16 deletions.
11 changes: 10 additions & 1 deletion src/OcrInspector/MainWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,19 @@
HorizontalAlignment="Left"
VerticalAlignment="Top"
Width="100"
Margin="10"
Margin="10,10,0,0"
Click="BtnLoadImage_Click"
Grid.Row="1"/>

<!-- Button whose Click event is wired to BtnTakeScreenshot_Click in the code-behind.
     Placed in grid row 1, horizontally centered and aligned to the top of that row. -->
<Button Name="BtnTakeScreenshot"
Content="Take Screenshot..."
HorizontalAlignment="Center"
VerticalAlignment="Top"
Width="100"
Margin="10,10,0,0"
Click="BtnTakeScreenshot_Click"
Grid.Row="1"/>

<ScrollViewer Name="MainScrollViewer"
HorizontalAlignment="Stretch"
VerticalAlignment="Stretch"
Expand Down
154 changes: 139 additions & 15 deletions src/OcrInspector/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
using Emgu.CV;
using Emgu.CV.OCR;
using Emgu.CV.Structure;

using Microsoft.Win32;

using System;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;

using Emgu.CV;
using Emgu.CV.OCR;
using Emgu.CV.Structure;

using Microsoft.Win32;



namespace OcrInspector
{
Expand Down Expand Up @@ -83,18 +87,71 @@ private void BtnLoadImage_Click(object sender, RoutedEventArgs e)
MainCanvas.Height = bitmap.PixelHeight;
}

// Event handler for the Take Screenshot button click event. Captures the display described by
// the current display settings (with this application's main window hidden), processes the
// capture using Tesseract OCR and displays the result in the window UI elements.
//
// sender: the control that raised the event (not used).
// e:      the routed event data (not used).
private void BtnTakeScreenshot_Click(object sender, RoutedEventArgs e)
{
    const int ENUM_CURRENT_SETTINGS = -1;

    // Query the display mode BEFORE hiding the window, so that a failure can be reported
    // without leaving the application invisible.
    DevMode devMode = default;
    devMode.dmSize = (short)Marshal.SizeOf(devMode);
    var hasDisplayMode = EnumDisplaySettings(null, ENUM_CURRENT_SETTINGS, ref devMode);

    // A failed call (or an empty mode) would otherwise surface as an opaque ArgumentException
    // from the Bitmap constructor below.
    if (!hasDisplayMode || devMode.dmPelsWidth <= 0 || devMode.dmPelsHeight <= 0)
    {
        System.Windows.MessageBox.Show("Unable to read the current display settings; no screenshot was taken.");
        return;
    }

    // The bitmap is only needed while this handler runs; dispose it on exit to release its GDI handle.
    // NOTE(review): this relies on ResolveWords returning an image source that does not reference
    // the bitmap's pixel buffer (it converts the bitmap into its own Emgu image first) - confirm.
    using var bitmap = new Bitmap(devMode.dmPelsWidth,
        devMode.dmPelsHeight,
        System.Drawing.Imaging.PixelFormat.Format32bppArgb);

    // Prevent app window to be seen in the screenshot.
    var mainWindow = Application.Current.MainWindow;
    mainWindow.Hide();

    try
    {
        // Give the desktop time to repaint without the window before copying the pixels.
        Thread.Sleep(500);

        // Take screenshot of main display.
        using var gfxScreenshot = Graphics.FromImage(bitmap);
        gfxScreenshot.CopyFromScreen(devMode.dmPositionX, devMode.dmPositionY, 0, 0, bitmap.Size);
    }
    finally
    {
        // Always bring the window back, even when the capture throws.
        mainWindow.Show();
    }

    // Increase DPI for better accuracy.
    bitmap.SetResolution(300, 300);

    // Process the image using Tesseract OCR and get the recognized words along with the
    // processed image
    var resolvedImage = ResolveWords(bitmap);

    // Clear the existing clickable points on the MainCanvas to display the new image and recognized words
    MainCanvas.Children.RemoveRange(1, MainCanvas.Children.Count - 2);

    // Add clickable points to the MainCanvas at the locations of the recognized words
    foreach (var word in resolvedImage.Words)
    {
        AddClickablePoint(word);
    }

    // Display the image and the recognized words in the UI elements of the window
    MainImage.Source = resolvedImage.ImageSource;

    // Set the image size to its original size to display the image correctly in
    // the window without distortion or cropping issues
    MainImage.Width = bitmap.Width;
    MainImage.Height = bitmap.Height;

    // Set the size of the MainCanvas to match the size of the MainImage
    MainCanvas.Width = bitmap.Width;
    MainCanvas.Height = bitmap.Height;
}

// Processes the image at the given path using Tesseract OCR, converts it to grayscale, recognizes the text,
// draws bounding boxes around the recognized words, and returns the processed image along with the list of recognized words.
private static (ImageSource ImageSource, List<Tesseract.Word> Words) ResolveWords(string imagePath)
{
// Load the image
var image = new Image<Bgr, byte>(imagePath);
using var image = new Image<Bgr, byte>(imagePath);

// Initialize Tesseract OCR
var ocr = new Tesseract("TrainData/", "eng", OcrEngineMode.TesseractOnly);

using var ocr = new Tesseract("TrainData/", "eng", OcrEngineMode.Default);
ocr.PageSegMode = Emgu.CV.OCR.PageSegMode.SparseText;
// Convert to grayscale for OCR processing (Tesseract requires a grayscale image)
var grayImage = image.Convert<Gray, byte>();
using var grayImage = image.Convert<Gray, byte>();

// Perform OCR on the grayscale image and get the recognized words
ocr.SetImage(grayImage);
Expand All @@ -107,6 +164,29 @@ private static (ImageSource ImageSource, List<Tesseract.Word> Words) ResolveWord
return (image.ToBitmapSource(), words);
}

// Runs Tesseract OCR over the provided Bitmap: the bitmap is converted to a BGR image, then to
// grayscale, and the grayscale image is handed to the OCR engine (sparse-text page segmentation).
// Returns the grayscale image as an ImageSource together with the list of recognized words.
private static (ImageSource ImageSource, List<Tesseract.Word> Words) ResolveWords(Bitmap bitmap)
{
    // Color copy of the bitmap and the OCR engine; both are released when the block exits
    using (var colorImage = bitmap.ToImage<Bgr, byte>())
    using (var engine = new Tesseract("TrainData/", "eng", OcrEngineMode.Default))
    {
        engine.PageSegMode = Emgu.CV.OCR.PageSegMode.SparseText;

        // Grayscale version of the capture, used both as OCR input and as the returned image
        using (var ocrInput = colorImage.Convert<Gray, byte>())
        {
            // Feed the grayscale image to the engine and run the recognition
            engine.SetImage(ocrInput);
            engine.Recognize();

            // Collect the recognized words before building the displayable image
            List<Tesseract.Word> recognized = engine.GetWords().ToList();
            ImageSource preview = ocrInput.ToBitmapSource();

            return (preview, recognized);
        }
    }
}

// Adds a clickable point to the MainCanvas at the location of the given word.
// The point displays a tooltip with the word text and accuracy.
private void AddClickablePoint(Tesseract.Word word)
Expand All @@ -126,11 +206,11 @@ private void AddClickablePoint(Tesseract.Word word)
};

// Create a rectangle to represent the clickable point on the image
Rectangle rectangle = new()
System.Windows.Shapes.Rectangle rectangle = new()
{
Fill = Brushes.Transparent,
Fill = System.Windows.Media.Brushes.Transparent,
Height = word.Region.Height,
Stroke = Brushes.Blue,
Stroke = System.Windows.Media.Brushes.Blue,
StrokeThickness = 1,
Tag = word.Region.Location,
ToolTip = tooltip,
Expand All @@ -155,5 +235,49 @@ private void AddClickablePoint(Tesseract.Word word)
// Add the rectangle to the MainCanvas to display the clickable point on the image
MainCanvas.Children.Add(rectangle);
}

#region ExternalMethods
// P/Invoke declaration for EnumDisplaySettings exported by user32.dll.
// In this file it is called from BtnTakeScreenshot_Click with a null device name and
// a mode number of -1 (ENUM_CURRENT_SETTINGS) to fill devMode with the current display mode.
// The caller sets devMode.dmSize to the marshalled size of the structure before the call.
// Returns a bool; NOTE(review): the caller currently ignores it - presumably false means failure.
[DllImport("user32.dll")]
static extern bool EnumDisplaySettings(string deviceName, int modeNum, ref DevMode devMode);

// Managed structure passed by reference to EnumDisplaySettings.
// NOTE(review): appears to mirror the display variant of the Win32 DEVMODE structure - confirm
// against the Win32 documentation before changing anything here.
// The layout is sequential, so the order and width of the fields below define the unmanaged
// layout; do not reorder, remove or retype fields.
[StructLayout(LayoutKind.Sequential)]
struct DevMode
{
// Marshalled as a fixed-size inline string buffer of 0x20 (32) characters.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 0x20)]
public string dmDeviceName;
public short dmSpecVersion;
public short dmDriverVersion;
// Set by the caller to Marshal.SizeOf(devMode) before calling EnumDisplaySettings.
public short dmSize;
public short dmDriverExtra;
public int dmFields;
// Used by BtnTakeScreenshot_Click as the source X/Y coordinates of the screen capture.
public int dmPositionX;
public int dmPositionY;
public int dmDisplayOrientation;
public int dmDisplayFixedOutput;
public short dmColor;
public short dmDuplex;
public short dmYResolution;
public short dmTTOption;
public short dmCollate;
// Marshalled as a fixed-size inline string buffer of 0x20 (32) characters.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 0x20)]
public string dmFormName;
public short dmLogPixels;
public int dmBitsPerPel;
// Used by BtnTakeScreenshot_Click as the width/height of the screenshot bitmap.
public int dmPelsWidth;
public int dmPelsHeight;
public int dmDisplayFlags;
public int dmDisplayFrequency;
public int dmICMMethod;
public int dmICMIntent;
public int dmMediaType;
public int dmDitherType;
public int dmReserved1;
public int dmReserved2;
public int dmPanningWidth;
public int dmPanningHeight;
}
#endregion
}


}
Binary file modified src/OcrInspector/TrainData/eng.traineddata
Binary file not shown.

0 comments on commit 84bdc0a

Please sign in to comment.