1. 程式人生 > >C#中Tesseract-OCR的使用,可識別中英日韓所有語言

C#中Tesseract-OCR的使用,可識別中英日韓所有語言

原始碼下載:

先上效果圖。測試中文英文日語識別,其他語言也都行,只要下載相應的語言包,操作使用後面都有講

1.首先在 NuGet 套件管理員中搜尋 Tesseract,並安裝到專案中

2.下載所需語言的語言包(例如 chi_sim、eng、jpn 的 .traineddata 檔),放到程式執行目錄下的 tessdata 資料夾中

3.程式碼操作

首先初始化 TesseractEngine 類別,並設定要識別的語言

// Create the OCR engine. The second argument selects the recognition language;
// "./tessdata" is the folder the engine is pointed at for its language data.
TesseractEngine ocr = new TesseractEngine("./tessdata", "chi_sim"); // Chinese (chi_sim)
// Alternative: English, using the combined Tesseract + Cube engine mode.
//ocr = new TesseractEngine("./tessdata", "eng", EngineMode.TesseractAndCube);
// Alternative: Japanese.
//ocr = new TesseractEngine("./tessdata", "jpn");

匯入圖片進行識別

// Load the selected file, run OCR on it and keep the recognised text in `str`.
// Every GDI+/Tesseract object is wrapped in `using` so that it is released even
// when loading or recognition throws: the source Image (which would otherwise be
// left undisposed after being copied), the Bitmap copy, and the result Page.
string str;
using (Image source = Image.FromFile(filename.FileName.ToString()))
using (Bitmap bit = new Bitmap(source))
// If the recognition rate is low, pre-process the bitmap first by replacing the
// `using (Page ...)` line below with these two lines:
//using (Bitmap processed = PreprocesImage(bit))
//using (Page page = ocr.Process(processed))
using (Page page = ocr.Process(bit))
{
    str = page.GetText(); // recognised text
}

圖片處理演算法,如果是識別數字,識別率低可以試試這個方法

/// <summary>
/// Enlarges the image five-fold in each dimension and reduces it to pure black
/// and white: a pixel becomes white when any of its R, G or B channels exceeds
/// the threshold, otherwise it becomes black.
/// </summary>
/// <param name="image">Source bitmap. It is only read; it is neither modified nor disposed here.</param>
/// <returns>
/// A new bitmap, five times the width and height of <paramref name="image"/>,
/// containing only <see cref="Color.White"/> and <see cref="Color.Black"/> pixels.
/// </returns>
private Bitmap PreprocesImage(Bitmap image)
{
    // Channel value above which a pixel counts as "light". Adjust here to tune
    // how much of the image survives as black.
    const int threshold = 23;

    // The upscaled copy is only needed while thresholding; dispose it afterwards
    // instead of leaking a bitmap 25x the size of the input on every call.
    using (Bitmap scaled = ResizeImage(image, image.Width * 5, image.Height * 5))
    {
        // Read the dimensions once rather than on every loop iteration.
        int width = scaled.Width;
        int height = scaled.Height;

        // Output bitmap with the same pixel dimensions as the upscaled copy.
        Bitmap newBitmap = new Bitmap(width, height);
        for (int i = 0; i < width; i++)
        {
            for (int j = 0; j < height; j++)
            {
                Color actualColor = scaled.GetPixel(i, j);
                bool isLight = actualColor.R > threshold
                    || actualColor.G > threshold
                    || actualColor.B > threshold;
                newBitmap.SetPixel(i, j, isLight ? Color.White : Color.Black);
            }
        }
        return newBitmap;
    }
}

/// <summary>
/// Draws <paramref name="image"/> scaled into a new bitmap of the requested size,
/// using high-quality bicubic interpolation. The result keeps the source's
/// horizontal resolution and gets twice its vertical resolution. Pixel values
/// are only resampled; no contrast adjustment is applied.
/// </summary>
/// <param name="image">Image to scale; it is only read.</param>
/// <param name="width">Width of the resulting bitmap, in pixels.</param>
/// <param name="height">Height of the resulting bitmap, in pixels.</param>
/// <returns>A new bitmap of <paramref name="width"/> x <paramref name="height"/> pixels.</returns>
private Bitmap ResizeImage(Image image, int width, int height)
{
    Bitmap scaled = new Bitmap(width, height);
    // The vertical resolution is deliberately doubled (the original author noted 2 or 3 as candidates).
    scaled.SetResolution(image.HorizontalResolution, image.VerticalResolution * 2);

    Rectangle target = new Rectangle(0, 0, width, height);

    using (Graphics canvas = Graphics.FromImage(scaled))
    using (ImageAttributes attributes = new ImageAttributes())
    {
        // Favour output quality over speed for every rendering setting.
        canvas.CompositingMode = CompositingMode.SourceOver;
        canvas.CompositingQuality = CompositingQuality.HighQuality;
        canvas.InterpolationMode = InterpolationMode.HighQualityBicubic;
        canvas.SmoothingMode = SmoothingMode.HighQuality;
        canvas.PixelOffsetMode = PixelOffsetMode.HighQuality;

        attributes.SetWrapMode(WrapMode.Clamp);

        // Stretch the whole source image over the whole destination rectangle.
        canvas.DrawImage(
            image,
            target,
            0,
            0,
            image.Width,
            image.Height,
            GraphicsUnit.Pixel,
            attributes);
    }

    return scaled;
}