本文介绍如何通过OCR逐行识别并获取文档中的每一行文本。
除了IOcrZoneCharacters.GetWords方法之外,还有一种类似的方法可以检索文档的每一行。OCR引擎识别出的每个字符都带有位置信息,我们可以通过OcrCharacter 结构的Position 属性访问它。该属性的值是一个或多个OcrCharacterPosition 枚举成员的组合(例如下文示例中用到的EndOfLine)。
下面是一个简单的示例,利用OcrCharacterPosition 的EndOfLine 标志逐行输出识别出的文字。
// Loads inputFile as a raster image, adds it to an OCR document as a single
// page, recognizes that page, and writes every recognized character to the
// console, emitting a line break after each character flagged as EndOfLine.
using (RasterCodecs codecs = new RasterCodecs())
{
    // Set the RasterizeDocument load resolution to 300 in both directions.
    codecs.Options.RasterizeDocument.Load.XResolution = 300;
    codecs.Options.RasterizeDocument.Load.YResolution = 300;

    // The loaded image is disposable; keep it alive for the whole OCR run and
    // release it deterministically afterwards instead of leaking it.
    using (RasterImage image = codecs.Load(inputFile))
    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.Professional, false))
    {
        ocrEngine.Startup(null, null, null, @"C:\LEADTOOLS 19\Bin\Common\OcrProfessionalRuntime64");

        using (IOcrDocument document = ocrEngine.DocumentManager.CreateDocument())
        {
            document.Pages.AddPage(image, null);
            document.Pages[0].Recognize(null);

            IOcrPageCharacters pageCharacters = document.Pages[0].GetRecognizedCharacters();

            // Walk the zones of the page by index and look up the characters of each zone.
            for (int zoneIndex = 0; zoneIndex < document.Pages[0].Zones.Count; zoneIndex++)
            {
                IOcrZoneCharacters zoneCharacters = pageCharacters.FindZoneCharacters(zoneIndex);
                if (zoneCharacters != null)
                {
                    foreach (var ocrCharacter in zoneCharacters)
                    {
                        Console.Write(ocrCharacter.Code);

                        // Position is a flags value; test the EndOfLine bit rather than
                        // comparing for equality, since other flags may be set as well.
                        OcrCharacterPosition position = ocrCharacter.Position;
                        if ((position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine)
                        {
                            Console.Write("\n");
                        }
                    }
                }
            }
        }
    }
}