' Example: acquire pages from the default TWAIN source and append each one,
' with OCR, to a single PDF written to c:\pdfocr.pdf.
Dim ImageID As Integer
Dim bContinue As Boolean
Dim PdfID As Integer
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
oGdPictureImaging.SetLicenseNumber("GDPICTURE.NET_LICENSE_KEY")
oGdPictureImaging.SetLicenseNumberOCRTesseract("GDPICTURE_TESSERACT_PLUGIN_LICENSE")
' Open the source exactly once: the result of this call decides whether we scan.
' (The original repeated the call inside the branch, re-opening an already open source.)
If oGdPictureImaging.TwainOpenDefaultSource(Me.Handle) Then
    oGdPictureImaging.TwainSetAutoFeed(True) 'Set AutoFeed Enabled
    oGdPictureImaging.TwainSetAutoScan(True) 'To achieve the maximum scanning rate
    oGdPictureImaging.TwainSetResolution(200)
    oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW) 'Black & White
    oGdPictureImaging.TwainSetBitDepth(1) ' 1 bpp
    PdfID = oGdPictureImaging.PdfOCRStart("c:\pdfocr.pdf", True, "", "", "", "", "")
    Do
        ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(Me.Handle)
        ' A zero ID is treated as "nothing acquired": skip OCR and release for this pass.
        If ImageID <> 0 Then
            oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, TesseractDictionary.TesseractDictionaryEnglish, "C:\Program Files\GdPicture.NET\Redist\OCR", "")
            oGdPictureImaging.ReleaseGdPictureImage(ImageID)
        End If
        If oGdPictureImaging.TwainGetState <= TwainStatus.TWAIN_SOURCE_ENABLED Then
            ' Ask the user whether to scan more; MsgBoxResult.Yes replaces the magic number 6.
            bContinue = (MsgBox("Do you want acqure other pages ?", MsgBoxStyle.YesNo) = MsgBoxResult.Yes)
        Else
            ' State is above TWAIN_SOURCE_ENABLED: keep looping without prompting.
            ' NOTE(review): presumably a transfer is still pending in the feeder - confirm.
            bContinue = True
        End If
    Loop While bContinue
    oGdPictureImaging.PdfOCRStop(PdfID)
    Call oGdPictureImaging.TwainCloseSource()
    MsgBox("Done !")
Else
    MsgBox("can't open default source, twain state is: " & oGdPictureImaging.TwainGetState.ToString)
End If
' Example: run OCR over a multi-page TIFF and write the result to c:\pdfocr.pdf.
Dim ImageID As Integer
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
oGdPictureImaging.SetLicenseNumber("GDPICTURE.NET_LICENSE_KEY")
oGdPictureImaging.SetLicenseNumberOCRTesseract("GDPICTURE_TESSERACT_PLUGIN_LICENSE")
oGdPictureImaging.TiffOpenMultiPageAsReadOnly(True)
' NOTE(review): an empty path is passed here - presumably the library prompts for a file; confirm.
ImageID = oGdPictureImaging.TiffCreateMultiPageFromFile("")
' A zero ID is treated as "no image loaded": skip OCR and release in that case.
If ImageID <> 0 Then
    ' The OCR resource folder path must be a single unbroken string literal.
    oGdPictureImaging.PdfOCRCreateFromMultipageTIFF(ImageID, TesseractDictionary.TesseractDictionaryEnglish, "C:\Program Files\GdPicture.NET\Redist\OCR", "", "c:\pdfocr.pdf", True, "", "", "", "", "")
    oGdPictureImaging.ReleaseGdPictureImage(ImageID)
End If

' Declaration that opens the following example (it was fused onto the release line).
Dim ImageID As Integer
' Example: load a single image file and save it with OCR to c:\pdfocr.pdf.
' ImageID is declared on the line preceding this block.
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
oGdPictureImaging.SetLicenseNumber("GDPICTURE.NET_LICENSE_KEY")
oGdPictureImaging.SetLicenseNumberOCRTesseract("GDPICTURE_TESSERACT_PLUGIN_LICENSE")
' NOTE(review): an empty path is passed here - presumably the library prompts for a file; confirm.
ImageID = oGdPictureImaging.CreateGdPictureImageFromFile("")
' A zero ID is treated as "no image loaded": skip OCR and release in that case.
If ImageID <> 0 Then
    ' The OCR resource folder path must be a single unbroken string literal.
    oGdPictureImaging.SaveAsPDFOCR(ImageID, "c:\pdfocr.pdf", TesseractDictionary.TesseractDictionaryEnglish, "C:\Program Files\GdPicture.NET\Redist\OCR", "", True, "", "", "", "", "")
    oGdPictureImaging.ReleaseGdPictureImage(ImageID)
End If

' Declaration that opens the following example (it was fused onto the release line).
Dim ImageID As Integer
' Example: render every page of a document opened in a GdViewer at 200 DPI,
' convert each page to 1 bpp, and append it with OCR to c:\pdfocr.pdf.
' ImageID is declared on the line preceding this block.
Dim oGdViewer As New GdPicture.GdViewer
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
Dim PdfID As Integer
oGdViewer.SetLicenseNumber("GDPICTURE.NET_LICENSE_KEY")
oGdPictureImaging.SetLicenseNumber("GDPICTURE.NET_LICENSE_KEY")
oGdPictureImaging.SetLicenseNumberOCRTesseract("GDPICTURE_TESSERACT_PLUGIN_LICENSE")
' NOTE(review): an empty path is passed here - presumably the viewer prompts for a file; confirm.
oGdViewer.DisplayFromFile("")
PdfID = oGdPictureImaging.PdfOCRStart("c:\pdfocr.pdf", True, "", "", "", "", "")
For i As Integer = 1 To oGdViewer.PageCount
    ImageID = oGdViewer.PdfRenderPageToGdPictureImage(200, i)
    ' A zero ID is treated as "page not rendered": skip conversion, OCR and release for that page.
    If ImageID <> 0 Then
        oGdPictureImaging.ConvertTo1Bpp(ImageID)
        oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, TesseractDictionary.TesseractDictionaryEnglish, "C:\Program Files\GdPicture.NET\Redist\OCR", "")
        oGdViewer.ReleaseGdPictureImage(ImageID)
    End If
Next
oGdPictureImaging.PdfOCRStop(PdfID)
oGdViewer.CloseDocument()
How can I extract the text from the PDF using PdfGetPageText()?
Return to Example requests & Code samples
Users browsing this forum: No registered users and 0 guests