' Sample: acquire pages from the default TWAIN source and append each one,
' with OCR, to the PDF written at c:\pdfocr.pdf.
Dim ImageID As Integer
Dim bContinue As Boolean
Dim PdfID As Integer
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
Try
    ' The source is opened exactly once, by the call in this condition.
    If oGdPictureImaging.TwainOpenDefaultSource(Me.Handle) Then
        oGdPictureImaging.TwainSetAutoFeed(True) 'Set AutoFeed Enabled
        oGdPictureImaging.TwainSetAutoScan(True) 'To achieve the maximum scanning rate
        oGdPictureImaging.TwainSetResolution(200)
        oGdPictureImaging.TwainSetPixelType(TwainPixelType.TWPT_BW) 'Black & White
        oGdPictureImaging.TwainSetBitDepth(1) ' 1 bpp
        PdfID = oGdPictureImaging.PdfOCRStart("c:\pdfocr.pdf", True, "MyTitle", "MyAuthor", "MySubject", "MyKeywords", "MyCreator") 'We generate PDF/A
        Do
            ImageID = oGdPictureImaging.TwainAcquireToGdPictureImage(Me.Handle)
            If ImageID <> 0 Then
                ' Add the acquired page to the PDF, then free the image handle.
                oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(PdfID, ImageID, "eng", "C:\Program Files\GdPicture.NET 8\Redist\OCR", "")
                oGdPictureImaging.ReleaseGdPictureImage(ImageID)
            End If
            ' NOTE(review): a state at or below TWAIN_SOURCE_ENABLED presumably means
            ' no further transfer is pending, so the user is asked whether to scan
            ' more; otherwise the loop keeps acquiring. Confirm against the TWAIN
            ' state documentation.
            If oGdPictureImaging.TwainGetState <= TwainStatus.TWAIN_SOURCE_ENABLED Then
                bContinue = (MsgBox("Do you want to acquire other pages ?", MsgBoxStyle.YesNo) = MsgBoxResult.Yes)
            Else
                bContinue = True
            End If
        Loop While bContinue
        oGdPictureImaging.PdfOCRStop(PdfID)
        Call oGdPictureImaging.TwainCloseSource()
        MsgBox("Done !")
    Else
        MsgBox("can't open default source, twain state is: " & oGdPictureImaging.TwainGetState.ToString)
    End If
Finally
    ' Dispose the imaging object even when a call above throws.
    oGdPictureImaging.Dispose()
End Try
' Sample: create an OCR'd PDF at c:\pdfocr.pdf from a multipage TIFF file.
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
' NOTE(review): the empty file path looks like a placeholder (or may make the
' library prompt for a file) - confirm against the GdPicture documentation.
Dim ImageID As Integer = oGdPictureImaging.TiffCreateMultiPageFromFile("")
If ImageID <> 0 Then
    ' A non-zero ID is treated as a successfully loaded image.
    oGdPictureImaging.PdfOCRCreateFromMultipageTIFF(ImageID, "eng", "C:\Program Files\GdPicture.NET 8\Redist\OCR", "", "c:\pdfocr.pdf", True, "MyTitle", "MyAuthor", "MySubject", "MyKeywords", "MyCreator")
    oGdPictureImaging.ReleaseGdPictureImage(ImageID)
End If
oGdPictureImaging.Dispose()

' Start of the next sample; this declaration must sit on its own line so that
' both it and the Dispose() call above parse as separate statements.
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
' Sample: load an image file and create an OCR'd PDF from it.
' NOTE(review): the empty file path looks like a placeholder (or may make the
' library prompt for a file) - confirm against the GdPicture documentation.
Dim ocrLanguage As String = "eng"
Dim ocrResourceFolder As String = "C:\Program Files\GdPicture.NET 8\Redist\OCR"
Dim outputPdfPath As String = "c:\pdfocr.pdf"
Dim sourceImageId As Integer = oGdPictureImaging.CreateGdPictureImageFromFile("")
Dim imageLoaded As Boolean = (sourceImageId <> 0)
If imageLoaded Then
    ' Write the PDF, then free the image handle.
    oGdPictureImaging.PdfOCRCreateFromMultipageTIFF( _
        sourceImageId, _
        ocrLanguage, _
        ocrResourceFolder, _
        "", _
        outputPdfPath, _
        True, _
        "MyTitle", _
        "MyAuthor", _
        "MySubject", _
        "MyKeywords", _
        "MyCreator")
    oGdPictureImaging.ReleaseGdPictureImage(sourceImageId)
End If
oGdPictureImaging.Dispose()
' Sample: rasterize every page of c:\test.pdf at 200 DPI, OCR it, and write
' the result to c:\pdfocr.pdf.
Dim oGdPictureImaging As New GdPicture.GdPictureImaging
Dim pdfOcrID As Integer
Dim pdfInput As New GdPicture.GdPicturePDF
Try
    If pdfInput.LoadFromFile("c:\test.pdf", False) = GdPictureStatus.OK Then
        pdfOcrID = oGdPictureImaging.PdfOCRStart("c:\pdfocr.pdf", True, "MyTitle", "MyAuthor", "MySubject", "MyKeywords", "MyCreator")
        For i As Integer = 1 To pdfInput.GetPageCount
            ' Select the page for this iteration; a fixed page number here would
            ' render the same page once per loop pass.
            If pdfInput.SelectPage(i) Then
                Dim rasterPageID As Integer = pdfInput.RenderPageToGdPictureImage(200, True) 'Pass False to skip rendering form fields & annotations
                If rasterPageID <> 0 Then
                    oGdPictureImaging.ConvertTo1BppAT(rasterPageID) 'We generate bitonal PDF output, comment this line to keep true colour document
                    oGdPictureImaging.PdfAddGdPictureImageToPdfOCR(pdfOcrID, rasterPageID, "eng", "C:\Program Files\GdPicture.NET 8\Redist\OCR", "")
                    oGdPictureImaging.ReleaseGdPictureImage(rasterPageID)
                End If
            End If
        Next
        pdfInput.CloseDocument()
        oGdPictureImaging.PdfOCRStop(pdfOcrID)
    End If
Finally
    ' Dispose the imaging object whether or not the input PDF could be loaded.
    oGdPictureImaging.Dispose()
End Try
How can I extract the text from the PDF using PdfGetPageText()?
Return to Example requests & Code samples
Users browsing this forum: No registered users and 0 guests