Reputation: 21
I have a specific requirement: extracting text and images from a specific area of a PDF file. The area might be selected, highlighted, or defined by a given set of coordinates.
When I looked into this, all the approaches I found extract images and text from the entire PDF, not from a specified location. I tried iTextSharp, Syncfusion, and Aspose, but couldn't figure out a good approach for this.
If somebody could help me out with this, I would be grateful. Can you share your ideas and suggestions on how to implement this in .NET?
Regards, Arun.M
Upvotes: 2
Views: 2495
Reputation: 11
This code extracts images from a PDF:
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Drawing.Imaging;
using System.IO;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Bytescout.PDFExtractor;
namespace ExtractAllImages
{
/// <summary>
/// Web Forms page that streams the first image found in <c>sample1.pdf</c>
/// back to the client as an inline PNG.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Loads the sample PDF, extracts its first image (if any) and writes the
    /// image bytes to the HTTP response, then ends the response.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // This test file will be copied to the project directory on the pre-build event (see the project properties).
        // Server.MapPath turns the site-relative name into a physical path, so the
        // lookup does not depend on the worker process's current directory.
        string inputFile = Server.MapPath("sample1.pdf");

        // Create Bytescout.PDFExtractor.ImageExtractor instance
        ImageExtractor extractor = new ImageExtractor();
        extractor.RegistrationName = "demo";
        extractor.RegistrationKey = "demo";

        // Load sample PDF document from the resolved physical path
        // (the original passed the bare relative name and ignored inputFile).
        extractor.LoadDocumentFromFile(inputFile);

        Response.Clear();

        // Only the first image is sent, so there is no need to enumerate the rest.
        if (extractor.GetFirstImage())
        {
            string imageFileName = "image0.png";

            // The body must contain nothing but the PNG bytes: writing any text
            // (e.g. an HTML caption) before BinaryWrite would corrupt the image.
            Response.ContentType = "image/png";
            Response.AddHeader("Content-Disposition", "inline;filename=" + imageFileName);

            // Write the image bytes into the Response output stream
            Response.BinaryWrite(extractor.GetCurrentImageAsArrayOfBytes());
        }

        Response.End();
    }
}
}
Upvotes: 1