Reputation: 156
I am writing a Java application to work as a template reader and writer. I have had success with working with text, but having some dificulty with the images...
Getting the images was the easy part - using a class extending PDFStreamEngine
package readingPdf;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;
public class ImageStripper extends PDFStreamEngine {
ArrayList<Object []> imagesData = null;
public ImageStripper() throws IOException {
// preparing PDFStreamEngine
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
imagesData = new ArrayList<Object[]>();
}
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
String operation = operator.getName();
if ("Do".equals(operation)) {
COSName objectName = (COSName) operands.get(0);
// get the PDF object
PDXObject xobject = getResources().getXObject(objectName);
// check if the object is an image object
if (xobject instanceof PDImageXObject) {
Object[] imageData = new Object[3];
PDImageXObject image = (PDImageXObject) xobject;
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
// position of image in the pdf in terms of user space units
System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY()
+ " in user space units");
imageData[0] = ctmNew.getTranslateX();// xPos
imageData[1] = ctmNew.getTranslateY();// yPos
imageData[2] = image;//Image
imagesData.add(imageData);
} else if (xobject instanceof PDFormXObject) {
PDFormXObject form = (PDFormXObject) xobject;
showForm(form);
}
} else {
super.processOperator(operator, operands);
}
}
public ArrayList<Object[]> getImagesList(){
return imagesData;
}
}
next is the implementation thereof
public class PDFManager{
private PDFParser parser;
private PDDocument pdDoc;
private PDDocument retDoc;
private COSDocument cosDoc;
private PDPage page;
private String filePath;
private File file;
public PDDocument transferImage() throws IOException {
this.pdDoc = null;
this.cosDoc = null;
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file, "r"));
parser.parse();
cosDoc = parser.getDocument();
pdDoc = new PDDocument(cosDoc);
//Get Image Data
ImageStripper imageStripper = new ImageStripper();
imageStripper.processPage(pdDoc.getPage(0));
ArrayList<Object []> imageList = imageStripper.getImagesList();
//Close Doc
pdDoc.close();
cosDoc.close();
//Create new PDF Doc
retDoc = new PDDocument();
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
retDoc.addPage(page);
PDPageContentStream cs = new PDPageContentStream(retDoc, page, AppendMode.OVERWRITE, true);
for(int pos = 0; pos < imageList.size() ; pos++) {
Object [] imageData = imageList.get(pos);
float xPos = (float)imageData[0];
float yPos = (float)imageData[1];
PDImageXObject image = (PDImageXObject)imageData[2];
cs.drawImage(image, xPos, yPos);
}
cs.close();
return retDoc;
}
public static void main(String[] args) throws IOException {
PDFManager pdfManager = new PDFManager();
PDDocument doc =pdfManager.ToText("c:\\test\\test.pdf");
doc.save("c:\\test\\test2.pdf");
doc.close();
}
}
Now the problem comes in at the point where I am writing calling the cs.drawImage
. All the code executes without any issue except when trying to save the new file... I get the exception COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?
I suspect that there is still metadata linking the image to the original Document it was extracted from as calling PDImageXobject.createFromFile("c:\\test\\testImage.png", doc)
returns a new instance of PDImageXObject
which writes perfectly. As the PDDocument
that gets written to gets passed into the PDImageXObject
I suspect it gets linked in some way or another.
I can not save the image to a temp location as this is just testing for a POC.
Any assistance would be appreciated
Upvotes: 0
Views: 1862
Reputation: 156
@ Tilman Hausherr
Thanks for the solution
I moved the closing of the original document into a seperate method which I called after writing the file
public void closeFiles(){
pdDoc.close();
cosDoc.close();
}
Upvotes: 1