euler
euler

Reputation: 1411

How to retain page labels when concatenating an existing pdf with a pdf created from scratch?

I have code that creates a "cover page" and then merges it with an existing PDF. The page labels were lost after merging. How can I retain the page labels of the existing PDF and also add a page label (e.g. "Cover page") to the page created from scratch? The example in the book is, I think, about retrieving and replacing page labels; I don't know how to apply it when concatenating an existing PDF with a PDF created from scratch. I am using iText 5.3.0. Thanks in advance.

EDIT as per comment of mkl

/**
 * Concatenates the generated cover page with the bitstream's PDF using PdfCopy
 * and returns the merged document as an in-memory stream.
 * <p>
 * The original PDF is read twice: once to capture its version, permissions and
 * info-dictionary metadata, and once to copy its pages. Bookmarks of each input
 * are shifted by the number of pages already written and merged into a single
 * outline. When the original's catalog contains a Collection entry (a portfolio),
 * the cover is attached as the embedded file "cover.pdf" instead of being copied
 * as a page, and the original's embedded files are re-attached afterwards.
 * </p>
 * <p>
 * NOTE(review): nothing in this method reads or writes page labels; per the
 * accompanying report the original's page labels are missing from the result.
 * </p>
 *
 * @return the merged PDF, or null when bitstream, item or the cover stream is
 *         unavailable, or when any exception is thrown (the exception message
 *         is logged at info level)
 */
public ByteArrayOutputStream getConcatenatePDF()
{
    if (bitstream == null)
        return null;

    // Lazily resolve the item; give up if it still cannot be found.
    if (item == null)
    {
        item = getItem();
        if (item == null)
            return null;
    }

    ByteArrayOutputStream byteout = null;
    InputStream coverStream = null;

    try
    {
        // Get Cover Page
        coverStream = getCoverStream();
        if (coverStream == null) 
            return null;

        byteout = new ByteArrayOutputStream();
        // Number of pages already written; used to shift bookmark targets of later inputs.
        int pageOffset = 0;
        // Accumulated outline (bookmarks) of all inputs.
        ArrayList<HashMap<String, Object>> master = new ArrayList<HashMap<String, Object>>();

        Document document = null;
        PdfCopy    writer = null;
        PdfReader  reader = null;

        // An empty or missing owner password is passed to the reader as null.
        byte[] password = (ownerpass != null && !"".equals(ownerpass)) ? ownerpass.getBytes() : null;

        // Get information of the original pdf
        reader = new PdfReader(bitstream.retrieve(), password);

        boolean isPortfolio = reader.getCatalog().contains(PdfName.COLLECTION);
        char version = reader.getPdfVersion();
        int permissions = reader.getPermissions();

        // Get metadata: prefer the PDF's own info entries, fall back to the
        // item's field values (subject falls back to the empty string).
        HashMap<String, String> info = reader.getInfo();
        String title = (info.get("Title") == null || "".equals(info.get("Title")))
            ? getFieldValue("dc.title") : info.get("Title");
        String author = (info.get("Author") == null || "".equals(info.get("Author")))
            ? getFieldValue("dc.contributor.author") : info.get("Author");
        String subject = (info.get("Subject") == null || "".equals(info.get("Subject")))
            ? "" : info.get("Subject");
        String keywords = (info.get("Keywords") == null || "".equals(info.get("Keywords")))
            ? getFieldValue("dc.subject") : info.get("Keywords");

        reader.close();

        // Merge cover page and the original pdf
        // Index 0 is the cover, index 1 a fresh stream of the original.
        InputStream[] is = new InputStream[2];
        is[0] = coverStream;
        is[1] = bitstream.retrieve();

        for (int i = 0; i < is.length; i++) 
        {
            // we create a reader for a certain document
            reader = new PdfReader(is[i], password);
            reader.consolidateNamedDestinations();

            // The output document and writer are set up once, while handling the cover.
            if (i == 0) 
            {
                // step 1: creation of a document-object
                document = new Document(reader.getPageSizeWithRotation(1));

                // step 2: we create a writer that listens to the document
                writer = new PdfCopy(document, byteout);

                // Set metadata from the original pdf 
                // the position of these lines is important
                document.addTitle(title);
                document.addAuthor(author);
                document.addSubject(subject);
                document.addKeywords(keywords);

                if (pdfa)
                {
                    // Set the necessary information for PDF/A-1B
                    // the position of these lines is important
                    writer.setPdfVersion(PdfWriter.VERSION_1_4);
                    writer.setPDFXConformance(PdfWriter.PDFA1B);
                    writer.createXmpMetadata();
                }
                // Otherwise carry over the original's version when it is 1.5, 1.6 or 1.7.
                else if (version == '5')
                    writer.setPdfVersion(PdfWriter.VERSION_1_5);
                else if (version == '6')
                    writer.setPdfVersion(PdfWriter.VERSION_1_6);
                else if (version == '7')
                    writer.setPdfVersion(PdfWriter.VERSION_1_7);
                else
                    ;  // no operation

                // Set security parameters
                // Encryption is only applied for non-PDF/A output and when an owner password exists.
                if (!pdfa)
                {
                    if (password != null)
                    {
                        // Reuse the original's permissions when allowed; otherwise
                        // fall back to printing, copying and screen-reader access.
                        if (security && permissions != 0) 
                        {
                            writer.setEncryption(null, password, permissions, PdfWriter.STANDARD_ENCRYPTION_128);
                        } 
                        else
                        {
                            writer.setEncryption(null, password, PdfWriter.ALLOW_PRINTING | PdfWriter.ALLOW_COPY | PdfWriter.ALLOW_SCREENREADERS, PdfWriter.STANDARD_ENCRYPTION_128);
                        }
                    }
                }

                // step 3: we open the document
                document.open();

                // if this pdf is portfolio, does not add cover page
                // Instead the cover is embedded as the attachment "cover.pdf" and the
                // loop moves on to the original without copying the cover's pages.
                if (isPortfolio)
                {
                    reader.close();
                    byte[] coverByte = getCoverByte();
                    if (coverByte == null || coverByte.length == 0) 
                        return null;
                    PdfCollection collection = new PdfCollection(PdfCollection.TILE);
                    writer.setCollection(collection);

                    PdfFileSpecification fs = PdfFileSpecification.fileEmbedded(writer, null, "cover.pdf", coverByte);
                    fs.addDescription("cover.pdf", false);
                    writer.addFileAttachment(fs);
                    continue;
                }
            }
            int n = reader.getNumberOfPages();
            // step 4: we add content
            PdfImportedPage page;
            PdfCopy.PageStamp stamp;
            // j is incremented at the top of the body, so pages are addressed 1..n.
            for (int j = 0; j < n; )
            {
                ++j;
                page = writer.getImportedPage(reader, j);
                // Only pages of the original document (second input) are stamped.
                if (i == 1) {
                    stamp = writer.createPageStamp(page);
                    // Use the page size reported by the reader as the writer's crop box.
                    Rectangle mediabox = reader.getPageSize(j);
                    Rectangle crop = new Rectangle(mediabox);
                    writer.setCropBoxSize(crop);
                    // add overlay text
                    //<-- Code for adding overlay text -->
                    stamp.alterContents();
                }
                writer.addPage(page);
            }

            // Forms are copied only for non-PDF/A output.
            PRAcroForm form = reader.getAcroForm();
            if (form != null && !pdfa)
            {
                writer.copyAcroForm(reader);
            }
            // Collect this input's bookmarks, shifted by the pages already written.
            List<HashMap<String, Object>> bookmarks = SimpleBookmark.getBookmark(reader);
            //if (bookmarks != null && !pdfa) 
            if (bookmarks != null) 
            {
                if (pageOffset != 0)
                {
                    SimpleBookmark.shiftPageNumbers(bookmarks, pageOffset, null);
                }
                master.addAll(bookmarks);
            }
            pageOffset += n;
        }
        if (!master.isEmpty())
        {
            writer.setOutlines(master);
        }

        // For a portfolio, re-read the original and re-attach each of its embedded files.
        if (isPortfolio)
        {
            reader = new PdfReader(bitstream.retrieve(), password);
            PdfDictionary catalog = reader.getCatalog();
            PdfDictionary documentnames = catalog.getAsDict(PdfName.NAMES);
            PdfDictionary embeddedfiles = documentnames.getAsDict(PdfName.EMBEDDEDFILES);
            PdfArray filespecs = embeddedfiles.getAsArray(PdfName.NAMES);
            PdfDictionary filespec;
            PdfDictionary refs;
            PRStream stream;
            PdfFileSpecification fs;
            String path;
            // copy embedded files
            // The array is walked in pairs: a string entry followed by its file specification.
            for (int i = 0; i < filespecs.size(); ) 
            {
                filespecs.getAsString(i++);     // remove description
                filespec = filespecs.getAsDict(i++);
                refs = filespec.getAsDict(PdfName.EF);
                for (PdfName key : refs.getKeys()) 
                {
                    stream = (PRStream) PdfReader.getPdfObject(refs.getAsIndirectObject(key));
                    path = filespec.getAsString(key).toString();
                    fs = PdfFileSpecification.fileEmbedded(writer, null, path, PdfReader.getStreamBytes(stream));
                    fs.addDescription(path, false);
                    writer.addFileAttachment(fs);
                }
            }
        }

        // PDF/A output additionally gets an ICC output intent loaded from the
        // PROFILE classpath resource, and opens with the outline panel shown.
        if (pdfa)
        {
            InputStream iccFile = this.getClass().getClassLoader().getResourceAsStream(PROFILE);
            ICC_Profile icc = ICC_Profile.getInstance(iccFile);
            writer.setOutputIntents("Custom", "", "http://www.color.org", "sRGB IEC61966-2.1", icc);
            writer.setViewerPreferences(PdfWriter.PageModeUseOutlines);
        }
        // step 5: we close the document
        document.close();
    } 
    catch (Exception e) 
    {
        // Any failure is reported by message only and turned into a null result.
        log.info(LogManager.getHeader(context, "cover_page: getConcatenatePDF", "bitstream_id="+bitstream.getID()+", error="+e.getMessage()));
        // e.printStackTrace();
        return null;
    }

    return byteout;
}

UPDATE

Based on mkl's answer, I modified the code above to look like this:

    /**
     * Inserts the generated cover page as the new first page of the bitstream's
     * PDF using a PdfStamper, and returns the result as an in-memory stream.
     * <p>
     * The existing document is stamped in place: a blank page sized like the
     * cover is inserted at position 1, overlay text is drawn on every page after
     * it, and the cover's content is placed on the inserted page. Both readers
     * are closed before returning, whether or not stamping succeeded.
     * </p>
     *
     * @return the stamped PDF, or null when bitstream, item or the cover stream
     *         is unavailable, or when any exception is thrown (the exception
     *         message is logged at info level)
     */
    public ByteArrayOutputStream getConcatenatePDF()
    {
        if (bitstream == null)
            return null;

        if (item == null)
        {
            item = getItem();
            if (item == null)
                return null;
        }

        ByteArrayOutputStream byteout = null;
        // Declared outside the try block so they can be released in finally.
        PdfReader coverPageReader = null;
        PdfReader reader = null;
        try
        {
            // Get Cover Page
            InputStream coverStream = getCoverStream();
            if (coverStream == null)
                return null;

            byteout = new ByteArrayOutputStream();

            InputStream documentStream = bitstream.retrieve();

            coverPageReader = new PdfReader(coverStream);
            reader = new PdfReader(documentStream);
            PdfStamper stamper = new PdfStamper(reader, byteout);

            // Import the cover before inserting the blank page that will host it.
            PdfImportedPage page = stamper.getImportedPage(coverPageReader, 1);
            stamper.insertPage(1, coverPageReader.getPageSize(1));

            PdfContentByte content = stamper.getUnderContent(1);

            // Read after insertPage, so the count includes the inserted cover;
            // the original pages are therefore 2..n.
            int n = reader.getNumberOfPages();
            for (int j = 2; j <= n; j++) {
               //code for overlay text
                ColumnText.showTextAligned(stamper.getOverContent(j), Element.ALIGN_CENTER, overlayText,
                        crop.getLeft(10), crop.getHeight() / 2 + crop.getBottom(), 90);
            }
            content.addTemplate(page, 0, 0);
            stamper.close();
        }
        catch (Exception e)
        {
            log.info(LogManager.getHeader(context, "cover_page: getConcatenatePDF", "bitstream_id="+bitstream.getID()+", error="+e.getMessage()));
            e.printStackTrace();
            return null;
        }
        finally
        {
            // Release both readers on every exit path; this runs after
            // stamper.close() on success, so the output is already complete.
            if (coverPageReader != null)
                coverPageReader.close();
            if (reader != null)
                reader.close();
        }

        return byteout;
    }

And then I set the page labels to the cover page. I omitted code not relevant to my question.

/**
 * Generates the cover page and exposes its bytes as an input stream.
 *
 * @return a stream over the generated cover page PDF, or {@code null} when
 *         {@link #getCover()} could not produce one
 */
private InputStream getCoverStream()
{
    ByteArrayOutputStream byteout = getCover();
    // getCover() returns null on failure; hand that null to the caller (which
    // checks for it) instead of failing with a NullPointerException here.
    if (byteout == null)
        return null;
    return new ByteArrayInputStream(byteout.toByteArray());
}

/**
 * Generates the cover page and returns its raw bytes.
 *
 * @return the bytes of the generated cover page PDF, or {@code null} when
 *         {@link #getCover()} could not produce one
 */
private byte[] getCoverByte()
{
    ByteArrayOutputStream byteout = getCover();
    // getCover() returns null on failure; hand that null to the caller (which
    // checks for null or empty) instead of failing with a NullPointerException.
    if (byteout == null)
        return null;
    return byteout.toByteArray();
}

/**
 * Builds the cover page as a stand-alone PDF held in memory.
 * <p>
 * The document uses letter-sized pages with margins 24/24/20/40, registers a
 * {@code HeaderFooter} page event and declares a page label carrying the text
 * "Cover page" for page 1 before any content is written.
 * </p>
 *
 * @return the stream holding the finished cover PDF, or null if any exception
 *         is thrown while building it (the exception message is logged at
 *         info level)
 */
private ByteArrayOutputStream getCover()
{
    try
    {
        ByteArrayOutputStream coverBytes = new ByteArrayOutputStream();
        Document coverDoc = new Document(PageSize.LETTER, 24, 24, 20, 40);
        PdfWriter coverWriter = PdfWriter.getInstance(coverDoc, coverBytes);

        // Writer configuration must be in place before the document is opened.
        PdfPageLabels pageLabels = new PdfPageLabels();
        pageLabels.addPageLabel(1, PdfPageLabels.EMPTY, "Cover page", 1);
        coverWriter.setPageLabels(pageLabels);
        coverWriter.setPageEvent(new HeaderFooter());

        coverDoc.open();
        //code omitted (contents of cover page)
        coverDoc.close();

        return coverBytes;
    }
    catch (Exception e)
    {
        log.info(LogManager.getHeader(
                context,
                "cover_page",
                "bitstream_id="+bitstream.getID()+", error="+e.getMessage()));
        return null;
    }
}

The modified code retained the page labels of the existing PDF (see screenshot 1) (documentStream), but the labels in the resulting merged PDF (screenshots 2 and 3) are off by one page because a cover page was inserted. As suggested by mkl, I should assign a page label to the cover page, but it seems the page labels of the imported page were lost. My concern now is: how do I set the page labels on the final document state, as also suggested by mkl? I suppose I should use PdfWriter, but I don't know where to put that in my modified code. Am I correct to assume that the state after the stamper.close() call is the final state of my document? Thanks again in advance.

Screenshot 1. Notice the actual page 1 labeled Front cover Original

Screenshot 2. Merged pdf, after the generated on-the-fly "cover page" was inserted. The page label "Front cover" was now assigned to the cover page even after I've set the pdf label of the inserted page using labels.addPageLabel(1, PdfPageLabels.EMPTY, "Cover page", 1) Inserted Cover page

Screenshot 3. Note that the page label 3 was assigned to page 2. Page 2

FINAL UPDATE Kudos to @mkl. The screenshot below is the result after I applied the latest update of mkl's answer. The page labels are now assigned correctly to the pages. Also, using PdfStamper instead of PdfCopy (as used in my original code) did not break the PDF/A compliance of the existing PDF.

enter image description here

Upvotes: 1

Views: 1407

Answers (1)

mkl
mkl

Reputation: 95918

Adding the cover page

Usually using PdfCopy for merging PDFs is the right choice, it creates a new document from the copied pages copying as much of the page-level information as possible not preferring any single document.

Your case is somewhat special, though: You have one document whose structure and content you prefer and want to apply a small change to it by adding a single page, a title page. All the while all information including document-level information (e.g. metadata, embedded files, ...) from the main document shall still be present in the result.

In such a use case it is more appropriate to use a PdfStamper which you use to "stamp" changes onto an existing PDF.

You might want to start from something like this:

// Stamps the first page of "title.pdf" in front of "template.pdf" and writes the
// result to "test-with-title-page.pdf"; all three streams are closed by the
// try-with-resources statement.
try (   InputStream documentStream = getClass().getResourceAsStream("template.pdf");
        InputStream titleStream = getClass().getResourceAsStream("title.pdf");
        OutputStream outputStream = new FileOutputStream(new File(RESULT_FOLDER, "test-with-title-page.pdf"))    )
{
    PdfReader titleReader = new PdfReader(titleStream);
    PdfReader reader = new PdfReader(documentStream);
    // The stamper works on the main document; the title PDF only supplies content.
    PdfStamper stamper = new PdfStamper(reader, outputStream);

    // Import title page 1, insert a new page 1 of the same size into the main
    // document, then place the imported page at the origin of the new page.
    PdfImportedPage page = stamper.getImportedPage(titleReader, 1);
    stamper.insertPage(1, titleReader.getPageSize(1));
    PdfContentByte content = stamper.getUnderContent(1);
    content.addTemplate(page, 0, 0);

    stamper.close();
}

PS: Concerning questions in comments:

In my code above, I should have an overlay text supposedly (before the stamp.alterContents() portion) but I omitted that part of code for testing purposes. Can you please give me an idea how to implement that?

Do you mean something like an overlaid watermark? The PdfStamper allows you to access an "over content" for each page onto which you can draw any content:

// Fetch the "over content" canvas of the given page from the stamper; anything
// drawn on it is added to that page.
PdfContentByte overContent = stamper.getOverContent(pageNumber);

Keeping page labels

My other question is about page offset, because I inserted the cover page, the page numbering are off by 1 page. How can I resolve that?

Unfortunately iText's PdfStamper does not automatically update the page label definition of the manipulated PDF. Actually this is no wonder because it is not clear how the inserted page is meant to be labeled. @Bruno At least, though, iText could change the page label sections starting after the insertion page number.

Using iText's low level API it is possible, though, to fix the original label positions and add a label for the inserted page. This can be implemented similarly to the iText in Action PageLabelExample example, more exactly its manipulatePageLabel part; simply add this before stamper.close():

    // Rebuild the page-label number array of the stamped document: the inserted
    // first page gets its own label and every existing entry starts one page later.
    // Documents without a page-label dictionary are left untouched.
    PdfDictionary catalog = reader.getCatalog();
    PdfDictionary pageLabels = catalog.getAsDict(PdfName.PAGELABELS);
    if (pageLabels != null)
    {
        PdfArray shiftedNums = new PdfArray();

        // Entry for page index 0 (the inserted page): a label made of a prefix only.
        PdfDictionary coverLabel = new PdfDictionary();
        coverLabel.put(PdfName.P, new PdfString("Cover Page"));
        shiftedNums.add(new PdfNumber(0));
        shiftedNums.add(coverLabel);

        PdfArray originalNums = pageLabels.getAsArray(PdfName.NUMS);
        if (originalNums != null)
        {
            // The array holds (start index, label dictionary) pairs; a trailing
            // unpaired element is ignored.
            for (int k = 0; k + 1 < originalNums.size(); k += 2)
            {
                int startIndex = originalNums.getAsNumber(k).intValue();
                shiftedNums.add(new PdfNumber(startIndex + 1));
                shiftedNums.add(originalNums.getPdfObject(k + 1));
            }
        }

        pageLabels.put(PdfName.NUMS, shiftedNums);
        // Tell the stamper the dictionary was modified.
        stamper.markUsed(pageLabels);
    }

For a document with these labels:

enter image description here

It generates a document with these labels:

enter image description here

Keeping links

I just found out that the inserted page "Cover Page" lost its link annotations. I wonder if there's a workaround for this, since according to the book, the interactive features of the inserted page are lost when using PdfStamper.

Indeed, among the iText PDF generating classes only Pdf*Copy* keeps interactive features like annotations. Unfortunately one has to decide whether one wants to

  • create a genuinely new PDF (PdfWriter) with no information from other PDFs beyond contents being embeddable;
  • manipulate a single existing PDF (PdfStamper) with all information from that one PDF being preserved but no information from other PDFs beyond contents being embeddable;
  • merge any number of existing PDFs (PdfCopy) with most page-level information from all those PDFs being preserved but no document-level information from any.

In your case I thought the new cover page had only static content, no dynamic features, and so assumed the PdfStamper was best. If you only have to deal with links, you may consider copying links manually, e.g. using this helper method

/**
 * <p>
 * A primitive attempt at copying links from page <code>sourcePage</code>
 * of <code>PdfReader reader</code> to page <code>targetPage</code> of
 * <code>PdfStamper stamper</code>.
 * </p>
 * <p>
 * This method is meant only for the use case at hand, i.e. copying a link
 * to an external URI without expecting any advanced features. Only link
 * annotations that carry a rectangle of at least four entries and a URI
 * action are copied; every other annotation, and any entry that cannot be
 * resolved to a dictionary, is skipped silently.
 * </p>
 *
 * @param stamper    the stamper whose document receives the new links
 * @param targetPage the page number in the stamped document to add links to
 * @param reader     the reader of the document to copy links from
 * @param sourcePage the page number in <code>reader</code> to copy links from
 */
void copyLinks(PdfStamper stamper, int targetPage, PdfReader reader, int sourcePage)
{
    PdfDictionary sourcePageDict = reader.getPageNRelease(sourcePage);
    // NOTE(review): guards against a missing page dictionary (e.g. a page
    // number outside the source document) - confirm against the iText version in use.
    if (sourcePageDict == null)
        return;

    PdfArray annotations = sourcePageDict.getAsArray(PdfName.ANNOTS);
    if (annotations == null || annotations.size() == 0)
        return;

    for (PdfObject annotationRef : annotations)
    {
        // Resolve indirect references; an unresolvable reference yields null
        // and is skipped instead of causing a NullPointerException.
        PdfObject annotationObject = PdfReader.getPdfObject(annotationRef);
        if (annotationObject == null || !annotationObject.isDictionary())
            continue;
        PdfDictionary annotation = (PdfDictionary) annotationObject;
        if (!PdfName.LINK.equals(annotation.getAsName(PdfName.SUBTYPE)))
            continue;

        PdfArray rectArray = annotation.getAsArray(PdfName.RECT);
        if (rectArray == null || rectArray.size() < 4)
            continue;
        Rectangle rectangle = PdfReader.getNormalizedRectangle(rectArray);

        // Fall back to inverting highlight when the source link names none.
        PdfName highlight = annotation.getAsName(PdfName.H);
        if (highlight == null)
            highlight = PdfAnnotation.HIGHLIGHT_INVERT;

        // Only URI actions are supported by this helper.
        PdfDictionary actionDict = annotation.getAsDict(PdfName.A);
        if (actionDict == null || !PdfName.URI.equals(actionDict.getAsName(PdfName.S)))
            continue;
        PdfString urlPdfString = actionDict.getAsString(PdfName.URI);
        if (urlPdfString == null)
            continue;
        PdfAction action = new PdfAction(urlPdfString.toString());

        PdfAnnotation link = PdfAnnotation.createLink(stamper.getWriter(), rectangle, highlight, action);
        stamper.addAnnotation(link, targetPage);
    }
}

which you can call right after inserting the original page:

        // Insert the title page as the new page 1 of the stamped document ...
        PdfImportedPage page = stamper.getImportedPage(titleReader, 1);
        stamper.insertPage(1, titleReader.getPageSize(1));
        PdfContentByte content = stamper.getUnderContent(1);
        content.addTemplate(page, 0, 0);
        // ... then re-create the title page's URI links on that inserted page.
        copyLinks(stamper, 1, titleReader, 1);

Beware, this method is really simple. It only considers links with URI actions and creates a link on the target page using the same location, target, and highlight setting as the original one. If the original one uses more refined features (e.g. if it brings along its own appearance streams or even merely uses the border style attributes) and you want to keep these features, you have to improve the method to also copy the entries for these features to the new annotation.

Upvotes: 2

Related Questions