Reputation: 136
from PyPDF2 import PdfFileMerger, PdfFileReader

# Input PDFs, in the order they should appear in the merged document.
filepath_list = ['/tmp/abc.pdf', '/tmp/xyz.pdf']

merger = PdfFileMerger()
for pdf_path in filepath_list:
    # Each input is opened in binary mode and handed to the merger as a
    # file object; append() adds its pages to the end of the output.
    with open(pdf_path, 'rb') as pdf_stream:
        merger.append(pdf_stream)

# Write the combined document once every input has been appended.
merger.write("result.pdf")
While merging two PDFs with this Python code, I get the error `Unexpected destination '/__WKANCHOR_2'`. Please suggest a solution.
Upvotes: 10
Views: 5054
Reputation: 2444
Deval
You can simply concatenate the files by using the `append` method. PyPDF2's `PdfFileMerger` class merges PDFs by simple file concatenation. Here is a simple example that does not need any patch:
from PyPDF2 import PdfFileMerger

# Paths of the PDFs to concatenate, in output order.
pdf_files = ['pdf1.pdf', 'pdf2.pdf']

merger = PdfFileMerger()
for source_path in pdf_files:
    # append() is given the path itself rather than an open file object.
    merger.append(source_path)

# Write the merged document, then release the merger's resources.
merger.write("merge_pdf.pdf")
merger.close()
At the provided link you can find many more PDF-related options that can help you achieve this more precisely.
Thanks
Upvotes: -1
Reputation: 131
If the method @Tonechas mentions doesn't work for you, try the method @hannal mentions on GitHub. I implemented it into a separate file that I import like so:
from __pypdf2_fix import NewPdfFileReader as PdfFileReader, NewPdfFileMerger as PdfFileMerger
The file:
from io import BytesIO
from io import FileIO as file

from PyPDF2 import PdfFileReader, PdfFileMerger
from PyPDF2 import utils
from PyPDF2.generic import Bookmark, NumberObject, TextStringObject
from PyPDF2.merger import _MergedPage
from PyPDF2.pagerange import PageRange
from PyPDF2.pdf import ArrayObject, NameObject
from PyPDF2.utils import isString
StreamIO = BytesIO
class NewPdfFileReader(PdfFileReader):
    """``PdfFileReader`` that skips outline entries it cannot resolve.

    The only difference from the base class is ``_buildOutline``: an outline
    entry whose destination is a ``NameObject`` that was not resolved as a
    named destination is ignored instead of raising
    ``PdfReadError("Unexpected destination ...")``.
    """

    def _buildOutline(self, node):
        """Build the outline (bookmark) entry described by ``node``.

        :param node: outline item dictionary. It is only used when it has a
            ``/Title`` together with either an ``/A`` action or a ``/Dest``.
        :return: the destination built for the entry, or ``None`` when the
            node carries no usable destination.
        :raises utils.PdfReadError: if the destination is not an array, not
            a known named destination and not a ``NameObject``.
        """
        dest, title, outline = None, None, None

        if "/A" in node and "/Title" in node:
            # Action, section 8.5 (only type GoTo supported)
            title = node["/Title"]
            action = node["/A"]
            if action["/S"] == "/GoTo":
                dest = action["/D"]
        elif "/Dest" in node and "/Title" in node:
            # Destination, section 8.2.1
            title = node["/Title"]
            dest = node["/Dest"]

        # if destination found, then create outline
        if dest:
            if isinstance(dest, ArrayObject):
                outline = self._buildDestination(title, dest)
            elif isString(dest) and dest in self._namedDests:
                outline = self._namedDests[dest]
                outline[NameObject("/Title")] = title
            elif isinstance(dest, NameObject):
                # A name the branch above did not resolve (for example
                # '/__WKANCHOR_2'): drop this entry rather than abort the
                # whole read.
                pass
            else:
                raise utils.PdfReadError("Unexpected destination %r" % dest)
        return outline


# Backward-compatible alias: the override used to be a module-level function
# that was assigned onto the class after its definition.
_newBuildOutline = NewPdfFileReader._buildOutline
class NewPdfFileMerger(PdfFileMerger):
    """``PdfFileMerger`` that parses every input with ``NewPdfFileReader``.

    ``merge`` is overridden so that each source document is read with
    ``NewPdfFileReader`` rather than the stock ``PdfFileReader``.
    """

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Merges the pages from the given file into the output file at the
        specified page number.

        :param int position: The *page number* to insert this file. File will
            be inserted after the given number.
        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.
        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.
        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.
        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        :raises TypeError: if ``pages`` is given but is neither a
            ``PageRange`` nor a tuple.
        """
        # This flag is stored in self.inputs and means that the stream
        # used was created in this method.
        my_file = False
        decryption_key = None

        if isString(fileobj):
            # A string is taken to be a path: open the file ourselves.
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif isinstance(fileobj, PdfFileReader):
            # Read the key BEFORE rebinding fileobj to the in-memory copy;
            # looking it up afterwards would inspect the BytesIO and never
            # find it.
            decryption_key = getattr(fileobj, '_decryption_key', None)
            source = fileobj.stream
            orig_tell = source.tell()
            source.seek(0)
            try:
                fileobj = StreamIO(source.read())
            finally:
                # reset the reader's stream to its original location
                source.seek(orig_tell)
            my_file = True
        elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'read'):
            # Any file-like object (not only io.FileIO) is copied into
            # memory, so the caller may close its own handle before the
            # merged document is written.
            fileobj.seek(0)
            fileobj = StreamIO(fileobj.read())
            my_file = True
        # Anything else is passed to the reader unmodified.

        # Create a new reader instance using the stream obtained above.
        try:
            pdfr = NewPdfFileReader(fileobj, strict=self.strict)
        except Exception:
            # Do not leak a stream that was created in this method.
            if my_file:
                fileobj.close()
            raise
        if decryption_key is not None:
            pdfr._decryption_key = decryption_key

        # Find the range of pages to merge.
        if pages is None:
            pages = (0, pdfr.getNumPages())
        elif isinstance(pages, PageRange):
            pages = pages.indices(pdfr.getNumPages())
        elif not isinstance(pages, tuple):
            raise TypeError('"pages" must be a tuple of (start, stop[, step])')

        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)

        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline

        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests

        # Gather all the pages that are going to be merged
        srcpages = []
        for i in range(*pages):
            pg = pdfr.getPage(i)
            page_id = self.id_count
            self.id_count += 1
            srcpages.append(_MergedPage(pg, pdfr, page_id))

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)

        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages

        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))


# Backward-compatible alias: the override used to be a module-level function
# that was assigned onto the class after its definition.
newMerge = NewPdfFileMerger.merge
Upvotes: 1
Reputation: 342
This is a temporary fix: when you pass the file to the `append` method, also pass `import_bookmarks=False`, so that the source document's bookmarks are not imported. This works for me.
from PyPDF2 import PdfFileMerger, PdfFileReader

# Input PDFs, in the order they should appear in the merged document.
filepath_list = ['/tmp/abc.pdf', '/tmp/xyz.pdf']

merger = PdfFileMerger()
for pdf_path in filepath_list:
    with open(pdf_path, 'rb') as pdf_stream:
        # import_bookmarks=False: do not import the source document's
        # bookmarks while appending its pages.
        merger.append(pdf_stream, import_bookmarks=False )

# Write the combined document once every input has been appended.
merger.write("result.pdf")
Upvotes: 22