Tom Grundy
Tom Grundy

Reputation: 826

How to generate searchable PDF using reportlab?

Here's some code that generates PDFs and has been in stable use for a few years. However, I just noticed that the generated PDF is not searchable in Acrobat Reader. How can I make the generated PDF searchable?

Notice that the element containing the content to be searched is a table - maybe that's the hitch?

from reportlab.lib import colors,utils
from reportlab.lib.pagesizes import letter,landscape,portrait
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Image, Spacer
from reportlab.lib.styles import getSampleStyleSheet,ParagraphStyle
from reportlab.lib.units import inch

... ...

        doc = SimpleDocTemplate(pdfName, pagesize=landscape(letter),leftMargin=0.5*inch,rightMargin=0.5*inch,topMargin=1.03*inch,bottomMargin=0.5*inch) # or pagesize=letter
#       self.logMsgBox.show()
#       QTimer.singleShot(5000,self.logMsgBox.close)
        QCoreApplication.processEvents()
        elements=[]
        for team in teamFilterList:
            extTeamNameLower=getExtTeamName(team).lower()
            radioLogPrint=[]
            styles = getSampleStyleSheet()
            styles.add(ParagraphStyle(
                name='operator',
                parent=styles['Normal'],
                backColor='lightgrey'
                ))
            headers=MyTableModel.header_labels[0:6]
            if self.useOperatorLogin:
                operatorImageFile=os.path.join(iconsDir,'user_icon_80px.png')
                if os.path.isfile(operatorImageFile):
                    rprint('operator image file found: '+operatorImageFile)
                    headers.append(Image(operatorImageFile,width=0.16*inch,height=0.16*inch))
                else:
                    rprint('operator image file not found: '+operatorImageFile)
                    headers.append('Op.')
            radioLogPrint.append(headers)
##          if teams and opPeriod==1: # if request op period = 1, include 'Radio Log Begins' in all team tables
##              radioLogPrint.append(self.radioLog[0])
            entryOpPeriod=1 # update this number when 'Operational Period <x> Begins' lines are found
##          hits=False # flag to indicate whether this team has any entries in the requested op period; if not, don't make a table for this team
            for row in self.radioLog:
                opStartRow=False
##              rprint("message:"+row[3]+":"+str(row[3].split()))
                if row[3].startswith("Radio Log Begins:"):
                    opStartRow=True
                if row[3].startswith("Operational Period") and row[3].split()[3] == "Begins:":
                    opStartRow=True
                    entryOpPeriod=int(row[3].split()[2])
                # #523: handled continued incidents
                if row[3].startswith('Radio Log Begins - Continued incident'):
                    opStartRow=True
                    entryOpPeriod=int(row[3].split(': Operational Period ')[1].split()[0])
##              rprint("desired op period="+str(opPeriod)+"; this entry op period="+str(entryOpPeriod))
                if entryOpPeriod == opPeriod:
                    if team=="" or extTeamNameLower==getExtTeamName(row[2]).lower() or opStartRow: # filter by team name if argument was specified
                        style=styles['Normal']
                        if 'RADIO OPERATOR LOGGED IN' in row[3]:
                            style=styles['operator']
                        printRow=[row[0],row[1],row[2],Paragraph(row[3],style),Paragraph(row[4],styles['Normal']),Paragraph(row[5],styles['Normal'])]
                        if self.useOperatorLogin:
                            if len(row)>10:
                                printRow.append(row[10])
                            else:
                                printRow.append('')
                        radioLogPrint.append(printRow)
##                      hits=True
            if not teams:
                # #523: avoid exception 
                try:
                    radioLogPrint[1][4]=self.datum
                except:
                    rprint('Nothing to print for specified operational period '+str(opPeriod))
                    return
            rprint("length:"+str(len(radioLogPrint)))
            if not teams or len(radioLogPrint)>2: # don't make a table for teams that have no entries during the requested op period
                if self.useOperatorLogin:
                    colWidths=[x*inch for x in [0.5,0.6,1.25,5.2,1.25,0.9,0.3]]
                else:
                    colWidths=[x*inch for x in [0.5,0.6,1.25,5.5,1.25,0.9]]
                t=Table(radioLogPrint,repeatRows=1,colWidths=colWidths)
                t.setStyle(TableStyle([('FONT',(0,0),(-1,-1),'Helvetica'),
                                        ('FONT',(0,0),(-1,1),'Helvetica-Bold'),
                                        ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
                                     ('BOX', (0,0), (-1,-1), 2, colors.black),
                                      ('BOX', (0,0), (-1,0), 2, colors.black)]))
                elements.append(t)
                if teams and team!=teamFilterList[-1]: # don't add a spacer after the last team - it could cause another page!
                    elements.append(Spacer(0,0.25*inch))
        doc.build(elements,onFirstPage=functools.partial(self.printLogHeaderFooter,opPeriod=opPeriod,teams=teams),onLaterPages=functools.partial(self.printLogHeaderFooter,opPeriod=opPeriod,teams=teams))
#       self.logMsgBox.setInformativeText("Finalizing and Printing...")
        self.printPDF(pdfName)

... ...

def printPDF(self,pdfName):
    """Ask the Windows shell to print the file at pdfName on the default printer.

    pdfName is passed as the file argument of ShellExecute with the "print"
    verb; the name returned by win32print.GetDefaultPrinter() is passed in
    the parameter string as /d:"<printer name>".

    NOTE(review): if the default printer is a print-to-PDF driver, the output
    it produces is a separate file from pdfName and may not be searchable
    even though pdfName is - confirm against the printer configuration.
    """
    try:
        # arguments: hwnd=0, verb "print", file, parameter string, working dir ".", show command 0
        win32api.ShellExecute(0,"print",pdfName,'/d:"%s"' % win32print.GetDefaultPrinter(),".",0)
    except Exception as e:
        # Only the exception text is captured here; the rest of this handler is
        # elided in this excerpt, so what is done with estr is not visible.
        estr=str(e)

... ...

Upvotes: 0

Views: 89

Answers (1)

Tom Grundy
Tom Grundy

Reputation: 826

Thanks Marijn, that was the ticket. My mistake: the PDF generated by reportlab >is< already searchable (it comes from the .build call at the end of the code); the PDF subsequently generated by Windows "Print to PDF" is the one that is not searchable. Interesting, but this particular question is solved.

Upvotes: 0

Related Questions