Reputation: 31
Is there an equivalent of PdfReader's getJavaScript method in iText 7? We want to sanitize PDF documents for malicious content using iText 7.
Upvotes: 2
Views: 1999
Reputation: 21
I know this is a very old thread, but I am still sharing my code to add a couple of things on top of what mkl and James wrote.
// iText V 7.0.2
// To get JavaScript that is added through OpenAction
// NOTE(review): srcPdf is never closed in this snippet; close it once all checks are done.
PdfDocument srcPdf = new PdfDocument(new PdfReader(srcFilePath));
PdfDictionary pdfDictionaryCatalog = srcPdf.getCatalog().getPdfObject();
// Holds the /OpenAction dictionary here. The variable keeps its original name because
// the code that follows reassigns it to the catalog's /Names dictionary.
PdfDictionary namesDictionary = pdfDictionaryCatalog.getAsDictionary(PdfName.OpenAction);
// Only report when the action actually carries a /JS entry.
if (namesDictionary != null && namesDictionary.get(PdfName.JS) != null) {
    StringBuilder strBuf = new StringBuilder();
    // Delegate to the shared helper: it looks up /JS itself and covers string, stream,
    // nested dictionary and array values. The previous hand-written chain had no branch
    // for a stream, so a script stored as a stream was silently dropped.
    getJavaScriptFromPdfDictionary(namesDictionary, strBuf);
    System.out.println("*****OPENACTION****** " + strBuf.toString());
}
// Collect JavaScript reachable from the direct entries of the catalog's /Names -> /JavaScript dictionary
namesDictionary = pdfDictionaryCatalog.getAsDictionary(PdfName.Names);
if (namesDictionary != null) {
    PdfDictionary scriptsDictionary = namesDictionary.getAsDictionary(PdfName.JavaScript);
    if (scriptsDictionary != null) {
        StringBuilder collected = new StringBuilder();
        for (Entry<PdfName, PdfObject> item : scriptsDictionary.entrySet()) {
            PdfObject value = item.getValue();
            if (value.isDictionary()) {
                getJavaScriptFromPdfDictionary((PdfDictionary) value, collected);
                continue;
            }
            if (value.isArray()) {
                getJavaScriptFromPdfArray((PdfArray) value, collected);
                continue;
            }
            // A plain string only counts as script when it sits directly under the JS key.
            if (value.isString()) {
                String keyName = item.getKey().getValue();
                if (keyName.equals(PdfName.JS.getValue())) {
                    collected.append(((PdfString) value).getValue());
                }
            }
        }
        System.out.println("*****JAVASCRIPT****** " + collected);
    }
}
// Collect JavaScript from the entries of the JavaScript name tree
PdfNameTree nameTree = srcPdf.getCatalog().getNameTree(PdfName.JavaScript);
if (nameTree != null) {
    Map<String, PdfObject> namedObjects = nameTree.getNames();
    if (namedObjects != null) {
        StringBuilder collected = new StringBuilder();
        for (Entry<String, PdfObject> item : namedObjects.entrySet()) {
            PdfObject value = item.getValue();
            if (value.isDictionary()) {
                getJavaScriptFromPdfDictionary((PdfDictionary) value, collected);
                continue;
            }
            if (value.isArray()) {
                getJavaScriptFromPdfArray((PdfArray) value, collected);
                continue;
            }
            // A plain string is only appended when its tree name equals the JS key name.
            if (value.isString()) {
                String treeName = item.getKey();
                if (treeName.equals(PdfName.JS.getValue())) {
                    collected.append(((PdfString) value).getValue());
                }
            }
        }
        System.out.println("*****JAVASCRIPT NAMED TREE****** " + collected);
    }
}
// To get JavaScript at the annotation action level
for (int i = 1; i <= srcPdf.getNumberOfPages(); i++) {
    PdfPage page = srcPdf.getPage(i);
    List<PdfAnnotation> annotList = page.getAnnotations();
    if (annotList == null) {
        continue;
    }
    for (PdfAnnotation pdfAnnotation : annotList) {
        if (pdfAnnotation.getPdfObject() == null) {
            continue;
        }
        PdfDictionary annotationAction = pdfAnnotation.getPdfObject().getAsDictionary(PdfName.A);
        // Only actions whose /S entry is /JavaScript are of interest.
        if (annotationAction == null || !PdfName.JavaScript.equals(annotationAction.get(PdfName.S))) {
            continue;
        }
        // Report whenever a /JS entry is present. The previous getAsString(PdfName.JS)
        // lookup yielded null for a script stored as a stream, so such scripts were
        // never printed. The shared helper reads /JS and covers strings and streams.
        if (annotationAction.get(PdfName.JS) != null) {
            StringBuilder javascript = new StringBuilder();
            getJavaScriptFromPdfDictionary(annotationAction, javascript);
            System.out.println("ANNOTATION " + javascript);
        }
    }
}
/* The getJavaScriptFromPdfDictionary() and getJavaScriptFromPdfArray() methods are the same as in James's answer. */
Upvotes: 1
Reputation: 133
For anyone able to use iText 7.1.1 or newer, I expect the solution by mkl to be better. If you are forced to use iText 7.0.5 like I was, the following worked for my reference PDF:
/**
 * Reports whether the given PDF document contains JavaScript.
 *
 * <p>Every page's annotations are checked first for an action ({@code /A}) whose
 * {@code /S} entry is {@code /JavaScript}. If none carries script text, the result of
 * {@link #getJavaScriptFromPdfDocument(PdfDocument)} is checked as well.
 *
 * @param pdfDoc  the opened document to inspect
 * @param theFile not used by this method; kept so existing callers keep compiling
 * @return {@code true} as soon as any JavaScript text is found, {@code false} otherwise
 */
private static boolean hasJavascript(PdfDocument pdfDoc, String theFile) {
    int n = pdfDoc.getNumberOfPages();
    for (int i = 1; i <= n; i++) {
        PdfPage pdfPage = pdfDoc.getPage(i);
        List<PdfAnnotation> annotList = pdfPage.getAnnotations();
        if (!ListUtility.hasData(annotList)) {
            continue;
        }
        for (PdfAnnotation annot : annotList) {
            // Constant-first comparison: an annotation without a subtype no longer
            // triggers a NullPointerException.
            // NOTE(review): link annotations are skipped entirely, so a JavaScript action
            // attached to a link is not reported here - confirm this is intended.
            if (PdfName.Link.equals(annot.getSubtype())) {
                continue;
            }
            PdfDictionary annotationAction = annot.getPdfObject().getAsDictionary(PdfName.A);
            if (annotationAction == null || !PdfName.JavaScript.equals(annotationAction.get(PdfName.S))) {
                continue;
            }
            // Use the shared helper instead of getAsString(PdfName.JS).getValue(): the
            // old lookup threw a NullPointerException when /JS was absent or was not a
            // string (e.g. a stream). The helper tolerates a missing entry and also
            // extracts stream-valued scripts.
            StringBuilder actionScript = new StringBuilder();
            getJavaScriptFromPdfDictionary(annotationAction, actionScript);
            String javascript = actionScript.toString();
            if (StringUtility.hasData(javascript)) {
                log.debug("JavaScript found in PDF on page " + i);
                log.trace(javascript);
                return true;
            }
        }
    }
    String javaScriptInPdf = getJavaScriptFromPdfDocument(pdfDoc);
    if (StringUtility.hasData(javaScriptInPdf)) {
        log.debug("JavaScript found using iText 7");
        log.trace(javaScriptInPdf);
        return true;
    }
    log.debug("JavaScript not found in PDF");
    return false;
}
//
/**
 * Gathers JavaScript text from the direct entries of the catalog's
 * {@code /Names -> /JavaScript} dictionary.
 *
 * @param pdfDoc the document to read
 * @return the concatenated script text (possibly empty), or {@code null} when the
 *         catalog, the {@code /Names} dictionary or its {@code /JavaScript} dictionary
 *         is missing
 */
private static String getJavaScriptFromPdfDocument(PdfDocument pdfDoc) {
    StringBuilder collected = new StringBuilder();
    try {
        PdfDictionary catalog = pdfDoc.getCatalog().getPdfObject();
        if (catalog == null) {
            log.trace("getJavaScriptFromPdfDocument(): pdfDictionaryCatalog null; return null");
            return null;
        }

        PdfDictionary names = catalog.getAsDictionary(PdfName.Names);
        if (names == null) {
            log.trace("getJavaScriptFromPdfDocument(): PdfDictionary for PdfName.Names null; return null");
            return null;
        }

        PdfDictionary scripts = names.getAsDictionary(PdfName.JavaScript);
        if (scripts == null) {
            log.trace("getJavaScriptFromPdfDocument(): PdfDictionary for PdfName.JavaScript null; return null");
            return null;
        }

        for (Entry<PdfName, PdfObject> item : scripts.entrySet()) {
            PdfObject value = item.getValue();
            if (value.isDictionary()) {
                getJavaScriptFromPdfDictionary((PdfDictionary) value, collected);
                continue;
            }
            if (value.isArray()) {
                getJavaScriptFromPdfArray((PdfArray) value, collected);
                continue;
            }
            // A bare string is only taken when it is stored directly under the JS key.
            if (value.isString()) {
                String keyName = item.getKey().getValue();
                if (keyName.equals(PdfName.JS.getValue())) {
                    collected.append(((PdfString) value).getValue());
                }
            }
        }
    }
    catch (Exception e) {
        // Failures are only logged; whatever was collected so far is still returned.
        log.debug(e,e);
    }
    return collected.toString();
}
//
/**
 * Walks an array and appends the JavaScript found in its dictionary elements,
 * descending into nested arrays.
 *
 * @param pdfArray the array to scan; {@code null} is ignored
 * @param strBuf   receives any script text that is found
 */
private static void getJavaScriptFromPdfArray(PdfArray pdfArray, StringBuilder strBuf) {
    if (pdfArray != null) {
        for (PdfObject element : pdfArray) {
            // String elements are deliberately not appended, to keep the output
            // identical to getJavaScriptUsingiText559().
            if (element != null) {
                if (element.isDictionary()) {
                    getJavaScriptFromPdfDictionary((PdfDictionary) element, strBuf);
                } else if (element.isArray()) {
                    getJavaScriptFromPdfArray((PdfArray) element, strBuf);
                }
            }
        }
    }
}
//
/**
 * Appends the script stored under the {@code JS} key of a dictionary.
 *
 * <p>A string value is appended as-is, a stream value is converted through
 * {@code getStringFromPdfStream} with the {@code TRUNCATE_PDF_STREAM_AT} argument, and
 * dictionary or array values are searched recursively.
 *
 * @param pdfDict the dictionary to inspect; {@code null} is ignored
 * @param strBuf  receives any script text that is found
 */
private static void getJavaScriptFromPdfDictionary(PdfDictionary pdfDict, StringBuilder strBuf) {
    PdfObject script = (pdfDict == null) ? null : pdfDict.get(PdfName.JS);
    if (script == null) {
        return;
    }
    if (script.isString()) {
        strBuf.append(((PdfString) script).getValue());
        return;
    }
    if (script.isStream()) {
        strBuf.append(getStringFromPdfStream((PdfStream) script, TRUNCATE_PDF_STREAM_AT));
        return;
    }
    if (script.isDictionary()) {
        getJavaScriptFromPdfDictionary((PdfDictionary) script, strBuf);
        return;
    }
    if (script.isArray()) {
        getJavaScriptFromPdfArray((PdfArray) script, strBuf);
    }
}
Upvotes: 2
Reputation: 95918
As far as I can see there is not a dedicated method for that in iText 7.
Essentially, though, the old PdfReader.getJavaScript() method merely looked for the JavaScript name tree and put all the values into a string buffer.
You can output these values like this in iText 7:
// Print each entry of the document-level JavaScript name tree: its name, followed by
// the script stored under the entry's /JS key (string or stream).
PdfNameTree javascript = pdfDocument.getCatalog().getNameTree(PdfName.JavaScript);
Map<String, PdfObject> objs2 = javascript.getNames();
for (Map.Entry<String, PdfObject> entry : objs2.entrySet())
{
    System.out.println();
    System.out.println(entry.getKey());
    System.out.println();
    PdfObject object = entry.getValue();
    if (object.isDictionary()) {
        object = ((PdfDictionary)object).get(PdfName.JS);
        // A dictionary without a /JS entry yields null here; skip it instead of
        // failing with a NullPointerException.
        if (object != null && object.isString()) {
            System.out.println(((PdfString)object).getValue());
        } else if (object != null && object.isStream()) {
            // NOTE(review): the bytes are decoded with the platform default charset -
            // confirm that matches the encoding of the scripts being inspected.
            System.out.println(new String(((PdfStream)object).getBytes()));
        }
    }
    System.out.println();
}
(ShowDocumentLevelJavaScript test testREJECT_ContainsJavaScript)
Obviously you can in a similar manner collect the pieces of JavaScript into some string buffer.
In a comment James claimed
I tried using (and extending) your answer but cannot detect the JavaScript popup that fires when I open a sample PDF
Applying the above code to the PDF file provided by @James I get the output:
e.pdf Freeware Hinweis
if (app.viewerVersion>=5)
{
var result=app.alert(
"Diese Datei wurde mit der Freeware Version von CIB e.pdf erzeugt.\n\nMöchten Sie nähere Informationen?"
, 3
, 2
, "e.pdf Freeware Hinweis"
);
if (result==4)
getURL("http://www.cib.de/deutsch/products/pdfplugin/epdfbeta.asp", false);
}
The JavaScript popup can clearly be seen as an app.alert call here. Thus, I cannot reproduce the issue.
Upvotes: 3