Harpreet
Harpreet

Reputation: 63

How to get footnote hyperlink while reading a Word document using Apache POI?

I am using Apache POI to convert a Word document to HTML. I have a Word document that has a footnote which includes an external hyperlink. I am not able to get the hyperlink URL for that hyperlink. Here is my code:

// Read the hyperlink elements from the paragraph's underlying CTP and try to
// look each one up in the document by its relationship id.
List<CTHyperlink> hyperlinkElements = paragraph.getCTP().getHyperlinkList();
log.debug("Count of hyperlinks=" + hyperlinkElements.size());
for (int index = 0; index < hyperlinkElements.size(); index++) {
   String rId = hyperlinkElements.get(index).getId();
   log.debug("rid=" + rId);
   // The lookup goes through the XWPFDocument; a null result is only logged.
   XWPFHyperlink link = document.getHyperlinkByID(rId);
   log.debug(link == null ? "link is NULL" : "link not NULL");
}

From the above code, I see that in my case, the count of hyperlinks is 2. I am getting the rId correctly as "rId1" and "rId2" but link is always coming as NULL.

In the OOXML, I see that the hyperlinks in the document body are stored in the package part "/word/_rels/document.xml.rels", while the hyperlinks in the footnotes are stored in the package part "/word/_rels/footnotes.xml.rels". That is probably why my link variable is always NULL, but I am not sure how to get the hyperlink element from the footnote relationships part.

Upvotes: 0

Views: 658

Answers (1)

Axel Richter
Axel Richter

Reputation: 61852

You are correct. If the paragraph in your code snippet is in a XWPFAbstractFootnoteEndnote then it is in package part /word/footnotes.xml or /word/endnotes.xml and not in /word/document.xml. And XWPFDocument.getHyperlinkByID only gets the hyperlinks stored in /word/document.xml.

The solution depends on where the paragraph in your code snippet comes from, which you have not shown.

But the simplest solution would be to get the XWPFHyperlinkRun from the XWPFParagraph and then get the XWPFHyperlink from that XWPFHyperlinkRun. If the parent package part of the XWPFHyperlinkRun is not the XWPFDocument, then this must be done using the underlying PackageRelationship, since so far a hyperlink list exists only for XWPFDocument.

In "Unable to read all content in order of a word document (docx) in Apache POI" I have shown a basic example of how to traverse a Word document. I have now extended that code to traverse footnotes and endnotes as well as headers and footers, and to handle any XWPFHyperlinkRuns that are found.

Example:

import java.io.FileInputStream;

import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.apache.poi.openxml4j.opc.PackageRelationship;

import java.util.List;

/**
 * Traverses a Word (.docx) document with Apache POI XWPF and prints what it finds:
 * runs, pictures, comments, footnotes/endnotes, tables, content controls and
 * hyperlinks together with their target URLs.
 *
 * <p>Hyperlinks are resolved through {@code XWPFDocument.getHyperlinkByID} when the
 * run lives in the main document part, and through the relationships of the run's
 * own package part (footnotes, endnotes, headers, footers, ...) otherwise.
 */
public class WordTraverseAll {

 /**
  * Prints every picture of the list followed by its picture data.
  *
  * @param pictures the embedded pictures of a run
  */
 static void traversePictures(List<XWPFPicture> pictures) throws Exception {
  for (XWPFPicture picture : pictures) {
   System.out.println(picture);
   XWPFPictureData pictureData = picture.getPictureData();
   System.out.println(pictureData);
  }
 }

 /**
  * Prints author and text of every comment referenced by the run.
  * References whose comment cannot be found in the document are skipped.
  *
  * @param run the run whose comment references are inspected
  */
 static void traverseComments(XWPFRun run) throws Exception {
  for (CTMarkup commentReference : run.getCTR().getCommentReferenceList()) {
   String commentId = String.valueOf(commentReference.getId().intValue());
   XWPFComment comment = run.getDocument().getCommentByID(commentId);
   if (comment == null) {
    // Dangling reference: nothing to print, and dereferencing would throw.
    continue;
   }
   System.out.println("Comment from " + comment.getAuthor() + ": " + comment.getText());
  }
 }

 /**
  * Traverses the content of every footnote and endnote referenced by the run.
  *
  * @param run the run whose footnote and endnote references are inspected
  */
 static void traverseFootnotes(XWPFRun run) throws Exception {
  // The two reference lists are kept apart, so each id is looked up in the
  // matching collection without inspecting the XML element name.
  for (CTFtnEdnRef footnoteReference : run.getCTR().getFootnoteReferenceList()) {
   traverseNote(run.getDocument().getFootnoteByID(footnoteReference.getId().intValue()));
  }
  for (CTFtnEdnRef endnoteReference : run.getCTR().getEndnoteReferenceList()) {
   traverseNote(run.getDocument().getEndnoteByID(endnoteReference.getId().intValue()));
  }
 }

 /** Traverses the runs of all paragraphs of one footnote or endnote; a missing note is ignored. */
 private static void traverseNote(XWPFAbstractFootnoteEndnote note) throws Exception {
  if (note == null) {
   return;
  }
  for (XWPFParagraph paragraph : note.getParagraphs()) {
   traverseRunElements(paragraph.getIRuns());
  }
 }

 /**
  * Resolves the hyperlink a hyperlink run points to.
  *
  * @param hyperlinkRun the run to resolve
  * @return the hyperlink, or {@code null} if the run has no relationship id
  *         (for example a link to a bookmark inside the document) or the
  *         relationship cannot be found
  */
 private static XWPFHyperlink resolveHyperlink(XWPFHyperlinkRun hyperlinkRun) throws Exception {
  String rId = hyperlinkRun.getHyperlinkId();
  if (rId == null || rId.isEmpty()) {
   // No relationship id means there is no external target to look up.
   return null;
  }
  if (hyperlinkRun.getParent().getPart() == null) {
   return null;
  }
  if (hyperlinkRun.getParent().getPart() instanceof XWPFDocument) {
   return hyperlinkRun.getDocument().getHyperlinkByID(rId);
  }
  // Footnotes, endnotes, headers and footers keep their hyperlinks in the
  // relationships of their own package part, not in the document's hyperlink list.
  PackageRelationship rel = hyperlinkRun.getParent().getPart().getPackagePart().getRelationships().getRelationshipByID(rId);
  if (rel == null || rel.getTargetURI() == null) {
   return null;
  }
  return new XWPFHyperlink(rId, rel.getTargetURI().toString());
 }

 /**
  * Prints every run element of the list and descends into what it references:
  * pictures, hyperlink targets, footnotes/endnotes and comments.
  *
  * @param runElements the run-level elements of a paragraph
  */
 static void traverseRunElements(List<IRunElement> runElements) throws Exception {
  for (IRunElement runElement : runElements) {
   // XWPFFieldRun and XWPFHyperlinkRun must be tested before the plain XWPFRun branch.
   if (runElement instanceof XWPFFieldRun) {
    XWPFFieldRun fieldRun = (XWPFFieldRun)runElement;
    System.out.println(fieldRun);
    traversePictures(fieldRun.getEmbeddedPictures());
   } else if (runElement instanceof XWPFHyperlinkRun) {
    XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun)runElement;
    XWPFHyperlink hyperlink = resolveHyperlink(hyperlinkRun);
    System.out.print(hyperlinkRun);
    if (hyperlink != null) {
     System.out.print("->" + hyperlink.getURL());
    }
    // Always terminate the line, also when the hyperlink could not be resolved.
    System.out.println();
    traversePictures(hyperlinkRun.getEmbeddedPictures());
   } else if (runElement instanceof XWPFRun) {
    XWPFRun run = (XWPFRun)runElement;
    System.out.println(run);
    traverseFootnotes(run);
    traverseComments(run);
    traversePictures(run.getEmbeddedPictures());
   } else if (runElement instanceof XWPFSDT) {
    XWPFSDT sDT = (XWPFSDT)runElement;
    System.out.println(sDT);
    System.out.println(sDT.getContent());
    //ToDo: The SDT may have traversable content too.
   }
  }
 }

 /**
  * Traverses the body elements of every regular table cell; content-control
  * cells are only printed.
  *
  * @param tableICells the cells of one table row
  */
 static void traverseTableCells(List<ICell> tableICells) throws Exception {
  for (ICell tableICell : tableICells) {
   if (tableICell instanceof XWPFSDTCell) {
    XWPFSDTCell sDTCell = (XWPFSDTCell)tableICell;
    System.out.println(sDTCell);
    //ToDo: The SDTCell may have traversable content too.
   } else if (tableICell instanceof XWPFTableCell) {
    XWPFTableCell tableCell = (XWPFTableCell)tableICell;
    traverseBodyElements(tableCell.getBodyElements());
   }
  }
 }

 /**
  * Traverses the cells of every row.
  *
  * @param tableRows the rows of one table
  */
 static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
  for (XWPFTableRow tableRow : tableRows) {
   traverseTableCells(tableRow.getTableICells());
  }
 }

 /**
  * Traverses paragraphs and tables of a body and prints content controls.
  *
  * @param bodyElements the body elements of a document, table cell, header or footer
  */
 static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
  for (IBodyElement bodyElement : bodyElements) {
   if (bodyElement instanceof XWPFParagraph) {
    XWPFParagraph paragraph = (XWPFParagraph)bodyElement;
    traverseRunElements(paragraph.getIRuns());
   } else if (bodyElement instanceof XWPFSDT) {
    XWPFSDT sDT = (XWPFSDT)bodyElement;
    System.out.println(sDT);
    System.out.println(sDT.getContent());
    //ToDo: The SDT may have traversable content too.
   } else if (bodyElement instanceof XWPFTable) {
    XWPFTable table = (XWPFTable)bodyElement;
    traverseTableRows(table.getRows());
   }
  }
 }

 /**
  * Traverses the body elements of all headers, then of all footers.
  *
  * @param document the document whose headers and footers are traversed
  */
 static void traverseHeaderFooterElements(XWPFDocument document) throws Exception {
  for (XWPFHeader header : document.getHeaderList()) {
   traverseBodyElements(header.getBodyElements());
  }
  for (XWPFFooter footer : document.getFooterList()) {
   traverseBodyElements(footer.getBodyElements());
  }
 }

 /**
  * Opens {@code WordHavingHyperlinks.docx} from the working directory and prints
  * its body, header and footer content.
  *
  * @param args unused
  */
 public static void main(String[] args) throws Exception {

  // try-with-resources closes both the stream and the document even if traversal fails.
  try (FileInputStream in = new FileInputStream("WordHavingHyperlinks.docx");
       XWPFDocument document = new XWPFDocument(in)) {

   System.out.println("===== Document body elements =====");
   traverseBodyElements(document.getBodyElements());

   System.out.println("===== Header and footer elements =====");
   traverseHeaderFooterElements(document);
  }

 }
}

Upvotes: 1

Related Questions