timmydodger
timmydodger

Reputation: 101

how to create bookmarks in a word document, then create internal hyperlinks to the bookmark w/ python

I have written a script using python-docx to search word documents (by searching the runs) for reference numbers and technical key words, then create a table which summarizes the search results which is appended to the end of the word document.

some of the documents are 100+ pages, so I want to make it easier for the user by creating internal hyperlinks in the search result table, so it will bring you to the location in the document where the search result was detected.

once a reference run is found, I don't know how to mark it as a bookmark or how to create a hyperlink to that bookmark in the results table.

I was able to create hyperlinks to external URLs using the code on this page: Adding an hyperlink in MSWord by using python-docx

I have also tried creating bookmarks, I found this page: https://github.com/python-openxml/python-docx/issues/109

the title relates to creating bookmarks, but the code seems to generate figures in word.

I feel like the two solutions can be put together, but I don't have enough understanding of xml/ word docs to be able to do it.

Update: I found some code that adds bookmarks to a Word document (https://github.com/python-openxml/python-docx/issues/403). What is still needed is a way to link to these bookmarks from elsewhere in the same document.

from docx import Document

def add_bookmark(paragraph, bookmark_text, bookmark_name, bookmark_id=None):
    """Append ``bookmark_text`` to ``paragraph`` and wrap it in a bookmark.

    The paragraph ends up with ``<w:bookmarkStart/>``, a run holding the
    text, and ``<w:bookmarkEnd/>`` appended, in that order, as direct
    children of its ``<w:p>`` element.

    Args:
        paragraph: python-docx paragraph that receives the bookmark.
        bookmark_text: Text of the run enclosed by the bookmark.
        bookmark_name: Name that hyperlinks use (``w:anchor``) to target
            the bookmark.
        bookmark_id: Optional ``w:id`` shared by the start/end pair. When
            omitted, one greater than the largest integer id already used
            by a ``w:bookmarkStart`` in the same XML tree is chosen.
    """
    # Imported locally: the surrounding snippet only does
    # ``from docx import Document``, which does not bind the name ``docx``.
    from docx.oxml.ns import qn
    from docx.oxml.shared import OxmlElement

    p = paragraph._p
    id_attr = qn('w:id')

    if bookmark_id is None:
        # A hard-coded id would be shared by every bookmark created here;
        # scan the tree so each start/end pair gets its own id.
        used_ids = []
        for existing in p.getroottree().getroot().iter(qn('w:bookmarkStart')):
            try:
                used_ids.append(int(existing.get(id_attr)))
            except (TypeError, ValueError):
                # Missing or non-numeric id: nothing to collide with.
                continue
        bookmark_id = max(used_ids, default=-1) + 1
    bookmark_id = str(bookmark_id)

    # The markers go on the paragraph, not inside a run: nesting them (and a
    # second w:r) inside a w:r is not valid WordprocessingML.
    start = OxmlElement('w:bookmarkStart')
    start.set(id_attr, bookmark_id)
    start.set(qn('w:name'), bookmark_name)
    p.append(start)

    paragraph.add_run(bookmark_text)

    # The end marker is matched to the start marker by id only.
    end = OxmlElement('w:bookmarkEnd')
    end.set(id_attr, bookmark_id)
    p.append(end)


doc = Document("test_input_1.docx")

# Add a bookmark to every paragraph: paragraph number i (0-based) gets the
# text "temp{i}" appended and is bookmarked under the name "temp{i+1}".
for paranum, paragraph in enumerate(doc.paragraphs):
    add_bookmark(paragraph=paragraph, bookmark_text=f"temp{paranum}", bookmark_name=f"temp{paranum+1}")
doc.save("output.docx")

Upvotes: 3

Views: 6504

Answers (3)

caram
caram

Reputation: 1719

Here is a version inspired by @timmydodger, based on SubElement:

from docx.oxml.ns import qn
from lxml.etree import SubElement

def add_bookmark(paragraph, bookmark_text, bookmark_name, bookmark_id=None):
    """Append ``bookmark_text`` to ``paragraph`` and wrap it in a bookmark.

    Appends ``<w:bookmarkStart/>``, a run holding the text, and
    ``<w:bookmarkEnd/>``, in that order, as direct children of the
    paragraph's ``<w:p>`` element.

    Args:
        paragraph: python-docx paragraph that receives the bookmark.
        bookmark_text: Text of the run enclosed by the bookmark.
        bookmark_name: Name that hyperlinks use (``w:anchor``) to target
            the bookmark.
        bookmark_id: Optional ``w:id`` shared by the start/end pair. When
            omitted, one greater than the largest integer id already used
            by a ``w:bookmarkStart`` in the same XML tree is chosen.
    """
    p = paragraph._p
    id_attr = qn('w:id')

    if bookmark_id is None:
        # A hard-coded id would be shared by every bookmark created here;
        # scan the tree so each start/end pair gets its own id.
        used_ids = []
        for existing in p.getroottree().getroot().iter(qn('w:bookmarkStart')):
            try:
                used_ids.append(int(existing.get(id_attr)))
            except (TypeError, ValueError):
                # Missing or non-numeric id: nothing to collide with.
                continue
        bookmark_id = max(used_ids, default=-1) + 1
    bookmark_id = str(bookmark_id)

    # Markers are children of the paragraph, not of a run.
    SubElement(p, qn('w:bookmarkStart'), {
        id_attr: bookmark_id,
        qn('w:name'): bookmark_name,
    })

    # The text must live in a w:t child element of the run; writing it as a
    # ``w:t`` attribute on the run produces no visible text. add_run() builds
    # the run and its w:t child and appends it to the paragraph.
    paragraph.add_run(bookmark_text)

    # The end marker is matched to the start marker by id only.
    SubElement(p, qn('w:bookmarkEnd'), {
        id_attr: bookmark_id,
    })

def add_hyperlink(paragraph, url, fragment, text):
    """Append a hyperlink run to ``paragraph``.

    Args:
        paragraph: python-docx paragraph that receives the link.
        url: External target. When truthy, an external hyperlink
            relationship is registered on the paragraph's part and
            referenced through ``r:id``. Pass ``None`` or ``""`` for a
            purely internal link to a bookmark in the same document.
        fragment: Written to ``w:anchor`` when truthy; for an internal link
            this is the bookmark name.
        text: Visible text of the link.
    """
    # Imported locally: the snippet's imports do not bring this name in.
    from docx.opc.constants import RELATIONSHIP_TYPE

    attrs = {}
    if url:
        attrs[qn('r:id')] = paragraph.part.relate_to(
            url, RELATIONSHIP_TYPE.HYPERLINK, is_external=True
        )
    if fragment:
        attrs[qn('w:anchor')] = fragment
    attrs[qn('w:history')] = '1'

    # The hyperlink is a direct child of the paragraph element.
    hyperlink = SubElement(paragraph._p, qn('w:hyperlink'), attrs)

    r = SubElement(hyperlink, qn('w:r'))
    rPr = SubElement(r, qn('w:rPr'))
    # References the character style named 'Hyperlink'.
    # NOTE(review): presumably the document must define that style for the
    # link to be formatted - confirm against the template in use.
    SubElement(rPr, qn('w:rStyle'), {
        qn('w:val'): 'Hyperlink',
    })
    r.text = text

Upvotes: 0

Abd_Allah
Abd_Allah

Reputation: 1

The previous solution doesn't work for me in LibreOffice (6.4).

After checking the xml of 2 documents, with bookmark and without, also after checking this: http://officeopenxml.com/WPbookmark.php, we can see that:

For the bookmark: the solution is to add the bookmark to the paragraph, not to a run. So in this line:

tag = run._r  # for reference the following also works: tag =  document.element.xpath('//w:r')[-1]

you should change the "r" to "p" in "('//w:r')" :

# NOTE: '//w:p' matches every paragraph in the document and [-1] takes the
# last match, so this always targets the document's final paragraph. To
# bookmark the paragraph passed to add_bookmark, use ``paragraph._p`` instead.
tag = doc.element.xpath('//w:p')[-1]

and then it will work

For the link, you have to do the same thing; here is the function:

def add_link(paragraph, link_to, text, tool_tip=None):
    """Append an internal hyperlink to ``paragraph``.

    Args:
        paragraph: python-docx paragraph that receives the link.
        link_to: Name of the bookmark to jump to (written to ``w:anchor``).
        text: Visible text of the link.
        tool_tip: Optional hover text (written to ``w:tooltip``).
    """
    make_element = docx.oxml.shared.OxmlElement
    qualified = docx.oxml.shared.qn
    val_attr = qualified('w:val')

    # The anchor attribute is what points the link at a bookmark.
    link = make_element('w:hyperlink')
    link.set(qualified('w:anchor'), link_to)
    if tool_tip is not None:
        link.set(qualified('w:tooltip'), tool_tip)

    # Run properties: explicit font colour plus a single underline.
    colour = make_element('w:color')
    colour.set(val_attr, '2A6099')
    underline = make_element('w:u')
    underline.set(val_attr, 'single')
    properties = make_element('w:rPr')
    properties.append(colour)
    properties.append(underline)

    link_run = make_element('w:r')
    link_run.append(properties)
    link_run.text = text
    link.append(link_run)

    # Attach the hyperlink to the w:p element itself. Wrapping it in a run
    # created with paragraph.add_run() and styling that run through its
    # font (name, theme colour, underline) is the approach that is wrong.
    paragraph._p.append(link)

Upvotes: 0

timmydodger
timmydodger

Reputation: 101

Solved: I got it from this post adding hyperlink to a bookmark

this is the key line

hyperlink.set(docx.oxml.shared.qn('w:anchor'), link_to,)

As a bonus I have added in the ability to add a tool tip to your link:

enjoy

here is the answer:

from docx import Document
import docx
from docx.enum.dml import MSO_THEME_COLOR_INDEX

def add_bookmark(paragraph, bookmark_text, bookmark_name, bookmark_id=None):
    """Append ``bookmark_text`` to ``paragraph`` and wrap it in a bookmark.

    Appends ``<w:bookmarkStart/>``, a run holding the text, and
    ``<w:bookmarkEnd/>``, in that order, as direct children of the
    paragraph's ``<w:p>`` element.

    Args:
        paragraph: python-docx paragraph that receives the bookmark.
        bookmark_text: Text of the run enclosed by the bookmark.
        bookmark_name: Name that hyperlinks use (``w:anchor``) to target
            the bookmark.
        bookmark_id: Optional ``w:id`` shared by the start/end pair. When
            omitted, one greater than the largest integer id already used
            by a ``w:bookmarkStart`` in the same XML tree is chosen.
    """
    p = paragraph._p
    id_attr = docx.oxml.ns.qn('w:id')
    start_tag = docx.oxml.ns.qn('w:bookmarkStart')

    if bookmark_id is None:
        # A hard-coded id would be shared by every bookmark created here;
        # scan the tree so each start/end pair gets its own id.
        used_ids = []
        for existing in p.getroottree().getroot().iter(start_tag):
            try:
                used_ids.append(int(existing.get(id_attr)))
            except (TypeError, ValueError):
                # Missing or non-numeric id: nothing to collide with.
                continue
        bookmark_id = max(used_ids, default=-1) + 1
    bookmark_id = str(bookmark_id)

    # The markers go on the paragraph, not inside a run: nesting them (and a
    # second w:r) inside a w:r is not valid WordprocessingML.
    start = docx.oxml.shared.OxmlElement('w:bookmarkStart')
    start.set(id_attr, bookmark_id)
    start.set(docx.oxml.ns.qn('w:name'), bookmark_name)
    p.append(start)

    paragraph.add_run(bookmark_text)

    # The end marker is matched to the start marker by id only.
    end = docx.oxml.shared.OxmlElement('w:bookmarkEnd')
    end.set(id_attr, bookmark_id)
    p.append(end)

def add_link(paragraph, link_to, text, tool_tip=None):
    """Append an internal hyperlink to ``paragraph``.

    Args:
        paragraph: python-docx paragraph that receives the link.
        link_to: Name of the bookmark to jump to (written to ``w:anchor``).
        text: Visible text of the link.
        tool_tip: Optional hover text (written to ``w:tooltip``).
    """
    # Imported locally so the block does not depend on ``docx.text.run``
    # being reachable as an attribute of the top-level ``docx`` module.
    from docx.text.run import Run

    # create hyperlink node
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')

    # the anchor attribute points the link at a bookmark in this document
    hyperlink.set(docx.oxml.shared.qn('w:anchor'), link_to)

    if tool_tip is not None:
        # hover text shown for the link
        hyperlink.set(docx.oxml.shared.qn('w:tooltip'), tool_tip)

    # Wrap the new w:r in a Run proxy so the formatting is applied to the run
    # that actually carries the link text, rather than to an outer wrapper
    # run that holds no text of its own.
    link_run = Run(docx.oxml.shared.OxmlElement('w:r'), paragraph)
    link_run.text = text
    link_run.font.name = "Calibri"
    link_run.font.color.theme_color = MSO_THEME_COLOR_INDEX.HYPERLINK
    link_run.font.underline = True
    hyperlink.append(link_run._r)

    # The hyperlink is attached to the w:p element itself; placing it inside
    # a run is reported not to work in LibreOffice.
    paragraph._p.append(hyperlink)

# test the functions
if __name__ == "__main__":

    # input test document
    doc = Document(r"test_input_1.docx")

    # add a bookmark to every paragraph; paragraph number i (0-based) is
    # bookmarked under the name "temp{i+1}", so names start at "temp1"
    for paranum, paragraph in enumerate(doc.paragraphs):
        add_bookmark(paragraph=paragraph,
                     bookmark_text=f"{paranum}", bookmark_name=f"temp{paranum+1}")

    # add page to the end to put your link
    doc.add_page_break()
    paragraph = doc.add_paragraph("This is where the internal link will live")

    # add a link to the first paragraph, whose bookmark is named "temp1"
    # (no bookmark called "temp0" is ever created by the loop above)
    add_link(paragraph=paragraph, link_to="temp1",
             text="this is a link to ", tool_tip="your message here")


    doc.save(r"output.docx")

Upvotes: 7

Related Questions