Reputation: 129
I have some text boxes (Shape > Text Box) inside a Word document. The document is a CV template that contains a lot of them. I would like to select all text boxes in the document, extract their text, remove the text boxes, and insert the extracted text in their place. I have tried
const range = context.document.getSelection();
range.load("text");
and then sync the context so that I can get the text.
Upvotes: 1
Views: 1107
Reputation: 129
I finally went with the following workaround. It is fast and works nicely on both Windows & macOS:
1. Get OOXML of the document's body
2. Parse OOXML.value & Generate an xmlDocument (xmlDoc)
3. Detect existing Textboxes & Shapes that contain text: getElementsByTagName("wps:wsp")
4. Extract text from (3)
5. Generate a simple xml TextElement with text extracted
6. Replace (3) with (5)
7. Serialize to xmlString the updated xmlDoc and get the updated OOXML.value
8. Insert updated OOXML.value to document replacing the existing one
// Replaces every run in the document body that holds a text box (wps:txbx)
// with a plain run containing the text box's text, one line per paragraph.
// Flow: read the body OOXML -> rewrite it with the DOM APIs -> write it back.
Word.run(function (context) {
  // Select document body and queue the OOXML read
  const body = context.document.body;
  const ooxml = body.getOoxml();
  return context.sync().then(function () {
    // Parse the OOXML string into an XML document
    const parser = new DOMParser();
    const xmlDoc = parser.parseFromString(ooxml.value, "text/xml");
    // Snapshot all runs: getElementsByTagName returns a live collection and
    // the loop below replaces runs, which would shift it while iterating.
    const rows = Array.from(xmlDoc.getElementsByTagName("w:r"));
    for (const row of rows) {
      // Runs nested inside an already replaced text box are detached; skip them
      if (!xmlDoc.contains(row)) continue;
      // If no textbox, shape, wordart exists skip current run
      const textboxContainer = row.getElementsByTagName("wps:txbx")[0];
      if (!textboxContainer) continue;
      const paragraphs = textboxContainer.getElementsByTagName("w:p");
      // Create a new run which will replace the existing run; it starts
      // with a line break so the extracted text begins on its own line
      const newRow = xmlDoc.createElement("w:r");
      newRow.appendChild(xmlDoc.createElement("w:br"));
      for (let p = 0; p < paragraphs.length; p++) {
        // Paragraphs without any text element contribute nothing
        const textBoxTexts = paragraphs[p].getElementsByTagName("w:t");
        if (textBoxTexts.length === 0) continue;
        // Extract text; every text element is followed by a single space
        let textExtracted = "";
        for (let k = 0; k < textBoxTexts.length; k++) {
          // textContent copies the characters without a markup round-trip
          textExtracted = textExtracted + textBoxTexts[k].textContent + " ";
        }
        // Create a temp run which will hold the extracted text
        const tempRow = xmlDoc.createElement("w:r");
        const newText = xmlDoc.createElement('w:t');
        // Keep the trailing/inner spaces when Word reads the text element
        newText.setAttribute("xml:space", "preserve");
        newText.textContent = textExtracted;
        tempRow.appendChild(newText);
        newRow.appendChild(tempRow);
        // One line break after each paragraph's text
        newRow.appendChild(xmlDoc.createElement("w:br"));
      }
      // Replace existing run with the new one
      row.replaceWith(newRow);
    }
    // Serialize dom, clear body and replace OOXML
    const serializedXML = new XMLSerializer().serializeToString(xmlDoc.documentElement);
    body.clear();
    return context.sync()
      .then(function () {
        body.insertOoxml(serializedXML, Word.InsertLocation.replace);
        // Flush the queued insert so a failure reaches the catch below
        // before anything is reported as finished
        return context.sync();
      })
      .then(function () {
        console.log('done');
      });
  });
})
  .catch(error => {
    console.log('Error: ', error);
    // NOTE(review): `resolve` is not defined in this snippet; presumably it
    // comes from an enclosing Promise executor — confirm against the caller.
    resolve(false);
  });
Upvotes: 1