Aaron
Aaron

Reputation: 11

convert html content to save as docx document

I want to upload a .docx file, make some modifications to it (remove special characters, adjust the style), and then save it back as a .docx document.

Read file: Currently, I am using the docx-preview library to read the file and convert it to html content to get all the font, style, and special characters.

Save file: After some research, I found that the html-to-docx library might be useful, but I ran into the following error:

test.html:77 Uncaught (in promise) 
ReferenceError: HTMLtoDOCX is not defined
    at test.html:77:32
    at HTMLButtonElement.<anonymous> (test.html:90:15)
(anonymous) @   test.html:77
(anonymous) @   test.html:90

I have been trying for a while, but I still cannot get this library to work in my script, so I would like to ask for help with this part. Does anyone have experience saving HTML content as a .docx document? The solution does not necessarily need to use the html-to-docx library.

My whole code is below; it can be saved directly as test.html and opened in a browser:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Word Document Reader</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        #container {
            margin-top: 20px;
            font-size: 16px;
            color: #333;
        }
        #downloadButton {
            display: none;
            margin-top: 20px;
        }
    </style>
</head>
<body>
    <h1>Word Document Reader</h1>
    <input type="file" id="fileInput" accept=".docx">
    <button id="downloadButton">Download Document</button>
    <div id="container"></div>

    <!-- Optional polyfill for promise -->
    <script src="https://unpkg.com/promise-polyfill/dist/polyfill.min.js"></script>
    <!-- lib uses jszip -->
    <script src="https://unpkg.com/jszip/dist/jszip.min.js"></script>
    <script src="https://unpkg.com/docx-preview/dist/docx-preview.min.js"></script>
    <script src="https://unpkg.com/docx/build/index.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/html-to-docx@1.8.0/dist/html-to-docx.umd.js"></script>
    <script>
        let currentBlob = null; // Blob built from the bytes of the most recently read file
        let modifiedContent = ''; // container HTML after the span pattern has been replaced
        // On file selection: check the name, read the file, render it into #container, then patch the rendered HTML.
        document.getElementById('fileInput').addEventListener('change', function(event) {
            const file = event.target.files[0]; // only the first selected file is used

            if (file && file.name.endsWith('.docx')) { // validation is by file-name suffix only
                const reader = new FileReader();

                reader.onload = function(event) { // this inner `event` (the load event) shadows the outer change event
                    const arrayBuffer = event.target.result;
                    currentBlob = new Blob([arrayBuffer], { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });

                    docx.renderAsync(currentBlob, document.getElementById("container")) // NOTE(review): `docx` is not declared in this script; presumably a global from one of the loaded libraries - confirm which
                        .then(() => {
                            console.log("docx: finished");
                            let renderedContent = document.getElementById("container").innerHTML;
                            console.log("Original Rendered Content:", renderedContent);

                            // Replace every occurrence of this exact span sequence with the literal text -->
                            const pattern = /<\/span><span><span style="font-family: Wingdings;"><\/span><\/span><span>/g;
                            modifiedContent = renderedContent.replace(pattern, '-->');

                            // Write the patched markup back into the container so the page shows the modified version
                            document.getElementById("container").innerHTML = modifiedContent;
                            console.log("Modified Rendered Content:", modifiedContent);

                            document.getElementById('downloadButton').style.display = 'block'; // the button is hidden until this point
                        })
                        .catch(err => console.error("docx: error", err)); // rejections from renderAsync or the callback above are only logged
                };

                reader.readAsArrayBuffer(file); // starts the read; reader.onload runs when it completes
            } else {
                alert('Please upload a valid .docx file.');
            }
        });

        document.getElementById('downloadButton').addEventListener('click', function() { // click handler: tries to convert HTML to a .docx buffer and write it out
            (async () => { // async IIFE so `await` can be used; its promise is not awaited or caught, so failures surface as "Uncaught (in promise)"
            const fileBuffer = await HTMLtoDOCX(htmlString, null, { // NOTE(review): neither `HTMLtoDOCX` nor `htmlString` is declared in this script; the reported ReferenceError points at this line
                table: { row: { cantSplit: true } },
                footer: true,
                pageNumber: true,
            });
            // NOTE(review): `fs` and `filePath` are not declared anywhere in this script either - this looks like a Node.js-style call; confirm
            fs.writeFile(filePath, fileBuffer, (error) => {
                if (error) {
                console.log('Docx file creation failed');
                return;
                }
                console.log('Docx file created successfully');
            });
            })();
        });
    </script>
</body>
</html>

Tried: using the html-to-docx library to convert the HTML content to a .docx document, but it failed.

Expect: the conversion from html content to docx file can be successfully executed.

Upvotes: 0

Views: 402

Answers (1)

Yogesh Rajput
Yogesh Rajput

Reputation: 71

**

<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>Word Document Reader</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      margin: 20px;
    }
    
    #container {
      margin-top: 20px;
      font-size: 16px;
      color: #333;
    }
    
    #downloadButton {
      display: none;
      margin-top: 20px;
    }
  </style>
</head>

<body>
  <h1>Word Document Reader</h1>
  <input type="file" id="fileInput" accept=".docx" />
  <button id="downloadButton">Download Document</button>
  <div id="container"></div>

  <!-- Optional polyfill for promise -->
  <script src="https://unpkg.com/promise-polyfill/dist/polyfill.min.js"></script>
  <!-- lib uses jszip -->
  <script src="https://unpkg.com/jszip/dist/jszip.min.js"></script>
  <script src="https://unpkg.com/docx-preview/dist/docx-preview.min.js"></script>
  <script src="https://unpkg.com/docx/build/index.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/html-to-docx@1.8.0/dist/html-to-docx.umd.js"></script>
  <script>
    // Page-level state: the uploaded document as a Blob, and the container
    // markup after the span substitution has been applied.
    let currentBlob = null;
    let modifiedContent = "";

    // When a file is chosen: check its name, read it into memory, render it
    // into #container, patch the rendered HTML and reveal the download button.
    document.getElementById("fileInput").addEventListener("change", (changeEvent) => {
      const picked = changeEvent.target.files[0];

      // Guard clause: nothing selected, or the name does not end in .docx.
      if (!(picked && picked.name.endsWith(".docx"))) {
        alert("Please upload a valid .docx file.");
        return;
      }

      const docxMimeType =
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

      // The exact span sequence that gets swapped for the literal text "-->".
      const spanSequence =
        /<\/span><span><span style="font-family: Wingdings;"><\/span><\/span><span>/g;

      // Runs once rendering has finished: rewrites the container markup and
      // makes the download button visible.
      const patchRenderedMarkup = () => {
        console.log("docx: finished");

        const host = document.getElementById("container");
        const renderedContent = host.innerHTML;
        console.log("Original Rendered Content:", renderedContent);

        modifiedContent = renderedContent.replace(spanSequence, "-->");
        host.innerHTML = modifiedContent;
        console.log("Modified Rendered Content:", modifiedContent);

        const button = document.getElementById("downloadButton");
        button.style.display = "block";
      };

      const reader = new FileReader();

      reader.onload = (loadEvent) => {
        currentBlob = new Blob([loadEvent.target.result], { type: docxMimeType });

        docx
          .renderAsync(currentBlob, document.getElementById("container"))
          .then(patchRenderedMarkup)
          .catch((err) => console.error("docx: error", err));
      };

      reader.readAsArrayBuffer(picked);
    });

    // Click handler for the download button: converts the HTML currently shown
    // in #container into a .docx file and hands it to the browser as a download.
    //
    // - The converter is looked up on `window` under both spellings, because the
    //   global name exported by the html-to-docx UMD bundle must be confirmed
    //   (NOTE(review): verify against the bundle that is actually loaded).
    // - FileSaver's `saveAs` is used only if the page has loaded it; otherwise a
    //   temporary <a download> link is clicked, so no extra library is required.
    // - Any failure is caught and logged instead of becoming an unhandled
    //   promise rejection.
    document
      .getElementById("downloadButton")
      .addEventListener("click", async function() {
        const DOCX_MIME =
          "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        const FILE_NAME = "modified_document.docx";

        try {
          const convert = window.HTMLToDOCX || window.HTMLtoDOCX;
          if (typeof convert !== "function") {
            throw new Error(
              "html-to-docx is not loaded: no HTMLToDOCX / HTMLtoDOCX global was found"
            );
          }

          const htmlString = document.getElementById("container").innerHTML;
          const fileBuffer = await convert(htmlString, null, {
            table: {
              row: {
                cantSplit: true
              }
            },
            footer: true,
            pageNumber: true,
          });

          // Wrap the result so it carries the Word MIME type regardless of
          // whether the converter returned a Blob or a raw buffer.
          const blob = new Blob([fileBuffer], {
            type: DOCX_MIME
          });

          if (typeof saveAs === "function") {
            // FileSaver.js is available on this page; use it.
            saveAs(blob, FILE_NAME);
            return;
          }

          // Native fallback: click a temporary link that points at an object URL.
          const objectUrl = URL.createObjectURL(blob);
          const link = document.createElement("a");
          link.href = objectUrl;
          link.download = FILE_NAME;
          document.body.appendChild(link);
          link.click();
          link.remove();
          // Release the object URL after the click has been dispatched.
          setTimeout(() => URL.revokeObjectURL(objectUrl), 0);
        } catch (err) {
          console.error("html-to-docx: error", err);
        }
      });
  </script>
</body>

</html>

**

// Click handler for the download button: converts the HTML currently shown in
// #container into a .docx file and hands it to the browser as a download.
//
// - The converter is looked up on `window` under both spellings, because the
//   global name exported by the html-to-docx UMD bundle must be confirmed
//   (NOTE(review): verify against the bundle that is actually loaded).
// - FileSaver's `saveAs` is used only if the page has loaded it; otherwise a
//   temporary <a download> link is clicked, so no extra library is required.
// - Any failure is caught and logged instead of becoming an unhandled promise
//   rejection.
document
  .getElementById("downloadButton")
  .addEventListener("click", async function () {
    const DOCX_MIME =
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    const FILE_NAME = "modified_document.docx";

    try {
      const convert = window.HTMLToDOCX || window.HTMLtoDOCX;
      if (typeof convert !== "function") {
        throw new Error(
          "html-to-docx is not loaded: no HTMLToDOCX / HTMLtoDOCX global was found"
        );
      }

      const htmlString = document.getElementById("container").innerHTML;
      const fileBuffer = await convert(htmlString, null, {
        table: { row: { cantSplit: true } },
        footer: true,
        pageNumber: true,
      });

      // Wrap the result so it carries the Word MIME type regardless of whether
      // the converter returned a Blob or a raw buffer.
      const blob = new Blob([fileBuffer], { type: DOCX_MIME });

      if (typeof saveAs === "function") {
        // FileSaver.js is available on this page; use it.
        saveAs(blob, FILE_NAME);
        return;
      }

      // Native fallback: click a temporary link that points at an object URL.
      const objectUrl = URL.createObjectURL(blob);
      const link = document.createElement("a");
      link.href = objectUrl;
      link.download = FILE_NAME;
      document.body.appendChild(link);
      link.click();
      link.remove();
      // Release the object URL after the click has been dispatched.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 0);
    } catch (err) {
      console.error("html-to-docx: error", err);
    }
  });

Upvotes: 0

Related Questions