Gireesh SB
Gireesh SB

Reputation: 126

How to convert HTML to RTF using JavaScript

I have an input HTML file with header and footer. It needs to be converted to RTF. The header/footer of HTML should be repeated in the resultant RTF file.

Is there any plugin to convert HTML to RTF by only using JavaScript??

Upvotes: 9

Views: 19703

Answers (5)

Amargi
Amargi

Reputation: 71

I combined the answers from @Samra and @Dertalai, and I added support for headers (h1, h2, h3, h4, h5, h6), ordered lists and unordered lists.

function convertHtmlToRtf(html) {
    if (!(typeof html === "string" && html)) {
        return null;
    }

    var tmpRichText, hasHyperlinks;
    var richText = html;

    // Delete HTML comments
    richText = richText.replace(/<!--[\s\S]*?-->/ig,"");

    // Singleton tags
    richText = richText.replace(/<(?:hr)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard \\brdrb \\brdrs \\brdrw10 \\brsp20 \\par}\n{\\pard\\par}\n");
    richText = richText.replace(/<(?:br)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard\\par}\n");

    // Empty tags
    richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?\s*[\/]>/ig, "{\\pard\\par}\n");
    richText = richText.replace(/<(?:[^>]+)\/>/g, "");

    // Hyperlinks
    richText = richText.replace(
        /<a(?:\s+[^>]*)?(?:\s+href=(["'])(?:javascript:void\(0?\);?|#|return false;?|void\(0?\);?|)\1)(?:\s+[^>]*)?>/ig,
        "{{{\n");
    tmpRichText = richText;
    richText = richText.replace(
        /<a(?:\s+[^>]*)?(?:\s+href=(["'])(.+)\1)(?:\s+[^>]*)?>/ig,
        "{\\field{\\*\\fldinst{HYPERLINK\n \"$2\"\n}}{\\fldrslt{\\ul\\cf1\n");
    hasHyperlinks = richText !== tmpRichText;
    richText = richText.replace(/<a(?:\s+[^>]*)?>/ig, "{{{\n");
    richText = richText.replace(/<\/a(?:\s+[^>]*)?>/ig, "\n}}}");

    // Start tags
    richText = richText.replace(/<(?:b|strong)(?:\s+[^>]*)?>/ig, "{\\b\n");
    richText = richText.replace(/<(?:i|em)(?:\s+[^>]*)?>/ig, "{\\i\n");
    richText = richText.replace(/<(?:u|ins)(?:\s+[^>]*)?>/ig, "{\\ul\n");
    richText = richText.replace(/<(?:strike|del)(?:\s+[^>]*)?>/ig, "{\\strike\n");
    richText = richText.replace(/<sup(?:\s+[^>]*)?>/ig, "{\\super\n");
    richText = richText.replace(/<sub(?:\s+[^>]*)?>/ig, "{\\sub\n");
    richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "{\\pard\n");
    richText = richText.replace(/<(?:h1|h2|h3|h4|h5|h6)(?:\s+[^>]*)?>/ig, "{\\pard\\par}{\\pard\\b\n");
    richText = richText.replace(/<(?:ol)(?:\s+[^>]*)?>/ig,
        "{\\pard\\par}{{\\*\\pn\\pnlvlbody\\pnindent0\\pnstart1\\pndec{\\pntxta.}}\\fi-240\\li720\\sa200\\sl180\\slmult1");
    richText = richText.replace(/<(?:ul)(?:\s+[^>]*)?>/ig,
        "{\\pard\\par}{{\\*\\pn\\pnlvlblt\\pnf1\\pnindent0{\\pntxtb\\\'B7}}\\fi-240\\li720\\sa200\\sl180\\slmult1");
    richText = richText.replace(/<(?:li)(?:\s+[^>]*)?>/ig, "{\\pntext\\tab}");

    // End tags
    richText = richText.replace(/<\/(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
    richText = richText.replace(/<\/(?:h1|h2|h3|h4|h5|h6)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
    richText = richText.replace(/<\/(?:b|strong|i|em|u|ins|strike|del|sup|sub|ol|ul)(?:\s+[^>]*)?>/ig, "\n}");
    richText = richText.replace(/<\/(?:li)(?:\s+[^>]*)?>/ig, "\\par");

    // Strip any other remaining HTML tags [but leave their contents]
    richText = richText.replace(/<(?:[^>]+)>/g, "");

    // Remove empty line at the beginning of the text
    richText = richText.startsWith("{\\pard\\par}")  ? richText.substring(11) : richText;

    // Prefix and suffix the rich text with the necessary syntax
    richText =
        "{\\rtf1\\ansi\n" + (hasHyperlinks ? "{\\colortbl\n;\n\\red0\\green0\\blue255;\n}\n" : "") + richText + "\n}";

    return richText;
}

Upvotes: 0

geraphl
geraphl

Reputation: 315

According to Francisco Valles answer i created a bundle to easily include it your web-project:

https://github.com/geraphl/javascript-html-to-rtf-browser

<script src="~/js/html-to-rtf-browser.min.js"></script>

Then you can convert html to microsofts rtf format by adding

var htmlToRtfLocal = new window.htmlToRtf();
var rtfContent = htmlToRtfLocal.convertHtmlToRtf(htmlContent);

to you javascript.

Upvotes: 0

Francisco Valle
Francisco Valle

Reputation: 643

After a bit of search I found a working solution:

https://www.npmjs.com/package/html-to-rtf

With html-to-rtf the conversion is easy (here's a piece of code based on browserify):

var htmlToRtf = require('html-to-rtf');
var htmlText = "<div>...</div>"; //or whatever html you want to transform
var htmlAsRtf = htmlToRtf.convertHtmlToRtf(htmlText); // html transformed to rtf

This solution worked for me. Without browserify you'll have to find implied js inside downloaded modules with npm and link them to your html page.

Upvotes: 7

Dertalai
Dertalai

Reputation: 208

I applied @Samra solution and it was working good. But then I spotted a bug in the output: some text was cut off. After a lot of investigation, it seemed to be about HTML comments (<!-- xxxx -->) weren't being handled properly. My solution was to add this richText transformation as the first one:

// Delete HTML comments
richText = richText.replace(/<!--[\s\S]*?-->/ig,"");

Upvotes: 1

Samra
Samra

Reputation: 2013

You can use this converter

However it does not address bullet points (ul, li elements)

function convertHtmlToRtf(html) {
  if (!(typeof html === "string" && html)) {
      return null;
  }

  var tmpRichText, hasHyperlinks;
  var richText = html;

  // Singleton tags
  richText = richText.replace(/<(?:hr)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard \\brdrb \\brdrs \\brdrw10 \\brsp20 \\par}\n{\\pard\\par}\n");
  richText = richText.replace(/<(?:br)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard\\par}\n");

  // Empty tags
  richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?\s*[\/]>/ig, "{\\pard\\par}\n");
  richText = richText.replace(/<(?:[^>]+)\/>/g, "");

  // Hyperlinks
  richText = richText.replace(
      /<a(?:\s+[^>]*)?(?:\s+href=(["'])(?:javascript:void\(0?\);?|#|return false;?|void\(0?\);?|)\1)(?:\s+[^>]*)?>/ig,
      "{{{\n");
  tmpRichText = richText;
  richText = richText.replace(
      /<a(?:\s+[^>]*)?(?:\s+href=(["'])(.+)\1)(?:\s+[^>]*)?>/ig,
      "{\\field{\\*\\fldinst{HYPERLINK\n \"$2\"\n}}{\\fldrslt{\\ul\\cf1\n");
  hasHyperlinks = richText !== tmpRichText;
  richText = richText.replace(/<a(?:\s+[^>]*)?>/ig, "{{{\n");
  richText = richText.replace(/<\/a(?:\s+[^>]*)?>/ig, "\n}}}");

  // Start tags
  richText = richText.replace(/<(?:b|strong)(?:\s+[^>]*)?>/ig, "{\\b\n");
  richText = richText.replace(/<(?:i|em)(?:\s+[^>]*)?>/ig, "{\\i\n");
  richText = richText.replace(/<(?:u|ins)(?:\s+[^>]*)?>/ig, "{\\ul\n");
  richText = richText.replace(/<(?:strike|del)(?:\s+[^>]*)?>/ig, "{\\strike\n");
  richText = richText.replace(/<sup(?:\s+[^>]*)?>/ig, "{\\super\n");
  richText = richText.replace(/<sub(?:\s+[^>]*)?>/ig, "{\\sub\n");
  richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "{\\pard\n");

  // End tags
  richText = richText.replace(/<\/(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
  richText = richText.replace(/<\/(?:b|strong|i|em|u|ins|strike|del|sup|sub)(?:\s+[^>]*)?>/ig, "\n}");

  // Strip any other remaining HTML tags [but leave their contents]
  richText = richText.replace(/<(?:[^>]+)>/g, "");

  // Prefix and suffix the rich text with the necessary syntax
  richText =
      "{\\rtf1\\ansi\n" + (hasHyperlinks ? "{\\colortbl\n;\n\\red0\\green0\\blue255;\n}\n" : "") + richText +  "\n}";

  return richText;
}

Upvotes: 12

Related Questions