Reputation: 126
I have an input HTML file with header and footer. It needs to be converted to RTF. The header/footer of HTML should be repeated in the resultant RTF file.
Is there any plugin to convert HTML to RTF by only using JavaScript??
Upvotes: 9
Views: 19703
Reputation: 71
I combined the answers from @Samra and @Dertalai, and I added support for headers (h1, h2, h3, h4, h5, h6), ordered lists and unordered lists.
function convertHtmlToRtf(html) {
if (!(typeof html === "string" && html)) {
return null;
}
var tmpRichText, hasHyperlinks;
var richText = html;
// Delete HTML comments
richText = richText.replace(/<!--[\s\S]*?-->/ig,"");
// Singleton tags
richText = richText.replace(/<(?:hr)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard \\brdrb \\brdrs \\brdrw10 \\brsp20 \\par}\n{\\pard\\par}\n");
richText = richText.replace(/<(?:br)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard\\par}\n");
// Empty tags
richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?\s*[\/]>/ig, "{\\pard\\par}\n");
richText = richText.replace(/<(?:[^>]+)\/>/g, "");
// Hyperlinks
richText = richText.replace(
/<a(?:\s+[^>]*)?(?:\s+href=(["'])(?:javascript:void\(0?\);?|#|return false;?|void\(0?\);?|)\1)(?:\s+[^>]*)?>/ig,
"{{{\n");
tmpRichText = richText;
richText = richText.replace(
/<a(?:\s+[^>]*)?(?:\s+href=(["'])(.+)\1)(?:\s+[^>]*)?>/ig,
"{\\field{\\*\\fldinst{HYPERLINK\n \"$2\"\n}}{\\fldrslt{\\ul\\cf1\n");
hasHyperlinks = richText !== tmpRichText;
richText = richText.replace(/<a(?:\s+[^>]*)?>/ig, "{{{\n");
richText = richText.replace(/<\/a(?:\s+[^>]*)?>/ig, "\n}}}");
// Start tags
richText = richText.replace(/<(?:b|strong)(?:\s+[^>]*)?>/ig, "{\\b\n");
richText = richText.replace(/<(?:i|em)(?:\s+[^>]*)?>/ig, "{\\i\n");
richText = richText.replace(/<(?:u|ins)(?:\s+[^>]*)?>/ig, "{\\ul\n");
richText = richText.replace(/<(?:strike|del)(?:\s+[^>]*)?>/ig, "{\\strike\n");
richText = richText.replace(/<sup(?:\s+[^>]*)?>/ig, "{\\super\n");
richText = richText.replace(/<sub(?:\s+[^>]*)?>/ig, "{\\sub\n");
richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "{\\pard\n");
richText = richText.replace(/<(?:h1|h2|h3|h4|h5|h6)(?:\s+[^>]*)?>/ig, "{\\pard\\par}{\\pard\\b\n");
richText = richText.replace(/<(?:ol)(?:\s+[^>]*)?>/ig,
"{\\pard\\par}{{\\*\\pn\\pnlvlbody\\pnindent0\\pnstart1\\pndec{\\pntxta.}}\\fi-240\\li720\\sa200\\sl180\\slmult1");
richText = richText.replace(/<(?:ul)(?:\s+[^>]*)?>/ig,
"{\\pard\\par}{{\\*\\pn\\pnlvlblt\\pnf1\\pnindent0{\\pntxtb\\\'B7}}\\fi-240\\li720\\sa200\\sl180\\slmult1");
richText = richText.replace(/<(?:li)(?:\s+[^>]*)?>/ig, "{\\pntext\\tab}");
// End tags
richText = richText.replace(/<\/(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
richText = richText.replace(/<\/(?:h1|h2|h3|h4|h5|h6)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
richText = richText.replace(/<\/(?:b|strong|i|em|u|ins|strike|del|sup|sub|ol|ul)(?:\s+[^>]*)?>/ig, "\n}");
richText = richText.replace(/<\/(?:li)(?:\s+[^>]*)?>/ig, "\\par");
// Strip any other remaining HTML tags [but leave their contents]
richText = richText.replace(/<(?:[^>]+)>/g, "");
// Remove empty line at the beginning of the text
richText = richText.startsWith("{\\pard\\par}") ? richText.substring(11) : richText;
// Prefix and suffix the rich text with the necessary syntax
richText =
"{\\rtf1\\ansi\n" + (hasHyperlinks ? "{\\colortbl\n;\n\\red0\\green0\\blue255;\n}\n" : "") + richText + "\n}";
return richText;
}
Upvotes: 0
Reputation: 315
According to Francisco Valles answer i created a bundle to easily include it your web-project:
https://github.com/geraphl/javascript-html-to-rtf-browser
<script src="~/js/html-to-rtf-browser.min.js"></script>
Then you can convert html
to microsofts rtf format by adding
var htmlToRtfLocal = new window.htmlToRtf();
var rtfContent = htmlToRtfLocal.convertHtmlToRtf(htmlContent);
to you javascript.
Upvotes: 0
Reputation: 643
After a bit of search I found a working solution:
https://www.npmjs.com/package/html-to-rtf
With html-to-rtf
the conversion is easy (here's a piece of code based on browserify):
var htmlToRtf = require('html-to-rtf');
var htmlText = "<div>...</div>"; //or whatever html you want to transform
var htmlAsRtf = htmlToRtf.convertHtmlToRtf(htmlText); // html transformed to rtf
This solution worked for me. Without browserify you'll have to find implied js
inside downloaded modules with npm
and link them to your html page.
Upvotes: 7
Reputation: 208
I applied @Samra solution and it was working good. But then I spotted a bug in the output: some text was cut off. After a lot of investigation, it seemed to be about HTML comments (<!-- xxxx -->
) weren't being handled properly. My solution was to add this richText transformation as the first one:
// Delete HTML comments
richText = richText.replace(/<!--[\s\S]*?-->/ig,"");
Upvotes: 1
Reputation: 2013
You can use this converter
However it does not address bullet points (ul, li elements)
function convertHtmlToRtf(html) {
if (!(typeof html === "string" && html)) {
return null;
}
var tmpRichText, hasHyperlinks;
var richText = html;
// Singleton tags
richText = richText.replace(/<(?:hr)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard \\brdrb \\brdrs \\brdrw10 \\brsp20 \\par}\n{\\pard\\par}\n");
richText = richText.replace(/<(?:br)(?:\s+[^>]*)?\s*[\/]?>/ig, "{\\pard\\par}\n");
// Empty tags
richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?\s*[\/]>/ig, "{\\pard\\par}\n");
richText = richText.replace(/<(?:[^>]+)\/>/g, "");
// Hyperlinks
richText = richText.replace(
/<a(?:\s+[^>]*)?(?:\s+href=(["'])(?:javascript:void\(0?\);?|#|return false;?|void\(0?\);?|)\1)(?:\s+[^>]*)?>/ig,
"{{{\n");
tmpRichText = richText;
richText = richText.replace(
/<a(?:\s+[^>]*)?(?:\s+href=(["'])(.+)\1)(?:\s+[^>]*)?>/ig,
"{\\field{\\*\\fldinst{HYPERLINK\n \"$2\"\n}}{\\fldrslt{\\ul\\cf1\n");
hasHyperlinks = richText !== tmpRichText;
richText = richText.replace(/<a(?:\s+[^>]*)?>/ig, "{{{\n");
richText = richText.replace(/<\/a(?:\s+[^>]*)?>/ig, "\n}}}");
// Start tags
richText = richText.replace(/<(?:b|strong)(?:\s+[^>]*)?>/ig, "{\\b\n");
richText = richText.replace(/<(?:i|em)(?:\s+[^>]*)?>/ig, "{\\i\n");
richText = richText.replace(/<(?:u|ins)(?:\s+[^>]*)?>/ig, "{\\ul\n");
richText = richText.replace(/<(?:strike|del)(?:\s+[^>]*)?>/ig, "{\\strike\n");
richText = richText.replace(/<sup(?:\s+[^>]*)?>/ig, "{\\super\n");
richText = richText.replace(/<sub(?:\s+[^>]*)?>/ig, "{\\sub\n");
richText = richText.replace(/<(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "{\\pard\n");
// End tags
richText = richText.replace(/<\/(?:p|div|section|article)(?:\s+[^>]*)?>/ig, "\n\\par}\n");
richText = richText.replace(/<\/(?:b|strong|i|em|u|ins|strike|del|sup|sub)(?:\s+[^>]*)?>/ig, "\n}");
// Strip any other remaining HTML tags [but leave their contents]
richText = richText.replace(/<(?:[^>]+)>/g, "");
// Prefix and suffix the rich text with the necessary syntax
richText =
"{\\rtf1\\ansi\n" + (hasHyperlinks ? "{\\colortbl\n;\n\\red0\\green0\\blue255;\n}\n" : "") + richText + "\n}";
return richText;
}
Upvotes: 12