mccambridge
mccambridge

Reputation: 1022

Regex: Slack-like 'markdown' for matching exactly one formatting character

I've tried researching this, but I can only make my search terms so precise.

I want the user to be able to surround text with symbols, _, *, ~, etc. for markdown-like formatting, basically the way Slack does it. I got it working pretty well, but I have one issue.

When the user types _text_, I render <em>text</em>, which is the desired effect. But when the user types __text__, I get <em>_text</em>_ when I would rather the input be left untouched as __text__.

Here's my regex:

// Inline formatting rules, applied in array order. Each entry pairs a global
// regex with the replacement template passed to String.prototype.replace.
const rules = [
    {
        // Text wrapped in underscores is rendered as <em>.
        regex: /_{1}(.+?)_{1}/g,
        replacement: '<em>$1</em>',
    },
    {
        // Text wrapped in asterisks is rendered as <strong>.
        regex: /\*{1}(.+?)\*{1}/g,
        replacement: '<strong>$1</strong>',
    },
    // etc
];

Then I'm running:

// The user input.
let formattedText = '__some text__';
// Run every rule over the text in order; each rule sees the previous rule's output.
for (const {regex, replacement} of rules) {
    formattedText = formattedText.replace(regex, replacement);
}

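The {1} quantifier matches exactly one formatting character, but it does not stop the pattern from matching when several of them appear in a row. How do I make it ignore cases where there are multiple?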

Upvotes: 1

Views: 2258

Answers (2)

Wiktor Stribiżew
Wiktor Stribiżew

Reputation: 627600

You may use

// Italic rule for single underscores only. Group 1 captures whatever precedes
// the opening `_` (start of string or a non-underscore character) so the
// replacement can put it back; group 2 is the emphasised text.
{regex: /(^|[^_])_(?!_)((?:[^]*?[^_])?)_(?!_)/g, replacement: '$1<em>$2</em>'}

See the regex demo.

Details

  • (^|[^_]) - Group 1 (referred to as $1 in the replacement pattern, because this text must be put back into the result): start of string or any char other than _
  • _(?!_) - a _ not followed by another _
  • ((?:[^]*?[^_])?) - Group 2 (referred to as $2 in the replacement pattern): an optional sequence (matched zero or one time) of any 0+ chars ([^]*?), as few as possible (*?), followed by a char other than _
  • _(?!_) - a _ not followed by another _

Upvotes: 2

wp78de
wp78de

Reputation: 19000

First, Slack does not support Markdown formatting (also noted here) but features a similar, simpler markup language:

Slack messages may be formatted using a simple markup language similar to Markdown. Supported formatting includes: pre, code, italic, bold, and even ~strike~.

Anyway, your best bet is to use a popular Markdown library like Showdown. Or, if you really want to keep it simple, you can use something like this:

;(function() { "use strict";

var
	/**
	 * Working buffer holding the text while `parse` transforms it.
	 * @type {string}
	 */
	output = "",

	/**
	 * Rule type tags. `replace` trims a BLOCK replacement and terminates it
	 * with a newline; an INLINE replacement is inserted as-is.
	 * @type {string}
	 */
	BLOCK = "block",
	INLINE = "inline",

	/**
	 * Used to attach MarkdownToHtml object to `window` in browser
	 * context, or to `module.exports` (CommonJS) where appropriate.
	 * @type {Object}
	 */
	exports,

	/**
	 * An array of parse rule descriptor objects, applied in array order.
	 * Each object has three keys; pattern (the RegExp to match), replace
	 * (the replacement template, where `$n` stands for capture group n and
	 * `$Ln` for the length of capture group n), and type (BLOCK or INLINE).
	 * @type {Array}
	 */
	parseMap = [
		{
			// <h1> - <h6>
			// A line starting with 1-6 hashes. The pattern is anchored to the
			// start of a line so a hash in the middle of a sentence (e.g.
			// "issue #42") is left alone, and whitespace between the hashes
			// and the heading text is kept out of the heading.
			pattern: /^(#{1,6})[ \t]*([^\n]+)/gm,
			replace: "<h$L1>$2</h$L1>",
			type: BLOCK,
		},
		{
			// <p>
			// Any line preceded by a newline that doesn't start with an HTML
			// tag, an asterisk, a numeric value with dot following, a
			// greater-than sign (literal or as &gt;), or five or more dashes.
			pattern: /\n(?!<\/?\w+>|\s?\*|\s?[0-9]+\.|>|\&gt;|-{5,})([^\n]+)/g,
			replace: "<p>$1</p>",
			type: BLOCK,
		},
		{
			// <blockquote>
			// A greater-than character (literal or as &gt;) at the start of a
			// line, preceding any characters.
			pattern: /\n(?:&gt;|\>)\W*(.*)/g,
			replace: "<blockquote><p>$1</p></blockquote>",
			type: BLOCK,
		},
		{
			// <ul>
			// A line starting with an asterisk (optionally preceded by one
			// whitespace character). Every item becomes its own list here.
			pattern: /\n\s?\*\s*(.*)/g,
			replace: "<ul>\n\t<li>$1</li>\n</ul>",
			type: BLOCK,
		},
		{
			// <ol>
			// A line starting with a number and a dot (optionally preceded by
			// one whitespace character). Every item becomes its own list here.
			pattern: /\n\s?[0-9]+\.\s*(.*)/g,
			replace: "<ol>\n\t<li>$1</li>\n</ol>",
			type: BLOCK,
		},
		{
			// <strong>
			// Either two asterisks or two underscores, followed by any
			// characters, followed by the same two starting characters.
			// Listed before <em> so doubled markers are consumed first.
			pattern: /(\*\*|__)(.*?)\1/g,
			replace: "<strong>$2</strong>",
			type: INLINE,
		},
		{
			// <em>
			// Either one asterisk or one underscore, followed by any
			// characters, followed by the starting character.
			pattern: /(\*|_)(.*?)\1/g,
			replace: "<em>$2</em>",
			type: INLINE,
		},
		{
			// <a>
			// Not starting with an exclamation mark, square brackets
			// surrounding any characters, followed by parenthesis surrounding
			// any characters. The character before the bracket is captured
			// and re-emitted by the replacement.
			pattern: /([^!])\[([^\[]+)\]\(([^\)]+)\)/g,
			replace: "$1<a href=\"$3\">$2</a>",
			type: INLINE,
		},
		{
			// <img>
			// Starting with an exclamation mark, then followed by square
			// brackets surrounding any characters, followed by parenthesis
			// surrounding any characters.
			pattern: /!\[([^\[]+)\]\(([^\)]+)\)/g,
			replace: "<img src=\"$2\" alt=\"$1\" />",
			type: INLINE,
		},
		{
			// <del>
			// Double tilde characters surrounding any characters.
			pattern: /\~\~(.*?)\~\~/g,
			replace: "<del>$1</del>",
			type: INLINE,
		},
		{
			// <code>
			// Backticks surrounding any characters.
			pattern: /`(.*?)`/g,
			replace: "<code>$1</code>",
			type: INLINE,
		},
		{
			// <hr>
			// A line consisting of five or more dashes.
			pattern: /\n-{5,}\n/g,
			replace: "<hr />",
			type: BLOCK,
		},
	];

/**
 * Self-executing function to handle exporting the parse function for
 * external use: as `module.exports.parse` under CommonJS, otherwise as
 * `MarkdownToHtml.parse` on the global object.
 */
(function go() {
	// Export as a CommonJS module if possible.
	if(typeof module !== "undefined"
	&& typeof module.exports !== "undefined") {
		exports = module.exports;
	}
	// Otherwise check for browser context.
	else if(typeof window !== "undefined") {
		window.MarkdownToHtml = {};
		exports = window.MarkdownToHtml;
	}
	// Otherwise fall back to the generic global object, which covers
	// environments that have neither `module` nor `window`.
	else if(typeof globalThis !== "undefined") {
		globalThis.MarkdownToHtml = {};
		exports = globalThis.MarkdownToHtml;
	}
	// Nothing to attach to: fail with a clear message instead of a
	// "cannot set property of undefined" TypeError on the line below.
	else {
		throw new Error("MarkdownToHtml: no module system or global object to export to");
	}

	exports.parse = parse;
})();

/**
 * Converts a Markdown string into HTML by running every rule of
 * `parseMap` over it in order, then tidying the result.
 *
 * @param  {string} string Markdown input for transformation
 * @return {string}        Transformed HTML output
 */
function parse(string) {
	var ruleIndex;

	/**
	 * Applies one parse rule to the working buffer, expanding the rule's
	 * template for each match via `replace`.
	 */
	function applyRule(rule) {
		output = output.replace(rule.pattern, function() {
			return replace.call(this, arguments, rule.replace, rule.type);
		});
	}

	// The rules expect every line, including the first and last, to be
	// delimited by newlines.
	output = "\n" + string + "\n";

	for(ruleIndex = 0; ruleIndex < parseMap.length; ruleIndex++) {
		applyRule(parseMap[ruleIndex]);
	}

	// Post-process, strip surrounding whitespace, then collapse each run of
	// newlines into a single newline.
	output = clean(output).trim().replace(/[\n]{1,}/g, "\n");

	return output;
}

/**
 * Expands a rule's replacement template for a single RegExp match.
 *
 * Two placeholder forms are recognised, where `n` is a single digit that
 * indexes into `matchList`:
 *   - `$n`  is replaced with entry n (0 is the whole match, 1 and up are
 *           the capture groups);
 *   - `$Ln` is replaced with the string length of entry n.
 *
 * The template is expanded in a single pass, so text inserted from the
 * match is never scanned for placeholders again. Input such as
 * "It costs $3" therefore survives unchanged.
 *
 * @param  {Object} matchList   Array-like of the arguments passed to a
 *                              String.prototype.replace callback
 * @param  {string} replacement Replacement template of the rule
 * @param  {string} type        Rule type, BLOCK or INLINE
 * @return {string}             The expanded replacement text
 */
function replace(matchList, replacement, type) {
	var expanded = replacement.replace(/\$(L?)([0-9])/g, function(token, lengthFlag, index) {
		var value;

		// A placeholder without a corresponding entry stays literal.
		if(!Object.prototype.hasOwnProperty.call(matchList, index)) {
			return token;
		}

		// A capture group that did not take part in the match is undefined;
		// treat it as empty text rather than failing on `.length`.
		value = matchList[index] == null ? "" : String(matchList[index]);

		return lengthFlag === "L" ? String(value.length) : value;
	});

	// Block-level output is trimmed and always ends with one newline.
	if(type === BLOCK) {
		expanded = expanded.trim() + "\n";
	}

	return expanded;
}

/**
 * Post-processes generated HTML by merging neighbouring elements that the
 * parse rules emitted as separate elements.
 *
 * @param  {string} string HTML to tidy
 * @return {string}        HTML with adjacent lists and blockquotes merged
 */
function clean(string) {
	// Drop a closing list tag that is directly followed (ignoring
	// whitespace) by an opening tag of the same list type, so consecutive
	// items end up in one <ul> / <ol>.
	var mergedLists = string.replace(/<\/([uo]l)>\s*<\1>/g, "");

	// Likewise join consecutive blockquotes, keeping the closing tag of the
	// element that ends the first quote.
	return mergedLists.replace(/(<\/\w+>)<\/(blockquote)>\s*<\2>/g, "$1");
}

})();

// Demo: look up the element that holds the example Markdown by its id.
var element = document.getElementById("example-markdown-content");
// Convert the element's inner HTML with the parser and log the resulting HTML string.
console.log(MarkdownToHtml.parse(element.innerHTML));
<div id="example-markdown-content">
# Hello, Markdown!

Here is some example **formatting** in _Markdown_.
</div>

And if even this is too much you can trim it down easily since it is so simple. I guess this will save you a ton of time.

Upvotes: 0

Related Questions