Reputation: 2191
How can I extract all the links inside the url(...) expressions in the following content using a regex:
/* cyrillic-ext */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 400;
src: local('Montserrat Regular'), local('Montserrat-Regular'), url(https://fonts.gstatic.com/s/montserrat/v12/rBHvpRWBkgyW99dXT88n7yEAvth_LlrfE80CYdSH47w.woff2) format('woff2');
unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
}
/* cyrillic */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 400;
src: local('Montserrat Regular'), local('Montserrat-Regular'), url(https://fonts.gstatic.com/s/montserrat/v12/NX1NravqaXESu9fFv7KuqiEAvth_LlrfE80CYdSH47w.woff2) format('woff2');
unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
}
/* vietnamese */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 400;
src: local('Montserrat Regular'), local('Montserrat-Regular'), url(https://fonts.gstatic.com/s/montserrat/v12/SKK6Nusyv8QPNMtI4j9J2yEAvth_LlrfE80CYdSH47w.woff2) format('woff2');
unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB;
}
/* latin-ext */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 400;
src: local('Montserrat Regular'), local('Montserrat-Regular'), url(https://fonts.gstatic.com/s/montserrat/v12/gFXtEMCp1m_YzxsBpKl68iEAvth_LlrfE80CYdSH47w.woff2) format('woff2');
unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+20A0-20AB, U+20AD-20CF, U+2C60-2C7F, U+A720-A7FF;
}
/* latin */
@font-face {
font-family: 'Montserrat';
font-style: normal;
font-weight: 400;
src: local('Montserrat Regular'), local('Montserrat-Regular'), url(https://fonts.gstatic.com/s/montserrat/v12/zhcz-_WihjSQC0oHJ9TCYPk_vArhqVIZ0nv9q090hN8.woff2) format('woff2');
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2212, U+2215;
}
It should return a list of links:
...
Thanks
Upvotes: 0
Views: 228
Reputation: 131180
Try url\((.*?)\). This matches "url(" and captures everything after the opening parenthesis up to the first closing parenthesis. The .*? part is a lazy quantifier: it matches as few characters as possible, so the capture stops at the first ")" instead of running on to the last one in the text.
The captured URL will appear in the first capture group, e.g.:
// Group 1 of every match holds the text between "url(" and the first ")".
var regex = new Regex(@"url\((.*?)\)");

// Query syntax: the explicitly typed range variable (Match m) casts each
// element of the non-generic MatchCollection, then duplicates are dropped.
var urls = (from Match m in regex.Matches(input)
            select m.Groups[1].Value)
           .Distinct()
           .ToArray();
Or
// Method-syntax form of the same pipeline: cast the matches, project
// capture group 1, de-duplicate, and materialize as an array.
var matches = regex.Matches(input).Cast<Match>();
var urls = matches
    .Select(m => m.Groups[1].Value)
    .Distinct()
    .ToArray();
Upvotes: 1
Reputation: 7054
You can use this code:
var content = "..."; // your input here

// The named group "url" captures every character after "url(" up to,
// but not including, the next ")".
var regex = new Regex("url\\((?<url>[^\\)]+)");

// Collect the distinct captured values into an array.
var urls = (from Match m in regex.Matches(content)
            select m.Groups["url"].Value)
           .Distinct()
           .ToArray();
Regex explanation:
url // match "url" literally
\( // match an opening parenthesis
(?<url> // start a named capture group called "url"
[^\)]+ // match one or more characters that are not a closing parenthesis
) // close the capture group
Upvotes: 2
Reputation: 31282
string content = "...";

// Only http:// and https:// URLs wrapped in url( ... ) are matched;
// group 1 is the URL itself, without the surrounding url( ).
MatchCollection matches = Regex.Matches(content, @"url\((https?://.+?)\)");
foreach (Match match in matches)
{
    string url = match.Groups[1].Value;
    Console.WriteLine(url);
}
Upvotes: 2