Reputation: 2021
I need to decide whether a file name matches a file mask. The file mask may contain * or ? characters. Is there a simple solution for this?
// Desired usage: ask whether the file name "myfile.txt" fits the mask "my*.txt".
bool bFits = Fits("myfile.txt", "my*.txt");
// Requested helper: takes a file name and a mask and returns a bool.
// The body is a placeholder left open by the question, not real code.
private bool Fits(string sFileName, string sFileMask)
{
??? anything simple here ???
}
Upvotes: 34
Views: 33514
Reputation: 21
Use DP:
/// <summary>
/// Tests whether the whole of <paramref name="s"/> matches <paramref name="wildCard"/>,
/// where '*' stands for any run of characters (including none) and '?' for exactly one character.
/// Every other pattern character must equal the corresponding text character exactly.
/// </summary>
/// <param name="s">The text to test.</param>
/// <param name="wildCard">The pattern to test against.</param>
/// <returns><c>true</c> when the entire text matches the entire pattern.</returns>
public static bool MatchesWildCard(this string s, string wildCard)
{
    int patternLength = wildCard.Length;
    int textLength = s.Length;

    // table[p, t] is true when the first p pattern characters match the first t text characters.
    var table = new bool[patternLength + 1, textLength + 1];
    table[0, 0] = true;

    // A leading run of '*' matches every prefix of the text, the empty one included.
    int leading = 1;
    while (leading <= patternLength && wildCard[leading - 1] == '*')
    {
        for (int column = 0; column <= textLength; column++)
        {
            table[leading, column] = true;
        }
        leading++;
    }

    for (int p = 1; p <= patternLength; p++)
    {
        char patternChar = wildCard[p - 1];
        for (int t = 1; t <= textLength; t++)
        {
            bool matched;
            switch (patternChar)
            {
                case '*':
                    // The star absorbs one more text character, or pairs with it, or matches nothing.
                    matched = table[p, t - 1] || table[p - 1, t - 1] || table[p - 1, t];
                    break;
                case '?':
                    matched = table[p - 1, t - 1];
                    break;
                default:
                    matched = table[p - 1, t - 1] && (patternChar == s[t - 1]);
                    break;
            }
            table[p, t] = matched;
        }
    }

    return table[patternLength, textLength];
}
The following are the test cases:
/// <summary>
/// Data-driven tests for <c>WildCardHelper.MatchesWildCard</c>: one list of (text, pattern)
/// pairs that must match and one list of pairs that must not.
/// </summary>
[TestClass]
public class UnitTestWildCardHelper
{
    // Packs one (text, pattern) pair into the object[] shape the data-driven tests consume.
    private static object[] Row(string s, string wildCard)
    {
        return new object[] { s, wildCard };
    }

    /// <summary>Every pair in the "true" data set must be reported as a match.</summary>
    [TestMethod]
    [DynamicData(nameof(TestMatchesWildCardDataTrue))]
    public void TestMatchesWildCardTrue(string s, string wildCard)
        => Assert.IsTrue(WildCardHelper.MatchesWildCard(s, wildCard));

    /// <summary>Every pair in the "false" data set must be reported as a mismatch.</summary>
    [TestMethod]
    [DynamicData(nameof(TestMatchesWildCardDataFalse))]
    public void TestMatchesWildCardFalse(string s, string wildCard)
        => Assert.IsFalse(WildCardHelper.MatchesWildCard(s, wildCard));

    /// <summary>(text, pattern) pairs that are expected to match.</summary>
    public static IEnumerable<object[]> TestMatchesWildCardDataTrue => new[]
    {
        Row("", ""),
        Row("", "*"),
        Row("", "**"),
        Row("", "***"),
        Row("a", "a"),
        Row("a", "?"),
        Row("a", "a*"),
        Row("a", "*a"),
        Row("a", "*"),
        Row("a", "**"),
        Row("abc", "abc"),
        Row("abc", "?bc"),
        Row("abc", "a?c"),
        Row("abc", "ab?"),
        Row("abc", "*bc"),
        Row("abc", "*c"),
        Row("abc", "*"),
        Row("abc", "a*c"),
        Row("abc", "a*"),
        Row("abc", "ab*"),
        Row("abc", "a*?"),
        Row("abc", "a**"),
        Row("abc", "*?"),
        Row("abc", "???"),
        Row("abc", "?*"),
        Row("aaaaaaaaaaaaaaaaaab", "*b"),
        Row("aaaaaaaaaaaaaaaaaab", "a*b"),
        Row("aaaaaaaaaaaaaaaaaab", "a*"),
        Row("aaaaaaaaaaaaaaaaaab", "a*a*a**b"),
        Row("abc.xyz.txt", "*txt"),
        Row("abc.xyz.txt", "*.txt"),
        Row("abc.xyz.txt", "abc*.txt"),
        Row("abc.xyz.txt", "abc.*.txt"),
    };

    /// <summary>(text, pattern) pairs that are expected not to match.</summary>
    public static IEnumerable<object[]> TestMatchesWildCardDataFalse => new[]
    {
        Row("", "a"),
        Row("", "?"),
        Row("a", ""),
        Row("a", "b"),
        Row("abc", "a?"),
        Row("a", "??"),
        Row("abc", "*b"),
        Row("abc", "b*"),
        Row("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "*b"),
        Row("abc.xyz.txt", "*.ttt"),
    };
}
Upvotes: 1
Reputation: 5629
None of these answers quite seem to do the trick, and msorens's is needlessly complex. This one should work just fine:
/// <summary>
/// Tests whether <paramref name="fileName"/> matches the wildcard <paramref name="fileMask"/>,
/// where '*' stands for any run of characters and '?' for exactly one character.
/// The comparison is case-insensitive and must cover the whole file name.
/// </summary>
/// <param name="fileName">The file name to test.</param>
/// <param name="fileMask">The wildcard mask; all other regex metacharacters are taken literally.</param>
/// <returns><c>true</c> when the entire file name matches the mask.</returns>
/// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
public static Boolean MatchesMask(string fileName, string fileMask)
{
    if (fileName == null)
    {
        throw new ArgumentNullException(nameof(fileName));
    }
    if (fileMask == null)
    {
        throw new ArgumentNullException(nameof(fileMask));
    }

    // Regex.Escape turns the wildcards into \* and \?; map those to their regex equivalents.
    // \z is used instead of $ because $ would also accept a name that ends in a newline.
    String convertedMask = "^" + Regex.Escape(fileMask).Replace("\\*", ".*").Replace("\\?", ".") + "\\z";

    // Singleline lets '.' stand for any character, newline included; CultureInvariant keeps
    // the case-insensitive comparison independent of the current culture.
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant;
    return Regex.IsMatch(fileName, convertedMask, options);
}
This makes sure possible regex chars in the mask are escaped, replaces the \* and \?, and surrounds it all by ^ and $ to mark the boundaries.
Of course, in most situations, it's far more useful to simply make this into a FileMaskToRegex
tool function which returns the Regex object, so you just got it once and can then make a loop in which you check all strings from your files list on it.
/// <summary>
/// Converts a wildcard file mask ('*' = any run of characters, '?' = exactly one character)
/// into a case-insensitive <see cref="Regex"/> that must match a whole file name.
/// Build it once and reuse it when checking many file names against the same mask.
/// </summary>
/// <param name="fileMask">The wildcard mask; all other regex metacharacters are taken literally.</param>
/// <returns>A regex equivalent to the mask.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileMask"/> is <c>null</c>.</exception>
public static Regex FileMaskToRegex(string fileMask)
{
    if (fileMask == null)
    {
        throw new ArgumentNullException(nameof(fileMask));
    }

    // Regex.Escape turns the wildcards into \* and \?; map those to their regex equivalents.
    // \z is used instead of $ because $ would also accept a name that ends in a newline.
    String convertedMask = "^" + Regex.Escape(fileMask).Replace("\\*", ".*").Replace("\\?", ".") + "\\z";

    // Singleline lets '.' stand for any character, newline included; CultureInvariant keeps
    // the case-insensitive comparison independent of the current culture.
    return new Regex(convertedMask, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.CultureInvariant);
}
Upvotes: 14
Reputation: 1196
For .NET Core, this is how Microsoft does it:
// Reports whether the file-name part of relativePath matches one of the configured Filters.
// A path with no file-name part never matches; an empty filter list matches every name.
private bool MatchPattern(ReadOnlySpan<char> relativePath)
{
    ReadOnlySpan<char> fileName = IO.Path.GetFileName(relativePath);
    if (fileName.Length == 0)
    {
        return false;
    }

    // With no filters configured the name is accepted outright.
    bool matched = Filters.Count == 0;
    if (!matched)
    {
        foreach (string filter in Filters)
        {
            if (FileSystemName.MatchesSimpleExpression(filter, fileName, ignoreCase: !PathInternal.IsCaseSensitive))
            {
                matched = true;
                break;
            }
        }
    }

    return matched;
}
The way Microsoft itself appears to have done it in .NET 4.6 is documented on GitHub:
// Reports whether the file-name part of relativePath matches the filter field.
// Both sides are upper-cased with the invariant culture before they are compared.
private bool MatchPattern(string relativePath)
{
    string fileName = System.IO.Path.GetFileName(relativePath);
    if (fileName == null)
    {
        return false;
    }

    string upperFilter = filter.ToUpper(CultureInfo.InvariantCulture);
    string upperName = fileName.ToUpper(CultureInfo.InvariantCulture);
    return PatternMatcher.StrictMatchPattern(upperFilter, upperName);
}
Upvotes: 1
Reputation: 31733
I didn't want to copy the source code and like @frankhommers I came up with a reflection based solution.
Notice the code comment about the use of wildcards in the name argument I found in the reference source.
/// <summary>
/// Reflection-based bridge to the <c>StrictMatchPattern</c> method of the type named
/// <c>System.IO.PatternMatcher</c>, looked up in the assembly that defines <see cref="Uri"/>.
/// </summary>
public static class PatternMatcher
{
    static MethodInfo strictMatchPatternMethod;

    // Resolves the target method once, when this class is first used.
    static PatternMatcher()
    {
        var typeName = "System.IO.PatternMatcher";
        var methodName = "StrictMatchPattern";

        // The second argument makes GetType throw instead of returning null for an unknown type.
        Type matcherType = typeof(Uri).Assembly.GetType(typeName, true);
        MethodInfo found = matcherType.GetMethod(methodName, BindingFlags.Public | BindingFlags.Static);
        if (found == null)
        {
            throw new MissingMethodException($"{typeName}.{methodName} not found");
        }

        strictMatchPatternMethod = found;
    }

    /// <summary>
    /// Reports whether <paramref name="name"/> matches <paramref name="expression"/> using strict
    /// (i.e. UNIX-like) semantics, by forwarding to the reflected method.
    /// </summary>
    /// <param name="expression">The wildcard expression to check against.</param>
    /// <param name="name">The name to check; it must not contain '*'.</param>
    /// <returns>The result reported by the reflected method.</returns>
    /// <exception cref="FormatException"><paramref name="name"/> contains '*'.</exception>
    public static bool StrictMatchPattern(string expression, string name)
    {
        // https://referencesource.microsoft.com/#system/services/io/system/io/PatternMatcher.cs
        // The reference source notes that, if the class is ever exposed for generic use,
        // name must be guaranteed not to contain wildcards. Its only caller there is
        // FileSystemWatcher, which never passes a name containing a wildcard.
        if (name.Contains('*'))
        {
            throw new FormatException("Wildcard not allowed");
        }

        object[] arguments = { expression, name };
        object outcome = strictMatchPatternMethod.Invoke(null, arguments);
        return (bool)outcome;
    }
}
Upvotes: 2
Reputation: 1305
How about using reflection to get access to the function in the .NET framework?
Like this:
/// <summary>
/// Exposes the <c>StrictMatchPattern</c> method of the type named <c>System.IO.PatternMatcher</c>,
/// looked up by reflection in the assembly that defines <see cref="FileSystemWatcher"/>.
/// </summary>
public class PatternMatcher
{
    /// <summary>Signature of the reflected matching method.</summary>
    /// <param name="expression">The wildcard expression to check against.</param>
    /// <param name="name">The name to check.</param>
    /// <returns>The result reported by the reflected method.</returns>
    public delegate bool StrictMatchPatternDelegate(string expression, string name);

    /// <summary>Delegate that forwards to the reflected method; assigned by the constructor.</summary>
    public StrictMatchPatternDelegate StrictMatchPattern;

    /// <summary>
    /// Resolves the reflected method and wires up <see cref="StrictMatchPattern"/>.
    /// </summary>
    /// <exception cref="TypeLoadException">The type is not present in the assembly.</exception>
    /// <exception cref="MissingMethodException">The type has no public static method of that name.</exception>
    public PatternMatcher()
    {
        const string typeName = "System.IO.PatternMatcher";
        const string methodName = "StrictMatchPattern";

        // Passing true makes GetType throw TypeLoadException for a missing type; a null result
        // would otherwise surface as a NullReferenceException on the next line.
        Type patternMatcherType = typeof(FileSystemWatcher).Assembly.GetType(typeName, true);
        MethodInfo patternMatchMethod = patternMatcherType.GetMethod(methodName, System.Reflection.BindingFlags.Static | System.Reflection.BindingFlags.Public);
        if (patternMatchMethod == null)
        {
            // Fail here rather than with a NullReferenceException on the first delegate call.
            throw new MissingMethodException(typeName, methodName);
        }

        StrictMatchPattern = (expression, name) => (bool)patternMatchMethod.Invoke(null, new object[] { expression, name });
    }
}
// Demonstrates the reflection-backed matcher with one matching and one non-matching mask.
void Main()
{
    var matcher = new PatternMatcher();

    bool txtResult = matcher.StrictMatchPattern("*.txt", "test.txt");
    Console.WriteLine(txtResult); // displays true

    bool docResult = matcher.StrictMatchPattern("*.doc", "test.txt");
    Console.WriteLine(docResult); // displays false
}
Upvotes: 0
Reputation: 41
From Windows 7 using P/Invoke (without 260 char count limit):
/// <summary>
/// UNICODE_STRING structure passed to the Rtl... method: two 16-bit length fields followed by
/// the text, which is marshalled as a wide-character string pointer.
/// </summary>
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
public struct UNICODE_STRING
{
    // Largest character count whose byte length (2 bytes per character) still fits in a ushort.
    private const int MaxChars = ushort.MaxValue / 2;

    /// <summary>Length of the text in bytes (character count times two).</summary>
    public ushort Length;

    /// <summary>Set to the same value as <see cref="Length"/> by the constructor.</summary>
    public ushort MaximumLength;

    [MarshalAs(UnmanagedType.LPWStr)]
    string Buffer;

    /// <summary>
    /// Wraps <paramref name="buffer"/>; a <c>null</c> buffer yields zero lengths.
    /// </summary>
    /// <param name="buffer">The text to wrap, or <c>null</c>.</param>
    /// <exception cref="ArgumentException">
    /// The text is too long for its byte length to fit in the 16-bit length fields.
    /// </exception>
    public UNICODE_STRING(string buffer)
    {
        if (buffer == null)
        {
            Length = MaximumLength = 0;
        }
        else
        {
            // Reject over-long text instead of letting the 16-bit byte count wrap around silently.
            if (buffer.Length > MaxChars)
            {
                throw new ArgumentException("String is too long for a UNICODE_STRING (at most " + MaxChars + " characters).", nameof(buffer));
            }
            Length = MaximumLength = (ushort)(buffer.Length * 2);
        }
        Buffer = buffer;
    }
}
/// <summary>
/// P/Invoke declaration of the RtlIsNameInExpression method from the NtDll.dll system library.
/// </summary>
public static class NtDll
{
    /// <summary>
    /// Native RtlIsNameInExpression entry point; the result and <paramref name="IgnoreCase"/>
    /// are marshalled as one-byte booleans.
    /// </summary>
    /// <param name="Expression">The expression, passed by reference.</param>
    /// <param name="Name">The name, passed by reference.</param>
    /// <param name="IgnoreCase">Flag forwarded to the native routine.</param>
    /// <param name="Zero">Pointer-sized argument forwarded to the native routine.</param>
    [DllImport("NtDll.dll", ExactSpelling = true, CharSet = CharSet.Unicode)]
    [return: MarshalAs(UnmanagedType.U1)]
    public static extern bool RtlIsNameInExpression(
        ref UNICODE_STRING Expression,
        ref UNICODE_STRING Name,
        [MarshalAs(UnmanagedType.U1)] bool IgnoreCase,
        IntPtr Zero);
}
/// <summary>
/// Tests whether <paramref name="fileName"/> matches the wildcard <paramref name="mask"/>
/// by calling the native RtlIsNameInExpression routine with case-insensitive matching.
/// </summary>
/// <param name="mask">The wildcard mask.</param>
/// <param name="fileName">The file name to test.</param>
/// <returns>The result reported by the native routine.</returns>
/// <exception cref="ArgumentNullException"><paramref name="mask"/> is <c>null</c>.</exception>
public bool MatchMask(string mask, string fileName)
{
    if (mask == null)
    {
        throw new ArgumentNullException(nameof(mask));
    }

    // Expression must be uppercase for IgnoreCase == true (see MSDN for RtlIsNameInExpression).
    // The invariant upper-casing keeps the result independent of the current culture.
    UNICODE_STRING expr = new UNICODE_STRING(mask.ToUpperInvariant());
    UNICODE_STRING name = new UNICODE_STRING(fileName);

    // The original evaluated the match but never returned it, so the method did not compile.
    return NtDll.RtlIsNameInExpression(ref expr, ref name, true, IntPtr.Zero);
}
Upvotes: 4
Reputation: 2671
My version, which supports ** wild card:
/// <summary>
/// Converts a path mask into a case-insensitive <see cref="Regex"/>.
/// "**" matches any run of characters including path separators, "*" any run without a
/// separator, "?" one non-separator character, and '/' or '\' either separator.
/// The regex also accepts any leading directory prefix before the masked part.
/// </summary>
/// <param name="mask">The path mask; <c>null</c> is treated like an empty mask.</param>
/// <returns>A regex equivalent to the mask.</returns>
static Regex FileMask2Regex(string mask)
{
    string source = mask ?? string.Empty;

    // allowed to have prefix
    var sb = new StringBuilder(@"^(?:.*?[/\\])?");

    // Translate in a single pass so that no text in the mask can be mistaken for a
    // placeholder standing in for a wildcard.
    for (int i = 0; i < source.Length; i++)
    {
        char c = source[i];
        switch (c)
        {
            case '*':
                if (i + 1 < source.Length && source[i + 1] == '*')
                {
                    // "**" crosses directory boundaries; consume both stars.
                    sb.Append(@".*");
                    i++;
                }
                else
                {
                    sb.Append(@"[^/\\]*");
                }
                break;
            case '?':
                sb.Append(@"[^/\\]");
                break;
            case '/':
            case '\\':
                sb.Append(@"[/\\]");
                break;
            default:
                sb.Append(Regex.Escape(c.ToString()));
                break;
        }
    }

    sb.Append("$");
    return new Regex(sb.ToString(), RegexOptions.IgnoreCase);
}
Upvotes: 0
Reputation: 13138
Use WildCardPattern
class from System.Management.Automation
available as NuGet package or in Windows PowerShell SDK.
// Build a pattern object from the mask "my*.txt".
WildcardPattern pattern = new WildcardPattern("my*.txt");
// Ask the pattern whether the file name "myfile.txt" fits it.
bool fits = pattern.IsMatch("myfile.txt");
Upvotes: 4
Reputation: 5359
Fastest version of the previously proposed function:
/// <summary>
/// Tests whether <paramref name="filePath"/> matches at least one of the wildcard masks
/// ('*' = any run of characters, '?' = exactly one character), ignoring case.
/// </summary>
/// <param name="filePath">The text to test.</param>
/// <param name="fileMasks">The wildcard masks.</param>
/// <returns><c>true</c> when the whole of <paramref name="filePath"/> matches one of the masks.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileMasks"/> is <c>null</c>.</exception>
public static bool FitsMasks(string filePath, params string[] fileMasks)
{
    return FileMasksToRegex(fileMasks).IsMatch(filePath);
}

/// <summary>
/// Returns a case-insensitive <see cref="Regex"/> that matches any of the wildcard masks.
/// The regex is cached, and the same instance is returned for the same set of masks
/// regardless of the order in which they are given.
/// </summary>
/// <param name="fileMasks">The wildcard masks.</param>
/// <returns>A regex with one capturing group per mask, anchored at both ends.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileMasks"/> is <c>null</c>.</exception>
public static Regex FileMasksToRegex(params string[] fileMasks)
{
    if (fileMasks == null)
    {
        throw new ArgumentNullException(nameof(fileMasks));
    }

    // Order-insensitive cache key. An array key would be compared by reference and a fresh
    // params array would never hit the cache. NUL is the separator because it is not
    // expected to occur inside a file mask.
    string[] sortedMasks = (string[])fileMasks.Clone();
    Array.Sort(sortedMasks, StringComparer.Ordinal);
    string cacheKey = string.Join("\0", sortedMasks);

    Regex regex;
    if (!_maskRegexes.TryGetValue(cacheKey, out regex))
    {
        // The outer non-capturing group makes ^ and $ apply to every alternative; without it
        // "^(a)|(b)$" anchors only the start of the first mask and the end of the last one.
        StringBuilder sb = new StringBuilder("^(?:");
        bool first = true;
        foreach (string fileMask in fileMasks)
        {
            if (first)
            {
                first = false;
            }
            else
            {
                sb.Append("|");
            }

            sb.Append('(');
            foreach (char c in fileMask)
            {
                switch (c)
                {
                    case '*': sb.Append(@".*"); break;
                    case '?': sb.Append(@"."); break;
                    default:
                        sb.Append(Regex.Escape(c.ToString()));
                        break;
                }
            }
            sb.Append(')');
        }
        sb.Append(")$");

        regex = new Regex(sb.ToString(), RegexOptions.IgnoreCase);
        _maskRegexes[cacheKey] = regex;
    }

    return regex;
}

// Cache of compiled mask sets, keyed by the sorted, NUL-joined masks.
static readonly Dictionary<string, Regex> _maskRegexes = new Dictionary<string, Regex>(StringComparer.Ordinal);
Notes:
Upvotes: 3
Reputation: 415880
Try this:
/// <summary>
/// Tests whether <paramref name="sFileName"/> matches the wildcard <paramref name="sFileMask"/>,
/// where '*' stands for any run of characters and '?' for exactly one character.
/// The comparison is case-sensitive and must cover the whole file name.
/// </summary>
/// <param name="sFileName">The file name to test.</param>
/// <param name="sFileMask">The wildcard mask; all other regex metacharacters are taken literally.</param>
/// <returns><c>true</c> when the entire file name matches the mask.</returns>
private bool FitsMask(string sFileName, string sFileMask)
{
    // Escape first so that characters such as '(' or '+' in the mask are literals, then map
    // the escaped wildcards to their regex equivalents. The ^...$ anchors stop the mask
    // from matching a mere substring of the file name.
    string pattern = "^" + Regex.Escape(sFileMask).Replace("\\*", ".*").Replace("\\?", ".") + "$";
    Regex mask = new Regex(pattern);
    return mask.IsMatch(sFileName);
}
Upvotes: 22
Reputation: 36708
I appreciate finding Joel's answer--saved me some time as well ! I did, however, have to make a few changes to make the method do what most users would expect:
// Tests fileName against fileMask, ignoring case and requiring the whole name to match.
// Only '.', '*' and '?' are translated; any other regex metacharacter in the mask is
// passed to the regex engine as-is.
private bool FitsMask(string fileName, string fileMask)
{
    // Order matters: dots become literal before '*' and '?' introduce new dots.
    string dotsLiteral = fileMask.Replace(".", "[.]");
    string starsExpanded = dotsLiteral.Replace("*", ".*");
    string body = starsExpanded.Replace("?", ".");

    var mask = new Regex("^" + body + "$", RegexOptions.IgnoreCase);
    return mask.IsMatch(fileName);
}
For even more flexibility, here is a plug-compatible method built on top of the original. This version lets you pass multiple masks (hence the plural on the second parameter name fileMasks) separated by lines, commas, vertical bars, or spaces. I wanted it so that I could let the user put as many choices as desired in a ListBox and then select all files matching any of them. Note that some controls (like a ListBox) use CR-LF for line breaks while others (e.g. RichTextBox) use just LF--that is why both "\r\n" and "\n" show up in the Split list.
// Tests fileName against several masks packed into one string, separated by line breaks,
// commas, vertical bars or spaces. Returns true as soon as one mask fits.
private bool FitsOneOfMultipleMasks(string fileName, string fileMasks)
{
    string[] separators = { "\r\n", "\n", ",", "|", " " };
    string[] masks = fileMasks.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    foreach (string fileMask in masks)
    {
        if (FitsMask(fileName, fileMask))
        {
            return true;
        }
    }

    return false;
}
The earlier version of FitsMask (which I have left in for comparison) does a fair job but since we are treating it as a regular expression it will throw an exception if it is not a valid regular expression when it comes in. The solution is that we actually want any regex metacharacters in the input fileMask to be considered literals, not metacharacters. But we still need to treat period, asterisk, and question mark specially. So this improved version of FitsMask safely moves these three characters out of the way, transforms all remaining metacharacters into literals, then puts the three interesting characters back, in their "regex'ed" form.
One other minor improvement is to allow for case-independence, per standard Windows behavior.
/// <summary>
/// Tests whether <paramref name="fileName"/> matches the wildcard <paramref name="fileMask"/>,
/// where '*' stands for any run of characters and '?' for exactly one character.
/// The comparison is case-insensitive and must cover the whole file name.
/// </summary>
/// <param name="fileName">The file name to test.</param>
/// <param name="fileMask">The wildcard mask; all other regex metacharacters are taken literally.</param>
/// <returns><c>true</c> when the entire file name matches the mask.</returns>
private bool FitsMask(string fileName, string fileMask)
{
    // Regex.Escape already makes every metacharacter literal, '.', '*' and '?' included,
    // so only the escaped wildcards need mapping back. This avoids placeholder tokens,
    // which misread any mask that happens to contain the placeholder text itself.
    string pattern =
        "^" +
        Regex.Escape(fileMask)
            .Replace("\\*", ".*")
            .Replace("\\?", ".")
        + "$";
    return new Regex(pattern, RegexOptions.IgnoreCase).IsMatch(fileName);
}
I have been remiss in not updating this earlier but these references will likely be of interest to readers who have made it to this point:
Upvotes: 33
Reputation: 6553
Many people don't know that, but .NET includes an internal class, called "PatternMatcher" (under the "System.IO" namespace).
This static class contains only 1 method:
public static bool StrictMatchPattern(string expression, string name)
This method is used by .net whenever it needs to compare files with wildcard (FileSystemWatcher, GetFiles(), etc)
Using reflector, I exposed the code here. Didn't really go through it to understand how it works, but it works great,
So this is the code for anyone who doesn't want to work with the inefficient RegEx way:
/// <summary>
/// Wildcard matcher for file names. In an expression '*' matches any run of characters and
/// '?' matches one character; '&lt;', '&gt;' and '"' are handled as additional wildcard
/// characters. Matching ignores case because both inputs are lower-cased first.
/// </summary>
public static class PatternMatcher
{
    // Fields
    private const char ANSI_DOS_QM = '<';
    private const char ANSI_DOS_STAR = '>';
    private const char DOS_DOT = '"';
    private const int MATCHES_ARRAY_SIZE = 16;

    // Methods

    /// <summary>
    /// Reports whether <paramref name="name"/> matches <paramref name="expression"/>.
    /// </summary>
    /// <param name="expression">The wildcard expression to check against.</param>
    /// <param name="name">The name to check.</param>
    /// <returns>
    /// <c>true</c> on a match; <c>false</c> on a mismatch or when either argument is
    /// <c>null</c> or empty.
    /// </returns>
    public static bool StrictMatchPattern(string expression, string name)
    {
        int num9;
        char ch = '\0';
        char ch2 = '\0';
        // sourceArray holds the states reached before the current name character and
        // numArray2 collects the states reached after it. A state is an expression offset
        // times two (optionally plus one); expression.Length * 2 is the accepting state.
        int[] sourceArray = new int[MATCHES_ARRAY_SIZE];
        int[] numArray2 = new int[MATCHES_ARRAY_SIZE];
        // Set once every character of name has been consumed.
        bool flag = false;
        if (((name == null) || (name.Length == 0)) || ((expression == null) || (expression.Length == 0)))
        {
            return false;
        }
        // Lower-case only after the guard above, so that null input yields false instead of
        // throwing a NullReferenceException.
        expression = expression.ToLowerInvariant();
        name = name.ToLowerInvariant();
        if (expression.Equals("*") || expression.Equals("*.*"))
        {
            return true;
        }
        // Shortcut for "*suffix" with no further '*': a plain ends-with comparison.
        if ((expression[0] == '*') && (expression.IndexOf('*', 1) == -1))
        {
            int length = expression.Length - 1;
            if ((name.Length >= length) && (string.Compare(expression, 1, name, name.Length - length, length, StringComparison.OrdinalIgnoreCase) == 0))
            {
                return true;
            }
        }
        sourceArray[0] = 0;
        int num7 = 1;                       // number of valid entries in sourceArray
        int num = 0;                        // offset of the next character of name
        int num8 = expression.Length * 2;   // accepting state
        while (!flag)
        {
            int num3;
            if (num < name.Length)
            {
                ch = name[num];
                num3 = 1;
                num++;
            }
            else
            {
                flag = true;
                // The highest current state is already the accepting one: matched.
                if (sourceArray[num7 - 1] == num8)
                {
                    break;
                }
            }
            int index = 0;
            int num5 = 0;                   // number of entries written to numArray2
            int num6 = 0;
            while (index < num7)
            {
                int num2 = (sourceArray[index++] + 1) / 2;
                num3 = 0;
            Label_00F2:
                if (num2 != expression.Length)
                {
                    num2 += num3;
                    num9 = num2 * 2;
                    if (num2 == expression.Length)
                    {
                        numArray2[num5++] = num8;
                    }
                    else
                    {
                        ch2 = expression[num2];
                        num3 = 1;
                        // Grow both arrays only when fewer than two free slots remain. At most
                        // three entries are written before this check runs again. A fixed
                        // threshold would double the arrays on every visit once passed, which
                        // explodes memory for expressions with many wildcards.
                        if (num5 >= numArray2.Length - 2)
                        {
                            int num11 = numArray2.Length * 2;
                            int[] destinationArray = new int[num11];
                            Array.Copy(numArray2, destinationArray, numArray2.Length);
                            numArray2 = destinationArray;
                            destinationArray = new int[num11];
                            Array.Copy(sourceArray, destinationArray, sourceArray.Length);
                            sourceArray = destinationArray;
                        }
                        if (ch2 == '*')
                        {
                            // Record both states for the star, then keep scanning the expression.
                            numArray2[num5++] = num9;
                            numArray2[num5++] = num9 + 1;
                            goto Label_00F2;
                        }
                        if (ch2 == ANSI_DOS_STAR)
                        {
                            // flag2: another '.' follows the current one later in name.
                            bool flag2 = false;
                            if (!flag && (ch == '.'))
                            {
                                int num13 = name.Length;
                                for (int i = num; i < num13; i++)
                                {
                                    char ch3 = name[i];
                                    num3 = 1;
                                    if (ch3 == '.')
                                    {
                                        flag2 = true;
                                        break;
                                    }
                                }
                            }
                            if ((flag || (ch != '.')) || flag2)
                            {
                                numArray2[num5++] = num9;
                                numArray2[num5++] = num9 + 1;
                            }
                            else
                            {
                                numArray2[num5++] = num9 + 1;
                            }
                            goto Label_00F2;
                        }
                        num9 += num3 * 2;
                        switch (ch2)
                        {
                            case ANSI_DOS_QM:
                                if (flag || (ch == '.'))
                                {
                                    goto Label_00F2;
                                }
                                numArray2[num5++] = num9;
                                goto Label_028D;
                            case DOS_DOT:
                                if (flag)
                                {
                                    goto Label_00F2;
                                }
                                if (ch == '.')
                                {
                                    numArray2[num5++] = num9;
                                    goto Label_028D;
                                }
                                break;
                        }
                        if (!flag)
                        {
                            // '?' accepts any character; anything else must equal it exactly.
                            if (ch2 == '?')
                            {
                                numArray2[num5++] = num9;
                            }
                            else if (ch2 == ch)
                            {
                                numArray2[num5++] = num9;
                            }
                        }
                    }
                }
            Label_028D:
                // Skip pending source states that are lower than the states already recorded.
                if ((index < num7) && (num6 < num5))
                {
                    while (num6 < num5)
                    {
                        int num14 = sourceArray.Length;
                        while ((index < num14) && (sourceArray[index] < numArray2[num6]))
                        {
                            index++;
                        }
                        num6++;
                    }
                }
            }
            // No state survived this character: the name cannot match.
            if (num5 == 0)
            {
                return false;
            }
            // The new states become the current states for the next character.
            int[] numArray4 = sourceArray;
            sourceArray = numArray2;
            numArray2 = numArray4;
            num7 = num5;
        }
        num9 = sourceArray[num7 - 1];
        return (num9 == num8);
    }
}
Upvotes: 17
Reputation: 109035
If PowerShell is available, it has direct support for wildcard type matching (as well as Regex).
// Build a pattern object from the mask "a*.b*".
WildcardPattern pat = new WildcardPattern("a*.b*");
// Run the dependent code only when filename fits the pattern; the body is elided in this snippet.
if (pat.IsMatch(filename)) { ... }
Upvotes: 2