Reputation: 2952
I am trying to make file names that look like:
MAX_1.01.01.03.pdf look like Max_1010103.pdf.
Currently I have this code:
/// <summary>
/// Cleans up the file name of every path in <paramref name="paths"/>: runs of
/// the characters matched by regPattern become a single space, runs of
/// whitespace are collapsed, and the file is moved to the resulting name.
/// Each processed file is listed in dataGridView1 as
/// (directory, original name, new name).
/// </summary>
/// <param name="paths">Full paths of the files to process.</param>
public void Sanitizer(List<string> paths)
{
// One or more of the characters ~ # & ! % + { } in a row.
string regPattern = (@"[~#&!%+{}]+");
string replacement = " ";
Regex regExPattern = new Regex(regPattern);
// Two or more consecutive whitespace characters.
Regex regExPattern2 = new Regex(@"\s{2,}");
// A period that is followed, somewhere later in the input, by another period.
Regex regExPattern3 = new Regex(@"\.(?=.*\.)");
// NOTE(review): `replace` is never read in this method.
string replace = "";
// Collision counter, keyed by the full sanitized target path.
var filesCount = new Dictionary<string, int>();
// Start from an empty grid on every call.
dataGridView1.Rows.Clear();
try
{
foreach (string files2 in paths)
{
string filenameOnly = System.IO.Path.GetFileName(files2);
string pathOnly = System.IO.Path.GetDirectoryName(files2);
// Replace the unwanted characters, then collapse the whitespace runs that
// the replacement may have produced.
string sanitizedFileName = regExPattern.Replace(filenameOnly, replacement);
sanitizedFileName = regExPattern2.Replace(sanitizedFileName, replacement);
string sanitized = System.IO.Path.Combine(pathOnly, sanitizedFileName);
// NOTE(review): when neither regex changed the name, `sanitized` presumably
// points at the source file itself, so an existing file lands in the else
// branch below - confirm this is intended.
if (!System.IO.File.Exists(sanitized))
{
// Target name is free: record the rename in the grid, then move the file.
DataGridViewRow clean = new DataGridViewRow();
clean.CreateCells(dataGridView1);
clean.Cells[0].Value = pathOnly;
clean.Cells[1].Value = filenameOnly;
clean.Cells[2].Value = sanitizedFileName;
dataGridView1.Rows.Add(clean);
System.IO.File.Move(files2, sanitized);
}
else
{
if (filesCount.ContainsKey(sanitized))
{
// Second and later collisions only bump the counter; no file is moved and
// no grid row is added for this path.
filesCount[sanitized]++;
}
else
{
// First collision on this target: insert the counter (1) between the base
// name and the extension and move the file to that name instead.
filesCount.Add(sanitized, 1);
string newFileName = String.Format("{0}{1}{2}",
System.IO.Path.GetFileNameWithoutExtension(sanitized),
filesCount[sanitized].ToString(),
System.IO.Path.GetExtension(sanitized));
string newFilePath = System.IO.Path.Combine(
System.IO.Path.GetDirectoryName(sanitized), newFileName);
// Applied after newFilePath was built, so it can only affect the name shown
// in the grid; sanitizedFileName was already collapsed above.
newFileName = regExPattern2.Replace(newFileName, replacement);
System.IO.File.Move(files2, newFilePath);
// NOTE(review): `sanitized` is not read again in this iteration.
sanitized = newFileName;
DataGridViewRow clean = new DataGridViewRow();
clean.CreateCells(dataGridView1);
clean.Cells[0].Value = pathOnly;
clean.Cells[1].Value = filenameOnly;
clean.Cells[2].Value = newFileName;
dataGridView1.Rows.Add(clean);
}
//HERE IS WHERE I AM TRYING TO GET RID OF DOUBLE PERIODS//
// NOTE(review): as the braces are written, this check is nested inside the
// else branch above, so it is skipped whenever the file was moved in the if
// branch. The listing also has one more '{' than '}', so the intended
// nesting should be confirmed.
// The pattern is tested against the full path, so periods in directory
// names count as well.
if (regExPattern3.IsMatch(files2))
{
string filewithDoublePName = System.IO.Path.GetFileName(files2);
string doublepPath = System.IO.Path.GetDirectoryName(files2);
string name = System.IO.Path.GetFileNameWithoutExtension(files2);
// Plain string replacement: removes every period left in the extension-less name.
string newName = name.Replace(".", "");
string filesDir = System.IO.Path.GetDirectoryName(files2);
string fileExt = System.IO.Path.GetExtension(files2);
// NOTE(review): newPath is computed but never used; this branch contains no
// File.Move, so it only adds a grid row.
string newPath = System.IO.Path.Combine(filesDir, newName+fileExt);
DataGridViewRow clean = new DataGridViewRow();
clean.CreateCells(dataGridView1);
clean.Cells[0].Value =doublepPath;
clean.Cells[1].Value = filewithDoublePName;
// The grid shows the new name without its extension.
clean.Cells[2].Value = newName;
dataGridView1.Rows.Add(clean);
}
}
}
// Rethrows unchanged (stack trace preserved); `e` itself is not used.
catch (Exception e)
{
throw;
//errors.Write(e);
}
}
I ran this and instead of getting rid of ALL periods (except the period before the file extension), I get results like: MAX_1.0103.pdf
If there are multiple periods like: Test....1.txt
I get these results: Test...1.txt
It seems to only get rid of ONE period. I am pretty new to Regular Expressions and it is a REQUIREMENT for this project. Can anybody help me figure out what I'm doing wrong here?
Thanks!
EDITED to show changes made in code
Upvotes: 4
Views: 5756
Reputation: 1603
This regex will remove all periods except for the period before the 3 or 4 letter extension.
string filename = "test.test......t.test.pdf";
// Match a period unless it is immediately followed by 3-4 word characters and
// the end of the string, i.e. keep only the period that starts the extension.
Regex periodOutsideExtension = new Regex(@"\.(?!(\w{3,4}$))");
string newFilename = periodOutsideExtension.Replace(filename, "");
If you want it to work with 2 letter extensions just change the {3,4} to {2,4}
Good luck!
Upvotes: 0
Reputation: 3856
Something like this, maybe:
string fileName = "MAX_1.01.01.03.pdf";
// Set the extension aside first: a blanket Replace(".", "") over the whole
// name would also delete the period in front of it ("Max_1010103pdf").
string extension = System.IO.Path.GetExtension(fileName);
string stem = System.IO.Path.GetFileNameWithoutExtension(fileName);
// Capitalize the first character and lower-case the rest of the name.
stem = stem.Substring(0, 1).ToUpper() + stem.Substring(1).ToLower();
// Drop the interior periods, then put the (lower-cased) extension back.
fileName = stem.Replace(".", "") + extension.ToLower();
Upvotes: -1
Reputation: 137138
Why not use the Path class:
// Only the file name (without its extension) is touched, so periods in the
// directory part and the period before the extension are left alone.
string stem = Path.GetFileNameWithoutExtension(yourPath);
string cleanedStem = stem.Replace(".", "");
string directory = Path.GetDirectoryName(yourPath);
string extension = Path.GetExtension(yourPath);
string newPath = Path.Combine(directory, cleanedStem + extension);
Each step separated for clarity.
So for the input
"C:\Users\Fred\MAX_1.01.01.03.pdf"
I get the output
"C:\Users\Fred\MAX_1010103.pdf"
which is what I'd expect.
If I supply:
"C:\Users\Fred.Flintstone\MAX_1.01.01.03.pdf"
I get:
"C:\Users\Fred.Flintstone\MAX_1010103.pdf"
again what I expect as I'm not processing the "DirectoryName" part of the path.
NOTE I missed the bit about RegEx being a REQUIREMENT. Still sticking by this answer though.
Upvotes: 12
Reputation: 128317
Say, didn't you already ask this question?
Anyway, I stick by my original answer:
/// <summary>
/// Returns <paramref name="fullPath"/> with every period removed from the
/// file name, keeping the directory part and the extension as they are.
/// </summary>
/// <param name="fullPath">Path of the file whose name should be cleaned.</param>
/// <returns>The directory combined with the period-free name plus the original extension.</returns>
string RemovePeriodsFromFilename(string fullPath)
{
    string directory = Path.GetDirectoryName(fullPath);
    // GetFileNameWithoutExtension strips the extension, so the period before
    // it is not affected by the replacement.
    string stem = Path.GetFileNameWithoutExtension(fullPath).Replace(".", string.Empty);
    return Path.Combine(directory, stem + Path.GetExtension(fullPath));
}
Now, since you specified that you must use RegEx, I suppose you could always force it in there:
/// <summary>
/// Regex-based variant: returns <paramref name="fullPath"/> with every period
/// removed from the file name, keeping the directory part and the extension.
/// </summary>
/// <param name="fullPath">Path of the file whose name should be cleaned.</param>
/// <returns>The directory combined with the period-free name plus the original extension.</returns>
string RemovePeriodsFromFilename(string fullPath)
{
    string directory = Path.GetDirectoryName(fullPath);
    // The pattern \. matches one literal period; the extension has already
    // been stripped from the input, so its period is not affected.
    string stem = Regex.Replace(Path.GetFileNameWithoutExtension(fullPath), @"\.", string.Empty);
    return Path.Combine(directory, stem + Path.GetExtension(fullPath));
}
Note: This is basically the exact same approach that ChrisF suggested.
Whoever is requiring that you use RegEx, I suggest you request an explanation why.
Upvotes: 2
Reputation: 5501
I'd forgo regexes altogether and do it like this:
Upvotes: 0