Reputation: 11010
How can I simplify this? I am trying to get the count of Excel files from a directory and subdirectories based on their size. I have at least 10 different groupings.
var queryList2Only = from i in di.GetFiles("*.xls", SearchOption.TopDirectoryOnly)
.Where(f => f.Length <= 5120)
select i.Length;
if (queryList2Only.Any())
{
dest.WriteLine("Excel File <= 5 KB");
dest.WriteLine(queryList2Only.Count());
dest.WriteLine("");
}
var queryList3Only = from i in di.GetFiles("*.xls", SearchOption.TopDirectoryOnly)
.Where(f => f.Length > 5120 && f.Length <= 10240)
select i.Length;
if (queryList3Only.Any())
{
dest.WriteLine("Excel File > 5 KB and <= 10 KB");
dest.WriteLine(queryList3Only.Count());
dest.WriteLine("");
EDIT: I need this
<= 5 KB,> 5 KB and <= 10 KB,> 10 KB and <= 20 KB,> 20 KB and <= 100 KB,> 100 KB and <= 1000 KB,> 1000 KB and <=5 MB,> 5 MB and <=10 MB,> 10 MB and <=20 MB,> 20 MB and <=50 MB,> 50 MB and <=100 MB
private void button1_Click(object sender, EventArgs e)
{
DirectoryInfo Folder = new DirectoryInfo(textBox1.Text);
var _logFolderPath4 = Path.Combine(textBox1.Text.Trim(), "log");
if (Folder.Exists)
if (!Directory.Exists(_logFolderPath4))
Directory.CreateDirectory(_logFolderPath4);
DirectoryInfo di = new DirectoryInfo(@"D:\Material\");
bool time = false;
using (var dest = File.AppendText(Path.Combine(_logFolderPath4, "Excel.txt")))
{
if (!time)
{
dest.WriteLine("---------------------" + DateTime.Now + "---------------------");
dest.WriteLine("");
time = true;
}
CountFiles(dest, di, @"*.txt");
}
}
Upvotes: 1
Views: 672
Reputation: 134491
You don't necessarily need LINQ for this. It would be more efficient for you to just loop through it. Though Rup's solution is a great use of LINQ here.
Here's a more complete version tailored for exactly what you want to do.
// count it
CountFiles(dest, di, @"*.xls");
public void CountFiles(TextWriter writer, DirectoryInfo directory, string searchPattern)
{
var counter = new FileGroupCounter
{
{ 5, Multiplier.K },
{ 10, Multiplier.K },
{ 20, Multiplier.K },
{ 100, Multiplier.K },
{ 1000, Multiplier.K },
{ 5, Multiplier.M },
{ 10, Multiplier.M },
{ 20, Multiplier.M },
{ 50, Multiplier.M },
{ 100, Multiplier.M },
};
foreach (var file in directory.EnumerateFiles(searchPattern, SearchOption.AllDirectories))
// or use GetFiles() if you're not targeting .NET 4.0
{
counter.CountFile(file);
}
foreach (var result in counter)
{
writer.WriteLine("Excel File " + result);
writer.WriteLine(result.Count);
writer.WriteLine();
}
}
// and the supporting classes
public enum Multiplier : long
{
K = 1 << 10,
M = 1 << 20,
G = 1 << 30,
T = 1 << 40,
}
public class FileGroupCounter : IEnumerable<FileGroupCounter.Result>
{
public ReadOnlyCollection<long> Limits { get { return roLimits; } }
public ReadOnlyCollection<int> Counts { get { return roCounts; } }
public ReadOnlyCollection<Multiplier> Multipliers { get { return roMultipliers; } }
public FileGroupCounter()
{
limits = new List<long>();
counts = new List<int>();
multipliers = new List<Multiplier>();
roLimits= limits.AsReadOnly();
roCounts= counts.AsReadOnly();
roMultipliers= multipliers.AsReadOnly();
}
private List<long> limits;
private List<int> counts;
private List<Multiplier> multipliers;
private ReadOnlyCollection<long> roLimits;
private ReadOnlyCollection<int> roCounts;
private ReadOnlyCollection<Multiplier> roMultipliers;
private long CalculateLength(int index)
{
return limits[index] * (long)multipliers[index];
}
public void Add(long limit, Multiplier multiplier)
{
int lastIndex = limits.Count - 1;
if (lastIndex >= 0 && limit * (long)multiplier <= CalculateLength(lastIndex))
throw new ArgumentOutOfRangeException("limit, multiplier", "must be added in increasing order");
limits.Add(limit);
counts.Add(0);
multipliers.Add(multiplier);
}
public bool CountFile(FileInfo file)
{
if (file == null)
throw new ArgumentNullException("file");
for (int i = 0; i < limits.Count; i++)
{
if (file.Length <= CalculateLength(i))
{
counts[i]++;
return true;
}
}
return false;
}
public IEnumerator<Result> GetEnumerator()
{
for (int i = 0; i < limits.Count; i++)
{
if (counts[i] > 0)
yield return new Result(this, i);
}
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); }
public class Result
{
public long Limit { get { return counter.limits[index]; } }
public int Count { get { return counter.counts[index]; } }
public Multiplier Multiplier { get { return counter.multipliers[index]; } }
internal Result(FileGroupCounter counter, int index)
{
this.counter = counter;
this.index = index;
}
private FileGroupCounter counter;
private int index;
public override string ToString()
{
if (index > 0)
return String.Format("> {0} {1}B and <= {2} {3}B",
counter.limits[index - 1], counter.multipliers[index - 1],
counter.limits[index], counter.multipliers[index]);
else
return String.Format("<= {0} {1}B",
counter.limits[index], counter.multipliers[index]);
}
}
}
Upvotes: 2
Reputation: 34418
I think only real optimisation here would be to ensure you only call di.GetFiles("*.xls", SearchOption.TopDirectoryOnly)
once - since that will actually hit the filesystem rather than being lazily executed like most LINQ. Sure, the filesystem will cache the results of this but can't be slower to stay in memory and reuse the list.
Once you're in memory Jeff might be right - just count yourself - thought that doesn't seem very elegant :-) and it probably doesn't make a lot of difference here unless you're dealing with huge numbers. You just want to try and keep the number of allocations / reallocations down. With as much LINQ as I can cram in
var files = di.GetFiles("*.xls", SearchOption.TopDirectoryOnly);
// map to a list of numbers, 0 = up to 5K, 1 = 5-10, etc.
var sizes = files.Select(f => (f.Length / 5120));
var countsBySize = sizes.GroupBy(s => s)
.Select(g => new { Size = g.Key, Count = g.Count() })
.OrderBy(s => s.Size);
var results = countBySize.ToList();
which returns a list of 5K buckets and count of files in each bucket. If you're just going to foreach this then don't do the final ToList. If you wanted the individual files in each bucket you should group by the (f.Length / 5120) without selecting it first.
Upvotes: 2
Reputation: 164331
You need to have your ranges in a collection, and enumerate over them. Here is an example that should get you going - the sizes array contains the steps, of course you should choose the steps that makes sense to your application:
int[] sizes = Enumerable.Range(0,10).Select(n => (int)Math.Pow(2,n + 8)).ToArray();
int lower = 0;
foreach(var size in sizes)
{
var files = di.GetFiles("*.*").Where(f => f.Length >= lower && f.Length < size);
Console.WriteLine("Between {0} and {1} bytes:", lower,size);
foreach(var file in files)
Console.WriteLine("\t{0}",file);
lower = size;
}
Upvotes: 4