Reputation: 10025
I want to implement a list with fixed size, which contains exactly N values in order. I wrote some code, and it work sometimes, but generally it produces an incorrect result. I tried to change it multiple times but without any success.
For my little tests everything works fine but when I'm trying to test it on big
data (not really big, but which is generated and cannot be checked manually) i get an incorrect result.
What can be wrong here? I even tried to wrap Add
method with lock
but it didn't help, so it's not a multithreading case:
public class LimitedSizeSortedList<T> : IEnumerable<T>
{
private readonly IComparer<T> _comparer;
private readonly T[] _items;
private readonly Dictionary<T, int> _itemsIndices;
public int Size => _items.Length;
public int Count { get; private set; }
public T Minimal => _items[Count - 1];
public LimitedSizeSortedList(IComparer<T> comparer, IEqualityComparer<T> eqComparer, int size)
{
if (size < 1)
throw new ArgumentException();
_comparer = comparer;
_items = new T[size];
_itemsIndices = new Dictionary<T, int>(eqComparer);
}
public void Add(T item)
{
if (Count < Size)
Count++;
else if (IsBigger(Minimal, item))
return;
int alreadyExistingIndex;
if (_itemsIndices.TryGetValue(item, out alreadyExistingIndex)) // already present in collection so we replace it
{
_items[alreadyExistingIndex] = item;
return;
}
int index = FindInsertIndex(item);
_itemsIndices.Remove(Minimal);
for (int i = _items.Length - 1; i > index; i--)
{
MoveToNewIndex(_items[i - 1], i);
}
MoveToNewIndex(item, index);
}
private void MoveToNewIndex(T item, int index)
{
_items[index] = item;
_itemsIndices[item] = index;
}
private int FindInsertIndex(T item)
{
int lo = 0, hi = Count - 1;
while (lo < hi)
{
int m = lo + (hi - lo)/2; // (hi + lo)/2 без переполнения
if (IsBigger(_items[m], item))
lo = m + 1;
else
hi = m - 1;
}
if (IsBigger(_items[lo], item))
lo++;
return lo;
}
private bool IsBigger(T x, T y)
{
return _comparer.Compare(x, y) > 0;
}
public IEnumerator<T> GetEnumerator()
{
for (int i = 0; i < Count; i++)
{
yield return _items[i];
}
}
IEnumerator IEnumerable.GetEnumerator()
{
return _items.GetEnumerator();
}
public void Clear()
{
Count = 0;
_itemsIndices.Clear();
}
}
Test code:
[TestClass]
public class LimitedSizeSortedListTest
{
[TestMethod]
public void Insert()
{
int[] items = {4, 1, 7, 2, 3};
var result = FromArray(items);
bool sequenceEqual = result.SequenceEqual(items.OrderByDescending(x => x));
Assert.IsTrue(sequenceEqual);
}
[TestMethod]
public void Clear()
{
int[] items = { 4, 1, 7, 2, 3 };
var result = FromArray(items);
result.Clear();
bool sequenceEqual = result.SequenceEqual(Enumerable.Empty<int>());
Assert.IsTrue(sequenceEqual);
}
[TestMethod]
public void TestMultiple()
{
KeyValuePair<char, int>[] items =
{
new KeyValuePair<char, int>('C', 1032508),
new KeyValuePair<char, int>('E', 1609137),
new KeyValuePair<char, int>('D', 1236174),
new KeyValuePair<char, int>('_', 568439),
new KeyValuePair<char, int>('\\', 287371),
new KeyValuePair<char, int>('[', 1006805),
new KeyValuePair<char, int>('A', 680143),
new KeyValuePair<char, int>('L', 155975),
new KeyValuePair<char, int>('I', 974892),
new KeyValuePair<char, int>('F', 1197310),
new KeyValuePair<char, int>('M', 1201940),
new KeyValuePair<char, int>('B', 1820738),
new KeyValuePair<char, int>('N', 640575),
new KeyValuePair<char, int>('S', 1221010),
new KeyValuePair<char, int>('R', 926485),
new KeyValuePair<char, int>('U', 1742070),
new KeyValuePair<char, int>('P', 602809),
new KeyValuePair<char, int>('X', 886691),
new KeyValuePair<char, int>('Y', 3020863),
new KeyValuePair<char, int>('W', 1091417),
new KeyValuePair<char, int>('Z', 834877),
new KeyValuePair<char, int>('V', 82777),
new KeyValuePair<char, int>('H', 920902),
new KeyValuePair<char, int>('O', 288008),
new KeyValuePair<char, int>('G', 616626)
};
var result = new LimitedSizeSortedList<KeyValuePair<char, int>>(FrequencyComparer.Instance, FrequencyComparer.Instance, 5);
foreach (var pair in items)
{
result.Add(pair);
}
var expected = items.OrderByDescending(x => x.Value).Take(5).ToArray();
bool sequenceEqual = result.SequenceEqual(expected);
Assert.IsTrue(sequenceEqual);
}
private static LimitedSizeSortedList<T> FromArray<T>(T[] items) where T: IComparable<T>
{
var list1 = XLimitedSizeSortedList.FromComparable<T>(items.Length);
foreach (T item in items)
{
list1.Add(item);
}
return list1;
}
}
public class FrequencyComparer : IComparer<KeyValuePair<char, int>>, IEqualityComparer<KeyValuePair<char, int>>
{
public int Compare(KeyValuePair<char, int> x, KeyValuePair<char, int> y)
{
return x.Value.CompareTo(y.Value);
}
public bool Equals(KeyValuePair<char, int> x, KeyValuePair<char, int> y)
{
return x.Key == y.Key;
}
public int GetHashCode(KeyValuePair<char, int> obj)
{
return obj.Key.GetHashCode();
}
public static FrequencyComparer Instance { get; } = new FrequencyComparer();
}
Upvotes: 0
Views: 628
Reputation: 109762
Assuming you don't want duplicates (from inspecting your code), I think you would be better off using a SortedSet<T>
to implement this functionality.
That would look something like this:
public sealed class LimitedSizeSortedList<T>: IEnumerable<T>
{
readonly int _size;
readonly SortedSet<T> _items;
public LimitedSizeSortedList(IComparer<T> comparer, int size)
{
if (size < 1)
throw new ArgumentException();
_size = size;
_items = new SortedSet<T>(comparer);
}
public void Add(T item)
{
if (_items.Contains(item))
return;
if (_items.Count < _size)
{
_items.Add(item);
return;
}
if (_items.Comparer.Compare(item, _items.Min) <= 0)
return;
_items.Remove(_items.Min);
_items.Add(item);
}
public void Clear()
{
_items.Clear();
}
public IEnumerator<T> GetEnumerator()
{
return _items.GetEnumerator();
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
Upvotes: 1