vdaiep
vdaiep

Reputation: 47

B-Tree implementation on C++ has a memory leak?

I have this B-Tree implementation, which I'm testing using search() and insert(). Testing is basically this:

void function(){
    BTree b16(16);
    // do lots of inserts and searchs on b16
}
for(many_times){
    function();
}

If I understand correctly, after each iteration of function(), b16 should get destroyed. However, after ~250 iterations, I get a bad_alloc error, which means I have a memory leak.

Is there a problem with the destructors? Here's the implementation:

#include <iostream>
#include <vector>
using namespace std;
typedef unsigned int uint;

class BNode{
    private:
        uint *keys;
        int B;
        BNode **C;
        int n;
        bool leaf;

    public:
        BNode(int temp, bool bool_leaf);
        ~BNode();
        void insertNonFull(uint k);
        void splitChild(int i, BNode *y);
        void traverse();
        BNode *search(uint k);
        friend class BTree;
};

class BTree{
    private:
        BNode *root;
        int B;
    public:
        BTree(int temp);
        ~BTree();
        BNode *search(uint k);
        int search_bool(uint k);
        void insert(uint k);
};

BNode::BNode(int t1, bool leaf1) {
    B = t1;
    leaf = leaf1;
    keys = new uint[2*B - 1];
    C = new BNode *[2*B];
    n = 0;
}

BNode::~BNode(){
    delete[] keys;
    delete[] C;
}

BNode *BNode::search(uint k){
    int i = 0;
    while (i < n && k > keys[i]){
        i++;
    }
    if(keys[i] == k){
        return this;
    }
    if (leaf == true){
        return NULL;
    }
    return C[i]->search(k);
}

void BTree::insert(uint k){
    if (root == NULL) {
        root = new BNode(B, true);
        root->keys[0] = k;
        root->n = 1;
    } 
    else{
        if (root->n == 2*B - 1){
            BNode *s = new BNode(B, false);
            s->C[0] = root;
            s->splitChild(0, root);
            int i = 0;
            if (s->keys[0] < k){
                i++;
            }
            s->C[i]->insertNonFull(k);
            root = s;
        } 
        else{
            root->insertNonFull(k);
        }
    }
}

void BNode::insertNonFull(uint k) {
    int i = n - 1;
    if (leaf == true) {
        while (i>=0 && keys[i] > k) {
            keys[i+1] = keys[i];
            i--;
        }
        keys[i+1] = k;
        n = n + 1;
    } 
    else {
        while (i>=0 && keys[i] > k){
            i--;
        }
        if (C[i+1]->n == 2*B-1) {
            splitChild(i+1, C[i+1]);
            if (keys[i + 1] < k){
                i++;
            }
        }
        C[i+1]->insertNonFull(k);
    }
}

void BNode::splitChild(int i, BNode *y) {
    BNode *z = new BNode(y->B, y->leaf);
    z->n = B - 1;
    for (int j = 0; j < B - 1; j++){
        z->keys[j] = y->keys[j+B];
    }
    if (!y->leaf){
        for (int j = 0; j < B; j++)
        z->C[j] = y->C[j + B];
    }
    y->n = B - 1;
    for(int j=n; j >= i+1; j--){
        C[j+1] = C[j];
    }
    C[i + 1] = z;
    for (int j = n - 1; j >= i; j--){
        keys[j+1] = keys[j];
    }
    keys[i] = y->keys[B - 1];
    n = n + 1;
}

BTree::BTree(int temp){
    root = NULL;
    B = temp;
}

BTree::~BTree(){
    delete root;
}

BNode *BTree::search(uint k){
    return (root == NULL) ? NULL : root->search(k);
}

int BTree::search_bool(uint k){
    if(search(k) != NULL){
        return true;
    }
    else{
        return false;
    }
}

Thanks in advance.

Upvotes: 1

Views: 531

Answers (1)

sehe
sehe

Reputation: 393557

So, the simple diagnosis is that

delete[] C;

deletes only the array, not the nodes contained by it. So, (a) make sure they're properlu zero-initialized (b) delete them as well:

BNode** C    = new BNode* [2 * B] { 0 };

// in the destructor:
for (int i = 0; i < 2 * B; ++i)
    delete C[i];
delete[] C;

HOWEVER.

This doesn't work well because nodes can be split. After you moved some nodes from one node's C to another node's C, you run into double-free. So, when you split nodes, you have to make sure you set the moved-from C elemeent to NULL again:

            z->C[j] = y->C[j + B];
            y->C[j + B] = nullptr;

Now, this program runs clean under valgrind, ubsan and asan and without leaks:

Live On Coliru

#include <iostream>
#include <vector>
using uint = unsigned;

class BNode {
  private:
    int     B;
    bool    leaf;
    uint*   keys = new uint[2 * B - 1]{0};
    BNode** C    = new BNode* [2 * B] { 0 };
    int     n    = 0;

  public:
    BNode(int t1, bool leaf1) : B(t1), leaf(leaf1) {}

    ~BNode()
    {
        delete[] keys;
        for (int i = 0; i < 2 * B; ++i)
            delete C[i];
        delete[] C;
    }

    BNode const* search(uint k) const
    {
        int i = 0;
        while (i < n && k > keys[i]) {
            i++;
        }
        if (keys[i] == k) {
            return this;
        }
        return leaf ? nullptr : C[i]->search(k);
    }

    void insertNonFull(uint k)
    {
        int i = n - 1;
        if (leaf == true) {
            while (i >= 0 && keys[i] > k) {
                keys[i + 1] = keys[i];
                i--;
            }
            keys[i + 1] = k;
            n           = n + 1;
        } else {
            while (i >= 0 && keys[i] > k) {
                i--;
            }
            if (C[i + 1]->n == 2 * B - 1) {
                splitChild(i + 1, C[i + 1]);
                if (keys[i + 1] < k) {
                    i++;
                }
            }
            C[i + 1]->insertNonFull(k);
        }
    }

    void splitChild(int i, BNode* y)
    {
        BNode* z = new BNode(y->B, y->leaf);
        z->n     = B - 1;
        for (int j = 0; j < B - 1; j++) {
            z->keys[j] = y->keys[j + B];
        }
        if (!y->leaf) {
            for (int j = 0; j < B; j++) {
                z->C[j]     = y->C[j + B];
                y->C[j + B] = nullptr;
            }
        }
        y->n = B - 1;
        for (int j = n; j >= i + 1; j--) {
            C[j + 1] = C[j];
        }
        C[i + 1] = z;
        for (int j = n - 1; j >= i; j--) {
            keys[j + 1] = keys[j];
        }
        keys[i] = y->keys[B - 1];
        n       = n + 1;
    }

    friend class BTree;
};

class BTree {
  private:
    BNode* root = nullptr;
    int    B;

  public:
    BTree(int temp)
    {
        root = nullptr;
        B    = temp;
    }

    ~BTree() { delete root; }

    BNode const* search(uint k) const
    {
        return (root == nullptr) ? nullptr : root->search(k);
    }

    bool search_bool(uint k) const { return search(k) != nullptr; }

    void insert(uint k)
    {
        if (!root) {
            root          = new BNode(B, true);
            root->keys[0] = k;
            root->n       = 1;
        } else {
            if (root->n == 2 * B - 1) {
                BNode* s = new BNode(B, false);
                s->C[0]  = root;
                s->splitChild(0, root);
                int i = 0;
                if (s->keys[0] < k) {
                    i++;
                }
                s->C[i]->insertNonFull(k);
                root = s;
            } else {
                root->insertNonFull(k);
            }
        }
    }
};

int main()
{
    for (int b = 8; b < 17; ++b) {
        BTree tree(b);
        for (int i = 0; i < 100'000; ++i)
            tree.insert(rand() % 1000);
    }
}

In Closing

Kudos for getting the algorithmics largely right here. That's not easy.

As you can see in my cleanup/review I hardened a lot of stuff, mainly around initialization. This is an important habit. I can't actually rule out that not having that would have exposed other sleeping bugs.

Also note the increased const-correctness.

Also, like other said, prefer smart pointers and modern C++ techniques. It will be amazing how much less error prone just using std::array, unique_ptr and so will be. For example, the bug with moving nodes in splitChild would never have compiled because you have to explcitly move-assign unique_ptr

Bonus

Example in more modern C++:

  • without any new/delete
  • without any raw pointer
  • no more manual destructors (even no constructors required, really)
  • compiler checked move semantics, NICE!
  • statically known B factor, so everything becomes 10x more performant, while still being tunable
  • generic key (element) type, so you can now store std::string, double, whatnot
  • generic comparator, so you sort your keys in alternative orders (e.g. Btree<std::string, 16, std::greater<> > to store the keys in descending order instead of ascending).
  • No leaks!

Live On Coliru

#include <array>
#include <algorithm>
#include <memory>
#include <iostream>

template <typename T, unsigned B = 16, typename Cmp = std::less<T>>
class BTree {
  private:
    static constexpr unsigned MaxKeys     = 2 * B - 1;
    static constexpr unsigned MaxChildren = 2 * B;

    struct BNode;
    using NodePtr = std::unique_ptr<BNode>;

    struct BNode {
        bool _leaf;
        int  _n = 0;

        std::array<T, MaxKeys>           _keys{};
        std::array<NodePtr, MaxChildren> _children{};

        BNode(bool leaf1) : _leaf(leaf1) {}

        BNode const* search(T k, Cmp cmp) const
        {
            int i = 0;
            while (i < _n && cmp(_keys[i], k)) {
                i++;
            }
            if (_keys[i] == k) {
                return this;
            }
            return _leaf ? nullptr : _children[i]->search(k, cmp);
        }

        void insertNonFull(T k, Cmp cmp)
        {
            int i = _n - 1;
            if (_leaf == true) {
                while (i >= 0 && cmp(k, _keys[i])) {
                    _keys[i + 1] = _keys[i];
                    i--;
                }
                _keys[i + 1] = k;
                _n           = _n + 1;
            } else {
                while (i >= 0 && cmp(k, _keys[i])) {
                    i--;
                }
                if (_children[i + 1]->_n == MaxKeys) {
                    splitChild(i + 1, *_children[i + 1]);
                    if (cmp(_keys[i + 1], k)) {
                        i++;
                    }
                }
                _children[i + 1]->insertNonFull(k, cmp);
            }
        }

        void splitChild(int i, BNode& y)
        {
            NodePtr z = std::make_unique<BNode>(y._leaf);
            z->_n     = B - 1;
            for (unsigned j = 0; j < B - 1; j++) {
                z->_keys[j] = y._keys[j + B];
            }
            if (!y._leaf) {
                for (unsigned j = 0; j < B; j++) {
                    z->_children[j] = std::move(y._children[j + B]);
                }
            }
            y._n = B - 1;
            for (int j = _n; j >= i + 1; j--) {
                _children[j + 1] = std::move(_children[j]);
            }
            _children[i + 1] = std::move(z);
            for (int j = _n - 1; j >= i; j--) {
                _keys[j + 1] = _keys[j];
            }
            _keys[i] = y._keys[B - 1];
            _n       = _n + 1;
        }
    };

    NodePtr root = nullptr;
    Cmp     _cmp{};

  public:
    BTree(Cmp cmp = {}) : _cmp(std::move(cmp)) {}

    BNode const* search(T k) const {
        return root ? root->search(k, _cmp) : nullptr;
    }

    bool search_bool(T k) const { return search(k) != nullptr; }

    void insert(T k)
    {
        if (!root) {
            root           = std::make_unique<BNode>(true);
            root->_keys[0] = k;
            root->_n       = 1;
        } else {
            if (root->_n == MaxKeys) {
                NodePtr s       = std::make_unique<BNode>(false);
                s->splitChild(0, *root);
                s->_children[0] = std::move(root);
                int i = 0;
                if (_cmp(s->_keys[0], k)) {
                    i++;
                }
                s->_children[i]->insertNonFull(k, _cmp);
                root = std::move(s);
            } else {
                root->insertNonFull(k, _cmp);
            }
        }
    }
};

int main()
{
    using Asc  = std::less<>;
    using Desc = std::greater<>;
    BTree<double,   8,  Asc>  b8;
    BTree<int,      9,  Desc> b9;
    BTree<unsigned, 10, Asc>  b10;
    BTree<size_t,   11, Desc> b11;
    BTree<double,   12, Asc>  b12;
    BTree<int,      13, Desc> b13;
    BTree<unsigned, 14, Asc>  b14;
    BTree<size_t,   15, Desc> b15;
    BTree<int,      16> b16; // default is ascending

    for (int i = 0; i < 100'000; ++i) {
        b8.insert(rand() % 10000);
        b9.insert(rand() % 10000);
        b10.insert(rand() % 10000);
        b11.insert(rand() % 10000);
        b12.insert(rand() % 10000);
        b13.insert(rand() % 10000);
        b14.insert(rand() % 10000);
        b15.insert(rand() % 10000);
        b16.insert(rand() % 10000);
    }

    {
        struct NC {
            int v;
            bool operator==(NC const& o) const { return v == o.v; }
        };
        struct CMP {
            bool operator()(NC const& a, NC const& b) const { return a.v < b.v; }
        };

        BTree<NC, 8, CMP> b8;
        b8.insert({42});
        bool ok = b8.search_bool({42});
    }
}

Upvotes: 2

Related Questions