Meerkat
Meerkat

Reputation: 281

How to write a simple tree to file and read it back?

I have some code to create a simple tree based on pointers to nodes, and would like to write this tree (with its data) to a file and than read it back from file into memory. How can I do this ? Here is my code to create a simple tree:

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h>

struct Node 
{ 
    void *data; 

    struct Node *left; 
    struct Node *right;
};

int main(void) 
{ 
    unsigned int_size = sizeof(int);
    int data;

    /* Tree root node */
    struct Node *start = (struct Node*)malloc(sizeof(struct Node));

    data = 5;

    start->data = malloc(int_size);
    memcpy(start->data, &data, int_size);

    /* Left node of root */
    start->left = (struct Node*)malloc(sizeof(struct Node));

    data = 4;

    start->left->data = malloc(int_size);
    memcpy(start->left->data, &data, int_size);

    start->left->left = NULL;
    start->left->right = NULL;

    /* Right node of root */
    start->right = (struct Node*)malloc(sizeof(struct Node));

    data = 3;

    start->right->data = malloc(int_size);
    memcpy(start->right->data, &data, int_size);

    start->right->left = NULL;
    start->right->right = NULL;

    /* Print data */

    printf("%d\n",*(int*)(start->data));
    printf("%d\n",*(int*)(start->left->data));
    printf("%d\n",*(int*)(start->right->data));

    return 0; 
}

Upvotes: 0

Views: 225

Answers (2)

Serge Ballesta
Serge Ballesta

Reputation: 149135

There are two common ways for serializing objects having pointers on other objects. First one is to replace the pointer with an offset in the file, second one is to use a simple index. In fact, the offset way is mainly used when the data is used directly from the file (database files for example) while the index way is more used for simple storage.

Here a simple way would be to store the nodes into a file recursively as left_node - right_node - main_node. That way when you store a node you already know the index in the file of its left and right children. A possible implementation is then:

static size_t do_save(FILE *fd, struct Node *node, unsigned data_size, int curr) {
    size_t left=0, right=0;
    if (node->left != NULL) {   // first left sub_hierarchy
        left = do_save(fd, node->left, data_size, curr);
        curr = left;
    }
    if (node->left != NULL) {   // then right one
        right = do_save(fd, node->right, data_size, curr);
        curr = right;
    }
    fwrite(&left, sizeof(left), 1, fd);       // store index of left child
    fwrite(&right, sizeof(right), 1, fd);     // then index of right child
    fwrite(node->data, data_size, 1, fd);     // then the data
    return curr + 1;                          // and return current index
}
size_t save(FILE *fd, struct Node *root, unsigned data_size) {
    size_t nb = do_save(fd, root, data_size, 0);
    return nb;
}

Here, an index of 0 means a null pointer, and a non null index is a one based index in the file

To deserialize, as I assume that you want each node to be individually allocated, I would use a temporary array of pointers to keep the actual addresses of the allocated nodes:

struct Node* load(FILE *fd, unsigned data_size) {
    size_t nb_elts, left, right;
    fseek(fd, 0, SEEK_END);                // computer number of nodes in the file
    nb_elts = ftell(fd) / (data_size + 2*sizeof(size_t));
    struct Node** ptx = malloc(nb_elts * sizeof(*ptx));  // allocate array of pointers
    fseek(fd, 0, SEEK_SET);
    for(size_t i=0; i<nb_elts; i++) {                    // loop reading nodes
        ptx[i] = malloc(sizeof(struct Node));            // allocate node and data
        ptx[i]->data = malloc(data_size);
        fread(&left, sizeof(size_t), 1, fd);             // extract child indices
        fread(&right, sizeof(size_t), 1, fd);
        fread(ptx[i]->data, data_size, 1, fd);                 // read data
        ptx[i]->left = (left == 0) ? NULL : ptx[left - 1];     // convert indices to pointers
        ptx[i]->right = (right == 0) ? NULL : ptx[right - 1];
    }
    struct Node *last = ptx[nb_elts - 1];
    free(ptx);                                     // free the temporary array
    return last;                                   // and return the root node
}

Upvotes: 1

bruno
bruno

Reputation: 32596

Here a proposal, I use preprocessor macro TYPE to be able to change type and separated functions to read/write an element of that type, I also use function mk to easily create node rather than to duplicate the code as you did for each node.

For the serialization I use a very simple way

  • 'e' indicates an empty node
  • 'n' indicate a non empty node, then I write the value then the left then the right node

I also added pr to easily print a tree as a debug function.

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h>

#define TYPE int

struct Node 
{ 
  TYPE data; 

  struct Node *left; 
  struct Node *right;
};

/* make a new Node */
struct Node * mk(TYPE v, struct Node * l, struct Node * r)
{
  struct Node * n = malloc(sizeof(struct Node));

  if (n == NULL) {
    fprintf(stderr, "not enough memory\n");
    exit(-1);
  }

  n->data = v;
  n->left = l;
  n->right = r;

  return n;
}

/* write/read data */
void wrData(TYPE v, FILE * fp)
{
  fprintf(fp, "%d", v); /* dedicated to int */
}

TYPE rdData(FILE * fp)
{
  TYPE v;

  fscanf(fp, "%d", &v); /* dedicated to int */
  return v;
}

/* serialize a tree */
void wr(struct Node * n, FILE * fp)
{
  if (n == NULL)
    fputc('e', fp);
  else {
    fputc('n', fp);
    wrData(n->data, fp);
    wr(n->left, fp);
    wr(n->right, fp);
  }
}

/* unserialize a tree */
struct Node * rd(FILE * fp)
{
  switch (fgetc(fp)) {
  case 'e':
    return NULL;
  case 'n':
    {
      TYPE v = rdData(fp);
      struct Node * l = rd(fp);

      return mk(v, l, rd(fp));
    }
  default:
    fprintf(stderr, "invalid file");
    exit(-1);
  }
}

/* for debug, show tree */
void pr(struct Node * t)
{
  if (t == NULL)
    printf("()");
  else {
    putchar('(');
    pr(t->left);
    putchar(' ');
    wrData(t->data, stdout);
    putchar(' ');
    pr(t->right);
    putchar(')');
  }
}

/* free a tree */
void del(struct Node * t)
{
  if (t != NULL) {
    del(t->left);
    del(t->right);
    free(t);
  }
}

int main() 
{ 
    /* Tree root node */
    struct Node *start = mk(5, mk(4, NULL, NULL), mk(3, NULL, NULL));

    /* show it */
    pr(start);
    putchar('\n');

    /* serialize */
    FILE * fp;

    if ((fp = fopen("/tmp/t", "w")) == 0) {
      fprintf(stderr, " cannot open /tmp/t to write");
      exit(-1);
    }
    wr(start, fp);
    fclose(fp);

    /* free tree */
    del(start);

    /* unserialize */
    if ((fp = fopen("/tmp/t", "r")) == 0) {
      fprintf(stderr, " cannot open /tmp/t to read");
      exit(-1);
    }
    start = rd(fp);
    fclose(fp);

    /* show it */
    pr(start);
    putchar('\n');

    /* free it */
    del(start);

    return 0; 
}

Compilation and execution :

/tmp % gcc -pedantic -Wextra -Wall t.c
/tmp % ./a.out
((() 4 ()) 5 (() 3 ()))
((() 4 ()) 5 (() 3 ()))
/tmp % cat t ; echo
n5n4een3ee

Execution under valgrind :

/tmp % valgrind ./a.out
==19907== Memcheck, a memory error detector
==19907== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==19907== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==19907== Command: ./a.out
==19907== 
((() 4 ()) 5 (() 3 ()))
((() 4 ()) 5 (() 3 ()))
==19907== 
==19907== HEAP SUMMARY:
==19907==     in use at exit: 0 bytes in 0 blocks
==19907==   total heap usage: 8 allocs, 8 frees, 1,280 bytes allocated
==19907== 
==19907== All heap blocks were freed -- no leaks are possible
==19907== 
==19907== For counts of detected and suppressed errors, rerun with: -v
==19907== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 6 from 6)

Upvotes: 1

Related Questions