woshidashen
woshidashen

Reputation: 351

C Programming output text file

Hi, I just started programming and have a beginner question: I would like to better understand how the fprintf() function works. When I create a text file, I realized there are various modes for opening it (e.g. read-only, append and write). And when I write to the file I created from inside a loop, the order in which the content is added seems to change when I do

file = fopen(name,"a+");

And not all of the content written in the loop ends up in the file if it is

file = fopen(name,"w");

So what's the most convenient way to create a text file? Thank you!

So say I want to write all the words from a trie to a file: the order in the text file is different from what I get on screen when I simply replace fprintf() with printf(). I have a global node for the root of the tree, and a node pointer pointing to it for use by other functions:

/* calloc zero-fills the root node, so every child pointer and flag starts
 * out cleared; plain malloc would leave them indeterminate. */
struct node *root = calloc(1, sizeof(struct node));

And the function is:

/*
 * Write every word stored below node p to file, one per line, in the form
 * "<word> <occurrence> <super>".
 *
 * The letters on the path from the root are kept in the global buffer
 * `word`; the global `k` is the index at which the next letter is stored.
 * k has the same value on return as it had on entry.
 */
static void printSubtree(FILE *file, struct node *p)
{
    for (int i = 0; i < 26; i++) {
        struct node *child = p->child[i];

        if (child == NULL)
            continue;

        word[k] = (char)(i + 'a');

        if (child->isword == 1) {
            word[k + 1] = '\0';
            fprintf(file, "%s %d %d\n", word, child->occurrence, child->super);
        }

        /* A word-ending leaf has nothing below it (insert only sets leaf
         * while noempty is still 0), so there is no subtree to visit. */
        if (child->isword == 1 && child->leaf == 1)
            continue;

        k++;
        printSubtree(file, child);
        k--;
    }
}

/*
 * Append every word stored in the trie rooted at r to the file
 * "man<num>.txt" (the name is built in the global `name` from the global
 * `num`).
 *
 * The file is opened exactly once here and the single stream is handed to
 * the recursive helper. Opening it again in every recursive call gives each
 * call its own buffered stream, and each buffer is only flushed when that
 * call closes it, so the text reaches the file in a different order than
 * it was written. The trie itself is not modified.
 *
 * If the file cannot be opened, an error is reported on stderr and nothing
 * is written.
 */
void printResult(struct node* r){
    FILE *file;

    sprintf(name, "man%d.txt", num);
    file = fopen(name, "a+");
    if (file == NULL) {
        perror(name);
        return;
    }

    printSubtree(file, r);

    fclose(file);
}

And the node:

/* One trie node; each node stands for one letter 'a'..'z' of a stored word. */
struct node {
    /* Node this one hangs off; insert sets it when the node is created. */
    struct node *parent;
    /* Set to 1 by insert once the node has been given a child. */
    int noempty;
    /* Set to 1 by insert on the node where an inserted word ends. */
    int isword;
    /* Written out by printResult; not assigned in the code shown. */
    int super;
    /* Written out by printResult; not assigned in the code shown. */
    int occurrence;
    /* Set to 1 by insert when a word ends on a node with no children. */
    int leaf;
    /* Child for each letter, indexed by (letter - 'a'). */
    struct node *child[26];
};

Lastly it's the insert function

/*
 * Insert the word c into the trie rooted at root and return root.
 *
 * Only the lowercase letters 'a'..'z' can be stored, because each letter is
 * used as an index into child[26]. A word containing any other character is
 * ignored and the trie is left untouched (indexing with it would run off
 * the child array).
 *
 * New nodes are allocated with calloc so that their child pointers and
 * flags start out cleared; the checks below read them. If an allocation
 * fails the word is not marked as present and root is returned as usual.
 */
struct node* insert(struct node *root,char *c){
    int i;
    int l = length(c);
    struct node *temp = root;

    /* Validate the whole word before changing anything. */
    for (i = 0; i < l; i++) {
        if (c[i] < 'a' || c[i] > 'z')
            return root;
    }

    for (i = 0; i < l; i++) {
        int index = c[i] - 'a';

        if (temp->child[index] == NULL) {
            /* New node */
            struct node *n = calloc(1, sizeof(struct node));
            if (n == NULL)
                return root;
            n->parent = temp;
            temp->child[index] = n;
            temp->noempty = 1;
        }

        /* We are about to step below temp, so it is not a leaf. */
        temp->leaf = 0;
        temp = temp->child[index];
    }

    /* The word ends here; it is a leaf only if nothing hangs below it. */
    if (temp->noempty == 0)
        temp->leaf = 1;
    temp->isword = 1;
    return root;
}

Upvotes: 1

Views: 833

Answers (2)

Jonathan Leffler
Jonathan Leffler

Reputation: 755074

After discussion in chat, we came up with:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define length(x) strlen(x)

/* One trie node; each node stands for one letter 'a'..'z' of a stored word. */
struct node {
    struct node *parent;     /* node this one hangs off (set by insert) */
    int noempty;             /* 1 once insert has given this node a child */
    int isword;              /* 1 when an inserted word ends at this node */
    int super;               /* printed by printResult; not set in this file */
    int occurrence;          /* printed by printResult; not set in this file */
    int leaf;                /* 1 when a word ends here and nothing hangs below */
    struct node *child[26];  /* children, indexed by (letter - 'a') */
};

/* Root of the trie; main() allocates it before the first insert. */
static struct node *root = NULL;

/* Letters on the path from the root to the node printResult is visiting. */
static char word[1024];

/* Index in word at which printResult stores the next letter. */
static int k = 0;

/*
 * Write every word stored below node r to file, one per line, in the form
 * "<word> <occurrence> <super>". Children are visited in index order
 * ('a' first), so the words come out in alphabetical order.
 *
 * The letters on the path from the root are kept in the global buffer
 * `word`; the global `k` is the index at which the next letter is stored
 * and has the same value on return as it had on entry.
 *
 * The trie is only read: the isword markers are left intact, so the same
 * trie can be printed more than once with the same result.
 */
static
void printResult(FILE * file, struct node *r)
{
    for (int i = 0; i < 26; i++)
    {
        struct node *child = r->child[i];

        if (child == NULL)
        {
            continue;
        }

        word[k] = (char)(i + 'a');

        if (child->isword == 1)
        {
            word[k + 1] = '\0';
            fprintf(file, "%s %d %d\n", word, child->occurrence, child->super);
        }

        /* A word-ending leaf has nothing below it (insert only sets leaf
         * while noempty is still 0), so there is no subtree to visit. */
        if (child->isword == 1 && child->leaf == 1)
        {
            continue;
        }

        k++;
        printResult(file, child);
        k--;
    }
}

/*
 * Insert the word c into the trie rooted at root and return root.
 *
 * Only the lowercase letters 'a'..'z' can be stored, because each letter is
 * used as an index into child[26]. A word containing any other character is
 * ignored and the trie is left untouched (indexing with it would run off
 * the child array).
 *
 * New nodes are allocated with calloc so that their child pointers and
 * flags start out cleared; the checks below read them. If an allocation
 * fails the word is not marked as present and root is returned as usual.
 */
static
struct node *insert(struct node *root, char *c)
{
    size_t l = strlen(c);

    /* Validate the whole word before changing anything. */
    for (size_t i = 0; i < l; i++)
    {
        if (c[i] < 'a' || c[i] > 'z')
        {
            return root;
        }
    }

    struct node *temp = root;
    for (size_t i = 0; i < l; i++)
    {
        int index = c[i] - 'a';
        if (temp->child[index] == NULL)
        {
            /* New node */
            struct node *n = calloc(1, sizeof(*n));
            if (n == NULL)
            {
                return root;
            }
            n->parent = temp;
            temp->child[index] = n;
            temp->noempty = 1;
        }
        /* We are about to step below temp, so it is not a leaf. */
        temp->leaf = 0;
        temp = temp->child[index];
    }

    /* The word ends here; it is a leaf only if nothing hangs below it. */
    if (temp->noempty == 0)
    {
        temp->leaf = 1;
    }
    temp->isword = 1;
    return root;
}

/*
 * Read words from standard input (one per line), echo each line in square
 * brackets, insert it into the trie, and finally write the trie's contents
 * to "man0.txt".
 *
 * Returns 0 on success and EXIT_FAILURE if the root node cannot be
 * allocated or the output file cannot be opened or closed cleanly.
 */
int main(void)
{
    /* calloc hands back a zero-filled root: no children, all flags clear. */
    root = calloc(1, sizeof(*root));
    if (root == NULL)
    {
        perror("calloc");
        return EXIT_FAILURE;
    }

    char line[1024];

    while (fgets(line, sizeof(line), stdin) != NULL)
    {
        /* fgets keeps the newline; cut the line off there. */
        line[strcspn(line, "\n")] = '\0';
        printf("[%s]\n", line);
        root = insert(root, line);
    }

    char name[1024];
    int num = 0;
    snprintf(name, sizeof(name), "man%d.txt", num);

    /* "w" starts from an empty file on every run. */
    FILE *file = fopen(name, "w");
    if (file == NULL)
    {
        perror(name);
        return EXIT_FAILURE;
    }

    printResult(file, root);

    /* fclose flushes the buffered output, so a failure here means the
     * file may be incomplete. */
    if (fclose(file) != 0)
    {
        perror(name);
        return EXIT_FAILURE;
    }

    return 0;
}

Given input file:

elephant
rhinoceros
mouse

the output in man0.txt was:

elephant 0 0
mouse 0 0
rhinoceros 0 0

This isn't dreadfully exciting; each word starts at its own node.

Similarly, given the input:

boo
book
booking
john
tex
text

the output was:

boo 0 0
book 0 0
booking 0 0
john 0 0
tex 0 0
text 0 0

It seems that the task specified that printResult() could take no arguments. That makes life extraordinarily difficult with a recursive function. The code shown passes a node to the function — and also the file stream to write to. It uses "w" to open the file rather than "a+". Since the file was never read, the + was not needed; using "a" instead of "w" would mean that the information was appended to whatever was left in the file from the previous run.

There are too many globals; there were more when I started. k should not still be a global, but I've not removed it yet.

Upvotes: 1

autistic
autistic

Reputation: 15652

what's the most convenient way to create a text file?

I want to write all the words from a trie to a file: the order in the text file is different from what I get on screen when I simply replace fprintf() with printf(). I have a global node for the root of the tree, and a node pointer pointing to it for use by other functions

In order to most conveniently write a trie to disk, it'll help massively if you can refer to the entire trie as an array, i.e. don't just have a root that leads to separate allocations; have everything refer back to the same array! Then it is as simple as writing nodes straight from the array to the disk, provided the file doesn't need to be read by other implementations; if it does, you'll need to translate the integers into a portable representation as you write them. Either way, you won't need to worry about the differences between a+ and w.

There are other benefits to using a single allocation:

  • One allocation means one free, which in turn means your code will be much faster. (Actually, no allocation is necessary, as you'll see below.)
  • Cache locality; a single allocation is likely to be cached better than multiple separate allocations. This will lead to fewer cache misses in a portable way, and less non-portable manual optimisation will be required as a result. Again, your code will be faster.
  • How many C-standard functions actually allocate memory behind the scenes? None, except for the memory allocation functions, of course... How does this benefit you, as the user of those functions? If you think about it, you can write your code malloc-free, so the caller decides what type of allocation to use (be it automatic storage duration, static storage duration, malloc/realloc) similar to how scanf, strcat, sprintf, etc lets you choose... This makes things much easier to test, for example you can check my PATRICIA trie test code and my PATRICIA library code to see I don't need to test using malloc and free, which makes testing look cleaner obviously... Not to mention, there are usecases where malloc and free aren't the best choices!

The last point is particularly useful, as it allows the caller to store data within the trie nodes as an extension, rather than pointing to the data externally as you've done. This makes it even easier to write your file, as you could technically just dump the entire array to the file in one fell swoop if all of the information is there. Food for thought: Do you suppose a disk-based trie using RAM as a level of cache might be doable?

Upvotes: 1

Related Questions