Siddharth Ubana
Siddharth Ubana

Reputation: 31

Getting segmentation fault using malloc for 2D array

I had initialized a 2D array using malloc for adjacency matrix of a large graph and then initializing each index with 0 or 1 depending upon the edge list.But I m getting a segmentation fault. Here is my code.

#include <stdio.h>
#include <stdlib.h>
int MAX = 50000;
void clustering(int **adj);

int main()
{
  int i, j, k;  
  FILE *ptr_file1;
  int **adj;

  adj = (int **)malloc(sizeof(int *)*MAX);
  for(i=0;i<MAX;++i)
  adj[i] = (int *)malloc(sizeof(int)*MAX);

  struct adjacency
  {
     int node1;
     int node2;
  };
  struct adjacency a;

  ptr_file1 = fopen("Email-Enron.txt","r"); //Opening file containing edgelist of approx  37000 nodes.

  if (!ptr_file1)
    return 1;

  while(fscanf(ptr_file1,"%d %d",&a.node1, &a.node2)!=EOF)
  {
     adj[a.node1][a.node2] = 1;                   //Getting segmentation fault here   
     adj[a.node2][a.node1] = 1; 

  printf("adj[%d][%d] = %d   adj[%d][%d] = %d\n",a.node1,a.node2,adj[a.node1][a.node2],a.node2,a.node1,adj[a.node2][a.node1]);  
  }
  clustering(adj);
  return (0);
 }

Here is my output

......
......
adj[85][119] = 1   adj[119][85] = 1
adj[85][154] = 1   adj[154][85] = 1
adj[85][200] = 1   adj[200][85] = 1
adj[85][528] = 1   adj[528][85] = 1
adj[85][604] = 1   adj[604][85] = 1
adj[85][661] = 1   adj[661][85] = 1
adj[85][662] = 1   adj[662][85] = 1
adj[85][686] = 1   adj[686][85] = 1
adj[85][727] = 1   adj[727][85] = 1
adj[85][1486] = 1   adj[1486][85] = 1
adj[85][1615] = 1   adj[1615][85] = 1
adj[85][2148] = 1   adj[2148][85] = 1
adj[85][2184] = 1   adj[2184][85] = 1
adj[85][2189] = 1   adj[2189][85] = 1
adj[85][2190] = 1   adj[2190][85] = 1
adj[85][2211] = 1   adj[2211][85] = 1
adj[85][3215] = 1   adj[3215][85] = 1
adj[85][4583] = 1   adj[4583][85] = 1
adj[85][4585] = 1   adj[4585][85] = 1
adj[85][4586] = 1   adj[4586][85] = 1
adj[85][4589] = 1   adj[4589][85] = 1
adj[85][4590] = 1   adj[4590][85] = 1
Segmentation fault (core dumped)

What is wrong here.Pls help...

Upvotes: 2

Views: 1387

Answers (6)

Ivan Ivanovich
Ivan Ivanovich

Reputation: 956

for comment. use one dimensions bitmap, but one dimension may be use as two dimention and can be useful for graphs

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>

#define MAX 4000000

unsigned char *bitmapinit(int n);
unsigned char chkbit(unsigned char *map, int n);
void setbit(unsigned char *map, int n);
void unsetbit(unsigned char *map, int n);

int main(int argc, char *argv[])
{
        unsigned int i;
        unsigned char *bitmap = bitmapinit(MAX);
        if (!bitmap) {
                perror("malloc: ");
                exit(EXIT_FAILURE);
        }
        for (i = 0; i < MAX; i++) {
                setbit(bitmap, i);
        }
        for (i = 0; i < MAX; i += 5) {
                 unsetbit(bitmap, i);
        }
        for (i = 0; i < MAX; i++) {
                printf("bit #%d = %d\n", i, (chkbit(bitmap, i))?1:0);
        }
        return 0;
}
unsigned char *bitmapinit(int n)
{
        return calloc(sizeof(unsigned char), n / 8 + 1);
}
unsigned char chkbit(unsigned char *map, int n)
{
        return (unsigned char)map[n / 8] & (1 << (n % 8));
}
void setbit(unsigned char *map, int n)
{
        map[n / 8] = map[n / 8] | (1 << (n % 8));
}
void unsetbit(unsigned char *map, int n)
{
        map[n / 8] = map[n / 8] & ~(1 << (n % 8));
}

I can explain how it is used for graphs, if you need.

space-saving 8x. For matrix from 50000 x 50000 you need ~300MB, graph can be oriented, but not multiply-linked

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>    
#include <errno.h>

#define ROW 50
#define COL 55

unsigned int *bitmapinit(int, int);
bool chkbit(unsigned int *, int, int, int);
void setbit(unsigned int *, int, int, int);
void unsetbit(unsigned int *, int, int, int);


int main(int argc, char *argv[])
{
    unsigned int i, j;
    unsigned int *bitmap = bitmapinit(ROW, COL);
    if (!bitmap) {
        perror("malloc: ");
        exit(EXIT_FAILURE);
    }
    for (i = 0; i < ROW; i+=2)
        for (j = 0; j < COL; j+=2)
            setbit(bitmap, i, j, COL);    

    for (i = 0; i < ROW; i++) {
        for (j = 0; j < COL; j++) {
            printf("%d ",(chkbit(bitmap, i, j, COL)) ? 1 : 0);
        }
        printf("\n");
    }
    printf("\n");
    for (i = 0; i < ROW; i++)
        for (j = 0; j < COL; j++)
            setbit(bitmap, i, j, COL);

    for (i = 0; i < ROW; i += 3)
        for (j = 0; j < COL; j += 3)
            unsetbit(bitmap, i, j, COL);    

    for (i = 0; i < ROW; i++) {
        for (j = 0; j < COL; j++) {
            printf("%d ",(chkbit(bitmap, i, j, COL)) ? 1 : 0);
        }
        printf("\n");
    }
    return 0;
}

unsigned int *bitmapinit(int row, int col) //n it is ROWS, m it is COLUMNS
{
    return calloc(sizeof(unsigned int), (row * col) / 32 + 1);
}
bool chkbit(unsigned int *map, int row, int col, int n)
{
    return map[(row * n + col) / 32] & (1 << (row * n + col) % 32);
}
void setbit(unsigned int *map, int row, int col, int n)
{
    map[(row * n + col) / 32] = map[(row * n + col) / 32] | (1 << (row * n + col) % 32);
}
void unsetbit(unsigned int *map, int row, int col, int n)
{
    map[(row * n + col) / 32] = map[(row * n + col) / 32] & ~(1 << (row * n + col) % 32);
}

program is not complicated, in fact it is a 2-dimensional array, but each elements of the array can be set to only 0 or 1

but with values ​​50000 * 50000 will work for a long time

respectively, to set the bit XY you need to call the setbit(unsigned char *map, int Y, int X, int LenOfRow); to clear the bit XY unsetbit(unsigned char *map, int Y, int X, int LenOfRow); and obtain the values ​​of bit XY checkbit(unsigned char *map, int Y, int X, int LenOfRow);

once again remind you that the value of LenOfRow should not change within a one array

Upvotes: 0

As the others have noted, your problem is very likely the sheer size of your 2D array. So you have three options:

  1. Optimize the size of your adjacency matrix. You can cut the memory consumption by a factor of four (on most systems) by using int8_t instead of int. You can cut it by another factor of eight by using individual bits of the integers that comprise the matrix. That's a factor of 32 which should be enough to get your matrix down to manageable size.

    You could use accessors like this:

    void setAdjacent(int32_t** matrix, int x, int y) {
        matrix[x][y/32] |= (1 << (y & 31));
    }
    
    int isAdjacent(int32_t** matrix, int x, int y) {
        return matrix[x][y/32] & (1 << (y & 31));
    }
    
  2. Exploit the fact that your adjacency matrix is sparse. For each node, store a list of all the other nodes that it is adjacent to.

  3. Buy more RAM.


You can also use a true 2D array like this:

int32_t (*matrix)[MAX] = malloc(MAX*sizeof(*matrix));

This avoids the hassle of allocating an array for each line, and avoids the overhead of one pointer indirection. You would just need to change the signature of the accessors accordingly, their contents does not change at all.

Upvotes: 0

Rerito
Rerito

Reputation: 6086

The problem must be coming from the memory allocation. On a classic computer, sizeof(int) is 4 and sizeof(int*) can be 4 (32 bits OS) or 8 (64 bits OS).

There, you are first allocating the room for 50000 pointers, thus 50000*4 = 200000 bytes at least.

Then, you loop through this in order to allocate 50.000*50.000*4 = 10.000.000.000 bytes = 10 GB !

Since you do NOT check on malloc() return value, my guess is that at some point in this loop :

for(i=0;i<MAX;++i)
    adj[i] = (int *)malloc(sizeof(int)*MAX);

malloc always returns NULL. Let denote M such an index. In your case I can guess that M ≥ 4591.

Later, when reading the data from your file, you try to access a NULL pointer if M ≤ a.node1.

By the way, you could allocate 2D arrays like this :

int **array, i;
if(NULL == (array = malloc(sizeof(int*)*MAX))) {
    printf("Oops, not enough memory ...\n");
    return EXIT_FAILURE;
}
if(NULL == (array[0] = malloc(sizeof(int)*MAX*MAX))) {
    printf("Oops, not enough memory ...\n");
    free(array);
    return EXIT_FAILURE;
}
for(i = 1; i < MAX; i++)
    array[i] = array[0]+i;
// At this point, array is ready to use.
do_stuff();
// When you are done, freeing the memory is not tiresome :
free(array[0]);
free(array);

(Notice that in C, you never cast the return of malloc.)

What is the difference between this allocation and yours ? In yours, each adj[i] point to a dynamically allocated chunk of data. As a consequence, there is little chance that these chunks of data will be contiguous in memory. In the one I propose, there is only 2 memory allocations and in the end the chunks of data pointed by adj[i] and adj[i+1] are contiguous.

NB :

adjacency matrix of a large graph

Although adjacency matrix is a perfectly valid way to store a graph in memory, when the graph tends to be large, you should use adjacency list instead.

Upvotes: 2

Paul R
Paul R

Reputation: 213220

Firstly, add a debug printf before the error:

  while(fscanf(ptr_file1,"%d %d",&a.node1, &a.node2)!=EOF)
  {
     printf("%d %d\n", a.node1, a.node2);

     adj[a.node1][a.node2] = 1;                   //Getting segmentation fault here   
     adj[a.node2][a.node1] = 1; 
  }

That way you can see if your array indices are out of range before your program crashes.

This is just a quick fix for debugging purposes though - really you should have proper error checking:

  while(fscanf(ptr_file1,"%d %d",&a.node1, &a.node2)!=EOF)
  {
     if (a.node1 >= MAX || a.node2 >= MAX)
     {
         fprintf(stderr, "range error: a.node1 = %d, a.node2 = %d\n", a.node1, a.node2);
         exit(1);
     }

     adj[a.node1][a.node2] = 1;                   //Getting segmentation fault here   
     adj[a.node2][a.node1] = 1; 
  }

Upvotes: 0

Aneri
Aneri

Reputation: 1362

50000 * 50000 ints is quite a lot. Namely, it is 9Gb memory for 4 byte integer. Are you quite sure you get all the memory allocated?

Add a check:

if (!adj[i])
   return 2;

Note that you have to compile for x64 and run on an x64 machine for it to work. Most probably you don't need that much data.

Upvotes: 1

Claudio
Claudio

Reputation: 10947

In your specific case, there is no need to allocate an array of pointers pointing to arrays of ints. Just allocate one single array of ints with the size of MAX*MAX.

Upvotes: 0

Related Questions