Reputation: 31
I had initialized a 2D array using malloc for adjacency matrix of a large graph and then initializing each index with 0 or 1 depending upon the edge list.But I m getting a segmentation fault. Here is my code.
#include <stdio.h>
#include <stdlib.h>
int MAX = 50000;
void clustering(int **adj);
int main()
{
int i, j, k;
FILE *ptr_file1;
int **adj;
adj = (int **)malloc(sizeof(int *)*MAX);
for(i=0;i<MAX;++i)
adj[i] = (int *)malloc(sizeof(int)*MAX);
struct adjacency
{
int node1;
int node2;
};
struct adjacency a;
ptr_file1 = fopen("Email-Enron.txt","r"); //Opening file containing edgelist of approx 37000 nodes.
if (!ptr_file1)
return 1;
while(fscanf(ptr_file1,"%d %d",&a.node1, &a.node2)!=EOF)
{
adj[a.node1][a.node2] = 1; //Getting segmentation fault here
adj[a.node2][a.node1] = 1;
printf("adj[%d][%d] = %d adj[%d][%d] = %d\n",a.node1,a.node2,adj[a.node1][a.node2],a.node2,a.node1,adj[a.node2][a.node1]);
}
clustering(adj);
return (0);
}
Here is my output
......
......
adj[85][119] = 1 adj[119][85] = 1
adj[85][154] = 1 adj[154][85] = 1
adj[85][200] = 1 adj[200][85] = 1
adj[85][528] = 1 adj[528][85] = 1
adj[85][604] = 1 adj[604][85] = 1
adj[85][661] = 1 adj[661][85] = 1
adj[85][662] = 1 adj[662][85] = 1
adj[85][686] = 1 adj[686][85] = 1
adj[85][727] = 1 adj[727][85] = 1
adj[85][1486] = 1 adj[1486][85] = 1
adj[85][1615] = 1 adj[1615][85] = 1
adj[85][2148] = 1 adj[2148][85] = 1
adj[85][2184] = 1 adj[2184][85] = 1
adj[85][2189] = 1 adj[2189][85] = 1
adj[85][2190] = 1 adj[2190][85] = 1
adj[85][2211] = 1 adj[2211][85] = 1
adj[85][3215] = 1 adj[3215][85] = 1
adj[85][4583] = 1 adj[4583][85] = 1
adj[85][4585] = 1 adj[4585][85] = 1
adj[85][4586] = 1 adj[4586][85] = 1
adj[85][4589] = 1 adj[4589][85] = 1
adj[85][4590] = 1 adj[4590][85] = 1
Segmentation fault (core dumped)
What is wrong here.Pls help...
Upvotes: 2
Views: 1387
Reputation: 956
for comment. use one dimensions bitmap, but one dimension may be use as two dimention and can be useful for graphs
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#define MAX 4000000
unsigned char *bitmapinit(int n);
unsigned char chkbit(unsigned char *map, int n);
void setbit(unsigned char *map, int n);
void unsetbit(unsigned char *map, int n);
int main(int argc, char *argv[])
{
unsigned int i;
unsigned char *bitmap = bitmapinit(MAX);
if (!bitmap) {
perror("malloc: ");
exit(EXIT_FAILURE);
}
for (i = 0; i < MAX; i++) {
setbit(bitmap, i);
}
for (i = 0; i < MAX; i += 5) {
unsetbit(bitmap, i);
}
for (i = 0; i < MAX; i++) {
printf("bit #%d = %d\n", i, (chkbit(bitmap, i))?1:0);
}
return 0;
}
unsigned char *bitmapinit(int n)
{
return calloc(sizeof(unsigned char), n / 8 + 1);
}
unsigned char chkbit(unsigned char *map, int n)
{
return (unsigned char)map[n / 8] & (1 << (n % 8));
}
void setbit(unsigned char *map, int n)
{
map[n / 8] = map[n / 8] | (1 << (n % 8));
}
void unsetbit(unsigned char *map, int n)
{
map[n / 8] = map[n / 8] & ~(1 << (n % 8));
}
I can explain how it is used for graphs, if you need.
space-saving 8x. For matrix from 50000 x 50000 you need ~300MB, graph can be oriented, but not multiply-linked
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <errno.h>
#define ROW 50
#define COL 55
unsigned int *bitmapinit(int, int);
bool chkbit(unsigned int *, int, int, int);
void setbit(unsigned int *, int, int, int);
void unsetbit(unsigned int *, int, int, int);
int main(int argc, char *argv[])
{
unsigned int i, j;
unsigned int *bitmap = bitmapinit(ROW, COL);
if (!bitmap) {
perror("malloc: ");
exit(EXIT_FAILURE);
}
for (i = 0; i < ROW; i+=2)
for (j = 0; j < COL; j+=2)
setbit(bitmap, i, j, COL);
for (i = 0; i < ROW; i++) {
for (j = 0; j < COL; j++) {
printf("%d ",(chkbit(bitmap, i, j, COL)) ? 1 : 0);
}
printf("\n");
}
printf("\n");
for (i = 0; i < ROW; i++)
for (j = 0; j < COL; j++)
setbit(bitmap, i, j, COL);
for (i = 0; i < ROW; i += 3)
for (j = 0; j < COL; j += 3)
unsetbit(bitmap, i, j, COL);
for (i = 0; i < ROW; i++) {
for (j = 0; j < COL; j++) {
printf("%d ",(chkbit(bitmap, i, j, COL)) ? 1 : 0);
}
printf("\n");
}
return 0;
}
unsigned int *bitmapinit(int row, int col) //n it is ROWS, m it is COLUMNS
{
return calloc(sizeof(unsigned int), (row * col) / 32 + 1);
}
bool chkbit(unsigned int *map, int row, int col, int n)
{
return map[(row * n + col) / 32] & (1 << (row * n + col) % 32);
}
void setbit(unsigned int *map, int row, int col, int n)
{
map[(row * n + col) / 32] = map[(row * n + col) / 32] | (1 << (row * n + col) % 32);
}
void unsetbit(unsigned int *map, int row, int col, int n)
{
map[(row * n + col) / 32] = map[(row * n + col) / 32] & ~(1 << (row * n + col) % 32);
}
program is not complicated, in fact it is a 2-dimensional array, but each elements of the array can be set to only 0 or 1
but with values 50000 * 50000 will work for a long time
respectively, to set the bit XY you need to call the setbit(unsigned char *map, int Y, int X, int LenOfRow);
to clear the bit XY unsetbit(unsigned char *map, int Y, int X, int LenOfRow);
and obtain the values of bit XY checkbit(unsigned char *map, int Y, int X, int LenOfRow);
once again remind you that the value of LenOfRow
should not change within a one array
Upvotes: 0
Reputation: 40695
As the others have noted, your problem is very likely the sheer size of your 2D array. So you have three options:
Optimize the size of your adjacency matrix. You can cut the memory consumption by a factor of four (on most systems) by using int8_t
instead of int
. You can cut it by another factor of eight by using individual bits of the integers that comprise the matrix. That's a factor of 32 which should be enough to get your matrix down to manageable size.
You could use accessors like this:
void setAdjacent(int32_t** matrix, int x, int y) {
matrix[x][y/32] |= (1 << (y & 31));
}
int isAdjacent(int32_t** matrix, int x, int y) {
return matrix[x][y/32] & (1 << (y & 31));
}
Exploit the fact that your adjacency matrix is sparse. For each node, store a list of all the other nodes that it is adjacent to.
Buy more RAM.
You can also use a true 2D array like this:
int32_t (*matrix)[MAX] = malloc(MAX*sizeof(*matrix));
This avoids the hassle of allocating an array for each line, and avoids the overhead of one pointer indirection. You would just need to change the signature of the accessors accordingly, their contents does not change at all.
Upvotes: 0
Reputation: 6086
The problem must be coming from the memory allocation. On a classic computer, sizeof(int)
is 4 and sizeof(int*)
can be 4 (32 bits OS) or 8 (64 bits OS).
There, you are first allocating the room for 50000 pointers, thus 50000*4 = 200000 bytes at least.
Then, you loop through this in order to allocate 50.000*50.000*4 = 10.000.000.000 bytes = 10 GB !
Since you do NOT check on malloc()
return value, my guess is that at some point in this loop :
for(i=0;i<MAX;++i)
adj[i] = (int *)malloc(sizeof(int)*MAX);
malloc
always returns NULL
. Let denote M such an index. In your case I can guess that M ≥ 4591.
Later, when reading the data from your file, you try to access a NULL
pointer if M ≤ a.node1
.
By the way, you could allocate 2D arrays like this :
int **array, i;
if(NULL == (array = malloc(sizeof(int*)*MAX))) {
printf("Oops, not enough memory ...\n");
return EXIT_FAILURE;
}
if(NULL == (array[0] = malloc(sizeof(int)*MAX*MAX))) {
printf("Oops, not enough memory ...\n");
free(array);
return EXIT_FAILURE;
}
for(i = 1; i < MAX; i++)
array[i] = array[0]+i;
// At this point, array is ready to use.
do_stuff();
// When you are done, freeing the memory is not tiresome :
free(array[0]);
free(array);
(Notice that in C, you never cast the return of malloc.)
What is the difference between this allocation and yours ? In yours, each adj[i]
point to a dynamically allocated chunk of data. As a consequence, there is little chance that these chunks of data will be contiguous in memory. In the one I propose, there is only 2 memory allocations and in the end the chunks of data pointed by adj[i]
and adj[i+1]
are contiguous.
NB :
adjacency matrix of a large graph
Although adjacency matrix is a perfectly valid way to store a graph in memory, when the graph tends to be large, you should use adjacency list instead.
Upvotes: 2
Reputation: 213220
Firstly, add a debug printf before the error:
while(fscanf(ptr_file1,"%d %d",&a.node1, &a.node2)!=EOF)
{
printf("%d %d\n", a.node1, a.node2);
adj[a.node1][a.node2] = 1; //Getting segmentation fault here
adj[a.node2][a.node1] = 1;
}
That way you can see if your array indices are out of range before your program crashes.
This is just a quick fix for debugging purposes though - really you should have proper error checking:
while(fscanf(ptr_file1,"%d %d",&a.node1, &a.node2)!=EOF)
{
if (a.node1 >= MAX || a.node2 >= MAX)
{
fprintf(stderr, "range error: a.node1 = %d, a.node2 = %d\n", a.node1, a.node2);
exit(1);
}
adj[a.node1][a.node2] = 1; //Getting segmentation fault here
adj[a.node2][a.node1] = 1;
}
Upvotes: 0
Reputation: 1362
50000 * 50000
ints is quite a lot. Namely, it is 9Gb memory for 4 byte integer. Are you quite sure you get all the memory allocated?
Add a check:
if (!adj[i])
return 2;
Note that you have to compile for x64 and run on an x64 machine for it to work. Most probably you don't need that much data.
Upvotes: 1
Reputation: 10947
In your specific case, there is no need to allocate an array of pointers pointing to arrays of ints. Just allocate one single array of ints with the size of MAX*MAX.
Upvotes: 0