Daniel.Liu
Daniel.Liu

Reputation: 41

Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))

I am practicing simple parallel programming by using MPI now. The code is meant to simulate the image processing by randomly generating N*N matrix and using simple neighborhood weighted-averaging filter without processing the first and the last row and column. I got no error during the compile time but I got some error during the runtime which I can not figure out. PLEASE HELP! THANKS! Here is the code below:

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

int rank, size;         // for storing this process' rank, and the number of processes
int i, j;               //misc
int rec_buf[100];       // buffer where the received data should be stored
void initialize_data (int **, int );
void distribute_data (int **, int );
void mask_operation (int **, int , int**);
void collect_results (int **, int );

/*   ttype: type to use for representing time */
typedef double ttype;
ttype tdiff(struct timespec a, struct timespec b)
/* Find the time difference. */
{
  ttype dt = (( b.tv_sec - a.tv_sec ) + ( b.tv_nsec - a.tv_nsec ) / 1E9);
  return dt;
}

struct timespec now()
/* Return the current time. */
{
  struct timespec t;
  clock_gettime(CLOCK_REALTIME, &t);
  return t;
}

//clock_t begin, end;
struct timespec begin, end;
double time_spent;

int main(int argc, char **argv) 
{  
    MPI_Init(&argc, &argv); 
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int N = atof(argv[1]); 
    int A[N][N];
    int Ap[N][N];
    begin = now();

    initialize_data(A, N); 
    distribute_data (A, N);   // use scatterv 
    mask_operation(A, N, Ap); 
    collect_results(Ap, N);   // use gatherv 

    end = now();
    time_spent = tdiff(begin, end);
    printf("Total Spent Time: %.8f sec.\n", time_spent);

    MPI_Finalize(); 
    return 0; 
}

/*----------------------------------------------------*/
void initialize_data (int **A, int N){
    //generating the random matrix
    for(i=0;i<N;i++)
        for(j=0;j<N;j++)
            A[i][j]=rand()%256;
    
    //print test for verify random
    /*
    for (i=0; i<N; i++) {  
        for (j=0; j<N; j++)
        printf("%d\t", A[i][j]);
        printf("\n");
      }  */   
}
void distribute_data (int **A, int N){
    int rem = (N-2)%size;   // rows remaining after division among processes
    int *sendcounts = malloc(sizeof(int)*size);
         // array describing how many elements to send to each process
    int *displs = malloc(sizeof(int)*size);
         // array describing the displacements where each segment begins

    // calculate send counts and displacements & print for each process
    for (i = 0; i < size; i++) {
        int sum = 0;
        sendcounts[i] = ((int)((N-2)/size))*N;   //to guarantee all rows will be calculated except the first and the last one
        if (rem > 0) {
            sendcounts[i] = sendcounts[i] + N;
            rem--;
        }
        displs[i] = sum;
        sum += sendcounts[i];
        sendcounts[i] = sendcounts[i] + 2*N;    //to add the local first and last row       
        //printf("sendcounts[%d] = %d\tdispls[%d] = %d\n", i, sendcounts[i], i, displs[i]);
    }

    // divide the data among processes as described by sendcounts and displs
    MPI_Scatterv(&A, sendcounts, displs, MPI_INT, &rec_buf, 100, MPI_INT, 0, MPI_COMM_WORLD);

    // print what each process received
    printf("%d: ", rank);
    for (i = 0; i < sendcounts[rank]; i++) {
        printf("%d\t", rec_buf[i]);
    }
    printf("\n");
} 
void mask_operation (int **A, int N, int **Ap){
    int local_i, local_j;       //local row and column 5*5, misc
    int local_row_number = sizeof(A) / sizeof(int) / N;

    for(i = 0; i < local_row_number-2; i++){
        for(j = 0; j < N-2; j++){
            if(rank == 0 && i ==0)
                Ap[i][j] = A[i][j];
            if(rank == size-1 && i ==N-3)
                Ap[i+2][j] = A[i+2][j];
            if(j == 0)
                Ap[i][j] = A[i][j];
            if(j==local_row_number-3)
                Ap[i][j+2] = A[i][j+2];

            for(local_j = 0; local_j < 5; local_j++){
                for(local_i = 0; local_i < 5; local_i++){
                    Ap[i + local_i + 1][j + local_j + 1] = (Ap[i + local_i][j + local_j] + Ap[i + local_i][j + local_j + 1] + Ap[i + local_i][j + local_j + 2] + Ap[i + local_i + 1][j + local_j] + 2 * Ap[i + local_i + 1][j + local_j + 1] + Ap[i + local_i + 1][j + local_j + 2] + Ap[i + local_i + 2][j + local_j] + Ap[i + local_i + 2][j + local_j + 1] + Ap[i + local_i + 2][j + local_j + 2]) / 10;
                }
            }
        }
    }
}  
void collect_results (int **Ap, int N){
    int rem = (N-2)%size;   // rows remaining after division among processes
    int *sendcounts = malloc(sizeof(int)*size);
         // array describing how many elements to send to each process
    int *displs = malloc(sizeof(int)*size);
         // array describing the displacements where each segment begins

    // calculate send counts and displacements & print for each process
    for (i = 0; i < size; i++) {
        int sum = 0;
        sendcounts[i] = ((int)((N-2)/size))*N;   //to guarantee all rows will be calculated except the first and the last one
        if (rem > 0) {
            sendcounts[i] = sendcounts[i] + N;
            rem--;
        }
        displs[i] = sum;
        sum += sendcounts[i];
        sendcounts[i] = sendcounts[i] + 2*N;    //to add the local first and last row       
        //printf("sendcounts[%d] = %d\tdispls[%d] = %d\n", i, sendcounts[i], i, displs[i]);
    }

    //collect the processed matrix by sendcounts and displs    
    MPI_Gatherv(&rec_buf, sendcounts[rank], MPI_INT, &Ap, sendcounts, displs, MPI_INT,  0, MPI_COMM_WORLD);
    //print the result
    if(rank == 0){
        printf("******************************************************\n");
        printf ("Result: \n");
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                printf ("%d\t", Ap[i][j]);
            }
        }
        printf ("\n");
        free(sendcounts);
        free(displs);
    }    
}

I used 4 ranks and set N=15 in the .sh file correctly, and my error report is below:

[c0706a-s2:82816:0:82816] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x4)
==== backtrace (tid:  82816) ====
 0 0x000000000004ee05 ucs_debug_print_backtrace()  ???:0
 1 0x0000000000402285 main()  ???:0
 2 0x0000000000022545 __libc_start_main()  ???:0
 3 0x00000000004020a9 _start()  ???:0
=================================
[c0706a-s2:82818:0:82818] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
[c0706a-s2:82820:0:82820] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
[c0706a-s2:82815:0:82815] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
==== backtrace (tid:  82818) ====
 0 0x000000000004ee05 ucs_debug_print_backtrace()  ???:0
 1 0x0000000000402285 main()  ???:0
 2 0x0000000000022545 __libc_start_main()  ???:0
 3 0x00000000004020a9 _start()  ???:0
=================================
==== backtrace (tid:  82820) ====
 0 0x000000000004ee05 ucs_debug_print_backtrace()  ???:0
 1 0x0000000000402285 main()  ???:0
 2 0x0000000000022545 __libc_start_main()  ???:0
 3 0x00000000004020a9 _start()  ???:0
=================================
==== backtrace (tid:  82815) ====
 0 0x000000000004ee05 ucs_debug_print_backtrace()  ???:0
 1 0x0000000000402285 main()  ???:0
 2 0x0000000000022545 __libc_start_main()  ???:0
 3 0x00000000004020a9 _start()  ???:0
=================================
srun: error: c0706a-s2: tasks 1-3: Segmentation fault (core dumped)
srun: error: c0706a-s2: task 0: Segmentation fault (core dumped)

Upvotes: 4

Views: 27049

Answers (1)

Jianfeng Jia
Jianfeng Jia

Reputation: 11

On my workstation, this problem is caused by the stack limits. Just ulimit it.

modify /etc/security/limits.conf

add:

"* soft stack unlimited"

"* hard stack unlimited"

modify /etc/pam.d/login

add:

session required /usr/lib64/security/pam_limits.so

Then, Restart ssh

Upvotes: 1

Related Questions