Reputation: 41
I am practicing simple parallel programming with MPI. The code is meant to simulate image processing: it randomly generates an N*N matrix and applies a simple neighborhood weighted-averaging filter, leaving the first and last row and column unprocessed (a serial sketch of the intended filter follows the code). It compiles without errors, but at runtime I get a segmentation fault that I cannot figure out. Please help, thanks! Here is the code:
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
int rank, size; // for storing this process' rank, and the number of processes
int i, j; //misc
int rec_buf[100]; // buffer where the received data should be stored
void initialize_data (int **, int );
void distribute_data (int **, int );
void mask_operation (int **, int , int**);
void collect_results (int **, int );
/* ttype: type to use for representing time */
typedef double ttype;
ttype tdiff(struct timespec a, struct timespec b)
/* Find the time difference. */
{
    ttype dt = ((b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1E9);
    return dt;
}
struct timespec now()
/* Return the current time. */
{
    struct timespec t;
    clock_gettime(CLOCK_REALTIME, &t);
    return t;
}
//clock_t begin, end;
struct timespec begin, end;
double time_spent;
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int N = atof(argv[1]);
    int A[N][N];
    int Ap[N][N];
    begin = now();
    initialize_data(A, N);
    distribute_data(A, N);    // use scatterv
    mask_operation(A, N, Ap);
    collect_results(Ap, N);   // use gatherv
    end = now();
    time_spent = tdiff(begin, end);
    printf("Total Spent Time: %.8f sec.\n", time_spent);
    MPI_Finalize();
    return 0;
}
/*----------------------------------------------------*/
void initialize_data (int **A, int N){
    //generating the random matrix
    for(i=0;i<N;i++)
        for(j=0;j<N;j++)
            A[i][j]=rand()%256;
    //print test for verify random
    /*
    for (i=0; i<N; i++) {
        for (j=0; j<N; j++)
            printf("%d\t", A[i][j]);
        printf("\n");
    } */
}
void distribute_data (int **A, int N){
    int rem = (N-2)%size;  // rows remaining after division among processes
    int *sendcounts = malloc(sizeof(int)*size);
    // array describing how many elements to send to each process
    int *displs = malloc(sizeof(int)*size);
    // array describing the displacements where each segment begins

    // calculate send counts and displacements & print for each process
    for (i = 0; i < size; i++) {
        int sum = 0;
        sendcounts[i] = ((int)((N-2)/size))*N; //to guarantee all rows will be calculated except the first and the last one
        if (rem > 0) {
            sendcounts[i] = sendcounts[i] + N;
            rem--;
        }
        displs[i] = sum;
        sum += sendcounts[i];
        sendcounts[i] = sendcounts[i] + 2*N; //to add the local first and last row
        //printf("sendcounts[%d] = %d\tdispls[%d] = %d\n", i, sendcounts[i], i, displs[i]);
    }

    // divide the data among processes as described by sendcounts and displs
    MPI_Scatterv(&A, sendcounts, displs, MPI_INT, &rec_buf, 100, MPI_INT, 0, MPI_COMM_WORLD);

    // print what each process received
    printf("%d: ", rank);
    for (i = 0; i < sendcounts[rank]; i++) {
        printf("%d\t", rec_buf[i]);
    }
    printf("\n");
}
void mask_operation (int **A, int N, int **Ap){
    int local_i, local_j; //local row and column 5*5, misc
    int local_row_number = sizeof(A) / sizeof(int) / N;
    for(i = 0; i < local_row_number-2; i++){
        for(j = 0; j < N-2; j++){
            if(rank == 0 && i == 0)
                Ap[i][j] = A[i][j];
            if(rank == size-1 && i == N-3)
                Ap[i+2][j] = A[i+2][j];
            if(j == 0)
                Ap[i][j] = A[i][j];
            if(j == local_row_number-3)
                Ap[i][j+2] = A[i][j+2];
            for(local_j = 0; local_j < 5; local_j++){
                for(local_i = 0; local_i < 5; local_i++){
                    Ap[i + local_i + 1][j + local_j + 1] =
                        (Ap[i + local_i][j + local_j]     + Ap[i + local_i][j + local_j + 1]         + Ap[i + local_i][j + local_j + 2]
                       + Ap[i + local_i + 1][j + local_j] + 2 * Ap[i + local_i + 1][j + local_j + 1] + Ap[i + local_i + 1][j + local_j + 2]
                       + Ap[i + local_i + 2][j + local_j] + Ap[i + local_i + 2][j + local_j + 1]     + Ap[i + local_i + 2][j + local_j + 2]) / 10;
                }
            }
        }
    }
}
void collect_results (int **Ap, int N){
    int rem = (N-2)%size;  // rows remaining after division among processes
    int *sendcounts = malloc(sizeof(int)*size);
    // array describing how many elements to send to each process
    int *displs = malloc(sizeof(int)*size);
    // array describing the displacements where each segment begins

    // calculate send counts and displacements & print for each process
    for (i = 0; i < size; i++) {
        int sum = 0;
        sendcounts[i] = ((int)((N-2)/size))*N; //to guarantee all rows will be calculated except the first and the last one
        if (rem > 0) {
            sendcounts[i] = sendcounts[i] + N;
            rem--;
        }
        displs[i] = sum;
        sum += sendcounts[i];
        sendcounts[i] = sendcounts[i] + 2*N; //to add the local first and last row
        //printf("sendcounts[%d] = %d\tdispls[%d] = %d\n", i, sendcounts[i], i, displs[i]);
    }

    //collect the processed matrix by sendcounts and displs
    MPI_Gatherv(&rec_buf, sendcounts[rank], MPI_INT, &Ap, sendcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);

    //print the result
    if(rank == 0){
        printf("******************************************************\n");
        printf("Result: \n");
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                printf("%d\t", Ap[i][j]);
            }
        }
        printf("\n");
        free(sendcounts);
        free(displs);
    }
}
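To make the intent clearer, this is the serial version of the filter I am trying to parallelize. It is only a sketch and not part of the program above; serial_mask and the flat row-major indexing are just for illustration. Every interior point gets a 3x3 weighted average with weight 2 at the center and weight 1 at the eight neighbors (so the weights sum to 10), and the border rows and columns are copied unchanged:

/* Serial reference for the intended filter (no MPI), assuming the
 * matrix is stored as a flat N*N int array in row-major order. */
void serial_mask(const int *A, int *Ap, int N)
{
    for (int r = 0; r < N; r++) {
        for (int c = 0; c < N; c++) {
            if (r == 0 || r == N - 1 || c == 0 || c == N - 1) {
                Ap[r * N + c] = A[r * N + c];   /* keep the border unchanged */
            } else {
                int sum = 2 * A[r * N + c];     /* center pixel counts twice */
                for (int dr = -1; dr <= 1; dr++)
                    for (int dc = -1; dc <= 1; dc++)
                        if (dr != 0 || dc != 0)
                            sum += A[(r + dr) * N + (c + dc)];
                Ap[r * N + c] = sum / 10;       /* weights sum to 10 */
            }
        }
    }
}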
I run it with 4 ranks and N=15 (set correctly in the .sh job script), and the error report is below:
[c0706a-s2:82816:0:82816] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x4)
==== backtrace (tid: 82816) ====
0 0x000000000004ee05 ucs_debug_print_backtrace() ???:0
1 0x0000000000402285 main() ???:0
2 0x0000000000022545 __libc_start_main() ???:0
3 0x00000000004020a9 _start() ???:0
=================================
[c0706a-s2:82818:0:82818] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
[c0706a-s2:82820:0:82820] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
[c0706a-s2:82815:0:82815] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil))
==== backtrace (tid: 82818) ====
0 0x000000000004ee05 ucs_debug_print_backtrace() ???:0
1 0x0000000000402285 main() ???:0
2 0x0000000000022545 __libc_start_main() ???:0
3 0x00000000004020a9 _start() ???:0
=================================
==== backtrace (tid: 82820) ====
0 0x000000000004ee05 ucs_debug_print_backtrace() ???:0
1 0x0000000000402285 main() ???:0
2 0x0000000000022545 __libc_start_main() ???:0
3 0x00000000004020a9 _start() ???:0
=================================
==== backtrace (tid: 82815) ====
0 0x000000000004ee05 ucs_debug_print_backtrace() ???:0
1 0x0000000000402285 main() ???:0
2 0x0000000000022545 __libc_start_main() ???:0
3 0x00000000004020a9 _start() ???:0
=================================
srun: error: c0706a-s2: tasks 1-3: Segmentation fault (core dumped)
srun: error: c0706a-s2: task 0: Segmentation fault (core dumped)
Upvotes: 4
Views: 27049
Reputation: 11
On my workstation, this problem was caused by the stack size limit. Just raise it with ulimit.
Modify /etc/security/limits.conf and add:
* soft stack unlimited
* hard stack unlimited
Modify /etc/pam.d/login and add:
session required /usr/lib64/security/pam_limits.so
Then restart the ssh service.
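You can also check the limit from inside the program itself. This is only a rough POSIX sketch (getrlimit/setrlimit), not something the code above needs verbatim: it prints the current stack limit and tries to raise the soft limit up to the hard limit. Setting the limit in /etc/security/limits.conf, or running ulimit -s unlimited in the shell that launches the job, is still the more reliable fix:

/* Rough sketch: report the current stack limit and try to raise the
 * soft limit to the hard limit. POSIX only; whether this helps also
 * depends on how the MPI job is launched. */
#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
    struct rlimit rl;

    if (getrlimit(RLIMIT_STACK, &rl) != 0) {
        perror("getrlimit");
        return 1;
    }
    printf("stack soft limit: %llu bytes, hard limit: %llu bytes\n",
           (unsigned long long)rl.rlim_cur,
           (unsigned long long)rl.rlim_max);

    rl.rlim_cur = rl.rlim_max;   /* raise the soft limit as far as allowed */
    if (setrlimit(RLIMIT_STACK, &rl) != 0)
        perror("setrlimit");

    return 0;
}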
Upvotes: 1