Ahmed Khalaf
Ahmed Khalaf

Reputation: 1419

MPI_Gather segmentation fault

I have this parallel Gaussian elimination code. A segmentation fault occurs when either of the MPI_Gather calls is made. I know that such an error can arise if memory is not allocated properly for one of the buffers, but I cannot see anything wrong with the memory management code.

Can someone help?


Note: the program reads its input from a file called input.txt in the same directory.


#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "mpi.h"

/*void print2dAddresses(double** array2d, int rows, int cols)
{
    int i;

    for(i = 0; i < rows; i++)
    {
        int j;

        for(j = 0; j < cols; j++)
        {
            printf("%d ", &(array2d[i][j]));
        }
    }
}*/



double** newMatrix(int rows, int cols)
    double *data = (double*) malloc(rows * cols * sizeof(double));
    double **array= (double **)malloc(rows * sizeof(double*));
    int i;

    for (i=0; i<rows; i++)
        array[i] = &(data[cols*i]);

    return array;

void freeMatrix(double** mat)


double** new2dArray(int nrows, int ncols)
    int i;
    double** array2d;

    array2d = (double**) malloc(nrows * sizeof(double*));

    for(i = 0; i < nrows; i++)
        array2d[i] = (double*) malloc(ncols * sizeof(double));

    return array2d;

double* new1dArray(int size)
    return (double*) malloc(size * sizeof(double));

void free2dArray(double** array2d, int nrows)
    int i;

    for(i = 0; i < nrows; i++)


void print2dArray(double** array2d, int nrows, int ncols)
    int i, j;

    for(i = 0; i < nrows; i++)
        for(j = 0; j < ncols; j++)
            printf("%lf ", array2d[i][j]);



void print1dArray(double* array, int size)
    int i;

    for(i = 0; i < size; i++)
        printf("%lf\n", array[i]);


void read2dArray(FILE* fp, double** array2d, int nrows, int ncols)
    int i, j;

    for(i = 0; i < nrows; i++)
        for(j = 0; j < ncols; j++)
            fscanf(fp, "%lf", &(array2d[i][j]));

void read1dArray(FILE* fp, double* array, int size)
    int i;

    for(i = 0; i < size; i++)
        fscanf(fp, "%lf", &(array[i]));

void readSymbols(char* symbols, int size, FILE* fp)
    int i;

    for(i = 0; i < size; i++)
        char c = '\n';

        while(c == '\n' | c == ' ' | c == '\t' | c == '\r')
            fscanf(fp, "%c", &c);

        symbols[i] = c;

void printSolution(char* symbols, double* x, int size)
    int i;

    for(i = 0; i < size; i++)
        printf("%c = %lf\n", symbols[i], x[i]);

double* copy_1d_array(double* original, int size)
    double* copy_version;
    int i;

    copy_version = (double*) malloc(size * sizeof(double));

    for(i = 0; i < size; i++)
        copy_version[i] = original[i];

    return copy_version;

int main(int argc, char** argv)
    int p, rank, i, j, k, l, msize, rowsPerProcess, remainder, startingRow, dest, rowCounter, remainingRows, neededProcesses;
    double **A, *b, *x, **smallA, *currentRow, *smallB, currentB, **receivedA, *receivedB;
    char *symbols;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &p);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if(rank == 0)
        FILE* fp;

        fp = fopen("input.txt", "r");

        fscanf(fp, "%d", &msize);

        A = newMatrix(msize, msize);
        b = new1dArray(msize);
        x = new1dArray(msize);

        symbols = (char*) malloc(msize * sizeof(char));

        read2dArray(fp, A, msize, msize);
        read1dArray(fp, b, msize);

        readSymbols(symbols, msize, fp);

        /*print2dArray(A, msize, msize);
        print1dArray(b, msize);*/


    MPI_Bcast(&msize, 1, MPI_INT, 0, MPI_COMM_WORLD);

    for(i = 0; i < (msize - 1); i++)
        int maxIndex;
        double maxCoef, tmp, r;
        /*finding max row*/

        if(rank == 0)
            maxIndex = i;
            maxCoef = fabs(A[i][i]);
            for(j = i + 1; j < msize; j++)
                if(fabs(A[j][i]) > maxCoef)
                    maxCoef = A[j][i];
                    maxIndex = j;

            /*swapping the current row with the max row*/
            for(j = 0; j < msize; j++)
                tmp = A[i][j];
                A[i][j] = A[maxIndex][j];
                A[maxIndex][j] = tmp;

            tmp = b[i];
            b[i] = b[maxIndex];
            b[maxIndex] = tmp;

            /*for(j = i + 1; j < msize; j++)
                double r = A[j][i] / A[i][i];

                subtracting r * row i from row j
                for(k = i; k < msize; k++)
                    A[j][k] -= r * A[i][k];

                b[j] -= r * b[i];

            /*parallel elimination*/
            startingRow = i + 1;
            neededProcesses = p;

            remainingRows = msize - startingRow;

            if(remainingRows < neededProcesses)
                neededProcesses = remainingRows;

            rowsPerProcess = remainingRows / neededProcesses;
            remainder = remainingRows % neededProcesses;

        MPI_Bcast(&startingRow, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(&rowsPerProcess, 1, MPI_INT, 0, MPI_COMM_WORLD);

        if(rank == 0)
            currentRow = copy_1d_array(A[startingRow-1], msize);
            currentB = b[startingRow-1];
            currentRow = new1dArray(msize);

        MPI_Bcast(currentRow, msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        MPI_Bcast(&currentB, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        if(rank == 0)
            receivedA = newMatrix(remainingRows, msize);
            receivedB = new1dArray(remainingRows);
        smallA = newMatrix(rowsPerProcess, msize);
        smallB = new1dArray(rowsPerProcess);

        MPI_Scatter(&(A[startingRow][0]), rowsPerProcess*msize, MPI_DOUBLE, &(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        MPI_Scatter(&(b[startingRow]), rowsPerProcess, MPI_DOUBLE, &(smallB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        for(j = 0; j < rowsPerProcess; j++)
            r = smallA[j][startingRow-1] / currentRow[startingRow-1];

            for(k = 0; k < msize; k++)
                smallA[j][k] -= r * currentRow[k];

            smallB[j] -= r * currentB;

        MPI_Gather(&(smallA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, &(receivedA[0][0]), rowsPerProcess*msize, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        MPI_Gather(&(smallB[0]), rowsPerProcess, MPI_DOUBLE, &(receivedB[0]), rowsPerProcess, MPI_DOUBLE, 0, MPI_COMM_WORLD);


        if(rank == 0)
            for(j = 0; j < remainingRows; j++)
                for(k = 0; k < msize; k++)
                    A[j+startingRow][k] = receivedA[j][k];

                b[j+startingRow] = receivedB[j];

        if(rank == 0)
            if(remainder > 0)
                for(j = (msize - remainder); j < msize; j++)
                    r = A[j][i] / A[i][i];

                    for(k = 0; k < msize; k++)
                        A[j][k] -= r * A[i][k];

                    b[j] -= r * b[i];


    if(rank == 0)
        /*backward substitution*/

        for(i = msize - 1; i >= 0; i--)
            x[i] = b[i];

            for(j = msize - 1; j > i; j--)
                x[i] -= A[i][j] * x[j];

            x[i] /= A[i][i];

        printf("solution = \n");
        //print1dArray(x, msize);

        printSolution(symbols, x, msize);


    return 0;

Input File:

1 1 1
1 1 3
2 1 4

Upvotes: 2

Views: 2517

Answers (1)


Reputation: 40252

It might be this: &(receivedA[0][0]) on processes where rank != 0. You're indexing an array that hasn't been allocated. You might have to create another pointer, like this:

    if(rank == 0)
        receivedA = newMatrix(remainingRows, msize);
        recievedAHead = &(receivedA[0][0]);
        receivedB = new1dArray(remainingRows);
    else {
        recievedAHead = NULL;

and use recievedAHead in the MPI_Gather call.

Upvotes: 2

Related Questions