user1858485

Reputation: 29

Problem with multiplying matrices in parallel

I am having trouble using MPI to multiply matrices.

The program reads two n x n matrices from two files and is supposed to multiply them with MPI, but I get a segmentation fault in one of the processes. This is the output I get when I run my code:

read matrix A from matrixA
read matrix B from matrixB

mpirun noticed that process rank 1 with PID 15599 on node VirtualBox exited on signal 11 (Segmentation fault).

Here is my code:

int main (int argc, char * argv[])
{
    /* Check the number of arguments */
    int n; /* Dimension of the matrix */
    float *sa, *sb, *sc; /* Storage for matrix A, B, and C */
    float **a, **b, **c; /* 2D array to access matrix A, B, and C */
    int i, j, k;

    MPI_Init(&argc, &argv); //Initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Get the rank
    MPI_Comm_size(MPI_COMM_WORLD, &size); //Get number of processes

    if(argc != 4) {
        printf("Usage: %s fileA fileB fileC\n", argv[0]);
        return 1;
    }

    if(rank == 0)
    {
        /* Read matrix A */
        printf("read matrix A from %s\n", argv[1]);
        read_matrix(argv[1], &a, &sa, &i, &j);
        if(i != j) {
            printf("ERROR: matrix A not square\n"); return 2;
        }
        n = i;

        //printf("%d", n);

        /* Read matrix B */
        printf("Read matrix B from %s\n", argv[2]);
        read_matrix(argv[2], &b, &sb, &i, &j);
        if(i != j) {
            printf("ERROR: matrix B not square\n");
            return 2;
        }
        if(n != i) {
            printf("ERROR: matrix A and B incompatible\n");
            return 2;
         }
    }

    printf("test");

    if(rank == 0)
    {
        /* Initialize matrix C */
        sc = (float*)malloc(n*n*sizeof(float));
        memset(sc, 0, n*n*sizeof(float));
        c = (float**)malloc(n*sizeof(float*));
        for(i=0; i<n; i++) c[i] = &sc[i*n];
    }

    ////////////////////////////////////////////////////////////////////////////////////////////
    float matrA[n][n];
    float matrB[n][n];
    float matrC[n][n];

    for(i = 0; i < n; i++)
    {
        for(j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
    }
    /* Master initializes work*/
    if (rank == 0)
    {
        start_time = MPI_Wtime();
        for (i = 1; i < size; i++)
        {
            //For each slave other than the master
            portion = (n / (size - 1)); // Calculate portion without master
            low_bound = (i - 1) * portion;
            if (((i + 1) == size) && ((n % (size - 1)) != 0))
            {
                //If rows of [A] cannot be equally divided among slaves,
                upper_bound = n; //the last slave gets all the remaining rows.
            }
            else
            {
                upper_bound = low_bound + portion; //Rows of [A] are equally divisable among slaves
            }
            //Send the low bound first without blocking, to the intended slave.
            MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);

            //Next send the upper bound without blocking, to the intended slave
            MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);

            //Finally send the allocated row portion of [A] without blocking, to the intended slave
            MPI_Isend(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
        }
    }


    //broadcast [B] to all the slaves
    MPI_Bcast(&matrB, n*n, MPI_FLOAT, 0, MPI_COMM_WORLD);
    /* work done by slaves*/
    if (rank > 0)
    {
        //receive low bound from the master
        MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
        //next receive upper bound from the master
        MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
        //finally receive row portion of [A] to be processed from the master
        MPI_Recv(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
        for (i = low_bound; i < upper_bound; i++)
        {
            //Iterate through the assigned rows of [A]
            for (j = 0; j < n; j++)
            {
                //Iterate through columns of [B]
                for (k = 0; k < n; k++)
                {
                    //Iterate through rows of [B]
                    matrC[i][j] += (matrA[i][k] * matrB[k][j]);
                }
            }
        }


        //send back the low bound first without blocking, to the master
        MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
        //send the upper bound next without blocking, to the master
        MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
        //finally send the processed portion of data without blocking, to the master
        MPI_Isend(&matrC[low_bound][0],
                  (upper_bound - low_bound) * n,
                  MPI_FLOAT,
                  0,
                  SLAVE_TO_MASTER_TAG + 2,
                  MPI_COMM_WORLD,
                  &request);
    }

    /* Master gathers processed work*/
    if (rank == 0)
    {
        for (i = 1; i < size; i++)
        {
            // Until all slaves have handed back the processed data,
            // receive low bound from a slave.
            MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);

            //Receive upper bound from a slave
            MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);

            //Receive processed data from a slave
            MPI_Recv(&matrC[low_bound][0],
                     (upper_bound - low_bound) * n,
                     MPI_FLOAT,
                     i,
                     SLAVE_TO_MASTER_TAG + 2,
                     MPI_COMM_WORLD,
                     &status);
        }
        end_time = MPI_Wtime();
        printf("\nRunning Time = %f\n\n", end_time - start_time);
    }
    MPI_Finalize(); //Finalize MPI operations

    /* Do the multiplication */
    ////////////////////////////////////////////////////  matmul(a, b, c, n);
    for(i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            sc[(i*n) + j] = matrC[i][j];
        }
    }
}

Upvotes: 2

Views: 973

Answers (1)

dreamcrash

Reputation: 51443

Every process declares the pointers to the matrices, namely:

float *sa, *sb, *sc; /* storage for matrix A, B, and C */

but only process 0 (allocates and) fills the arrays sa and sb:

if(rank == 0)
  {
      ...
      read_matrix(argv[1], &a, &sa, &i, &j);
      ...
      read_matrix(argv[2], &b, &sb, &i, &j);
      ...
  }

However, afterwards every process tries to access the elements of the sa and sb arrays:

for(i = 0; i < n; i++)
{
   for(j = 0; j < n; j++)
   {
       matrA[i][j] = sa[(i*n) + j];
       matrB[i][j] = sb[(i*n) + j];
   }
}

Since only process 0 has (allocated and) filled the arrays sa and sb, the remaining processes are dereferencing memory (sa[(i*n) + j] and sb[(i*n) + j]) that they never allocated. Hence the segmentation fault.
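One way to avoid the crash while keeping the rest of your structure is to make sure sa and sb are only dereferenced on rank 0, and to broadcast n (which is also read only on rank 0, yet is used to size the variable-length arrays on every rank) before those arrays are declared. A minimal sketch of that idea, assuming n has been read on rank 0 exactly as in your code:

/* Sketch only: broadcast the dimension first so every rank can size its
   local arrays, and let only rank 0 touch sa and sb. */
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

float matrA[n][n];
float matrB[n][n];
float matrC[n][n];

if (rank == 0)
{
    /* only the rank that called read_matrix() dereferences sa and sb */
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
    }
}
/* matrB still reaches the workers through the existing MPI_Bcast,
   and the rows of matrA through the existing sends. */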

On a side note, there is another problem in your program: you initiate non-blocking sends with MPI_Isend but never wait on the completion of the returned request handles. MPI implementations are not even required to start a send until it is progressed to completion, usually by a call to one of the wait or test operations (MPI_Wait, MPI_Waitsome, MPI_Waitall, and so on). Even worse, you reuse the same request variable for every send, so the handles of all previously initiated requests are lost and can no longer be waited on or tested. Use an array of requests instead and wait for all of them with MPI_Waitall after the send loop.
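A sketch (not your exact code) of how the master's send loop could look with one request handle per send. The bounds are also kept in small per-worker arrays, because a buffer passed to MPI_Isend must not be modified or reused until that send has completed; the bound computation itself is copied from your code:

/* Sketch only: one request handle per non-blocking send, completed with
   a single MPI_Waitall after the loop. */
MPI_Request reqs[3 * (size - 1)];
int lows[size], ups[size];
int r = 0;

portion = n / (size - 1);
for (i = 1; i < size; i++)
{
    lows[i] = (i - 1) * portion;
    if ((i + 1) == size && (n % (size - 1)) != 0)
        ups[i] = n;                 /* last worker takes the remaining rows */
    else
        ups[i] = lows[i] + portion;

    MPI_Isend(&lows[i], 1, MPI_INT, i, MASTER_TO_SLAVE_TAG,
              MPI_COMM_WORLD, &reqs[r++]);
    MPI_Isend(&ups[i], 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1,
              MPI_COMM_WORLD, &reqs[r++]);
    MPI_Isend(&matrA[lows[i]][0], (ups[i] - lows[i]) * n, MPI_FLOAT,
              i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &reqs[r++]);
}
MPI_Waitall(r, reqs, MPI_STATUSES_IGNORE);  /* buffers may be reused afterwards */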

Also think about this: do you really need non-blocking operations to send data back from the workers?
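At that point the worker has already finished its share of the computation and has nothing useful to overlap the communication with, so plain blocking sends are simpler and side-step the request-handle issue entirely. A sketch of the worker's return path:

/* Sketch only: blocking sends are sufficient for returning the results. */
MPI_Send(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD);
MPI_Send(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD);
MPI_Send(&matrC[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT,
         0, SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD);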

Upvotes: 3
