Hybrid openmp mpi mandelbrot code

Im facing one problem: ive done the mpi version of mandelbrot and it works perfect. Then i have to implement an hybrid version using openmp. Ive put openmp parallelization in the loop with all the calculations ( inside function calculoMandelbrot). If i delete the omp instruction ( omp for parallel and omp barrier) it works perfect ( it was my mpi implementation). Should work but i cant guess where i get lost.

Im having an image of IMAGEHEIGHT*IMAGEWIDTH and every process mades it part ( ex: if i have an image of 100 height, and 4 processes, every process calculates 25 rows, done in the calculoMandelbrot function). Then i send a message to master with the results of the calculation of its part of every process.

The ppm that results is a mess. Dont know why... any help would be apreciated...

/
//  mandelbrot.c
//
//
//  The Mandelbrot calculation is to iterate the equation
//  z = z*z + c, where z and c are complex numbers, z is initially
//  zero, and c is the coordinate of the point being tested. If
//  the magnitude of z remains less than 2 for ever, then the point
//  c is in the Mandelbrot set. In this code We write out the number of iterations
//  before the magnitude of z exceeds 2, or UCHAR_MAX, whichever is
//  smaller.//
//
//

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <omp.h>
#include <math.h>


#define TAG_ENVIO 3

int IMAGEWIDTH = 600;
int IMAGEHEIGHT = 400;
int ITERATIONS = 100000;
int CHUNK = 1;
int SIZE;


void escribirfichero(char* pixels) {

    int i;
    FILE *fp;
    fp = fopen("MandelbrotSet.ppm", "w");

    if (fp == NULL) {
        perror ( "Unable to open file" );
        exit (EXIT_FAILURE);
    }

    //printf("Empezamos a escribir en el fichero");

    fprintf(fp, "P6\n# CREATOR: mandel program\n");
    fprintf(fp, "%d %d\n255\n", IMAGEWIDTH, IMAGEHEIGHT);

    for (i = 0; i < (IMAGEWIDTH*3*IMAGEHEIGHT); i++)
        fputc((char) pixels[i],fp);

    fclose(fp);
}

void calculoMandelbrot(char *destino, int iproc, int height)
{
    int posInici = iproc * height;
    int xactual, yactual;
    int posLocal = 0;
    int chunkSize = height * IMAGEWIDTH * 3;

    omp_set_dynamic(1);

    //each iteration, it calculates: newz = oldz*oldz + p, where p is the current pixel, and oldz stars at the origin
    double pr, pi;                              //real and imaginary part of the pixel p
    double newRe, newIm, oldRe, oldIm;          //real and imaginary parts of new and old z
    double zoom = 1, moveX = -0.5, moveY = 0;   //you can change these to zoom and change position

    int numcpu = omp_get_num_procs();
    omp_set_num_threads(numcpu);

    if(iproc != 0)
        destino = (char *)malloc(sizeof(char) * chunkSize);

#pragma omp parallel //shared(moveX, moveY, zoom) private(xactual, yactual, pr, pi, newRe, newIm)  if (numcpu>1)
    {
#pragma omp for schedule(dynamic) //, CHUNK
        for(yactual = posInici; yactual < posInici + height; yactual++)
        {
            for(xactual = 0; xactual < IMAGEWIDTH; xactual++)
            {
                //calculate the initial real and imaginary part of z, based on the pixel location and zoom and position values
                pr = 1.5 * (xactual - IMAGEWIDTH / 2) / (0.5 * zoom * IMAGEWIDTH) + moveX;
                pi = (yactual - IMAGEHEIGHT / 2) / (0.5 * zoom * IMAGEHEIGHT) + moveY;
                newRe = newIm = oldRe = oldIm = 0; //these should start at 0,0
                //"i" will represent the number of iterations
                int i;
                //start the iteration process
                for(i = 0; i < ITERATIONS; i++)
                {
                    //remember value of previous iteration
                    oldRe = newRe;
                    oldIm = newIm;
                    //the actual iteration, the real and imaginary part are calculated
                    newRe = oldRe * oldRe - oldIm * oldIm + pr;
                    newIm = 2 * oldRe * oldIm + pi;
                    //if the point is outside the circle with radius 2: stop
                    if((newRe * newRe + newIm * newIm) > 4) break;
                }

                if(i == ITERATIONS)
                {
                    //escribirArray(destino, posLocal, 0, 0, 0);
                    destino[posLocal] = 0;
                    destino[++posLocal] = 0;
                    destino[++posLocal] = 0;
                    ++posLocal;                     //me preparo para colocar siguiente.
                }
                else
                {
                    double z = sqrt(newRe * newRe + newIm * newIm);
                    int brightness = 256 * log2(1.75 + i - log2(log2(z))) / log2((double)ITERATIONS);

                    //escribirArray(envioArr, xactual, yactual, brightness, brightness, 255);
                    destino[posLocal] = brightness;
                    destino[++posLocal] = brightness;
                    destino[++posLocal] = 255;
                    ++posLocal;                     //me preparo para colocar siguiente
                }
            }
        }
    }
    #pragma omp barrier

    if(iproc != 0)
    {
        MPI_Send(destino, chunkSize, MPI_CHAR, 0, TAG_ENVIO, MPI_COMM_WORLD);
        free(destino);
    }
}


void stringCopy(char *pixels, char *reciboArr, int sender, int height)
{
    int posInici = sender * height * IMAGEWIDTH*3;
    int pos;

    for (pos = 0; pos < height * IMAGEWIDTH*3; pos++, posInici++) {
        pixels[posInici] = reciboArr[pos];
    }
}


int main(int argc, char** argv) {

    // mandelbrot
    char* pixels;

    // mpi
    int nproc, iproc;

    // calculos tiempo
    double begin;
    double end_calc;
    double end_total;


    if(argc >= 3)
    {
        // se supone que la línia de ejecucion sera del tipo
        // -n 4 ${workspace_loc:MPI}/Debug/MPI${build_files} 100 200
        // FALTA PROVAR EN MOORE

        IMAGEWIDTH = atoi(argv[1]);
        IMAGEHEIGHT = atoi(argv[2]);
    }

    if(argc == 4)
    {
        ITERATIONS = atoi(argv[3]);
    }

    if(argc == 5)
    {
        CHUNK = atoi(argv[4]);
    }

    SIZE = IMAGEHEIGHT * IMAGEWIDTH * 3;



    if (MPI_Init(&argc, &argv) != MPI_SUCCESS) {
        fprintf(stderr, "Error al inicializar MPI.\n");
        return 100;
    }

    begin = MPI_Wtime();

    if (MPI_Comm_size(MPI_COMM_WORLD, &nproc) != MPI_SUCCESS) {
        fprintf(stderr, "No se puede obtener el contador de procesos.\n");
        MPI_Finalize();
        return 101;
    } else if (nproc < 2) {
        fprintf(stderr, "Se necesitan almenos 2 procesos (se usan %d)\n", nproc);
        MPI_Finalize();
        return 102;
    }

    if (MPI_Comm_rank(MPI_COMM_WORLD, &iproc) != MPI_SUCCESS) {
        fprintf(stderr, "No se puede obtener el rango para el proceso.\n");
        MPI_Finalize();
        return 103;
    }

    if ((IMAGEHEIGHT % nproc) != 0)
    {
        printf("Incompatable number of processes requested\nExiting...\n");
        exit(EXIT_FAILURE);
    }


    int height = IMAGEHEIGHT/nproc;
    int chunkSize = height * IMAGEWIDTH * 3;

    if (iproc != 0) {
        char *envioArr = (char *)malloc(sizeof(char) * chunkSize);
        calculoMandelbrot(envioArr, iproc, height);
    }

    else if(iproc == 0) {
        printf("Empezando los calculos de Mandelbrot...\n");
        printf("IMAGEWIDTH %d IMAGEHEIGHT %d ITERATIONS %d num procesos %d CHUNK %d\n", IMAGEWIDTH, IMAGEHEIGHT, ITERATIONS, nproc, CHUNK);

        pixels = (char *)malloc(SIZE * sizeof(char));
        calculoMandelbrot(pixels, iproc, height);


        //inicio recibir el resto de mensajes
        char *reciboArr = (char *)malloc(sizeof(char)*chunkSize);
        int i;
        MPI_Status status;

        for (i = 1; i<nproc; i++)
        {
            MPI_Recv(reciboArr, height*IMAGEWIDTH*3, MPI_CHAR, MPI_ANY_SOURCE, TAG_ENVIO, MPI_COMM_WORLD, &status);
            stringCopy(pixels, reciboArr, status.MPI_SOURCE, height);
        }
        free(reciboArr);
        //final de recibir resto de mensajes

        end_calc = MPI_Wtime() - begin;
        printf("Tiempo en calculos: %.10lf segundos \n", end_calc);
        //MPI_Barrier(MPI_COMM_WORLD);

        //printf("Escribiendo la imagen\n");
        escribirfichero(pixels);
        end_total = MPI_Wtime() - begin;
        printf("Tiempo en total: %.10lf segundos \n", end_total);

        free(pixels);
    }

    MPI_Finalize();
    return 0;
}

Upvotes: 2

Views: 966

Answers (1)

Just if someone heads the same or similar problem the solution was:

The begining point was as said in the question, that the mpi version worked fine ( without openmp clauses). The variable posLocal varies from 0 to 180000 ( imagewidth*3*height) in the case im debuggint: - IMAGEWIDTH values 600 - height is 100( was IMAGEHEIGHT 400 divided between 4 process )

So when i introduce the openmp clauses dont know exactly why althoug using the open mp barrier, posLocal final value was stuck at 175000 or near. After many tests i have been able that posLocal arrives to 180000, making destino and posLocal shared, and xactual and yactual private. Perhaps other combinations works as well, but this works for me.

Thanks to coincoin for the interest.

#pragma omp parallel shared ( destino) private ( xactual, yactual)
{
#pragma omp parallel for schedule(static)
        for(yactual = posInici; yactual < posInici + height; yactual++)
        {
            for(xactual = 0; xactual < IMAGEWIDTH; xactual++)
            {
                //calculate the initial real and imaginary part of z, based on the pixel location and zoom and position values
                pr = 1.5 * (xactual - IMAGEWIDTH / 2) / (0.5 * zoom * IMAGEWIDTH) + moveX;
                pi = (yactual - IMAGEHEIGHT / 2) / (0.5 * zoom * IMAGEHEIGHT) + moveY;
                newRe = newIm = oldRe = oldIm = 0; //these should start at 0,0
                //"i" will represent the number of iterations
                int i;
                //start the iteration process
                for(i = 0; i < ITERATIONS; i++)
                {
                    //remember value of previous iteration
                    oldRe = newRe;
                    oldIm = newIm;
                    //the actual iteration, the real and imaginary part are calculated
                    newRe = oldRe * oldRe - oldIm * oldIm + pr;
                    newIm = 2 * oldRe * oldIm + pi;
                    //if the point is outside the circle with radius 2: stop
                    if((newRe * newRe + newIm * newIm) > 4) break;
                }


                //printf("Antes zona paralela: %d process Thread: %d \n", iproc, numcpu);
                if(i == ITERATIONS)
                {
                    //escribirArray(destino, posLocal, 0, 0, 0);
                    destino[posLocal] = 0;
                    destino[++posLocal] = 0;
                    destino[++posLocal] = 0;
                    ++posLocal;                     //me preparo para colocar siguiente.
                }
                else
                {
                    double z = sqrt(newRe * newRe + newIm * newIm);
                    int brightness = 256 * log2(1.75 + i - log2(log2(z))) / log2((double)ITERATIONS);

                    //escribirArray(envioArr, xactual, yactual, brightness, brightness, 255);
                    destino[posLocal] = brightness;
                    destino[++posLocal] = brightness;
                    destino[++posLocal] = 255;
                    ++posLocal;                     //me preparo para colocar siguiente
                }
            }

        }

}

    #pragma omp barrier
    printf("Despuess zona paralela: %d process Thread: %d y posLocal %d \n", iproc, numcpu, posLocal);
    if(iproc != 0)
    {
        MPI_Send(destino, chunkSize, MPI_CHAR, 0, TAG_ENVIO, MPI_COMM_WORLD);
        printf("Estoy enviando en el proceso: %d y la posLocal es %d \n", iproc, posLocal);

        free(destino);
    }

Upvotes: 1

Related Questions