OpenMP #pragma omp parallel for is slower

Question

I'm trying to improve my C source code to execute in parallel. I got a quad-core CPU, so I thought 4 was a good number of threads (one for CPU) to run my program really faster than optimized like a sequential code.

But it's not working. My code without OpenMP takes 11 min to be executed, and with the parallel code over 11 minutes. I report ALL my source, but the parallel code is only in getBasin() function.

#include 
#include 
#include 
#include 
#include 

#define BLD "\x1B[1m"
#define RED "\x1B[31m"
#define GRN "\x1B[32m"
#define RST "\x1B[0m"

struct point{
  double x;
  double v;
  double t;
  double E0;
  double E;
  double dE;
} typedef point;

struct parametri {
    double gamma;
    double epsilon;
    double dt;
    double t_star;
    double t_basin;
    double t_max;
    double x0;
    double v0;
    int choice;
    int alg[1];
    double dx;
} typedef parametri;


// Prototipi delle funzioni
void Setup();
void getParams(parametri *pars);
void getError(point *xv, parametri *pars, int *i, int k, int *n_passi, double *xn1, double *vn1);
void getBasin(point *xv, parametri *pars, int *h, int *n_passi, double *xn1, double *vn1);

double f(double x, double v, parametri *pars);

void algEulero(point *xv, parametri *pars, double *xn1, double *vn1);
void algEuleroCromer(point *xv, parametri *pars, double *xn1, double *vn1);
void algPuntoDiMezzo(point *xv, parametri *pars, double *xn1, double *vn1);
void algVerlet(point *xv, parametri *pars, double *xn1, double *vn1);
void algRungeKutta2(point *xv, parametri *pars, double *xn1, double *vn1);

int main(void) {

    // Inizializzo il display per l'utente. Maggiori informazioni vedere funzione Setup();
    Setup();

    // Dichiaro e recupero i parametri richiesti per il corretto funzionamento del programma;
    parametri pars;
    getParams(&pars);

    // Dichiaro e ricavo i parametri essenziali, annesse variabili di supporto;
    int i, n_passi = pars.t_max/pars.dt;
    double dt0, xn1, vn1, t_star = 5.;
    point xv;

    // Imposto i parametri iniziali;
    xv.x = pars.x0;
    xv.v = pars.v0;
    xv.E0 = 0.5*(xv.v)*(xv.v) - (xv.x)*(xv.x)/8. + (xv.x)*(xv.x)*(xv.x)*(xv.x)/4.;
    xv.E = xv.E0;
    xv.dE = 0;
    xv.t = 0;
    pars.t_star = 5;
    pars.t_basin = 60;
    dt0 = pars.dt;

    // Formato dell'output;
    printf ("t	x(t)	v(t)	E(t)	dE
");

    if ((pars.choice == 1) || (pars.choice == 3) || (pars.choice == 4)) { // L'utente ha deciso di affrontare il primo/terzo/quarto esercizio;

        // Avverto l'utente che il processo sta per iniziare. Può risultare inutile su tempi brevi, ma efficace su tempi molto lunghi;
        fprintf(stderr, "
Avvio integrazione numerica... ");       

        if (pars.alg[0] == 1) { // L'utente ha selezionato l'algoritmo di Eulero;
            for (i=0; i Selezionare il secondo algoritmo:
");
            fprintf(stderr, "	>> [1] Eulero
");
            fprintf(stderr, "	>> [2] EuleroCromer - RungeKutta (1 ordine)
");
            fprintf(stderr, "	>> [3] PuntoDiMezzo
");
            fprintf(stderr, "	>> [4] Verlet
");
            fprintf(stderr, "	>> [5] RungeKutta (2 ordine)
");
            fprintf(stderr, "	>> ");
            scanf("%d", &pars.alg[1]);
        } while (( pars.alg[1] <= 0 ));

        // Avverto l'utente che il processo sta per iniziare. Può risultare inutile su tempi brevi, ma efficace su tempi molto lunghi;
        fprintf(stderr, "
Avvio calcolo errori... ");

        // Eseguo lo studio degli errori d'integrazione mediante il primo e secondo algoritmo scelto, rispettivamente nei file error_1.dat, error_2.dat;
        getError(&xv, &pars, &i, 0, &n_passi, &xn1, &vn1);

        // Resetto le variabili e richiamo l'algoritmo per calcolare gli errori;
        pars.dt = dt0;
        n_passi = pars.t_max/pars.dt;
        xv.x = pars.x0;
        xv.v = pars.v0;
        xv.E = xv.E0;
        xv.dE = 0;
        xv.t = 0;

        getError(&xv, &pars, &i, 1, &n_passi, &xn1, &vn1);

        // Processo terminato;
        fprintf(stderr, "

Stato: [%s%sDONE%s]

", BLD, GRN, RST);

    } else if (pars.choice == 5) { // L'utente ha deciso di affrontare il quinto esercizio;

        // Avverto l'utente che il processo sta per iniziare. Può risultare inutile su tempi brevi, ma efficace su tempi molto lunghi;
        fprintf(stderr, "
Avvio calcolo griglia... ");

        getBasin(&xv, &pars, &i, &n_passi, &xn1, &vn1);

        // Processo terminato;
        fprintf(stderr, "[%s%sDONE%s]

", BLD, GRN, RST);

    } else { // L'utente non ha selezionato un esercizio valido;
        fprintf(stderr, "
[%s%sFAILED%s] Esercizio non disponibile.
", BLD, RED, RST);
        exit(EXIT_FAILURE);
    }

    return 0;
}


void Setup() {

    fprintf(stderr, "
Analisi numerica di un'equazione differenziale
");
    fprintf(stderr, "==============================================

");

}

void getParams(parametri *pars) {

    do {
        fprintf(stderr, "> Inserire un valore per gamma      : ");
        scanf("%lf", &pars->gamma);
    } while (pars->gamma < 0);

    do {
        fprintf(stderr, "> Inserire un valore per epsilon    : ");
        scanf("%lf", &pars->epsilon);
    } while (pars->epsilon < 0);

    do {
        fprintf(stderr, "> Inserire un valore per dt         : ");
        scanf("%lf", &pars->dt);
    } while (pars->dt < 0);

    do {
        fprintf(stderr, "> Inserire un valore per tmax t.c.  :
");
        fprintf(stderr, "  >> (tmax > 5) per I eserc.
");
        fprintf(stderr, "  >> (tmax > 60) per V eserc.       : ");
        scanf("%lf", &pars->t_max);
    } while (pars->t_max < 0);

    do {
        fprintf(stderr, "> Inserire un valore per x(0)       : ");
        scanf("%lf", &pars->x0);
    } while (pars->x0 < -1);

    do {
        fprintf(stderr, "> Inserire un valore per v(0)       : ");
        scanf("%lf", &pars->v0);
    } while (pars->v0 < -1);

    do {
        fprintf(stderr, "> Selezionare l'esercizio richiesto :
");
        fprintf(stderr, "	>> [1] Esercizio 1
");
        fprintf(stderr, "	>> [2] Esercizio 2
");
        fprintf(stderr, "	>> [3] Esercizio 3
");
        fprintf(stderr, "	>> [4] Esercizio 4
");
        fprintf(stderr, "	>> [5] Esercizio 5

");

        fprintf(stderr, "	>> ");
        scanf("%d", &pars->choice);
    } while (( pars->choice <= 0 ));

    do {
        fprintf(stderr, "> Selezionare l'algoritmo voluto    :
");
        fprintf(stderr, "	>> [1] Eulero
");
        fprintf(stderr, "	>> [2] EuleroCromer - RungeKutta (1 ordine)
");
        fprintf(stderr, "	>> [3] PuntoDiMezzo
");
        fprintf(stderr, "	>> [4] Verlet
");
        fprintf(stderr, "	>> [5] RungeKutta (2 ordine)

");
        fprintf(stderr, "	>> ");
        scanf("%d", &pars->alg[0]);
    } while (( pars->alg[0] <= 0 ));

}

void getError(point *xv, parametri *pars, int *i, int k, int *n_passi, double *xn1, double *vn1) {
    void (*algF)(point *, parametri *, double *, double *);
    int j, n = 0;
    FILE *fp;

    // Questo if controlla se il codice sta eseguendo lo studio degli errori per il primo o secondo algoritmo (pars->alg[k]);
    if (k == 0) {
        fp = fopen("error_1.dat", "w+");
    } else {
        fp = fopen("error_2.dat", "w+");
    }

    // Assegno il puntatore corretto a seconda dell'algoritmo scelto;
    if (pars->alg[k] == 1) {
        algF = algEulero;
    } else if (pars->alg[k] == 2) {
        algF = algEuleroCromer;
    } else if (pars->alg[k] == 3) {
        algF = algPuntoDiMezzo;
    } else if (pars->alg[k] == 4) {
        algF = algVerlet;
    } else if (pars->alg[k] == 5) {
        algF = algRungeKutta2;
    } else {
        fprintf(stderr, "
[%s%sFAILED%s] E' stato selezionato un algoritmo non valido.
", BLD, RED, RST);
        exit(EXIT_FAILURE);
    }

    // Informo l'utente che il processo sta iniziando;
    fprintf(stderr, "
>> Avvio %d algoritmo... ", k+1);

    // Formattazione dell'output del file contenente gli errori;
    fprintf(fp, "dt	E(t*)	E(0)	dE/E(0)	# passi	i	t
");

    for (j=0; j<8; j++) {

        for ((*i)=0; (*i)<(*n_passi); (*i)++){
            (*algF)(xv, pars, xn1, vn1);

            if (((pars->t_star - pars->dt/2. <= xv->t) && (xv->t >= pars->t_star + pars->dt/2.)) && (n == 0)) {
                fprintf(fp, "%+.14lf	%+.14lf	%+.14lf	%+.14lf	%+.14d	%+.14d	%+.14lf
", pars->dt, xv->E, xv->E0, (xv->E - xv->E0)/xv->E0, (*n_passi), (*i), xv->t);
                n = 1;
            }
        }

        // Resetto le variabili per rilanciare l'algoritmo con dt/2^j
        n = 0;
        xv->t = 0;
        xv->x = pars->x0;
        xv->v = pars->v0;
        pars->dt = pars->dt/2.;
        (*n_passi) = pars->t_max/pars->dt;
        (*xn1) = 0;
        (*vn1) = 0;
        xv->E = xv->E0;
    }

    fclose(fp);

    fprintf(stderr, "[%s%sDONE%s]", BLD, GRN, RST);
}

void getBasin(point *xv, parametri *pars, int *h, int *n_passi,  double *xn1, double *vn1) {

    // Dichiaro e setto i parametri che delimitano la griglia;
    point xv1;
    pars->dx = 0.01;
    xv1.x = -1;
    xv1.v = -1;

    // Dichiaro le variabili necessarie per il bacino d'attrazione;
    int i, j, i_max = 2./pars->dx, j_max = 2./pars->dx;
    void (*algF)(point *, parametri *, double *, double *);
    FILE *fp = fopen("basin.dat", "w+");

    // Assegno il puntatore corretto a seconda dell'algoritmo scelto;
    if (pars->alg[0] == 1) {
        algF = algEulero;
    } else if (pars->alg[0] == 2) {
        algF = algEuleroCromer;
    } else if (pars->alg[0] == 3) {
        algF = algPuntoDiMezzo;
    } else if (pars->alg[0] == 4) {
        algF = algVerlet;
    } else if (pars->alg[0] == 5) {
        algF = algRungeKutta2;
    } else {
        fprintf(stderr, "
[%s%sFAILED%s] E' stato selezionato un algoritmo non valido.
", BLD, RED, RST);
        exit(EXIT_FAILURE);
    }

    // Formattazione output file basin.dat;
    fprintf(fp, "x(0)	x'(0)	x(t*)	v(t*)
");

    omp_set_num_threads(4);

    #pragma omp parallel for
    // Eseguo il for della griglia sull'asse x';
    for (j=0; j<=j_max; j++) {      

        // Eseguo il for della griglia sull'asse x;
        for (i=0; i<=i_max; i++) {
            fprintf(fp, "%lf	%lf	", xv1.x, xv1.v);

            xv->x = xv1.x;
            xv->v = xv1.v;

            // Eseguo l'integrazione numerica
            for ((*h)=0; (*h)<(*n_passi); (*h)++) {
                (*algF)(xv, pars, xn1, vn1);

                // Entro in t = t*, stampo v(t*) ed esco dal ciclo;
                if ((pars->t_basin - pars->dt/2. <= xv->t) && (xv->t >= pars->t_basin + pars->dt/2.)) {
                    fprintf(fp, "%lf	%lf
", xv->x, xv->v);
                    break;
                }
            }

            xv1.x += pars->dx;
            xv->t = 0;
            (*xn1) = 0;
            (*vn1) = 0;
        }

        // Resetto la x e incremento la x';
        xv1.x = -1;
        xv1.v += pars->dx;
    }

}

double f(double x, double v, parametri *pars) {
    return 0.25*x - x*x*x + (pars->gamma - pars->epsilon*(x*x))*v;
}

void algEulero(point *xv, parametri *pars, double *xn1, double *vn1){
  printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  *xn1 = xv->x + (xv->v)*(pars->dt);
  *vn1 = xv->v + f(xv->x, xv->v, pars)*(pars->dt);

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
  xv->x = *xn1;
  xv->v = *vn1;
}

void algEuleroCromer(point *xv, parametri *pars, double *xn1, double *vn1){
  printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  xv->v = xv->v + f(xv->x, xv->v, pars)*(pars->dt);
  xv->x = xv->x + (xv->v)*(pars->dt);

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
}

void algPuntoDiMezzo(point *xv, parametri *pars, double *xn1, double *vn1) {
  printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  *vn1 = xv->v + f(xv->x, xv->v, pars)*(pars->dt);
  xv->x = xv->x + (0.5*(xv->v + (*vn1)))*(pars->dt);

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
  xv->v = *vn1;
}

void algVerlet(point *xv, parametri *pars, double *xn1, double *vn1) {
  printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  *xn1 = xv->x + xv->v*pars->dt + 0.5*(f(xv->x, xv->v, pars))*pars->dt*pars->dt;
  *vn1 = xv->v + 0.5*(f(xv->x, xv->v, pars) + f((*xn1), xv->v, pars))*pars->dt;

  xv->x = *xn1;
  xv->v = *vn1;

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
}


void algRungeKutta2(point *xv, parametri *pars, double *xn1, double *vn1) {
      printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
      *xn1 = xv->x + xv->v*pars->dt + 0.5*f(xv->x, xv->v, pars)*pars->dt*pars->dt;
      *vn1 = xv->v + f(xv->x + 0.5*xv->v*pars->dt, xv->v + 0.5*f(xv->x, xv->v, pars)*pars->dt, pars)*pars->dt;

      xv->x = *xn1;
      xv->v = *vn1;

      xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
      xv->dE = fabs(xv->E-xv->E0)/xv->E0;

      xv->t += (pars->dt);
}

------------------ EDIT -----------------

Dear Gilles, I explain you what my program do. This program is oriented to solve numerically a differential equation with different algorithm (algEulero, algEuleroCromer, algPuntoDiMezzo, algVerlet, algRungeKutta2). It's working fine. The physic equation is d^2x/dt^2 = f(x, v, gamma, epsilon). This f() is exactly the f() you can find in my code. The "big" problem of my simple C program is that he is really slow: when I choose the 5' exercise (pars.choice == 5), the program will do exactly:

1) Calculating (the two for in getBasin()) an area of [-1, 1]x[-1, 1] with pars.dx increment; 2) Every increment, on x and on y, will start the algorithm which solves numerically the differential equation with the two starting condition (x, y) of the for. When the algorithm reached an asynthotic t* (pars.t_basin), he will write the output of x(t*) and v(t*) in basin.dat. 3) (x, y) will change and go to point (1) again.

Now, you can test with the following parameters: 0.83, 4, 0.01, 62, 1, 1, 5, 5.

I tested you code but is not faster than my sequential code (over 11 min, for example). In order to improve it:

1) The order of the output of basin.dat is insignificant, because I will plot the space (x, y) coloring my point depending the 3' and 4' column value (with images on Gnuplot). 2) Why do you create threads also before the function getBasin() and not only for the two for() ? Should this not repeat, for every thread, the getBasin() ?

Sorry for my bad english and parallel programming, I'm trying to improve it reading tutorial online.

Gilles · Accepted Answer

Well, the obvious main problem on your code is that the parallel version is (very) wrong. You define all your variables outside of the parallel region, but do not declare any of them private (not even the loop indexes).

Moreover, since you passed all the arguments of your getBasin() function as pointers, declaring them private afterwards becomes trickier.

However, a quick analysis of the code shows that although these parameters are passed as pointer, you actually don't care about their value upon exit of the routine. Moreover, it looks like there is no data dependency for the loop you tried to parallelise (although I didn't do a fully comprehensive check of that). The only clear dependency I found is about the output file you opened, which will be tricky to update in parallel while keeping the sequential order...

So in order to fix the parallelisation of your code, this is what I did:

Passing the parameters to getBasin() by values rather than by reference. This will generate some extra copies of the data, but since these data would have needed to be declared private, a copy would have been necessary anyway.
Enclosing the call to getBasin() within a parallel region. Indeed, it looked to me simpler to do like this rather than to deal with data privatisation and IO issues inside the function itself.
Opening one private copy of the output file per thread, named "basinXX.dat" with "XX" being the id of the current thread, left-padded with 0. These files will contain the share of the global outputs corresponding to the current thread. Let's hope this will be suitable for you. Otherwise, dealing with the ordering of the printing will probably be a bit trickier.
Use a single orphan omp for directive inside the function to parallelise the loop.

With that, the code should (hopefully) scale much better. However, since you didn't indicate the input parameters to use for the computation, I cannot test it, neither for correctness, nor for performance. So it can just fail miserably...

Anyway, here is what I came up with:

#include 
#include 
#include 
#include 
#include 

#define BLD "\x1B[1m"
#define RED "\x1B[31m"
#define GRN "\x1B[32m"
#define RST "\x1B[0m"

struct point{
  double x;
  double v;
  double t;
  double E0;
  double E;
  double dE;
} typedef point;

struct parametri {
    double gamma;
    double epsilon;
    double dt;
    double t_star;
    double t_basin;
    double t_max;
    double x0;
    double v0;
    int choice;
    int alg[1];
    double dx;
} typedef parametri;


// Prototipi delle funzioni
void Setup();
void getParams(parametri *pars);
void getError(point *xv, parametri *pars, int *i, int k, int *n_passi, double *xn1, double *vn1);
void getBasin(point xv, parametri pars, int h, int n_passi, double xn1, double vn1);

double f(double x, double v, parametri *pars);

void algEulero(point *xv, parametri *pars, double *xn1, double *vn1);
void algEuleroCromer(point *xv, parametri *pars, double *xn1, double *vn1);
void algPuntoDiMezzo(point *xv, parametri *pars, double *xn1, double *vn1);
void algVerlet(point *xv, parametri *pars, double *xn1, double *vn1);
void algRungeKutta2(point *xv, parametri *pars, double *xn1, double *vn1);

int main(void) {

    // Inizializzo il display per l'utente. Maggiori informazioni vedere funzione Setup();
    Setup();

    // Dichiaro e recupero i parametri richiesti per il corretto funzionamento del programma;
    parametri pars;
    getParams(&pars);

    // Dichiaro e ricavo i parametri essenziali, annesse variabili di supporto;
    int i, n_passi = pars.t_max/pars.dt;
    double dt0, xn1, vn1, t_star = 5.;
    point xv;

    // Imposto i parametri iniziali;
    xv.x = pars.x0;
    xv.v = pars.v0;
    xv.E0 = 0.5*(xv.v)*(xv.v) - (xv.x)*(xv.x)/8. + (xv.x)*(xv.x)*(xv.x)*(xv.x)/4.;
    xv.E = xv.E0;
    xv.dE = 0;
    xv.t = 0;
    pars.t_star = 5;
    pars.t_basin = 60;
    dt0 = pars.dt;

    // Formato dell'output;
    printf ("t	x(t)	v(t)	E(t)	dE
");

    if ((pars.choice == 1) || (pars.choice == 3) || (pars.choice == 4)) { // L'utente ha deciso di affrontare il primo/terzo/quarto esercizio;

        // Avverto l'utente che il processo sta per iniziare. Può risultare inutile su tempi brevi, ma efficace su tempi molto lunghi;
        fprintf(stderr, "
Avvio integrazione numerica... ");       

        if (pars.alg[0] == 1) { // L'utente ha selezionato l'algoritmo di Eulero;
            for (i=0; i Selezionare il secondo algoritmo:
");
            fprintf(stderr, "	>> [1] Eulero
");
            fprintf(stderr, "	>> [2] EuleroCromer - RungeKutta (1 ordine)
");
            fprintf(stderr, "	>> [3] PuntoDiMezzo
");
            fprintf(stderr, "	>> [4] Verlet
");
            fprintf(stderr, "	>> [5] RungeKutta (2 ordine)
");
            fprintf(stderr, "	>> ");
            scanf("%d", &pars.alg[1]);
        } while (( pars.alg[1] <= 0 ));

        // Avverto l'utente che il processo sta per iniziare. Può risultare inutile su tempi brevi, ma efficace su tempi molto lunghi;
        fprintf(stderr, "
Avvio calcolo errori... ");

        // Eseguo lo studio degli errori d'integrazione mediante il primo e secondo algoritmo scelto, rispettivamente nei file error_1.dat, error_2.dat;
        getError(&xv, &pars, &i, 0, &n_passi, &xn1, &vn1);

        // Resetto le variabili e richiamo l'algoritmo per calcolare gli errori;
        pars.dt = dt0;
        n_passi = pars.t_max/pars.dt;
        xv.x = pars.x0;
        xv.v = pars.v0;
        xv.E = xv.E0;
        xv.dE = 0;
        xv.t = 0;

        getError(&xv, &pars, &i, 1, &n_passi, &xn1, &vn1);

        // Processo terminato;
        fprintf(stderr, "

Stato: [%s%sDONE%s]

", BLD, GRN, RST);

    } else if (pars.choice == 5) { // L'utente ha deciso di affrontare il quinto esercizio;

        // Avverto l'utente che il processo sta per iniziare. Può risultare inutile su tempi brevi, ma efficace su tempi molto lunghi;
        fprintf(stderr, "
Avvio calcolo griglia... ");

        #pragma omp parallel num_threads( 4 )
        getBasin(xv, pars, i, n_passi, xn1, vn1);

        // Processo terminato;
        fprintf(stderr, "[%s%sDONE%s]

", BLD, GRN, RST);

    } else { // L'utente non ha selezionato un esercizio valido;
        fprintf(stderr, "
[%s%sFAILED%s] Esercizio non disponibile.
", BLD, RED, RST);
        exit(EXIT_FAILURE);
    }

    return 0;
}


void Setup() {

    fprintf(stderr, "
Analisi numerica di un'equazione differenziale
");
    fprintf(stderr, "==============================================

");

}

void getParams(parametri *pars) {

    do {
        fprintf(stderr, "> Inserire un valore per gamma      : ");
        scanf("%lf", &pars->gamma);
    } while (pars->gamma < 0);

    do {
        fprintf(stderr, "> Inserire un valore per epsilon    : ");
        scanf("%lf", &pars->epsilon);
    } while (pars->epsilon < 0);

    do {
        fprintf(stderr, "> Inserire un valore per dt         : ");
        scanf("%lf", &pars->dt);
    } while (pars->dt < 0);

    do {
        fprintf(stderr, "> Inserire un valore per tmax t.c.  :
");
        fprintf(stderr, "  >> (tmax > 5) per I eserc.
");
        fprintf(stderr, "  >> (tmax > 60) per V eserc.       : ");
        scanf("%lf", &pars->t_max);
    } while (pars->t_max < 0);

    do {
        fprintf(stderr, "> Inserire un valore per x(0)       : ");
        scanf("%lf", &pars->x0);
    } while (pars->x0 < -1);

    do {
        fprintf(stderr, "> Inserire un valore per v(0)       : ");
        scanf("%lf", &pars->v0);
    } while (pars->v0 < -1);

    do {
        fprintf(stderr, "> Selezionare l'esercizio richiesto :
");
        fprintf(stderr, "	>> [1] Esercizio 1
");
        fprintf(stderr, "	>> [2] Esercizio 2
");
        fprintf(stderr, "	>> [3] Esercizio 3
");
        fprintf(stderr, "	>> [4] Esercizio 4
");
        fprintf(stderr, "	>> [5] Esercizio 5

");

        fprintf(stderr, "	>> ");
        scanf("%d", &pars->choice);
    } while (( pars->choice <= 0 ));

    do {
        fprintf(stderr, "> Selezionare l'algoritmo voluto    :
");
        fprintf(stderr, "	>> [1] Eulero
");
        fprintf(stderr, "	>> [2] EuleroCromer - RungeKutta (1 ordine)
");
        fprintf(stderr, "	>> [3] PuntoDiMezzo
");
        fprintf(stderr, "	>> [4] Verlet
");
        fprintf(stderr, "	>> [5] RungeKutta (2 ordine)

");
        fprintf(stderr, "	>> ");
        scanf("%d", &pars->alg[0]);
    } while (( pars->alg[0] <= 0 ));

}

void getError(point *xv, parametri *pars, int *i, int k, int *n_passi, double *xn1, double *vn1) {
    void (*algF)(point *, parametri *, double *, double *);
    int j, n = 0;
    FILE *fp;

    // Questo if controlla se il codice sta eseguendo lo studio degli errori per il primo o secondo algoritmo (pars->alg[k]);
    if (k == 0) {
        fp = fopen("error_1.dat", "w+");
    } else {
        fp = fopen("error_2.dat", "w+");
    }

    // Assegno il puntatore corretto a seconda dell'algoritmo scelto;
    if (pars->alg[k] == 1) {
        algF = algEulero;
    } else if (pars->alg[k] == 2) {
        algF = algEuleroCromer;
    } else if (pars->alg[k] == 3) {
        algF = algPuntoDiMezzo;
    } else if (pars->alg[k] == 4) {
        algF = algVerlet;
    } else if (pars->alg[k] == 5) {
        algF = algRungeKutta2;
    } else {
        fprintf(stderr, "
[%s%sFAILED%s] E' stato selezionato un algoritmo non valido.
", BLD, RED, RST);
        exit(EXIT_FAILURE);
    }

    // Informo l'utente che il processo sta iniziando;
    fprintf(stderr, "
>> Avvio %d algoritmo... ", k+1);

    // Formattazione dell'output del file contenente gli errori;
    fprintf(fp, "dt	E(t*)	E(0)	dE/E(0)	# passi	i	t
");

    for (j=0; j<8; j++) {

        for ((*i)=0; (*i)<(*n_passi); (*i)++){
            (*algF)(xv, pars, xn1, vn1);

            if (((pars->t_star - pars->dt/2. <= xv->t) && (xv->t >= pars->t_star + pars->dt/2.)) && (n == 0)) {
                fprintf(fp, "%+.14lf	%+.14lf	%+.14lf	%+.14lf	%+.14d	%+.14d	%+.14lf
", pars->dt, xv->E, xv->E0, (xv->E - xv->E0)/xv->E0, (*n_passi), (*i), xv->t);
                n = 1;
            }
        }

        // Resetto le variabili per rilanciare l'algoritmo con dt/2^j
        n = 0;
        xv->t = 0;
        xv->x = pars->x0;
        xv->v = pars->v0;
        pars->dt = pars->dt/2.;
        (*n_passi) = pars->t_max/pars->dt;
        (*xn1) = 0;
        (*vn1) = 0;
        xv->E = xv->E0;
    }

    fclose(fp);

    fprintf(stderr, "[%s%sDONE%s]", BLD, GRN, RST);
}

void getBasin(point xv, parametri pars, int h, int n_passi,  double xn1, double vn1) {

    // Dichiaro e setto i parametri che delimitano la griglia;
    point xv1;
    pars.dx = 0.01;
    xv1.x = -1;
    xv1.v = -1;

    // Dichiaro le variabili necessarie per il bacino d'attrazione;
    int i, j, i_max = 2./pars.dx, j_max = 2./pars.dx;
    void (*algF)(point *, parametri *, double *, double *);
    char fname[13];
    sprintf( fname, "bassin%02d.dat", omp_get_thread_num() );

    FILE *fp = fopen( fname, "w+");

    // Assegno il puntatore corretto a seconda dell'algoritmo scelto;
    if (pars.alg[0] == 1) {
        algF = algEulero;
    } else if (pars.alg[0] == 2) {
        algF = algEuleroCromer;
    } else if (pars.alg[0] == 3) {
        algF = algPuntoDiMezzo;
    } else if (pars.alg[0] == 4) {
        algF = algVerlet;
    } else if (pars.alg[0] == 5) {
        algF = algRungeKutta2;
    } else {
        fprintf(stderr, "
[%s%sFAILED%s] E' stato selezionato un algoritmo non valido.
", BLD, RED, RST);
        exit(EXIT_FAILURE);
    }

    // Formattazione output file basin.dat;
    fprintf(fp, "x(0)	x'(0)	x(t*)	v(t*)
");

    #pragma omp for
    // Eseguo il for della griglia sull'asse x';
    for (j=0; j<=j_max; j++) {      

        // Eseguo il for della griglia sull'asse x;
        for (i=0; i<=i_max; i++) {
            fprintf(fp, "%lf	%lf	", xv1.x, xv1.v);

            xv.x = xv1.x;
            xv.v = xv1.v;

            // Eseguo l'integrazione numerica
            for (h=0; h= pars.t_basin + pars.dt/2.)) {
                    fprintf(fp, "%lf	%lf
", xv.x, xv.v);
                    break;
                }
            }

            xv1.x += pars.dx;
            xv.t = 0;
            xn1 = 0;
            vn1 = 0;
        }

        // Resetto la x e incremento la x';
        xv1.x = -1;
        xv1.v += pars.dx;
    }

}

double f(double x, double v, parametri *pars) {
    return 0.25*x - x*x*x + (pars->gamma - pars->epsilon*(x*x))*v;
}

void algEulero(point *xv, parametri *pars, double *xn1, double *vn1){
  //printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  *xn1 = xv->x + (xv->v)*(pars->dt);
  *vn1 = xv->v + f(xv->x, xv->v, pars)*(pars->dt);

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
  xv->x = *xn1;
  xv->v = *vn1;
}

void algEuleroCromer(point *xv, parametri *pars, double *xn1, double *vn1){
  //printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  xv->v = xv->v + f(xv->x, xv->v, pars)*(pars->dt);
  xv->x = xv->x + (xv->v)*(pars->dt);

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
}

void algPuntoDiMezzo(point *xv, parametri *pars, double *xn1, double *vn1) {
  //printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  *vn1 = xv->v + f(xv->x, xv->v, pars)*(pars->dt);
  xv->x = xv->x + (0.5*(xv->v + (*vn1)))*(pars->dt);

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
  xv->v = *vn1;
}

void algVerlet(point *xv, parametri *pars, double *xn1, double *vn1) {
  //printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
  *xn1 = xv->x + xv->v*pars->dt + 0.5*(f(xv->x, xv->v, pars))*pars->dt*pars->dt;
  *vn1 = xv->v + 0.5*(f(xv->x, xv->v, pars) + f((*xn1), xv->v, pars))*pars->dt;

  xv->x = *xn1;
  xv->v = *vn1;

  xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
  xv->dE = fabs(xv->E-xv->E0)/xv->E0;

  xv->t += (pars->dt);
}


void algRungeKutta2(point *xv, parametri *pars, double *xn1, double *vn1) {
      //printf("%.14g	%.14g	%.14g	%.14g	%.14g
", xv->t, xv->x, xv->v, xv->E, xv->dE);
      *xn1 = xv->x + xv->v*pars->dt + 0.5*f(xv->x, xv->v, pars)*pars->dt*pars->dt;
      *vn1 = xv->v + f(xv->x + 0.5*xv->v*pars->dt, xv->v + 0.5*f(xv->x, xv->v, pars)*pars->dt, pars)*pars->dt;

      xv->x = *xn1;
      xv->v = *vn1;

      xv->E = 0.5*(xv->v)*(xv->v) - (xv->x)*(xv->x)/8. + (xv->x)*(xv->x)*(xv->x)*(xv->x)/4.;
      xv->dE = fabs(xv->E-xv->E0)/xv->E0;

      xv->t += (pars->dt);
}

EDIT

With the new input parameters you gave, I was able to test the code and it turns out that:

I had a small bug in the code I gave you (for opening the output files, using "fname" instead of fname)
Your code was spending most (all) of its time in printf().

After fixing these two, and compiling like this:

gcc -O3 -fopenmp -mtune=native -march=native -w bassin.c

I ran it on my dual-core laptop in 2.12s

The code is updated. Please try it by yourself.

OpenMP #pragma omp parallel for is slower

Answers (1)

Related Questions