Reputation: 59
I am trying to convert this code to OpenMP, but I am not able to convert it properly; any help would be appreciated. I can now convert a simple loop to OpenMP, but not code that involves functions, so I am trying to learn how that works. Thanks!
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of filter coefficients (length of f). */
#define FILTERLENGTH 960
/* Number of output samples per trace (second dimension of out). */
#define TRACE_LENGTH 16384
/* Number of traces (first dimension of d and out). */
#define TRACE_COUNT 32
/* Stored length of one input trace: TRACE_LENGTH samples plus FILTERLENGTH extra entries. */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients; filled in by main. */
float f[FILTERLENGTH];
/* Input traces; main zeroes the first and last FILTERLENGTH / 2 entries of each row and fills the rest with random values. */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter results; passed to filter() as its output array. */
float out[TRACE_COUNT][TRACE_LENGTH];
/*
 * Applies the FL coefficients in filt to each of the NT rows of traces
 * (each row holds TL + FL values) and writes TL results per row into out.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
int main(int argc, char *argv[])
{
#pragma omp parallel
{
int i, j, k, nc, id;
struct timeval start, stop, elapse;
float fmax = (float)RAND_MAX;
#pragma omp parallel for
for (k = 0; k < FILTERLENGTH; k++)
f[k] = k - (FILTERLENGTH - 1) / 2.0;
for (j = 0; j < TRACE_COUNT; j++)
{
#pragma omp parallel for
for (i = 0; i < FILTERLENGTH / 2; i++)
d[j][i] = 0;
for (i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
d[j][i] = rand() / fmax;
for (i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
d[j][i] = 0;
}
gettimeofday(&start, NULL);
filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
gettimeofday(&stop, NULL);
timersub(&stop, &start, &elapse);
fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
fprintf(stderr, "Elapse time\t%g\n", elapse.tv_sec + 0.000001 * elapse.tv_usec);
}
}
/*
 * Apply an FL-tap filter to each of NT traces.
 *
 * For every trace j and output sample i:
 *     out[j][i] = sum for k in [0, FL) of filt[k] * traces[j][i + k]
 *
 * Parameters:
 *   FL     - number of filter coefficients
 *   filt   - the FL coefficients
 *   NT     - number of traces (rows of traces and out)
 *   TL     - number of output samples per trace
 *   traces - input, NT rows of TL + FL values each
 *   out    - output, NT rows of TL values each; every element is overwritten
 *
 * Traces are independent of one another, so the outer loop is shared among
 * OpenMP threads. Without OpenMP the pragma is ignored and the function runs
 * serially with the same result.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL])
{
#pragma omp parallel for
    for (int j = 0; j < NT; j++) {
        for (int i = 0; i < TL; i++) {
            /* Accumulate in a local so the sum over all taps is kept, not just the last product. */
            float acc = 0.0f;
            for (int k = 0; k < FL; k++) {
                acc += filt[k] * traces[j][i + k];
            }
            out[j][i] = acc;
        }
    }
}
Upvotes: 1
Views: 205
Reputation: 51553
There are some issues with your code. In the function `main`, the iterations of the loops are not being assigned to threads as you intended. Because you have added the `parallel` keyword again to `#pragma omp for` (i.e. you wrote `#pragma omp parallel for` inside an existing parallel region), and assuming that nested parallelism is disabled, which it is by default, each of the threads created in the outer parallel region will execute the code within that region "sequentially". For more detail, read this SO thread.
Besides that this code:
/*
 * Pragma on the first inner loop only. Inside a parallel region every thread
 * runs the j loop itself; only the loop directly after the pragma is divided
 * among the threads, and the two remaining inner loops are executed in full
 * by each thread.
 */
for (int j = 0; j < TRACE_COUNT; j++)
{
#pragma omp for
    for (int i = 0; i < FILTERLENGTH / 2; i++)
        d[j][i] = 0;
    for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
        d[j][i] = rand() / fmax;
    /* Index declared in the loop header, like the two loops above. */
    for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
        d[j][i] = 0;
}
can be improved by moving the `#pragma omp for` to the outer loop:
/*
 * Pragma on the outer loop: the j iterations (one per trace) are divided
 * among the threads of the enclosing parallel region, and each thread runs
 * all three inner loops for the rows it was given.
 */
#pragma omp for
for (int j = 0; j < TRACE_COUNT; j++)
{
/* Zero the first FILTERLENGTH / 2 entries of the row. */
for (int i = 0; i < FILTERLENGTH / 2; i++)
d[j][i] = 0;
/* NOTE(review): rand() is now called from several threads at once; the C standard does not require it to be thread-safe - confirm this is acceptable. */
for (int i = FILTERLENGTH / 2; i < TRACE_LENGTH + FILTERLENGTH / 2; i++)
d[j][i] = rand() / fmax;
/* Zero the remaining entries up to TRACE_SPACE. */
for (int i = TRACE_LENGTH + FILTERLENGTH / 2; i < TRACE_SPACE; i++)
d[j][i] = 0;
}
And the scope of the parallel region should be reduced. Everything put together:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/* Number of filter coefficients (length of f). */
#define FILTERLENGTH 960
/* Number of output samples per trace (second dimension of out). */
#define TRACE_LENGTH 16384
/* Number of traces (first dimension of d and out). */
#define TRACE_COUNT 32
/* Stored length of one input trace: TRACE_LENGTH samples plus FILTERLENGTH extra entries. */
#define TRACE_SPACE (TRACE_LENGTH + FILTERLENGTH)
/* Filter coefficients; filled in by main. */
float f[FILTERLENGTH];
/* Input traces; main zeroes the first and last FILTERLENGTH / 2 entries of each row and fills the rest with random values. */
float d[TRACE_COUNT][TRACE_SPACE];
/* Filter results; passed to filter() as its output array. */
float out[TRACE_COUNT][TRACE_LENGTH];
/*
 * Takes FL coefficients in filt, NT rows of TL + FL input values in traces,
 * and an NT x TL output array out. Only the prototype is visible here.
 */
void filter(int FL, float filt[FL], int NT, int TL, float traces[NT][TL + FL], float out[NT][TL]);
/*
 * Benchmark driver with a reduced parallel region: the coefficient and
 * padding initialisation is work-shared among threads, then the random
 * samples are generated, filter() is timed and the report is printed from
 * serial code.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    const float fmax = (float)RAND_MAX;
    const int half = FILTERLENGTH / 2;

#pragma omp parallel
    {
        /*
         * The two loops touch different arrays, so neither needs to wait for
         * the other ("nowait"); the implicit barrier at the end of the
         * parallel region still orders both before the serial code below.
         */
#pragma omp for nowait
        for (int k = 0; k < FILTERLENGTH; k++) {
            f[k] = (float)(k - (FILTERLENGTH - 1) / 2.0);
        }
#pragma omp for nowait
        for (int j = 0; j < TRACE_COUNT; j++) {
            for (int i = 0; i < half; i++) {
                d[j][i] = 0;
            }
            for (int i = TRACE_LENGTH + half; i < TRACE_SPACE; i++) {
                d[j][i] = 0;
            }
        }
    }

    /*
     * Random samples are produced outside the parallel region: rand() keeps
     * hidden shared state and is not required to be safe to call from several
     * threads at once. Generating serially also makes the data reproducible.
     */
    for (int j = 0; j < TRACE_COUNT; j++) {
        for (int i = half; i < TRACE_LENGTH + half; i++) {
            d[j][i] = rand() / fmax;
        }
    }

    struct timeval start, stop;
    gettimeofday(&start, NULL);
    filter(FILTERLENGTH, f, TRACE_COUNT, TRACE_LENGTH, d, out);
    gettimeofday(&stop, NULL);

    /* Computed by hand so the code does not depend on the non-standard timersub() macro. */
    const double elapsed = (double)(stop.tv_sec - start.tv_sec)
                         + 0.000001 * (double)(stop.tv_usec - start.tv_usec);

    fprintf(stderr, "Filter length\t%d\n", FILTERLENGTH);
    fprintf(stderr, "Trace length\t%d\n", TRACE_LENGTH);
    fprintf(stderr, "Trace count\t%d\n", TRACE_COUNT);
    fprintf(stderr, "Elapse time\t%g\n", elapsed);
    return 0;
}
You can still try to parallelize the function `filter`.
Upvotes: 3