Anonymous Noob
Anonymous Noob

Reputation: 310

CUDA: Access violation reading location

I'm new to CUDA and was trying to make a simple program for blurring .tga files, when I encountered an access violation reading location runtime exception. Because I'm so new to CUDA I couldn't figure out how to fix it and google wasn't very helpful. Here's the code:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>

#define println(...) std::cout << __VA_ARGS__ << "\n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5

unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];

void read(char input[256]) {
    FILE* f;
    f = fopen(input, "rb");
    if (!f) {
        printf("File Reading Failed\n");
    }
    fread(&header, 1, 18, f);
    fread(&in, 1, HEIGHT*WIDTH * 3, f);
    fclose(f);
}

void write(char output[256]) {
    FILE* fw;
    fw = fopen(output, "wb+");
    if (!fw) {
        printf("File Writing Failed\n");
    }
    header[16] = 24;
    header[13] = WIDTH / 256;
    header[12] = WIDTH % 256;
    header[15] = HEIGHT / 256;
    header[14] = HEIGHT % 256;
    fwrite(&header, 1, 18, fw);
    fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);
    fclose(fw);
}

__device__
int toIndex(int x, int y) {
    return x + y / WIDTH;
}

__device__
void doPixel(int x, int y, char *red, char *green, char *blue) {
    int avgRed = 0;
    int avgGreen = 0;
    int avgBlue = 0;
    for (int i = -BLUR; i <= BLUR; i++) {
        for (int j = -BLUR; j <= BLUR; j++) {
            avgRed += red[toIndex(i, j)];
            avgBlue += blue[toIndex(i, j)];
            avgGreen += green[toIndex(i, j)];
        }
    }
    red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
    green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
    blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);
}

__global__
void setValue(char *red, char *green, char *blue) {
    int x;
    int y;
    for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
        x = i % WIDTH;
        y = i / WIDTH;
        doPixel(x, y, red, green, blue);
    }
}

int main(void) {
    char *red, *green, *blue;
    double time;
    read("test.tga");
    cudaMallocManaged(&red, WIDTH * HEIGHT);
    cudaMallocManaged(&green, WIDTH * HEIGHT);
    cudaMallocManaged(&blue, WIDTH * HEIGHT);
    for (int x = 0; x < WIDTH; x++) {
        for (int y = 0; y < HEIGHT; y++) {
            red[x + y*WIDTH] = in[y][x][2];
            green[x + y*WIDTH] = in[y][x][1];
            blue[x + y*WIDTH] = in[y][x][0];
        }
    }
    time = clock();
    setValue<<<1, 1024>>>(red, green, blue);
    cudaDeviceSynchronize();
    println((clock() - time) / CLOCKS_PER_SEC);
    int x;
    int y;
    for (int i = 0; i < WIDTH * HEIGHT; i++) {
        x = i % WIDTH;
        y = i / WIDTH;
        out[y][x][0] = blue[i];      //Program gives error here
        out[y][x][1] = green[i];
        out[y][x][2] = red[i];
    }
    write("test.tga");
    cudaFree(red);
    cudaFree(green);
    cudaFree(blue);
    getchar();
}

I read that cudaDeviceSynchronize() was the way to fix this issue, but that doesn't seem to be working. cudaThreadSynchronize() also doesn't fix the issue.

Upvotes: 0

Views: 1252

Answers (1)

BAdhi
BAdhi

Reputation: 510

Easiest way to find an illegal memory access error is to run the binary with cuda-gdb. Make sure you give -g -G -O0 flags when compiling

In your case you might find some errors in this code segment

for (int i = -BLUR; i <= BLUR; i++) {
    for (int j = -BLUR; j <= BLUR; j++) {
        avgRed   += red[toIndex(i, j)];
        avgBlue  += blue[toIndex(i, j)];
        avgGreen += green[toIndex(i, j)];
    }
}

Upvotes: 1

Related Questions