Reputation: 9
I am trying to decrypt an XOR-encrypted file on my NVIDIA GeForce 3060 Laptop GPU (which has 5 async engines) using CUDA in C.
My program works fine, but when I profiled it I saw that my different streams do not run asynchronously (see pictures 1 and 2; picture 2 is with pinned memory).
second try with pinned memory:
First try with non-pinned memory:
This is the function that calls my kernel function:
/*
 * XOR-decrypts a device buffer in place, one byte per thread.
 *
 * Launch layout: 1D grid of 1D blocks with at least `size` threads in total;
 * threads whose flat index is >= size do nothing.
 *
 * buffer: device pointer to at least `size` bytes; modified in place.
 * key:    byte XORed into every element of the buffer.
 * size:   number of bytes to process.
 */
__global__ void decryptKernel(unsigned char* buffer, unsigned char key, int size) {
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx < size) {
        // Use the caller-supplied key; the previous hard-coded 0x43 silently
        // ignored the `key` argument.
        buffer[idx] ^= key;
    }
}
/*
 * XOR-decrypts TOTAL_SIZE bytes of `shellcode` in place on the GPU.
 *
 * The input is staged in a page-locked host buffer and processed in chunks of
 * at most CHUNK_SIZE bytes. Chunks are assigned round-robin to NUM_STREAMS
 * streams; each stream owns one device buffer and, per chunk, enqueues
 * host-to-device copy -> decryptKernel -> device-to-host copy.
 *
 * shellcode: host buffer of at least TOTAL_SIZE bytes; overwritten with the
 *            decrypted bytes only if every CUDA call succeeded.
 * key:       byte forwarded to decryptKernel.
 *
 * On any CUDA failure the function releases what it acquired and returns
 * with `shellcode` left unmodified.
 */
void decrypt_large_shellcode(unsigned char* shellcode, unsigned char key) {
    // Do offset arithmetic in size_t so large inputs cannot overflow int.
    const size_t total_bytes = (size_t)TOTAL_SIZE;
    const size_t chunk_bytes = (size_t)CHUNK_SIZE;
    const int threadsPerBlock = 256;

    unsigned char* d_buffers[NUM_STREAMS];
    cudaStream_t streams[NUM_STREAMS];
    unsigned char* pinned_shellcode = 0;
    int ready = 0;  // number of (device buffer, stream) pairs fully created

    if (total_bytes == 0 || chunk_bytes == 0) {
        return;  // nothing to process (and a zero chunk size would never advance)
    }

    cudaError_t status = cudaHostAlloc((void**)&pinned_shellcode, total_bytes, cudaHostAllocDefault);
    if (status != cudaSuccess) {
        return;
    }
    memcpy(pinned_shellcode, shellcode, total_bytes);

    for (int i = 0; i < NUM_STREAMS; i++) {
        status = cudaMalloc((void**)&d_buffers[i], chunk_bytes);
        if (status != cudaSuccess) {
            break;
        }
        status = cudaStreamCreate(&streams[i]);
        if (status != cudaSuccess) {
            cudaFree(d_buffers[i]);  // this pair is incomplete; release its buffer now
            break;
        }
        ready = i + 1;
    }

    if (status == cudaSuccess) {
        int s = 0;  // stream (and device buffer) used for the current chunk
        for (size_t chunk_offset = 0; chunk_offset < total_bytes; chunk_offset += chunk_bytes) {
            const size_t remaining = total_bytes - chunk_offset;
            const size_t this_chunk = remaining < chunk_bytes ? remaining : chunk_bytes;
            const int blocksPerGrid = (int)((this_chunk + threadsPerBlock - 1) / threadsPerBlock);

            // Work queued on one stream runs in issue order, so reusing
            // d_buffers[s] for a later chunk on the same stream is safe.
            status = cudaMemcpyAsync(d_buffers[s], pinned_shellcode + chunk_offset, this_chunk,
                                     cudaMemcpyHostToDevice, streams[s]);
            if (status != cudaSuccess) {
                break;
            }

            decryptKernel<<<blocksPerGrid, threadsPerBlock, 0, streams[s]>>>(d_buffers[s], key, (int)this_chunk);
            status = cudaGetLastError();  // launch-configuration errors are only reported here
            if (status != cudaSuccess) {
                break;
            }

            status = cudaMemcpyAsync(pinned_shellcode + chunk_offset, d_buffers[s], this_chunk,
                                     cudaMemcpyDeviceToHost, streams[s]);
            if (status != cudaSuccess) {
                break;
            }

            s = (s + 1) % NUM_STREAMS;
        }
    }

    // Wait for all queued work before touching the results or freeing the
    // buffers; do not rely on cudaFree happening to synchronize.
    for (int i = 0; i < ready; i++) {
        cudaError_t sync_status = cudaStreamSynchronize(streams[i]);
        if (status == cudaSuccess) {
            status = sync_status;  // keep the first error seen
        }
    }
    for (int i = 0; i < ready; i++) {
        cudaFree(d_buffers[i]);
        cudaStreamDestroy(streams[i]);
    }

    if (status == cudaSuccess) {
        memcpy(shellcode, pinned_shellcode, total_bytes);
    }
    cudaFreeHost(pinned_shellcode);
}
Edit: I enabled Windows hardware-accelerated GPU scheduling and I now get this: https://ibb.co/2YyB4PTt
But it is still not the result I want :/
Upvotes: 0
Views: 70