I'm just trying to use CUDA to blank an image. But "before" and "after" I get the same original image. Can't figure out the problem.
sumKernel.cu:
#include "sumKernel.h"
__global__ void _sumKernel(char *image, int width, int height, char *kernel, int kerwidth, int kerheight) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
image[idx] = 0;
}
void sumKernel(char *image, int width, int height, char *kernel, int kerwidth, int kerheight) {
dim3 blocks(1);
dim3 threads(width*height);
_sumKernel<<<blocks, threads>>>(image, width, height, kernel, kerwidth, kerheight);
}
sumKernel.h:
void sumKernel(char *image, int width, int height, char *kernel, int kerwidth, int kerheight);
main.cpp:
findCudaDevice(argc, (const char **)argv);
Mat image = imread("a.jpg");
Mat gray; cvtColor(image, gray, CV_BGR2GRAY);
imshow("before", gray);
char *gray_g;
cudaMalloc((void **)&gray_g, gray.size().area());
cudaMemcpy(gray_g, gray.data, sizeof(char)*gray.size().area(), cudaMemcpyHostToDevice);
char kernel[9];
char *kernel_g;
cudaMalloc((void **)&kernel_g, sizeof(char)*9);
sumKernel(gray_g, gray.cols, gray.rows, kernel, 3, 3);
cudaMemcpy(gray.data, gray_g, sizeof(char)*gray.size().area(), cudaMemcpyDeviceToHost);
imshow("after", gray);
waitKey(0);
cudaFree(kernel);
cudaFree(gray_g);