Всем привет!
Не знаю, куда писать по Cuda.
не могу скопировать двумерные массивы в память устройства.
Массивы наподобие char c_copy_crd_lst[50][2], используются для хранения строк стиля c.
Привожу кусок программы, непосредственно связанный с запуском ядра.
Код:
cudaError_t GrowUpWithCuda(char c_copy_crd_lst_fst[50][2],
char c_copy_crd_lst_snd[50][2], char c_seven_fst[7][2], char c_seven_snd[7][2],
char c_scores_fst[7364][7], double c_scores_snd[7364], double block_value[17])
{
char dev_c_copy_crd_lst_fst[50][2];
char dev_c_cv_c_seven_fst[7][2];
char dev_c_seven_snd[7][2];
char dev_c_scores_fst[7364][7];
double dev_c_scores_snd[7364];
double dev_block_value[17];
cudaError_t cudaStatus;
// Choose which GPU to run on, change this on a multi-GPU system.
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
goto Error;
}
// Allocate GPU buffers for three vectors (six input, one output)
cudaStatus = cudaMalloc((void**)& dev_c_copy_crd_lst_fst, 50 * 2 * sizeof(char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)& dev_c_copy_crd_lst_snd, 50 * 2 * sizeof(char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)& dev_c_seven_fst, 7 * 2 * sizeof(char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)& dev_c_seven_snd, 7 * 2 * sizeof(char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)& dev_c_scores_fst, 7364 * 7 * sizeof(char));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)& dev_c_scores_snd, 7364 * sizeof(double));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
cudaStatus = cudaMalloc((void**)& dev_block_value, 17 * sizeof(double));
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed!");
goto Error;
}
// Copy input vectors from host memory to GPU buffers.
cudaStatus = cudaMemcpy(dev_c_copy_crd_lst_fst, c_copy_crd_lst_fst,
50 * 2 * sizeof(char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "c_copy_crd_lst_fst cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_c_copy_crd_lst_snd, c_copy_crd_lst_snd,
50 * 2 * sizeof(char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "c_copy_crd_lst_snd cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_c_seven_fst, c_seven_fst,
7 * 2 * sizeof(char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "c_seven_fst cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_c_seven_snd, c_seven_snd,
7 * 2 * sizeof(char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "c_seven_snd cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_c_scores_fst, c_scores_fst,
7364 * 7 * sizeof(char), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "c_scores_fst cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_c_scores_snd, c_scores_snd,
7364 * sizeof(double), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "c_scores_snd cudaMemcpy failed!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_block_value, block_value,
17 * sizeof(double), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "block_value cudaMemcpy failed!");
goto Error;
}
// Launch a kernel on the GPU with one thread for each element.
addKernel <<< 17, 64 >>> (dev_c_copy_crd_lst_fst, dev_c_copy_crd_lst_snd, dev_c_seven_fst,
dev_c_seven_snd, dev_c_scores_fst, dev_c_scores_snd, dev_block_value);
// Check for any errors launching the kernel
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
// cudaDeviceSynchronize waits for the kernel to finish, and returns
// any errors encountered during the launch.
cudaStatus = cudaDeviceSynchronize();
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
goto Error;
}
// Copy output vector from GPU buffer to host memory.
cudaStatus = cudaMemcpy(block_value, dev_block_value, 17 * sizeof(double), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
fprintf(stderr, "block_value cudaMemcpy DeviceToHost failed!");
goto Error;
}
Error:
cudaFree(dev_c_copy_crd_lst_fst);
cudaFree(dev_c_copy_crd_lst_snd);
cudaFree(dev_c_seven_fst);
cudaFree(dev_c_seven_snd);
cudaFree(dev_c_scores_fst);
cudaFree(dev_c_scores_snd);
cudaFree(dev_block_value);
return cudaError_t();
}
При запуске в консоли отображается ошибка "c_copy_crd_lst_fst cudaMemcpy failed!".
Кто знает, в чем проблема, отпишитесь, пожалуйста.