CUDA并行计算 #include __global__ void add(int x,int y,int *z){\x05*z=x+y;}int main(void){\x05int c;\x05int *dev_c;cudaMalloc( (void**)&dev_c,sizeof(int) );\x05 add(2,7,dev_c);\x05 cudaMemcpy(&c,dev_c,sizeof(int),cudaMemcpyDeviceToHost);\x05 print
来源:学生作业帮助网 编辑:作业帮 时间:2024/06/30 16:32:39
![CUDA并行计算 #include __global__ void add(int x,int y,int *z){\x05*z=x+y;}int main(void){\x05int c;\x05int *dev_c;cudaMalloc( (void**)&dev_c,sizeof(int) );\x05 add(2,7,dev_c);\x05 cudaMemcpy(&c,dev_c,sizeof(int),cudaMemcpyDeviceToHost);\x05 print](/uploads/image/z/8238624-24-4.jpg?t=CUDA%E5%B9%B6%E8%A1%8C%E8%AE%A1%E7%AE%97+%23include+__global__+void+add%28int+x%2Cint+y%2Cint+%2Az%29%7B%5Cx05%2Az%3Dx%2By%3B%7Dint+main%28void%29%7B%5Cx05int+c%3B%5Cx05int+%2Adev_c%3BcudaMalloc%28+%28void%2A%2A%29%26dev_c%2Csizeof%28int%29+%29%3B%5Cx05+add%282%2C7%2Cdev_c%29%3B%5Cx05+cudaMemcpy%28%26c%2Cdev_c%2Csizeof%28int%29%2CcudaMemcpyDeviceToHost%29%3B%5Cx05+print)
CUDA并行计算 #include __global__ void add(int x,int y,int *z){\x05*z=x+y;}int main(void){\x05int c;\x05int *dev_c;cudaMalloc( (void**)&dev_c,sizeof(int) );\x05 add(2,7,dev_c);\x05 cudaMemcpy(&c,dev_c,sizeof(int),cudaMemcpyDeviceToHost);\x05 print
CUDA并行计算
#include <stdio.h>
// Kernel: computes x + y and stores the sum through z.
//   x, y - integer operands, passed by value at launch.
//   z    - destination for the sum; it is dereferenced inside the kernel,
//          so it must point to memory the device can write.
// No thread or block index is used, so every launched thread writes the
// same value to *z.
__global__ void add(int x,int y,int *z){
	*z=x+y;
}
// Computes 2 + 7 on the GPU with the add kernel, copies the result back to
// the host and prints it.
// Returns 0 on success, 1 if the allocation, the kernel launch/execution,
// or the device-to-host copy reports a CUDA error.
int main(void){
	int c = 0;            // host copy of the result
	int *dev_c = NULL;    // device buffer holding one int
	cudaError_t err;

	err = cudaMalloc( (void**)&dev_c,sizeof(int) );
	if (err != cudaSuccess) {
		fprintf(stderr,"cudaMalloc failed: %s\n",cudaGetErrorString(err));
		return 1;
	}

	// A __global__ function has to be launched with an execution
	// configuration; a single block with a single thread is sufficient
	// because the kernel writes one value.
	add<<<1,1>>>(2,7,dev_c);

	// The launch itself returns nothing, so launch failures (bad
	// configuration, no kernel image for this GPU, ...) must be queried
	// explicitly. Ignoring them is how a wrong result such as "2+7=0"
	// gets printed without any diagnostic.
	err = cudaGetLastError();
	if (err != cudaSuccess) {
		fprintf(stderr,"kernel launch failed: %s\n",cudaGetErrorString(err));
		cudaFree(dev_c);
		return 1;
	}

	// Blocking copy: it waits for the kernel to finish and also reports
	// errors raised while the kernel was executing.
	err = cudaMemcpy(&c,dev_c,sizeof(int),cudaMemcpyDeviceToHost);
	if (err != cudaSuccess) {
		fprintf(stderr,"cudaMemcpy failed: %s\n",cudaGetErrorString(err));
		cudaFree(dev_c);
		return 1;
	}

	printf("2+7=%d\n",c);
	cudaFree(dev_c);
	return 0;
}
//这段程序为何运行出来结果为2+7=0?求指教
CUDA并行计算 #include __global__ void add(int x,int y,int *z){\x05*z=x+y;}int main(void){\x05int c;\x05int *dev_c;cudaMalloc( (void**)&dev_c,sizeof(int) );\x05 add(2,7,dev_c);\x05 cudaMemcpy(&c,dev_c,sizeof(int),cudaMemcpyDeviceToHost);\x05 print
可能是环境配置的问题,cuda运行环境需要配置各种各样的参数什么的,不建议你自己配,cuda的sdk里面有很多例子,源码和vs工程文件都有,里面的环境都是配好的.
建议你先导入一个简单的例子,在你的机器上跑一遍看看有没有问题,如果没有问题再改成你的程序,一般情况下sdk的例子能正确执行的话,改成你这个简单的例子应该是不会有问题的.
推荐你可以先把SDK里面的deviceQuery跑一遍,那个程序是用来检查你的GPU设备的.