Previously you had to add custom build rules by hand, which was a hassle:
2011/01/04 - [Programming/openCL / CUDA] - Creating a CUDA project in Visual Studio 2008
Now a default template is included, so creating a project is much more convenient.
That said... it seems the template adds a 5-element vector-addition example by default?
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size);

__global__ void addKernel(int *c, const int *a, const int *b)
{
    int i = threadIdx.x;
    c[i] = a[i] + b[i];
}

int main()
{
    const int arraySize = 5;
    const int a[arraySize] = { 1, 2, 3, 4, 5 };
    const int b[arraySize] = { 10, 20, 30, 40, 50 };
    int c[arraySize] = { 0 };

    // Add vectors in parallel.
    cudaError_t cudaStatus = addWithCuda(c, a, b, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        return 1;
    }

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        c[0], c[1], c[2], c[3], c[4]);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    return 0;
}

// Helper function for using CUDA to add vectors in parallel.
cudaError_t addWithCuda(int *c, const int *a, const int *b, unsigned int size)
{
    int *dev_a = 0;
    int *dev_b = 0;
    int *dev_c = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate GPU buffers for three vectors (two input, one output).
    cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, size * sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // Copy input vectors from host memory to GPU buffers.
    cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    cudaStatus = cudaMemcpy(dev_b, b, size * sizeof(int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Launch a kernel on the GPU with one thread for each element.
    addKernel<<<1, size>>>(dev_c, dev_a, dev_b);

    // Check for any errors launching the kernel.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during the launch.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy output vector from GPU buffer to host memory.
    cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

Error:
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}
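One thing to note about the template: the kernel is launched as addKernel<<<1, size>>>, that is, a single block with one thread per element, so it only works while size fits within one block (at most 1024 threads on current GPUs). For larger arrays the work has to be spread over multiple blocks. The sketch below is my own variant, not part of the template; the addKernelLarge name and the 256-thread block size are just illustrative choices.

// Kernel variant that computes a global index so it can cover arrays
// larger than a single block (sketch, not part of the VS template).
__global__ void addKernelLarge(int *c, const int *a, const int *b, unsigned int size)
{
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < size)             // guard the last, partially filled block
        c[i] = a[i] + b[i];
}

// Inside addWithCuda, the launch would then look something like this:
//     const unsigned int threadsPerBlock = 256;
//     unsigned int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;
//     addKernelLarge<<<blocks, threadsPerBlock>>>(dev_c, dev_a, dev_b, size);

The rounding-up division simply makes sure there are enough blocks to cover every element, and the if (i < size) check keeps the extra threads in the last block from writing past the end of the buffers.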