Serial
int main()
{
float *A, *B, *C;
for (int i = 0; i < sizeof(C); i++)
{
C[i] = A[i] + B[i];
}
return 0;
}
Parallel
// Element-wise vector addition kernel: each thread computes one element,
// C[i] = A[i] + B[i].
//
// Launch layout: 1D grid of 1D blocks. The element index is the thread's flat
// global index, so a single-block launch (<<<1, N>>>) behaves exactly as
// indexing by threadIdx.x alone, while multi-block launches cover distinct
// elements instead of every block rewriting the first blockDim.x entries.
//
// Precondition: the kernel takes no length argument and therefore performs no
// bounds check. The caller must launch no more threads in total
// (gridDim.x * blockDim.x) than there are elements in A, B and C.
//
// A, B: input vectors, read by the kernel.
// C:    output vector, written by the kernel.
__global__ void vecAdd(float* A, float* B, float* C)
{
    // Flat global thread index across the whole 1D grid.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    C[i] = A[i] + B[i];
}
// Host driver for vecAdd: builds two input vectors, copies them to the GPU,
// launches the kernel with one thread per element, copies the result back and
// checks it. Returns 0 on success and 1 if any CUDA call fails or the result
// does not match the expected sums.
int main()
{
    // Vector length. The launch below uses a single block of N threads, so N
    // must stay within the device's per-block thread limit.
    const int N = 256;
    const size_t bytes = N * sizeof(float);

    // Host-side inputs and output.
    float hostA[N], hostB[N], hostC[N];
    for (int i = 0; i < N; ++i)
    {
        hostA[i] = (float)i;
        hostB[i] = 2.0f * (float)i;
    }

    // Device buffers: the kernel dereferences A, B and C on the GPU, so they
    // must come from cudaMalloc rather than being host pointers.
    float *A = 0, *B = 0, *C = 0;
    cudaError_t err = cudaMalloc(&A, bytes);
    if (err == cudaSuccess) err = cudaMalloc(&B, bytes);
    if (err == cudaSuccess) err = cudaMalloc(&C, bytes);

    if (err == cudaSuccess) err = cudaMemcpy(A, hostA, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) err = cudaMemcpy(B, hostB, bytes, cudaMemcpyHostToDevice);

    if (err == cudaSuccess)
    {
        // Kernel invocation
        vecAdd<<<1, N>>>(A, B, C);
        // A launch does not return a status; query it explicitly.
        err = cudaGetLastError();
    }

    // Blocking copy: waits for the kernel to finish before reading C.
    if (err == cudaSuccess) err = cudaMemcpy(hostC, C, bytes, cudaMemcpyDeviceToHost);

    // Release device memory on every path (cudaFree on a null pointer is a no-op).
    cudaFree(A);
    cudaFree(B);
    cudaFree(C);

    if (err != cudaSuccess) return 1;

    // Verify against the host-computed sum; these small integer-valued floats
    // add exactly, so an equality comparison is safe here.
    for (int i = 0; i < N; ++i)
    {
        if (hostC[i] != hostA[i] + hostB[i]) return 1;
    }
    return 0;
}















0 Responses to “NVIDIA CUDA”
Leave a Reply
You must login to post a comment.