#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
// Number of input elements (1048576 == 2^20).
#define DATA_SIZE 1048576
// Number of threads used for the kernel launch and for sizing the
// per-thread partial-sum buffers.
#define THREAD_NUM 256 // at most 512 threads are supported
// Host-side input buffer holding DATA_SIZE ints.
int data[DATA_SIZE];
// Fills number[0 .. size-1] with pseudo-random digits in the range [0, 9].
//
// number: destination buffer; must have room for at least `size` ints.
// size:   number of elements to write; nothing is written when size <= 0.
//
// Values come from rand(); this function does not seed the generator itself.
void GenerateNumbers(int *number, int size)
{
    for (int i = 0; i < size; i++)
    {
        // Write the i-th element (assigning to `number` itself would
        // overwrite the pointer rather than fill the buffer).
        number[i] = rand() % 10;
    }
}
// CUDA device initialization.
//
// Scans the devices reported by the runtime, picks the first one whose
// properties can be queried and whose major compute capability is >= 1,
// and makes it the current device.
//
// Returns true when a device was selected; returns false (after printing a
// message to stderr) when the device count cannot be obtained, no device is
// present, no device qualifies, or the device cannot be selected.
bool InitCUDA()
{
    int count = 0;
    // A failed query is treated the same as "no device": count stays 0.
    if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0)
    {
        fprintf(stderr, "There is no device.\n");
        return false;
    }

    int i;
    for (i = 0; i < count; i++)
    {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess && prop.major >= 1)
        {
            break;
        }
    }
    // The loop ran to completion, so no device qualified.
    if (i == count)
    {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        return false;
    }

    // Only the first qualifying device is used.
    if (cudaSetDevice(i) != cudaSuccess)
    {
        fprintf(stderr, "Failed to select CUDA device %d.\n", i);
        return false;
    }
    return true;
}
// Computes per-thread partial sums of squares over num[0 .. DATA_SIZE-1].
//
// Launch layout: indexing uses threadIdx.x only, and the input is split into
// DATA_SIZE / THREAD_NUM elements per thread, so the kernel expects a single
// one-dimensional block of THREAD_NUM threads.
//
// num:    device array of DATA_SIZE ints (read only).
// result: device array of THREAD_NUM ints; result[tid] receives the sum of
//         squares of thread tid's contiguous slice
//         num[tid*size .. (tid+1)*size - 1].
// time:   device pointer; receives the clock() delta measured by thread 0
//         around its own loop (other threads are not timed).
//
// NOTE(review): each thread walks its own contiguous slice, so at any loop
// step neighbouring threads read addresses `size` elements apart, which is
// not a coalesced global-memory access pattern. An interleaved loop
// (i = tid; i < DATA_SIZE; i += THREAD_NUM) would be, but it would change
// which elements each result[tid] covers, so the partitioning is kept as is.
__global__ static void sumOfSquares(int *num, int *result, clock_t *time)
{
    const int tid = threadIdx.x;
    const int size = DATA_SIZE / THREAD_NUM;
    int sum = 0;
    clock_t start = 0; // only meaningful in thread 0

    if (tid == 0)
        start = clock();

    for (int i = tid * size; i < (tid + 1) * size; i++)
    {
        // Square the i-th element (the subscript is required; `num * num`
        // would try to multiply two pointers).
        sum += num[i] * num[i];
    }
    result[tid] = sum;

    // Timing: elapsed clock ticks as seen by thread 0.
    if (tid == 0)
        *time = clock() - start;
}
// Prints a diagnostic and terminates the program if a CUDA runtime call
// did not return cudaSuccess. `what` names the failing operation.
static void CheckCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Program entry point.
//
// Generates DATA_SIZE random digits, computes the sum of their squares on
// the GPU (one block of THREAD_NUM threads producing partial sums that are
// added up on the host), prints the GPU result and the kernel's clock-tick
// count, then recomputes the sum on the CPU and prints it for comparison.
//
// Returns 0 on success and EXIT_FAILURE when CUDA initialization fails; any
// failing CUDA runtime call terminates the program via CheckCuda.
int main()
{
    if (!InitCUDA())
    {
        return EXIT_FAILURE;
    }
    printf("CUDA initialized.\n");

    GenerateNumbers(data, DATA_SIZE); // fill the host buffer with random digits

    int *gpudata = NULL;
    int *result = NULL;
    clock_t *gputime = NULL;
    CheckCuda(cudaMalloc((void **)&gpudata, sizeof(int) * DATA_SIZE), "cudaMalloc(gpudata)");
    CheckCuda(cudaMalloc((void **)&result, sizeof(int) * THREAD_NUM), "cudaMalloc(result)");
    CheckCuda(cudaMalloc((void **)&gputime, sizeof(clock_t)), "cudaMalloc(time)");
    CheckCuda(cudaMemcpy(gpudata, data, sizeof(int) * DATA_SIZE, cudaMemcpyHostToDevice),
              "cudaMemcpy(host to device)");

    sumOfSquares<<<1, THREAD_NUM, 0>>>(gpudata, result, gputime);
    // A kernel launch returns nothing; launch errors surface here.
    CheckCuda(cudaGetLastError(), "sumOfSquares launch");

    int sum[THREAD_NUM];
    clock_t time_used = 0;
    // These blocking copies also report errors raised while the kernel ran.
    CheckCuda(cudaMemcpy(sum, result, sizeof(int) * THREAD_NUM, cudaMemcpyDeviceToHost),
              "cudaMemcpy(result)");
    CheckCuda(cudaMemcpy(&time_used, gputime, sizeof(clock_t), cudaMemcpyDeviceToHost),
              "cudaMemcpy(time)");

    CheckCuda(cudaFree(gpudata), "cudaFree(gpudata)");
    CheckCuda(cudaFree(result), "cudaFree(result)");
    CheckCuda(cudaFree(gputime), "cudaFree(time)");

    // Add up the per-thread partial sums produced by the kernel.
    int final_sum = 0;
    for (int i = 0; i < THREAD_NUM; i++)
    {
        final_sum += sum[i];
    }
    // clock_t is cast to long so the format specifier matches the argument.
    printf("sum=%d time=%ld\n", final_sum, (long)time_used);

    // CPU reference computation over the same input.
    final_sum = 0;
    for (int i = 0; i < DATA_SIZE; i++)
    {
        final_sum += data[i] * data[i];
    }
    printf("sum(CPU)= %d\n", final_sum);

    system("pause");
    return 0;
}
在这个程序中,当我提升显卡的核心和 SP 单元频率时,发现程序的运算时间反而变长了;而当我提升显卡的显存频率时,程序的运算时间却缩短了。请问这是怎么回事呢?
环境:VS2008,CUDA 2.1 beta。
非常感谢!