|
Pixel Shader 3.0 Branch Test v0.2
NV42: GeForce 6800GS
Tile Size: 128x128, FPS: 306
Tile Size: 64x64, FPS: 272
Tile Size: 48x48, FPS: 231
Tile Size: 32x32, FPS: 218
Tile Size: 24x24, FPS: 204
Tile Size: 16x16, FPS: 202
Tile Size: 8x8, FPS: 202
Tile Size: Dither(1x1), FPS: 198
RV570: Radeon X1950PRO
Tile Size: 128x128, FPS: 739
Tile Size: 64x64, FPS: 729
Tile Size: 48x48, FPS: 707
Tile Size: 32x32, FPS: 722
Tile Size: 24x24, FPS: 668
Tile Size: 16x16, FPS: 705
Tile Size: 8x8, FPS: 632
Tile Size: Dither(1x1), FPS: 407
啥也不说了,去年在这里的人都知道这个故事:ATI 的卡当时不能运行这个测试程序,原因是 PS/VS 必须都用 3.0 才能运
行,而 NV 的驱动允许 VS 1.1 和 PS 3.0 配合使用。这直接导致了 R5 系列的分支性能无法测试。
现在终于有了结果!
外行看热闹,内行看门道
这个测试是利用 Pixel Shader 的分支指令来执行总计 3 个分支 (if... else if.... else....),通过测试不同的块大小来看架构
对分支的容忍能力。RV570 在 8x8 - 1x1 才出现明显性能下降,说明在处理 64 Pixel 的时候还没有达到分支能力的极限。
谁有 G80 我给你们发下载地址,测测看吧,看看 G80 的分支能力怎么样。
Pixel Shader source code listing:
ps_3_0
; ----------------------------------------------------------------------
; Three-way dynamic-branch test shader (Direct3D 9 ps_3_0 assembly).
;
; The red channel of the mask texture in s0 selects one path per pixel:
;   mask.r > 0.6 : colour from s1, lit with the normal fetched from s2
;   mask.r < 0.2 : 5-tap blur of s3, lit with the normal fetched from s4
;   otherwise    : colour from s3 plus a constant 0.2
;
; Inputs : v0.xy = texture coordinate, s0..s4 = 2D samplers.
;          c0 is not defined in this listing; it is presumably the light
;          vector uploaded by the host application (confirm with caller).
; Output : oC0 = final colour.
; Temps  : r0 scratch coordinate, r1 mask, r2 scratch, r3 light vector
;          (r3.x is reused for the diffuse term), r4 base coordinate,
;          r5 colour accumulator, r6 fetched texel / normal.
;
; texldl reads its mip level from the .w component of the coordinate,
; and a two-component swizzle such as v0.xy is expanded to .xyyy, which
; would feed the v coordinate in as the LOD.  Every coordinate below is
; therefore built with an explicit w = 0 so all fetches use mip level 0.
; ----------------------------------------------------------------------
dcl_2d s0
dcl_2d s1
dcl_2d s2
dcl_2d s3
dcl_2d s4
dcl_texcoord0 v0.xy
def c2, 0.2, 0.6, 0, 0            ; x = low threshold, y = high threshold
def c5, 1, 0, -1, 0.6             ; 0/1 selectors used via swizzles
def c6, 2, 3, 4, 0                ; y = normal-map tiling factor (3)
def c7, -0.5, -0.5, 0, 0.2        ; xy = normal bias, w = 1/5 blur weight
def c9, -0.01, 0.01, 0, 0         ; blur tap offsets (four diagonals)
def c10, -0.01, -0.01, 0, 0
def c11, 0.01, -0.01, 0, 0
def c12, 0.01, 0.01, 0, 0

; r4 = (u, v, 0, 0): base coordinate with LOD (w) forced to zero
mul r4, v0.xyxy, c5.xxyy

; r1 = branch mask
texldl r1, r4, s0

; r3 = normalize((0, 0, 1, 0) + c0.xyzz); two steps so that each
; instruction reads only one constant register
mov r2, c5.yyxy
add r2, r2, c0.xyzz
nrm r3, r2

if_gt r1.x, c2.y
    ; ---- Path 1: plain surface colour, lit ----
    texldl r5, r4, s1
    ; Normal map is tiled 3x; w stays 0 because r4.w is 0
    mul r2, r4, c6.y
    texldl r6, r2, s2
    ; Bias x/y by -0.5 and renormalize (w is ignored by nrm/dp3)
    add r6, r6, c7
    nrm r2, r6
    ; Diffuse term = dot(light, normal), kept in r3.x
    dp3 r3.x, r3, r2
    mul r5, r5, r3.x
else
    if_lt r1.x, c2.x
        ; ---- Path 2: 5-tap blur of s3 (centre + 4 diagonals), lit ----
        texldl r5, r4, s3
        add r0, r4, c9
        texldl r6, r0, s3
        add r5, r5, r6
        add r0, r4, c10
        texldl r6, r0, s3
        add r5, r5, r6
        add r0, r4, c11
        texldl r6, r0, s3
        add r5, r5, r6
        add r0, r4, c12
        texldl r6, r0, s3
        add r5, r5, r6
        ; Average the five taps (multiply by 0.2)
        mul r5, r5, c7.w
        ; Normal map is tiled 3x; w stays 0 because r4.w is 0
        mul r2, r4, c6.y
        texldl r6, r2, s4
        ; Bias x/y by -0.5 and renormalize (w is ignored by nrm/dp3)
        add r6, r6, c7
        nrm r2, r6
        ; Diffuse term = dot(light, normal), kept in r3.x
        dp3 r3.x, r3, r2
        mul r5, r5, r3.x
    else
        ; ---- Path 3: s3 colour brightened by a constant 0.2 ----
        texldl r5, r4, s3
        add r5, r5, c7.w
    endif
endif
mov oC0, r5 |
|