|
关于ATI的问题,我自己就用ATI的卡(2600Pro),被这个问题搞得头疼,不过自己写了一个workaround的办法:在shader中加入两个shader,先做UV Blur,然后再做UV Sharpen,使用最简单的高斯Kernel:
UV Blur:
// ---------------------------------------------------------------------------
// Shader inputs.
// ---------------------------------------------------------------------------

// Texture the shader samples from (bound to sampler register s0).
sampler s0 : register(s0);

// Two float4 constants bound to registers c0 and c1.
// NOTE(review): the values are expected to be supplied by the host player;
// the macro names below are the only indication in this file of what each
// component carries - confirm against the host's documentation.
float4 p0 : register(c0);
float4 p1 : register(c1);

// Named views onto the components of p0 / p1.
#define width           (p0.x)
#define height          (p0.y)
#define counter         (p0.z)
#define clock           (p0.w)
#define one_over_width  (p1.x)
#define one_over_height (p1.y)
#define PI              acos(-1)

// ---------------------------------------------------------------------------
// Colour matrices. They are applied as mul(M, v), i.e. each matrix row is
// dotted with the colour vector to give one output component.
//
// Forward matrices:  row 0 -> Y (weighted sum of R, G, B)
//                    row 1 -> B - Y
//                    row 2 -> R - Y
// Inverse matrices take (Y, B - Y, R - Y) back to (R, G, B).
//
// In all four matrices the last row and last column are zero, so the fourth
// (alpha) component of any product is 0.
// ---------------------------------------------------------------------------

// RGB -> YUV using the "709" luma weights.
static float4x4 RGB2YUV709Matrix =
{
     0.21263900587151,  0.715168678767756,  0.0721923153607337, 0.0,
    -0.21263900587151, -0.715168678767756,  0.927807684639266,  0.0,
     0.78736099412849, -0.715168678767756, -0.0721923153607337, 0.0,
     0.0,               0.0,                0.0,                0.0
};

// YUV -> RGB, inverse of RGB2YUV709Matrix.
static float4x4 YUV7092RGBMatrix =
{
    1.0, -0.00000000000000005288,  1.0,               0.0,
    1.0, -0.100944458984308,      -0.297327067284168, 0.0,
    1.0,  1.0,                     0.0,               0.0,
    0.0,  0.0,                     0.0,               0.0
};

// RGB -> YUV using the "601" luma weights.
static float4x4 RGB2YUV601Matrix =
{
     0.29896661812479, -0.0 + 0.586421210132983,  0.114612171742227, 0.0,
    -0.29896661812479, -0.586421210132983,        0.885387828257773, 0.0,
     0.70103338187521, -0.586421210132983,       -0.114612171742227, 0.0,
     0.0,               0.0,                      0.0,               0.0
};

// YUV -> RGB, inverse of RGB2YUV601Matrix.
static float4x4 YUV6012RGBMatrix =
{
    1.0,  0.0000000000000000198,  1.0,               0.0,
    1.0, -0.195443428310234,     -0.509815492616635, 0.0,
    1.0,  1.0,                    0.0,               0.0,
    0.0,  0.0,                    0.0,               0.0
};
// Samples s0 at `uv`, converts the texel with `toYuv` and returns the two
// chroma components (.g and .b) of the converted colour.
float2 chromaAt(float2 uv, float4x4 toYuv)
{
    return mul(toYuv, tex2D(s0, uv)).gb;
}

// 3x3 weighted average of the chroma around `uv`, one texel apart in each
// direction. Weights are 4 for the centre, 2 for the four edge neighbours
// and 1 for the four diagonal neighbours, normalised by 16:
//
//     1 2 1
//     2 4 2   / 16
//     1 2 1
float2 blurredChroma(float2 uv, float4x4 toYuv)
{
    float stepX = 1/width;
    float stepY = 1/height;

    // Edge neighbours (weight 2).
    float2 posY = chromaAt(uv + float2(0, stepY), toYuv);
    float2 negY = chromaAt(uv + float2(0, -stepY), toYuv);
    float2 posX = chromaAt(uv + float2(stepX, 0), toYuv);
    float2 negX = chromaAt(uv + float2(-stepX, 0), toYuv);

    // Diagonal neighbours (weight 1).
    float2 posPos = chromaAt(uv + float2(stepX, stepY), toYuv);
    float2 negNeg = chromaAt(uv + float2(-stepX, -stepY), toYuv);
    float2 negPos = chromaAt(uv + float2(-stepX, stepY), toYuv);
    float2 posNeg = chromaAt(uv + float2(stepX, -stepY), toYuv);

    // Centre tap (weight 4).
    float2 centre = chromaAt(uv, toYuv);

    return (posY*2 + negY*2
          + posX*2 + negX*2
          + posPos + negNeg
          + negPos + posNeg
          + centre*4)/16.0;
}

// Pixel shader entry point: blurs only the chroma of the image.
//
// The texel at `tex` is converted from RGB to YUV, its two chroma components
// are replaced by the 3x3 weighted average from blurredChroma(), and the
// result is converted back to RGB. Luma (the first YUV component) is left
// as computed from the centre texel.
//
// The "709" matrix pair is used when height >= 720, the "601" pair otherwise.
// Because the matrices have an all-zero last row, the returned alpha is 0.
float4 main(float2 tex : TEXCOORD0) : COLOR
{
    float4 rgb = tex2D(s0, tex);
    float4 yuv = {0, 0, 0, 0};

    if (height >= 720)
    {
        yuv = mul(RGB2YUV709Matrix, rgb);
        yuv.gb = blurredChroma(tex, RGB2YUV709Matrix);
        rgb = mul(YUV7092RGBMatrix, yuv);
    }
    else
    {
        yuv = mul(RGB2YUV601Matrix, rgb);
        yuv.gb = blurredChroma(tex, RGB2YUV601Matrix);
        rgb = mul(YUV6012RGBMatrix, yuv);
    }

    return rgb;
}
UV Sharpen:
// ---------------------------------------------------------------------------
// Shader inputs.
// ---------------------------------------------------------------------------

// Texture the shader samples from (bound to sampler register s0).
sampler s0 : register(s0);

// Two float4 constants bound to registers c0 and c1.
// NOTE(review): the values are expected to be supplied by the host player;
// the macro names below are the only indication in this file of what each
// component carries - confirm against the host's documentation.
float4 p0 : register(c0);
float4 p1 : register(c1);

// Named views onto the components of p0 / p1.
#define width           (p0.x)
#define height          (p0.y)
#define counter         (p0.z)
#define clock           (p0.w)
#define one_over_width  (p1.x)
#define one_over_height (p1.y)
#define PI              acos(-1)

// ---------------------------------------------------------------------------
// Colour matrices. They are applied as mul(M, v), i.e. each matrix row is
// dotted with the colour vector to give one output component.
//
// Forward matrices:  row 0 -> Y (weighted sum of R, G, B)
//                    row 1 -> B - Y
//                    row 2 -> R - Y
// Inverse matrices take (Y, B - Y, R - Y) back to (R, G, B).
//
// In all four matrices the last row and last column are zero, so the fourth
// (alpha) component of any product is 0.
// ---------------------------------------------------------------------------

// RGB -> YUV using the "709" luma weights.
static float4x4 RGB2YUV709Matrix =
{
     0.21263900587151,  0.715168678767756,  0.0721923153607337, 0.0,
    -0.21263900587151, -0.715168678767756,  0.927807684639266,  0.0,
     0.78736099412849, -0.715168678767756, -0.0721923153607337, 0.0,
     0.0,               0.0,                0.0,                0.0
};

// YUV -> RGB, inverse of RGB2YUV709Matrix.
static float4x4 YUV7092RGBMatrix =
{
    1.0, -0.00000000000000005288,  1.0,               0.0,
    1.0, -0.100944458984308,      -0.297327067284168, 0.0,
    1.0,  1.0,                     0.0,               0.0,
    0.0,  0.0,                     0.0,               0.0
};

// RGB -> YUV using the "601" luma weights.
static float4x4 RGB2YUV601Matrix =
{
     0.29896661812479,  0.586421210132983,  0.114612171742227, 0.0,
    -0.29896661812479, -0.586421210132983,  0.885387828257773, 0.0,
     0.70103338187521, -0.586421210132983, -0.114612171742227, 0.0,
     0.0,               0.0,                0.0,               0.0
};

// YUV -> RGB, inverse of RGB2YUV601Matrix.
static float4x4 YUV6012RGBMatrix =
{
    1.0,  0.0000000000000000198,  1.0,               0.0,
    1.0, -0.195443428310234,     -0.509815492616635, 0.0,
    1.0,  1.0,                    0.0,               0.0,
    0.0,  0.0,                    0.0,               0.0
};
// Samples s0 at `uv`, converts the texel with `toYuv` and returns the two
// chroma components (.g and .b) of the converted colour.
float2 chromaAt(float2 uv, float4x4 toYuv)
{
    return mul(toYuv, tex2D(s0, uv)).gb;
}

// Sharpened chroma at `uv`: twice the centre value minus the mean of the
// eight surrounding texels (one texel away in each direction).
float2 sharpenedChroma(float2 uv, float4x4 toYuv)
{
    float stepX = 1/width;
    float stepY = 1/height;

    // Edge neighbours.
    float2 posY = chromaAt(uv + float2(0, stepY), toYuv);
    float2 negY = chromaAt(uv + float2(0, -stepY), toYuv);
    float2 posX = chromaAt(uv + float2(stepX, 0), toYuv);
    float2 negX = chromaAt(uv + float2(-stepX, 0), toYuv);

    // Diagonal neighbours.
    float2 posPos = chromaAt(uv + float2(stepX, stepY), toYuv);
    float2 negNeg = chromaAt(uv + float2(-stepX, -stepY), toYuv);
    float2 negPos = chromaAt(uv + float2(-stepX, stepY), toYuv);
    float2 posNeg = chromaAt(uv + float2(stepX, -stepY), toYuv);

    float2 centre = chromaAt(uv, toYuv);

    return -(posY + negY
           + posX + negX
           + posPos + negNeg
           + negPos + posNeg)/8.0 + 2*centre;
}

// Pixel shader entry point: sharpens only the chroma of the image.
//
// The texel at `tex` is converted from RGB to YUV, its two chroma components
// are replaced by the value from sharpenedChroma(), and the result is
// converted back to RGB. Luma (the first YUV component) is left as computed
// from the centre texel.
//
// The "709" matrix pair is used when height >= 720, the "601" pair otherwise.
// Because the matrices have an all-zero last row, the returned alpha is 0.
float4 main(float2 tex : TEXCOORD0) : COLOR
{
    float4 rgb = tex2D(s0, tex);
    float4 yuv = {0, 0, 0, 0};

    if (height >= 720)
    {
        yuv = mul(RGB2YUV709Matrix, rgb);
        yuv.gb = sharpenedChroma(tex, RGB2YUV709Matrix);
        rgb = mul(YUV7092RGBMatrix, yuv);
    }
    else
    {
        yuv = mul(RGB2YUV601Matrix, rgb);
        yuv.gb = sharpenedChroma(tex, RGB2YUV601Matrix);
        rgb = mul(YUV6012RGBMatrix, yuv);
    }

    return rgb;
}
用MPC的combine shader来达成,注意顺序就可以了(先UV Blur,后UV Sharpen)。当然ATI最好在注册表中打开UseBT601CSC=1的功能,这样在SD和HD下默认都会拉伸到0-255。在这种情况下nVidia和ATI的画面差别微乎其微,在用SMPTE监测的时候还有细微的差别,应该是转换矩阵(Convert Matrix)系数的细微差别造成的。
以上仅供大家参考。 |
|