mirror of
https://github.com/outbackdingo/UltraGrid.git
synced 2026-03-20 04:09:03 +00:00
571 lines
15 KiB
Plaintext
571 lines
15 KiB
Plaintext
/*
|
|
Real-time DXT1 & YCoCg DXT5 compression (Cg 2.0)
|
|
Copyright (c) NVIDIA Corporation.
|
|
Written by: Ignacio Castano <icastano@nvidia.com>
|
|
|
|
Thanks to JMP van Waveren, Simon Green, Eric Werness, Simon Brown
|
|
|
|
Permission is hereby granted, free of charge, to any person
|
|
obtaining a copy of this software and associated documentation
|
|
files (the "Software"), to deal in the Software without
|
|
restriction, including without limitation the rights to use,
|
|
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the
|
|
Software is furnished to do so, subject to the following
|
|
conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
// Image formats: the values compared against the imageFormat uniform.
|
|
// Input texels are used as RGB; the compress_* entry points apply no color conversion.
const int FORMAT_RGB = 0;
|
|
// Input texels are YUV; the compress_* entry points run ConvertYUVToRGB on them first.
const int FORMAT_YUV = 1;
|
|
|
|
// Convert a YUV sample to RGB.
//   color   : (Y, U, V); U and V are re-centered by subtracting 0.5, and Y has
//             a 0.0625 offset removed before being scaled by 1.1643.
//   returns : (R, G, B); the result is not clamped.
// NOTE(review): the coefficients look like the video-range BT.601 matrix - confirm.
vec3 ConvertYUVToRGB(vec3 color)
{
    float luma = 1.1643 * (color[0] - 0.0625);
    float cb   = color[1] - 0.5;
    float cr   = color[2] - 0.5;

    float red   = luma + 1.5958 * cr;
    float green = luma - 0.39173 * cb - 0.81290 * cr;
    float blue  = luma + 2.017 * cb;

    return vec3(red, green, blue);
}
|
|
// Bias of 128/255 added to the Co and Cg channels when encoding (and subtracted
// again when decoding) so that zero chroma sits at mid-range.
const float offset = 128.0 / 255.0;
|
|
|
|
// Convert an RGB color to YCoCg.
//   color   : (R, G, B)
//   returns : (Y, Co, Cg) where Y = (R + 2G + B) / 4,
//             Co = (2R - 2B) / 4 + offset and Cg = (-R + 2G - B) / 4 + offset.
vec3 ConvertRGBToYCoCg(vec3 color)
{
    float r = color.r;
    float g = color.g;
    float b = color.b;

    float luma         = (r + 2 * g + b) * 0.25;
    float chromaOrange = (2 * r - 2 * b) * 0.25 + offset;
    float chromaGreen  = (-r + 2 * g - b) * 0.25 + offset;

    return vec3(luma, chromaOrange, chromaGreen);
}
|
|
|
|
// Short names for the unsigned integer scalar and 2/3/4-component vector types
// used by the bit-packing code in this file.
typedef unsigned int uint;
|
|
typedef unsigned int2 uint2;
|
|
typedef unsigned int3 uint3;
|
|
typedef unsigned int4 uint4;
|
|
|
|
// Squared Euclidean distance between two 3-component colors.
// The dot product is used to minimize RMS error, instead of the absolute
// distance used in the CPU compressor.
float colorDistance(float3 c0, float3 c1)
{
    float3 delta = c0 - c1;
    return dot(delta, delta);
}
|
|
// Squared Euclidean distance between two 2-component values
// (overload used for the CoCg chroma pair).
float colorDistance(float2 c0, float2 c1)
{
    float2 delta = c0 - c1;
    return dot(delta, delta);
}
|
|
|
|
// Read a 4x4 block of texels into col[], row by row (col[row * 4 + column]).
//   col       : receives the .rgb of each of the 16 samples
//   image     : texture to sample
//   texcoord  : coordinate of the block; sampling starts two texels before it
//               on both axes
//   imageSize : texture size in texels, used to derive the size of one texel
void ExtractColorBlock(out float3 col[16], sampler2D image, float2 texcoord, float2 imageSize)
{
    float2 texel  = (1.0f / imageSize);
    float2 corner = texcoord - texel * 2;

    for (int row = 0; row < 4; row++) {
        for (int column = 0; column < 4; column++) {
            float2 uv = corner + float2(column, row) * texel;
            col[row * 4 + column] = tex2D(image, uv).rgb;
        }
    }
}
|
|
|
|
// Compute the axis-aligned bounding box of the block in color space.
//   block  : the 16 colors of the block
//   mincol : receives the per-component minimum
//   maxcol : receives the per-component maximum
void FindMinMaxColorsBox(float3 block[16], out float3 mincol, out float3 maxcol)
{
    float3 lo = block[0];
    float3 hi = block[0];

    for (int k = 1; k < 16; k++) {
        float3 current = block[k];
        lo = min(lo, current);
        hi = max(hi, current);
    }

    mincol = lo;
    maxcol = hi;
}
|
|
|
|
// Move both corners of an RGB bounding box toward each other.
// The step is 1/16 of the box extent minus a (8/255)/16 bias; the results are
// clamped to [0, 1].
void InsetBBox(in out float3 mincol, in out float3 maxcol)
{
    const float bias = (8.0 / 255.0) / 16;
    float3 shrink = (maxcol - mincol) / 16.0 - bias;

    mincol = saturate(mincol + shrink);
    maxcol = saturate(maxcol - shrink);
}
|
|
// Move both ends of the luma (Y) range toward each other.
// The step is 1/32 of the range minus a (16/255)/32 bias; the results are
// clamped to [0, 1].
void InsetYBBox(in out float mincol, in out float maxcol)
{
    const float bias = (16.0 / 255.0) / 32.0;
    float shrink = (maxcol - mincol) / 32.0 - bias;

    mincol = saturate(mincol + shrink);
    maxcol = saturate(maxcol - shrink);
}
|
|
// Move both corners of the CoCg bounding box toward each other.
// The step is 1/16 of the box extent minus a (8/255)/16 bias; the results are
// clamped to [0, 1].
void InsetCoCgBBox(in out float2 mincol, in out float2 maxcol)
{
    const float bias = (8.0 / 255.0) / 16;
    float2 shrink = (maxcol - mincol) / 16.0 - bias;

    mincol = saturate(mincol + shrink);
    maxcol = saturate(maxcol - shrink);
}
|
|
|
|
// Choose which diagonal of the RGB bounding box to use as the endpoint line.
// Accumulates the covariance of x with z and of y with z around the box
// center; where a covariance is negative, that component is exchanged between
// mincol and maxcol.
void SelectDiagonal(float3 block[16], in out float3 mincol, in out float3 maxcol)
{
    float3 center = (mincol + maxcol) * 0.5;

    float covXZ = 0;
    float covYZ = 0;
    for (int k = 0; k < 16; k++)
    {
        float3 d = block[k] - center;
        covXZ += d.x * d.z;
        covYZ += d.y * d.z;
    }

    if (covXZ < 0) {
        float held = mincol.x;
        mincol.x = maxcol.x;
        maxcol.x = held;
    }

    if (covYZ < 0) {
        float held = mincol.y;
        mincol.y = maxcol.y;
        maxcol.y = held;
    }
}
|
|
|
|
// Quantize a color to 5:6:5 bits and expand it back to 8 bits per channel.
//   v       : color with components in [0, 1]
//   w       : receives the packed value (r << 11 | g << 5 | b)
//   returns : the quantized color after bit-replication to 8 bits, scaled
//             back to [0, 1] by 1/255
float3 RoundAndExpand(float3 v, out uint w)
{
    int3 q = round(v * float3(31, 63, 31));
    w = (q.r << 11) | (q.g << 5) | q.b;

    // Replicate the top bits into the low bits: 5 -> 8 bits for red and blue,
    // 6 -> 8 bits for green.
    q.r = (q.r << 3) | (q.r >> 2);
    q.b = (q.b << 3) | (q.b >> 2);
    q.g = (q.g << 2) | (q.g >> 4);

    return (float3)q * (1.0 / 255.0);
}
|
|
|
|
// Quantize both endpoints and pack them into one 32-bit word.
//   mincol, maxcol : replaced by their quantized-and-expanded colors; they are
//                    exchanged if the packed value of maxcol turns out to be
//                    smaller than that of mincol
//   returns        : larger packed 5:6:5 value in the low 16 bits, smaller one
//                    in the high 16 bits
uint EmitEndPointsDXT1(in out float3 mincol, in out float3 maxcol)
{
    uint bitsMax;
    uint bitsMin;
    maxcol = RoundAndExpand(maxcol, bitsMax);
    mincol = RoundAndExpand(mincol, bitsMin);

    if (bitsMax >= bitsMin)
    {
        return bitsMax | (bitsMin << 16);
    }

    // We have to do this in case we select an alternate diagonal.
    float3 held = mincol;
    mincol = maxcol;
    maxcol = held;

    return bitsMin | (bitsMax << 16);
}
|
|
|
|
#if 0
|
|
|
|
// Disabled alternative: pick each texel's index by projecting it onto the
// max -> min line instead of comparing distances to the four palette entries.
// Returns sixteen 2-bit indices, texel i in bits 2i..2i+1.
uint EmitIndicesDXT1(float3 block[16], float3 mincol, float3 maxcol)
{
    const float RGB_RANGE = 3;

    float3 axis  = (maxcol - mincol);
    float3 start = maxcol + axis / (2.0 * RGB_RANGE);
    axis /= dot(axis, axis);

    // Linear position along the line: 0 at maxcol ... 3 at mincol.
    uint linear = 0;
    for (int k = 0; k < 16; k++)
    {
        uint step = saturate(dot(start - block[k], axis)) * RGB_RANGE;
        linear |= step << (k * 2);
    }

    // Remap every 2-bit field from linear order to palette order
    // (0 -> 0, 1 -> 2, 2 -> 3, 3 -> 1).
    uint lowBits  = (linear & 0x55555555);
    uint highBits = (linear & 0xAAAAAAAA) >> 1;

    return ((lowBits ^ highBits) << 1) | highBits;
}
|
|
|
|
#else
|
|
|
|
// Pick, for every texel, the closest of the four palette colors.
//   col            : the 16 colors of the block
//   mincol, maxcol : quantized endpoints
//   returns        : sixteen 2-bit indices, texel i in bits 2i..2i+1
uint EmitIndicesDXT1(float3 col[16], float3 mincol, float3 maxcol)
{
    // Palette in index order: 0 = max, 1 = min, 2 and 3 = the points 1/3 and
    // 2/3 of the way from max to min.
    float3 palette[4];
    palette[0] = maxcol;
    palette[1] = mincol;
    palette[2] = lerp(maxcol, mincol, 1.0/3.0);
    palette[3] = lerp(maxcol, mincol, 2.0/3.0);

    uint result = 0;
    for (int k = 0; k < 16; k++) {
        float3 current = col[k];

        float d0 = colorDistance(current, palette[0]);
        float d1 = colorDistance(current, palette[1]);
        float d2 = colorDistance(current, palette[2]);
        float d3 = colorDistance(current, palette[3]);

        // Pairwise comparisons from which the index bits are assembled.
        uint far0vs3 = d0 > d3;
        uint far1vs2 = d1 > d2;
        uint far0vs2 = d0 > d2;
        uint far1vs3 = d1 > d3;
        uint far2vs3 = d2 > d3;

        uint bit0 = far0vs3 & far2vs3;
        uint bit1 = (far1vs2 & far0vs2) | (far0vs3 & far1vs3);

        result |= (bit0 | (bit1 << 1)) << (k*2);
    }

    return result;
}
|
|
|
|
#endif
|
|
|
|
// Choose the chroma scale factor for a block.
// Takes the largest distance of any min/max chroma component from the neutral
// value `offset` and returns 4 if it is below 32/255, 2 if it is below 64/255,
// and 1 otherwise.
int ScaleYCoCg(float2 minColor, float2 maxColor)
{
    float2 spanMin = abs(minColor - offset);
    float2 spanMax = abs(maxColor - offset);

    float peak = max(max(spanMin.x, spanMin.y), max(spanMax.x, spanMax.y));

    if (peak < 32.0 / 255.0) return 4;
    if (peak < 64.0 / 255.0) return 2;
    return 1;
}
|
|
|
|
// Choose which diagonal of the CoCg bounding box to use as the endpoint line.
// Accumulates the covariance of the .y and .z components of the block around
// the box midpoint; if it is negative, the second component is exchanged
// between minColor and maxColor.
void SelectYCoCgDiagonal(const float3 block[16], in out float2 minColor, in out float2 maxColor)
{
    float2 middle = (maxColor + minColor) * 0.5;

    float covariance = 0;
    for (int k = 0; k < 16; k++)
    {
        float2 d = block[k].yz - middle;
        covariance += d.x * d.y;
    }

    if (covariance < 0) {
        float held = minColor.y;
        minColor.y = maxColor.y;
        maxColor.y = held;
    }
}
|
|
|
|
|
|
// Quantize the CoCg endpoints and pack them into one 32-bit word.
//   mincol, maxcol : chroma endpoints; on return they hold the values that
//                    result from the 5:6 quantization, with the scale undone
//   scale          : chroma scale factor (see ScaleYCoCg); scale - 1 is
//                    stored in the low bits of each packed endpoint
//   returns        : packed max endpoint in the low 16 bits, packed min
//                    endpoint in the high 16 bits
uint EmitEndPointsYCoCgDXT5(in out float2 mincol, in out float2 maxcol, int scale)
{
    // Stretch the chroma range around the neutral value before quantizing.
    maxcol = (maxcol - offset) * scale + offset;
    mincol = (mincol - offset) * scale + offset;

    InsetCoCgBBox(mincol, maxcol);

    // 5 bits for the first component, 6 bits for the second.
    int2 qMax = round(maxcol * float2(31, 63));
    int2 qMin = round(mincol * float2(31, 63));

    uint bitsMax = (qMax.r << 11) | (qMax.g << 5) | (scale - 1);
    uint bitsMin = (qMin.r << 11) | (qMin.g << 5) | (scale - 1);

    // Expand back to 8 bits per component by bit replication.
    qMax.r = (qMax.r << 3) | (qMax.r >> 2);
    qMax.g = (qMax.g << 2) | (qMax.g >> 4);
    qMin.r = (qMin.r << 3) | (qMin.r >> 2);
    qMin.g = (qMin.g << 2) | (qMin.g >> 4);

    maxcol = (float2)qMax * (1.0 / 255.0);
    mincol = (float2)qMin * (1.0 / 255.0);

    // Undo rescale.
    maxcol = (maxcol - offset) / scale + offset;
    mincol = (mincol - offset) / scale + offset;

    return bitsMax | (bitsMin << 16);
}
|
|
|
|
#if 0
|
|
|
|
// Disabled alternative: pick each texel's chroma index by projecting its CoCg
// value onto the max -> min line instead of comparing palette distances.
// Returns sixteen 2-bit indices, texel i in bits 2i..2i+1.
uint EmitIndicesYCoCgDXT5(float3 block[16], float2 mincol, float2 maxcol)
{
    const float COCG_RANGE = 3;

    float2 axis  = (maxcol - mincol);
    float2 start = maxcol + axis / (2.0 * COCG_RANGE);
    axis /= dot(axis, axis);

    // Linear position along the line: 0 at maxcol ... 3 at mincol.
    uint linear = 0;
    for (int k = 0; k < 16; k++)
    {
        uint step = saturate(dot(start - block[k].yz, axis)) * COCG_RANGE;
        linear |= step << (k * 2);
    }

    // Remap every 2-bit field from linear order to palette order
    // (0 -> 0, 1 -> 2, 2 -> 3, 3 -> 1).
    uint lowBits  = (linear & 0x55555555);
    uint highBits = (linear & 0xAAAAAAAA) >> 1;

    return ((lowBits ^ highBits) << 1) | highBits;
}
|
|
|
|
#else
|
|
|
|
// Pick, for every texel, the closest of the four CoCg palette entries.
//   block          : the 16 YCoCg texels; only .yz (Co, Cg) is used
//   mincol, maxcol : quantized chroma endpoints
//   returns        : sixteen 2-bit indices, texel i in bits 2i..2i+1
uint EmitIndicesYCoCgDXT5(float3 block[16], float2 mincol, float2 maxcol)
{
    // Palette in index order: 0 = max, 1 = min, 2 and 3 = the points 1/3 and
    // 2/3 of the way from max to min.
    float2 palette[4];
    palette[0] = maxcol;
    palette[1] = mincol;
    palette[2] = lerp(maxcol, mincol, 1.0/3.0);
    palette[3] = lerp(maxcol, mincol, 2.0/3.0);

    uint result = 0;
    for (int k = 0; k < 16; k++)
    {
        float2 chroma = block[k].yz;

        float d0 = colorDistance(chroma, palette[0]);
        float d1 = colorDistance(chroma, palette[1]);
        float d2 = colorDistance(chroma, palette[2]);
        float d3 = colorDistance(chroma, palette[3]);

        // Pairwise comparisons from which the index bits are assembled.
        uint far0vs3 = d0 > d3;
        uint far1vs2 = d1 > d2;
        uint far0vs2 = d0 > d2;
        uint far1vs3 = d1 > d3;
        uint far2vs3 = d2 > d3;

        uint bit0 = far0vs3 & far2vs3;
        uint bit1 = (far1vs2 & far0vs2) | (far0vs3 & far1vs3);

        result |= (bit0 | (bit1 << 1)) << (k*2);
    }

    return result;
}
|
|
|
|
#endif
|
|
|
|
// Quantize the luma endpoints to 8 bits and pack them into 16 bits.
//   returns : rounded maxcol * 255 in bits 0..7, rounded mincol * 255 in
//             bits 8..15
uint EmitAlphaEndPointsYCoCgDXT5(float mincol, float maxcol)
{
    uint byteMin = round(mincol * 255);
    uint byteMax = round(maxcol * 255);

    return (byteMin << 8) | byteMax;
}
|
|
|
|
#if 0
|
|
|
|
// Optimized index selection (disabled alternative).
// Computes a linear 3-bit position for every texel from its luma (.x), then
// remaps all fields at once with bitwise operations and arranges the result
// in two 32-bit words.
uint2 EmitAlphaIndicesYCoCgDXT5(float3 block[16], float minAlpha, float maxAlpha)
{
    const int ALPHA_RANGE = 7;

    float bias  = maxAlpha + (maxAlpha - minAlpha) / (2.0 * ALPHA_RANGE);
    float scale = 1.0f / (maxAlpha - minAlpha);

    uint2 indices = 0;

    // Texels 0..5 go to .x, three bits each starting at bit 0.
    for (int k = 0; k < 6; k++)
    {
        uint step = saturate((bias - block[k].x) * scale) * ALPHA_RANGE;
        indices.x |= step << (3 * k);
    }

    // Texels 6..15 go to .y, three bits each starting at bit 0.
    for (int k = 6; k < 16; k++)
    {
        uint step = saturate((bias - block[k].x) * scale) * ALPHA_RANGE;
        indices.y |= step << (3 * k - 18);
    }

    // Split every 3-bit field into its three bit planes.
    uint2 plane0 = (indices >> 0) & 0x09249249;
    uint2 plane1 = (indices >> 1) & 0x09249249;
    uint2 plane2 = (indices >> 2) & 0x09249249;

    // Remap the linear positions; the order of these three statements matters.
    plane2 ^= plane0 & plane1;
    plane1 ^= plane0;
    plane0 ^= (plane1 | plane2);

    // Recombine the planes and move the fields to their final bit positions.
    indices.x = (plane2.x << 2) | (plane1.x << 1) | plane0.x;
    indices.y = (((plane2.y << 2) | (plane1.y << 1) | plane0.y) << 2) | (indices.x >> 16);
    indices.x <<= 16;

    return indices;
}
|
|
|
|
#else
|
|
|
|
// Map one luma value to its 3-bit index by counting how many of the seven
// thresholds it does not exceed. Values above every threshold give 0, values
// at or below every threshold give 1, everything in between gives 2..7.
uint SelectAlphaIndexYCoCgDXT5(float a, float thresholds[7])
{
    uint index = 1;
    for (int t = 0; t < 7; t++)
    {
        index += (a <= thresholds[t]);
    }
    index &= 7;            // a count of 8 wraps around to 0
    index ^= (2 > index);  // exchange 0 and 1
    return index;
}

// Version shown in the YCoCg-DXT article.
// Selects a 3-bit index for the luma (.x) of each of the 16 texels.
//   returns : .x holds texels 0..4 in bits 16..30 and the lowest bit of
//             texel 5 in bit 31 (bits 0..15 are left zero);
//             .y holds the two upper bits of texel 5 in bits 0..1 and
//             texels 6..15 in bits 2..31
uint2 EmitAlphaIndicesYCoCgDXT5(float3 block[16], float minAlpha, float maxAlpha)
{
    const int ALPHA_RANGE = 7;

    // Half the distance between two neighboring interpolated values.
    float halfStep = (maxAlpha - minAlpha) / (2.0 * ALPHA_RANGE);

    float thresholds[7];
    thresholds[0] = minAlpha + halfStep;
    thresholds[1] = (6 * maxAlpha + 1 * minAlpha) * (1.0 / ALPHA_RANGE) + halfStep;
    thresholds[2] = (5 * maxAlpha + 2 * minAlpha) * (1.0 / ALPHA_RANGE) + halfStep;
    thresholds[3] = (4 * maxAlpha + 3 * minAlpha) * (1.0 / ALPHA_RANGE) + halfStep;
    thresholds[4] = (3 * maxAlpha + 4 * minAlpha) * (1.0 / ALPHA_RANGE) + halfStep;
    thresholds[5] = (2 * maxAlpha + 5 * minAlpha) * (1.0 / ALPHA_RANGE) + halfStep;
    thresholds[6] = (1 * maxAlpha + 6 * minAlpha) * (1.0 / ALPHA_RANGE) + halfStep;

    uint2 indices = 0;
    uint index;

    for (int i = 0; i < 6; i++)
    {
        index = SelectAlphaIndexYCoCgDXT5(block[i].x, thresholds);
        indices.x |= index << (3 * i + 16);
    }

    // Texel 5 straddles the two words: its upper two bits start the second one.
    indices.y = index >> 1;

    for (int i = 6; i < 16; i++)
    {
        index = SelectAlphaIndexYCoCgDXT5(block[i].x, thresholds);
        indices.y |= index << (3 * i - 16);
    }

    return indices;
}
|
|
|
|
#endif
|
|
|
|
// Integer output of the compress_* entry points: the packed block words.
varying out uvec4 colorInt;
|
|
// Format of the input texels; compared against FORMAT_YUV by the compress_*
// entry points. Defaults to 0 (FORMAT_RGB).
uniform int imageFormat = 0;
|
|
|
|
// Compress a 4x4 block to DXT1 format.
// Integer version, renders to a 2 x int32 buffer: the packed endpoints are
// written to colorInt.x and the packed indices to colorInt.w; colorInt.y and
// colorInt.z are written as zero.
//   texcoord  : texture coordinate of the block
//   image     : source texture
//   imageSize : size of the source texture in texels
void compress_DXT1_fp(
    float2 texcoord : TEXCOORD0,
    uniform sampler2D image,
    uniform float2 imageSize)
{
    // Read block
    float3 block[16];
    ExtractColorBlock(block, image, texcoord, imageSize);

    // Convert to RGB
    if ( int(imageFormat) == FORMAT_YUV ) {
        for ( int index = 0; index < 16; index++ )
            block[index] = ConvertYUVToRGB(block[index]);
    }

    // Find min and max colors
    float3 mincol, maxcol;
    FindMinMaxColorsBox(block, mincol, maxcol);

    SelectDiagonal(block, mincol, maxcol);

    InsetBBox(mincol, maxcol);

    // Start from zero: only x and w are assigned below, and the whole vector
    // is copied to the output, so y and z must not be left undefined.
    uint4 output = 0;
    output.x = EmitEndPointsDXT1(mincol, maxcol);
    output.w = EmitIndicesDXT1(block, mincol, maxcol);

    colorInt = output;
}
|
|
|
|
// Compress a 4x4 block to YCoCg DXT5 format.
// Integer version, renders to a 4 x int32 buffer:
//   colorInt.x = luma endpoints (low 16 bits) | first part of the luma indices
//   colorInt.y = remaining luma indices
//   colorInt.z = chroma endpoints
//   colorInt.w = chroma indices
//   texcoord  : texture coordinate of the block
//   image     : source texture
//   imageSize : size of the source texture in texels
void compress_YCoCgDXT5_fp(
    float2 texcoord : TEXCOORD0,
    uniform sampler2D image,
    uniform float2 imageSize)
{
    // Read block
    float3 block[16];
    ExtractColorBlock(block, image, texcoord, imageSize);

    // Bring every texel to YCoCg, going through RGB first for YUV input.
    for ( int k = 0; k < 16; k++ ) {
        float3 rgb = block[k];
        if ( int(imageFormat) == FORMAT_YUV ) {
            rgb = ConvertYUVToRGB(rgb);
        }
        block[k] = ConvertRGBToYCoCg(rgb);
    }

    // Find min and max colors
    float3 mincol, maxcol;
    FindMinMaxColorsBox(block, mincol, maxcol);

    float  lumaMin   = mincol.x;
    float  lumaMax   = maxcol.x;
    float2 chromaMin = mincol.yz;
    float2 chromaMax = maxcol.yz;

    SelectYCoCgDiagonal(block, chromaMin, chromaMax);

    int scale = ScaleYCoCg(chromaMin, chromaMax);

    uint4 result;

    // Output CoCg in DXT1 block.
    result.z = EmitEndPointsYCoCgDXT5(chromaMin, chromaMax, scale);
    result.w = EmitIndicesYCoCgDXT5(block, chromaMin, chromaMax);

    InsetYBBox(lumaMin, lumaMax);

    // Output Y in DXT5 alpha block.
    uint  lumaEndPoints = EmitAlphaEndPointsYCoCgDXT5(lumaMin, lumaMax);
    uint2 lumaIndices   = EmitAlphaIndicesYCoCgDXT5(block, lumaMin, lumaMax);

    result.x = lumaEndPoints | lumaIndices.x;
    result.y = lumaIndices.y;

    colorInt = result;
}
|
|
|
|
// Texture sampled by the display_* fragment programs, bound to TEXUNIT0.
uniform sampler2D image : TEXUNIT0;
|
|
|
|
// Pass-through display program: returns the texel of `image` at texcoord
// unchanged.
float4 display_fp(float2 texcoord : TEXCOORD0) : COLOR
{
    return tex2D(image, texcoord);
}
|
|
|
|
// Display program for YCoCg DXT5 textures: converts a sampled texel back to RGB.
// The texel carries Co in .r, Cg in .g, the encoded chroma scale in .b and Y
// in .a. (255/8) * b + 1 recovers the scale factor, by which the re-centered
// chroma is divided. The returned alpha is always 1.
float4 display_YCoCgDXT5_fp(float2 texcoord : TEXCOORD0) : COLOR
{
    float4 encoded = tex2D(image, texcoord);

    float invScale = 1.0 / ((255.0 / 8.0) * encoded.b + 1);
    float luma     = encoded.a;
    float co       = (encoded.r - offset) * invScale;
    float cg       = (encoded.g - offset) * invScale;

    float red   = luma + co - cg;
    float green = luma + cg;
    float blue  = luma - co - cg;

    return float4(red, green, blue, 1);
}
|