/*
 * File: UltraGrid/libgpujpeg/gpujpeg_table.c
 * Timestamp: 2012-02-10 10:51:15 +01:00
 *
 * Size: 427 lines, 17 KiB
 * Language: C
 */

/**
* Copyright (c) 2011, CESNET z.s.p.o
* Copyright (c) 2011, Silicon Genome, LLC.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "gpujpeg_table.h"
#include "gpujpeg_util.h"
#include <npp.h>
#include <stdio.h>
/**
 * Default quantization table for the Y (luminance) component.
 *
 * Holds 64 coefficients stored in zig-zag order (one 8x8 block).
 * gpujpeg_table_quantization_set_default() copies these bytes unchanged into
 * the caller's raw table.
 * NOTE(review): the values look like the example luminance table from
 * ITU-T T.81 Annex K (Table K.1) rearranged into zig-zag order - confirm.
 */
static uint8_t gpujpeg_table_default_quantization_luminance[] = {
16, 11, 12, 14, 12, 10, 16, 14,
13, 14, 18, 17, 16, 19, 24, 40,
26, 24, 22, 22, 24, 49, 35, 37,
29, 40, 58, 51, 61, 60, 57, 51,
56, 55, 64, 72, 92, 78, 64, 68,
87, 69, 55, 56, 80, 109, 81, 87,
95, 98, 103, 104, 103, 62, 77, 113,
121, 112, 100, 120, 92, 101, 103, 99
};
/**
 * Default quantization table for the Cb or Cr (chrominance) component.
 *
 * Holds 64 coefficients stored in zig-zag order (one 8x8 block).
 * gpujpeg_table_quantization_set_default() copies these bytes unchanged into
 * the caller's raw table.
 * NOTE(review): the values look like the example chrominance table from
 * ITU-T T.81 Annex K (Table K.2) rearranged into zig-zag order - confirm.
 */
static uint8_t gpujpeg_table_default_quantization_chrominance[] = {
17, 18, 18, 24, 21, 24, 47, 26,
26, 47, 99, 66, 56, 66, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
/**
 * Set default quantization table
 *
 * Copies the 64 default coefficients (zig-zag order) that belong to the given
 * component type into table_raw. An unknown type trips an assertion in debug
 * builds; when assertions are compiled out (NDEBUG) the buffer is left
 * untouched instead of being filled from a NULL pointer.
 *
 * @param table_raw Table buffer (must have room for 64 bytes)
 * @param type Quantization table type
 */
void
gpujpeg_table_quantization_set_default(uint8_t* table_raw, enum gpujpeg_component_type type)
{
    const uint8_t* table_default = NULL;
    if ( type == GPUJPEG_COMPONENT_LUMINANCE )
        table_default = gpujpeg_table_default_quantization_luminance;
    else if ( type == GPUJPEG_COMPONENT_CHROMINANCE )
        table_default = gpujpeg_table_default_quantization_chrominance;

    // No default exists for this component type: fail loudly in debug builds,
    // but never hand a NULL source to memcpy in release builds
    if ( table_default == NULL ) {
        assert(0);
        return;
    }

    memcpy(table_raw, table_default, 64 * sizeof(uint8_t));
}
/**
 * Documented at declaration
 *
 * Builds the forward (encoder) quantization table: loads the default raw
 * table for the component type, scales it by quality through NPP, derives the
 * forward table and copies it into table->d_table.
 *
 * @return 0 on success, -1 when an NPP call reports an error or the copy to
 *         device memory fails
 */
int
gpujpeg_table_quantization_encoder_init(struct gpujpeg_table_quantization* table, enum gpujpeg_component_type type, int quality)
{
    // Load raw table in zig-zag order
    gpujpeg_table_quantization_set_default(table->table_raw, type);

    // Update raw table by quality. NPP reports errors as negative status codes
    // (positive codes are warnings), so only negative results abort.
    if ( nppiQuantFwdRawTableInit_JPEG_8u(table->table_raw, quality) < 0 )
        return -1;

    // Fix NPP bug before version 4.1 [http://forums.nvidia.com/index.php?showtopic=191896]
    const NppLibraryVersion* npp_version = nppGetLibVersion();
    if ( npp_version->major < 4 || (npp_version->major == 4 && npp_version->minor == 0) ) {
        // Compute the forward table by hand: each entry is 2^15 divided by the
        // raw coefficient, rounded to nearest, stored in natural order
        for ( int i = 0; i < 64; i++ ) {
            table->table[gpujpeg_order_natural[i]] = ((1 << 15) / (double)table->table_raw[i]) + 0.5;
        }
    } else {
        // Load forward table from raw table
        if ( nppiQuantFwdTableInit_JPEG_8u16u(table->table_raw, table->table) < 0 )
            return -1;
    }

    // Copy tables to device memory
    if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) )
        return -1;

    return 0;
}
/**
 * Documented at declaration
 *
 * Builds the inverse (decoder) quantization table: loads the default raw
 * table for the component type, scales it by quality through NPP, derives the
 * inverse table and copies it into table->d_table.
 *
 * @return 0 on success, -1 when an NPP call reports an error or the copy to
 *         device memory fails
 */
int
gpujpeg_table_quantization_decoder_init(struct gpujpeg_table_quantization* table, enum gpujpeg_component_type type, int quality)
{
    // Load raw table in zig-zag order
    gpujpeg_table_quantization_set_default(table->table_raw, type);

    // Update raw table by quality. NPP reports errors as negative status codes
    // (positive codes are warnings), so only negative results abort.
    if ( nppiQuantFwdRawTableInit_JPEG_8u(table->table_raw, quality) < 0 )
        return -1;

    // Fix NPP bug before version 4.1 [http://forums.nvidia.com/index.php?showtopic=191896]
    const NppLibraryVersion* npp_version = nppGetLibVersion();
    if ( npp_version->major < 4 || (npp_version->major == 4 && npp_version->minor == 0) ) {
        // Build the inverse table by hand: the raw coefficients themselves,
        // moved from zig-zag order to natural order
        for ( int i = 0; i < 64; i++ ) {
            table->table[gpujpeg_order_natural[i]] = table->table_raw[i];
        }
    } else {
        // Load inverse table from raw table
        if ( nppiQuantInvTableInit_JPEG_8u16u(table->table_raw, table->table) < 0 )
            return -1;
    }

    // Copy tables to device memory
    if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) )
        return -1;

    return 0;
}
/**
 * Compute the inverse (decoder) quantization table from the raw table that is
 * already stored in table->table_raw and copy the result into table->d_table.
 *
 * Unlike gpujpeg_table_quantization_decoder_init() this neither loads the
 * default coefficients nor applies a quality factor; table_raw is used as is.
 *
 * @param table Table structure whose table_raw holds 64 coefficients in zig-zag order
 * @return 0 on success, -1 when NPP reports an error or the copy to device
 *         memory fails
 */
int
gpujpeg_table_quantization_decoder_compute(struct gpujpeg_table_quantization* table)
{
    // Fix NPP bug before version 4.1 [http://forums.nvidia.com/index.php?showtopic=191896]
    const NppLibraryVersion* npp_version = nppGetLibVersion();
    if ( npp_version->major < 4 || (npp_version->major == 4 && npp_version->minor == 0) ) {
        // Build the inverse table by hand: the raw coefficients themselves,
        // moved from zig-zag order to natural order
        for ( int i = 0; i < 64; i++ ) {
            table->table[gpujpeg_order_natural[i]] = table->table_raw[i];
        }
    } else {
        // Load inverse table from raw table. NPP reports errors as negative
        // status codes (positive codes are warnings).
        if ( nppiQuantInvTableInit_JPEG_8u16u(table->table_raw, table->table) < 0 )
            return -1;
    }

    // Copy tables to device memory
    if ( cudaSuccess != cudaMemcpy(table->d_table, table->table, 64 * sizeof(uint16_t), cudaMemcpyHostToDevice) )
        return -1;

    return 0;
}
/**
 * Documented at declaration
 *
 * Writes both 64-entry tables to standard output as 8 rows of 8 values:
 * first the raw table, then the computed forward/inverse table.
 */
void
gpujpeg_table_quantization_print(struct gpujpeg_table_quantization* table)
{
    puts("Raw Table (with quality):");
    for ( int index = 0; index < 64; index++ ) {
        printf("%4u", table->table_raw[index]);
        // Close the row after every eighth value
        if ( (index & 7) == 7 )
            puts("");
    }

    puts("Forward/Inverse Table:");
    for ( int index = 0; index < 64; index++ ) {
        printf("%6u", table->table[index]);
        // Close the row after every eighth value
        if ( (index & 7) == 7 )
            puts("");
    }
}
/**
 * Huffman Table DC for Y component
 *
 * bits[l] for l = 1..16 is the number of codes of length l; index 0 is not
 * read by the table builders in this file. The counts add up to 12.
 */
static unsigned char gpujpeg_table_huffman_y_dc_bits[17] = {
0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
};
/** The 12 symbols of the Y DC table, listed in order of increasing code length */
static unsigned char gpujpeg_table_huffman_y_dc_value[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
/**
 * Huffman Table DC for Cb or Cr component
 *
 * bits[l] for l = 1..16 is the number of codes of length l; index 0 is not
 * read by the table builders in this file. The counts add up to 12.
 */
static unsigned char gpujpeg_table_huffman_cbcr_dc_bits[17] = {
0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
};
/** The 12 symbols of the Cb/Cr DC table, listed in order of increasing code length */
static unsigned char gpujpeg_table_huffman_cbcr_dc_value[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
/**
 * Huffman Table AC for Y component
 *
 * bits[l] for l = 1..16 is the number of codes of length l; index 0 is not
 * read by the table builders in this file. The counts add up to 162
 * (the last entry, 0x7d, is 125 codes of length 16).
 */
static unsigned char gpujpeg_table_huffman_y_ac_bits[17] = {
0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d
};
/** The 162 symbols of the Y AC table, listed in order of increasing code length */
static unsigned char gpujpeg_table_huffman_y_ac_value[] = {
0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa
};
/**
 * Huffman Table AC for Cb or Cr component
 *
 * bits[l] for l = 1..16 is the number of codes of length l; index 0 is not
 * read by the table builders in this file. The counts add up to 162
 * (the last entry, 0x77, is 119 codes of length 16).
 */
static unsigned char gpujpeg_table_huffman_cbcr_ac_bits[17] = {
0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77
};
/** The 162 symbols of the Cb/Cr AC table, listed in order of increasing code length */
static unsigned char gpujpeg_table_huffman_cbcr_ac_value[] = {
0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa
};
/**
 * Derive the encoder lookup arrays (code and size per symbol) from the bits
 * and huffval arrays that the caller has already stored in the table.
 *
 * @param table Table structure; bits and huffval are read, code and size are written
 * @return void
 */
void
gpujpeg_table_huffman_encoder_compute(struct gpujpeg_table_huffman_encoder* table)
{
    char code_length[257];
    unsigned int code_word[257];

    // Figure C.1: expand the per-length counts into one length per symbol,
    // ordered by increasing code length, terminated by a zero entry
    int symbol_count = 0;
    for ( int length = 1; length <= 16; length++ ) {
        int remaining = (int) table->bits[length];
        while ( remaining-- > 0 )
            code_length[symbol_count++] = (char) length;
    }
    code_length[symbol_count] = 0;

    // Figure C.2: hand out consecutive code words within each length and
    // append a zero bit whenever the length grows by one
    unsigned int next_code = 0;
    int index = 0;
    for ( int length = code_length[0]; code_length[index] != 0; length++ ) {
        for ( ; ((int) code_length[index]) == length; index++ )
            code_word[index] = next_code++;
        next_code <<= 1;
    }

    // Figure C.3: re-index code and length by symbol value.
    // Symbols without a code keep size 0 so that an attempt to emit one of
    // them can be detected.
    memset(table->size, 0, sizeof(table->size));
    for ( int k = 0; k < symbol_count; k++ ) {
        table->code[table->huffval[k]] = code_word[k];
        table->size[table->huffval[k]] = code_length[k];
    }
}
/** Documented at declaration */
int
gpujpeg_table_huffman_encoder_init(struct gpujpeg_table_huffman_encoder* table, struct gpujpeg_table_huffman_encoder* d_table, enum gpujpeg_component_type comp_type, enum gpujpeg_huffman_type huff_type)
{
assert(comp_type == GPUJPEG_COMPONENT_LUMINANCE || comp_type == GPUJPEG_COMPONENT_CHROMINANCE);
assert(huff_type == GPUJPEG_HUFFMAN_DC || huff_type == GPUJPEG_HUFFMAN_AC);
if ( comp_type == GPUJPEG_COMPONENT_LUMINANCE ) {
if ( huff_type == GPUJPEG_HUFFMAN_DC ) {
memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(table->huffval));
} else {
memcpy(table->bits, gpujpeg_table_huffman_y_ac_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(table->huffval));
}
} else if ( comp_type == GPUJPEG_COMPONENT_CHROMINANCE ) {
if ( huff_type == GPUJPEG_HUFFMAN_DC ) {
memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(table->huffval));
} else {
memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(table->huffval));
}
}
gpujpeg_table_huffman_encoder_compute(table);
#ifndef GPUJPEG_HUFFMAN_CODER_TABLES_IN_CONSTANT
// Copy table to device memory
if ( cudaSuccess != cudaMemcpy(d_table, table, sizeof(struct gpujpeg_table_huffman_encoder), cudaMemcpyHostToDevice) )
return -1;
#endif
return 0;
}
/** Documented at declaration */
int
gpujpeg_table_huffman_decoder_init(struct gpujpeg_table_huffman_decoder* table, struct gpujpeg_table_huffman_decoder* d_table, enum gpujpeg_component_type comp_type, enum gpujpeg_huffman_type huff_type)
{
assert(comp_type == GPUJPEG_COMPONENT_LUMINANCE || comp_type == GPUJPEG_COMPONENT_CHROMINANCE);
assert(huff_type == GPUJPEG_HUFFMAN_DC || huff_type == GPUJPEG_HUFFMAN_AC);
if ( comp_type == GPUJPEG_COMPONENT_LUMINANCE ) {
if ( huff_type == GPUJPEG_HUFFMAN_DC ) {
memcpy(table->bits, gpujpeg_table_huffman_y_dc_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_y_dc_value, sizeof(table->huffval));
} else {
memcpy(table->bits, gpujpeg_table_huffman_y_ac_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_y_ac_value, sizeof(table->huffval));
}
} else if ( comp_type == GPUJPEG_COMPONENT_CHROMINANCE ) {
if ( huff_type == GPUJPEG_HUFFMAN_DC ) {
memcpy(table->bits, gpujpeg_table_huffman_cbcr_dc_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_cbcr_dc_value, sizeof(table->huffval));
} else {
memcpy(table->bits, gpujpeg_table_huffman_cbcr_ac_bits, sizeof(table->bits));
memcpy(table->huffval, gpujpeg_table_huffman_cbcr_ac_value, sizeof(table->huffval));
}
}
gpujpeg_table_huffman_decoder_compute(table, d_table);
return 0;
}
/** Documented at declaration */
void
gpujpeg_table_huffman_decoder_compute(struct gpujpeg_table_huffman_decoder* table, struct gpujpeg_table_huffman_decoder* d_table)
{
// Figure C.1: make table of Huffman code length for each symbol
// Note that this is in code-length order.
char huffsize[257];
int p = 0;
for ( int l = 1; l <= 16; l++ ) {
for ( int i = 1; i <= (int) table->bits[l]; i++ )
huffsize[p++] = (char) l;
}
huffsize[p] = 0;
// Figure C.2: generate the codes themselves
// Note that this is in code-length order.
unsigned int huffcode[257];
unsigned int code = 0;
int si = huffsize[0];
p = 0;
while ( huffsize[p] ) {
while ( ((int) huffsize[p]) == si ) {
huffcode[p++] = code;
code++;
}
code <<= 1;
si++;
}
// Figure F.15: generate decoding tables for bit-sequential decoding
p = 0;
for ( int l = 1; l <= 16; l++ ) {
if ( table->bits[l] ) {
table->valptr[l] = p; // huffval[] index of 1st symbol of code length l
table->mincode[l] = huffcode[p]; // minimum code of length l
p += table->bits[l];
table->maxcode[l] = huffcode[p-1]; // maximum code of length l
} else {
table->maxcode[l] = -1; // -1 if no codes of this length
}
}
// Ensures gpujpeg_huff_decode terminates
table->maxcode[17] = 0xFFFFFL;
// Compute lookahead tables to speed up decoding.
//First we set all the table entries to 0, indicating "too long";
//then we iterate through the Huffman codes that are short enough and
//fill in all the entries that correspond to bit sequences starting
//with that code.
memset(table->look_nbits, 0, sizeof(int) * 256);
int HUFF_LOOKAHEAD = 8;
p = 0;
for ( int l = 1; l <= HUFF_LOOKAHEAD; l++ ) {
for ( int i = 1; i <= (int) table->bits[l]; i++, p++ ) {
// l = current code's length,
// p = its index in huffcode[] & huffval[]. Generate left-justified
// code followed by all possible bit sequences
int lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
for ( int ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr-- )
{
table->look_nbits[lookbits] = l;
table->look_sym[lookbits] = table->huffval[p];
lookbits++;
}
}
}
#ifndef GPUJPEG_HUFFMAN_CODER_TABLES_IN_CONSTANT
// Copy table to device memory
cudaMemcpy(d_table, table, sizeof(struct gpujpeg_table_huffman_decoder), cudaMemcpyHostToDevice);
#endif
}