123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633 |
- /*
- Jonathan Dummer
- 2007-07-31-10.32
- simple DXT compression / decompression code
- public domain
- */
- #include "image_DXT.h"
- #include <math.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stdio.h>
- /* set this =1 if you want to use the covariance matrix method...
- which is better than my method of using standard deviations
- overall, except on the infinitesimal chance that the power
- method fails to find the largest eigenvector */
- #define USE_COV_MAT 1
- /********* Function Prototypes *********/
- /*
- Takes a 4x4 block of pixels and compresses it into 8 bytes
- in DXT1 format (color only, no alpha). Speed is valued
- over prettiness, at least for now.
- */
- void compress_DDS_color_block(
- int channels,
- const unsigned char *const uncompressed,
- unsigned char compressed[8] );
- /*
- Takes a 4x4 block of pixels and compresses its alpha
- component into 8 bytes for use in DXT5 DDS files.
- Speed is valued over prettiness, at least for now.
- */
- void compress_DDS_alpha_block(
- const unsigned char *const uncompressed,
- unsigned char compressed[8] );
- /********* Actual Exposed Functions *********/
- int
- save_image_as_DDS
- (
- const char *filename,
- int width, int height, int channels,
- const unsigned char *const data
- )
- {
- /* variables */
- FILE *fout;
- unsigned char *DDS_data;
- DDS_header header;
- int DDS_size;
- /* error check */
- if( (NULL == filename) ||
- (width < 1) || (height < 1) ||
- (channels < 1) || (channels > 4) ||
- (data == NULL ) )
- {
- return 0;
- }
- /* Convert the image */
- if( (channels & 1) == 1 )
- {
- /* no alpha, just use DXT1 */
- DDS_data = convert_image_to_DXT1( data, width, height, channels, &DDS_size );
- } else
- {
- /* has alpha, so use DXT5 */
- DDS_data = convert_image_to_DXT5( data, width, height, channels, &DDS_size );
- }
- /* save it */
- memset( &header, 0, sizeof( DDS_header ) );
- header.dwMagic = ('D' << 0) | ('D' << 8) | ('S' << 16) | (' ' << 24);
- header.dwSize = 124;
- header.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | DDSD_LINEARSIZE;
- header.dwWidth = width;
- header.dwHeight = height;
- header.dwPitchOrLinearSize = DDS_size;
- header.sPixelFormat.dwSize = 32;
- header.sPixelFormat.dwFlags = DDPF_FOURCC;
- if( (channels & 1) == 1 )
- {
- header.sPixelFormat.dwFourCC = ('D' << 0) | ('X' << 8) | ('T' << 16) | ('1' << 24);
- } else
- {
- header.sPixelFormat.dwFourCC = ('D' << 0) | ('X' << 8) | ('T' << 16) | ('5' << 24);
- }
- header.sCaps.dwCaps1 = DDSCAPS_TEXTURE;
- /* write it out */
- fout = fopen( filename, "wb");
- fwrite( &header, sizeof( DDS_header ), 1, fout );
- fwrite( DDS_data, 1, DDS_size, fout );
- fclose( fout );
- /* done */
- free( DDS_data );
- return 1;
- }
/*
    Compresses an image into a stream of DXT1 blocks
    (8 bytes per 4x4 pixel block, blocks stored row by row).

    uncompressed   source pixels, indexed as (row*width + column)*channels
    width, height  image size in pixels, each must be >= 1
    channels       bytes per source pixel, 1..4; for 1 or 2 channels the
                   first channel is reused for red, green and blue
    out_size       receives the size in bytes of the returned buffer,
                   or 0 on failure

    Blocks that hang over the right or bottom edge are padded with the
    first pixel of that block.

    Returns a malloc'ed buffer the caller must free(), or NULL when an
    argument is invalid or the allocation fails.
*/
unsigned char* convert_image_to_DXT1(
    const unsigned char *const uncompressed,
    int width, int height, int channels,
    int *out_size )
{
    unsigned char *compressed;
    int i, j, x, y;
    unsigned char ublock[16*3];
    int cindex = 0, chan_step = 1;
    /* error check */
    if( NULL == out_size )
    {
        return NULL;
    }
    *out_size = 0;
    if( (width < 1) || (height < 1) ||
        (NULL == uncompressed) ||
        (channels < 1) || (channels > 4) )
    {
        return NULL;
    }
    /* for channels == 1 or 2, I do not step forward for R,G,B values */
    if( channels < 3 )
    {
        chan_step = 0;
    }
    /* get the RAM for the compressed image
        (8 bytes per 4x4 pixel block) */
    *out_size = ((width+3) >> 2) * ((height+3) >> 2) * 8;
    compressed = (unsigned char*)malloc( *out_size );
    if( NULL == compressed )
    {
        *out_size = 0;
        return NULL;
    }
    /* go through each block */
    for( j = 0; j < height; j += 4 )
    {
        for( i = 0; i < width; i += 4 )
        {
            /* copy this block into a new one */
            int idx = 0;
            int mx = 4, my = 4;
            if( j+4 >= height )
            {
                my = height - j;
            }
            if( i+4 >= width )
            {
                mx = width - i;
            }
            for( y = 0; y < my; ++y )
            {
                for( x = 0; x < mx; ++x )
                {
                    const unsigned char *px =
                        uncompressed + ((j+y)*width + (i+x))*channels;
                    ublock[idx++] = px[0];
                    ublock[idx++] = px[chan_step];
                    ublock[idx++] = px[chan_step+chan_step];
                }
                /* pad the missing columns with the block's first pixel */
                for( x = mx; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                }
            }
            /* pad the missing rows with the block's first pixel */
            for( y = my; y < 4; ++y )
            {
                for( x = 0; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                }
            }
            /* compress the block straight into the output buffer */
            compress_DDS_color_block( 3, ublock, compressed + cindex );
            cindex += 8;
        }
    }
    return compressed;
}
/*
    Compresses an image into a stream of DXT5 blocks
    (16 bytes per 4x4 pixel block: 8 bytes of alpha followed by
    8 bytes of color, blocks stored row by row).

    uncompressed   source pixels, indexed as (row*width + column)*channels
    width, height  image size in pixels, each must be >= 1
    channels       bytes per source pixel, 1..4; for 1 or 2 channels the
                   first channel is reused for red, green and blue.
                   With 2 or 4 channels the last channel is the alpha;
                   with 1 or 3 channels alpha is set to 255.
    out_size       receives the size in bytes of the returned buffer,
                   or 0 on failure

    Blocks that hang over the right or bottom edge are padded with the
    first pixel of that block.

    Returns a malloc'ed buffer the caller must free(), or NULL when an
    argument is invalid or the allocation fails.
*/
unsigned char* convert_image_to_DXT5(
    const unsigned char *const uncompressed,
    int width, int height, int channels,
    int *out_size )
{
    unsigned char *compressed;
    int i, j, x, y;
    unsigned char ublock[16*4];
    int cindex = 0, chan_step = 1;
    int has_alpha;
    /* error check */
    if( NULL == out_size )
    {
        return NULL;
    }
    *out_size = 0;
    if( (width < 1) || (height < 1) ||
        (NULL == uncompressed) ||
        (channels < 1) || ( channels > 4) )
    {
        return NULL;
    }
    /* for channels == 1 or 2, I do not step forward for R,G,B values */
    if( channels < 3 )
    {
        chan_step = 0;
    }
    /* # channels = 1 or 3 have no alpha, 2 & 4 do have alpha */
    has_alpha = 1 - (channels & 1);
    /* get the RAM for the compressed image
        (16 bytes per 4x4 pixel block) */
    *out_size = ((width+3) >> 2) * ((height+3) >> 2) * 16;
    compressed = (unsigned char*)malloc( *out_size );
    if( NULL == compressed )
    {
        *out_size = 0;
        return NULL;
    }
    /* go through each block */
    for( j = 0; j < height; j += 4 )
    {
        for( i = 0; i < width; i += 4 )
        {
            /* copy this block into a new RGBA one */
            int idx = 0;
            int mx = 4, my = 4;
            if( j+4 >= height )
            {
                my = height - j;
            }
            if( i+4 >= width )
            {
                mx = width - i;
            }
            for( y = 0; y < my; ++y )
            {
                for( x = 0; x < mx; ++x )
                {
                    const unsigned char *px =
                        uncompressed + ((j+y)*width + (i+x))*channels;
                    ublock[idx++] = px[0];
                    ublock[idx++] = px[chan_step];
                    ublock[idx++] = px[chan_step+chan_step];
                    ublock[idx++] = has_alpha ? px[channels-1] : 255;
                }
                /* pad the missing columns with the block's first pixel */
                for( x = mx; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                    ublock[idx++] = ublock[3];
                }
            }
            /* pad the missing rows with the block's first pixel */
            for( y = my; y < 4; ++y )
            {
                for( x = 0; x < 4; ++x )
                {
                    ublock[idx++] = ublock[0];
                    ublock[idx++] = ublock[1];
                    ublock[idx++] = ublock[2];
                    ublock[idx++] = ublock[3];
                }
            }
            /* the alpha block comes first in the output... */
            compress_DDS_alpha_block( ublock, compressed + cindex );
            cindex += 8;
            /* ...followed by the color block */
            compress_DDS_color_block( 4, ublock, compressed + cindex );
            cindex += 8;
        }
    }
    return compressed;
}
- /********* Helper Functions *********/
/*
    Rescales the value c from a from_bits wide range to a to_bits wide
    range, e.g. 255 (8 bits) -> 31 (5 bits) and 31 (5 bits) -> 255 (8 bits).

    The value is multiplied by the largest to_bits number, half of
    2^from_bits is added for rounding, and the add-and-shift on the
    last line approximates the division by (2^from_bits - 1).
*/
int convert_bit_range( int c, int from_bits, int to_bits )
{
    const int rounding = 1 << (from_bits - 1);
    const int to_max = (1 << to_bits) - 1;
    const int scaled = rounding + c * to_max;
    return (scaled + (scaled >> from_bits)) >> from_bits;
}
/*
    Packs 8 bit red, green and blue values into one 16 bit 5:6:5 color:
    red in bits 11..15, green in bits 5..10, blue in bits 0..4.
*/
int rgb_to_565( int r, int g, int b )
{
    const int red5 = convert_bit_range( r, 8, 5 );
    const int green6 = convert_bit_range( g, 8, 6 );
    const int blue5 = convert_bit_range( b, 8, 5 );
    return (red5 << 11) | (green6 << 5) | blue5;
}
/*
    Unpacks a 5:6:5 color (red in bits 11..15, green in bits 5..10,
    blue in bits 0..4) and stores each component, rescaled to 8 bits,
    through r, g and b.
*/
void rgb_888_from_565( unsigned int c, int *r, int *g, int *b )
{
    const unsigned int red5 = (c >> 11) & 31;
    const unsigned int green6 = (c >> 5) & 63;
    const unsigned int blue5 = c & 31;
    *r = convert_bit_range( red5, 5, 8 );
    *g = convert_bit_range( green6, 6, 8 );
    *b = convert_bit_range( blue5, 5, 8 );
}
/*
    Fits a line through the colors of a 16 pixel block.

    uncompressed  16 pixels, 'channels' bytes apart; the first three
                  bytes of each pixel are read as red, green, blue
    channels      byte stride between pixels
    point         receives the average color of the block
    direction     receives the (unnormalized) direction of the line

    With USE_COV_MAT set, the direction is the result of three rounds
    of multiplying a start vector by the 3x3 matrix of centered
    second-order sums (power method).  The start vector deliberately
    is not {1,1,1}: for a matrix such as
        |  1 -1  0 |
        | -1  1  0 |
        |  0  0  0 |
    (full red next to full green) that start would collapse to all
    zeros.  Idea from ryg,
    https://mollyrocket.com/forums/viewtopic.php?t=392

    Otherwise the direction is made of the square roots of the
    diagonal sums, with signs taken from the mixed sums relative to
    whichever of red / green varies more.
*/
void compute_color_line_STDEV(
    const unsigned char *const uncompressed,
    int channels,
    float point[3], float direction[3] )
{
    const float inv_16 = 1.0f / 16.0f;
    int offset;
    float mean_r = 0.0f, mean_g = 0.0f, mean_b = 0.0f;
    float cov_rr = 0.0f, cov_gg = 0.0f, cov_bb = 0.0f;
    float cov_rg = 0.0f, cov_rb = 0.0f, cov_gb = 0.0f;
    /* gather the first and second order sums of the block */
    for( offset = 0; offset < 16*channels; offset += channels )
    {
        const int r = uncompressed[offset+0];
        const int g = uncompressed[offset+1];
        const int b = uncompressed[offset+2];
        mean_r += r;
        mean_g += g;
        mean_b += b;
        cov_rr += r * r;
        cov_gg += g * g;
        cov_bb += b * b;
        cov_rg += r * g;
        cov_rb += r * b;
        cov_gb += g * b;
    }
    /* sums -> averages */
    mean_r *= inv_16;
    mean_g *= inv_16;
    mean_b *= inv_16;
    /* raw second order sums -> sums of products of (value - average) */
    cov_rr -= 16.0f * mean_r * mean_r;
    cov_gg -= 16.0f * mean_g * mean_g;
    cov_bb -= 16.0f * mean_b * mean_b;
    cov_rg -= 16.0f * mean_r * mean_g;
    cov_rb -= 16.0f * mean_r * mean_b;
    cov_gb -= 16.0f * mean_g * mean_b;
    /* the line passes through the average color */
    point[0] = mean_r;
    point[1] = mean_g;
    point[2] = mean_b;
    #if USE_COV_MAT
    {
        /* power method: three matrix * vector rounds */
        float vr = 1.0f;
        float vg = 2.718281828f;
        float vb = 3.141592654f;
        int pass;
        for( pass = 0; pass < 3; ++pass )
        {
            direction[0] = vr*cov_rr + vg*cov_rg + vb*cov_rb;
            direction[1] = vr*cov_rg + vg*cov_gg + vb*cov_gb;
            direction[2] = vr*cov_rb + vg*cov_gb + vb*cov_bb;
            /* feed this round's result into the next one */
            vr = direction[0];
            vg = direction[1];
            vb = direction[2];
        }
    }
    #else
    /* standard deviation method
        (very robust, a tiny bit slower and less accurate) */
    direction[0] = sqrt( cov_rr );
    direction[1] = sqrt( cov_gg );
    direction[2] = sqrt( cov_bb );
    if( cov_gg > cov_rr )
    {
        /* green varies most: the other signs follow green */
        if( cov_rg < 0.0f )
        {
            direction[0] = -direction[0];
        }
        if( cov_gb < 0.0f )
        {
            direction[2] = -direction[2];
        }
    } else
    {
        /* red varies most: the other signs follow red */
        if( cov_rg < 0.0f )
        {
            direction[1] = -direction[1];
        }
        if( cov_rb < 0.0f )
        {
            direction[2] = -direction[2];
        }
    }
    #endif
}
/*
    Picks the two 5:6:5 end-point ("master") colors for a 16 pixel block.

    cmax, cmin    receive the two end points as 5:6:5 values,
                  with *cmax >= *cmin
    channels      byte stride between pixels, must be 3 or 4
    uncompressed  16 pixels; the first three bytes of each are R, G, B

    The block's color line comes from compute_color_line_STDEV(); every
    pixel is projected onto it and the two extreme projections, clamped
    to 0..255 per component, become the end points.

    When channels is not 3 or 4, both outputs are set to 0 so the caller
    never reads an unwritten value.
*/
void LSE_master_colors_max_min(
    int *cmax, int *cmin,
    int channels,
    const unsigned char *const uncompressed )
{
    int i, j;
    /* the master colors */
    int c0[3], c1[3];
    /* the fitted line: a point on it (block average) and its direction */
    float sum_x[] = { 0.0f, 0.0f, 0.0f };
    float sum_x2[] = { 0.0f, 0.0f, 0.0f };
    float dot_max, dot_min;
    float vec_len2;
    float dot;
    /* error check */
    if( (channels < 3) || (channels > 4) )
    {
        *cmax = 0;
        *cmin = 0;
        return;
    }
    compute_color_line_STDEV( uncompressed, channels, sum_x, sum_x2 );
    /* 1 / |direction|^2; the small bias keeps a zero direction finite */
    vec_len2 = 1.0f / ( 0.00001f +
            sum_x2[0]*sum_x2[0] + sum_x2[1]*sum_x2[1] + sum_x2[2]*sum_x2[2] );
    /* finding the max and min vector values */
    dot_max =
            (
                sum_x2[0] * uncompressed[0] +
                sum_x2[1] * uncompressed[1] +
                sum_x2[2] * uncompressed[2]
            );
    dot_min = dot_max;
    for( i = 1; i < 16; ++i )
    {
        dot =
            (
                sum_x2[0] * uncompressed[i*channels+0] +
                sum_x2[1] * uncompressed[i*channels+1] +
                sum_x2[2] * uncompressed[i*channels+2]
            );
        if( dot < dot_min )
        {
            dot_min = dot;
        } else if( dot > dot_max )
        {
            dot_max = dot;
        }
    }
    /* and the offset (from the average location) */
    dot = sum_x2[0]*sum_x[0] + sum_x2[1]*sum_x[1] + sum_x2[2]*sum_x[2];
    dot_min -= dot;
    dot_max -= dot;
    /* post multiply by the scaling factor */
    dot_min *= vec_len2;
    dot_max *= vec_len2;
    /* OK, build the master colors (rounded, then clamped to 0..255) */
    for( i = 0; i < 3; ++i )
    {
        /* color 0 */
        c0[i] = (int)(0.5f + sum_x[i] + dot_max * sum_x2[i]);
        if( c0[i] < 0 )
        {
            c0[i] = 0;
        } else if( c0[i] > 255 )
        {
            c0[i] = 255;
        }
        /* color 1 */
        c1[i] = (int)(0.5f + sum_x[i] + dot_min * sum_x2[i]);
        if( c1[i] < 0 )
        {
            c1[i] = 0;
        } else if( c1[i] > 255 )
        {
            c1[i] = 255;
        }
    }
    /* down sample to 5:6:5 and hand back the larger value as cmax */
    i = rgb_to_565( c0[0], c0[1], c0[2] );
    j = rgb_to_565( c1[0], c1[1], c1[2] );
    if( i > j )
    {
        *cmax = i;
        *cmin = j;
    } else
    {
        *cmax = j;
        *cmin = i;
    }
}
/*
    Compresses the color of a 4x4 pixel block into 8 bytes (DXT1 layout).

    channels      byte stride between pixels, must be 3 or 4
    uncompressed  16 pixels; the first three bytes of each are R, G, B
    compressed    receives the block:
                    bytes 0..1  end-point color 0, 5:6:5, low byte first
                    bytes 2..3  end-point color 1, 5:6:5, low byte first
                    bytes 4..7  sixteen 2 bit selectors, the first pixel
                                in the lowest bits of byte 4

    Color 0 is the larger of the two 5:6:5 values returned by
    LSE_master_colors_max_min().

    When channels is not 3 or 4 the pixels cannot be addressed, so the
    output is filled with zeros instead of being built from
    uninitialized end points.
*/
void
compress_DDS_color_block
(
    int channels,
    const unsigned char *const uncompressed,
    unsigned char compressed[8]
)
{
    /* variables */
    int i;
    int next_bit;
    int enc_c0 = 0, enc_c1 = 0;
    int c0[3], c1[3];
    float color_line[] = { 0.0f, 0.0f, 0.0f };
    float vec_len2 = 0.0f, dot_offset = 0.0f;
    /* position along the line (0..3, from color 0 to color 1)
        -> selector: 0 = color 0, 2 and 3 = the blends, 1 = color 1 */
    static const int swizzle4[] = { 0, 2, 3, 1 };
    /* error check */
    if( (channels < 3) || (channels > 4) )
    {
        for( i = 0; i < 8; ++i )
        {
            compressed[i] = 0;
        }
        return;
    }
    /* get the master colors */
    LSE_master_colors_max_min( &enc_c0, &enc_c1, channels, uncompressed );
    /* store the 565 color 0 and color 1 */
    compressed[0] = (enc_c0 >> 0) & 255;
    compressed[1] = (enc_c0 >> 8) & 255;
    compressed[2] = (enc_c1 >> 0) & 255;
    compressed[3] = (enc_c1 >> 8) & 255;
    /* zero out the selector bytes, they are OR-ed into below */
    compressed[4] = 0;
    compressed[5] = 0;
    compressed[6] = 0;
    compressed[7] = 0;
    /* reconstitute the master color vectors as they will be decoded */
    rgb_888_from_565( enc_c0, &c0[0], &c0[1], &c0[2] );
    rgb_888_from_565( enc_c1, &c1[0], &c1[1], &c1[2] );
    /* the vector from color 0 to color 1 */
    vec_len2 = 0.0f;
    for( i = 0; i < 3; ++i )
    {
        color_line[i] = (float)(c1[i] - c0[i]);
        vec_len2 += color_line[i] * color_line[i];
    }
    /* identical end points leave the vector (and the scale) at zero */
    if( vec_len2 > 0.0f )
    {
        vec_len2 = 1.0f / vec_len2;
    }
    /* pre-perform the scaling */
    color_line[0] *= vec_len2;
    color_line[1] *= vec_len2;
    color_line[2] *= vec_len2;
    /* compute the offset (constant) portion of the dot product */
    dot_offset = color_line[0]*c0[0] + color_line[1]*c0[1] + color_line[2]*c0[2];
    /* store the rest of the bits */
    next_bit = 8*4;
    for( i = 0; i < 16; ++i )
    {
        /* project this color onto the line:
            0 lands on color 0, 1 lands on color 1 */
        int next_value = 0;
        float dot_product =
            color_line[0] * uncompressed[i*channels+0] +
            color_line[1] * uncompressed[i*channels+1] +
            color_line[2] * uncompressed[i*channels+2] -
            dot_offset;
        /* map to [0,3] */
        next_value = (int)( dot_product * 3.0f + 0.5f );
        if( next_value > 3 )
        {
            next_value = 3;
        } else if( next_value < 0 )
        {
            next_value = 0;
        }
        /* OK, store this value (2 bit fields never straddle a byte) */
        compressed[next_bit >> 3] |= swizzle4[ next_value ] << (next_bit & 7);
        next_bit += 2;
    }
    /* done compressing to DXT1 */
}
/*
    Compresses the alpha of a 4x4 pixel block into 8 bytes
    (the alpha half of a DXT5 block).

    uncompressed  16 pixels of 4 bytes each; byte 3 of every pixel is
                  the alpha value
    compressed    receives the block:
                    byte 0      a0, the largest alpha in the block
                    byte 1      a1, the smallest alpha in the block
                    bytes 2..7  sixteen 3 bit selectors, the first pixel
                                in the lowest bits of byte 2

    Each alpha is sorted into one of 8 equal buckets between a1 and a0;
    swizzle8 turns the bucket number into the stored selector
    (bucket 0 -> selector 1, i.e. a1; bucket 7 -> selector 0, i.e. a0).

    When every alpha is equal (a0 == a1) there is no range to divide
    by, so all pixels go to bucket 0 rather than dividing by zero.
*/
void
compress_DDS_alpha_block
(
    const unsigned char *const uncompressed,
    unsigned char compressed[8]
)
{
    /* variables */
    int i;
    int next_bit;
    int a0, a1;
    float scale_me;
    /* bucket (0 = lowest alpha) -> stored selector */
    static const int swizzle8[] = { 1, 7, 6, 5, 4, 3, 2, 0 };
    /* get the alpha limits (a0 >= a1) */
    a0 = a1 = uncompressed[3];
    for( i = 4+3; i < 16*4; i += 4 )
    {
        if( uncompressed[i] > a0 )
        {
            a0 = uncompressed[i];
        } else if( uncompressed[i] < a1 )
        {
            a1 = uncompressed[i];
        }
    }
    /* store those limits */
    compressed[0] = a0;
    compressed[1] = a1;
    /* zero out the selector bytes, they are OR-ed into below */
    compressed[2] = 0;
    compressed[3] = 0;
    compressed[4] = 0;
    compressed[5] = 0;
    compressed[6] = 0;
    compressed[7] = 0;
    /* 7.9999 (not 8) keeps the top alpha in bucket 7;
        a flat block has no range, so use a zero scale */
    if( a0 > a1 )
    {
        scale_me = 7.9999f / (a0 - a1);
    } else
    {
        scale_me = 0.0f;
    }
    /* store all of the alpha values */
    next_bit = 8*2;
    for( i = 3; i < 16*4; i += 4 )
    {
        /* convert this alpha value to a 3 bit number */
        int svalue;
        int value = (int)((uncompressed[i] - a1) * scale_me);
        svalue = swizzle8[ value&7 ];
        /* OK, store this value, start with the 1st byte */
        compressed[next_bit >> 3] |= svalue << (next_bit & 7);
        if( (next_bit & 7) > 5 )
        {
            /* spans 2 bytes, fill in the start of the 2nd byte */
            compressed[1 + (next_bit >> 3)] |= svalue >> (8 - (next_bit & 7) );
        }
        next_bit += 3;
    }
    /* done compressing the DXT5 alpha block */
}
|