dxt.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. /* libs/opengles/dxt.cpp
  2. **
  3. ** Copyright 2007, The Android Open Source Project
  4. **
  5. ** Licensed under the Apache License, Version 2.0 (the "License");
  6. ** you may not use this file except in compliance with the License.
  7. ** You may obtain a copy of the License at
  8. **
  9. ** http://www.apache.org/licenses/LICENSE-2.0
  10. **
  11. ** Unless required by applicable law or agreed to in writing, software
  12. ** distributed under the License is distributed on an "AS IS" BASIS,
  13. ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. ** See the License for the specific language governing permissions and
  15. ** limitations under the License.
  16. */
  17. #define TIMING 0
  18. #if TIMING
  19. #include <sys/time.h> // for optimization timing
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #endif
  23. #include <GLES/gl.h>
  24. #include <utils/Endian.h>
  25. #include "context.h"
  26. #define TIMING 0
  27. namespace android {
  // Lookup table read by avg23(); populated on first use by init_tables().
  static uint8_t avg23tab[64*64];

  // Set to 1 by init_tables() once avg23tab has been filled in.
  static volatile int tables_initialized = 0;

  // Extract the 5-bit red, 6-bit green and 5-bit blue fields of a 5/6/5 word.
  #define red(v)   (((v) >> 11) & 0x1f)
  #define green(v) (((v) >> 5) & 0x3f)
  #define blue(v)  ( (v) & 0x1f)

  // avg23(a, b) looks up (2*a + b)/3 for 0 <= a,b < 64.
  // The table is indexed by [a << 6 | b].
  #define avg23(a,b) avg23tab[((a) << 6) | (b)]

  // Fixed-point replacements for division by 5 and by 7. Over the range of
  // arguments used in this file they are equivalent to
  //
  //   #define div5(x) ((x)/5)
  //   #define div7(x) ((x)/7)
  //
  // The multipliers are reciprocals scaled by 2^14:
  //   3277 = 2^14/5 + 1
  //   2341 = 2^14/7 + 1
  #define div5(v) (((v)*3277) >> 14)
  #define div7(v) (((v)*2341) >> 14)
  /*
   * Expand separate 5-bit red, 6-bit green and 5-bit blue components into a
   * packed 8/8/8 value (red in bits 23..16, green in 15..8, blue in 7..0).
   *
   * Each channel is widened by shifting it to the top of its byte and
   * copying its own most significant bits into the vacated low bits, so
   * that the maximum input maps to 0xff.
   *
   * Operation count: 8 shifts, 0 &, 5 |
   */
  inline static int rgb565SepTo888(int r, int g, int b)
  {
      const int r8 = (r << 3) | (r >> 2);   // 5 -> 8 bits
      const int g8 = (g << 2) | (g >> 4);   // 6 -> 8 bits
      const int b8 = (b << 3) | (b >> 2);   // 5 -> 8 bits

      return (r8 << 16) | (g8 << 8) | b8;
  }
  /*
   * Convert 5/6/5 RGB (as a single 16-bit word) to packed 8/8/8.
   *
   *   input  (16 bits): r4r3r2r1 r0g5g4g3 g2g1g0b4 b3b2b1b0
   *   output (24 bits): r4r3r2r1 r0r4r3r2 g5g4g3g2 g1g0g5g4 b4b3b2b1 b0b4b3b2
   *
   * The 24-bit word is assembled from six masked shifts of the input:
   *
   *   (rgb << 8) & 0xf80000    r4..r0 -> bits 23..19
   *   (rgb << 3) & 0x070000    r4..r2 -> bits 18..16
   *   (rgb << 5) & 0x00fc00    g5..g0 -> bits 15..10
   *   (rgb >> 1) & 0x000300    g5..g4 -> bits  9..8
   *   (rgb << 3) & 0x0000f8    b4..b0 -> bits  7..3
   *   (rgb >> 2) & 0x000007    b4..b2 -> bits  2..0
   *
   * Operation count: 5 shifts, 6 &, 5 | (n.b. rgb << 3 is used twice)
   *
   * The result matches rgb565SepTo888() applied to the three fields of
   * 'rgb'. 'rgb' is expected to hold a 16-bit value; higher bits are not
   * masked off before shifting.
   */
  inline static int rgb565To888(int rgb)
  {
      // Shared by the low red bits and the high blue bits. This must be a
      // LEFT shift: a right shift can never reach bits 18..16 and would put
      // green bits into the blue channel.
      int rgb3 = rgb << 3;

      return (((rgb << 8) & 0xf80000) |
              ( rgb3      & 0x070000) |
              ((rgb << 5) & 0x00fc00) |
              ((rgb >> 1) & 0x000300) |
              ( rgb3      & 0x0000f8) |
              ((rgb >> 2) & 0x000007));
  }
  #if __BYTE_ORDER == __BIG_ENDIAN
  /*
   * Reverse the order of the four bytes of a 32-bit word. Only compiled on
   * big-endian hosts, where it is used to convert words read from the
   * texture data.
   *
   * All arithmetic is done on uint32_t: moving a byte >= 0x80 into the top
   * byte of a signed int would overflow.
   */
  static uint32_t swap(uint32_t x) {
      uint32_t b0 = (x >> 24) & 0xffu;
      uint32_t b1 = (x >> 16) & 0xffu;
      uint32_t b2 = (x >>  8) & 0xffu;
      uint32_t b3 = (x      ) & 0xffu;
      return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
  }
  #endif
  92. static void
  93. init_tables()
  94. {
  95. if (tables_initialized) {
  96. return;
  97. }
  98. for (int i = 0; i < 64; i++) {
  99. for (int j = 0; j < 64; j++) {
  100. int avg = (2*i + j)/3;
  101. avg23tab[(i << 6) | j] = avg;
  102. }
  103. }
  104. asm volatile ("" : : : "memory");
  105. tables_initialized = 1;
  106. }
  107. /*
  108. * Utility to scan a DXT1 compressed texture to determine whether it
  109. * contains a transparent pixel (color0 < color1, code == 3). This
  110. * may be useful if the application lacks information as to whether
  111. * the true format is GL_COMPRESSED_RGB_S3TC_DXT1_EXT or
  112. * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT.
  113. */
  114. bool
  115. DXT1HasAlpha(const GLvoid *data, int width, int height) {
  116. #if TIMING
  117. struct timeval start_t, end_t;
  118. struct timezone tz;
  119. gettimeofday(&start_t, &tz);
  120. #endif
  121. bool hasAlpha = false;
  122. int xblocks = (width + 3)/4;
  123. int yblocks = (height + 3)/4;
  124. int numblocks = xblocks*yblocks;
  125. uint32_t const *d32 = (uint32_t *)data;
  126. for (int b = 0; b < numblocks; b++) {
  127. uint32_t colors = *d32++;
  128. #if __BYTE_ORDER == __BIG_ENDIAN
  129. colors = swap(colors);
  130. #endif
  131. uint16_t color0 = colors & 0xffff;
  132. uint16_t color1 = colors >> 16;
  133. if (color0 < color1) {
  134. // There's no need to endian-swap within 'bits'
  135. // since we don't care which pixel is the transparent one
  136. uint32_t bits = *d32++;
  137. // Detect if any (odd, even) pair of bits are '11'
  138. // bits: b31 b30 b29 ... b3 b2 b1 b0
  139. // bits >> 1: b31 b31 b30 ... b4 b3 b2 b1
  140. // &: b31 (b31 & b30) (b29 & b28) ... (b2 & b1) (b1 & b0)
  141. // & 0x55..: 0 (b31 & b30) 0 ... 0 (b1 & b0)
  142. if (((bits & (bits >> 1)) & 0x55555555) != 0) {
  143. hasAlpha = true;
  144. goto done;
  145. }
  146. } else {
  147. // Skip 4 bytes
  148. ++d32;
  149. }
  150. }
  151. done:
  152. #if TIMING
  153. gettimeofday(&end_t, &tz);
  154. long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
  155. (end_t.tv_usec - start_t.tv_usec);
  156. printf("Scanned w=%d h=%d in %ld usec\n", width, height, usec);
  157. #endif
  158. return hasAlpha;
  159. }
  160. static void
  161. decodeDXT1(const GLvoid *data, int width, int height,
  162. void *surface, int stride,
  163. bool hasAlpha)
  164. {
  165. init_tables();
  166. uint32_t const *d32 = (uint32_t *)data;
  167. // Color table for the current block
  168. uint16_t c[4];
  169. c[0] = c[1] = c[2] = c[3] = 0;
  170. // Specified colors from the previous block
  171. uint16_t prev_color0 = 0x0000;
  172. uint16_t prev_color1 = 0x0000;
  173. uint16_t* rowPtr = (uint16_t*)surface;
  174. for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
  175. uint16_t *blockPtr = rowPtr;
  176. for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
  177. uint32_t colors = *d32++;
  178. uint32_t bits = *d32++;
  179. #if __BYTE_ORDER == __BIG_ENDIAN
  180. colors = swap(colors);
  181. bits = swap(bits);
  182. #endif
  183. // Raw colors
  184. uint16_t color0 = colors & 0xffff;
  185. uint16_t color1 = colors >> 16;
  186. // If the new block has the same base colors as the
  187. // previous one, we don't need to recompute the color
  188. // table c[]
  189. if (color0 != prev_color0 || color1 != prev_color1) {
  190. // Store raw colors for comparison with next block
  191. prev_color0 = color0;
  192. prev_color1 = color1;
  193. int r0 = red(color0);
  194. int g0 = green(color0);
  195. int b0 = blue(color0);
  196. int r1 = red(color1);
  197. int g1 = green(color1);
  198. int b1 = blue(color1);
  199. if (hasAlpha) {
  200. c[0] = (r0 << 11) | ((g0 >> 1) << 6) | (b0 << 1) | 0x1;
  201. c[1] = (r1 << 11) | ((g1 >> 1) << 6) | (b1 << 1) | 0x1;
  202. } else {
  203. c[0] = color0;
  204. c[1] = color1;
  205. }
  206. int r2, g2, b2, r3, g3, b3, a3;
  207. int bbits = bits >> 1;
  208. bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
  209. bool has3 = ((bbits & bits) & 0x55555555) != 0;
  210. if (has2 || has3) {
  211. if (color0 > color1) {
  212. r2 = avg23(r0, r1);
  213. g2 = avg23(g0, g1);
  214. b2 = avg23(b0, b1);
  215. r3 = avg23(r1, r0);
  216. g3 = avg23(g1, g0);
  217. b3 = avg23(b1, b0);
  218. a3 = 1;
  219. } else {
  220. r2 = (r0 + r1) >> 1;
  221. g2 = (g0 + g1) >> 1;
  222. b2 = (b0 + b1) >> 1;
  223. r3 = g3 = b3 = a3 = 0;
  224. }
  225. if (hasAlpha) {
  226. c[2] = (r2 << 11) | ((g2 >> 1) << 6) |
  227. (b2 << 1) | 0x1;
  228. c[3] = (r3 << 11) | ((g3 >> 1) << 6) |
  229. (b3 << 1) | a3;
  230. } else {
  231. c[2] = (r2 << 11) | (g2 << 5) | b2;
  232. c[3] = (r3 << 11) | (g3 << 5) | b3;
  233. }
  234. }
  235. }
  236. uint16_t* blockRowPtr = blockPtr;
  237. for (int y = 0; y < 4; y++, blockRowPtr += stride) {
  238. // Don't process rows past the botom
  239. if (base_y + y >= height) {
  240. break;
  241. }
  242. int w = min(width - base_x, 4);
  243. for (int x = 0; x < w; x++) {
  244. int code = bits & 0x3;
  245. bits >>= 2;
  246. blockRowPtr[x] = c[code];
  247. }
  248. }
  249. }
  250. }
  251. }
  252. // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
  253. static void
  254. decodeDXT3(const GLvoid *data, int width, int height,
  255. void *surface, int stride)
  256. {
  257. init_tables();
  258. uint32_t const *d32 = (uint32_t *)data;
  259. // Specified colors from the previous block
  260. uint16_t prev_color0 = 0x0000;
  261. uint16_t prev_color1 = 0x0000;
  262. // Color table for the current block
  263. uint32_t c[4];
  264. c[0] = c[1] = c[2] = c[3] = 0;
  265. uint32_t* rowPtr = (uint32_t*)surface;
  266. for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
  267. uint32_t *blockPtr = rowPtr;
  268. for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
  269. #if __BYTE_ORDER == __BIG_ENDIAN
  270. uint32_t alphahi = *d32++;
  271. uint32_t alphalo = *d32++;
  272. alphahi = swap(alphahi);
  273. alphalo = swap(alphalo);
  274. #else
  275. uint32_t alphalo = *d32++;
  276. uint32_t alphahi = *d32++;
  277. #endif
  278. uint32_t colors = *d32++;
  279. uint32_t bits = *d32++;
  280. #if __BYTE_ORDER == __BIG_ENDIAN
  281. colors = swap(colors);
  282. bits = swap(bits);
  283. #endif
  284. uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
  285. // Raw colors
  286. uint16_t color0 = colors & 0xffff;
  287. uint16_t color1 = colors >> 16;
  288. // If the new block has the same base colors as the
  289. // previous one, we don't need to recompute the color
  290. // table c[]
  291. if (color0 != prev_color0 || color1 != prev_color1) {
  292. // Store raw colors for comparison with next block
  293. prev_color0 = color0;
  294. prev_color1 = color1;
  295. int bbits = bits >> 1;
  296. bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
  297. bool has3 = ((bbits & bits) & 0x55555555) != 0;
  298. if (has2 || has3) {
  299. int r0 = red(color0);
  300. int g0 = green(color0);
  301. int b0 = blue(color0);
  302. int r1 = red(color1);
  303. int g1 = green(color1);
  304. int b1 = blue(color1);
  305. int r2 = avg23(r0, r1);
  306. int g2 = avg23(g0, g1);
  307. int b2 = avg23(b0, b1);
  308. int r3 = avg23(r1, r0);
  309. int g3 = avg23(g1, g0);
  310. int b3 = avg23(b1, b0);
  311. c[0] = rgb565SepTo888(r0, g0, b0);
  312. c[1] = rgb565SepTo888(r1, g1, b1);
  313. c[2] = rgb565SepTo888(r2, g2, b2);
  314. c[3] = rgb565SepTo888(r3, g3, b3);
  315. } else {
  316. // Convert to 8 bits
  317. c[0] = rgb565To888(color0);
  318. c[1] = rgb565To888(color1);
  319. }
  320. }
  321. uint32_t* blockRowPtr = blockPtr;
  322. for (int y = 0; y < 4; y++, blockRowPtr += stride) {
  323. // Don't process rows past the botom
  324. if (base_y + y >= height) {
  325. break;
  326. }
  327. int w = min(width - base_x, 4);
  328. for (int x = 0; x < w; x++) {
  329. int a = alpha & 0xf;
  330. alpha >>= 4;
  331. int code = bits & 0x3;
  332. bits >>= 2;
  333. blockRowPtr[x] = c[code] | (a << 28) | (a << 24);
  334. }
  335. }
  336. }
  337. }
  338. }
  339. // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
  340. static void
  341. decodeDXT5(const GLvoid *data, int width, int height,
  342. void *surface, int stride)
  343. {
  344. init_tables();
  345. uint32_t const *d32 = (uint32_t *)data;
  346. // Specified alphas from the previous block
  347. uint8_t prev_alpha0 = 0x00;
  348. uint8_t prev_alpha1 = 0x00;
  349. // Specified colors from the previous block
  350. uint16_t prev_color0 = 0x0000;
  351. uint16_t prev_color1 = 0x0000;
  352. // Alpha table for the current block
  353. uint8_t a[8];
  354. a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = 0;
  355. // Color table for the current block
  356. uint32_t c[4];
  357. c[0] = c[1] = c[2] = c[3] = 0;
  358. int good_a5 = 0;
  359. int bad_a5 = 0;
  360. int good_a6 = 0;
  361. int bad_a6 = 0;
  362. int good_a7 = 0;
  363. int bad_a7 = 0;
  364. uint32_t* rowPtr = (uint32_t*)surface;
  365. for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
  366. uint32_t *blockPtr = rowPtr;
  367. for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
  368. #if __BYTE_ORDER == __BIG_ENDIAN
  369. uint32_t alphahi = *d32++;
  370. uint32_t alphalo = *d32++;
  371. alphahi = swap(alphahi);
  372. alphalo = swap(alphalo);
  373. #else
  374. uint32_t alphalo = *d32++;
  375. uint32_t alphahi = *d32++;
  376. #endif
  377. uint32_t colors = *d32++;
  378. uint32_t bits = *d32++;
  379. #if __BYTE_ORDER == __BIG_ENDIANx
  380. colors = swap(colors);
  381. bits = swap(bits);
  382. #endif
  383. uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
  384. uint64_t alpha0 = alpha & 0xff;
  385. alpha >>= 8;
  386. uint64_t alpha1 = alpha & 0xff;
  387. alpha >>= 8;
  388. if (alpha0 != prev_alpha0 || alpha1 != prev_alpha1) {
  389. prev_alpha0 = alpha0;
  390. prev_alpha1 = alpha1;
  391. a[0] = alpha0;
  392. a[1] = alpha1;
  393. int a01 = alpha0 + alpha1 - 1;
  394. if (alpha0 > alpha1) {
  395. a[2] = div7(6*alpha0 + alpha1);
  396. a[4] = div7(4*alpha0 + 3*alpha1);
  397. a[6] = div7(2*alpha0 + 5*alpha1);
  398. // Use symmetry to derive half of the values
  399. // A few values will be off by 1 (~.5%)
  400. // Alternate which values are computed directly
  401. // and which are derived to try to reduce bias
  402. a[3] = a01 - a[6];
  403. a[5] = a01 - a[4];
  404. a[7] = a01 - a[2];
  405. } else {
  406. a[2] = div5(4*alpha0 + alpha1);
  407. a[4] = div5(2*alpha0 + 3*alpha1);
  408. a[3] = a01 - a[4];
  409. a[5] = a01 - a[2];
  410. a[6] = 0x00;
  411. a[7] = 0xff;
  412. }
  413. }
  414. // Raw colors
  415. uint16_t color0 = colors & 0xffff;
  416. uint16_t color1 = colors >> 16;
  417. // If the new block has the same base colors as the
  418. // previous one, we don't need to recompute the color
  419. // table c[]
  420. if (color0 != prev_color0 || color1 != prev_color1) {
  421. // Store raw colors for comparison with next block
  422. prev_color0 = color0;
  423. prev_color1 = color1;
  424. int bbits = bits >> 1;
  425. bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
  426. bool has3 = ((bbits & bits) & 0x55555555) != 0;
  427. if (has2 || has3) {
  428. int r0 = red(color0);
  429. int g0 = green(color0);
  430. int b0 = blue(color0);
  431. int r1 = red(color1);
  432. int g1 = green(color1);
  433. int b1 = blue(color1);
  434. int r2 = avg23(r0, r1);
  435. int g2 = avg23(g0, g1);
  436. int b2 = avg23(b0, b1);
  437. int r3 = avg23(r1, r0);
  438. int g3 = avg23(g1, g0);
  439. int b3 = avg23(b1, b0);
  440. c[0] = rgb565SepTo888(r0, g0, b0);
  441. c[1] = rgb565SepTo888(r1, g1, b1);
  442. c[2] = rgb565SepTo888(r2, g2, b2);
  443. c[3] = rgb565SepTo888(r3, g3, b3);
  444. } else {
  445. // Convert to 8 bits
  446. c[0] = rgb565To888(color0);
  447. c[1] = rgb565To888(color1);
  448. }
  449. }
  450. uint32_t* blockRowPtr = blockPtr;
  451. for (int y = 0; y < 4; y++, blockRowPtr += stride) {
  452. // Don't process rows past the botom
  453. if (base_y + y >= height) {
  454. break;
  455. }
  456. int w = min(width - base_x, 4);
  457. for (int x = 0; x < w; x++) {
  458. int acode = alpha & 0x7;
  459. alpha >>= 3;
  460. int code = bits & 0x3;
  461. bits >>= 2;
  462. blockRowPtr[x] = c[code] | (a[acode] << 24);
  463. }
  464. }
  465. }
  466. }
  467. }
  468. /*
  469. * Decode a DXT-compressed texture into memory. DXT textures consist of
  470. * a series of 4x4 pixel blocks in left-to-right, top-down order.
  471. * The number of blocks is given by ceil(width/4)*ceil(height/4).
  472. *
  473. * 'data' points to the texture data. 'width' and 'height' indicate the
  474. * dimensions of the texture. We assume width and height are >= 0 but
  475. * do not require them to be powers of 2 or divisible by any factor.
  476. *
  477. * The output is written to 'surface' with each scanline separated by
  478. * 'stride' 2- or 4-byte words.
  479. *
  480. * 'format' indicates the type of compression and must be one of the following:
  481. *
  482. * GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
  483. * The output is written as 5/6/5 opaque RGB (16 bit words).
  484. * 8 bytes are read from 'data' for each block.
  485. *
  486. * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT
  487. * The output is written as 5/5/5/1 RGBA (16 bit words)
  488. * 8 bytes are read from 'data' for each block.
  489. *
  490. * GL_COMPRESSED_RGBA_S3TC_DXT3_EXT
  491. * GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
  492. * The output is written as 8/8/8/8 ARGB (32 bit words)
  493. * 16 bytes are read from 'data' for each block.
  494. */
  495. void
  496. decodeDXT(const GLvoid *data, int width, int height,
  497. void *surface, int stride, int format)
  498. {
  499. #if TIMING
  500. struct timeval start_t, end_t;
  501. struct timezone tz;
  502. gettimeofday(&start_t, &tz);
  503. #endif
  504. switch (format) {
  505. case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
  506. decodeDXT1(data, width, height, surface, stride, false);
  507. break;
  508. case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
  509. decodeDXT1(data, width, height, surface, stride, true);
  510. break;
  511. case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
  512. decodeDXT3(data, width, height, surface, stride);
  513. break;
  514. case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
  515. decodeDXT5(data, width, height, surface, stride);
  516. break;
  517. }
  518. #if TIMING
  519. gettimeofday(&end_t, &tz);
  520. long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
  521. (end_t.tv_usec - start_t.tv_usec);
  522. printf("Loaded w=%d h=%d in %ld usec\n", width, height, usec);
  523. #endif
  524. }
  525. } // namespace android