/* unicode_to_pdf.c -- UTF-8 to 16-bit code unit and hexadecimal text conversion helpers. */
  1. #include <stdlib.h>
  2. #include <string.h>
  3. #include "unicode_to_pdf.h"
  4. #include "utf8_m.h"
  /*
   * Map a nibble value (0..15) to its uppercase hexadecimal ASCII digit.
   * The argument and the whole expansion are parenthesized so that compound
   * expressions such as `v & 0xf` expand with the intended precedence.
   * Note: the argument is evaluated more than once; do not pass expressions
   * with side effects.
   */
  #define ASCII_BYTE(x) ((x) > 9 ? (x) - 10 + 'A' : (x) + '0')
  /*
   * Decode the UTF-8 sequence starting at `a` into one 16-bit code unit.
   *
   * The sequence length is taken from utf8_octet_count() applied to the
   * lead octet. Sequences of one to three octets are decoded; every
   * continuation octet must have the form 10xxxxxx, which also stops the
   * decoder from reading beyond a NUL terminator in a truncated sequence.
   *
   * Returns the decoded value, or 0 when:
   *   - utf8_octet_count() reports a length other than 1, 2 or 3
   *     (a 4-octet sequence encodes a code point above U+FFFF, which does
   *     not fit in a single 16-bit unit, so it is reported as an error
   *     instead of being silently truncated);
   *   - a continuation octet is malformed;
   *   - the sequence decodes to 0 (callers treat 0 as the error value).
   */
  unsigned short
  to16(const char *a)
  {
      /* Work on unsigned octets so masking never depends on char signedness. */
      const unsigned char *p = (const unsigned char *)a;
      const int len = utf8_octet_count(*a);
      unsigned long code;
      int i;

      switch (len) {
      case 1:
          return (unsigned short)p[0];
      case 2:
          code = p[0] & 0x1fu;
          break;
      case 3:
          code = p[0] & 0x0fu;
          break;
      default:
          /* Invalid lead octet, or a code point outside the 16-bit range. */
          return 0u;
      }

      for (i = 1; i < len; ++i) {
          if ((p[i] & 0xc0u) != 0x80u)
              return 0u;
          code = (code << 6) | (p[i] & 0x3fu);
      }
      return (unsigned short)code;
  }
  /*
   * Render one byte as two uppercase hexadecimal ASCII digits packed into a
   * 16-bit value: the digit for the high nibble occupies bits 0-7 and the
   * digit for the low nibble occupies bits 8-15.
   */
  unsigned short
  asciibyte(char c)
  {
      static const char hex_digits[] = "0123456789ABCDEF";
      const unsigned upper = ((unsigned char)c >> 4) & 0xfu;
      const unsigned lower = (unsigned char)c & 0xfu;

      return (unsigned short)(hex_digits[upper] | (hex_digits[lower] << 8));
  }
  39. enum conversion_status
  40. convert_to_pdf16(const char **curr, int *in_bytes_count, int *bytes_used,
  41. write_callback write_fn, void *userptr)
  42. {
  43. /* Convert up to 512 of octets in one turn. */
  44. char reprbuf[512 * 4];
  45. union {
  46. unsigned short utf16code;
  47. struct _tag_u {
  48. char u_lo;
  49. char u_hi;
  50. } s_u_code;
  51. } u_code;
  52. int s_count;
  53. char *it = reprbuf;
  54. const char *r_it = reprbuf + sizeof(reprbuf) - 1;
  55. int inbuf_count = 0;
  56. while (*in_bytes_count > 0) {
  57. s_count = utf8_octet_count(**curr);
  58. u_code.utf16code = to16(*curr);
  59. if (u_code.utf16code == 0u)
  60. return CONV_STATUS_ERROR;
  61. ((unsigned short *)it)[1] = asciibyte(u_code.s_u_code.u_lo);
  62. ((unsigned short *)it)[0] = asciibyte(u_code.s_u_code.u_hi);
  63. it += sizeof(unsigned short) * 2;
  64. if (it > r_it) {
  65. write_fn(reprbuf, sizeof(reprbuf), userptr);
  66. return CONV_STATUS_MORE_DATA;
  67. }
  68. inbuf_count += sizeof(unsigned short) * 2;
  69. *bytes_used += sizeof(unsigned short) * 2;
  70. *in_bytes_count -= s_count;
  71. *curr += s_count;
  72. }
  73. write_fn(reprbuf, inbuf_count, userptr);
  74. return CONV_STATUS_DONE;
  75. }
  /*
   * Convert the NUL-terminated UTF-8 string `str` into a 0-terminated array
   * of 16-bit code units.
   *
   * str        input string.
   * charpool   caller-provided output buffer.
   * poolcount  capacity of `charpool` in elements.
   * opt_out    receives NULL when the result was stored in `charpool`, or a
   *            heap buffer holding the result when `charpool` is too small
   *            for the characters plus the terminator. The caller owns that
   *            buffer and must free() it.
   *
   * The required capacity comes from utf8_symcount(str), which is assumed
   * to return the number of characters in `str` (defined elsewhere).
   *
   * Returns 0 on success. Returns -1, with *opt_out set to NULL, when
   * allocation fails, when to16() rejects a sequence, when a multi-octet
   * sequence is cut short by the terminator, or when the string holds more
   * characters than utf8_symcount() reported.
   */
  int
  convert_to16(const char *str, unsigned short *charpool,
               unsigned poolcount, unsigned short **opt_out)
  {
      const unsigned u8count = utf8_symcount(str);
      unsigned short *target = charpool;
      unsigned short *heap = NULL;
      const char *cptr = str;
      unsigned written = 0;

      *opt_out = NULL;

      /* Need room for u8count units plus the terminating 0. */
      if (poolcount <= u8count) {
          heap = calloc((size_t)u8count + 1, sizeof *heap);
          if (heap == NULL)
              return -1;
          target = heap;
      }

      while (*cptr != '\0') {
          const int len = utf8_octet_count(*cptr);
          int k;

          /* Never write beyond the capacity computed above. */
          if (written == u8count)
              goto fail;

          /* A sequence cut short by the terminator must not be skipped over. */
          for (k = 1; k < len; ++k) {
              if (cptr[k] == '\0')
                  goto fail;
          }

          target[written] = to16(cptr);
          if (target[written] == 0u)
              goto fail;

          ++written;
          cptr += len;
      }

      target[written] = 0u;
      *opt_out = heap;
      return 0;

  fail:
      free(heap);
      return -1;
  }