123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- #include <stdlib.h>
- #include <string.h>
- #include "unicode_to_pdf.h"
- #include "utf8_m.h"
/*
 * Map a nibble value (0..15) to its uppercase hexadecimal ASCII digit
 * ('0'..'9', 'A'..'F').
 *
 * The argument and the whole expansion are parenthesized so the macro expands
 * with the intended precedence when given a compound expression or when used
 * inside a larger expression. The argument is evaluated more than once, so do
 * not pass expressions with side effects.
 */
#define ASCII_BYTE(x) ((x) > 9 ? (x) - 10 + 'A' : (x) + '0')
/*
 * Decode the UTF-8 sequence that starts at `a` into one 16-bit code unit.
 *
 * The sequence length is taken from utf8_octet_count() applied to the first
 * byte (presumably the lead-byte length, 1..4 -- confirm against utf8_m.h).
 * Any other length yields 0.
 *
 * Returns the decoded code point, or 0 when the sequence cannot be
 * represented: an unsupported length, or a code point above U+FFFF, which
 * does not fit in a single 16-bit unit. Callers in this file treat 0 as an
 * error. Note that an encoded U+0000 also decodes to 0.
 *
 * Continuation bytes are not validated; only their low six bits are used.
 */
unsigned short
to16(const char *a)
{
    /* Work on unsigned bytes: shifting a negative char left is undefined. */
    const unsigned char *u = (const unsigned char *)a;
    unsigned long cp;

    switch (utf8_octet_count(*a)) {
    case 4:
        cp = ((unsigned long)(u[0] & 0x07) << 18) |
             ((unsigned long)(u[1] & 0x3f) << 12) |
             ((unsigned long)(u[2] & 0x3f) << 6) |
             (unsigned long)(u[3] & 0x3f);
        break;
    case 3:
        cp = ((unsigned long)(u[0] & 0x0f) << 12) |
             ((unsigned long)(u[1] & 0x3f) << 6) |
             (unsigned long)(u[2] & 0x3f);
        break;
    case 2:
        cp = ((unsigned long)(u[0] & 0x1f) << 6) |
             (unsigned long)(u[1] & 0x3f);
        break;
    case 1:
        cp = u[0];
        break;
    default:
        return 0u;
    }

    /* Report values that would otherwise be silently truncated. */
    if (cp > 0xFFFFul)
        return 0u;
    return (unsigned short)cp;
}
/*
 * Render one byte as two uppercase hexadecimal ASCII digits packed into a
 * 16-bit value: the digit for the high nibble occupies the low 8 bits and the
 * digit for the low nibble occupies the high 8 bits.
 */
unsigned short
asciibyte(char c)
{
    static const char hex_digits[] = "0123456789ABCDEF";
    const int high_nibble = (c & 0xf0) >> 4;
    const int low_nibble = c & 0xf;
    const int first_digit = hex_digits[high_nibble];
    const int second_digit = hex_digits[low_nibble];

    return (unsigned short)(first_digit | (second_digit << 8));
}
- enum conversion_status
- convert_to_pdf16(const char **curr, int *in_bytes_count, int *bytes_used,
- write_callback write_fn, void *userptr)
- {
- /* Convert up to 512 of octets in one turn. */
- char reprbuf[512 * 4];
- union {
- unsigned short utf16code;
- struct _tag_u {
- char u_lo;
- char u_hi;
- } s_u_code;
- } u_code;
- int s_count;
- char *it = reprbuf;
- const char *r_it = reprbuf + sizeof(reprbuf) - 1;
- int inbuf_count = 0;
- while (*in_bytes_count > 0) {
- s_count = utf8_octet_count(**curr);
- u_code.utf16code = to16(*curr);
- if (u_code.utf16code == 0u)
- return CONV_STATUS_ERROR;
- ((unsigned short *)it)[1] = asciibyte(u_code.s_u_code.u_lo);
- ((unsigned short *)it)[0] = asciibyte(u_code.s_u_code.u_hi);
- it += sizeof(unsigned short) * 2;
- if (it > r_it) {
- write_fn(reprbuf, sizeof(reprbuf), userptr);
- return CONV_STATUS_MORE_DATA;
- }
- inbuf_count += sizeof(unsigned short) * 2;
- *bytes_used += sizeof(unsigned short) * 2;
- *in_bytes_count -= s_count;
- *curr += s_count;
- }
- write_fn(reprbuf, inbuf_count, userptr);
- return CONV_STATUS_DONE;
- }
/*
 * Convert the NUL-terminated UTF-8 string `str` into a zero-terminated array
 * of 16-bit code units.
 *
 * str        input string.
 * charpool   caller-provided output buffer holding `poolcount` elements.
 * poolcount  capacity of `charpool`, in elements.
 * opt_out    set to NULL when `charpool` was large enough and was used;
 *            otherwise set to a heap buffer that holds the result and that
 *            the caller must release with free().
 *
 * The required size is utf8_symcount(str) + 1 elements (presumably the
 * number of characters plus the terminator -- confirm against utf8_m.h).
 *
 * Returns 0 on success. Returns -1 when allocation fails, when to16()
 * reports 0 for a character, or when the string holds more characters than
 * utf8_symcount() announced; in those cases any heap buffer is released and
 * *opt_out is NULL.
 */
int
convert_to16(const char *str, unsigned short *charpool,
             unsigned poolcount, unsigned short **opt_out)
{
    const char *cptr;
    unsigned u8count = utf8_symcount(str);
    unsigned short *target = charpool;
    unsigned written = 0;

    *opt_out = NULL;
    if (poolcount <= u8count) {
        /* One extra element for the terminating zero. */
        target = calloc((size_t)u8count + 1, sizeof *target);
        if (target == NULL)
            return -1;
        *opt_out = target;
    }
    target[u8count] = 0u;

    for (cptr = str; *cptr; cptr += utf8_octet_count(*cptr)) {
        /* Stay inside the buffer even if the two helpers disagree. */
        if (written == u8count || (target[written] = to16(cptr)) == 0u) {
            free(*opt_out);
            *opt_out = NULL;
            return -1;
        }
        ++written;
    }

    /* Terminate right after the last unit actually written. */
    target[written] = 0u;
    return 0;
}
|