|
- From 273a133110838ee5702e7eb6409a853c598211b2 Mon Sep 17 00:00:00 2001
- From: Ken Sharp <ken.sharp@artifex.com>
- Date: Thu, 29 Sep 2016 17:35:05 +0100
- Subject: [PATCH] Remove (and re-implement) ConvertUTF.c
- Bug #697122 " embedded ConvertUTF.c is buggy and licensed incompatibly with GPL/APGL"
- It's not clear that this code is incompatible with GPL, nor do we think
- any 'bugginess' in the code affects us, since we are using a comparatively
- small part of the included code.
- Nevertheless, it's possible to remove the code and re-implement the small
- part we actually need, and that is done here.
- Also removed the DSCEncodingToUnicode option which was insanely difficult
- to use, and incorrectly documented.
- This shows one difference: 692486_-_heap_overflow_in_pdf_to_ucs2.pdf
- now correctly throws an error, because the PDF file contains document
- information (Application) which has an invalid UTF16-BE sequence.
- ---
- base/ConvertUTF.c | 539 -----------------------------------------
- base/ConvertUTF.h | 155 ------------
- base/lib.mak | 4 -
- devices/devs.mak | 5 +-
- devices/vector/gdevpdf.c | 16 +-
- devices/vector/gdevpdfb.h | 1 -
- devices/vector/gdevpdfe.c | 270 +++++++++++----------
- devices/vector/gdevpdfp.c | 1 -
- devices/vector/gdevpdfx.h | 17 +-
- windows/ghostscript.vcproj | 8 -
- windows/ghostscript_rt.vcxproj | 2 -
- 11 files changed, 155 insertions(+), 863 deletions(-)
- delete mode 100644 base/ConvertUTF.c
- delete mode 100644 base/ConvertUTF.h
- diff --git a/base/ConvertUTF.c b/base/ConvertUTF.c
- deleted file mode 100644
- index cb2e2de..0000000
- --- a/base/ConvertUTF.c
- +++ /dev/null
- @@ -1,539 +0,0 @@
- -/*
- - * Copyright 2001-2004 Unicode, Inc.
- - *
- - * Disclaimer
- - *
- - * This source code is provided as is by Unicode, Inc. No claims are
- - * made as to fitness for any particular purpose. No warranties of any
- - * kind are expressed or implied. The recipient agrees to determine
- - * applicability of information provided. If this file has been
- - * purchased on magnetic or optical media from Unicode, Inc., the
- - * sole remedy for any claim will be exchange of defective media
- - * within 90 days of receipt.
- - *
- - * Limitations on Rights to Redistribute This Code
- - *
- - * Unicode, Inc. hereby grants the right to freely use the information
- - * supplied in this file in the creation of products supporting the
- - * Unicode Standard, and to make copies of this file in any form
- - * for internal or external distribution as long as this notice
- - * remains attached.
- - */
- -
- -
- -/* ---------------------------------------------------------------------
- -
- - Conversions between UTF32, UTF-16, and UTF-8. Source code file.
- - Author: Mark E. Davis, 1994.
- - Rev History: Rick McGowan, fixes & updates May 2001.
- - Sept 2001: fixed const & error conditions per
- - mods suggested by S. Parent & A. Lillich.
- - June 2002: Tim Dodd added detection and handling of incomplete
- - source sequences, enhanced error detection, added casts
- - to eliminate compiler warnings.
- - July 2003: slight mods to back out aggressive FFFE detection.
- - Jan 2004: updated switches in from-UTF8 conversions.
- - Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
- -
- - See the header file "ConvertUTF.h" for complete documentation.
- -
- ------------------------------------------------------------------------- */
- -
- -#include "ConvertUTF.h"
- -#ifdef CVTUTF_DEBUG
- -#include <stdio.h>
- -#endif
- -
- -static const int halfShift = 10; /* used for shifting by 10 bits */
- -
- -static const UTF32 halfBase = 0x0010000UL;
- -static const UTF32 halfMask = 0x3FFUL;
- -
- -#define UNI_SUR_HIGH_START (UTF32)0xD800
- -#define UNI_SUR_HIGH_END (UTF32)0xDBFF
- -#define UNI_SUR_LOW_START (UTF32)0xDC00
- -#define UNI_SUR_LOW_END (UTF32)0xDFFF
- -#define false 0
- -#define true 1
- -
- -/* --------------------------------------------------------------------- */
- -
- -ConversionResult ConvertUTF32toUTF16 (
- - const UTF32** sourceStart, const UTF32* sourceEnd,
- - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
- - ConversionResult result = conversionOK;
- - const UTF32* source = *sourceStart;
- - UTF16* target = *targetStart;
- - while (source < sourceEnd) {
- - UTF32 ch;
- - if (target >= targetEnd) {
- - result = targetExhausted; break;
- - }
- - ch = *source++;
- - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
- - /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
- - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
- - if (flags == strictConversion) {
- - --source; /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - } else {
- - *target++ = UNI_REPLACEMENT_CHAR;
- - }
- - } else {
- - *target++ = (UTF16)ch; /* normal case */
- - }
- - } else if (ch > UNI_MAX_LEGAL_UTF32) {
- - if (flags == strictConversion) {
- - result = sourceIllegal;
- - } else {
- - *target++ = UNI_REPLACEMENT_CHAR;
- - }
- - } else {
- - /* target is a character in range 0xFFFF - 0x10FFFF. */
- - if (target + 1 >= targetEnd) {
- - --source; /* Back up source pointer! */
- - result = targetExhausted; break;
- - }
- - ch -= halfBase;
- - *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
- - *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
- - }
- - }
- - *sourceStart = source;
- - *targetStart = target;
- - return result;
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -ConversionResult ConvertUTF16toUTF32 (
- - const UTF16** sourceStart, const UTF16* sourceEnd,
- - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
- - ConversionResult result = conversionOK;
- - const UTF16* source = *sourceStart;
- - UTF32* target = *targetStart;
- - UTF32 ch, ch2;
- - while (source < sourceEnd) {
- - const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
- - ch = *source++;
- - /* If we have a surrogate pair, convert to UTF32 first. */
- - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
- - /* If the 16 bits following the high surrogate are in the source buffer... */
- - if (source < sourceEnd) {
- - ch2 = *source;
- - /* If it's a low surrogate, convert to UTF32. */
- - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
- - ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
- - + (ch2 - UNI_SUR_LOW_START) + halfBase;
- - ++source;
- - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
- - --source; /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - }
- - } else { /* We don't have the 16 bits following the high surrogate. */
- - --source; /* return to the high surrogate */
- - result = sourceExhausted;
- - break;
- - }
- - } else if (flags == strictConversion) {
- - /* UTF-16 surrogate values are illegal in UTF-32 */
- - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
- - --source; /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - }
- - }
- - if (target >= targetEnd) {
- - source = oldSource; /* Back up source pointer! */
- - result = targetExhausted; break;
- - }
- - *target++ = ch;
- - }
- - *sourceStart = source;
- - *targetStart = target;
- -#ifdef CVTUTF_DEBUG
- -if (result == sourceIllegal) {
- - fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
- - fflush(stderr);
- -}
- -#endif
- - return result;
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -/*
- - * Index into the table below with the first byte of a UTF-8 sequence to
- - * get the number of trailing bytes that are supposed to follow it.
- - * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
- - * left as-is for anyone who may want to do such conversion, which was
- - * allowed in earlier algorithms.
- - */
- -static const char trailingBytesForUTF8[256] = {
- - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
- -};
- -
- -/*
- - * Magic values subtracted from a buffer value during UTF8 conversion.
- - * This table contains as many values as there might be trailing bytes
- - * in a UTF-8 sequence.
- - */
- -static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
- - 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
- -
- -/*
- - * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
- - * into the first byte, depending on how many bytes follow. There are
- - * as many entries in this table as there are UTF-8 sequence types.
- - * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
- - * for *legal* UTF-8 will be 4 or fewer bytes total.
- - */
- -static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
- -
- -/* --------------------------------------------------------------------- */
- -
- -/* The interface converts a whole buffer to avoid function-call overhead.
- - * Constants have been gathered. Loops & conditionals have been removed as
- - * much as possible for efficiency, in favor of drop-through switches.
- - * (See "Note A" at the bottom of the file for equivalent code.)
- - * If your compiler supports it, the "isLegalUTF8" call can be turned
- - * into an inline function.
- - */
- -
- -/* --------------------------------------------------------------------- */
- -
- -ConversionResult ConvertUTF16toUTF8 (
- - const UTF16** sourceStart, const UTF16* sourceEnd,
- - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
- - ConversionResult result = conversionOK;
- - const UTF16* source = *sourceStart;
- - UTF8* target = *targetStart;
- - while (source < sourceEnd) {
- - UTF32 ch;
- - unsigned short bytesToWrite = 0;
- - const UTF32 byteMask = 0xBF;
- - const UTF32 byteMark = 0x80;
- - const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
- - ch = *source++;
- - /* If we have a surrogate pair, convert to UTF32 first. */
- - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
- - /* If the 16 bits following the high surrogate are in the source buffer... */
- - if (source < sourceEnd) {
- - UTF32 ch2 = *source;
- - /* If it's a low surrogate, convert to UTF32. */
- - if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
- - ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
- - + (ch2 - UNI_SUR_LOW_START) + halfBase;
- - ++source;
- - } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
- - --source; /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - }
- - } else { /* We don't have the 16 bits following the high surrogate. */
- - --source; /* return to the high surrogate */
- - result = sourceExhausted;
- - break;
- - }
- - } else if (flags == strictConversion) {
- - /* UTF-16 surrogate values are illegal in UTF-32 */
- - if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
- - --source; /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - }
- - }
- - /* Figure out how many bytes the result will require */
- - if (ch < (UTF32)0x80) { bytesToWrite = 1;
- - } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
- - } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
- - } else if (ch < (UTF32)0x110000) { bytesToWrite = 4;
- - } else { bytesToWrite = 3;
- - ch = UNI_REPLACEMENT_CHAR;
- - }
- -
- - target += bytesToWrite;
- - if (target > targetEnd) {
- - source = oldSource; /* Back up source pointer! */
- - target -= bytesToWrite; result = targetExhausted; break;
- - }
- - switch (bytesToWrite) { /* note: everything falls through. */
- - case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
- - case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
- - case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
- - case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]);
- - }
- - target += bytesToWrite;
- - }
- - *sourceStart = source;
- - *targetStart = target;
- - return result;
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -/*
- - * Utility routine to tell whether a sequence of bytes is legal UTF-8.
- - * This must be called with the length pre-determined by the first byte.
- - * If not calling this from ConvertUTF8to*, then the length can be set by:
- - * length = trailingBytesForUTF8[*source]+1;
- - * and the sequence is illegal right away if there aren't that many bytes
- - * available.
- - * If presented with a length > 4, this returns false. The Unicode
- - * definition of UTF-8 goes up to 4-byte sequences.
- - */
- -
- -static Boolean isLegalUTF8(const UTF8 *source, int length) {
- - UTF8 a;
- - const UTF8 *srcptr = source+length;
- - switch (length) {
- - default: return false;
- - /* Everything else falls through when "true"... */
- - case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
- - case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
- - case 2: if ((a = (*--srcptr)) > 0xBF) return false;
- -
- - switch (*source) {
- - /* no fall-through in this inner switch */
- - case 0xE0: if (a < 0xA0) return false; break;
- - case 0xED: if (a > 0x9F) return false; break;
- - case 0xF0: if (a < 0x90) return false; break;
- - case 0xF4: if (a > 0x8F) return false; break;
- - default: if (a < 0x80) return false;
- - }
- -
- - case 1: if (*source >= 0x80 && *source < 0xC2) return false;
- - }
- - if (*source > 0xF4) return false;
- - return true;
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -/*
- - * Exported function to return whether a UTF-8 sequence is legal or not.
- - * This is not used here; it's just exported.
- - */
- -Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
- - int length = trailingBytesForUTF8[*source]+1;
- - if (source+length > sourceEnd) {
- - return false;
- - }
- - return isLegalUTF8(source, length);
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -ConversionResult ConvertUTF8toUTF16 (
- - const UTF8** sourceStart, const UTF8* sourceEnd,
- - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
- - ConversionResult result = conversionOK;
- - const UTF8* source = *sourceStart;
- - UTF16* target = *targetStart;
- - while (source < sourceEnd) {
- - UTF32 ch = 0;
- - unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- - if (source + extraBytesToRead >= sourceEnd) {
- - result = sourceExhausted; break;
- - }
- - /* Do this check whether lenient or strict */
- - if (! isLegalUTF8(source, extraBytesToRead+1)) {
- - result = sourceIllegal;
- - break;
- - }
- - /*
- - * The cases all fall through. See "Note A" below.
- - */
- - switch (extraBytesToRead) {
- - case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
- - case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
- - case 3: ch += *source++; ch <<= 6;
- - case 2: ch += *source++; ch <<= 6;
- - case 1: ch += *source++; ch <<= 6;
- - case 0: ch += *source++;
- - }
- - ch -= offsetsFromUTF8[extraBytesToRead];
- -
- - if (target >= targetEnd) {
- - source -= (extraBytesToRead+1); /* Back up source pointer! */
- - result = targetExhausted; break;
- - }
- - if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
- - /* UTF-16 surrogate values are illegal in UTF-32 */
- - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
- - if (flags == strictConversion) {
- - source -= (extraBytesToRead+1); /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - } else {
- - *target++ = UNI_REPLACEMENT_CHAR;
- - }
- - } else {
- - *target++ = (UTF16)ch; /* normal case */
- - }
- - } else if (ch > UNI_MAX_UTF16) {
- - if (flags == strictConversion) {
- - result = sourceIllegal;
- - source -= (extraBytesToRead+1); /* return to the start */
- - break; /* Bail out; shouldn't continue */
- - } else {
- - *target++ = UNI_REPLACEMENT_CHAR;
- - }
- - } else {
- - /* target is a character in range 0xFFFF - 0x10FFFF. */
- - if (target + 1 >= targetEnd) {
- - source -= (extraBytesToRead+1); /* Back up source pointer! */
- - result = targetExhausted; break;
- - }
- - ch -= halfBase;
- - *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
- - *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
- - }
- - }
- - *sourceStart = source;
- - *targetStart = target;
- - return result;
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -ConversionResult ConvertUTF32toUTF8 (
- - const UTF32** sourceStart, const UTF32* sourceEnd,
- - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
- - ConversionResult result = conversionOK;
- - const UTF32* source = *sourceStart;
- - UTF8* target = *targetStart;
- - while (source < sourceEnd) {
- - UTF32 ch;
- - unsigned short bytesToWrite = 0;
- - const UTF32 byteMask = 0xBF;
- - const UTF32 byteMark = 0x80;
- - ch = *source++;
- - if (flags == strictConversion ) {
- - /* UTF-16 surrogate values are illegal in UTF-32 */
- - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
- - --source; /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - }
- - }
- - /*
- - * Figure out how many bytes the result will require. Turn any
- - * illegally large UTF32 things (> Plane 17) into replacement chars.
- - */
- - if (ch < (UTF32)0x80) { bytesToWrite = 1;
- - } else if (ch < (UTF32)0x800) { bytesToWrite = 2;
- - } else if (ch < (UTF32)0x10000) { bytesToWrite = 3;
- - } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4;
- - } else { bytesToWrite = 3;
- - ch = UNI_REPLACEMENT_CHAR;
- - result = sourceIllegal;
- - }
- -
- - target += bytesToWrite;
- - if (target > targetEnd) {
- - --source; /* Back up source pointer! */
- - target -= bytesToWrite; result = targetExhausted; break;
- - }
- - switch (bytesToWrite) { /* note: everything falls through. */
- - case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
- - case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
- - case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
- - case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
- - }
- - target += bytesToWrite;
- - }
- - *sourceStart = source;
- - *targetStart = target;
- - return result;
- -}
- -
- -/* --------------------------------------------------------------------- */
- -
- -ConversionResult ConvertUTF8toUTF32 (
- - const UTF8** sourceStart, const UTF8* sourceEnd,
- - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
- - ConversionResult result = conversionOK;
- - const UTF8* source = *sourceStart;
- - UTF32* target = *targetStart;
- - while (source < sourceEnd) {
- - UTF32 ch = 0;
- - unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- - if (source + extraBytesToRead >= sourceEnd) {
- - result = sourceExhausted; break;
- - }
- - /* Do this check whether lenient or strict */
- - if (! isLegalUTF8(source, extraBytesToRead+1)) {
- - result = sourceIllegal;
- - break;
- - }
- - /*
- - * The cases all fall through. See "Note A" below.
- - */
- - switch (extraBytesToRead) {
- - case 5: ch += *source++; ch <<= 6;
- - case 4: ch += *source++; ch <<= 6;
- - case 3: ch += *source++; ch <<= 6;
- - case 2: ch += *source++; ch <<= 6;
- - case 1: ch += *source++; ch <<= 6;
- - case 0: ch += *source++;
- - }
- - ch -= offsetsFromUTF8[extraBytesToRead];
- -
- - if (target >= targetEnd) {
- - source -= (extraBytesToRead+1); /* Back up the source pointer! */
- - result = targetExhausted; break;
- - }
- - if (ch <= UNI_MAX_LEGAL_UTF32) {
- - /*
- - * UTF-16 surrogate values are illegal in UTF-32, and anything
- - * over Plane 17 (> 0x10FFFF) is illegal.
- - */
- - if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
- - if (flags == strictConversion) {
- - source -= (extraBytesToRead+1); /* return to the illegal value itself */
- - result = sourceIllegal;
- - break;
- - } else {
- - *target++ = UNI_REPLACEMENT_CHAR;
- - }
- - } else {
- - *target++ = ch;
- - }
- - } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
- - result = sourceIllegal;
- - *target++ = UNI_REPLACEMENT_CHAR;
- - }
- - }
- - *sourceStart = source;
- - *targetStart = target;
- - return result;
- -}
- -
- -/* ---------------------------------------------------------------------
- -
- - Note A.
- - The fall-through switches in UTF-8 reading code save a
- - temp variable, some decrements & conditionals. The switches
- - are equivalent to the following loop:
- - {
- - int tmpBytesToRead = extraBytesToRead+1;
- - do {
- - ch += *source++;
- - --tmpBytesToRead;
- - if (tmpBytesToRead) ch <<= 6;
- - } while (tmpBytesToRead > 0);
- - }
- - In UTF-8 writing code, the switches on "bytesToWrite" are
- - similarly unrolled loops.
- -
- - --------------------------------------------------------------------- */
- diff --git a/base/ConvertUTF.h b/base/ConvertUTF.h
- deleted file mode 100644
- index 538bec6..0000000
- --- a/base/ConvertUTF.h
- +++ /dev/null
- @@ -1,155 +0,0 @@
- -/*
- - * Copyright 2001-2004 Unicode, Inc.
- - *
- - * Disclaimer
- - *
- - * This source code is provided as is by Unicode, Inc. No claims are
- - * made as to fitness for any particular purpose. No warranties of any
- - * kind are expressed or implied. The recipient agrees to determine
- - * applicability of information provided. If this file has been
- - * purchased on magnetic or optical media from Unicode, Inc., the
- - * sole remedy for any claim will be exchange of defective media
- - * within 90 days of receipt.
- - *
- - * Limitations on Rights to Redistribute This Code
- - *
- - * Unicode, Inc. hereby grants the right to freely use the information
- - * supplied in this file in the creation of products supporting the
- - * Unicode Standard, and to make copies of this file in any form
- - * for internal or external distribution as long as this notice
- - * remains attached.
- - */
- -
- -
- -#ifndef ConvertUTF_INCLUDED
- -#define ConvertUTF_INCLUDED
- -
- -/* ---------------------------------------------------------------------
- -
- - Conversions between UTF32, UTF-16, and UTF-8. Header file.
- -
- - Several funtions are included here, forming a complete set of
- - conversions between the three formats. UTF-7 is not included
- - here, but is handled in a separate source file.
- -
- - Each of these routines takes pointers to input buffers and output
- - buffers. The input buffers are const.
- -
- - Each routine converts the text between *sourceStart and sourceEnd,
- - putting the result into the buffer between *targetStart and
- - targetEnd. Note: the end pointers are *after* the last item: e.g.
- - *(sourceEnd - 1) is the last item.
- -
- - The return result indicates whether the conversion was successful,
- - and if not, whether the problem was in the source or target buffers.
- - (Only the first encountered problem is indicated.)
- -
- - After the conversion, *sourceStart and *targetStart are both
- - updated to point to the end of last text successfully converted in
- - the respective buffers.
- -
- - Input parameters:
- - sourceStart - pointer to a pointer to the source buffer.
- - The contents of this are modified on return so that
- - it points at the next thing to be converted.
- - targetStart - similarly, pointer to pointer to the target buffer.
- - sourceEnd, targetEnd - respectively pointers to the ends of the
- - two buffers, for overflow checking only.
- -
- - These conversion functions take a ConversionFlags argument. When this
- - flag is set to strict, both irregular sequences and isolated surrogates
- - will cause an error. When the flag is set to lenient, both irregular
- - sequences and isolated surrogates are converted.
- -
- - Whether the flag is strict or lenient, all illegal sequences will cause
- - an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
- - or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
- - must check for illegal sequences.
- -
- - When the flag is set to lenient, characters over 0x10FFFF are converted
- - to the replacement character; otherwise (when the flag is set to strict)
- - they constitute an error.
- -
- - Output parameters:
- - The value "sourceIllegal" is returned from some routines if the input
- - sequence is malformed. When "sourceIllegal" is returned, the source
- - value will point to the illegal value that caused the problem. E.g.,
- - in UTF-8 when a sequence is malformed, it points to the start of the
- - malformed sequence.
- -
- - Author: Mark E. Davis, 1994.
- - Rev History: Rick McGowan, fixes & updates May 2001.
- - Fixes & updates, Sept 2001.
- -
- ------------------------------------------------------------------------- */
- -
- -/* ---------------------------------------------------------------------
- - The following 4 definitions are compiler-specific.
- - The C standard does not guarantee that wchar_t has at least
- - 16 bits, so wchar_t is no less portable than unsigned short!
- - All should be unsigned values to avoid sign extension during
- - bit mask & shift operations.
- ------------------------------------------------------------------------- */
- -
- -typedef unsigned long UTF32; /* at least 32 bits */
- -typedef unsigned short UTF16; /* at least 16 bits */
- -typedef unsigned char UTF8; /* typically 8 bits */
- -typedef unsigned char Boolean; /* 0 or 1 */
- -
- -/* Some fundamental constants */
- -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
- -#define UNI_MAX_BMP (UTF32)0x0000FFFF
- -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
- -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
- -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
- -
- -typedef enum {
- - conversionOK, /* conversion successful */
- - sourceExhausted, /* partial character in source, but hit end */
- - targetExhausted, /* insuff. room in target for conversion */
- - sourceIllegal /* source sequence is illegal/malformed */
- -} ConversionResult;
- -
- -typedef enum {
- - strictConversion = 0,
- - lenientConversion
- -} ConversionFlags;
- -
- -/* This is for C++ and does no harm in C */
- -#ifdef __cplusplus
- -extern "C" {
- -#endif
- -
- -ConversionResult ConvertUTF8toUTF16 (
- - const UTF8** sourceStart, const UTF8* sourceEnd,
- - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
- -
- -ConversionResult ConvertUTF16toUTF8 (
- - const UTF16** sourceStart, const UTF16* sourceEnd,
- - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
- -
- -ConversionResult ConvertUTF8toUTF32 (
- - const UTF8** sourceStart, const UTF8* sourceEnd,
- - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
- -
- -ConversionResult ConvertUTF32toUTF8 (
- - const UTF32** sourceStart, const UTF32* sourceEnd,
- - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
- -
- -ConversionResult ConvertUTF16toUTF32 (
- - const UTF16** sourceStart, const UTF16* sourceEnd,
- - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
- -
- -ConversionResult ConvertUTF32toUTF16 (
- - const UTF32** sourceStart, const UTF32* sourceEnd,
- - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
- -
- -Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
- -
- -#ifdef __cplusplus
- -}
- -#endif
- -
- -/* --------------------------------------------------------------------- */
- -
- -#endif /* ConvertUTF_INCLUDED */
- diff --git a/base/lib.mak b/base/lib.mak
- index 173e2c6..2de6565 100644
- --- a/base/lib.mak
- +++ b/base/lib.mak
- @@ -52,7 +52,6 @@ GLLCMS2CC=$(CC) $(LCMS2_CFLAGS) $(CFLAGS) $(I_)$(GLI_) $(II)$(LCMS2SRCDIR)$(D)in
- lcms2_h=$(LCMS2SRCDIR)$(D)include$(D)lcms2.h
- lcms2_plugin_h=$(LCMS2SRCDIR)$(D)include$(D)lcms2_plugin.h
-
- -ConvertUTF_h=$(GLSRC)ConvertUTF.h
- gdevdcrd_h=$(GLSRC)gdevdcrd.h
- gdevpccm_h=$(GLSRC)gdevpccm.h
-
- @@ -1097,9 +1096,6 @@ $(GLOBJ)gdevpccm.$(OBJ) : $(GLSRC)gdevpccm.c $(AK)\
- $(gx_h) $(gsmatrix_h) $(gxdevice_h) $(gdevpccm_h) $(LIB_MAK) $(MAKEDIRS)
- $(GLCC) $(GLO_)gdevpccm.$(OBJ) $(C_) $(GLSRC)gdevpccm.c
-
- -$(GLOBJ)ConvertUTF.$(OBJ) : $(GLSRC)ConvertUTF.c $(ConvertUTF_h) $(LIB_MAK) $(MAKEDIRS)
- - $(GLCC) $(GLO_)ConvertUTF.$(OBJ) $(C_) $(GLSRC)ConvertUTF.c
- -
- ### Memory devices
-
- $(GLOBJ)gdevmem.$(OBJ) : $(GLSRC)gdevmem.c $(AK) $(gx_h) $(gserrors_h) \
- diff --git a/devices/devs.mak b/devices/devs.mak
- index ea27ab0..51ec363 100644
- --- a/devices/devs.mak
- +++ b/devices/devs.mak
- @@ -835,9 +835,8 @@ pdfwrite5_=$(DEVOBJ)gdevpdfm.$(OBJ)
- pdfwrite6_=$(DEVOBJ)gdevpdfo.$(OBJ) $(DEVOBJ)gdevpdfp.$(OBJ) $(DEVOBJ)gdevpdft.$(OBJ)
- pdfwrite7_=$(DEVOBJ)gdevpdfr.$(OBJ)
- pdfwrite8_=$(DEVOBJ)gdevpdfu.$(OBJ) $(DEVOBJ)gdevpdfv.$(OBJ) $(DEVOBJ)gdevagl.$(OBJ)
- -pdfwrite9_= $(GLOBJ)ConvertUTF.$(OBJ)
- -pdfwrite10_=$(DEVOBJ)gsflip.$(OBJ)
- -pdfwrite11_=$(DEVOBJ)scantab.$(OBJ) $(DEVOBJ)sfilter2.$(OBJ)
- +pdfwrite9_=$(DEVOBJ)gsflip.$(OBJ)
- +pdfwrite10_=$(DEVOBJ)scantab.$(OBJ) $(DEVOBJ)sfilter2.$(OBJ)
- pdfwrite_=$(pdfwrite1_) $(pdfwrite2_) $(pdfwrite3_) $(pdfwrite4_)\
- $(pdfwrite5_) $(pdfwrite6_) $(pdfwrite7_) $(pdfwrite8_) $(pdfwrite9_)\
- $(pdfwrite10_) $(pdfwrite11_)
- diff --git a/devices/vector/gdevpdf.c b/devices/vector/gdevpdf.c
- index 2b3186d..20e0ae8 100644
- --- a/devices/vector/gdevpdf.c
- +++ b/devices/vector/gdevpdf.c
- @@ -111,14 +111,13 @@ ENUM_PTRS_WITH(device_pdfwrite_enum_ptrs, gx_device_pdf *pdev)
- ENUM_PTR(32, gx_device_pdf, pres_soft_mask_dict);
- ENUM_PTR(33, gx_device_pdf, PDFXTrimBoxToMediaBoxOffset.data);
- ENUM_PTR(34, gx_device_pdf, PDFXBleedBoxToTrimBoxOffset.data);
- - ENUM_PTR(35, gx_device_pdf, DSCEncodingToUnicode.data);
- - ENUM_PTR(36, gx_device_pdf, Identity_ToUnicode_CMaps[0]);
- - ENUM_PTR(37, gx_device_pdf, Identity_ToUnicode_CMaps[1]);
- - ENUM_PTR(38, gx_device_pdf, vgstack);
- - ENUM_PTR(39, gx_device_pdf, outline_levels);
- - ENUM_PTR(40, gx_device_pdf, EmbeddedFiles);
- - ENUM_PTR(41, gx_device_pdf, pdf_font_dir);
- - ENUM_PTR(42, gx_device_pdf, ExtensionMetadata);
- + ENUM_PTR(35, gx_device_pdf, Identity_ToUnicode_CMaps[0]);
- + ENUM_PTR(36, gx_device_pdf, Identity_ToUnicode_CMaps[1]);
- + ENUM_PTR(37, gx_device_pdf, vgstack);
- + ENUM_PTR(38, gx_device_pdf, outline_levels);
- + ENUM_PTR(39, gx_device_pdf, EmbeddedFiles);
- + ENUM_PTR(40, gx_device_pdf, pdf_font_dir);
- + ENUM_PTR(41, gx_device_pdf, ExtensionMetadata);
- #define e1(i,elt) ENUM_PARAM_STRING_PTR(i + gx_device_pdf_num_ptrs, gx_device_pdf, elt);
- gx_device_pdf_do_param_strings(e1)
- #undef e1
- @@ -165,7 +164,6 @@ static RELOC_PTRS_WITH(device_pdfwrite_reloc_ptrs, gx_device_pdf *pdev)
- RELOC_PTR(gx_device_pdf, pres_soft_mask_dict);
- RELOC_PTR(gx_device_pdf, PDFXTrimBoxToMediaBoxOffset.data);
- RELOC_PTR(gx_device_pdf, PDFXBleedBoxToTrimBoxOffset.data);
- - RELOC_PTR(gx_device_pdf, DSCEncodingToUnicode.data);
- RELOC_PTR(gx_device_pdf, Identity_ToUnicode_CMaps[0]);
- RELOC_PTR(gx_device_pdf, Identity_ToUnicode_CMaps[1]);
- RELOC_PTR(gx_device_pdf, vgstack);
- diff --git a/devices/vector/gdevpdfb.h b/devices/vector/gdevpdfb.h
- index 08f18c5..447f0f5 100644
- --- a/devices/vector/gdevpdfb.h
- +++ b/devices/vector/gdevpdfb.h
- @@ -141,7 +141,6 @@ const gx_device_pdf PDF_DEVICE_IDENT =
- 12000, /* MaxClipPathSize */ /* HP LaserJet 1320 hangs with 14000. */
- 256000, /* MaxShadingBitmapSize */
- PDF_DEVICE_MaxInlineImageSize, /* MaxInlineImageSize */
- - {0, 0}, /* DSCEncodingToUnicode */
- {0, 0, 0}, /* OwnerPassword */
- {0, 0, 0}, /* UserPassword */
- 0, /* KeyLength */
- diff --git a/devices/vector/gdevpdfe.c b/devices/vector/gdevpdfe.c
- index 1aa1f25..f23a02d 100644
- --- a/devices/vector/gdevpdfe.c
- +++ b/devices/vector/gdevpdfe.c
- @@ -26,7 +26,6 @@
- #include "gdevpdfx.h"
- #include "gdevpdfg.h"
- #include "gdevpdfo.h"
- -#include "ConvertUTF.h"
-
- char PDFDocEncodingLookup [92] = {
- 0x20, 0x22, 0x20, 0x20, 0x20, 0x21, 0x20, 0x26,
- @@ -343,155 +342,162 @@ decode_escape(const byte *data, int data_length, int *index)
- return c; /* A wrong escapement sequence. */
- }
-
- -static int
- -pdf_xmp_write_translated(gx_device_pdf *pdev, stream *s, const byte *data, int data_length,
- - void(*write)(stream *s, const byte *data, int data_length))
- +/*
- + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
- + * into the first byte, depending on how many bytes follow. There are
- + * as many entries in this table as there are UTF-8 sequence types.
- + * (I.e., one byte sequence, two byte... etc.). Remember that sequences
- + * for *legal* UTF-8 will be 4 or fewer bytes total.
- + */
- +static const char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
- +
- +static int gs_ConvertUTF16(char *UTF16, int UTF16Len, unsigned char **UTF8Start, int UTF8Len)
- {
- - if (pdev->DSCEncodingToUnicode.data == 0) {
- - int i, j=0;
- - unsigned char *buf0;
- + int i, bytes = 0;
- + short U16;
- + unsigned char *UTF8 = *UTF8Start;
- + unsigned char *UTF8End = UTF8 + UTF8Len;
-
- - buf0 = (unsigned char *)gs_alloc_bytes(pdev->memory, data_length * sizeof(unsigned char),
- - "pdf_xmp_write_translated");
- - if (buf0 == NULL)
- - return_error(gs_error_VMerror);
- - for (i = 0; i < data_length; i++) {
- - byte c = data[i];
- + if (fabs(UTF16Len % sizeof(short)) != 0)
- + return gs_note_error(gs_error_rangecheck);
- +
- + for (i=0;i<UTF16Len / sizeof(short);i++)
- + {
- + U16 = (*UTF16++) << 8;
- + U16 += *UTF16++;
-
- - if (c == '\\')
- - c = decode_escape(data, data_length, &i);
- - buf0[j] = c;
- - j++;
- + if (U16 >= 0xD800 && U16 <= 0xDBFF) {
- + return gs_note_error(gs_error_rangecheck);
- }
- - if (buf0[0] != 0xfe || buf0[1] != 0xff) {
- - unsigned char *buf1;
- - /* We must assume that the information is PDFDocEncoding. In this case
- - * we need to convert it into UTF-8. If we just convert it to UTF-16
- - * then we can safely fall through to the code below.
- - */
- - /* NB the code below skips the BOM in positions 0 and 1, so we need
- - * two extra bytes, to be ignored.
- - */
- - buf1 = (unsigned char *)gs_alloc_bytes(pdev->memory, (j * sizeof(UTF16)) + 2,
- - "pdf_xmp_write_translated");
- - if (buf1 == NULL) {
- - gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - return_error(gs_error_VMerror);
- - }
- - memset(buf1, 0x00, (j * sizeof(UTF16)) + 2);
- - for (i = 0; i < j; i++) {
- - if (buf0[i] <= 0x7f || buf0[i] >= 0xAE) {
- - if (buf0[i] == 0x7f) {
- - emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n",
- - buf0[i]);
- - } else
- - buf1[(i * 2) + 3] = buf0[i];
- + if (U16 >= 0xDC00 && U16 <= 0xDFFF) {
- + return gs_note_error(gs_error_rangecheck);
- + }
- +
- + if(U16 < 0x80) {
- + bytes = 1;
- + } else {
- + if (U16 < 0x800) {
- + bytes = 2;
- + } else {
- + if (U16 < 0x10000) {
- + bytes = 3;
- } else {
- - buf1[(i * 2) + 2] = PDFDocEncodingLookup[(buf0[i] - 0x80) * 2];
- - buf1[(i * 2) + 3] = PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1];
- - if (PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1] == 0x00)
- - emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n",
- - PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1]);
- + if (U16 < 0x111000) {
- + bytes = 4;
- + } else {
- + bytes = 3;
- + U16 = 0xFFFD;
- + }
- }
- }
- + }
- + if (UTF8 + bytes > UTF8End)
- + return gs_note_error(gs_error_VMerror);
- +
- + /* Write from end to beginning, low bytes first */
- + UTF8 += bytes;
- +
- + switch(bytes) {
- + case 4:
- + *--UTF8 = (unsigned char)((U16 | 0x80) & 0xBF);
- + U16 >>= 6;
- + case 3:
- + *--UTF8 = (unsigned char)((U16 | 0x80) & 0xBF);
- + U16 >>= 6;
- + case 2:
- + *--UTF8 = (unsigned char)((U16 | 0x80) & 0xBF);
- + U16 >>= 6;
- + case 1:
- + *--UTF8 = (unsigned char)(U16 | firstByteMark[bytes]);
- + break;
- + default:
- + return gs_note_error(gs_error_rangecheck);
- + }
- +
- + /* Move to start of next set */
- + UTF8 += bytes;
- + }
- + *UTF8Start = UTF8;
- + return 0;
- +}
- +
- +static int
- +pdf_xmp_write_translated(gx_device_pdf *pdev, stream *s, const byte *data, int data_length,
- + void(*write)(stream *s, const byte *data, int data_length))
- +{
- + int i, j=0;
- + unsigned char *buf0;
- +
- + buf0 = (unsigned char *)gs_alloc_bytes(pdev->memory, data_length * sizeof(unsigned char),
- + "pdf_xmp_write_translated");
- + if (buf0 == NULL)
- + return_error(gs_error_VMerror);
- + for (i = 0; i < data_length; i++) {
- + byte c = data[i];
- +
- + if (c == '\\')
- + c = decode_escape(data, data_length, &i);
- + buf0[j] = c;
- + j++;
- + }
- + if (buf0[0] != 0xfe || buf0[1] != 0xff) {
- + unsigned char *buf1;
- + /* We must assume that the information is PDFDocEncoding. In this case
- + * we need to convert it into UTF-8. If we just convert it to UTF-16
- + * then we can safely fall through to the code below.
- + */
- + /* NB the code below skips the BOM in positions 0 and 1, so we need
- + * two extra bytes, to be ignored.
- + */
- + buf1 = (unsigned char *)gs_alloc_bytes(pdev->memory, (j * sizeof(short)) + 2,
- + "pdf_xmp_write_translated");
- + if (buf1 == NULL) {
- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - buf0 = buf1;
- - data_length = j = (j * 2) + 2;
- + return_error(gs_error_VMerror);
- }
- - {
- - /* Its a Unicode (UTF-16BE) string, convert to UTF-8 */
- - UTF16 *buf0b, U16;
- - UTF8 *buf1, *buf1b;
- -
- - /* A single UTF-16 (2 bytes) can end up as 4 bytes in UTF-8 */
- - buf1 = (UTF8 *)gs_alloc_bytes(pdev->memory, data_length * 2 * sizeof(unsigned char),
- - "pdf_xmp_write_translated");
- - if (buf1 == NULL) {
- - gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - return_error(gs_error_VMerror);
- - }
- - buf1b = buf1;
- - /* Skip the Byte Order Mark (0xfe 0xff) */
- - buf0b = (UTF16 *)(buf0 + 2);
- - /* ConvertUTF16to UTF8 expects a buffer of UTF16s in the local
- - * endian-ness, but the data is big-endian. In case this is a little-endian
- - * machine, process the buffer from big-endian to whatever is right for this platform.
- - */
- - for (i = 2; i < j; i+=2) {
- - U16 = (buf0[i] << 8) + buf0[i + 1];
- - *(buf0b++) = U16;
- - }
- - buf0b = (UTF16 *)(buf0 + 2);
- - switch (ConvertUTF16toUTF8((const UTF16**)&buf0b, (UTF16 *)(buf0 + j),
- - &buf1b, buf1 + (data_length * 2 * sizeof(unsigned char)), strictConversion)) {
- - case conversionOK:
- - write(s, buf1, buf1b - buf1);
- - gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated");
- - break;
- - case sourceExhausted:
- - case targetExhausted:
- - case sourceIllegal:
- - default:
- - gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated");
- - return_error(gs_error_rangecheck);
- + memset(buf1, 0x00, (j * sizeof(short)) + 2);
- + for (i = 0; i < j; i++) {
- + if (buf0[i] <= 0x7f || buf0[i] >= 0xAE) {
- + if (buf0[i] == 0x7f) {
- + emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n",
- + buf0[i]);
- + } else
- + buf1[(i * 2) + 3] = buf0[i];
- + } else {
- + buf1[(i * 2) + 2] = PDFDocEncodingLookup[(buf0[i] - 0x80) * 2];
- + buf1[(i * 2) + 3] = PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1];
- + if (PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1] == 0x00)
- + emprintf1(pdev->memory, "PDFDocEncoding %x cannot be represented in Unicode\n",
- + PDFDocEncodingLookup[((buf0[i] - 0x80) * 2) + 1]);
- }
- }
- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - return 0;
- - } else {
- - UTF16 *buf0;
- - const UTF16 *buf0b;
- - UTF8 *buf1, *buf1b;
- - int i, j = 0;
- -
- - buf0 = (UTF16 *)gs_alloc_bytes(pdev->memory, data_length * sizeof(UTF16),
- - "pdf_xmp_write_translated");
- - if (buf0 == NULL)
- - return_error(gs_error_VMerror);
- - buf1 = (UTF8 *)gs_alloc_bytes(pdev->memory, data_length * 2,
- - "pdf_xmp_write_translated");
- + buf0 = buf1;
- + data_length = j = (j * 2) + 2;
- + }
- + {
- + /* It's a Unicode (UTF-16BE) string, convert to UTF-8 */
- + short *buf0b;
- + char *buf1, *buf1b;
- + int code;
- +
- + /* A single UTF-16 (2 bytes) can end up as 4 bytes in UTF-8 */
- + buf1 = (char *)gs_alloc_bytes(pdev->memory, data_length * 2 * sizeof(unsigned char),
- + "pdf_xmp_write_translated");
- if (buf1 == NULL) {
- gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- return_error(gs_error_VMerror);
- }
- - buf0b = buf0;
- buf1b = buf1;
- - for (i = 0; i < data_length; i++) {
- - byte c = data[i];
- - int v;
- -
- - if (c == '\\')
- - c = decode_escape(data, data_length, &i);
- - if (c > pdev->DSCEncodingToUnicode.size) {
- - gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated");
- - return_error(gs_error_rangecheck);
- - }
- -
- - v = pdev->DSCEncodingToUnicode.data[c];
- - if (v == -1)
- - v = '?'; /* Arbitrary. */
- - buf0[j] = v;
- - j++;
- - }
- - switch (ConvertUTF16toUTF8(&buf0b, buf0 + j,
- - &buf1b, buf1 + data_length * 2, strictConversion)) {
- - case conversionOK:
- - write(s, buf1, buf1b - buf1);
- - break;
- - case sourceExhausted:
- - case targetExhausted:
- - case sourceIllegal:
- - default:
- - gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated");
- - return_error(gs_error_rangecheck);
- - }
- - gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- - gs_free_object(pdev->memory, buf1, "pdf_xmp_write_translated");
- - return 0;
- + /* Skip the Byte Order Mark (0xfe 0xff) */
- + buf0b = (short *)(buf0 + 2);
- + code = gs_ConvertUTF16((char *)buf0b, j - 2, (unsigned char **)&buf1b, data_length * 2 * sizeof(unsigned char));
- + if (code < 0)
- + return code;
- + write(s, (const byte *)buf1, buf1b - buf1);
- }
- + gs_free_object(pdev->memory, buf0, "pdf_xmp_write_translated");
- + return 0;
- }
-
- static int
- diff --git a/devices/vector/gdevpdfp.c b/devices/vector/gdevpdfp.c
- index 0fa07e3..6ebcb0d 100644
- --- a/devices/vector/gdevpdfp.c
- +++ b/devices/vector/gdevpdfp.c
- @@ -77,7 +77,6 @@ static const gs_param_item_t pdf_param_items[] = {
- pi("CompressStreams", gs_param_type_bool, CompressStreams),
- pi("PrintStatistics", gs_param_type_bool, PrintStatistics),
- pi("MaxInlineImageSize", gs_param_type_long, MaxInlineImageSize),
- - pi("DSCEncodingToUnicode", gs_param_type_int_array, DSCEncodingToUnicode),
-
- /* PDF Encryption */
- pi("OwnerPassword", gs_param_type_string, OwnerPassword),
- diff --git a/devices/vector/gdevpdfx.h b/devices/vector/gdevpdfx.h
- index 308900a..c436220 100644
- --- a/devices/vector/gdevpdfx.h
- +++ b/devices/vector/gdevpdfx.h
- @@ -601,7 +601,6 @@ struct gx_device_pdf_s {
- a bitmap representation of a shading.
- (Bigger shadings to be downsampled). */
- long MaxInlineImageSize;
- - gs_param_int_array DSCEncodingToUnicode;
- /* Encryption parameters */
- gs_param_string OwnerPassword;
- gs_param_string UserPassword;
- @@ -911,14 +910,14 @@ struct gx_device_pdf_s {
- m(28,sbstack) m(29,substream_Resources) m(30,font3)\
- m(31,accumulating_substream_resource) \
- m(32,pres_soft_mask_dict) m(33,PDFXTrimBoxToMediaBoxOffset.data)\
- - m(34,PDFXBleedBoxToTrimBoxOffset.data) m(35, DSCEncodingToUnicode.data)\
- - m(36,Identity_ToUnicode_CMaps[0]) m(37,Identity_ToUnicode_CMaps[1])\
- - m(38,vgstack)\
- - m(39, outline_levels)
- - m(40, gx_device_pdf, EmbeddedFiles);
- - m(41, gx_device_pdf, pdf_font_dir);
- - m(42, gx_device_pdf, Extension_Metadata);*/
- -#define gx_device_pdf_num_ptrs 43
- + m(34,PDFXBleedBoxToTrimBoxOffset.data)
- + m(35,Identity_ToUnicode_CMaps[0]) m(36,Identity_ToUnicode_CMaps[1])\
- + m(37,vgstack)\
- + m(38, outline_levels)
- + m(39, gx_device_pdf, EmbeddedFiles);
- + m(40, gx_device_pdf, pdf_font_dir);
- + m(41, gx_device_pdf, Extension_Metadata);*/
- +#define gx_device_pdf_num_ptrs 42
- #define gx_device_pdf_do_param_strings(m)\
- m(0, OwnerPassword) m(1, UserPassword) m(2, NoEncrypt)\
- m(3, DocumentUUID) m(4, InstanceUUID)
- diff --git a/windows/ghostscript.vcproj b/windows/ghostscript.vcproj
- index a96d317..450cb26 100644
- --- a/windows/ghostscript.vcproj
- +++ b/windows/ghostscript.vcproj
- @@ -1794,10 +1794,6 @@
- >
- </File>
- <File
- - RelativePath="..\base\ConvertUTF.c"
- - >
- - </File>
- - <File
- RelativePath="..\base\echogs.c"
- >
- </File>
- @@ -3330,10 +3326,6 @@
- >
- </File>
- <File
- - RelativePath="..\base\ConvertUTF.h"
- - >
- - </File>
- - <File
- RelativePath="..\base\ctype_.h"
- >
- </File>
- diff --git a/windows/ghostscript_rt.vcxproj b/windows/ghostscript_rt.vcxproj
- index 2348f08..fae2e1f 100644
- --- a/windows/ghostscript_rt.vcxproj
- +++ b/windows/ghostscript_rt.vcxproj
- @@ -427,7 +427,6 @@
- <ItemGroup>
- <ClCompile Include="..\base\aes.c" />
- <ClCompile Include="..\base\bench.c" />
- - <ClCompile Include="..\base\ConvertUTF.c" />
- <ClCompile Include="..\base\echogs.c" />
- <ClCompile Include="..\base\gconf.c" />
- <ClCompile Include="..\base\genarch.c" />
- @@ -1689,7 +1688,6 @@
- <ClInclude Include="..\jasper\src\libjasper\ras\ras_enc.h" />
- <ClInclude Include="..\base\aes.h" />
- <ClInclude Include="..\base\assert_.h" />
- - <ClInclude Include="..\base\ConvertUTF.h" />
- <ClInclude Include="..\base\ctype_.h" />
- <ClInclude Include="..\base\dirent_.h" />
- <ClInclude Include="..\base\dos_.h" />
- --
- 2.9.3
|