guess.c 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <sys/types.h>
  4. #include <sys/stat.h>
  5. #include "kcc.h"
  6. #include "libkcc.h"
  7. /**********************************************************************
  8. * *
  9. * Guessing *
  10. * *
  11. **********************************************************************/
/*---------------------------------------------------------------------
    NAME
	Kcc_guess - guess the character code system(s) used in a buffer
 ---------------------------------------------------------------------*/
  16. unsigned Kcc_guess(str, len, extend, zenkaku, gsmode, insi, inso, innj, ingj)
  17. char *str;
  18. int len, extend;
  19. enum mode *gsmode;
  20. unsigned long *insi, *inso, *innj, *ingj;
  21. bool zenkaku;
  22. {
  23. register char *s;
  24. register int euc, sjis, dec;
  25. bool jis8;
  26. register unsigned code;
  27. register int i;
  28. enum mode old;
  29. euc = sjis = 1;
  30. dec = extend ? 1 : 0;
  31. jis8 = 1;
  32. code = 0;
  33. for (s = str; s < str + len; s += i) {
  34. i = 1;
  35. switch (*(u_char *) s) {
  36. case ESC:
  37. if (*gsmode == M_SO)
  38. continue;
  39. old = *gsmode;
  40. if (Kcc_compare("$B", s + 1) || Kcc_compare("$@", s + 1)) {
  41. *gsmode = M_KANJI; /* kanji */
  42. *insi |= bitflag(((u_char *) s)[2]);
  43. i = 3;
  44. } else if (Kcc_compare("&@\033$B", s + 1)) {
  45. *gsmode = M_KANJI; /* kanji 1990 */
  46. *innj |= bitflag('B');
  47. i = 6;
  48. } else if (Kcc_compare("(B", s + 1) ||
  49. Kcc_compare("(J", s + 1) || Kcc_compare("(H", s + 1)) {
  50. *gsmode = M_ASCII; /* kanji end */
  51. *inso |= bitflag(((u_char *) s)[2]);
  52. i = 3;
  53. } else if (Kcc_compare("(I", s + 1)) {
  54. *gsmode = M_KANJI; /* "ESC(I" */
  55. *inso |= bitflag('I');
  56. i = 3;
  57. } else if (Kcc_compare("$(D", s + 1)) {
  58. *gsmode = M_KANJI; /* gaiji */
  59. *ingj |= bitflag('D');
  60. i = 4;
  61. } else
  62. break;
  63. code |= JIS;
  64. if (old != M_ASCII)
  65. continue;
  66. break;
  67. case SO:
  68. if (*gsmode == M_ASCII) {
  69. code |= JIS;
  70. *gsmode = M_SO;
  71. break;
  72. }
  73. continue;
  74. case SI:
  75. if (*gsmode == M_SO) {
  76. *gsmode = M_ASCII;
  77. continue;
  78. }
  79. /* fall thru */
  80. default:
  81. if (*gsmode != M_ASCII)
  82. continue;
  83. break;
  84. }
  85. if (*(u_char *) s & 0x80)
  86. code |= NONASCII;
  87. switch (euc) {
  88. case 1:
  89. /*
  90. * EUC first byte.
  91. */
  92. if (*(u_char *) s & 0x80) {
  93. if ((0xa0 < *(u_char *) s && *(u_char *) s < 0xff) ||
  94. (!zenkaku && *(u_char *) s == SS2)) {
  95. euc = 2;
  96. break;
  97. }
  98. if (extend) {
  99. if (*(u_char *) s == SS3) {
  100. euc = 2;
  101. break;
  102. } else if (*(u_char *) s < 0xa0)
  103. break;
  104. }
  105. euc = 0; /* not EUC */
  106. }
  107. break;
  108. case 2:
  109. /*
  110. * EUC second byte or third byte of CS3.
  111. */
  112. if (((u_char *) s)[-1] == SS2) {
  113. if (0xa0 < *(u_char *) s &&
  114. *(u_char *) s < (extend ? 0xff : 0xe0)) {
  115. euc = 1; /* hankaku kana */
  116. break;
  117. }
  118. } else
  119. if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff) {
  120. if (((u_char *) s)[-1] != SS3)
  121. euc = 1;/* zenkaku */
  122. break;
  123. }
  124. euc = 0; /* not EUC */
  125. break;
  126. }
  127. if (extend)
  128. switch (dec) {
  129. case 1:
  130. /*
  131. * DEC first byte.
  132. */
  133. if (*(u_char *) s & 0x80) {
  134. if (0xa0 < *(u_char *) s && *(u_char *) s < 0xff) {
  135. dec = 2;
  136. break;
  137. } else if (*(u_char *) s < 0xa0)
  138. break;
  139. dec = 0; /* not DEC */
  140. }
  141. break;
  142. case 2:
  143. /*
  144. * DEC second byte.
  145. */
  146. if (0x20 < (*(u_char *) s & 0x7f) &&
  147. (*(u_char *) s & 0x7f) < 0x7f) {
  148. dec = 1;
  149. } else
  150. dec = 0; /* not DEC */
  151. break;
  152. }
  153. switch (sjis) {
  154. case 1:
  155. /*
  156. * shift-JIS first byte.
  157. */
  158. if (*(u_char *) s & 0x80) {
  159. if (0xa0 < *(u_char *) s && *(u_char *) s < 0xe0) {
  160. if (!zenkaku)
  161. break; /* hankaku */
  162. } else if (*(u_char *) s != 0x80 &&
  163. *(u_char *) s != 0xa0 &&
  164. *(u_char *) s <= (extend ? 0xfc : 0xef)) {
  165. sjis = 2; /* zenkaku */
  166. jis8 = 0;
  167. break;
  168. }
  169. sjis = 0; /* not SJIS */
  170. }
  171. break;
  172. case 2:
  173. /*
  174. * shift-JIS second byte.
  175. */
  176. if (0x40 <= *(u_char *) s && *(u_char *) s != 0x7f &&
  177. *(u_char *) s <= 0xfc)
  178. sjis = 1;
  179. else
  180. sjis = 0; /* not SJIS */
  181. break;
  182. }
  183. }
  184. if (euc == 1)
  185. code |= EUC;
  186. if (dec == 1)
  187. code |= DEC;
  188. if (sjis == 1)
  189. code |= zenkaku || !jis8 ? SJIS : SJIS | JIS8;
  190. return (code);
  191. }