iso-2022-jp.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. import { inRange, decoderError, encoderError, isASCIICodePoint,
  2. end_of_stream, finished, floor } from './text_decoder_utils.js'
  3. import index, { indexCodePointFor, indexPointerFor } from './text_decoder_indexes.js'
  4. // 13.2 iso-2022-jp
  5. // 13.2.1 iso-2022-jp decoder
  /**
   * Streaming decoder for iso-2022-jp (Encoding Standard, 13.2.1).
   *
   * The decoder is a byte-at-a-time state machine. Besides the current
   * state it tracks:
   *  - the "output state": the character set most recently selected by
   *    a valid escape sequence, restored when an escape turns out to be
   *    malformed;
   *  - the pending lead byte (also reused to hold the first byte that
   *    follows ESC);
   *  - the "output flag": set by a valid escape sequence and unset when
   *    a byte is consumed in a character-set state or a malformed escape
   *    is seen, so that an escape arriving while it is still set is
   *    reported as an error.
   *
   * @implements {Decoder}
   */
  export class ISO2022JPDecoder {
    /**
     * @param {Object} options Decoder options. Only `fatal` is read; it
     *     is stored and handed to decoderError on malformed input.
     */
    constructor(options) {
      const { fatal } = options
      this.fatal = fatal
      /** @enum */
      this.states = {
        ASCII: 0,
        Roman: 1,
        Katakana: 2,
        LeadByte: 3,
        TrailByte: 4,
        EscapeStart: 5,
        Escape: 6,
      }
      // iso-2022-jp decoder state (initially ASCII), iso-2022-jp decoder
      // output state (initially ASCII), iso-2022-jp lead (initially
      // 0x00), and iso-2022-jp output flag (initially unset).
      this.iso2022jp_decoder_state = this.states.ASCII
      this.iso2022jp_decoder_output_state = this.states.ASCII
      this.iso2022jp_lead = 0x00
      this.iso2022jp_output_flag = false
    }

    /**
     * Consumes one byte and advances the state machine.
     *
     * @param {Stream} stream The stream of bytes being decoded; bytes
     *     are pushed back onto it with `prepend` when an escape sequence
     *     is rejected.
     * @param {number} bite The next byte read from the stream.
     * @return The decoded code point, `null` when more input is needed
     *     ("continue"), `finished` at end-of-stream, or the result of
     *     `decoderError(this.fatal)` for malformed input.
     */
    handler(stream, bite) {
      const S = this.states
      switch (this.iso2022jp_decoder_state) {
        default:
        case S.ASCII:
          // 0x1B: an escape sequence begins.
          if (bite === 0x1B) {
            this.iso2022jp_decoder_state = S.EscapeStart
            return null
          }
          // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B (0x1B was
          // already handled above): the byte is the code point.
          if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F) {
            this.iso2022jp_output_flag = false
            return bite
          }
          if (bite === end_of_stream) {
            return finished
          }
          // Otherwise: unset the output flag and return error.
          this.iso2022jp_output_flag = false
          return decoderError(this.fatal)

        case S.Roman:
          if (bite === 0x1B) {
            this.iso2022jp_decoder_state = S.EscapeStart
            return null
          }
          // 0x5C decodes to U+00A5.
          if (bite === 0x5C) {
            this.iso2022jp_output_flag = false
            return 0x00A5
          }
          // 0x7E decodes to U+203E.
          if (bite === 0x7E) {
            this.iso2022jp_output_flag = false
            return 0x203E
          }
          // Remaining 0x00 to 0x7F, excluding 0x0E and 0x0F (0x1B, 0x5C
          // and 0x7E have all returned above).
          if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F) {
            this.iso2022jp_output_flag = false
            return bite
          }
          if (bite === end_of_stream) {
            return finished
          }
          this.iso2022jp_output_flag = false
          return decoderError(this.fatal)

        case S.Katakana:
          if (bite === 0x1B) {
            this.iso2022jp_decoder_state = S.EscapeStart
            return null
          }
          // 0x21 to 0x5F: code point is 0xFF61 - 0x21 + byte.
          if (inRange(bite, 0x21, 0x5F)) {
            this.iso2022jp_output_flag = false
            return 0xFF61 - 0x21 + bite
          }
          if (bite === end_of_stream) {
            return finished
          }
          this.iso2022jp_output_flag = false
          return decoderError(this.fatal)

        case S.LeadByte:
          if (bite === 0x1B) {
            this.iso2022jp_decoder_state = S.EscapeStart
            return null
          }
          // 0x21 to 0x7E: remember the lead and wait for the trail byte.
          if (inRange(bite, 0x21, 0x7E)) {
            this.iso2022jp_output_flag = false
            this.iso2022jp_lead = bite
            this.iso2022jp_decoder_state = S.TrailByte
            return null
          }
          if (bite === end_of_stream) {
            return finished
          }
          this.iso2022jp_output_flag = false
          return decoderError(this.fatal)

        case S.TrailByte:
          // 0x1B in place of a trail byte: the pending lead is dropped
          // with an error, and the escape sequence is still processed.
          if (bite === 0x1B) {
            this.iso2022jp_decoder_state = S.EscapeStart
            return decoderError(this.fatal)
          }
          // 0x21 to 0x7E: complete the two-byte sequence.
          if (inRange(bite, 0x21, 0x7E)) {
            // 1. Set the iso-2022-jp decoder state to lead byte.
            this.iso2022jp_decoder_state = S.LeadByte
            // 2. Let pointer be (lead - 0x21) * 94 + byte - 0x21.
            const pointer = (this.iso2022jp_lead - 0x21) * 94 + bite - 0x21
            // 3. Look the pointer up in index jis0208.
            const code_point = indexCodePointFor(pointer, index('jis0208'))
            // 4. If code point is null, return error.
            if (code_point === null) {
              return decoderError(this.fatal)
            }
            // 5. Return the code point.
            return code_point
          }
          // end-of-stream: back to lead byte, prepend byte to stream,
          // and return error.
          if (bite === end_of_stream) {
            this.iso2022jp_decoder_state = S.LeadByte
            stream.prepend(bite)
            return decoderError(this.fatal)
          }
          // Otherwise: back to lead byte and return error.
          this.iso2022jp_decoder_state = S.LeadByte
          return decoderError(this.fatal)

        case S.EscapeStart:
          // 1. 0x24 or 0x28: stash the byte in iso-2022-jp lead and move
          // on to the escape state.
          if (bite === 0x24 || bite === 0x28) {
            this.iso2022jp_lead = bite
            this.iso2022jp_decoder_state = S.Escape
            return null
          }
          // 2. Prepend byte to stream so it is decoded normally.
          stream.prepend(bite)
          // 3. Unset the output flag, fall back to the decoder output
          // state, and return error.
          this.iso2022jp_output_flag = false
          this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state
          return decoderError(this.fatal)

        case S.Escape: {
          // 1. Let lead be iso-2022-jp lead and reset the latter to 0x00.
          const lead = this.iso2022jp_lead
          this.iso2022jp_lead = 0x00
          // 2-6. Map (lead, byte) to the state the escape selects:
          //   0x28 0x42 -> ASCII, 0x28 0x4A -> Roman,
          //   0x28 0x49 -> Katakana, 0x24 0x40 / 0x24 0x42 -> lead byte.
          let state = null
          if (lead === 0x28) {
            if (bite === 0x42) {
              state = S.ASCII
            } else if (bite === 0x4A) {
              state = S.Roman
            } else if (bite === 0x49) {
              state = S.Katakana
            }
          } else if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) {
            state = S.LeadByte
          }
          // 7. If state is non-null, run these substeps:
          if (state !== null) {
            // 1. Set BOTH the decoder state and the decoder output state
            // to state; the output state is what a later malformed
            // escape sequence falls back to.
            this.iso2022jp_decoder_state = state
            this.iso2022jp_decoder_output_state = state
            // 2. Let output flag be the iso-2022-jp output flag.
            const output_flag = this.iso2022jp_output_flag
            // 3. Set the iso-2022-jp output flag.
            this.iso2022jp_output_flag = true
            // 4. Return continue if output flag was unset, and error
            // otherwise.
            return !output_flag ? null : decoderError(this.fatal)
          }
          // 8. Prepend lead and byte to stream.
          stream.prepend([lead, bite])
          // 9. Unset the output flag, fall back to the decoder output
          // state, and return error.
          this.iso2022jp_output_flag = false
          this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state
          return decoderError(this.fatal)
        }
      }
    }
  }
  265. // 13.2.2 iso-2022-jp encoder
  /**
   * Streaming encoder for iso-2022-jp (Encoding Standard, 13.2.2).
   *
   * The encoder remembers which character set is currently selected in
   * the output (ASCII, Roman or jis0208, initially ASCII). When a code
   * point needs a different set, the code point is pushed back onto the
   * stream and the three-byte escape sequence that switches sets is
   * returned instead; the code point is then encoded on the next call.
   *
   * @implements {Encoder}
   */
  export class ISO2022JPEncoder {
    constructor() {
      /** @enum */
      this.states = {
        ASCII: 0,
        Roman: 1,
        jis0208: 2,
      }
      // iso-2022-jp encoder state (initially ASCII).
      this.iso2022jp_state = this.states.ASCII
    }

    /**
     * Encodes one code point.
     *
     * @param {Stream} stream Input stream; code points are pushed back
     *     onto it with `prepend` whenever an escape sequence has to be
     *     emitted first.
     * @param {number} code_point Next code point read from the stream.
     * @return A single byte, an array of bytes, `finished` at
     *     end-of-stream, or the result of `encoderError`.
     */
    handler(stream, code_point) {
      const { ASCII, Roman, jis0208 } = this.states

      // 1-2. At end-of-stream, finish if the output is already in
      // ASCII; otherwise prepend code point to stream, switch to ASCII
      // and return 0x1B 0x28 0x42.
      if (code_point === end_of_stream) {
        if (this.iso2022jp_state === ASCII) {
          return finished
        }
        stream.prepend(code_point)
        this.iso2022jp_state = ASCII
        return [0x1B, 0x28, 0x42]
      }

      // 3. If the state is ASCII or Roman, and code point is U+000E,
      // U+000F, or U+001B, return error with U+FFFD.
      const single_byte_state =
        this.iso2022jp_state === ASCII || this.iso2022jp_state === Roman
      if (single_byte_state &&
          (code_point === 0x000E || code_point === 0x000F ||
           code_point === 0x001B)) {
        return encoderError(0xFFFD)
      }

      // 4. If the state is ASCII and code point is an ASCII code point,
      // return a byte whose value is code point.
      if (this.iso2022jp_state === ASCII && isASCIICodePoint(code_point)) {
        return code_point
      }

      // 5. If the state is Roman:
      if (this.iso2022jp_state === Roman) {
        // ASCII code points other than U+005C and U+007E encode as
        // themselves.
        if (isASCIICodePoint(code_point) &&
            code_point !== 0x005C && code_point !== 0x007E) {
          return code_point
        }
        // U+00A5 encodes as byte 0x5C.
        if (code_point === 0x00A5) {
          return 0x5C
        }
        // U+203E encodes as byte 0x7E.
        if (code_point === 0x203E) {
          return 0x7E
        }
      }

      // 6. If code point is an ASCII code point, and the state is not
      // ASCII, prepend code point to stream, switch to ASCII, and
      // return 0x1B 0x28 0x42.
      if (isASCIICodePoint(code_point) && this.iso2022jp_state !== ASCII) {
        stream.prepend(code_point)
        this.iso2022jp_state = ASCII
        return [0x1B, 0x28, 0x42]
      }

      // 7. If code point is either U+00A5 or U+203E, and the state is
      // not Roman, prepend code point to stream, switch to Roman, and
      // return 0x1B 0x28 0x4A.
      if ((code_point === 0x00A5 || code_point === 0x203E) &&
          this.iso2022jp_state !== Roman) {
        stream.prepend(code_point)
        this.iso2022jp_state = Roman
        return [0x1B, 0x28, 0x4A]
      }

      // 8. If code point is U+2212, set it to U+FF0D.
      if (code_point === 0x2212) {
        code_point = 0xFF0D
      }

      // NOTE(review): the current Encoding Standard additionally maps
      // U+FF61..U+FF9F through "index ISO-2022-JP katakana" at this
      // point; that step is not implemented here - confirm whether the
      // project's index tables provide that index before adding it.

      // 9. Let pointer be the index pointer for code point in index
      // jis0208.
      const pointer = indexPointerFor(code_point, index('jis0208'))

      // 10. If pointer is null, return error with code point.
      if (pointer === null) {
        return encoderError(code_point)
      }

      // 11. If the state is not jis0208, prepend code point to stream,
      // switch to jis0208, and return 0x1B 0x24 0x42.
      if (this.iso2022jp_state !== jis0208) {
        stream.prepend(code_point)
        this.iso2022jp_state = jis0208
        return [0x1B, 0x24, 0x42]
      }

      // 12. Let lead be floor(pointer / 94) + 0x21.
      const lead = floor(pointer / 94) + 0x21
      // 13. Let trail be pointer % 94 + 0x21.
      const trail = pointer % 94 + 0x21
      // 14. Return two bytes whose values are lead and trail.
      return [lead, trail]
    }
  }