ports.c 121 KB


  1. /* Copyright 1995-2001,2003-2004,2006-2019,2021
  2. Free Software Foundation, Inc.
  3. This file is part of Guile.
  4. Guile is free software: you can redistribute it and/or modify it
  5. under the terms of the GNU Lesser General Public License as published
  6. by the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. Guile is distributed in the hope that it will be useful, but WITHOUT
  9. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10. FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  11. License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with Guile. If not, see
  14. <https://www.gnu.org/licenses/>. */
  15. #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
  16. #ifdef HAVE_CONFIG_H
  17. # include <config.h>
  18. #endif
  19. #include <assert.h>
  20. #include <assert.h>
  21. #include <errno.h>
  22. #include <fcntl.h> /* for chsize on mingw */
  23. #include <iconv.h>
  24. #include <poll.h>
  25. #include <stdio.h>
  26. #include <striconveh.h>
  27. #include <string.h>
  28. #include <uniconv.h>
  29. #include <unistd.h>
  30. #include <unistr.h>
  31. #ifdef HAVE_IO_H
  32. #include <io.h>
  33. #endif
  34. #ifdef HAVE_SYS_IOCTL_H
  35. #include <sys/ioctl.h>
  36. #endif
  37. #include "alist.h"
  38. #include "async.h"
  39. #include "atomics-internal.h"
  40. #include "boolean.h"
  41. #include "chars.h"
  42. #include "deprecation.h"
  43. #include "dynwind.h"
  44. #include "eq.h"
  45. #include "eval.h"
  46. #include "extensions.h"
  47. #include "finalizers.h"
  48. #include "fluids.h"
  49. #include "fports.h" /* direct access for seek and truncate */
  50. #include "goops.h"
  51. #include "gsubr.h"
  52. #include "hashtab.h"
  53. #include "keywords.h"
  54. #include "mallocs.h"
  55. #include "modules.h"
  56. #include "numbers.h"
  57. #include "pairs.h"
  58. #include "ports-internal.h"
  59. #include "private-options.h"
  60. #include "procs.h"
  61. #include "smob.h"
  62. #include "strings.h"
  63. #include "symbols.h"
  64. #include "syscalls.h"
  65. #include "variable.h"
  66. #include "vectors.h"
  67. #include "version.h"
  68. #include "weak-set.h"
  69. #include "ports.h"
  70. /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
  71. already, but have this code here in case that wasn't so in past versions,
  72. or perhaps to help other minimal DOS environments.
  73. gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
  74. might be possibilities if we've got other systems without ftruncate. */
  75. #if defined HAVE_CHSIZE && ! defined HAVE_FTRUNCATE
  76. #define ftruncate(fd, size) chsize (fd, size)
  77. #undef HAVE_FTRUNCATE
  78. #define HAVE_FTRUNCATE 1
  79. #endif
  80. /* We need these symbols early, before (ice-9 ports) loads in the
  81. snarfed definitions, so we can't use SCM_SYMBOL. */
  82. static SCM sym_UTF_8;
  83. static SCM sym_ISO_8859_1;
  84. static SCM sym_UTF_16;
  85. static SCM sym_UTF_16LE;
  86. static SCM sym_UTF_16BE;
  87. static SCM sym_UTF_32;
  88. static SCM sym_UTF_32LE;
  89. static SCM sym_UTF_32BE;
  90. /* Port conversion strategies. */
  91. static SCM sym_error;
  92. static SCM sym_substitute;
  93. static SCM sym_escape;
  94. /* See scm_port_auxiliary_write_buffer and scm_c_write. */
  95. static const size_t AUXILIARY_WRITE_BUFFER_SIZE = 256;
  96. /* Maximum number of bytes in a UTF-8 sequence. */
  97. static const size_t UTF8_BUFFER_SIZE = 4;
  98. /* Maximum number of codepoints to write an escape sequence. */
  99. static const size_t ESCAPE_BUFFER_SIZE = 9;
  100. /* We have to serialize operations on any given iconv descriptor. */
  101. static scm_i_pthread_mutex_t iconv_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
  102. /* See Unicode 8.0 section 5.22, "Best Practice for U+FFFD
  103. Substitution". */
  104. static const scm_t_wchar UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;
  105. static void
  106. release_port (SCM port)
  107. {
  108. scm_t_port *pt = SCM_PORT (port);
  109. /* It's possible for two close-port invocations to race, and since
  110. close-port is defined to be idempotent we need to avoid
  111. decrementing the refcount past 0. The normal case is that it's
  112. open with a refcount of 1 and we're going to change it to 0.
  113. Otherwise if the refcount is higher we just subtract 1 and we're
  114. done. However if the current refcount is 0 then the port has been
  115. closed or is closing and we just return. */
  116. uint32_t cur = 1, next = 0;
  117. while (!scm_atomic_compare_and_swap_uint32 (&pt->refcount, &cur, next))
  118. {
  119. if (cur == 0)
  120. return;
  121. next = cur - 1;
  122. }
  123. if (cur > 1)
  124. return;
  125. /* FIXME: `catch' around the close call? It could throw an exception,
  126. and in that case we'd leak the iconv descriptors, if any. */
  127. if (SCM_PORT_TYPE (port)->close)
  128. SCM_PORT_TYPE (port)->close (port);
  129. scm_i_pthread_mutex_lock (&iconv_lock);
  130. pt = SCM_PORT (port);
  131. if (scm_is_true (pt->precise_encoding))
  132. {
  133. if (pt->input_cd != (iconv_t) -1)
  134. iconv_close (pt->input_cd);
  135. if (pt->output_cd != (iconv_t) -1)
  136. iconv_close (pt->output_cd);
  137. pt->precise_encoding = SCM_BOOL_F;
  138. pt->input_cd = pt->output_cd = (iconv_t) -1;
  139. }
  140. scm_i_pthread_mutex_unlock (&iconv_lock);
  141. }
  142. static void
  143. scm_dynwind_acquire_port (SCM port)
  144. {
  145. scm_t_port *pt = SCM_PORT (port);
  146. /* We're acquiring a lease on the port so that we only close it when
  147. no one is using it. The normal case is that it's open with a
  148. refcount of 1 and we're going to push it to 2. Otherwise perhaps
  149. there is someone else using it; that's fine, we just add our
  150. refcount. However if the current refcount is 0 then the port has
  151. been closed or is closing and we must throw an error. */
  152. uint32_t cur = 1, next = 2;
  153. while (!scm_atomic_compare_and_swap_uint32 (&pt->refcount, &cur, next))
  154. {
  155. if (cur == 0)
  156. scm_wrong_type_arg_msg (NULL, 0, port, "open port");
  157. next = cur + 1;
  158. }
  159. scm_dynwind_unwind_handler_with_scm (release_port, port,
  160. SCM_F_WIND_EXPLICITLY);
  161. }
  162. static SCM trampoline_to_c_read_subr;
  163. static SCM trampoline_to_c_write_subr;
  164. static int
  165. default_random_access_p (SCM port)
  166. {
  167. return SCM_PORT_TYPE (port)->seek != NULL;
  168. }
  169. static int
  170. default_read_wait_fd (SCM port)
  171. {
  172. scm_misc_error ("read_wait_fd", "unimplemented", SCM_EOL);
  173. }
  174. static int
  175. default_write_wait_fd (SCM port)
  176. {
  177. scm_misc_error ("write_wait_fd", "unimplemented", SCM_EOL);
  178. }
  179. scm_t_port_type *
  180. scm_make_port_type (char *name,
  181. size_t (*read) (SCM port, SCM dst, size_t start,
  182. size_t count),
  183. size_t (*write) (SCM port, SCM src, size_t start,
  184. size_t count))
  185. {
  186. scm_t_port_type *desc;
  187. desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
  188. memset (desc, 0, sizeof (*desc));
  189. desc->name = name;
  190. desc->print = scm_port_print;
  191. desc->c_read = read;
  192. desc->c_write = write;
  193. desc->scm_read = read ? trampoline_to_c_read_subr : SCM_BOOL_F;
  194. desc->scm_write = write ? trampoline_to_c_write_subr : SCM_BOOL_F;
  195. desc->read_wait_fd = default_read_wait_fd;
  196. desc->write_wait_fd = default_write_wait_fd;
  197. desc->random_access_p = default_random_access_p;
  198. scm_make_port_classes (desc);
  199. return desc;
  200. }
  201. static SCM
  202. trampoline_to_c_read (SCM port, SCM dst, SCM start, SCM count)
  203. #define FUNC_NAME "port-read"
  204. {
  205. size_t c_start, c_count, ret;
  206. SCM_VALIDATE_OPPORT (1, port);
  207. SCM_VALIDATE_BYTEVECTOR (2, dst);
  208. c_start = scm_to_size_t (start);
  209. c_count = scm_to_size_t (count);
  210. SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (dst));
  211. SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (dst) - c_start);
  212. scm_dynwind_begin (0);
  213. scm_dynwind_acquire_port (port);
  214. ret = SCM_PORT_TYPE (port)->c_read (port, dst, c_start, c_count);
  215. scm_dynwind_end ();
  216. return ret == (size_t) -1 ? SCM_BOOL_F : scm_from_size_t (ret);
  217. }
  218. #undef FUNC_NAME
  219. static size_t
  220. trampoline_to_scm_read (SCM port, SCM dst, size_t start, size_t count)
  221. {
  222. SCM ret = scm_call_4 (SCM_PORT_TYPE (port)->scm_read, port, dst,
  223. scm_from_size_t (start), scm_from_size_t (count));
  224. return scm_is_true (ret) ? scm_to_size_t (ret) : (size_t) -1;
  225. }
  226. static SCM
  227. trampoline_to_c_write (SCM port, SCM src, SCM start, SCM count)
  228. #define FUNC_NAME "port-write"
  229. {
  230. size_t c_start, c_count, ret;
  231. SCM_VALIDATE_OPPORT (1, port);
  232. SCM_VALIDATE_BYTEVECTOR (2, src);
  233. c_start = scm_to_size_t (start);
  234. c_count = scm_to_size_t (count);
  235. SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (src));
  236. SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (src) - c_start);
  237. scm_dynwind_begin (0);
  238. scm_dynwind_acquire_port (port);
  239. ret = SCM_PORT_TYPE (port)->c_write (port, src, c_start, c_count);
  240. scm_dynwind_end ();
  241. return ret == (size_t) -1 ? SCM_BOOL_F : scm_from_size_t (ret);
  242. }
  243. #undef FUNC_NAME
  244. static size_t
  245. trampoline_to_scm_write (SCM port, SCM src, size_t start, size_t count)
  246. {
  247. SCM ret = scm_call_4 (SCM_PORT_TYPE (port)->scm_write, port, src,
  248. scm_from_size_t (start), scm_from_size_t (count));
  249. return scm_is_true (ret) ? scm_to_size_t (ret) : (size_t) -1;
  250. }
  251. void
  252. scm_set_port_scm_read (scm_t_port_type *ptob, SCM read)
  253. {
  254. ptob->scm_read = read;
  255. ptob->c_read = trampoline_to_scm_read;
  256. }
  257. void
  258. scm_set_port_scm_write (scm_t_port_type *ptob, SCM write)
  259. {
  260. ptob->scm_write = write;
  261. ptob->c_write = trampoline_to_scm_write;
  262. }
  263. void
  264. scm_set_port_read_wait_fd (scm_t_port_type *ptob, int (*get_fd) (SCM))
  265. {
  266. ptob->read_wait_fd = get_fd;
  267. }
  268. void
  269. scm_set_port_write_wait_fd (scm_t_port_type *ptob, int (*get_fd) (SCM))
  270. {
  271. ptob->write_wait_fd = get_fd;
  272. }
  273. void
  274. scm_set_port_print (scm_t_port_type *ptob,
  275. int (*print) (SCM exp, SCM port, scm_print_state *pstate))
  276. {
  277. ptob->print = print;
  278. }
  279. void
  280. scm_set_port_close (scm_t_port_type *ptob, void (*close) (SCM))
  281. {
  282. ptob->close = close;
  283. }
  284. void
  285. scm_set_port_needs_close_on_gc (scm_t_port_type *ptob, int needs_close_p)
  286. {
  287. if (needs_close_p)
  288. ptob->flags |= SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC;
  289. else
  290. ptob->flags &= ~SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC;
  291. }
  292. void
  293. scm_set_port_seek (scm_t_port_type *ptob,
  294. scm_t_off (*seek) (SCM, scm_t_off, int))
  295. {
  296. ptob->seek = seek;
  297. }
  298. void
  299. scm_set_port_truncate (scm_t_port_type *ptob, void (*truncate) (SCM, scm_t_off))
  300. {
  301. ptob->truncate = truncate;
  302. }
  303. void
  304. scm_set_port_input_waiting (scm_t_port_type *ptob, int (*input_waiting) (SCM))
  305. {
  306. ptob->input_waiting = input_waiting;
  307. }
  308. void
  309. scm_set_port_random_access_p (scm_t_port_type *ptob,
  310. int (*random_access_p) (SCM))
  311. {
  312. ptob->random_access_p = random_access_p;
  313. }
  314. void
  315. scm_set_port_get_natural_buffer_sizes
  316. (scm_t_port_type *ptob,
  317. void (*get_natural_buffer_sizes) (SCM, size_t *, size_t *))
  318. {
  319. ptob->get_natural_buffer_sizes = get_natural_buffer_sizes;
  320. }
  321. static void
  322. scm_i_clear_pending_eof (SCM port)
  323. {
  324. scm_port_buffer_set_has_eof_p (SCM_PORT (port)->read_buf,
  325. SCM_BOOL_F);
  326. }
  327. SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
  328. (SCM port, SCM key),
  329. "Return the property of @var{port} associated with @var{key}.")
  330. #define FUNC_NAME s_scm_i_port_property
  331. {
  332. SCM_VALIDATE_OPPORT (1, port);
  333. return scm_assq_ref (SCM_PORT (port)->alist, key);
  334. }
  335. #undef FUNC_NAME
  336. SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
  337. (SCM port, SCM key, SCM value),
  338. "Set the property of @var{port} associated with @var{key} to @var{value}.")
  339. #define FUNC_NAME s_scm_i_set_port_property_x
  340. {
  341. scm_t_port *pt;
  342. SCM_VALIDATE_OPPORT (1, port);
  343. pt = SCM_PORT (port);
  344. pt->alist = scm_assq_set_x (pt->alist, key, value);
  345. return SCM_UNSPECIFIED;
  346. }
  347. #undef FUNC_NAME
  348. /* Standard ports --- current input, output, error, and more(!). */
  349. static SCM cur_inport_fluid = SCM_BOOL_F;
  350. static SCM cur_outport_fluid = SCM_BOOL_F;
  351. static SCM cur_errport_fluid = SCM_BOOL_F;
  352. static SCM cur_warnport_fluid = SCM_BOOL_F;
  353. static SCM cur_loadport_fluid = SCM_BOOL_F;
  354. SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
  355. (void),
  356. "Return the current input port. This is the default port used\n"
  357. "by many input procedures. Initially, @code{current-input-port}\n"
  358. "returns the @dfn{standard input} in Unix and C terminology.")
  359. #define FUNC_NAME s_scm_current_input_port
  360. {
  361. if (scm_is_true (cur_inport_fluid))
  362. return scm_fluid_ref (cur_inport_fluid);
  363. else
  364. return SCM_BOOL_F;
  365. }
  366. #undef FUNC_NAME
  367. SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
  368. (void),
  369. "Return the current output port. This is the default port used\n"
  370. "by many output procedures. Initially,\n"
  371. "@code{current-output-port} returns the @dfn{standard output} in\n"
  372. "Unix and C terminology.")
  373. #define FUNC_NAME s_scm_current_output_port
  374. {
  375. if (scm_is_true (cur_outport_fluid))
  376. return scm_fluid_ref (cur_outport_fluid);
  377. else
  378. return SCM_BOOL_F;
  379. }
  380. #undef FUNC_NAME
  381. SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
  382. (void),
  383. "Return the port to which errors and warnings should be sent (the\n"
  384. "@dfn{standard error} in Unix and C terminology).")
  385. #define FUNC_NAME s_scm_current_error_port
  386. {
  387. if (scm_is_true (cur_errport_fluid))
  388. return scm_fluid_ref (cur_errport_fluid);
  389. else
  390. return SCM_BOOL_F;
  391. }
  392. #undef FUNC_NAME
  393. SCM_DEFINE (scm_current_warning_port, "current-warning-port", 0, 0, 0,
  394. (void),
  395. "Return the port to which diagnostic warnings should be sent.")
  396. #define FUNC_NAME s_scm_current_warning_port
  397. {
  398. if (scm_is_true (cur_warnport_fluid))
  399. return scm_fluid_ref (cur_warnport_fluid);
  400. else
  401. return SCM_BOOL_F;
  402. }
  403. #undef FUNC_NAME
  404. SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
  405. (),
  406. "Return the current-load-port.\n"
  407. "The load port is used internally by @code{primitive-load}.")
  408. #define FUNC_NAME s_scm_current_load_port
  409. {
  410. return scm_fluid_ref (cur_loadport_fluid);
  411. }
  412. #undef FUNC_NAME
  413. SCM
  414. scm_set_current_input_port (SCM port)
  415. #define FUNC_NAME "set-current-input-port"
  416. {
  417. SCM oinp = scm_fluid_ref (cur_inport_fluid);
  418. SCM_VALIDATE_OPINPORT (1, port);
  419. scm_fluid_set_x (cur_inport_fluid, port);
  420. return oinp;
  421. }
  422. #undef FUNC_NAME
  423. SCM
  424. scm_set_current_output_port (SCM port)
  425. #define FUNC_NAME "scm-set-current-output-port"
  426. {
  427. SCM ooutp = scm_fluid_ref (cur_outport_fluid);
  428. port = SCM_COERCE_OUTPORT (port);
  429. SCM_VALIDATE_OPOUTPORT (1, port);
  430. scm_fluid_set_x (cur_outport_fluid, port);
  431. return ooutp;
  432. }
  433. #undef FUNC_NAME
  434. SCM
  435. scm_set_current_error_port (SCM port)
  436. #define FUNC_NAME "set-current-error-port"
  437. {
  438. SCM oerrp = scm_fluid_ref (cur_errport_fluid);
  439. port = SCM_COERCE_OUTPORT (port);
  440. SCM_VALIDATE_OPOUTPORT (1, port);
  441. scm_fluid_set_x (cur_errport_fluid, port);
  442. return oerrp;
  443. }
  444. #undef FUNC_NAME
  445. SCM
  446. scm_set_current_warning_port (SCM port)
  447. #define FUNC_NAME "set-current-warning-port"
  448. {
  449. SCM owarnp = scm_fluid_ref (cur_warnport_fluid);
  450. port = SCM_COERCE_OUTPORT (port);
  451. SCM_VALIDATE_OPOUTPORT (1, port);
  452. scm_fluid_set_x (cur_warnport_fluid, port);
  453. return owarnp;
  454. }
  455. #undef FUNC_NAME
  456. void
  457. scm_dynwind_current_input_port (SCM port)
  458. #define FUNC_NAME NULL
  459. {
  460. SCM_VALIDATE_OPINPORT (1, port);
  461. scm_dynwind_fluid (cur_inport_fluid, port);
  462. }
  463. #undef FUNC_NAME
  464. void
  465. scm_dynwind_current_output_port (SCM port)
  466. #define FUNC_NAME NULL
  467. {
  468. port = SCM_COERCE_OUTPORT (port);
  469. SCM_VALIDATE_OPOUTPORT (1, port);
  470. scm_dynwind_fluid (cur_outport_fluid, port);
  471. }
  472. #undef FUNC_NAME
  473. void
  474. scm_dynwind_current_error_port (SCM port)
  475. #define FUNC_NAME NULL
  476. {
  477. port = SCM_COERCE_OUTPORT (port);
  478. SCM_VALIDATE_OPOUTPORT (1, port);
  479. scm_dynwind_fluid (cur_errport_fluid, port);
  480. }
  481. #undef FUNC_NAME
  482. void
  483. scm_i_dynwind_current_load_port (SCM port)
  484. {
  485. scm_dynwind_fluid (cur_loadport_fluid, port);
  486. }
  487. /* Port buffers. */
  488. static SCM
  489. make_port_buffer (SCM port, size_t size)
  490. {
  491. SCM ret = scm_c_make_vector (SCM_PORT_BUFFER_FIELD_COUNT, SCM_INUM0);
  492. SCM_SIMPLE_VECTOR_SET (ret, SCM_PORT_BUFFER_FIELD_BYTEVECTOR,
  493. scm_c_make_bytevector (size));
  494. SCM_SIMPLE_VECTOR_SET (ret, SCM_PORT_BUFFER_FIELD_POSITION,
  495. SCM_PORT (port)->position);
  496. scm_port_buffer_set_has_eof_p (ret, SCM_BOOL_F);
  497. return ret;
  498. }
  499. /* Retrieving a port's mode. */
  500. /* Return the flags that characterize a port based on the mode
  501. * string used to open a file for that port.
  502. *
  503. * See PORT FLAGS in scm.h
  504. */
  505. static long
  506. scm_i_mode_bits_n (SCM modes)
  507. {
  508. return ((scm_i_string_contains_char (modes, 'r')
  509. || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
  510. | (scm_i_string_contains_char (modes, 'w')
  511. || scm_i_string_contains_char (modes, 'a')
  512. || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
  513. | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
  514. | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
  515. }
  516. long
  517. scm_mode_bits (char *modes)
  518. {
  519. /* Valid characters are rw+a0l. So, use latin1. */
  520. return scm_i_mode_bits (scm_from_latin1_string (modes));
  521. }
  522. long
  523. scm_i_mode_bits (SCM modes)
  524. {
  525. long bits;
  526. if (!scm_is_string (modes))
  527. scm_wrong_type_arg_msg (NULL, 0, modes, "string");
  528. bits = scm_i_mode_bits_n (modes);
  529. scm_remember_upto_here_1 (modes);
  530. return bits;
  531. }
  532. /* Return the mode flags from an open port.
  533. * Some modes such as "append" are only used when opening
  534. * a file and are not returned here. */
  535. SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
  536. (SCM port),
  537. "Return the port modes associated with the open port @var{port}.\n"
  538. "These will not necessarily be identical to the modes used when\n"
  539. "the port was opened, since modes such as \"append\" which are\n"
  540. "used only during port creation are not retained.")
  541. #define FUNC_NAME s_scm_port_mode
  542. {
  543. char modes[4];
  544. modes[0] = '\0';
  545. port = SCM_COERCE_OUTPORT (port);
  546. SCM_VALIDATE_OPPORT (1, port);
  547. if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
  548. if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
  549. strcpy (modes, "r+");
  550. else
  551. strcpy (modes, "r");
  552. }
  553. else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
  554. strcpy (modes, "w");
  555. if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
  556. strcat (modes, "0");
  557. return scm_from_latin1_string (modes);
  558. }
  559. #undef FUNC_NAME
  560. /* The port table --- a weak set of all ports.
  561. We need a global registry of ports to flush them all at exit, and to
  562. get all the ports matching a file descriptor. */
  563. SCM scm_i_port_weak_set;
  564. /* Port finalization. */
  565. static SCM close_port (SCM, int);
  566. static SCM
  567. do_close (void *data)
  568. {
  569. return close_port (SCM_PACK_POINTER (data), 0);
  570. }
  571. /* Finalize the object (a port) pointed to by PTR. */
  572. static void
  573. finalize_port (void *ptr, void *data)
  574. {
  575. SCM port = SCM_PACK_POINTER (ptr);
  576. if (!SCM_PORTP (port))
  577. abort ();
  578. if (SCM_OPENP (port))
  579. {
  580. SCM_SET_PORT_FINALIZING (port);
  581. scm_internal_catch (SCM_BOOL_T, do_close, ptr,
  582. scm_handle_by_message_noexit, NULL);
  583. scm_gc_ports_collected++;
  584. }
  585. }
  586. /* Default buffer size. Used if the port type won't supply a value. */
  587. static const size_t default_buffer_size = 1024;
  588. static void
  589. initialize_port_buffers (SCM port)
  590. {
  591. scm_t_port *pt = SCM_PORT (port);
  592. scm_t_port_type *ptob = SCM_PORT_TYPE (port);
  593. size_t read_buf_size, write_buf_size;
  594. if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
  595. read_buf_size = write_buf_size = 1;
  596. else
  597. {
  598. read_buf_size = write_buf_size = default_buffer_size;
  599. if (ptob->get_natural_buffer_sizes)
  600. ptob->get_natural_buffer_sizes (port, &read_buf_size, &write_buf_size);
  601. if (read_buf_size == 0)
  602. read_buf_size = 1;
  603. if (write_buf_size == 0)
  604. write_buf_size = 1;
  605. }
  606. if (!SCM_INPUT_PORT_P (port))
  607. read_buf_size = 1;
  608. if (!SCM_OUTPUT_PORT_P (port))
  609. write_buf_size = 1;
  610. pt->read_buffering = read_buf_size;
  611. pt->read_buf = make_port_buffer (port, read_buf_size);
  612. pt->write_buf = make_port_buffer (port, write_buf_size);
  613. pt->write_buf_aux = SCM_BOOL_F;
  614. }
  615. SCM
  616. scm_c_make_port_with_encoding (scm_t_port_type *ptob, unsigned long mode_bits,
  617. SCM encoding, SCM conversion_strategy,
  618. scm_t_bits stream)
  619. {
  620. SCM ret;
  621. scm_t_port *pt;
  622. pt = scm_gc_typed_calloc (scm_t_port);
  623. ret = scm_words (scm_tc7_port | mode_bits | SCM_OPN, 4);
  624. SCM_SET_CELL_WORD_1 (ret, stream);
  625. SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) pt);
  626. SCM_SET_CELL_WORD_3 (ret, (scm_t_bits) ptob);
  627. pt->encoding = encoding;
  628. pt->conversion_strategy = conversion_strategy;
  629. pt->file_name = SCM_BOOL_F;
  630. pt->position = scm_cons (SCM_INUM0, SCM_INUM0);
  631. pt->refcount = 1;
  632. pt->at_stream_start_for_bom_read = 1;
  633. pt->at_stream_start_for_bom_write = 1;
  634. pt->precise_encoding = SCM_BOOL_F;
  635. pt->input_cd = (iconv_t) -1;
  636. pt->output_cd = (iconv_t) -1;
  637. pt->alist = SCM_EOL;
  638. if (SCM_PORT_TYPE (ret)->flags & SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC)
  639. {
  640. scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
  641. scm_weak_set_add_x (scm_i_port_weak_set, ret);
  642. }
  643. initialize_port_buffers (ret);
  644. pt->rw_random = ptob->random_access_p (ret);
  645. return ret;
  646. }
  647. SCM
  648. scm_c_make_port (scm_t_port_type *ptob,
  649. unsigned long mode_bits, scm_t_bits stream)
  650. {
  651. return scm_c_make_port_with_encoding (ptob, mode_bits,
  652. scm_i_default_port_encoding (),
  653. scm_i_default_port_conversion_strategy (),
  654. stream);
  655. }
  656. /* Predicates. */
  657. SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
  658. (SCM x),
  659. "Return a boolean indicating whether @var{x} is a port.\n"
  660. "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
  661. "@var{x}))}.")
  662. #define FUNC_NAME s_scm_port_p
  663. {
  664. return scm_from_bool (SCM_PORTP (x));
  665. }
  666. #undef FUNC_NAME
  667. SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
  668. (SCM x),
  669. "Return @code{#t} if @var{x} is an input port, otherwise return\n"
  670. "@code{#f}. Any object satisfying this predicate also satisfies\n"
  671. "@code{port?}.")
  672. #define FUNC_NAME s_scm_input_port_p
  673. {
  674. return scm_from_bool (SCM_INPUT_PORT_P (x));
  675. }
  676. #undef FUNC_NAME
  677. SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
  678. (SCM x),
  679. "Return @code{#t} if @var{x} is an output port, otherwise return\n"
  680. "@code{#f}. Any object satisfying this predicate also satisfies\n"
  681. "@code{port?}.")
  682. #define FUNC_NAME s_scm_output_port_p
  683. {
  684. x = SCM_COERCE_OUTPORT (x);
  685. return scm_from_bool (SCM_OUTPUT_PORT_P (x));
  686. }
  687. #undef FUNC_NAME
  688. SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
  689. (SCM port),
  690. "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
  691. "open.")
  692. #define FUNC_NAME s_scm_port_closed_p
  693. {
  694. SCM_VALIDATE_PORT (1, port);
  695. return scm_from_bool (!SCM_OPPORTP (port));
  696. }
  697. #undef FUNC_NAME
  698. SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
  699. (SCM x),
  700. "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
  701. "return @code{#f}.")
  702. #define FUNC_NAME s_scm_eof_object_p
  703. {
  704. return scm_from_bool (SCM_EOF_OBJECT_P (x));
  705. }
  706. #undef FUNC_NAME
  707. /* Closing ports. */
  708. /* Close PORT. If EXPLICIT is true, then we are explicitly closing PORT
  709. with 'close-port'; otherwise PORT is just being GC'd. */
  710. static SCM
  711. close_port (SCM port, int explicit)
  712. {
  713. if (SCM_CLOSEDP (port))
  714. return SCM_BOOL_F;
  715. /* May throw an exception. */
  716. if (SCM_OUTPUT_PORT_P (port))
  717. scm_flush (port);
  718. if (explicit && SCM_FPORTP (port))
  719. /* We're closing PORT explicitly so clear its revealed count so that
  720. it really gets closed. */
  721. SCM_FSTREAM (port)->revealed = 0;
  722. SCM_CLR_PORT_OPEN_FLAG (port);
  723. if (SCM_PORT_TYPE (port)->flags & SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC)
  724. scm_weak_set_remove_x (scm_i_port_weak_set, port);
  725. release_port (port);
  726. return SCM_BOOL_T;
  727. }
  728. SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
  729. (SCM port),
  730. "Close the specified port object. Return @code{#t} if it\n"
  731. "successfully closes a port or @code{#f} if it was already\n"
  732. "closed. An exception may be raised if an error occurs, for\n"
  733. "example when flushing buffered output. See also @ref{Ports and\n"
  734. "File Descriptors, close}, for a procedure which can close file\n"
  735. "descriptors.")
  736. #define FUNC_NAME s_scm_close_port
  737. {
  738. port = SCM_COERCE_OUTPORT (port);
  739. SCM_VALIDATE_PORT (1, port);
  740. return close_port (port, 1);
  741. }
  742. #undef FUNC_NAME
  743. SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
  744. (SCM port),
  745. "Close the specified input port object. The routine has no effect if\n"
  746. "the file has already been closed. An exception may be raised if an\n"
  747. "error occurs. The value returned is unspecified.\n\n"
  748. "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
  749. "which can close file descriptors.")
  750. #define FUNC_NAME s_scm_close_input_port
  751. {
  752. SCM_VALIDATE_INPUT_PORT (1, port);
  753. scm_close_port (port);
  754. return SCM_UNSPECIFIED;
  755. }
  756. #undef FUNC_NAME
  757. SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
  758. (SCM port),
  759. "Close the specified output port object. The routine has no effect if\n"
  760. "the file has already been closed. An exception may be raised if an\n"
  761. "error occurs. The value returned is unspecified.\n\n"
  762. "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
  763. "which can close file descriptors.")
  764. #define FUNC_NAME s_scm_close_output_port
  765. {
  766. port = SCM_COERCE_OUTPORT (port);
  767. SCM_VALIDATE_OUTPUT_PORT (1, port);
  768. scm_close_port (port);
  769. return SCM_UNSPECIFIED;
  770. }
  771. #undef FUNC_NAME
  772. /* Encoding characters to byte streams, and decoding byte streams to
  773. characters. */
  774. /* Port encodings are case-insensitive ASCII strings. */
  775. static char
  776. ascii_toupper (char c)
  777. {
  778. return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
  779. }
  780. /* It is only necessary to use this function on encodings that come from
  781. the user and have not been canonicalized yet. Encodings that are set
  782. on ports or in the default encoding fluid are in upper-case, and can
  783. be compared with strcmp. */
  784. static int
  785. encoding_matches (const char *enc, SCM upper_symbol)
  786. {
  787. const char *upper = scm_i_symbol_chars (upper_symbol);
  788. if (!enc)
  789. enc = "ISO-8859-1";
  790. while (*enc)
  791. if (ascii_toupper (*enc++) != *upper++)
  792. return 0;
  793. return !*upper;
  794. }
  795. static SCM
  796. canonicalize_encoding (const char *enc)
  797. {
  798. char *ret;
  799. int i;
  800. if (!enc || encoding_matches (enc, sym_ISO_8859_1))
  801. return sym_ISO_8859_1;
  802. if (encoding_matches (enc, sym_UTF_8))
  803. return sym_UTF_8;
  804. ret = scm_gc_strdup (enc, "port");
  805. for (i = 0; ret[i]; i++)
  806. {
  807. if (ret[i] > 127)
  808. /* Restrict to ASCII. */
  809. scm_misc_error (NULL, "invalid character encoding ~s",
  810. scm_list_1 (scm_from_latin1_string (enc)));
  811. else
  812. ret[i] = ascii_toupper (ret[i]);
  813. }
  814. return scm_from_latin1_symbol (ret);
  815. }
  816. /* A fluid specifying the default encoding for newly created ports. If it is
  817. a string, that is the encoding. If it is #f, it is in the "native"
  818. (Latin-1) encoding. */
  819. static SCM default_port_encoding_var;
  820. /* Use ENCODING as the default encoding for future ports. */
  821. void
  822. scm_i_set_default_port_encoding (const char *encoding)
  823. {
  824. if (encoding_matches (encoding, sym_ISO_8859_1))
  825. scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
  826. else
  827. scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
  828. scm_symbol_to_string (canonicalize_encoding (encoding)));
  829. }
  830. /* Return the name of the default encoding for newly created ports. */
  831. SCM
  832. scm_i_default_port_encoding (void)
  833. {
  834. SCM encoding;
  835. encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
  836. if (!scm_is_string (encoding))
  837. return sym_ISO_8859_1;
  838. else
  839. return canonicalize_encoding (scm_i_string_chars (encoding));
  840. }
  841. /* A fluid specifying the default conversion handler for newly created
  842. ports. Its value should be one of the symbols below. */
  843. static SCM default_conversion_strategy_var;
  844. /* Return the default failed encoding conversion policy for new created
  845. ports. */
  846. SCM
  847. scm_i_default_port_conversion_strategy (void)
  848. {
  849. SCM value;
  850. value = scm_fluid_ref (SCM_VARIABLE_REF (default_conversion_strategy_var));
  851. if (scm_is_eq (sym_substitute, value) || scm_is_eq (sym_escape, value))
  852. return value;
  853. /* Default to 'error also when the fluid's value is not one of the
  854. valid symbols. */
  855. return sym_error;
  856. }
  857. /* Use HANDLER as the default conversion strategy for future ports. */
  858. void
  859. scm_i_set_default_port_conversion_strategy (SCM sym)
  860. {
  861. if (!scm_is_eq (sym, sym_error)
  862. && !scm_is_eq (sym, sym_substitute)
  863. && !scm_is_eq (sym, sym_escape))
  864. /* Internal error. */
  865. abort ();
  866. scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), sym);
  867. }
  868. static const unsigned char scm_utf8_bom[3] = {0xEF, 0xBB, 0xBF};
  869. static const unsigned char scm_utf16be_bom[2] = {0xFE, 0xFF};
  870. static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
  871. static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
  872. static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
  873. /* Called with the iconv lock. Will release the lock before throwing
  874. any error. */
  875. static void
  876. prepare_iconv_descriptors (SCM port, SCM precise_encoding)
  877. {
  878. scm_t_port *pt = SCM_PORT (port);
  879. iconv_t input_cd, output_cd;
  880. const char *encoding;
  881. size_t i;
  882. /* If the specified encoding is UTF-16 or UTF-32, then default to
  883. big-endian byte order. This fallback isn't necessary if you read
  884. on the port before writing to it, as the read will sniff the BOM if
  885. any and specialize the encoding; see the manual. */
  886. if (scm_is_eq (precise_encoding, sym_UTF_16))
  887. precise_encoding = sym_UTF_16BE;
  888. else if (scm_is_eq (precise_encoding, sym_UTF_32))
  889. precise_encoding = sym_UTF_32BE;
  890. if (scm_is_eq (pt->precise_encoding, precise_encoding))
  891. return;
  892. input_cd = output_cd = (iconv_t) -1;
  893. if (!scm_is_symbol (precise_encoding))
  894. goto invalid_encoding;
  895. encoding = scm_i_symbol_chars (precise_encoding);
  896. for (i = 0; encoding[i]; i++)
  897. if (encoding[i] > 127)
  898. goto invalid_encoding;
  899. /* Open a iconv conversion descriptors between ENCODING and UTF-8. We
  900. choose UTF-8, not UTF-32, because iconv implementations can
  901. typically convert from anything to UTF-8, but not to UTF-32 (see
  902. http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html,
  903. for more details). */
  904. if (SCM_INPUT_PORT_P (port))
  905. {
  906. input_cd = iconv_open ("UTF-8", encoding);
  907. if (input_cd == (iconv_t) -1)
  908. goto invalid_encoding;
  909. }
  910. if (SCM_OUTPUT_PORT_P (port))
  911. {
  912. output_cd = iconv_open (encoding, "UTF-8");
  913. if (output_cd == (iconv_t) -1)
  914. {
  915. if (input_cd != (iconv_t) -1)
  916. iconv_close (input_cd);
  917. goto invalid_encoding;
  918. }
  919. }
  920. if (pt->input_cd != (iconv_t) -1)
  921. iconv_close (pt->input_cd);
  922. if (pt->output_cd != (iconv_t) -1)
  923. iconv_close (pt->output_cd);
  924. pt->precise_encoding = precise_encoding;
  925. pt->input_cd = input_cd;
  926. pt->output_cd = output_cd;
  927. /* Make sure this port has a finalizer. */
  928. scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
  929. return;
  930. invalid_encoding:
  931. scm_i_pthread_mutex_unlock (&iconv_lock);
  932. scm_misc_error ("open_iconv_descriptors",
  933. "invalid or unknown character encoding ~s",
  934. scm_list_1 (precise_encoding));
  935. }
  936. SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
  937. SCM_DEFINE (scm_specialize_port_encoding_x,
  938. "specialize-port-encoding!", 2, 0, 0,
  939. (SCM port, SCM encoding),
  940. "")
  941. #define FUNC_NAME s_scm_specialize_port_encoding_x
  942. {
  943. SCM_VALIDATE_PORT (1, port);
  944. SCM_VALIDATE_SYMBOL (2, encoding);
  945. if (scm_is_eq (SCM_PORT (port)->encoding, sym_UTF_16))
  946. {
  947. if (!scm_is_eq (encoding, sym_UTF_16LE)
  948. && !scm_is_eq (encoding, sym_UTF_16BE))
  949. SCM_OUT_OF_RANGE (2, encoding);
  950. }
  951. else if (scm_is_eq (SCM_PORT (port)->encoding, sym_UTF_32))
  952. {
  953. if (!scm_is_eq (encoding, sym_UTF_32LE)
  954. && !scm_is_eq (encoding, sym_UTF_32BE))
  955. SCM_OUT_OF_RANGE (2, encoding);
  956. }
  957. else
  958. SCM_OUT_OF_RANGE (2, encoding);
  959. scm_i_pthread_mutex_lock (&iconv_lock);
  960. prepare_iconv_descriptors (port, encoding);
  961. scm_i_pthread_mutex_unlock (&iconv_lock);
  962. return SCM_UNSPECIFIED;
  963. }
  964. #undef FUNC_NAME
  965. /* Acquire the iconv lock and fill in *INPUT_CD and/or *OUTPUT_CD. */
  966. void
  967. scm_port_acquire_iconv_descriptors (SCM port, iconv_t *input_cd,
  968. iconv_t *output_cd)
  969. {
  970. scm_t_port *pt = SCM_PORT (port);
  971. scm_i_pthread_mutex_lock (&iconv_lock);
  972. if (scm_is_false (pt->precise_encoding))
  973. prepare_iconv_descriptors (port, pt->encoding);
  974. if (input_cd)
  975. *input_cd = pt->input_cd;
  976. if (output_cd)
  977. *output_cd = pt->output_cd;
  978. }
  979. void
  980. scm_port_release_iconv_descriptors (SCM port)
  981. {
  982. scm_i_pthread_mutex_unlock (&iconv_lock);
  983. }
  984. /* The name of the encoding is itself encoded in ASCII. */
  985. void
  986. scm_i_set_port_encoding_x (SCM port, const char *encoding)
  987. {
  988. scm_t_port *pt = SCM_PORT (port);
  989. /* In order to handle cases where the encoding changes mid-stream
  990. (e.g. within an HTTP stream, or within a file that is composed of
  991. segments with different encodings), we consider this to be "stream
  992. start" for purposes of BOM handling, regardless of our actual file
  993. position. */
  994. pt->at_stream_start_for_bom_read = 1;
  995. pt->at_stream_start_for_bom_write = 1;
  996. pt->encoding = canonicalize_encoding (encoding);
  997. scm_i_pthread_mutex_lock (&iconv_lock);
  998. if (pt->input_cd != (iconv_t) -1)
  999. iconv_close (pt->input_cd);
  1000. if (pt->output_cd != (iconv_t) -1)
  1001. iconv_close (pt->output_cd);
  1002. pt->precise_encoding = SCM_BOOL_F;
  1003. pt->input_cd = pt->output_cd = (iconv_t) -1;
  1004. scm_i_pthread_mutex_unlock (&iconv_lock);
  1005. }
  1006. SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
  1007. (SCM port),
  1008. "Returns, as a symbol, the character encoding that @var{port}\n"
  1009. "uses to interpret its input and output.\n")
  1010. #define FUNC_NAME s_scm_sys_port_encoding
  1011. {
  1012. SCM_VALIDATE_OPPORT (1, port);
  1013. return SCM_PORT (port)->encoding;
  1014. }
  1015. #undef FUNC_NAME
  1016. SCM
  1017. scm_port_encoding (SCM port)
  1018. {
  1019. return scm_symbol_to_string (scm_sys_port_encoding (port));
  1020. }
  1021. SCM_DEFINE (scm_sys_set_port_encoding_x, "%set-port-encoding!", 2, 0, 0,
  1022. (SCM port, SCM enc),
  1023. "Sets the character encoding that will be used to interpret all\n"
  1024. "port I/O. New ports are created with the encoding\n"
  1025. "appropriate for the current locale if @code{setlocale} has \n"
  1026. "been called or ISO-8859-1 otherwise\n"
  1027. "and this procedure can be used to modify that encoding.\n")
  1028. #define FUNC_NAME s_scm_sys_set_port_encoding_x
  1029. {
  1030. SCM_VALIDATE_OPPORT (1, port);
  1031. SCM_VALIDATE_SYMBOL (2, enc);
  1032. scm_i_set_port_encoding_x (port, scm_i_symbol_chars (enc));
  1033. return SCM_UNSPECIFIED;
  1034. }
  1035. #undef FUNC_NAME
  1036. SCM
  1037. scm_set_port_encoding_x (SCM port, SCM enc)
  1038. {
  1039. return scm_sys_set_port_encoding_x (port, scm_string_to_symbol (enc));
  1040. }
  1041. scm_t_string_failed_conversion_handler
  1042. scm_i_string_failed_conversion_handler (SCM conversion_strategy)
  1043. {
  1044. if (scm_is_eq (conversion_strategy, sym_substitute))
  1045. return SCM_FAILED_CONVERSION_QUESTION_MARK;
  1046. if (scm_is_eq (conversion_strategy, sym_escape))
  1047. return SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
  1048. /* Default to error. */
  1049. return SCM_FAILED_CONVERSION_ERROR;
  1050. }
  1051. SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
  1052. 1, 0, 0, (SCM port),
  1053. "Returns the behavior of the port when handling a character that\n"
  1054. "is not representable in the port's current encoding.\n"
  1055. "It returns the symbol @code{error} if unrepresentable characters\n"
  1056. "should cause exceptions, @code{substitute} if the port should\n"
  1057. "try to replace unrepresentable characters with question marks or\n"
  1058. "approximate characters, or @code{escape} if unrepresentable\n"
  1059. "characters should be converted to string escapes.\n"
  1060. "\n"
  1061. "If @var{port} is @code{#f}, then the current default behavior\n"
  1062. "will be returned. New ports will have this default behavior\n"
  1063. "when they are created.\n")
  1064. #define FUNC_NAME s_scm_port_conversion_strategy
  1065. {
  1066. if (scm_is_false (port))
  1067. return scm_i_default_port_conversion_strategy ();
  1068. SCM_VALIDATE_OPPORT (1, port);
  1069. return SCM_PORT (port)->conversion_strategy;
  1070. }
  1071. #undef FUNC_NAME
  1072. SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
  1073. 2, 0, 0,
  1074. (SCM port, SCM sym),
  1075. "Sets the behavior of the interpreter when outputting a character\n"
  1076. "that is not representable in the port's current encoding.\n"
  1077. "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
  1078. "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
  1079. "when an unconvertible character is encountered. If it is\n"
  1080. "@code{'substitute}, then unconvertible characters will \n"
  1081. "be replaced with approximate characters, or with question marks\n"
  1082. "if no approximately correct character is available.\n"
  1083. "If it is @code{'escape},\n"
  1084. "it will appear as a hex escape when output.\n"
  1085. "\n"
  1086. "If @var{port} is an open port, the conversion error behavior\n"
  1087. "is set for that port. If it is @code{#f}, it is set as the\n"
  1088. "default behavior for any future ports that get created in\n"
  1089. "this thread.\n")
  1090. #define FUNC_NAME s_scm_set_port_conversion_strategy_x
  1091. {
  1092. if (!scm_is_eq (sym, sym_error)
  1093. && !scm_is_eq (sym, sym_substitute)
  1094. && !scm_is_eq (sym, sym_escape))
  1095. SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
  1096. if (scm_is_false (port))
  1097. scm_i_set_default_port_conversion_strategy (sym);
  1098. else
  1099. {
  1100. SCM_VALIDATE_OPPORT (1, port);
  1101. SCM_PORT (port)->conversion_strategy = sym;
  1102. }
  1103. return SCM_UNSPECIFIED;
  1104. }
  1105. #undef FUNC_NAME
  1106. /* Non-blocking I/O. */
  1107. static int
  1108. port_read_wait_fd (SCM port)
  1109. {
  1110. scm_t_port_type *ptob = SCM_PORT_TYPE (port);
  1111. return ptob->read_wait_fd (port);
  1112. }
  1113. static int
  1114. port_write_wait_fd (SCM port)
  1115. {
  1116. scm_t_port_type *ptob = SCM_PORT_TYPE (port);
  1117. return ptob->write_wait_fd (port);
  1118. }
  1119. SCM_INTERNAL SCM scm_port_read_wait_fd (SCM);
  1120. SCM_DEFINE (scm_port_read_wait_fd, "port-read-wait-fd", 1, 0, 0,
  1121. (SCM port), "")
  1122. #define FUNC_NAME s_scm_port_read_wait_fd
  1123. {
  1124. int fd;
  1125. port = SCM_COERCE_OUTPORT (port);
  1126. SCM_VALIDATE_OPINPORT (1, port);
  1127. fd = port_read_wait_fd (port);
  1128. return fd < 0 ? SCM_BOOL_F : scm_from_int (fd);
  1129. }
  1130. #undef FUNC_NAME
  1131. SCM_INTERNAL SCM scm_port_write_wait_fd (SCM);
  1132. SCM_DEFINE (scm_port_write_wait_fd, "port-write-wait-fd", 1, 0, 0,
  1133. (SCM port), "")
  1134. #define FUNC_NAME s_scm_port_write_wait_fd
  1135. {
  1136. int fd;
  1137. port = SCM_COERCE_OUTPORT (port);
  1138. SCM_VALIDATE_OPOUTPORT (1, port);
  1139. fd = port_write_wait_fd (port);
  1140. return fd < 0 ? SCM_BOOL_F : scm_from_int (fd);
  1141. }
  1142. #undef FUNC_NAME
  1143. /* Call while having acquired the port. */
  1144. static int
  1145. port_poll (SCM port, short events, int timeout)
  1146. #define FUNC_NAME "port-poll"
  1147. {
  1148. struct pollfd pollfd[2];
  1149. int nfds = 0, rv = 0;
  1150. if (events & POLLIN)
  1151. {
  1152. pollfd[nfds].fd = port_read_wait_fd (port);
  1153. pollfd[nfds].events = events & (POLLIN | POLLPRI);
  1154. pollfd[nfds].revents = 0;
  1155. nfds++;
  1156. }
  1157. if (events & POLLOUT)
  1158. {
  1159. pollfd[nfds].fd = port_write_wait_fd (port);
  1160. pollfd[nfds].events = events & (POLLOUT | POLLPRI);
  1161. pollfd[nfds].revents = 0;
  1162. nfds++;
  1163. }
  1164. if (nfds == 2 && pollfd[0].fd == pollfd[1].fd)
  1165. {
  1166. pollfd[0].events |= pollfd[1].events;
  1167. nfds--;
  1168. }
  1169. SCM_SYSCALL (rv = poll (pollfd, nfds, timeout));
  1170. if (rv < 0)
  1171. SCM_SYSERROR;
  1172. return rv;
  1173. }
  1174. #undef FUNC_NAME
  1175. SCM_INTERNAL SCM scm_port_poll (SCM, SCM, SCM);
  1176. SCM_DEFINE (scm_port_poll, "port-poll", 2, 1, 0,
  1177. (SCM port, SCM events, SCM timeout),
  1178. "")
  1179. #define FUNC_NAME s_scm_port_poll
  1180. {
  1181. short c_events = 0;
  1182. int c_timeout;
  1183. SCM ret;
  1184. port = SCM_COERCE_OUTPORT (port);
  1185. SCM_VALIDATE_PORT (1, port);
  1186. SCM_VALIDATE_STRING (2, events);
  1187. c_timeout = SCM_UNBNDP (timeout) ? -1 : SCM_NUM2INT (3, timeout);
  1188. if (scm_i_string_contains_char (events, 'r'))
  1189. c_events |= POLLIN;
  1190. if (scm_i_string_contains_char (events, '!'))
  1191. c_events |= POLLPRI;
  1192. if (scm_i_string_contains_char (events, 'w'))
  1193. c_events |= POLLOUT;
  1194. scm_dynwind_begin (0);
  1195. scm_dynwind_acquire_port (port);
  1196. ret = scm_from_int (port_poll (port, c_events, c_timeout));
  1197. scm_dynwind_end ();
  1198. return ret;
  1199. }
  1200. #undef FUNC_NAME
  1201. /* Input. */
  1202. static int
  1203. get_byte_or_eof (SCM port)
  1204. {
  1205. SCM buf = SCM_PORT (port)->read_buf;
  1206. SCM buf_bv, buf_cur, buf_end;
  1207. size_t cur, avail;
  1208. buf_bv = scm_port_buffer_bytevector (buf);
  1209. buf_cur = scm_port_buffer_cur (buf);
  1210. buf_end = scm_port_buffer_end (buf);
  1211. cur = SCM_I_INUM (buf_cur);
  1212. if (SCM_LIKELY (SCM_I_INUMP (buf_cur))
  1213. && SCM_LIKELY (SCM_I_INUMP (buf_end))
  1214. && SCM_LIKELY (cur < SCM_I_INUM (buf_end))
  1215. && SCM_LIKELY (cur < SCM_BYTEVECTOR_LENGTH (buf_bv)))
  1216. {
  1217. uint8_t ret = SCM_BYTEVECTOR_CONTENTS (buf_bv)[cur];
  1218. scm_port_buffer_set_cur (buf, SCM_I_MAKINUM (cur + 1));
  1219. return ret;
  1220. }
  1221. buf = scm_fill_input (port, 0, &cur, &avail);
  1222. buf_bv = scm_port_buffer_bytevector (buf);
  1223. if (avail > 0)
  1224. {
  1225. uint8_t ret = SCM_BYTEVECTOR_CONTENTS (buf_bv)[cur];
  1226. scm_port_buffer_set_cur (buf, SCM_I_MAKINUM (cur + 1));
  1227. return ret;
  1228. }
  1229. /* The next peek or get should cause the read() function to be called
  1230. to see if we still have EOF. */
  1231. scm_port_buffer_set_has_eof_p (buf, SCM_BOOL_F);
  1232. return EOF;
  1233. }
  1234. /* Like `scm_get_byte_or_eof' but does not change PORT's `read_pos'. */
  1235. static int
  1236. peek_byte_or_eof (SCM port, SCM *buf_out, size_t *cur_out)
  1237. {
  1238. SCM buf = SCM_PORT (port)->read_buf;
  1239. SCM buf_bv, buf_cur, buf_end;
  1240. size_t cur, avail;
  1241. buf_bv = scm_port_buffer_bytevector (buf);
  1242. buf_cur = scm_port_buffer_cur (buf);
  1243. buf_end = scm_port_buffer_end (buf);
  1244. cur = scm_to_size_t (buf_cur);
  1245. if (SCM_LIKELY (SCM_I_INUMP (buf_cur))
  1246. && SCM_LIKELY (SCM_I_INUMP (buf_end))
  1247. && SCM_LIKELY (cur < SCM_I_INUM (buf_end))
  1248. && SCM_LIKELY (cur < SCM_BYTEVECTOR_LENGTH (buf_bv)))
  1249. {
  1250. uint8_t ret = SCM_BYTEVECTOR_CONTENTS (buf_bv)[cur];
  1251. *buf_out = buf;
  1252. *cur_out = cur;
  1253. return ret;
  1254. }
  1255. buf = scm_fill_input (port, 0, &cur, &avail);
  1256. buf_bv = scm_port_buffer_bytevector (buf);
  1257. *buf_out = buf;
  1258. *cur_out = cur;
  1259. if (avail > 0)
  1260. {
  1261. uint8_t ret = SCM_BYTEVECTOR_CONTENTS (buf_bv)[cur];
  1262. return ret;
  1263. }
  1264. return EOF;
  1265. }
  1266. int
  1267. scm_get_byte_or_eof (SCM port)
  1268. {
  1269. return get_byte_or_eof (port);
  1270. }
  1271. int
  1272. scm_peek_byte_or_eof (SCM port)
  1273. {
  1274. SCM buf;
  1275. size_t cur;
  1276. return peek_byte_or_eof (port, &buf, &cur);
  1277. }
  1278. static size_t
  1279. scm_i_read_bytes (SCM port, SCM dst, size_t start, size_t count)
  1280. {
  1281. size_t filled;
  1282. scm_t_port_type *ptob = SCM_PORT_TYPE (port);
  1283. assert (count <= SCM_BYTEVECTOR_LENGTH (dst));
  1284. assert (start + count <= SCM_BYTEVECTOR_LENGTH (dst));
  1285. scm_dynwind_begin (0);
  1286. scm_dynwind_acquire_port (port);
  1287. retry:
  1288. filled = ptob->c_read (port, dst, start, count);
  1289. if (filled == (size_t) -1)
  1290. {
  1291. port_poll (port, POLLIN, -1);
  1292. goto retry;
  1293. }
  1294. scm_dynwind_end ();
  1295. assert (filled <= count);
  1296. return filled;
  1297. }
  1298. /* In text mode, we will slurp a BOM from the beginning of a UTF-8,
  1299. UTF-16, or UTF-32 stream, and write one at the beginning of a UTF-16
  1300. or UTF-32 stream. In binary mode, we won't. The mode depends on the
  1301. caller. */
  1302. enum bom_io_mode { BOM_IO_TEXT, BOM_IO_BINARY };
  1303. static size_t port_clear_stream_start_for_bom_read (SCM, enum bom_io_mode);
  1304. /* Used by an application to read arbitrary number of bytes from an SCM
  1305. port. Same semantics as libc read, except that scm_c_read_bytes only
  1306. returns less than SIZE bytes if at end-of-file.
  1307. Warning: Doesn't update port line and column counts! */
  1308. size_t
  1309. scm_c_read_bytes (SCM port, SCM dst, size_t start, size_t count)
  1310. #define FUNC_NAME "scm_c_read_bytes"
  1311. {
  1312. size_t to_read = count;
  1313. scm_t_port *pt;
  1314. SCM read_buf;
  1315. uint8_t *dst_ptr = (uint8_t *) SCM_BYTEVECTOR_CONTENTS (dst) + start;
  1316. SCM_VALIDATE_OPINPORT (1, port);
  1317. pt = SCM_PORT (port);
  1318. read_buf = pt->read_buf;
  1319. if (pt->rw_random)
  1320. scm_flush (port);
  1321. port_clear_stream_start_for_bom_read (port, BOM_IO_BINARY);
  1322. /* Take bytes first from the port's read buffer. */
  1323. {
  1324. size_t cur, avail, did_read;
  1325. avail = scm_port_buffer_can_take (read_buf, &cur);
  1326. did_read = scm_port_buffer_take (read_buf, dst_ptr, to_read, cur, avail);
  1327. dst_ptr += did_read;
  1328. to_read -= did_read;
  1329. }
  1330. while (to_read)
  1331. {
  1332. size_t did_read;
  1333. /* If the read is smaller than the buffering on the read side of
  1334. this port, then go through the buffer. Otherwise fill our
  1335. buffer directly. */
  1336. if (to_read < pt->read_buffering)
  1337. {
  1338. size_t cur, avail;
  1339. read_buf = scm_fill_input (port, 0, &cur, &avail);
  1340. did_read = scm_port_buffer_take (read_buf, dst_ptr, to_read,
  1341. cur, avail);
  1342. dst_ptr += did_read;
  1343. to_read -= did_read;
  1344. if (did_read == 0)
  1345. {
  1346. /* Consider that we've read off this EOF. */
  1347. scm_port_buffer_set_has_eof_p (read_buf, SCM_BOOL_F);
  1348. break;
  1349. }
  1350. }
  1351. else
  1352. {
  1353. did_read = scm_i_read_bytes (port, dst,
  1354. start + count - to_read,
  1355. to_read);
  1356. to_read -= did_read;
  1357. dst_ptr += did_read;
  1358. if (did_read == 0)
  1359. break;
  1360. }
  1361. }
  1362. return count - to_read;
  1363. }
  1364. #undef FUNC_NAME
  1365. /* Like scm_c_read_bytes, but always proxies reads through the port's
  1366. read buffer. Used by an application when it wants to read into a
  1367. memory chunk that's not owned by Guile's GC. */
  1368. size_t
  1369. scm_c_read (SCM port, void *buffer, size_t size)
  1370. #define FUNC_NAME "scm_c_read"
  1371. {
  1372. size_t copied = 0;
  1373. scm_t_port *pt;
  1374. SCM read_buf;
  1375. uint8_t *dst = buffer;
  1376. SCM_VALIDATE_OPINPORT (1, port);
  1377. pt = SCM_PORT (port);
  1378. read_buf = pt->read_buf;
  1379. if (pt->rw_random)
  1380. scm_flush (port);
  1381. while (copied < size)
  1382. {
  1383. size_t cur, avail, count;
  1384. read_buf = scm_fill_input (port, 0, &cur, &avail);
  1385. count = scm_port_buffer_take (read_buf, dst + copied, size - copied,
  1386. cur, avail);
  1387. copied += count;
  1388. if (count == 0)
  1389. {
  1390. /* Consider that we've read off this EOF. */
  1391. scm_port_buffer_set_has_eof_p (read_buf, SCM_BOOL_F);
  1392. break;
  1393. }
  1394. }
  1395. return copied;
  1396. }
  1397. #undef FUNC_NAME
  1398. /* Update the line and column number of PORT after consumption of C. */
  1399. static inline void
  1400. update_port_position (SCM position, scm_t_wchar c)
  1401. {
  1402. int column = scm_to_int (scm_port_position_column (position));
  1403. switch (c)
  1404. {
  1405. case '\a':
  1406. case EOF:
  1407. break;
  1408. case '\b':
  1409. if (column > 0)
  1410. scm_port_position_set_column (position, scm_from_int (column - 1));
  1411. break;
  1412. case '\n':
  1413. {
  1414. long line = scm_to_long (scm_port_position_line (position));
  1415. scm_port_position_set_line (position, scm_from_long (line + 1));
  1416. scm_port_position_set_column (position, SCM_INUM0);
  1417. }
  1418. break;
  1419. case '\r':
  1420. scm_port_position_set_column (position, SCM_INUM0);
  1421. break;
  1422. case '\t':
  1423. scm_port_position_set_column (position,
  1424. scm_from_int (column + 8 - column % 8));
  1425. break;
  1426. default:
  1427. scm_port_position_set_column (position, scm_from_int (column + 1));
  1428. break;
  1429. }
  1430. }
  1431. /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
  1432. UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
  1433. static scm_t_wchar
  1434. utf8_to_codepoint (const uint8_t *utf8_buf, size_t size)
  1435. {
  1436. scm_t_wchar codepoint;
  1437. if (utf8_buf[0] <= 0x7f)
  1438. {
  1439. assert (size >= 1);
  1440. codepoint = utf8_buf[0];
  1441. }
  1442. else if ((utf8_buf[0] & 0xe0) == 0xc0)
  1443. {
  1444. assert (size >= 2);
  1445. codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
  1446. | (utf8_buf[1] & 0x3f);
  1447. }
  1448. else if ((utf8_buf[0] & 0xf0) == 0xe0)
  1449. {
  1450. assert (size >= 3);
  1451. codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
  1452. | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
  1453. | (utf8_buf[2] & 0x3f);
  1454. }
  1455. else
  1456. {
  1457. assert (size >= 4);
  1458. codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
  1459. | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
  1460. | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
  1461. | (utf8_buf[3] & 0x3f);
  1462. }
  1463. return codepoint;
  1464. }
  1465. /* Peek a UTF-8 sequence from PORT. On success, return the codepoint
  1466. that was read, and set *LEN to the length in bytes. If there was a
  1467. decoding error and the port conversion strategy was `substitute',
  1468. then return #\? and set *LEN to the length of the shortest prefix
  1469. that cannot begin a valid UTF-8 sequence. Otherwise signal an
  1470. error. */
  1471. static scm_t_wchar
  1472. peek_utf8_codepoint (SCM port, SCM *buf_out, size_t *cur_out, size_t *len_out)
  1473. {
  1474. #define DECODING_ERROR(bytes) \
  1475. do { *buf_out = buf; *cur_out = cur; *len_out = bytes; goto decoding_error; } while (0)
  1476. #define RETURN(bytes, codepoint) \
  1477. do { *buf_out = buf; *cur_out = cur; *len_out = bytes; return codepoint; } while (0)
  1478. SCM buf;
  1479. size_t cur, avail;
  1480. int first_byte;
  1481. const uint8_t *ptr;
  1482. first_byte = peek_byte_or_eof (port, &buf, &cur);
  1483. if (first_byte == EOF)
  1484. RETURN (0, EOF);
  1485. else if (first_byte < 0x80)
  1486. RETURN (1, first_byte);
  1487. else if (first_byte >= 0xc2 && first_byte <= 0xdf)
  1488. {
  1489. buf = scm_fill_input (port, 2, &cur, &avail);
  1490. ptr = scm_port_buffer_take_pointer (buf, cur);
  1491. if (avail < 2 || (ptr[1] & 0xc0) != 0x80)
  1492. DECODING_ERROR (1);
  1493. RETURN (2, (first_byte & 0x1f) << 6UL | (ptr[1] & 0x3f));
  1494. }
  1495. else if ((first_byte & 0xf0) == 0xe0)
  1496. {
  1497. buf = scm_fill_input (port, 3, &cur, &avail);
  1498. ptr = scm_port_buffer_take_pointer (buf, cur);
  1499. if (avail < 2 || (ptr[1] & 0xc0) != 0x80
  1500. || (ptr[0] == 0xe0 && ptr[1] < 0xa0)
  1501. || (ptr[0] == 0xed && ptr[1] > 0x9f))
  1502. DECODING_ERROR (1);
  1503. if (avail < 3 || (ptr[2] & 0xc0) != 0x80)
  1504. DECODING_ERROR (2);
  1505. RETURN (3,
  1506. ((scm_t_wchar) ptr[0] & 0x0f) << 12UL
  1507. | ((scm_t_wchar) ptr[1] & 0x3f) << 6UL
  1508. | (ptr[2] & 0x3f));
  1509. }
  1510. else if (first_byte >= 0xf0 && first_byte <= 0xf4)
  1511. {
  1512. buf = scm_fill_input (port, 4, &cur, &avail);
  1513. ptr = scm_port_buffer_take_pointer (buf, cur);
  1514. if (avail < 2 || (ptr[1] & 0xc0) != 0x80
  1515. || (ptr[0] == 0xf0 && ptr[1] < 0x90)
  1516. || (ptr[0] == 0xf4 && ptr[1] > 0x8f))
  1517. DECODING_ERROR (1);
  1518. if (avail < 3 || (ptr[2] & 0xc0) != 0x80)
  1519. DECODING_ERROR (2);
  1520. if (avail < 4 || (ptr[3] & 0xc0) != 0x80)
  1521. DECODING_ERROR (3);
  1522. RETURN (4,
  1523. ((scm_t_wchar) ptr[0] & 0x07) << 18UL
  1524. | ((scm_t_wchar) ptr[1] & 0x3f) << 12UL
  1525. | ((scm_t_wchar) ptr[2] & 0x3f) << 6UL
  1526. | (ptr[3] & 0x3f));
  1527. }
  1528. else
  1529. DECODING_ERROR (1);
  1530. decoding_error:
  1531. if (scm_is_eq (SCM_PORT (port)->conversion_strategy, sym_substitute))
  1532. /* *len already set. */
  1533. return UNICODE_REPLACEMENT_CHARACTER;
  1534. scm_decoding_error ("peek-char", EILSEQ, "input decoding error", port);
  1535. /* Not reached. */
  1536. return 0;
  1537. #undef DECODING_ERROR
  1538. #undef RETURN
  1539. }
  1540. /* Peek an ISO-8859-1 codepoint (a byte) from PORT. On success, return
  1541. the codepoint, and set *LEN to 1. Otherwise on EOF set *LEN to 0. */
  1542. static scm_t_wchar
  1543. peek_latin1_codepoint (SCM port, SCM *buf, size_t *cur, size_t *len)
  1544. {
  1545. scm_t_wchar ret = peek_byte_or_eof (port, buf, cur);
  1546. *len = ret == EOF ? 0 : 1;
  1547. return ret;
  1548. }
  1549. SCM_INTERNAL SCM scm_port_decode_char (SCM, SCM, SCM, SCM);
  1550. SCM_DEFINE (scm_port_decode_char, "port-decode-char", 4, 0, 0,
  1551. (SCM port, SCM bv, SCM start, SCM count),
  1552. "")
  1553. #define FUNC_NAME s_scm_port_decode_char
  1554. {
  1555. char *input, *output;
  1556. uint8_t utf8_buf[UTF8_BUFFER_SIZE];
  1557. iconv_t input_cd;
  1558. size_t c_start, c_count;
  1559. size_t input_left, output_left, done;
  1560. SCM_VALIDATE_OPINPORT (1, port);
  1561. SCM_VALIDATE_BYTEVECTOR (2, bv);
  1562. c_start = scm_to_size_t (start);
  1563. c_count = scm_to_size_t (count);
  1564. SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (bv));
  1565. SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (bv) - c_start);
  1566. input = (char *) SCM_BYTEVECTOR_CONTENTS (bv) + c_start;
  1567. input_left = c_count;
  1568. output = (char *) utf8_buf;
  1569. output_left = sizeof (utf8_buf);
  1570. /* FIXME: locking! */
  1571. scm_port_acquire_iconv_descriptors (port, &input_cd, NULL);
  1572. done = iconv (input_cd, &input, &input_left, &output, &output_left);
  1573. scm_port_release_iconv_descriptors (port);
  1574. if (done == (size_t) -1)
  1575. {
  1576. int err = errno;
  1577. if (err == EINVAL)
  1578. /* The input byte sequence did not form a complete
  1579. character. Read another byte and try again. */
  1580. return SCM_BOOL_F;
  1581. else if (scm_is_eq (SCM_PORT (port)->conversion_strategy,
  1582. sym_substitute))
  1583. return SCM_MAKE_CHAR (UNICODE_REPLACEMENT_CHARACTER);
  1584. else
  1585. scm_decoding_error ("decode-char", err, "input decoding error", port);
  1586. }
  1587. {
  1588. size_t output_size = sizeof (utf8_buf) - output_left;
  1589. if (output_size == 0)
  1590. /* iconv consumed some bytes without producing any output.
  1591. Most likely this means that a Unicode byte-order mark
  1592. (BOM) was consumed. In any case, keep going until we get
  1593. output. */
  1594. return SCM_BOOL_F;
  1595. return scm_c_make_char (utf8_to_codepoint (utf8_buf, output_size));
  1596. }
  1597. }
  1598. #undef FUNC_NAME
  1599. /* Peek a codepoint from PORT, decoding it through iconv. On success,
  1600. return the codepoint and set *LEN to the length in bytes. If there
  1601. was a decoding error and the port conversion strategy was
  1602. `substitute', then return #\? and set *LEN to the length of the
  1603. shortest prefix that cannot begin a valid UTF-8 sequence. Otherwise
  1604. signal an error. */
  1605. static scm_t_wchar
  1606. peek_iconv_codepoint (SCM port, SCM *buf, size_t *cur, size_t *len)
  1607. {
  1608. size_t input_size = 0;
  1609. SCM maybe_char = SCM_BOOL_F;
  1610. while (scm_is_false (maybe_char))
  1611. {
  1612. size_t avail;
  1613. *buf = scm_fill_input (port, input_size + 1, cur, &avail);
  1614. if (avail <= input_size)
  1615. {
  1616. *len = input_size;
  1617. if (input_size == 0)
  1618. /* Normal EOF. */
  1619. return EOF;
  1620. /* EOF found in the middle of a multibyte character. */
  1621. if (scm_is_eq (SCM_PORT (port)->conversion_strategy,
  1622. sym_substitute))
  1623. return UNICODE_REPLACEMENT_CHARACTER;
  1624. scm_decoding_error ("peek-char", EILSEQ,
  1625. "input decoding error", port);
  1626. /* Not reached. */
  1627. return 0;
  1628. }
  1629. input_size++;
  1630. maybe_char = scm_port_decode_char (port,
  1631. scm_port_buffer_bytevector (*buf),
  1632. SCM_I_MAKINUM (*cur),
  1633. SCM_I_MAKINUM (input_size));
  1634. }
  1635. *len = input_size;
  1636. return SCM_CHAR (maybe_char);
  1637. }
  1638. /* Peek a codepoint from PORT and return it in *CODEPOINT. Set *LEN to
  1639. the length in bytes of that representation. Return 0 on success and
  1640. an errno value on error. */
  1641. static SCM_C_INLINE scm_t_wchar
  1642. peek_codepoint (SCM port, SCM *buf, size_t *cur, size_t *len)
  1643. {
  1644. SCM encoding = SCM_PORT (port)->encoding;
  1645. if (scm_is_eq (encoding, sym_UTF_8))
  1646. return peek_utf8_codepoint (port, buf, cur, len);
  1647. else if (scm_is_eq (encoding, sym_ISO_8859_1))
  1648. return peek_latin1_codepoint (port, buf, cur, len);
  1649. else
  1650. return peek_iconv_codepoint (port, buf, cur, len);
  1651. }
  1652. /* Read a codepoint from PORT and return it. */
  1653. scm_t_wchar
  1654. scm_getc (SCM port)
  1655. #define FUNC_NAME "scm_getc"
  1656. {
  1657. size_t len = 0;
  1658. size_t cur;
  1659. SCM buf;
  1660. scm_t_wchar codepoint;
  1661. codepoint = peek_codepoint (port, &buf, &cur, &len);
  1662. scm_port_buffer_did_take (buf, cur, len);
  1663. if (codepoint == EOF)
  1664. scm_i_clear_pending_eof (port);
  1665. update_port_position (SCM_PORT (port)->position, codepoint);
  1666. return codepoint;
  1667. }
  1668. #undef FUNC_NAME
  1669. SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
  1670. (SCM port),
  1671. "Return the next character available from @var{port}, updating\n"
  1672. "@var{port} to point to the following character. If no more\n"
  1673. "characters are available, the end-of-file object is returned.\n"
  1674. "\n"
  1675. "When @var{port}'s data cannot be decoded according to its\n"
  1676. "character encoding, a @code{decoding-error} is raised and\n"
  1677. "@var{port} points past the erroneous byte sequence.\n")
  1678. #define FUNC_NAME s_scm_read_char
  1679. {
  1680. scm_t_wchar c;
  1681. if (SCM_UNBNDP (port))
  1682. port = scm_current_input_port ();
  1683. SCM_VALIDATE_OPINPORT (1, port);
  1684. c = scm_getc (port);
  1685. if (EOF == c)
  1686. return SCM_EOF_VAL;
  1687. return SCM_MAKE_CHAR (c);
  1688. }
  1689. #undef FUNC_NAME
  1690. /* Pushback. */
  1691. void
  1692. scm_unget_bytes (const uint8_t *buf, size_t len, SCM port)
  1693. #define FUNC_NAME "scm_unget_bytes"
  1694. {
  1695. scm_t_port *pt = SCM_PORT (port);
  1696. SCM read_buf = pt->read_buf;
  1697. size_t cur;
  1698. if (pt->rw_random)
  1699. scm_flush (port);
  1700. cur = scm_port_buffer_can_putback (read_buf);
  1701. if (cur < len)
  1702. {
  1703. /* The bytes don't fit directly in the read_buf. */
  1704. size_t buffered, size;
  1705. buffered = scm_port_buffer_can_take (read_buf, &cur);
  1706. size = scm_port_buffer_size (read_buf);
  1707. if (len <= size - buffered)
  1708. {
  1709. /* But they would fit if we shift the not-yet-read bytes from
  1710. the read_buf right. Let's do that. */
  1711. const uint8_t *to_shift = scm_port_buffer_take_pointer (read_buf, cur);
  1712. scm_port_buffer_reset_end (read_buf);
  1713. scm_port_buffer_putback (read_buf, to_shift, buffered, size);
  1714. }
  1715. else
  1716. {
  1717. /* Bah, have to expand the read_buf for the putback. */
  1718. while (size < len + buffered)
  1719. size *= 2;
  1720. read_buf = scm_expand_port_read_buffer_x (port,
  1721. scm_from_size_t (size),
  1722. SCM_BOOL_T);
  1723. }
  1724. cur = size - buffered;
  1725. }
  1726. scm_port_buffer_putback (read_buf, buf, len, cur);
  1727. }
  1728. #undef FUNC_NAME
  1729. void
  1730. scm_unget_byte (int c, SCM port)
  1731. {
  1732. unsigned char byte = c;
  1733. scm_unget_bytes (&byte, 1, port);
  1734. }
  1735. void
  1736. scm_ungetc (scm_t_wchar c, SCM port)
  1737. #define FUNC_NAME "scm_ungetc"
  1738. {
  1739. scm_t_port *pt = SCM_PORT (port);
  1740. char *result;
  1741. char result_buf[10];
  1742. size_t len;
  1743. len = sizeof (result_buf);
  1744. if (scm_is_eq (pt->encoding, sym_UTF_8))
  1745. {
  1746. if (c < 0x80)
  1747. {
  1748. result_buf[0] = (char) c;
  1749. result = result_buf;
  1750. len = 1;
  1751. }
  1752. else
  1753. result =
  1754. (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
  1755. }
  1756. else if (scm_is_eq (pt->encoding, sym_ISO_8859_1) && c <= 0xff)
  1757. {
  1758. result_buf[0] = (char) c;
  1759. result = result_buf;
  1760. len = 1;
  1761. }
  1762. else
  1763. {
  1764. scm_t_string_failed_conversion_handler handler =
  1765. scm_i_string_failed_conversion_handler (pt->conversion_strategy);
  1766. result = u32_conv_to_encoding (scm_i_symbol_chars (pt->encoding),
  1767. (enum iconv_ilseq_handler) handler,
  1768. (uint32_t *) &c, 1, NULL,
  1769. result_buf, &len);
  1770. }
  1771. if (SCM_UNLIKELY (result == NULL || len == 0))
  1772. scm_encoding_error (FUNC_NAME, errno,
  1773. "conversion to port encoding failed",
  1774. port, SCM_MAKE_CHAR (c));
  1775. scm_unget_bytes ((unsigned char *) result, len, port);
  1776. if (SCM_UNLIKELY (result != result_buf))
  1777. free (result);
  1778. {
  1779. long line;
  1780. int column;
  1781. line = scm_to_long (scm_port_position_line (pt->position));
  1782. column = scm_to_int (scm_port_position_column (pt->position));
  1783. if (c == '\n')
  1784. scm_port_position_set_line (pt->position, scm_from_long (line - 1));
  1785. if (column > 0)
  1786. scm_port_position_set_column (pt->position, scm_from_int (column - 1));
  1787. }
  1788. }
  1789. #undef FUNC_NAME
  1790. void
  1791. scm_ungets (const char *s, int n, SCM port)
  1792. {
  1793. /* This is simple minded and inefficient, but unreading strings is
  1794. * probably not a common operation, and remember that line and
  1795. * column numbers have to be handled...
  1796. *
  1797. * Please feel free to write an optimized version!
  1798. */
  1799. while (n--)
  1800. scm_ungetc (s[n], port);
  1801. }
  1802. SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
  1803. (SCM port),
  1804. "Return the next character available from @var{port},\n"
  1805. "@emph{without} updating @var{port} to point to the following\n"
  1806. "character. If no more characters are available, the\n"
  1807. "end-of-file object is returned.\n"
  1808. "\n"
  1809. "The value returned by\n"
  1810. "a call to @code{peek-char} is the same as the value that would\n"
  1811. "have been returned by a call to @code{read-char} on the same\n"
  1812. "port. The only difference is that the very next call to\n"
  1813. "@code{read-char} or @code{peek-char} on that @var{port} will\n"
  1814. "return the value returned by the preceding call to\n"
  1815. "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
  1816. "an interactive port will hang waiting for input whenever a call\n"
  1817. "to @code{read-char} would have hung.\n"
  1818. "\n"
  1819. "As for @code{read-char}, a @code{decoding-error} may be raised\n"
  1820. "if such a situation occurs. However, unlike with @code{read-char},\n"
  1821. "@var{port} still points at the beginning of the erroneous byte\n"
  1822. "sequence when the error is raised.\n")
  1823. #define FUNC_NAME s_scm_peek_char
  1824. {
  1825. SCM buf;
  1826. scm_t_wchar c;
  1827. size_t cur, len = 0;
  1828. if (SCM_UNBNDP (port))
  1829. port = scm_current_input_port ();
  1830. SCM_VALIDATE_OPINPORT (1, port);
  1831. c = peek_codepoint (port, &buf, &cur, &len);
  1832. return c == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (c);
  1833. }
  1834. #undef FUNC_NAME
  1835. SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
  1836. (SCM cobj, SCM port),
  1837. "Place character @var{cobj} in @var{port} so that it will be\n"
  1838. "read by the next read operation. If called multiple times, the\n"
  1839. "unread characters will be read again in last-in first-out\n"
  1840. "order. If @var{port} is not supplied, the current input port\n"
  1841. "is used.")
  1842. #define FUNC_NAME s_scm_unread_char
  1843. {
  1844. int c;
  1845. SCM_VALIDATE_CHAR (1, cobj);
  1846. if (SCM_UNBNDP (port))
  1847. port = scm_current_input_port ();
  1848. SCM_VALIDATE_OPINPORT (2, port);
  1849. c = SCM_CHAR (cobj);
  1850. scm_ungetc (c, port);
  1851. return cobj;
  1852. }
  1853. #undef FUNC_NAME
  1854. SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
  1855. (SCM str, SCM port),
  1856. "Place the string @var{str} in @var{port} so that its characters will be\n"
  1857. "read in subsequent read operations. If called multiple times, the\n"
  1858. "unread characters will be read again in last-in first-out order. If\n"
  1859. "@var{port} is not supplied, the current-input-port is used.")
  1860. #define FUNC_NAME s_scm_unread_string
  1861. {
  1862. size_t n;
  1863. SCM_VALIDATE_STRING (1, str);
  1864. if (SCM_UNBNDP (port))
  1865. port = scm_current_input_port ();
  1866. SCM_VALIDATE_OPINPORT (2, port);
  1867. n = scm_i_string_length (str);
  1868. while (n--)
  1869. scm_ungetc (scm_i_string_ref (str, n), port);
  1870. return str;
  1871. }
  1872. #undef FUNC_NAME
  1873. /* Manipulating the buffers. */
  /* Buffering-mode symbols: scm_setvbuf compares its MODE argument
     against these with scm_is_eq to choose between no buffering, line
     buffering and block buffering.  */
  1874. SCM_SYMBOL (sym_none, "none");
  1875. SCM_SYMBOL (sym_line, "line");
  1876. SCM_SYMBOL (sym_block, "block");
  1877. SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0,
  1878. (SCM port, SCM mode, SCM size),
  1879. "Set the buffering mode for @var{port}. @var{mode} can be one\n"
  1880. "of the following symbols:\n"
  1881. "@table @code\n"
  1882. "@item none\n"
  1883. "no buffering\n"
  1884. "@item line\n"
  1885. "line buffering\n"
  1886. "@item block\n"
  1887. "block buffering, using a newly allocated buffer of @var{size} bytes.\n"
  1888. "If @var{size} is omitted, a default size will be used.\n"
  1889. "@end table\n\n"
  1890. "Only certain types of ports are supported, most importantly\n"
  1891. "file ports.")
  1892. #define FUNC_NAME s_scm_setvbuf
  1893. {
  1894. long csize;
  1895. scm_t_port *pt;
  1896. scm_t_port_type *ptob;
  1897. scm_t_bits tag_word;
  1898. size_t read_buf_size, write_buf_size, cur, avail;
  1899. SCM saved_read_buf;
  1900. port = SCM_COERCE_OUTPORT (port);
  1901. SCM_VALIDATE_OPENPORT (1, port);
  1902. pt = SCM_PORT (port);
  1903. ptob = SCM_PORT_TYPE (port);
  1904. tag_word = SCM_CELL_WORD_0 (port) & ~(SCM_BUF0 | SCM_BUFLINE);
  1905. if (scm_is_eq (mode, sym_none))
  1906. {
  1907. tag_word |= SCM_BUF0;
  1908. if (!SCM_UNBNDP (size) && !scm_is_eq (size, SCM_INUM0))
  1909. scm_out_of_range (FUNC_NAME, size);
  1910. csize = 0;
  1911. }
  1912. else if (scm_is_eq (mode, sym_line))
  1913. {
  1914. csize = SCM_UNBNDP (size) ? -1 : scm_to_int (size);
  1915. tag_word |= SCM_BUFLINE;
  1916. }
  1917. else if (scm_is_eq (mode, sym_block))
  1918. {
  1919. csize = SCM_UNBNDP (size) ? -1 : scm_to_int (size);
  1920. }
  1921. else
  1922. scm_out_of_range (FUNC_NAME, mode);
  1923. if (!SCM_UNBNDP (size) && csize < 0)
  1924. scm_out_of_range (FUNC_NAME, size);
  1925. if (csize >= 0)
  1926. read_buf_size = write_buf_size = csize;
  1927. else
  1928. {
  1929. read_buf_size = write_buf_size = default_buffer_size;
  1930. scm_dynwind_begin (0);
  1931. scm_dynwind_acquire_port (port);
  1932. if (ptob->get_natural_buffer_sizes)
  1933. ptob->get_natural_buffer_sizes (port, &read_buf_size, &write_buf_size);
  1934. scm_dynwind_end ();
  1935. }
  1936. /* Minimum buffer size is one byte. */
  1937. if (read_buf_size == 0)
  1938. read_buf_size = 1;
  1939. if (write_buf_size == 0)
  1940. write_buf_size = 1;
  1941. if (SCM_OUTPUT_PORT_P (port))
  1942. scm_flush (port);
  1943. saved_read_buf = pt->read_buf;
  1944. SCM_SET_CELL_WORD_0 (port, tag_word);
  1945. pt->read_buffering = read_buf_size;
  1946. pt->read_buf = make_port_buffer (port, read_buf_size);
  1947. pt->write_buf = make_port_buffer (port, write_buf_size);
  1948. avail = scm_port_buffer_can_take (saved_read_buf, &cur);
  1949. scm_unget_bytes (scm_port_buffer_take_pointer (saved_read_buf, cur), avail,
  1950. port);
  1951. scm_port_buffer_set_has_eof_p (pt->read_buf,
  1952. scm_port_buffer_has_eof_p (saved_read_buf));
  1953. return SCM_UNSPECIFIED;
  1954. }
  1955. #undef FUNC_NAME
  1956. /* Move up to READ_LEN bytes from PORT's read buffer into memory
  1957. starting at DEST. Return the number of bytes moved. PORT's
  1958. line/column numbers are left unchanged. */
  1959. size_t
  1960. scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
  1961. {
  1962. SCM read_buf = SCM_PORT (port)->read_buf;
  1963. size_t cur, avail;
  1964. avail = scm_port_buffer_can_take (read_buf, &cur);
  1965. return scm_port_buffer_take (read_buf, (uint8_t *) dest, read_len,
  1966. cur, avail);
  1967. }
  1968. /* Clear a port's read buffers, returning the contents. */
  1969. SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
  1970. (SCM port),
  1971. "This procedure clears a port's input buffers, similar\n"
  1972. "to the way that force-output clears the output buffer. The\n"
  1973. "contents of the buffers are returned as a single string, e.g.,\n"
  1974. "\n"
  1975. "@lisp\n"
  1976. "(define p (open-input-file ...))\n"
  1977. "(drain-input p) => empty string, nothing buffered yet.\n"
  1978. "(unread-char (read-char p) p)\n"
  1979. "(drain-input p) => initial chars from p, up to the buffer size.\n"
  1980. "@end lisp\n\n"
  1981. "Draining the buffers may be useful for cleanly finishing\n"
  1982. "buffered I/O so that the file descriptor can be used directly\n"
  1983. "for further input.")
  1984. #define FUNC_NAME s_scm_drain_input
  1985. {
  1986. SCM read_buf, result;
  1987. size_t avail, cur;
  1988. SCM_VALIDATE_OPINPORT (1, port);
  1989. read_buf = SCM_PORT (port)->read_buf;
  1990. avail = scm_port_buffer_can_take (read_buf, &cur);
  1991. if (avail)
  1992. {
  1993. const uint8_t *ptr = scm_port_buffer_take_pointer (read_buf, cur);
  1994. result = scm_from_port_stringn ((const char *) ptr, avail, port);
  1995. scm_port_buffer_did_take (read_buf, cur, avail);
  1996. }
  1997. else
  1998. result = scm_nullstr;
  1999. return result;
  2000. }
  2001. #undef FUNC_NAME
  2002. void
  2003. scm_end_input (SCM port)
  2004. {
  2005. SCM buf;
  2006. size_t cur, avail;
  2007. scm_t_off offset;
  2008. buf = SCM_PORT (port)->read_buf;
  2009. avail = scm_port_buffer_can_take (buf, &cur);
  2010. scm_port_buffer_did_take (buf, cur, avail);
  2011. offset = - (scm_t_off) avail;
  2012. if (offset != 0)
  2013. {
  2014. scm_dynwind_begin (0);
  2015. scm_dynwind_acquire_port (port);
  2016. SCM_PORT_TYPE (port)->seek (port, offset, SEEK_CUR);
  2017. scm_dynwind_end ();
  2018. }
  2019. }
  2020. SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
  2021. (SCM port),
  2022. "Flush the specified output port, or the current output port if @var{port}\n"
  2023. "is omitted. The current output buffer contents are passed to the\n"
  2024. "underlying port implementation (e.g., in the case of fports, the\n"
  2025. "data will be written to the file and the output buffer will be cleared.)\n"
  2026. "It has no effect on an unbuffered port.\n\n"
  2027. "The return value is unspecified.")
  2028. #define FUNC_NAME s_scm_force_output
  2029. {
  2030. if (SCM_UNBNDP (port))
  2031. port = scm_current_output_port ();
  2032. else
  2033. {
  2034. port = SCM_COERCE_OUTPORT (port);
  2035. SCM_VALIDATE_OPOUTPORT (1, port);
  2036. }
  2037. scm_flush (port);
  2038. return SCM_UNSPECIFIED;
  2039. }
  2040. #undef FUNC_NAME
  2041. static void scm_i_write (SCM port, SCM buf);
  2042. void
  2043. scm_flush (SCM port)
  2044. {
  2045. SCM buf = SCM_PORT (port)->write_buf;
  2046. size_t cur;
  2047. if (scm_port_buffer_can_take (buf, &cur))
  2048. scm_i_write (port, buf);
  2049. }
  2050. /* Return number of bytes consumed, or zero if no BOM was consumed. */
  2051. static size_t
  2052. maybe_consume_bom (SCM port, const unsigned char *bom, size_t bom_len)
  2053. {
  2054. SCM read_buf;
  2055. const uint8_t *buf;
  2056. size_t cur, avail;
  2057. if (peek_byte_or_eof (port, &read_buf, &cur) != bom[0])
  2058. return 0;
  2059. /* Make sure there's enough space in the buffer for a BOM. Now that
  2060. we matched the first byte, we know we're going to have to read this
  2061. many bytes anyway. */
  2062. read_buf = scm_fill_input (port, bom_len, &cur, &avail);
  2063. buf = scm_port_buffer_take_pointer (read_buf, cur);
  2064. if (avail < bom_len)
  2065. return 0;
  2066. if (memcmp (buf, bom, bom_len) != 0)
  2067. return 0;
  2068. scm_port_buffer_did_take (read_buf, cur, bom_len);
  2069. return bom_len;
  2070. }
  2071. static size_t
  2072. port_clear_stream_start_for_bom_read (SCM port, enum bom_io_mode io_mode)
  2073. {
  2074. scm_t_port *pt = SCM_PORT (port);
  2075. if (!pt->at_stream_start_for_bom_read)
  2076. return 0;
  2077. /* Maybe slurp off a byte-order marker. */
  2078. pt->at_stream_start_for_bom_read = 0;
  2079. if (pt->rw_random)
  2080. pt->at_stream_start_for_bom_write = 0;
  2081. if (io_mode == BOM_IO_BINARY)
  2082. return 0;
  2083. if (scm_is_eq (pt->encoding, sym_UTF_8))
  2084. return maybe_consume_bom (port, scm_utf8_bom, sizeof (scm_utf8_bom));
  2085. if (scm_is_eq (pt->encoding, sym_UTF_16))
  2086. {
  2087. if (maybe_consume_bom (port, scm_utf16le_bom, sizeof (scm_utf16le_bom)))
  2088. {
  2089. scm_specialize_port_encoding_x (port, sym_UTF_16LE);
  2090. return 2;
  2091. }
  2092. if (maybe_consume_bom (port, scm_utf16be_bom, sizeof (scm_utf16be_bom)))
  2093. {
  2094. scm_specialize_port_encoding_x (port, sym_UTF_16BE);
  2095. return 2;
  2096. }
  2097. /* Big-endian by default. */
  2098. scm_specialize_port_encoding_x (port, sym_UTF_16BE);
  2099. return 0;
  2100. }
  2101. if (scm_is_eq (pt->encoding, sym_UTF_32))
  2102. {
  2103. if (maybe_consume_bom (port, scm_utf32le_bom, sizeof (scm_utf32le_bom)))
  2104. {
  2105. /* Big-endian by default. */
  2106. scm_specialize_port_encoding_x (port, sym_UTF_32LE);
  2107. return 4;
  2108. }
  2109. if (maybe_consume_bom (port, scm_utf32be_bom, sizeof (scm_utf32be_bom)))
  2110. {
  2111. scm_specialize_port_encoding_x (port, sym_UTF_32BE);
  2112. return 4;
  2113. }
  2114. /* Big-endian by default. */
  2115. scm_specialize_port_encoding_x (port, sym_UTF_32BE);
  2116. return 0;
  2117. }
  2118. return 0;
  2119. }
  2120. SCM_INTERNAL SCM scm_port_clear_stream_start_for_bom_read (SCM port);
  2121. SCM_DEFINE (scm_port_clear_stream_start_for_bom_read,
  2122. "port-clear-stream-start-for-bom-read", 1, 0, 0,
  2123. (SCM port),
  2124. "")
  2125. #define FUNC_NAME s_scm_port_clear_stream_start_for_bom_read
  2126. {
  2127. scm_t_port *pt;
  2128. SCM_VALIDATE_PORT (1, port);
  2129. pt = SCM_PORT (port);
  2130. if (!pt->at_stream_start_for_bom_read)
  2131. return SCM_BOOL_F;
  2132. /* Maybe slurp off a byte-order marker. */
  2133. pt->at_stream_start_for_bom_read = 0;
  2134. if (pt->rw_random)
  2135. pt->at_stream_start_for_bom_write = 0;
  2136. return SCM_BOOL_T;
  2137. }
  2138. #undef FUNC_NAME
  2139. SCM_INTERNAL SCM scm_port_clear_stream_start_for_bom_write (SCM, SCM);
  2140. SCM_DEFINE (scm_port_clear_stream_start_for_bom_write,
  2141. "port-clear-stream-start-for-bom-write", 1, 1, 0,
  2142. (SCM port, SCM buf),
  2143. "")
  2144. #define FUNC_NAME s_scm_port_clear_stream_start_for_bom_write
  2145. {
  2146. scm_t_port *pt;
  2147. SCM_VALIDATE_PORT (1, port);
  2148. pt = SCM_PORT (port);
  2149. if (!pt->at_stream_start_for_bom_write)
  2150. return SCM_INUM0;
  2151. pt->at_stream_start_for_bom_write = 0;
  2152. if (pt->rw_random)
  2153. pt->at_stream_start_for_bom_read = 0;
  2154. if (SCM_UNBNDP (buf))
  2155. return SCM_INUM0;
  2156. /* Write a BOM if appropriate. */
  2157. if (scm_is_eq (pt->encoding, sym_UTF_16))
  2158. {
  2159. SCM precise_encoding;
  2160. size_t end, avail, ret;
  2161. scm_port_acquire_iconv_descriptors (port, NULL, NULL);
  2162. precise_encoding = pt->precise_encoding;
  2163. scm_port_release_iconv_descriptors (port);
  2164. avail = scm_port_buffer_can_put (buf, &end);
  2165. if (scm_is_eq (precise_encoding, sym_UTF_16LE))
  2166. ret = scm_port_buffer_put (buf, scm_utf16le_bom,
  2167. sizeof (scm_utf16le_bom), end, avail);
  2168. else
  2169. ret = scm_port_buffer_put (buf, scm_utf16be_bom,
  2170. sizeof (scm_utf16be_bom), end, avail);
  2171. return scm_from_size_t (ret);
  2172. }
  2173. else if (scm_is_eq (pt->encoding, sym_UTF_32))
  2174. {
  2175. SCM precise_encoding;
  2176. size_t end, avail, ret;
  2177. scm_port_acquire_iconv_descriptors (port, NULL, NULL);
  2178. precise_encoding = pt->precise_encoding;
  2179. scm_port_release_iconv_descriptors (port);
  2180. avail = scm_port_buffer_can_put (buf, &end);
  2181. if (scm_is_eq (precise_encoding, sym_UTF_32LE))
  2182. ret = scm_port_buffer_put (buf, scm_utf32le_bom,
  2183. sizeof (scm_utf32le_bom), end, avail);
  2184. else
  2185. ret = scm_port_buffer_put (buf, scm_utf32be_bom,
  2186. sizeof (scm_utf32be_bom), end, avail);
  2187. return scm_from_size_t (ret);
  2188. }
  2189. return SCM_INUM0;
  2190. }
  2191. #undef FUNC_NAME
  2192. SCM
  2193. scm_fill_input (SCM port, size_t minimum_size, size_t *cur_out,
  2194. size_t *avail_out)
  2195. {
  2196. scm_t_port *pt = SCM_PORT (port);
  2197. SCM read_buf;
  2198. size_t cur, buffered;
  2199. if (minimum_size == 0)
  2200. minimum_size = 1;
  2201. /* The default is BOM_IO_TEXT. Binary input procedures should
  2202. port_clear_stream_start_for_bom_read with BOM_IO_BINARY before
  2203. filling the input buffers. */
  2204. port_clear_stream_start_for_bom_read (port, BOM_IO_TEXT);
  2205. read_buf = pt->read_buf;
  2206. buffered = scm_port_buffer_can_take (read_buf, &cur);
  2207. if (buffered >= minimum_size
  2208. || scm_is_true (scm_port_buffer_has_eof_p (read_buf)))
  2209. {
  2210. *cur_out = cur;
  2211. *avail_out = buffered;
  2212. return read_buf;
  2213. }
  2214. if (pt->rw_random)
  2215. scm_flush (port);
  2216. /* Prepare to read. Make sure there is enough space in the buffer for
  2217. minimum_size, and ensure that cur is zero so that we fill towards
  2218. the end of the buffer. */
  2219. if (minimum_size > scm_port_buffer_size (read_buf))
  2220. /* Grow the read buffer. */
  2221. read_buf = scm_expand_port_read_buffer_x (port,
  2222. scm_from_size_t (minimum_size),
  2223. SCM_BOOL_F);
  2224. else if (buffered == 0)
  2225. scm_port_buffer_reset (read_buf);
  2226. else
  2227. {
  2228. const uint8_t *to_shift;
  2229. to_shift = scm_port_buffer_take_pointer (read_buf, cur);
  2230. scm_port_buffer_reset (read_buf);
  2231. memmove (scm_port_buffer_put_pointer (read_buf, 0), to_shift, buffered);
  2232. scm_port_buffer_did_put (read_buf, 0, buffered);
  2233. }
  2234. while (buffered < minimum_size
  2235. && !scm_is_true (scm_port_buffer_has_eof_p (read_buf)))
  2236. {
  2237. size_t count;
  2238. size_t buffering = pt->read_buffering;
  2239. size_t to_read;
  2240. if (pt->read_buffering < minimum_size)
  2241. buffering = minimum_size;
  2242. to_read = buffering - buffered;
  2243. count = scm_i_read_bytes (port, scm_port_buffer_bytevector (read_buf),
  2244. buffered, to_read);
  2245. scm_port_buffer_did_put (read_buf, buffered, count);
  2246. buffered += count;
  2247. scm_port_buffer_set_has_eof_p (read_buf, scm_from_bool (count == 0));
  2248. }
  2249. /* We ensured cur was zero. */
  2250. *cur_out = 0;
  2251. *avail_out = buffered;
  2252. return read_buf;
  2253. }
  2254. SCM_DEFINE (scm_port_random_access_p, "port-random-access?", 1, 0, 0,
  2255. (SCM port),
  2256. "Return true if the port is random-access, or false otherwise.")
  2257. #define FUNC_NAME s_scm_port_random_access_p
  2258. {
  2259. SCM_VALIDATE_OPPORT (1, port);
  2260. return scm_from_bool (SCM_PORT (port)->rw_random);
  2261. }
  2262. #undef FUNC_NAME
  2263. SCM_DEFINE (scm_port_read_buffering, "port-read-buffering", 1, 0, 0,
  2264. (SCM port),
  2265. "Return the amount of read buffering on a port, in bytes.")
  2266. #define FUNC_NAME s_scm_port_read_buffering
  2267. {
  2268. SCM_VALIDATE_OPINPORT (1, port);
  2269. return scm_from_size_t (SCM_PORT (port)->read_buffering);
  2270. }
  2271. #undef FUNC_NAME
  2272. SCM_DEFINE (scm_expand_port_read_buffer_x, "expand-port-read-buffer!", 2, 1, 0,
  2273. (SCM port, SCM size, SCM putback_p),
  2274. "Expand the read buffer of @var{port} to @var{size}. Copy the\n"
  2275. "old buffered data, if, any, to the beginning of the new\n"
  2276. "buffer, unless @var{putback_p} is true, in which case copy it\n"
  2277. "to the end instead. Return the new buffer.")
  2278. #define FUNC_NAME s_scm_expand_port_read_buffer_x
  2279. {
  2280. scm_t_port *pt;
  2281. size_t c_size, cur, avail;
  2282. SCM new_buf;
  2283. SCM_VALIDATE_OPINPORT (1, port);
  2284. pt = SCM_PORT (port);
  2285. c_size = scm_to_size_t (size);
  2286. SCM_ASSERT_RANGE (2, size, c_size > scm_port_buffer_size (pt->read_buf));
  2287. if (SCM_UNBNDP (putback_p))
  2288. putback_p = SCM_BOOL_F;
  2289. new_buf = make_port_buffer (port, c_size);
  2290. scm_port_buffer_set_has_eof_p (new_buf,
  2291. scm_port_buffer_has_eof_p (pt->read_buf));
  2292. avail = scm_port_buffer_can_take (pt->read_buf, &cur);
  2293. if (scm_is_true (putback_p))
  2294. {
  2295. scm_port_buffer_reset_end (new_buf);
  2296. scm_port_buffer_putback (new_buf,
  2297. scm_port_buffer_take_pointer (pt->read_buf, cur),
  2298. avail, c_size);
  2299. }
  2300. else
  2301. {
  2302. scm_port_buffer_reset (new_buf);
  2303. scm_port_buffer_put (new_buf,
  2304. scm_port_buffer_take_pointer (pt->read_buf, cur),
  2305. avail, 0, c_size);
  2306. }
  2307. pt->read_buf = new_buf;
  2308. return new_buf;
  2309. }
  2310. #undef FUNC_NAME
  2311. SCM_DEFINE (scm_port_read, "port-read", 1, 0, 0, (SCM port),
  2312. "Return the read function for an input port.")
  2313. #define FUNC_NAME s_scm_port_read
  2314. {
  2315. SCM_VALIDATE_OPINPORT (1, port);
  2316. return SCM_PORT_TYPE (port)->scm_read;
  2317. }
  2318. #undef FUNC_NAME
  2319. SCM_DEFINE (scm_port_write, "port-write", 1, 0, 0,
  2320. (SCM port),
  2321. "Return the write function for an output port.")
  2322. #define FUNC_NAME s_scm_port_write
  2323. {
  2324. SCM_VALIDATE_OPOUTPORT (1, port);
  2325. return SCM_PORT_TYPE (port)->scm_write;
  2326. }
  2327. #undef FUNC_NAME
  2328. SCM_DEFINE (scm_port_read_buffer, "port-read-buffer", 1, 0, 0,
  2329. (SCM port),
  2330. "Return the read buffer for a port.")
  2331. #define FUNC_NAME s_scm_port_read_buffer
  2332. {
  2333. SCM_VALIDATE_OPPORT (1, port);
  2334. return SCM_PORT (port)->read_buf;
  2335. }
  2336. #undef FUNC_NAME
  2337. SCM_DEFINE (scm_port_write_buffer, "port-write-buffer", 1, 0, 0,
  2338. (SCM port),
  2339. "Return the write buffer for a port.")
  2340. #define FUNC_NAME s_scm_port_write_buffer
  2341. {
  2342. SCM_VALIDATE_OPPORT (1, port);
  2343. return SCM_PORT (port)->write_buf;
  2344. }
  2345. #undef FUNC_NAME
  2346. SCM_DEFINE (scm_port_auxiliary_write_buffer, "port-auxiliary-write-buffer",
  2347. 1, 0, 0, (SCM port),
  2348. "Return the auxiliary write buffer for a port.")
  2349. #define FUNC_NAME s_scm_port_auxiliary_write_buffer
  2350. {
  2351. scm_t_port *pt;
  2352. SCM_VALIDATE_OPPORT (1, port);
  2353. pt = SCM_PORT (port);
  2354. if (scm_is_false (pt->write_buf_aux))
  2355. pt->write_buf_aux = make_port_buffer (port, AUXILIARY_WRITE_BUFFER_SIZE);
  2356. return pt->write_buf_aux;
  2357. }
  2358. #undef FUNC_NAME
  2359. SCM_INTERNAL SCM scm_port_line_buffered_p (SCM);
  2360. SCM_DEFINE (scm_port_line_buffered_p, "port-line-buffered?", 1, 0, 0,
  2361. (SCM port),
  2362. "Return true if the port is line buffered.")
  2363. #define FUNC_NAME s_scm_port_line_buffered_p
  2364. {
  2365. SCM_VALIDATE_OPPORT (1, port);
  2366. return scm_from_bool (SCM_CELL_WORD_0 (port) & SCM_BUFLINE);
  2367. }
  2368. #undef FUNC_NAME
  2369. /* Output. */
  2370. static void
  2371. scm_i_write_bytes (SCM port, SCM src, size_t start, size_t count)
  2372. {
  2373. size_t written = 0;
  2374. scm_t_port_type *ptob = SCM_PORT_TYPE (port);
  2375. if (count > SCM_BYTEVECTOR_LENGTH (src))
  2376. fprintf (stderr, "count: %zu %zu\n", count, scm_c_bytevector_length (src));
  2377. assert (count <= SCM_BYTEVECTOR_LENGTH (src));
  2378. assert (start + count <= SCM_BYTEVECTOR_LENGTH (src));
  2379. scm_dynwind_begin (0);
  2380. scm_dynwind_acquire_port (port);
  2381. do
  2382. {
  2383. size_t ret = ptob->c_write (port, src, start + written, count - written);
  2384. if (ret == (size_t) -1)
  2385. {
  2386. if (SCM_PORT_FINALIZING_P (port))
  2387. {
  2388. /* This port is being closed because it became unreachable
  2389. and was finalized, but it has buffered output, and the
  2390. resource is not currently writable. Instead of
  2391. blocking, discard buffered output and warn. To avoid
  2392. this situation, force-output on the port before letting
  2393. it go! */
  2394. scm_puts
  2395. ("Warning: Discarding buffered output on non-blocking port\n"
  2396. " ",
  2397. scm_current_warning_port ());
  2398. scm_display (port, scm_current_warning_port());
  2399. scm_puts
  2400. ("\n"
  2401. " closed by the garbage collector. To avoid this\n"
  2402. " behavior and this warning, call `force-output' or\n"
  2403. " `close-port' on the port before letting go of it.\n",
  2404. scm_current_warning_port ());
  2405. break;
  2406. }
  2407. else
  2408. port_poll (port, POLLOUT, -1);
  2409. }
  2410. else
  2411. written += ret;
  2412. }
  2413. while (written < count);
  2414. scm_dynwind_end ();
  2415. assert (written == count);
  2416. }
  2417. static void
  2418. scm_i_write (SCM port, SCM buf)
  2419. {
  2420. size_t start, count;
  2421. scm_port_clear_stream_start_for_bom_write (port, SCM_UNDEFINED);
  2422. /* Update cursors before attempting to write, assuming that I/O errors
  2423. are sticky. That way if the write throws an error, causing the
  2424. computation to abort, and possibly causing the port to be collected
  2425. by GC when it's open, any subsequent close-port / force-output
  2426. won't signal *another* error. */
  2427. count = scm_port_buffer_can_take (buf, &start);
  2428. scm_port_buffer_reset (buf);
  2429. scm_i_write_bytes (port, scm_port_buffer_bytevector (buf), start,
  2430. count);
  2431. }
  2432. /* Used by an application to write arbitrary number of bytes to an SCM
  2433. port. Similar semantics as libc write. However, unlike libc write,
  2434. scm_c_write writes the requested number of bytes.
  2435. Warning: Doesn't update port line and column counts! */
  2436. void
  2437. scm_c_write_bytes (SCM port, SCM src, size_t start, size_t count)
  2438. #define FUNC_NAME "scm_c_write_bytes"
  2439. {
  2440. scm_t_port *pt;
  2441. SCM write_buf;
  2442. SCM_VALIDATE_OPOUTPORT (1, port);
  2443. pt = SCM_PORT (port);
  2444. write_buf = pt->write_buf;
  2445. if (pt->rw_random)
  2446. scm_end_input (port);
  2447. if (count < scm_port_buffer_size (write_buf))
  2448. {
  2449. size_t cur, end;
  2450. /* Make it so that the write_buf "end" cursor is only nonzero if
  2451. there are buffered bytes already. */
  2452. if (scm_port_buffer_can_take (write_buf, &cur) == 0)
  2453. {
  2454. scm_port_buffer_reset (write_buf);
  2455. cur = 0;
  2456. }
  2457. /* We buffer writes that are smaller in size than the write
  2458. buffer. If the buffer is too full to hold the new data, we
  2459. flush it beforehand. Otherwise it could be that the buffer is
  2460. full after filling it with the new data; if that's the case, we
  2461. flush then instead. */
  2462. if (scm_port_buffer_can_put (write_buf, &end) < count)
  2463. {
  2464. scm_i_write (port, write_buf);
  2465. end = 0;
  2466. }
  2467. {
  2468. signed char *src_ptr = SCM_BYTEVECTOR_CONTENTS (src) + start;
  2469. scm_port_buffer_put (write_buf, (uint8_t *) src_ptr, count,
  2470. end, count);
  2471. }
  2472. if (scm_port_buffer_can_put (write_buf, &end) == 0)
  2473. scm_i_write (port, write_buf);
  2474. }
  2475. else
  2476. {
  2477. size_t tmp;
  2478. /* Our write would overflow the buffer. Flush buffered bytes (if
  2479. needed), then write our bytes with just one syscall. */
  2480. if (scm_port_buffer_can_take (write_buf, &tmp))
  2481. scm_i_write (port, write_buf);
  2482. scm_i_write_bytes (port, src, start, count);
  2483. }
  2484. }
  2485. #undef FUNC_NAME
  2486. /* Like scm_c_write_bytes, but always writes through the write buffer.
  2487. Used when an application wants to write bytes stored in an area not
  2488. managed by GC. */
  2489. void
  2490. scm_c_write (SCM port, const void *ptr, size_t size)
  2491. #define FUNC_NAME "scm_c_write"
  2492. {
  2493. scm_t_port *pt;
  2494. SCM write_buf;
  2495. size_t end, avail, written = 0;
  2496. int using_aux_buffer = 0;
  2497. const uint8_t *src = ptr;
  2498. SCM_VALIDATE_OPOUTPORT (1, port);
  2499. pt = SCM_PORT (port);
  2500. if (pt->rw_random)
  2501. scm_end_input (port);
  2502. /* Imagine we are writing 40 bytes on an unbuffered port. If we were
  2503. writing from a bytevector we could pass that write directly to the
  2504. port. But since we aren't, we need to go through a bytevector, and
  2505. if we went through the port buffer we'd have to make 40 individual
  2506. calls to the write function. That would be terrible. Really we
  2507. need an intermediate bytevector. But, we shouldn't use a trick
  2508. analogous to what we do with expand-port-read-buffer!, because the
  2509. way we use the cur and end cursors doesn't seem to facilitate that.
  2510. So instead we buffer through an auxiliary write buffer if needed.
  2511. To avoid re-allocating this buffer all the time, we store it on the
  2512. port. It should never be left with buffered data.
  2513. Use of an auxiliary write buffer is triggered if the buffer is
  2514. smaller than the size we would make for an auxiliary write buffer,
  2515. and the write is bigger than the buffer. */
  2516. write_buf = pt->write_buf;
  2517. if (scm_port_buffer_size (write_buf) < size &&
  2518. scm_port_buffer_size (write_buf) < AUXILIARY_WRITE_BUFFER_SIZE)
  2519. {
  2520. using_aux_buffer = 1;
  2521. write_buf = scm_port_auxiliary_write_buffer (port);
  2522. }
  2523. if (using_aux_buffer)
  2524. {
  2525. end = 0;
  2526. avail = AUXILIARY_WRITE_BUFFER_SIZE;
  2527. }
  2528. else
  2529. avail = scm_port_buffer_can_put (write_buf, &end);
  2530. while (written < size)
  2531. {
  2532. size_t did_put = scm_port_buffer_put (write_buf, src, size - written,
  2533. end, avail);
  2534. written += did_put;
  2535. src += did_put;
  2536. if (using_aux_buffer || did_put == avail)
  2537. {
  2538. scm_i_write (port, write_buf);
  2539. end = 0;
  2540. avail = scm_port_buffer_size (write_buf);
  2541. }
  2542. }
  2543. }
  2544. #undef FUNC_NAME
  2545. /* The encoded escape sequence will be written to BUF, and will be valid
  2546. ASCII (so also valid ISO-8859-1 and UTF-8). Return the number of
  2547. bytes written. */
  2548. static size_t
  2549. encode_escape_sequence (scm_t_wchar ch, uint8_t buf[ESCAPE_BUFFER_SIZE])
  2550. {
  2551. /* Represent CH using the in-string escape syntax. */
  2552. static const char hex[] = "0123456789abcdef";
  2553. static const char escapes[7] = "abtnvfr";
  2554. size_t i = 0;
  2555. buf[i++] = '\\';
  2556. if (ch >= 0x07 && ch <= 0x0D && ch != 0x0A)
  2557. /* Use special escapes for some C0 controls. */
  2558. buf[i++] = escapes[ch - 0x07];
  2559. else if (!SCM_R6RS_ESCAPES_P)
  2560. {
  2561. if (ch <= 0xFF)
  2562. {
  2563. buf[i++] = 'x';
  2564. buf[i++] = hex[ch / 16];
  2565. buf[i++] = hex[ch % 16];
  2566. }
  2567. else if (ch <= 0xFFFF)
  2568. {
  2569. buf[i++] = 'u';
  2570. buf[i++] = hex[(ch & 0xF000) >> 12];
  2571. buf[i++] = hex[(ch & 0xF00) >> 8];
  2572. buf[i++] = hex[(ch & 0xF0) >> 4];
  2573. buf[i++] = hex[(ch & 0xF)];
  2574. }
  2575. else if (ch > 0xFFFF)
  2576. {
  2577. buf[i++] = 'U';
  2578. buf[i++] = hex[(ch & 0xF00000) >> 20];
  2579. buf[i++] = hex[(ch & 0xF0000) >> 16];
  2580. buf[i++] = hex[(ch & 0xF000) >> 12];
  2581. buf[i++] = hex[(ch & 0xF00) >> 8];
  2582. buf[i++] = hex[(ch & 0xF0) >> 4];
  2583. buf[i++] = hex[(ch & 0xF)];
  2584. }
  2585. }
  2586. else
  2587. {
  2588. buf[i++] = 'x';
  2589. if (ch > 0xfffff) buf[i++] = hex[(ch >> 20) & 0xf];
  2590. if (ch > 0x0ffff) buf[i++] = hex[(ch >> 16) & 0xf];
  2591. if (ch > 0x00fff) buf[i++] = hex[(ch >> 12) & 0xf];
  2592. if (ch > 0x000ff) buf[i++] = hex[(ch >> 8) & 0xf];
  2593. if (ch > 0x0000f) buf[i++] = hex[(ch >> 4) & 0xf];
  2594. buf[i++] = hex[ch & 0xf];
  2595. buf[i++] = ';';
  2596. }
  2597. return i;
  2598. }
  2599. void
  2600. scm_c_put_escaped_char (SCM port, scm_t_wchar ch)
  2601. {
  2602. uint8_t escape[ESCAPE_BUFFER_SIZE];
  2603. size_t len = encode_escape_sequence (ch, escape);
  2604. scm_c_put_latin1_chars (port, escape, len);
  2605. }
  2606. /* Convert CODEPOINT to UTF-8 and store the result in UTF8. Return the
  2607. number of bytes of the UTF-8-encoded string. */
  2608. static size_t
  2609. codepoint_to_utf8 (uint32_t codepoint, uint8_t utf8[UTF8_BUFFER_SIZE])
  2610. {
  2611. size_t len;
  2612. if (codepoint <= 0x7f)
  2613. {
  2614. len = 1;
  2615. utf8[0] = codepoint;
  2616. }
  2617. else if (codepoint <= 0x7ffUL)
  2618. {
  2619. len = 2;
  2620. utf8[0] = 0xc0 | (codepoint >> 6);
  2621. utf8[1] = 0x80 | (codepoint & 0x3f);
  2622. }
  2623. else if (codepoint <= 0xffffUL)
  2624. {
  2625. len = 3;
  2626. utf8[0] = 0xe0 | (codepoint >> 12);
  2627. utf8[1] = 0x80 | ((codepoint >> 6) & 0x3f);
  2628. utf8[2] = 0x80 | (codepoint & 0x3f);
  2629. }
  2630. else
  2631. {
  2632. len = 4;
  2633. utf8[0] = 0xf0 | (codepoint >> 18);
  2634. utf8[1] = 0x80 | ((codepoint >> 12) & 0x3f);
  2635. utf8[2] = 0x80 | ((codepoint >> 6) & 0x3f);
  2636. utf8[3] = 0x80 | (codepoint & 0x3f);
  2637. }
  2638. return len;
  2639. }
  2640. static size_t
  2641. try_encode_char_to_iconv_buf (SCM port, SCM buf, uint32_t ch)
  2642. {
  2643. uint8_t utf8[UTF8_BUFFER_SIZE];
  2644. size_t utf8_len = codepoint_to_utf8 (ch, utf8);
  2645. size_t end;
  2646. size_t can_put = scm_port_buffer_can_put (buf, &end);
  2647. uint8_t *aux = scm_port_buffer_put_pointer (buf, end);
  2648. iconv_t output_cd;
  2649. int saved_errno;
  2650. char *input = (char *) utf8;
  2651. size_t input_left = utf8_len;
  2652. char *output = (char *) aux;
  2653. size_t output_left = can_put;
  2654. size_t res;
  2655. scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
  2656. res = iconv (output_cd, &input, &input_left, &output, &output_left);
  2657. saved_errno = errno;
  2658. /* Emit bytes needed to get back to initial state, if needed. */
  2659. iconv (output_cd, NULL, NULL, &output, &output_left);
  2660. scm_port_release_iconv_descriptors (port);
  2661. if (res != (size_t) -1)
  2662. {
  2663. /* Success. */
  2664. scm_port_buffer_did_put (buf, end, can_put - output_left);
  2665. return 1;
  2666. }
  2667. if (saved_errno == E2BIG)
  2668. /* No space to encode the character; try again next time. */
  2669. return 0;
  2670. /* Otherwise, re-set the output buffer and try to escape or substitute
  2671. the character, as appropriate. */
  2672. output = (char *) aux;
  2673. output_left = can_put;
  2674. /* The source buffer is valid UTF-8, so we shouldn't get EILSEQ
  2675. because of the input encoding; if we get EILSEQ, that means the
  2676. codepoint is not accessible in the target encoding. We have whole
  2677. codepoints in the source buffer, so we shouldn't get EINVAL. We
  2678. already handled E2BIG. The descriptor should be valid so we
  2679. shouldn't get EBADF. In summary, we only need to handle EILSEQ. */
  2680. if (scm_is_eq (SCM_PORT (port)->conversion_strategy, sym_escape))
  2681. {
  2682. uint8_t escape[ESCAPE_BUFFER_SIZE];
  2683. input = (char *) escape;
  2684. input_left = encode_escape_sequence (ch, escape);
  2685. scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
  2686. res = iconv (output_cd, &input, &input_left, &output, &output_left);
  2687. saved_errno = errno;
  2688. iconv (output_cd, NULL, NULL, &output, &output_left);
  2689. scm_port_release_iconv_descriptors (port);
  2690. }
  2691. else if (scm_is_eq (SCM_PORT (port)->conversion_strategy, sym_substitute))
  2692. {
  2693. uint8_t substitute[2] = "?";
  2694. input = (char *) substitute;
  2695. input_left = 1;
  2696. scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
  2697. res = iconv (output_cd, &input, &input_left, &output, &output_left);
  2698. saved_errno = errno;
  2699. iconv (output_cd, NULL, NULL, &output, &output_left);
  2700. scm_port_release_iconv_descriptors (port);
  2701. }
  2702. if (res != (size_t) -1)
  2703. {
  2704. scm_port_buffer_did_put (buf, end, can_put - output_left);
  2705. return 1;
  2706. }
  2707. /* No space to write the substitution or escape, or maybe there was an
  2708. error. If there are buffered bytes, the caller should flush and
  2709. try again; otherwise the caller should raise an error. */
  2710. return 0;
  2711. }
  2712. static size_t
  2713. encode_latin1_chars_to_latin1_buf (SCM port, SCM buf,
  2714. const uint8_t *chars, size_t count)
  2715. {
  2716. size_t end;
  2717. size_t avail = scm_port_buffer_can_put (buf, &end);
  2718. return scm_port_buffer_put (buf, chars, count, end, avail);
  2719. }
  2720. static size_t
  2721. encode_latin1_chars_to_utf8_buf (SCM port, SCM buf,
  2722. const uint8_t *chars, size_t count)
  2723. {
  2724. size_t end;
  2725. size_t buf_size = scm_port_buffer_can_put (buf, &end);
  2726. uint8_t *dst = scm_port_buffer_put_pointer (buf, end);
  2727. size_t read, written;
  2728. for (read = 0, written = 0;
  2729. read < count && written + UTF8_BUFFER_SIZE < buf_size;
  2730. read++)
  2731. written += codepoint_to_utf8 (chars[read], dst + written);
  2732. scm_port_buffer_did_put (buf, end, written);
  2733. return read;
  2734. }
  2735. static size_t
  2736. encode_latin1_chars_to_iconv_buf (SCM port, SCM buf,
  2737. const uint8_t *chars, size_t count)
  2738. {
  2739. size_t read;
  2740. for (read = 0; read < count; read++)
  2741. if (!try_encode_char_to_iconv_buf (port, buf, chars[read]))
  2742. break;
  2743. return read;
  2744. }
  2745. static size_t
  2746. encode_latin1_chars (SCM port, SCM buf, const uint8_t *chars, size_t count)
  2747. {
  2748. scm_t_port *pt = SCM_PORT (port);
  2749. SCM position;
  2750. size_t ret, i;
  2751. if (scm_is_eq (pt->encoding, sym_ISO_8859_1))
  2752. ret = encode_latin1_chars_to_latin1_buf (port, buf, chars, count);
  2753. else if (scm_is_eq (pt->encoding, sym_UTF_8))
  2754. ret = encode_latin1_chars_to_utf8_buf (port, buf, chars, count);
  2755. else
  2756. ret = encode_latin1_chars_to_iconv_buf (port, buf, chars, count);
  2757. if (ret == 0 && count > 0)
  2758. scm_encoding_error ("put-char", EILSEQ,
  2759. "conversion to port encoding failed",
  2760. port, SCM_MAKE_CHAR (chars[0]));
  2761. position = pt->position;
  2762. for (i = 0; i < ret; i++)
  2763. update_port_position (position, chars[i]);
  2764. return ret;
  2765. }
  2766. static size_t
  2767. encode_utf32_chars_to_latin1_buf (SCM port, SCM buf,
  2768. const uint32_t *chars, size_t count)
  2769. {
  2770. scm_t_port *pt = SCM_PORT (port);
  2771. size_t end;
  2772. size_t buf_size = scm_port_buffer_can_put (buf, &end);
  2773. uint8_t *dst = scm_port_buffer_put_pointer (buf, end);
  2774. size_t read, written;
  2775. for (read = 0, written = 0; read < count && written < buf_size; read++)
  2776. {
  2777. uint32_t ch = chars[read];
  2778. if (ch <= 0xff)
  2779. dst[written++] = ch;
  2780. else if (scm_is_eq (pt->conversion_strategy, sym_substitute))
  2781. dst[written++] = '?';
  2782. else if (scm_is_eq (pt->conversion_strategy, sym_escape))
  2783. {
  2784. uint8_t escape[ESCAPE_BUFFER_SIZE];
  2785. size_t escape_len = encode_escape_sequence (ch, escape);
  2786. if (escape_len > buf_size - written)
  2787. break;
  2788. memcpy (dst + written, escape, escape_len);
  2789. written += escape_len;
  2790. }
  2791. else
  2792. break;
  2793. }
  2794. scm_port_buffer_did_put (buf, end, written);
  2795. return read;
  2796. }
  2797. static size_t
  2798. encode_utf32_chars_to_utf8_buf (SCM port, SCM buf, const uint32_t *chars,
  2799. size_t count)
  2800. {
  2801. size_t end;
  2802. size_t buf_size = scm_port_buffer_can_put (buf, &end);
  2803. uint8_t *dst = scm_port_buffer_put_pointer (buf, end);
  2804. size_t read, written;
  2805. for (read = 0, written = 0;
  2806. read < count && written + UTF8_BUFFER_SIZE < buf_size;
  2807. read++)
  2808. written += codepoint_to_utf8 (chars[read], dst + written);
  2809. scm_port_buffer_did_put (buf, end, written);
  2810. return read;
  2811. }
  2812. static size_t
  2813. encode_utf32_chars_to_iconv_buf (SCM port, SCM buf, const uint32_t *chars,
  2814. size_t count)
  2815. {
  2816. size_t read;
  2817. for (read = 0; read < count; read++)
  2818. if (!try_encode_char_to_iconv_buf (port, buf, chars[read]))
  2819. break;
  2820. return read;
  2821. }
  2822. static size_t
  2823. encode_utf32_chars (SCM port, SCM buf, const uint32_t *chars, size_t count)
  2824. {
  2825. scm_t_port *pt = SCM_PORT (port);
  2826. SCM position;
  2827. size_t ret, i;
  2828. if (scm_is_eq (pt->encoding, sym_ISO_8859_1))
  2829. ret = encode_utf32_chars_to_latin1_buf (port, buf, chars, count);
  2830. else if (scm_is_eq (pt->encoding, sym_UTF_8))
  2831. ret = encode_utf32_chars_to_utf8_buf (port, buf, chars, count);
  2832. else
  2833. ret = encode_utf32_chars_to_iconv_buf (port, buf, chars, count);
  2834. if (ret == 0 && count > 0)
  2835. scm_encoding_error ("put-char", EILSEQ,
  2836. "conversion to port encoding failed",
  2837. port, SCM_MAKE_CHAR (chars[0]));
  2838. position = pt->position;
  2839. for (i = 0; i < ret; i++)
  2840. update_port_position (position, chars[i]);
  2841. return ret;
  2842. }
  2843. static size_t
  2844. port_encode_chars (SCM port, SCM buf, SCM str, size_t start, size_t count)
  2845. {
  2846. if (count == 0)
  2847. return 0;
  2848. if (scm_i_is_narrow_string (str))
  2849. {
  2850. const char *chars = scm_i_string_chars (str);
  2851. return encode_latin1_chars (port, buf,
  2852. ((const uint8_t *) chars) + start,
  2853. count);
  2854. }
  2855. else
  2856. {
  2857. const scm_t_wchar *chars = scm_i_string_wide_chars (str);
  2858. return encode_utf32_chars (port, buf,
  2859. ((const uint32_t *) chars) + start,
  2860. count);
  2861. }
  2862. }
  2863. SCM scm_port_encode_chars (SCM, SCM, SCM, SCM, SCM);
  2864. SCM_DEFINE (scm_port_encode_chars, "port-encode-chars", 5, 0, 0,
  2865. (SCM port, SCM buf, SCM str, SCM start, SCM count),
  2866. "")
  2867. #define FUNC_NAME s_scm_port_encode_chars
  2868. {
  2869. size_t c_start, c_count, c_len, encoded;
  2870. SCM_VALIDATE_OPOUTPORT (1, port);
  2871. SCM_VALIDATE_VECTOR (2, buf);
  2872. SCM_VALIDATE_STRING (3, str);
  2873. c_len = scm_i_string_length (str);
  2874. SCM_VALIDATE_SIZE_COPY (4, start, c_start);
  2875. SCM_ASSERT_RANGE (4, start, c_start <= c_len);
  2876. SCM_VALIDATE_SIZE_COPY (5, count, c_count);
  2877. SCM_ASSERT_RANGE (5, count, c_count <= c_len - c_start);
  2878. encoded = port_encode_chars (port, buf, str, c_start, c_count);
  2879. return scm_from_size_t (encoded);
  2880. }
  2881. #undef FUNC_NAME
  2882. SCM scm_port_encode_char (SCM, SCM, SCM);
  2883. SCM_DEFINE (scm_port_encode_char, "port-encode-char", 3, 0, 0,
  2884. (SCM port, SCM buf, SCM ch),
  2885. "")
  2886. #define FUNC_NAME s_scm_port_encode_char
  2887. {
  2888. uint32_t codepoint;
  2889. SCM_VALIDATE_OPOUTPORT (1, port);
  2890. SCM_VALIDATE_VECTOR (2, buf);
  2891. SCM_VALIDATE_CHAR (3, ch);
  2892. codepoint = SCM_CHAR (ch);
  2893. encode_utf32_chars (port, buf, &codepoint, 1);
  2894. return SCM_UNSPECIFIED;
  2895. }
  2896. #undef FUNC_NAME
  2897. void
  2898. scm_c_put_latin1_chars (SCM port, const uint8_t *chars, size_t len)
  2899. {
  2900. SCM aux_buf = scm_port_auxiliary_write_buffer (port);
  2901. SCM aux_bv = scm_port_buffer_bytevector (aux_buf);
  2902. SCM position = SCM_PORT (port)->position;
  2903. SCM saved_line = scm_port_position_line (position);
  2904. scm_port_clear_stream_start_for_bom_write (port, aux_buf);
  2905. while (len)
  2906. {
  2907. size_t encoded = encode_latin1_chars (port, aux_buf, chars, len);
  2908. assert(encoded <= len);
  2909. scm_c_write_bytes (port, aux_bv, 0,
  2910. scm_to_size_t (scm_port_buffer_end (aux_buf)));
  2911. scm_port_buffer_reset (aux_buf);
  2912. chars += encoded;
  2913. len -= encoded;
  2914. }
  2915. /* Handle line buffering. */
  2916. if ((SCM_CELL_WORD_0 (port) & SCM_BUFLINE) &&
  2917. !scm_is_eq (saved_line, scm_port_position_line (position)))
  2918. scm_flush (port);
  2919. }
  2920. void
  2921. scm_c_put_utf32_chars (SCM port, const uint32_t *chars, size_t len)
  2922. {
  2923. SCM aux_buf = scm_port_auxiliary_write_buffer (port);
  2924. SCM aux_bv = scm_port_buffer_bytevector (aux_buf);
  2925. SCM position = SCM_PORT (port)->position;
  2926. SCM saved_line = scm_port_position_line (position);
  2927. scm_port_clear_stream_start_for_bom_write (port, aux_buf);
  2928. while (len)
  2929. {
  2930. size_t encoded = encode_utf32_chars (port, aux_buf, chars, len);
  2931. assert(encoded <= len);
  2932. scm_c_write_bytes (port, aux_bv, 0,
  2933. scm_to_size_t (scm_port_buffer_end (aux_buf)));
  2934. scm_port_buffer_reset (aux_buf);
  2935. chars += encoded;
  2936. len -= encoded;
  2937. }
  2938. /* Handle line buffering. */
  2939. if ((SCM_CELL_WORD_0 (port) & SCM_BUFLINE) &&
  2940. !scm_is_eq (saved_line, scm_port_position_line (position)))
  2941. scm_flush (port);
  2942. }
  2943. void
  2944. scm_c_put_char (SCM port, scm_t_wchar ch)
  2945. {
  2946. if (ch <= 0xff)
  2947. {
  2948. uint8_t narrow_ch = ch;
  2949. scm_c_put_latin1_chars (port, &narrow_ch, 1);
  2950. }
  2951. else
  2952. {
  2953. uint32_t wide_ch = ch;
  2954. scm_c_put_utf32_chars (port, &wide_ch, 1);
  2955. }
  2956. }
  2957. /* Return 0 unless the port can be written out to the port's encoding
  2958. without errors, substitutions, or escapes. */
  2959. int
  2960. scm_c_can_put_char (SCM port, scm_t_wchar ch)
  2961. {
  2962. SCM encoding = SCM_PORT (port)->encoding;
  2963. if (scm_is_eq (encoding, sym_UTF_8)
  2964. || (scm_is_eq (encoding, sym_ISO_8859_1) && ch <= 0xff)
  2965. || scm_is_eq (encoding, sym_UTF_16)
  2966. || scm_is_eq (encoding, sym_UTF_16LE)
  2967. || scm_is_eq (encoding, sym_UTF_16BE)
  2968. || scm_is_eq (encoding, sym_UTF_32)
  2969. || scm_is_eq (encoding, sym_UTF_32LE)
  2970. || scm_is_eq (encoding, sym_UTF_32BE))
  2971. return 1;
  2972. {
  2973. SCM bv = scm_port_buffer_bytevector (scm_port_auxiliary_write_buffer (port));
  2974. uint8_t buf[UTF8_BUFFER_SIZE];
  2975. char *input = (char *) buf;
  2976. size_t input_len;
  2977. char *output = (char *) SCM_BYTEVECTOR_CONTENTS (bv);
  2978. size_t output_len = SCM_BYTEVECTOR_LENGTH (bv);
  2979. size_t result;
  2980. iconv_t output_cd;
  2981. input_len = codepoint_to_utf8 (ch, buf);
  2982. scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
  2983. iconv (output_cd, NULL, NULL, &output, &output_len);
  2984. result = iconv (output_cd, &input, &input_len, &output, &output_len);
  2985. iconv (output_cd, NULL, NULL, &output, &output_len);
  2986. scm_port_release_iconv_descriptors (port);
  2987. return result != (size_t) -1;
  2988. }
  2989. }
  2990. void
  2991. scm_c_put_string (SCM port, SCM string, size_t start, size_t count)
  2992. {
  2993. if (scm_i_is_narrow_string (string))
  2994. {
  2995. const char *ptr = scm_i_string_chars (string);
  2996. scm_c_put_latin1_chars (port, ((const uint8_t *) ptr) + start, count);
  2997. }
  2998. else
  2999. {
  3000. const scm_t_wchar *ptr = scm_i_string_wide_chars (string);
  3001. scm_c_put_utf32_chars (port, ((const uint32_t *) ptr) + start, count);
  3002. }
  3003. }
  3004. SCM_DEFINE (scm_put_char, "put-char", 2, 0, 0, (SCM port, SCM ch),
  3005. "Encode @var{ch} to bytes, and send those bytes to @var{port}.")
  3006. #define FUNC_NAME s_scm_put_char
  3007. {
  3008. SCM_VALIDATE_OPOUTPORT (1, port);
  3009. SCM_VALIDATE_CHAR (2, ch);
  3010. scm_c_put_char (port, SCM_CHAR (ch));
  3011. return SCM_UNSPECIFIED;
  3012. }
  3013. #undef FUNC_NAME
  3014. SCM_DEFINE (scm_put_string, "put-string", 2, 2, 0,
  3015. (SCM port, SCM string, SCM start, SCM count),
  3016. "Display the @var{count} characters from @var{string} to\n"
  3017. "@var{port}, starting with the character at index @var{start}.\n"
  3018. "@var{start} defaults to 0, and @var{count} defaults to\n"
  3019. "displaying all characters until the end of the string.\n\n"
  3020. "Calling @code{put-string} is equivalent in all respects to\n"
  3021. "calling @code{put-char} on the relevant sequence of characters,\n"
  3022. "except that it will attempt to write multiple characters to\n"
  3023. "the port at a time, even if the port is unbuffered.")
  3024. #define FUNC_NAME s_scm_put_string
  3025. {
  3026. size_t c_start, c_count, c_len;
  3027. SCM_VALIDATE_OPOUTPORT (1, port);
  3028. SCM_VALIDATE_STRING (2, string);
  3029. c_len = scm_i_string_length (string);
  3030. c_start = SCM_UNBNDP (start) ? 0 : scm_to_size_t (start);
  3031. SCM_ASSERT_RANGE (3, start, c_start <= c_len);
  3032. c_count = SCM_UNBNDP (count) ? c_len - c_start : scm_to_size_t (count);
  3033. SCM_ASSERT_RANGE (4, count, c_count <= c_len - c_start);
  3034. scm_c_put_string (port, string, c_start, c_count);
  3035. return SCM_UNSPECIFIED;
  3036. }
  3037. #undef FUNC_NAME
  3038. void
  3039. scm_putc (char c, SCM port)
  3040. {
  3041. SCM_ASSERT_TYPE (SCM_OPOUTPORTP (port), port, 0, NULL, "output port");
  3042. scm_c_put_char (port, (uint8_t) c);
  3043. }
  3044. void
  3045. scm_puts (const char *s, SCM port)
  3046. {
  3047. SCM_ASSERT_TYPE (SCM_OPOUTPORTP (port), port, 0, NULL, "output port");
  3048. scm_c_put_latin1_chars (port, (const uint8_t *) s, strlen (s));
  3049. }
  3050. /* scm_lfwrite
  3051. *
  3052. * This function differs from scm_c_write; it updates port line and
  3053. * column, flushing line-buffered ports when appropriate. */
  3054. void
  3055. scm_lfwrite (const char *ptr, size_t size, SCM port)
  3056. {
  3057. scm_c_put_latin1_chars (port, (const uint8_t *) ptr, size);
  3058. }
  3059. /* Write STR to PORT from START inclusive to END exclusive. */
  3060. void
  3061. scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
  3062. {
  3063. if (end == (size_t) -1)
  3064. end = scm_i_string_length (str);
  3065. scm_c_put_string (port, str, start, end - start);
  3066. }
  3067. /* Querying and setting positions, and character availability. */
  3068. SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
  3069. (SCM port),
  3070. "Return @code{#t} if a character is ready on input @var{port}\n"
  3071. "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
  3072. "@code{#t} then the next @code{read-char} operation on\n"
  3073. "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
  3074. "port at end of file then @code{char-ready?} returns @code{#t}.\n"
  3075. "\n"
  3076. "@code{char-ready?} exists to make it possible for a\n"
  3077. "program to accept characters from interactive ports without\n"
  3078. "getting stuck waiting for input. Any input editors associated\n"
  3079. "with such ports must make sure that characters whose existence\n"
  3080. "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
  3081. "If @code{char-ready?} were to return @code{#f} at end of file,\n"
  3082. "a port at end of file would be indistinguishable from an\n"
  3083. "interactive port that has no ready characters.")
  3084. #define FUNC_NAME s_scm_char_ready_p
  3085. {
  3086. SCM read_buf;
  3087. size_t tmp;
  3088. if (SCM_UNBNDP (port))
  3089. port = scm_current_input_port ();
  3090. /* It's possible to close the current input port, so validate even in
  3091. this case. */
  3092. SCM_VALIDATE_OPINPORT (1, port);
  3093. read_buf = SCM_PORT (port)->read_buf;
  3094. if (scm_port_buffer_can_take (read_buf, &tmp) ||
  3095. scm_is_true (scm_port_buffer_has_eof_p (read_buf)))
  3096. /* FIXME: Verify that a whole character is available? */
  3097. return SCM_BOOL_T;
  3098. else
  3099. {
  3100. scm_t_port_type *ptob = SCM_PORT_TYPE (port);
  3101. if (ptob->input_waiting)
  3102. {
  3103. SCM ret;
  3104. scm_dynwind_begin (0);
  3105. scm_dynwind_acquire_port (port);
  3106. ret = scm_from_bool (ptob->input_waiting (port));
  3107. scm_dynwind_end ();
  3108. return ret;
  3109. }
  3110. else
  3111. return SCM_BOOL_T;
  3112. }
  3113. }
  3114. #undef FUNC_NAME
  3115. SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
  3116. (SCM fd_port, SCM offset, SCM whence),
  3117. "Sets the current position of @var{fd_port} to the integer\n"
  3118. "@var{offset}, which is interpreted according to the value of\n"
  3119. "@var{whence}.\n"
  3120. "\n"
  3121. "One of the following variables should be supplied for\n"
  3122. "@var{whence}:\n"
  3123. "@defvar SEEK_SET\n"
  3124. "Seek from the beginning of the file.\n"
  3125. "@end defvar\n"
  3126. "@defvar SEEK_CUR\n"
  3127. "Seek from the current position.\n"
  3128. "@end defvar\n"
  3129. "@defvar SEEK_END\n"
  3130. "Seek from the end of the file.\n"
  3131. "@end defvar\n"
  3132. "If @var{fd_port} is a file descriptor, the underlying system\n"
  3133. "call is @code{lseek}. @var{port} may be a string port.\n"
  3134. "\n"
  3135. "The value returned is the new position in the file. This means\n"
  3136. "that the current position of a port can be obtained using:\n"
  3137. "@lisp\n"
  3138. "(seek port 0 SEEK_CUR)\n"
  3139. "@end lisp")
  3140. #define FUNC_NAME s_scm_seek
  3141. {
  3142. int how;
  3143. fd_port = SCM_COERCE_OUTPORT (fd_port);
  3144. how = scm_to_int (whence);
  3145. if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
  3146. SCM_OUT_OF_RANGE (3, whence);
  3147. if (SCM_OPPORTP (fd_port))
  3148. {
  3149. scm_t_port *pt = SCM_PORT (fd_port);
  3150. scm_t_port_type *ptob = SCM_PORT_TYPE (fd_port);
  3151. scm_t_off off = scm_to_off_t (offset);
  3152. scm_t_off rv;
  3153. if (ptob->seek && how == SEEK_CUR && off == 0)
  3154. {
  3155. size_t tmp;
  3156. /* If we are just querying the current position, avoid
  3157. flushing buffers. We don't even need to require that the
  3158. port supports random access. */
  3159. scm_dynwind_begin (0);
  3160. scm_dynwind_acquire_port (fd_port);
  3161. rv = ptob->seek (fd_port, off, how);
  3162. scm_dynwind_end ();
  3163. rv -= scm_port_buffer_can_take (pt->read_buf, &tmp);
  3164. rv += scm_port_buffer_can_take (pt->write_buf, &tmp);
  3165. return scm_from_off_t (rv);
  3166. }
  3167. if (!ptob->seek || !pt->rw_random)
  3168. SCM_MISC_ERROR ("port is not seekable",
  3169. scm_cons (fd_port, SCM_EOL));
  3170. scm_end_input (fd_port);
  3171. scm_flush (fd_port);
  3172. scm_dynwind_begin (0);
  3173. scm_dynwind_acquire_port (fd_port);
  3174. rv = ptob->seek (fd_port, off, how);
  3175. scm_dynwind_end ();
  3176. /* Set stream-start flags according to new position. */
  3177. pt->at_stream_start_for_bom_read = (rv == 0);
  3178. pt->at_stream_start_for_bom_write = (rv == 0);
  3179. scm_i_clear_pending_eof (fd_port);
  3180. return scm_from_off_t (rv);
  3181. }
  3182. else /* file descriptor?. */
  3183. {
  3184. off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
  3185. off_t_or_off64_t rv;
  3186. rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
  3187. if (rv == -1)
  3188. SCM_SYSERROR;
  3189. return scm_from_off_t_or_off64_t (rv);
  3190. }
  3191. }
  3192. #undef FUNC_NAME
  3193. #ifndef O_BINARY
  3194. #define O_BINARY 0
  3195. #endif
  3196. /* Mingw has ftruncate(), perhaps implemented above using chsize, but
  3197. doesn't have the filename version truncate(), hence this code. */
  3198. #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
  3199. static int
  3200. truncate (const char *file, off_t length)
  3201. {
  3202. int ret, fdes;
  3203. fdes = open (file, O_BINARY | O_WRONLY);
  3204. if (fdes == -1)
  3205. return -1;
  3206. ret = ftruncate (fdes, length);
  3207. if (ret == -1)
  3208. {
  3209. int save_errno = errno;
  3210. close (fdes);
  3211. errno = save_errno;
  3212. return -1;
  3213. }
  3214. return close (fdes);
  3215. }
  3216. #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
  3217. SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
  3218. (SCM object, SCM length),
  3219. "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
  3220. "can be a filename string, a port object, or an integer file\n"
  3221. "descriptor.\n"
  3222. "The return value is unspecified.\n"
  3223. "\n"
  3224. "For a port or file descriptor @var{length} can be omitted, in\n"
  3225. "which case the file is truncated at the current position (per\n"
  3226. "@code{ftell} above).\n"
  3227. "\n"
  3228. "On most systems a file can be extended by giving a length\n"
  3229. "greater than the current size, but this is not mandatory in the\n"
  3230. "POSIX standard.")
  3231. #define FUNC_NAME s_scm_truncate_file
  3232. {
  3233. int rv;
  3234. /* "object" can be a port, fdes or filename.
  3235. Negative "length" makes no sense, but it's left to truncate() or
  3236. ftruncate() to give back an error for that (normally EINVAL).
  3237. */
  3238. if (SCM_UNBNDP (length))
  3239. {
  3240. /* must supply length if object is a filename. */
  3241. if (scm_is_string (object))
  3242. SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
  3243. length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
  3244. }
  3245. object = SCM_COERCE_OUTPORT (object);
  3246. if (scm_is_integer (object))
  3247. {
  3248. off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
  3249. SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
  3250. c_length));
  3251. }
  3252. else if (SCM_OPOUTPORTP (object))
  3253. {
  3254. scm_t_off c_length = scm_to_off_t (length);
  3255. scm_t_port_type *ptob = SCM_PORT_TYPE (object);
  3256. if (!ptob->truncate)
  3257. SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
  3258. scm_i_clear_pending_eof (object);
  3259. if (SCM_INPUT_PORT_P (object)
  3260. && SCM_PORT (object)->rw_random)
  3261. scm_end_input (object);
  3262. scm_flush (object);
  3263. scm_dynwind_begin (0);
  3264. scm_dynwind_acquire_port (object);
  3265. ptob->truncate (object, c_length);
  3266. scm_dynwind_end ();
  3267. rv = 0;
  3268. }
  3269. else
  3270. {
  3271. off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
  3272. char *str = scm_to_locale_string (object);
  3273. int eno;
  3274. SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
  3275. eno = errno;
  3276. free (str);
  3277. errno = eno;
  3278. }
  3279. if (rv == -1)
  3280. SCM_SYSERROR;
  3281. return SCM_UNSPECIFIED;
  3282. }
  3283. #undef FUNC_NAME
  3284. SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
  3285. (SCM port),
  3286. "Return the current line number for @var{port}.\n"
  3287. "\n"
  3288. "The first line of a file is 0. But you might want to add 1\n"
  3289. "when printing line numbers, since starting from 1 is\n"
  3290. "traditional in error messages, and likely to be more natural to\n"
  3291. "non-programmers.")
  3292. #define FUNC_NAME s_scm_port_line
  3293. {
  3294. port = SCM_COERCE_OUTPORT (port);
  3295. SCM_VALIDATE_OPENPORT (1, port);
  3296. return scm_port_position_line (SCM_PORT (port)->position);
  3297. }
  3298. #undef FUNC_NAME
  3299. SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
  3300. (SCM port, SCM line),
  3301. "Set the current line number for @var{port} to @var{line}. The\n"
  3302. "first line of a file is 0.")
  3303. #define FUNC_NAME s_scm_set_port_line_x
  3304. {
  3305. port = SCM_COERCE_OUTPORT (port);
  3306. SCM_VALIDATE_OPENPORT (1, port);
  3307. scm_to_long (line);
  3308. scm_port_position_set_line (SCM_PORT (port)->position, line);
  3309. return SCM_UNSPECIFIED;
  3310. }
  3311. #undef FUNC_NAME
  3312. SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
  3313. (SCM port),
  3314. "Return the current column number of @var{port}.\n"
  3315. "If the number is\n"
  3316. "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
  3317. "- i.e. the first character of the first line is line 0, column 0.\n"
  3318. "(However, when you display a file position, for example in an error\n"
  3319. "message, we recommend you add 1 to get 1-origin integers. This is\n"
  3320. "because lines and column numbers traditionally start with 1, and that is\n"
  3321. "what non-programmers will find most natural.)")
  3322. #define FUNC_NAME s_scm_port_column
  3323. {
  3324. port = SCM_COERCE_OUTPORT (port);
  3325. SCM_VALIDATE_OPENPORT (1, port);
  3326. return scm_port_position_column (SCM_PORT (port)->position);
  3327. }
  3328. #undef FUNC_NAME
  3329. SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
  3330. (SCM port, SCM column),
  3331. "Set the current column of @var{port}. Before reading the first\n"
  3332. "character on a line the column should be 0.")
  3333. #define FUNC_NAME s_scm_set_port_column_x
  3334. {
  3335. port = SCM_COERCE_OUTPORT (port);
  3336. SCM_VALIDATE_OPENPORT (1, port);
  3337. scm_to_int (column);
  3338. scm_port_position_set_column (SCM_PORT (port)->position, column);
  3339. return SCM_UNSPECIFIED;
  3340. }
  3341. #undef FUNC_NAME
  3342. SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
  3343. (SCM port),
  3344. "Return the filename associated with @var{port}, or @code{#f}\n"
  3345. "if no filename is associated with the port.")
  3346. #define FUNC_NAME s_scm_port_filename
  3347. {
  3348. port = SCM_COERCE_OUTPORT (port);
  3349. SCM_VALIDATE_OPENPORT (1, port);
  3350. return SCM_FILENAME (port);
  3351. }
  3352. #undef FUNC_NAME
  3353. SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
  3354. (SCM port, SCM filename),
  3355. "Change the filename associated with @var{port}, using the current input\n"
  3356. "port if none is specified. Note that this does not change the port's\n"
  3357. "source of data, but only the value that is returned by\n"
  3358. "@code{port-filename} and reported in diagnostic output.")
  3359. #define FUNC_NAME s_scm_set_port_filename_x
  3360. {
  3361. port = SCM_COERCE_OUTPORT (port);
  3362. SCM_VALIDATE_OPENPORT (1, port);
  3363. /* We allow the user to set the filename to whatever he likes. */
  3364. SCM_SET_FILENAME (port, filename);
  3365. return SCM_UNSPECIFIED;
  3366. }
  3367. #undef FUNC_NAME
  3368. /* Implementation helpers for port printing functions. */
  3369. void
  3370. scm_print_port_mode (SCM exp, SCM port)
  3371. {
  3372. scm_puts (SCM_CLOSEDP (exp)
  3373. ? "closed: "
  3374. : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
  3375. ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
  3376. ? "input-output: "
  3377. : "input: ")
  3378. : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
  3379. ? "output: "
  3380. : "bogus: ")),
  3381. port);
  3382. }
  3383. int
  3384. scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
  3385. {
  3386. char *type = SCM_PORT_TYPE (port)->name;
  3387. if (!type)
  3388. type = "port";
  3389. scm_puts ("#<", port);
  3390. scm_print_port_mode (exp, port);
  3391. scm_puts (type, port);
  3392. scm_putc (' ', port);
  3393. scm_uintprint ((scm_t_bits) SCM_PORT (exp), 16, port);
  3394. scm_putc ('>', port);
  3395. return 1;
  3396. }
  3397. /* Iterating over all ports. */
  3398. struct for_each_data
  3399. {
  3400. void (*proc) (void *data, SCM p);
  3401. void *data;
  3402. };
  3403. static SCM
  3404. for_each_trampoline (void *data, SCM port, SCM result)
  3405. {
  3406. struct for_each_data *d = data;
  3407. d->proc (d->data, port);
  3408. return result;
  3409. }
  3410. void
  3411. scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
  3412. {
  3413. struct for_each_data d;
  3414. d.proc = proc;
  3415. d.data = data;
  3416. scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
  3417. scm_i_port_weak_set);
  3418. }
  3419. static void
  3420. scm_for_each_trampoline (void *data, SCM port)
  3421. {
  3422. scm_call_1 (SCM_PACK_POINTER (data), port);
  3423. }
  3424. SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
  3425. (SCM proc),
  3426. "Apply @var{proc} to each port in the Guile port table\n"
  3427. "in turn. The return value is unspecified. More specifically,\n"
  3428. "@var{proc} is applied exactly once to every port that exists\n"
  3429. "in the system at the time @code{port-for-each} is invoked.\n"
  3430. "Changes to the port table while @code{port-for-each} is running\n"
  3431. "have no effect as far as @code{port-for-each} is concerned.")
  3432. #define FUNC_NAME s_scm_port_for_each
  3433. {
  3434. SCM_VALIDATE_PROC (1, proc);
  3435. scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
  3436. return SCM_UNSPECIFIED;
  3437. }
  3438. #undef FUNC_NAME
  3439. static void
  3440. flush_output_port (void *closure, SCM port)
  3441. {
  3442. if (SCM_OPOUTPORTP (port))
  3443. scm_flush (port);
  3444. }
  3445. SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
  3446. (),
  3447. "Equivalent to calling @code{force-output} on\n"
  3448. "all open output ports. The return value is unspecified.")
  3449. #define FUNC_NAME s_scm_flush_all_ports
  3450. {
  3451. scm_c_port_for_each (&flush_output_port, NULL);
  3452. return SCM_UNSPECIFIED;
  3453. }
  3454. #undef FUNC_NAME
  3455. /* Void ports. */
  3456. scm_t_port_type *scm_void_port_type = 0;
  3457. static size_t
  3458. void_port_read (SCM port, SCM dst, size_t start, size_t count)
  3459. {
  3460. return 0;
  3461. }
  3462. static size_t
  3463. void_port_write (SCM port, SCM src, size_t start, size_t count)
  3464. {
  3465. return count;
  3466. }
  3467. static SCM
  3468. scm_i_void_port (long mode_bits)
  3469. {
  3470. return scm_c_make_port (scm_void_port_type, mode_bits, 0);
  3471. }
  3472. SCM
  3473. scm_void_port (char *mode_str)
  3474. {
  3475. return scm_i_void_port (scm_mode_bits (mode_str));
  3476. }
  3477. SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
  3478. (SCM mode),
  3479. "Create and return a new void port. A void port acts like\n"
  3480. "@file{/dev/null}. The @var{mode} argument\n"
  3481. "specifies the input/output modes for this port: see the\n"
  3482. "documentation for @code{open-file} in @ref{File Ports}.")
  3483. #define FUNC_NAME s_scm_sys_make_void_port
  3484. {
  3485. return scm_i_void_port (scm_i_mode_bits (mode));
  3486. }
  3487. #undef FUNC_NAME
  3488. /* Initialization. */
  3489. static void
  3490. scm_init_ice_9_ports (void)
  3491. {
  3492. #include "ports.x"
  3493. scm_c_define ("the-eof-object", SCM_EOF_VAL);
  3494. /* lseek() symbols. */
  3495. scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
  3496. scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
  3497. scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
  3498. scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
  3499. scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
  3500. scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
  3501. scm_c_define ("%current-warning-port-fluid", cur_warnport_fluid);
  3502. }
  3503. void
  3504. scm_init_ports (void)
  3505. {
  3506. sym_UTF_8 = scm_from_latin1_symbol ("UTF-8");
  3507. sym_ISO_8859_1 = scm_from_latin1_symbol ("ISO-8859-1");
  3508. sym_UTF_16 = scm_from_latin1_symbol ("UTF-16");
  3509. sym_UTF_16LE = scm_from_latin1_symbol ("UTF-16LE");
  3510. sym_UTF_16BE = scm_from_latin1_symbol ("UTF-16BE");
  3511. sym_UTF_32 = scm_from_latin1_symbol ("UTF-32");
  3512. sym_UTF_32LE = scm_from_latin1_symbol ("UTF-32LE");
  3513. sym_UTF_32BE = scm_from_latin1_symbol ("UTF-32BE");
  3514. sym_substitute = scm_from_latin1_symbol ("substitute");
  3515. sym_escape = scm_from_latin1_symbol ("escape");
  3516. sym_error = scm_from_latin1_symbol ("error");
  3517. trampoline_to_c_read_subr =
  3518. scm_c_make_gsubr ("port-read", 4, 0, 0,
  3519. (scm_t_subr) trampoline_to_c_read);
  3520. trampoline_to_c_write_subr =
  3521. scm_c_make_gsubr ("port-write", 4, 0, 0,
  3522. (scm_t_subr) trampoline_to_c_write);
  3523. scm_void_port_type = scm_make_port_type ("void", void_port_read,
  3524. void_port_write);
  3525. scm_i_port_weak_set = scm_c_make_weak_set (31);
  3526. cur_inport_fluid = scm_make_fluid ();
  3527. cur_outport_fluid = scm_make_fluid ();
  3528. cur_errport_fluid = scm_make_fluid ();
  3529. cur_warnport_fluid = scm_make_fluid ();
  3530. cur_loadport_fluid = scm_make_fluid ();
  3531. default_port_encoding_var =
  3532. scm_c_define ("%default-port-encoding",
  3533. scm_make_fluid_with_default (SCM_BOOL_F));
  3534. default_conversion_strategy_var =
  3535. scm_c_define ("%default-port-conversion-strategy",
  3536. scm_make_fluid_with_default (sym_substitute));
  3537. /* Use the locale as the default port encoding. */
  3538. scm_i_set_default_port_encoding (locale_charset ());
  3539. scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
  3540. "scm_init_ice_9_ports",
  3541. (scm_t_extension_init_func) scm_init_ice_9_ports,
  3542. NULL);
  3543. /* The following bindings are used early in boot-9.scm. */
  3544. /* Used by `include'. */
  3545. scm_c_define_gsubr ("set-port-encoding!", 2, 0, 0,
  3546. (scm_t_subr) scm_set_port_encoding_x);
  3547. scm_c_define_gsubr (s_scm_eof_object_p, 1, 0, 0,
  3548. (scm_t_subr) scm_eof_object_p);
  3549. /* Used by a number of error/warning-printing routines. */
  3550. scm_c_define_gsubr (s_scm_force_output, 0, 1, 0,
  3551. (scm_t_subr) scm_force_output);
  3552. /* Used by `file-exists?' and related functions if `stat' is
  3553. unavailable. */
  3554. scm_c_define_gsubr (s_scm_close_port, 1, 0, 0,
  3555. (scm_t_subr) scm_close_port);
  3556. /* Used by error routines. */
  3557. scm_c_define_gsubr (s_scm_current_error_port, 0, 0, 0,
  3558. (scm_t_subr) scm_current_error_port);
  3559. scm_c_define_gsubr (s_scm_current_warning_port, 0, 0, 0,
  3560. (scm_t_subr) scm_current_warning_port);
  3561. }