ports.c 87 KB


  1. /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2006,
  2. * 2007, 2008, 2009, 2010, 2011, 2012, 2013,
  3. * 2014, 2015 Free Software Foundation, Inc.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public License
  7. * as published by the Free Software Foundation; either version 3 of
  8. * the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful, but
  11. * WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  18. * 02110-1301 USA
  19. */
  20. /* Headers. */
  21. #define _LARGEFILE64_SOURCE /* ask for stat64 etc */
  22. #ifdef HAVE_CONFIG_H
  23. # include <config.h>
  24. #endif
  25. #include <stdio.h>
  26. #include <errno.h>
  27. #include <fcntl.h> /* for chsize on mingw */
  28. #include <assert.h>
  29. #include <iconv.h>
  30. #include <uniconv.h>
  31. #include <unistr.h>
  32. #include <striconveh.h>
  33. #include <assert.h>
  34. #include "libguile/_scm.h"
  35. #include "libguile/async.h"
  36. #include "libguile/deprecation.h"
  37. #include "libguile/eval.h"
  38. #include "libguile/fports.h" /* direct access for seek and truncate */
  39. #include "libguile/goops.h"
  40. #include "libguile/smob.h"
  41. #include "libguile/chars.h"
  42. #include "libguile/dynwind.h"
  43. #include "libguile/keywords.h"
  44. #include "libguile/hashtab.h"
  45. #include "libguile/root.h"
  46. #include "libguile/strings.h"
  47. #include "libguile/mallocs.h"
  48. #include "libguile/validate.h"
  49. #include "libguile/ports.h"
  50. #include "libguile/ports-internal.h"
  51. #include "libguile/vectors.h"
  52. #include "libguile/weak-set.h"
  53. #include "libguile/fluids.h"
  54. #include "libguile/eq.h"
  55. #include "libguile/alist.h"
  56. #ifdef HAVE_STRING_H
  57. #include <string.h>
  58. #endif
  59. #ifdef HAVE_IO_H
  60. #include <io.h>
  61. #endif
  62. #include <unistd.h>
  63. #ifdef HAVE_SYS_IOCTL_H
  64. #include <sys/ioctl.h>
  65. #endif
  66. /* Mingw (version 3.4.5, circa 2006) has ftruncate as an alias for chsize
  67. already, but have this code here in case that wasn't so in past versions,
  68. or perhaps to help other minimal DOS environments.
  69. gnulib ftruncate.c has code using fcntl F_CHSIZE and F_FREESP, which
  70. might be possibilities if we've got other systems without ftruncate. */
  #if defined (HAVE_CHSIZE) && !defined (HAVE_FTRUNCATE)
  /* No ftruncate here, but chsize is available: map one onto the other
     and mark ftruncate as present for the code that tests for it.  */
  # define ftruncate(fd, size) chsize (fd, size)
  # undef HAVE_FTRUNCATE
  # define HAVE_FTRUNCATE 1
  #endif
  76. /* Port encodings are case-insensitive ASCII strings. */
  /* Return the upper-case form of C when C is one of the ASCII letters
     'a'..'z'; return every other character unchanged.  */
  static char
  ascii_toupper (char c)
  {
    if (c >= 'a' && c <= 'z')
      return 'A' + (c - 'a');

    return c;
  }
  82. /* It is only necessary to use this function on encodings that come from
  83. the user and have not been canonicalized yet. Encodings that are set
  84. on ports or in the default encoding fluid are in upper-case, and can
  85. be compared with strcmp. */
  /* Return 1 if ENC, compared case-insensitively over ASCII, equals
     UPPER, which must already be in upper case; return 0 otherwise.
     A NULL ENC is treated as "ISO-8859-1".  */
  static int
  encoding_matches (const char *enc, const char *upper)
  {
    const char *p = enc ? enc : "ISO-8859-1";

    for (; *p != '\0'; p++, upper++)
      if (ascii_toupper (*p) != *upper)
        return 0;

    /* ENC is exhausted; it matches only if UPPER is exhausted too.  */
    return *upper == '\0';
  }
  96. static char*
  97. canonicalize_encoding (const char *enc)
  98. {
  99. char *ret;
  100. int i;
  101. if (!enc)
  102. return "ISO-8859-1";
  103. ret = scm_gc_strdup (enc, "port");
  104. for (i = 0; ret[i]; i++)
  105. {
  106. if (ret[i] > 127)
  107. /* Restrict to ASCII. */
  108. scm_misc_error (NULL, "invalid character encoding ~s",
  109. scm_list_1 (scm_from_latin1_string (enc)));
  110. else
  111. ret[i] = ascii_toupper (ret[i]);
  112. }
  113. return ret;
  114. }
  115. /* The port kind table --- a dynamically resized array of port types. */
  116. /* scm_ptobs scm_numptob
  117. * implement a dynamically resized array of ptob records.
  118. * Indexes into this table are used when generating type
  119. * tags for smobjects (if you know a tag you can get an index and conversely).
  120. */
  121. static scm_t_ptob_descriptor **scm_ptobs = NULL;
  122. static long scm_numptob = 0; /* Number of port types. */
  123. static long scm_ptobs_size = 0; /* Number of slots in the port type
  124. table. */
  125. static scm_i_pthread_mutex_t scm_ptobs_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
  126. long
  127. scm_c_num_port_types (void)
  128. {
  129. long ret;
  130. scm_i_pthread_mutex_lock (&scm_ptobs_lock);
  131. ret = scm_numptob;
  132. scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
  133. return ret;
  134. }
  135. scm_t_ptob_descriptor*
  136. scm_c_port_type_ref (long ptobnum)
  137. {
  138. scm_t_ptob_descriptor *ret = NULL;
  139. scm_i_pthread_mutex_lock (&scm_ptobs_lock);
  140. if (0 <= ptobnum && ptobnum < scm_numptob)
  141. ret = scm_ptobs[ptobnum];
  142. scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
  143. if (!ret)
  144. scm_out_of_range ("scm_c_port_type_ref", scm_from_long (ptobnum));
  145. return ret;
  146. }
  147. long
  148. scm_c_port_type_add_x (scm_t_ptob_descriptor *desc)
  149. {
  150. long ret = -1;
  151. scm_i_pthread_mutex_lock (&scm_ptobs_lock);
  152. if (scm_numptob + 1 < SCM_I_MAX_PORT_TYPE_COUNT)
  153. {
  154. if (scm_numptob == scm_ptobs_size)
  155. {
  156. unsigned long old_size = scm_ptobs_size;
  157. scm_t_ptob_descriptor **old_ptobs = scm_ptobs;
  158. /* Currently there are only 9 predefined port types, so one
  159. resize will cover it. */
  160. scm_ptobs_size = old_size + 10;
  161. if (scm_ptobs_size >= SCM_I_MAX_PORT_TYPE_COUNT)
  162. scm_ptobs_size = SCM_I_MAX_PORT_TYPE_COUNT;
  163. scm_ptobs = scm_gc_malloc (sizeof (*scm_ptobs) * scm_ptobs_size,
  164. "scm_ptobs");
  165. memcpy (scm_ptobs, old_ptobs, sizeof (*scm_ptobs) * scm_numptob);
  166. }
  167. ret = scm_numptob++;
  168. scm_ptobs[ret] = desc;
  169. }
  170. scm_i_pthread_mutex_unlock (&scm_ptobs_lock);
  171. if (ret < 0)
  172. scm_out_of_range ("scm_c_port_type_add_x", scm_from_long (scm_numptob));
  173. return ret;
  174. }
  175. /*
  176. * We choose to use an interface similar to the smob interface with
  177. * fill_input and write as standard fields, passed to the port
  178. * type constructor, and optional fields set by setters.
  179. */
  180. static void
  181. flush_port_default (SCM port SCM_UNUSED)
  182. {
  183. }
  184. static void
  185. end_input_default (SCM port SCM_UNUSED, int offset SCM_UNUSED)
  186. {
  187. }
  188. scm_t_bits
  189. scm_make_port_type (char *name,
  190. int (*fill_input) (SCM port),
  191. void (*write) (SCM port, const void *data, size_t size))
  192. {
  193. scm_t_ptob_descriptor *desc;
  194. long ptobnum;
  195. desc = scm_gc_malloc_pointerless (sizeof (*desc), "port-type");
  196. memset (desc, 0, sizeof (*desc));
  197. desc->name = name;
  198. desc->print = scm_port_print;
  199. desc->write = write;
  200. desc->flush = flush_port_default;
  201. desc->end_input = end_input_default;
  202. desc->fill_input = fill_input;
  203. ptobnum = scm_c_port_type_add_x (desc);
  204. /* Make a class object if GOOPS is present. */
  205. if (SCM_UNPACK (scm_i_port_class[0]) != 0)
  206. scm_make_port_classes (ptobnum, name);
  207. return scm_tc7_port + ptobnum * 256;
  208. }
  209. void
  210. scm_set_port_mark (scm_t_bits tc, SCM (*mark) (SCM))
  211. {
  212. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->mark = mark;
  213. }
  214. void
  215. scm_set_port_free (scm_t_bits tc, size_t (*free) (SCM))
  216. {
  217. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->free = free;
  218. }
  219. void
  220. scm_set_port_print (scm_t_bits tc, int (*print) (SCM exp, SCM port,
  221. scm_print_state *pstate))
  222. {
  223. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->print = print;
  224. }
  225. void
  226. scm_set_port_equalp (scm_t_bits tc, SCM (*equalp) (SCM, SCM))
  227. {
  228. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->equalp = equalp;
  229. }
  230. void
  231. scm_set_port_close (scm_t_bits tc, int (*close) (SCM))
  232. {
  233. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->close = close;
  234. }
  235. void
  236. scm_set_port_flush (scm_t_bits tc, void (*flush) (SCM port))
  237. {
  238. scm_t_ptob_descriptor *ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tc));
  239. ptob->flush = flush;
  240. ptob->flags |= SCM_PORT_TYPE_HAS_FLUSH;
  241. }
  242. void
  243. scm_set_port_end_input (scm_t_bits tc, void (*end_input) (SCM port, int offset))
  244. {
  245. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->end_input = end_input;
  246. }
  247. void
  248. scm_set_port_seek (scm_t_bits tc, scm_t_off (*seek) (SCM, scm_t_off, int))
  249. {
  250. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->seek = seek;
  251. }
  252. void
  253. scm_set_port_truncate (scm_t_bits tc, void (*truncate) (SCM, scm_t_off))
  254. {
  255. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->truncate = truncate;
  256. }
  257. void
  258. scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM))
  259. {
  260. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->input_waiting = input_waiting;
  261. }
  262. void
  263. scm_set_port_setvbuf (scm_t_bits tc, void (*setvbuf) (SCM, long, long))
  264. {
  265. scm_c_port_type_ref (SCM_TC2PTOBNUM (tc))->setvbuf = setvbuf;
  266. }
  267. static void
  268. scm_i_set_pending_eof (SCM port)
  269. {
  270. SCM_PORT_GET_INTERNAL (port)->pending_eof = 1;
  271. }
  272. static void
  273. scm_i_clear_pending_eof (SCM port)
  274. {
  275. SCM_PORT_GET_INTERNAL (port)->pending_eof = 0;
  276. }
  277. SCM_DEFINE (scm_i_port_property, "%port-property", 2, 0, 0,
  278. (SCM port, SCM key),
  279. "Return the property of @var{port} associated with @var{key}.")
  280. #define FUNC_NAME s_scm_i_port_property
  281. {
  282. scm_i_pthread_mutex_t *lock;
  283. SCM result;
  284. SCM_VALIDATE_OPPORT (1, port);
  285. scm_c_lock_port (port, &lock);
  286. result = scm_assq_ref (SCM_PORT_GET_INTERNAL (port)->alist, key);
  287. if (lock)
  288. scm_i_pthread_mutex_unlock (lock);
  289. return result;
  290. }
  291. #undef FUNC_NAME
  292. SCM_DEFINE (scm_i_set_port_property_x, "%set-port-property!", 3, 0, 0,
  293. (SCM port, SCM key, SCM value),
  294. "Set the property of @var{port} associated with @var{key} to @var{value}.")
  295. #define FUNC_NAME s_scm_i_set_port_property_x
  296. {
  297. scm_i_pthread_mutex_t *lock;
  298. scm_t_port_internal *pti;
  299. SCM_VALIDATE_OPPORT (1, port);
  300. scm_c_lock_port (port, &lock);
  301. pti = SCM_PORT_GET_INTERNAL (port);
  302. pti->alist = scm_assq_set_x (pti->alist, key, value);
  303. if (lock)
  304. scm_i_pthread_mutex_unlock (lock);
  305. return SCM_UNSPECIFIED;
  306. }
  307. #undef FUNC_NAME
  308. /* Standard ports --- current input, output, error, and more(!). */
  309. static SCM cur_inport_fluid = SCM_BOOL_F;
  310. static SCM cur_outport_fluid = SCM_BOOL_F;
  311. static SCM cur_errport_fluid = SCM_BOOL_F;
  312. static SCM cur_warnport_fluid = SCM_BOOL_F;
  313. static SCM cur_loadport_fluid = SCM_BOOL_F;
  314. SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
  315. (void),
  316. "Return the current input port. This is the default port used\n"
  317. "by many input procedures. Initially, @code{current-input-port}\n"
  318. "returns the @dfn{standard input} in Unix and C terminology.")
  319. #define FUNC_NAME s_scm_current_input_port
  320. {
  321. if (scm_is_true (cur_inport_fluid))
  322. return scm_fluid_ref (cur_inport_fluid);
  323. else
  324. return SCM_BOOL_F;
  325. }
  326. #undef FUNC_NAME
  327. SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
  328. (void),
  329. "Return the current output port. This is the default port used\n"
  330. "by many output procedures. Initially,\n"
  331. "@code{current-output-port} returns the @dfn{standard output} in\n"
  332. "Unix and C terminology.")
  333. #define FUNC_NAME s_scm_current_output_port
  334. {
  335. if (scm_is_true (cur_outport_fluid))
  336. return scm_fluid_ref (cur_outport_fluid);
  337. else
  338. return SCM_BOOL_F;
  339. }
  340. #undef FUNC_NAME
  341. SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
  342. (void),
  343. "Return the port to which errors and warnings should be sent (the\n"
  344. "@dfn{standard error} in Unix and C terminology).")
  345. #define FUNC_NAME s_scm_current_error_port
  346. {
  347. if (scm_is_true (cur_errport_fluid))
  348. return scm_fluid_ref (cur_errport_fluid);
  349. else
  350. return SCM_BOOL_F;
  351. }
  352. #undef FUNC_NAME
  353. SCM_DEFINE (scm_current_warning_port, "current-warning-port", 0, 0, 0,
  354. (void),
  355. "Return the port to which diagnostic warnings should be sent.")
  356. #define FUNC_NAME s_scm_current_warning_port
  357. {
  358. if (scm_is_true (cur_warnport_fluid))
  359. return scm_fluid_ref (cur_warnport_fluid);
  360. else
  361. return SCM_BOOL_F;
  362. }
  363. #undef FUNC_NAME
  364. SCM_DEFINE (scm_current_load_port, "current-load-port", 0, 0, 0,
  365. (),
  366. "Return the current-load-port.\n"
  367. "The load port is used internally by @code{primitive-load}.")
  368. #define FUNC_NAME s_scm_current_load_port
  369. {
  370. return scm_fluid_ref (cur_loadport_fluid);
  371. }
  372. #undef FUNC_NAME
  373. SCM_DEFINE (scm_set_current_input_port, "set-current-input-port", 1, 0, 0,
  374. (SCM port),
  375. "@deffnx {Scheme Procedure} set-current-output-port port\n"
  376. "@deffnx {Scheme Procedure} set-current-error-port port\n"
  377. "Change the ports returned by @code{current-input-port},\n"
  378. "@code{current-output-port} and @code{current-error-port}, respectively,\n"
  379. "so that they use the supplied @var{port} for input or output.")
  380. #define FUNC_NAME s_scm_set_current_input_port
  381. {
  382. SCM oinp = scm_fluid_ref (cur_inport_fluid);
  383. SCM_VALIDATE_OPINPORT (1, port);
  384. scm_fluid_set_x (cur_inport_fluid, port);
  385. return oinp;
  386. }
  387. #undef FUNC_NAME
  388. SCM_DEFINE (scm_set_current_output_port, "set-current-output-port", 1, 0, 0,
  389. (SCM port),
  390. "Set the current default output port to @var{port}.")
  391. #define FUNC_NAME s_scm_set_current_output_port
  392. {
  393. SCM ooutp = scm_fluid_ref (cur_outport_fluid);
  394. port = SCM_COERCE_OUTPORT (port);
  395. SCM_VALIDATE_OPOUTPORT (1, port);
  396. scm_fluid_set_x (cur_outport_fluid, port);
  397. return ooutp;
  398. }
  399. #undef FUNC_NAME
  400. SCM_DEFINE (scm_set_current_error_port, "set-current-error-port", 1, 0, 0,
  401. (SCM port),
  402. "Set the current default error port to @var{port}.")
  403. #define FUNC_NAME s_scm_set_current_error_port
  404. {
  405. SCM oerrp = scm_fluid_ref (cur_errport_fluid);
  406. port = SCM_COERCE_OUTPORT (port);
  407. SCM_VALIDATE_OPOUTPORT (1, port);
  408. scm_fluid_set_x (cur_errport_fluid, port);
  409. return oerrp;
  410. }
  411. #undef FUNC_NAME
  412. SCM
  413. scm_set_current_warning_port (SCM port)
  414. #define FUNC_NAME "set-current-warning-port"
  415. {
  416. SCM owarnp = scm_fluid_ref (cur_warnport_fluid);
  417. port = SCM_COERCE_OUTPORT (port);
  418. SCM_VALIDATE_OPOUTPORT (1, port);
  419. scm_fluid_set_x (cur_warnport_fluid, port);
  420. return owarnp;
  421. }
  422. #undef FUNC_NAME
  423. void
  424. scm_dynwind_current_input_port (SCM port)
  425. #define FUNC_NAME NULL
  426. {
  427. SCM_VALIDATE_OPINPORT (1, port);
  428. scm_dynwind_fluid (cur_inport_fluid, port);
  429. }
  430. #undef FUNC_NAME
  431. void
  432. scm_dynwind_current_output_port (SCM port)
  433. #define FUNC_NAME NULL
  434. {
  435. port = SCM_COERCE_OUTPORT (port);
  436. SCM_VALIDATE_OPOUTPORT (1, port);
  437. scm_dynwind_fluid (cur_outport_fluid, port);
  438. }
  439. #undef FUNC_NAME
  440. void
  441. scm_dynwind_current_error_port (SCM port)
  442. #define FUNC_NAME NULL
  443. {
  444. port = SCM_COERCE_OUTPORT (port);
  445. SCM_VALIDATE_OPOUTPORT (1, port);
  446. scm_dynwind_fluid (cur_errport_fluid, port);
  447. }
  448. #undef FUNC_NAME
  449. void
  450. scm_i_dynwind_current_load_port (SCM port)
  451. {
  452. scm_dynwind_fluid (cur_loadport_fluid, port);
  453. }
  454. /* Retrieving a port's mode. */
  455. /* Return the flags that characterize a port based on the mode
  456. * string used to open a file for that port.
  457. *
  458. * See PORT FLAGS in scm.h
  459. */
  460. static long
  461. scm_i_mode_bits_n (SCM modes)
  462. {
  463. return (SCM_OPN
  464. | (scm_i_string_contains_char (modes, 'r')
  465. || scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
  466. | (scm_i_string_contains_char (modes, 'w')
  467. || scm_i_string_contains_char (modes, 'a')
  468. || scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
  469. | (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
  470. | (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
  471. }
  472. long
  473. scm_mode_bits (char *modes)
  474. {
  475. /* Valid characters are rw+a0l. So, use latin1. */
  476. return scm_i_mode_bits (scm_from_latin1_string (modes));
  477. }
  478. long
  479. scm_i_mode_bits (SCM modes)
  480. {
  481. long bits;
  482. if (!scm_is_string (modes))
  483. scm_wrong_type_arg_msg (NULL, 0, modes, "string");
  484. bits = scm_i_mode_bits_n (modes);
  485. scm_remember_upto_here_1 (modes);
  486. return bits;
  487. }
  488. /* Return the mode flags from an open port.
  489. * Some modes such as "append" are only used when opening
  490. * a file and are not returned here. */
  491. SCM_DEFINE (scm_port_mode, "port-mode", 1, 0, 0,
  492. (SCM port),
  493. "Return the port modes associated with the open port @var{port}.\n"
  494. "These will not necessarily be identical to the modes used when\n"
  495. "the port was opened, since modes such as \"append\" which are\n"
  496. "used only during port creation are not retained.")
  497. #define FUNC_NAME s_scm_port_mode
  498. {
  499. char modes[4];
  500. modes[0] = '\0';
  501. port = SCM_COERCE_OUTPORT (port);
  502. SCM_VALIDATE_OPPORT (1, port);
  503. if (SCM_CELL_WORD_0 (port) & SCM_RDNG) {
  504. if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
  505. strcpy (modes, "r+");
  506. else
  507. strcpy (modes, "r");
  508. }
  509. else if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
  510. strcpy (modes, "w");
  511. if (SCM_CELL_WORD_0 (port) & SCM_BUF0)
  512. strcat (modes, "0");
  513. return scm_from_latin1_string (modes);
  514. }
  515. #undef FUNC_NAME
  516. /* The port table --- a weak set of all ports.
  517. We need a global registry of ports to flush them all at exit, and to
  518. get all the ports matching a file descriptor. */
  519. SCM scm_i_port_weak_set;
  520. /* Port finalization. */
  521. struct do_free_data
  522. {
  523. scm_t_ptob_descriptor *ptob;
  524. SCM port;
  525. };
  526. static SCM
  527. do_free (void *body_data)
  528. {
  529. struct do_free_data *data = body_data;
  530. /* `close' is for explicit `close-port' by user. `free' is for this
  531. purpose: ports collected by the GC. */
  532. data->ptob->free (data->port);
  533. return SCM_BOOL_T;
  534. }
  535. /* Finalize the object (a port) pointed to by PTR. */
  536. static void
  537. finalize_port (void *ptr, void *data)
  538. {
  539. SCM port = SCM_PACK_POINTER (ptr);
  540. if (!SCM_PORTP (port))
  541. abort ();
  542. if (SCM_OPENP (port))
  543. {
  544. struct do_free_data data;
  545. SCM_CLR_PORT_OPEN_FLAG (port);
  546. data.ptob = SCM_PORT_DESCRIPTOR (port);
  547. data.port = port;
  548. scm_internal_catch (SCM_BOOL_T, do_free, &data,
  549. scm_handle_by_message_noexit, NULL);
  550. scm_gc_ports_collected++;
  551. }
  552. }
  553. SCM
  554. scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
  555. const char *encoding,
  556. scm_t_string_failed_conversion_handler handler,
  557. scm_t_bits stream)
  558. {
  559. SCM ret;
  560. scm_t_port *entry;
  561. scm_t_port_internal *pti;
  562. scm_t_ptob_descriptor *ptob;
  563. entry = scm_gc_typed_calloc (scm_t_port);
  564. pti = scm_gc_typed_calloc (scm_t_port_internal);
  565. ptob = scm_c_port_type_ref (SCM_TC2PTOBNUM (tag));
  566. ret = scm_words (tag | mode_bits, 3);
  567. SCM_SET_CELL_WORD_1 (ret, (scm_t_bits) entry);
  568. SCM_SET_CELL_WORD_2 (ret, (scm_t_bits) ptob);
  569. entry->lock = scm_gc_malloc_pointerless (sizeof (*entry->lock), "port lock");
  570. scm_i_pthread_mutex_init (entry->lock, scm_i_pthread_mutexattr_recursive);
  571. entry->internal = pti;
  572. entry->file_name = SCM_BOOL_F;
  573. entry->rw_active = SCM_PORT_NEITHER;
  574. entry->port = ret;
  575. entry->stream = stream;
  576. if (encoding_matches (encoding, "UTF-8"))
  577. {
  578. pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
  579. entry->encoding = "UTF-8";
  580. }
  581. else if (encoding_matches (encoding, "ISO-8859-1"))
  582. {
  583. pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
  584. entry->encoding = "ISO-8859-1";
  585. }
  586. else
  587. {
  588. pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
  589. entry->encoding = canonicalize_encoding (encoding);
  590. }
  591. entry->ilseq_handler = handler;
  592. pti->iconv_descriptors = NULL;
  593. pti->at_stream_start_for_bom_read = 1;
  594. pti->at_stream_start_for_bom_write = 1;
  595. pti->pending_eof = 0;
  596. pti->alist = SCM_EOL;
  597. if (SCM_PORT_DESCRIPTOR (ret)->free)
  598. scm_i_set_finalizer (SCM2PTR (ret), finalize_port, NULL);
  599. if (SCM_PORT_DESCRIPTOR (ret)->flags & SCM_PORT_TYPE_HAS_FLUSH)
  600. scm_weak_set_add_x (scm_i_port_weak_set, ret);
  601. return ret;
  602. }
  603. SCM
  604. scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
  605. {
  606. return scm_c_make_port_with_encoding (tag, mode_bits,
  607. scm_i_default_port_encoding (),
  608. scm_i_default_port_conversion_handler (),
  609. stream);
  610. }
  611. SCM
  612. scm_new_port_table_entry (scm_t_bits tag)
  613. {
  614. return scm_c_make_port (tag, 0, 0);
  615. }
  616. /* Predicates. */
  617. SCM_DEFINE (scm_port_p, "port?", 1, 0, 0,
  618. (SCM x),
  619. "Return a boolean indicating whether @var{x} is a port.\n"
  620. "Equivalent to @code{(or (input-port? @var{x}) (output-port?\n"
  621. "@var{x}))}.")
  622. #define FUNC_NAME s_scm_port_p
  623. {
  624. return scm_from_bool (SCM_PORTP (x));
  625. }
  626. #undef FUNC_NAME
  627. SCM_DEFINE (scm_input_port_p, "input-port?", 1, 0, 0,
  628. (SCM x),
  629. "Return @code{#t} if @var{x} is an input port, otherwise return\n"
  630. "@code{#f}. Any object satisfying this predicate also satisfies\n"
  631. "@code{port?}.")
  632. #define FUNC_NAME s_scm_input_port_p
  633. {
  634. return scm_from_bool (SCM_INPUT_PORT_P (x));
  635. }
  636. #undef FUNC_NAME
  637. SCM_DEFINE (scm_output_port_p, "output-port?", 1, 0, 0,
  638. (SCM x),
  639. "Return @code{#t} if @var{x} is an output port, otherwise return\n"
  640. "@code{#f}. Any object satisfying this predicate also satisfies\n"
  641. "@code{port?}.")
  642. #define FUNC_NAME s_scm_output_port_p
  643. {
  644. x = SCM_COERCE_OUTPORT (x);
  645. return scm_from_bool (SCM_OUTPUT_PORT_P (x));
  646. }
  647. #undef FUNC_NAME
  648. SCM_DEFINE (scm_port_closed_p, "port-closed?", 1, 0, 0,
  649. (SCM port),
  650. "Return @code{#t} if @var{port} is closed or @code{#f} if it is\n"
  651. "open.")
  652. #define FUNC_NAME s_scm_port_closed_p
  653. {
  654. SCM_VALIDATE_PORT (1, port);
  655. return scm_from_bool (!SCM_OPPORTP (port));
  656. }
  657. #undef FUNC_NAME
  658. SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
  659. (SCM x),
  660. "Return @code{#t} if @var{x} is an end-of-file object; otherwise\n"
  661. "return @code{#f}.")
  662. #define FUNC_NAME s_scm_eof_object_p
  663. {
  664. return scm_from_bool (SCM_EOF_OBJECT_P (x));
  665. }
  666. #undef FUNC_NAME
  667. /* Closing ports. */
  668. static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
  669. /* scm_close_port
  670. * Call the close operation on a port object.
  671. * see also scm_close.
  672. */
  673. SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
  674. (SCM port),
  675. "Close the specified port object. Return @code{#t} if it\n"
  676. "successfully closes a port or @code{#f} if it was already\n"
  677. "closed. An exception may be raised if an error occurs, for\n"
  678. "example when flushing buffered output. See also @ref{Ports and\n"
  679. "File Descriptors, close}, for a procedure which can close file\n"
  680. "descriptors.")
  681. #define FUNC_NAME s_scm_close_port
  682. {
  683. scm_t_port_internal *pti;
  684. int rv;
  685. port = SCM_COERCE_OUTPORT (port);
  686. SCM_VALIDATE_PORT (1, port);
  687. if (SCM_CLOSEDP (port))
  688. return SCM_BOOL_F;
  689. pti = SCM_PORT_GET_INTERNAL (port);
  690. SCM_CLR_PORT_OPEN_FLAG (port);
  691. if (SCM_PORT_DESCRIPTOR (port)->flags & SCM_PORT_TYPE_HAS_FLUSH)
  692. scm_weak_set_remove_x (scm_i_port_weak_set, port);
  693. if (SCM_PORT_DESCRIPTOR (port)->close)
  694. /* Note! This may throw an exception. Anything after this point
  695. should be resilient to non-local exits. */
  696. rv = SCM_PORT_DESCRIPTOR (port)->close (port);
  697. else
  698. rv = 0;
  699. if (pti->iconv_descriptors)
  700. {
  701. /* If we don't get here, the iconv_descriptors finalizer will
  702. clean up. */
  703. close_iconv_descriptors (pti->iconv_descriptors);
  704. pti->iconv_descriptors = NULL;
  705. }
  706. return scm_from_bool (rv >= 0);
  707. }
  708. #undef FUNC_NAME
  709. SCM_DEFINE (scm_close_input_port, "close-input-port", 1, 0, 0,
  710. (SCM port),
  711. "Close the specified input port object. The routine has no effect if\n"
  712. "the file has already been closed. An exception may be raised if an\n"
  713. "error occurs. The value returned is unspecified.\n\n"
  714. "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
  715. "which can close file descriptors.")
  716. #define FUNC_NAME s_scm_close_input_port
  717. {
  718. SCM_VALIDATE_INPUT_PORT (1, port);
  719. scm_close_port (port);
  720. return SCM_UNSPECIFIED;
  721. }
  722. #undef FUNC_NAME
  723. SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
  724. (SCM port),
  725. "Close the specified output port object. The routine has no effect if\n"
  726. "the file has already been closed. An exception may be raised if an\n"
  727. "error occurs. The value returned is unspecified.\n\n"
  728. "See also @ref{Ports and File Descriptors, close}, for a procedure\n"
  729. "which can close file descriptors.")
  730. #define FUNC_NAME s_scm_close_output_port
  731. {
  732. port = SCM_COERCE_OUTPORT (port);
  733. SCM_VALIDATE_OUTPUT_PORT (1, port);
  734. scm_close_port (port);
  735. return SCM_UNSPECIFIED;
  736. }
  737. #undef FUNC_NAME
  738. /* Encoding characters to byte streams, and decoding byte streams to
  739. characters. */
  740. /* A fluid specifying the default encoding for newly created ports. If it is
  741. a string, that is the encoding. If it is #f, it is in the "native"
  742. (Latin-1) encoding. */
  743. SCM_VARIABLE (default_port_encoding_var, "%default-port-encoding");
  744. static int scm_port_encoding_init = 0;
  745. /* Use ENCODING as the default encoding for future ports. */
  746. void
  747. scm_i_set_default_port_encoding (const char *encoding)
  748. {
  749. if (!scm_port_encoding_init
  750. || !scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
  751. scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
  752. SCM_EOL);
  753. if (encoding_matches (encoding, "ISO-8859-1"))
  754. scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var), SCM_BOOL_F);
  755. else
  756. scm_fluid_set_x (SCM_VARIABLE_REF (default_port_encoding_var),
  757. scm_from_latin1_string (canonicalize_encoding (encoding)));
  758. }
  759. /* Return the name of the default encoding for newly created ports. */
  760. const char *
  761. scm_i_default_port_encoding (void)
  762. {
  763. if (!scm_port_encoding_init)
  764. return "ISO-8859-1";
  765. else if (!scm_is_fluid (SCM_VARIABLE_REF (default_port_encoding_var)))
  766. return "ISO-8859-1";
  767. else
  768. {
  769. SCM encoding;
  770. encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
  771. if (!scm_is_string (encoding))
  772. return "ISO-8859-1";
  773. else
  774. return scm_i_string_chars (encoding);
  775. }
  776. }
  777. /* A fluid specifying the default conversion handler for newly created
  778. ports. Its value should be one of the symbols below. */
  779. SCM_VARIABLE (default_conversion_strategy_var,
  780. "%default-port-conversion-strategy");
  781. /* Whether the above fluid is initialized. */
  782. static int scm_conversion_strategy_init = 0;
  783. /* The possible conversion strategies. */
  784. SCM_SYMBOL (sym_error, "error");
  785. SCM_SYMBOL (sym_substitute, "substitute");
  786. SCM_SYMBOL (sym_escape, "escape");
  /* Return the default failed-encoding conversion policy for newly
     created ports.  */
  789. scm_t_string_failed_conversion_handler
  790. scm_i_default_port_conversion_handler (void)
  791. {
  792. scm_t_string_failed_conversion_handler handler;
  793. if (!scm_conversion_strategy_init
  794. || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
  795. handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
  796. else
  797. {
  798. SCM fluid, value;
  799. fluid = SCM_VARIABLE_REF (default_conversion_strategy_var);
  800. value = scm_fluid_ref (fluid);
  801. if (scm_is_eq (sym_substitute, value))
  802. handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
  803. else if (scm_is_eq (sym_escape, value))
  804. handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
  805. else
  806. /* Default to 'error also when the fluid's value is not one of
  807. the valid symbols. */
  808. handler = SCM_FAILED_CONVERSION_ERROR;
  809. }
  810. return handler;
  811. }
  812. /* Use HANDLER as the default conversion strategy for future ports. */
  813. void
  814. scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler
  815. handler)
  816. {
  817. SCM strategy;
  818. if (!scm_conversion_strategy_init
  819. || !scm_is_fluid (SCM_VARIABLE_REF (default_conversion_strategy_var)))
  820. scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
  821. SCM_EOL);
  822. switch (handler)
  823. {
  824. case SCM_FAILED_CONVERSION_ERROR:
  825. strategy = sym_error;
  826. break;
  827. case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
  828. strategy = sym_escape;
  829. break;
  830. case SCM_FAILED_CONVERSION_QUESTION_MARK:
  831. strategy = sym_substitute;
  832. break;
  833. default:
  834. abort ();
  835. }
  836. scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var),
  837. strategy);
  838. }
  839. static void
  840. scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port);
  841. /* If the next LEN bytes from PORT are equal to those in BYTES, then
  842. return 1, else return 0. Leave the port position unchanged. */
  843. static int
  844. looking_at_bytes (SCM port, const unsigned char *bytes, int len)
  845. {
  846. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  847. int i = 0;
  848. while (i < len && scm_peek_byte_or_eof_unlocked (port) == bytes[i])
  849. {
  850. pt->read_pos++;
  851. i++;
  852. }
  853. scm_i_unget_bytes_unlocked (bytes, i, port);
  854. return (i == len);
  855. }
  /* Byte-order marks, as raw byte sequences, for the Unicode encoding
     forms handled in this file.  */

  /* UTF-8.  */
  static const unsigned char scm_utf8_bom[] = { 0xEF, 0xBB, 0xBF };

  /* UTF-16, big and little endian.  */
  static const unsigned char scm_utf16be_bom[] = { 0xFE, 0xFF };
  static const unsigned char scm_utf16le_bom[] = { 0xFF, 0xFE };

  /* UTF-32, big and little endian.  */
  static const unsigned char scm_utf32be_bom[] = { 0x00, 0x00, 0xFE, 0xFF };
  static const unsigned char scm_utf32le_bom[] = { 0xFF, 0xFE, 0x00, 0x00 };
  861. /* Decide what byte order to use for a UTF-16 port. Return "UTF-16BE"
  862. or "UTF-16LE". MODE must be either SCM_PORT_READ or SCM_PORT_WRITE,
  863. and specifies which operation is about to be done. The MODE
  864. determines how we will decide the byte order. We deliberately avoid
  865. reading from the port unless the user is about to do so. If the user
  866. is about to read, then we look for a BOM, and if present, we use it
  867. to determine the byte order. Otherwise we choose big endian, as
  868. recommended by the Unicode Standard. Note that the BOM (if any) is
  869. not consumed here. */
  870. static const char *
  871. decide_utf16_encoding (SCM port, scm_t_port_rw_active mode)
  872. {
  873. if (mode == SCM_PORT_READ
  874. && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
  875. && looking_at_bytes (port, scm_utf16le_bom, sizeof scm_utf16le_bom))
  876. return "UTF-16LE";
  877. else
  878. return "UTF-16BE";
  879. }
  880. /* Decide what byte order to use for a UTF-32 port. Return "UTF-32BE"
  881. or "UTF-32LE". See the comment above 'decide_utf16_encoding' for
  882. details. */
  883. static const char *
  884. decide_utf32_encoding (SCM port, scm_t_port_rw_active mode)
  885. {
  886. if (mode == SCM_PORT_READ
  887. && SCM_PORT_GET_INTERNAL (port)->at_stream_start_for_bom_read
  888. && looking_at_bytes (port, scm_utf32le_bom, sizeof scm_utf32le_bom))
  889. return "UTF-32LE";
  890. else
  891. return "UTF-32BE";
  892. }
  893. static void
  894. finalize_iconv_descriptors (void *ptr, void *data)
  895. {
  896. close_iconv_descriptors (ptr);
  897. }
  898. static scm_t_iconv_descriptors *
  899. open_iconv_descriptors (const char *encoding, int reading, int writing)
  900. {
  901. scm_t_iconv_descriptors *id;
  902. iconv_t input_cd, output_cd;
  903. size_t i;
  904. input_cd = (iconv_t) -1;
  905. output_cd = (iconv_t) -1;
  906. for (i = 0; encoding[i]; i++)
  907. if (encoding[i] > 127)
  908. goto invalid_encoding;
  909. if (reading)
  910. {
  911. /* Open an input iconv conversion descriptor, from ENCODING
  912. to UTF-8. We choose UTF-8, not UTF-32, because iconv
  913. implementations can typically convert from anything to
  914. UTF-8, but not to UTF-32 (see
  915. <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
  916. /* Assume opening an iconv descriptor causes about 16 KB of
  917. allocation. */
  918. scm_gc_register_allocation (16 * 1024);
  919. input_cd = iconv_open ("UTF-8", encoding);
  920. if (input_cd == (iconv_t) -1)
  921. goto invalid_encoding;
  922. }
  923. if (writing)
  924. {
  925. /* Assume opening an iconv descriptor causes about 16 KB of
  926. allocation. */
  927. scm_gc_register_allocation (16 * 1024);
  928. output_cd = iconv_open (encoding, "UTF-8");
  929. if (output_cd == (iconv_t) -1)
  930. {
  931. if (input_cd != (iconv_t) -1)
  932. iconv_close (input_cd);
  933. goto invalid_encoding;
  934. }
  935. }
  936. id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
  937. id->input_cd = input_cd;
  938. id->output_cd = output_cd;
  939. /* Register a finalizer to close the descriptors. */
  940. scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
  941. return id;
  942. invalid_encoding:
  943. {
  944. SCM err;
  945. err = scm_from_latin1_string (encoding);
  946. scm_misc_error ("open_iconv_descriptors",
  947. "invalid or unknown character encoding ~s",
  948. scm_list_1 (err));
  949. }
  950. }
  951. static void
  952. close_iconv_descriptors (scm_t_iconv_descriptors *id)
  953. {
  954. if (id->input_cd != (iconv_t) -1)
  955. iconv_close (id->input_cd);
  956. if (id->output_cd != (iconv_t) -1)
  957. iconv_close (id->output_cd);
  958. id->input_cd = (void *) -1;
  959. id->output_cd = (void *) -1;
  960. }
  961. scm_t_iconv_descriptors *
  962. scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode)
  963. {
  964. scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
  965. assert (pti->encoding_mode == SCM_PORT_ENCODING_MODE_ICONV);
  966. if (!pti->iconv_descriptors)
  967. {
  968. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  969. const char *precise_encoding;
  970. if (!pt->encoding)
  971. pt->encoding = "ISO-8859-1";
  972. /* If the specified encoding is UTF-16 or UTF-32, then make
  973. that more precise by deciding what byte order to use. */
  974. if (strcmp (pt->encoding, "UTF-16") == 0)
  975. precise_encoding = decide_utf16_encoding (port, mode);
  976. else if (strcmp (pt->encoding, "UTF-32") == 0)
  977. precise_encoding = decide_utf32_encoding (port, mode);
  978. else
  979. precise_encoding = pt->encoding;
  980. pti->iconv_descriptors =
  981. open_iconv_descriptors (precise_encoding,
  982. SCM_INPUT_PORT_P (port),
  983. SCM_OUTPUT_PORT_P (port));
  984. }
  985. return pti->iconv_descriptors;
  986. }
  987. /* The name of the encoding is itself encoded in ASCII. */
  988. void
  989. scm_i_set_port_encoding_x (SCM port, const char *encoding)
  990. {
  991. scm_t_port *pt;
  992. scm_t_port_internal *pti;
  993. scm_t_iconv_descriptors *prev;
  994. /* Set the character encoding for this port. */
  995. pt = SCM_PTAB_ENTRY (port);
  996. pti = SCM_PORT_GET_INTERNAL (port);
  997. prev = pti->iconv_descriptors;
  998. /* In order to handle cases where the encoding changes mid-stream
  999. (e.g. within an HTTP stream, or within a file that is composed of
  1000. segments with different encodings), we consider this to be "stream
  1001. start" for purposes of BOM handling, regardless of our actual file
  1002. position. */
  1003. pti->at_stream_start_for_bom_read = 1;
  1004. pti->at_stream_start_for_bom_write = 1;
  1005. if (encoding_matches (encoding, "UTF-8"))
  1006. {
  1007. pt->encoding = "UTF-8";
  1008. pti->encoding_mode = SCM_PORT_ENCODING_MODE_UTF8;
  1009. }
  1010. else if (encoding_matches (encoding, "ISO-8859-1"))
  1011. {
  1012. pt->encoding = "ISO-8859-1";
  1013. pti->encoding_mode = SCM_PORT_ENCODING_MODE_LATIN1;
  1014. }
  1015. else
  1016. {
  1017. pt->encoding = canonicalize_encoding (encoding);
  1018. pti->encoding_mode = SCM_PORT_ENCODING_MODE_ICONV;
  1019. }
  1020. pti->iconv_descriptors = NULL;
  1021. if (prev)
  1022. close_iconv_descriptors (prev);
  1023. }
  1024. SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
  1025. (SCM port),
  1026. "Returns, as a string, the character encoding that @var{port}\n"
  1027. "uses to interpret its input and output.\n")
  1028. #define FUNC_NAME s_scm_port_encoding
  1029. {
  1030. SCM_VALIDATE_PORT (1, port);
  1031. return scm_from_latin1_string (SCM_PTAB_ENTRY (port)->encoding);
  1032. }
  1033. #undef FUNC_NAME
  1034. SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
  1035. (SCM port, SCM enc),
  1036. "Sets the character encoding that will be used to interpret all\n"
  1037. "port I/O. New ports are created with the encoding\n"
  1038. "appropriate for the current locale if @code{setlocale} has \n"
  1039. "been called or ISO-8859-1 otherwise\n"
  1040. "and this procedure can be used to modify that encoding.\n")
  1041. #define FUNC_NAME s_scm_set_port_encoding_x
  1042. {
  1043. char *enc_str;
  1044. SCM_VALIDATE_PORT (1, port);
  1045. SCM_VALIDATE_STRING (2, enc);
  1046. enc_str = scm_to_latin1_string (enc);
  1047. scm_i_set_port_encoding_x (port, enc_str);
  1048. free (enc_str);
  1049. return SCM_UNSPECIFIED;
  1050. }
  1051. #undef FUNC_NAME
  1052. SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
  1053. 1, 0, 0, (SCM port),
  1054. "Returns the behavior of the port when handling a character that\n"
  1055. "is not representable in the port's current encoding.\n"
  1056. "It returns the symbol @code{error} if unrepresentable characters\n"
  1057. "should cause exceptions, @code{substitute} if the port should\n"
  1058. "try to replace unrepresentable characters with question marks or\n"
  1059. "approximate characters, or @code{escape} if unrepresentable\n"
  1060. "characters should be converted to string escapes.\n"
  1061. "\n"
  1062. "If @var{port} is @code{#f}, then the current default behavior\n"
  1063. "will be returned. New ports will have this default behavior\n"
  1064. "when they are created.\n")
  1065. #define FUNC_NAME s_scm_port_conversion_strategy
  1066. {
  1067. scm_t_string_failed_conversion_handler h;
  1068. if (scm_is_false (port))
  1069. h = scm_i_default_port_conversion_handler ();
  1070. else
  1071. {
  1072. scm_t_port *pt;
  1073. SCM_VALIDATE_OPPORT (1, port);
  1074. pt = SCM_PTAB_ENTRY (port);
  1075. h = pt->ilseq_handler;
  1076. }
  1077. if (h == SCM_FAILED_CONVERSION_ERROR)
  1078. return scm_from_latin1_symbol ("error");
  1079. else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
  1080. return scm_from_latin1_symbol ("substitute");
  1081. else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
  1082. return scm_from_latin1_symbol ("escape");
  1083. else
  1084. abort ();
  1085. /* Never gets here. */
  1086. return SCM_UNDEFINED;
  1087. }
  1088. #undef FUNC_NAME
  1089. SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
  1090. 2, 0, 0,
  1091. (SCM port, SCM sym),
  1092. "Sets the behavior of the interpreter when outputting a character\n"
  1093. "that is not representable in the port's current encoding.\n"
  1094. "@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
  1095. "@code{'escape}. If it is @code{'error}, an error will be thrown\n"
  1096. "when an unconvertible character is encountered. If it is\n"
  1097. "@code{'substitute}, then unconvertible characters will \n"
  1098. "be replaced with approximate characters, or with question marks\n"
  1099. "if no approximately correct character is available.\n"
  1100. "If it is @code{'escape},\n"
  1101. "it will appear as a hex escape when output.\n"
  1102. "\n"
  1103. "If @var{port} is an open port, the conversion error behavior\n"
  1104. "is set for that port. If it is @code{#f}, it is set as the\n"
  1105. "default behavior for any future ports that get created in\n"
  1106. "this thread.\n")
  1107. #define FUNC_NAME s_scm_set_port_conversion_strategy_x
  1108. {
  1109. scm_t_string_failed_conversion_handler handler;
  1110. if (scm_is_eq (sym, sym_error))
  1111. handler = SCM_FAILED_CONVERSION_ERROR;
  1112. else if (scm_is_eq (sym, sym_substitute))
  1113. handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
  1114. else if (scm_is_eq (sym, sym_escape))
  1115. handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
  1116. else
  1117. SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
  1118. if (scm_is_false (port))
  1119. scm_i_set_default_port_conversion_handler (handler);
  1120. else
  1121. {
  1122. SCM_VALIDATE_OPPORT (1, port);
  1123. SCM_PTAB_ENTRY (port)->ilseq_handler = handler;
  1124. }
  1125. return SCM_UNSPECIFIED;
  1126. }
  1127. #undef FUNC_NAME
  1128. /* The port lock. */
  1129. static void
  1130. lock_port (void *mutex)
  1131. {
  1132. scm_i_pthread_mutex_lock ((scm_i_pthread_mutex_t *) mutex);
  1133. }
  1134. static void
  1135. unlock_port (void *mutex)
  1136. {
  1137. scm_i_pthread_mutex_unlock ((scm_i_pthread_mutex_t *) mutex);
  1138. }
  1139. void
  1140. scm_dynwind_lock_port (SCM port)
  1141. #define FUNC_NAME "dynwind-lock-port"
  1142. {
  1143. scm_i_pthread_mutex_t *lock;
  1144. SCM_VALIDATE_OPPORT (SCM_ARG1, port);
  1145. scm_c_lock_port (port, &lock);
  1146. if (lock)
  1147. {
  1148. scm_dynwind_unwind_handler (unlock_port, lock, SCM_F_WIND_EXPLICITLY);
  1149. scm_dynwind_rewind_handler (lock_port, lock, 0);
  1150. }
  1151. }
  1152. #undef FUNC_NAME
  1153. /* Input. */
  1154. int
  1155. scm_get_byte_or_eof (SCM port)
  1156. {
  1157. scm_i_pthread_mutex_t *lock;
  1158. int ret;
  1159. scm_c_lock_port (port, &lock);
  1160. ret = scm_get_byte_or_eof_unlocked (port);
  1161. if (lock)
  1162. scm_i_pthread_mutex_unlock (lock);
  1163. return ret;
  1164. }
  1165. int
  1166. scm_peek_byte_or_eof (SCM port)
  1167. {
  1168. scm_i_pthread_mutex_t *lock;
  1169. int ret;
  1170. scm_c_lock_port (port, &lock);
  1171. ret = scm_peek_byte_or_eof_unlocked (port);
  1172. if (lock)
  1173. scm_i_pthread_mutex_unlock (lock);
  1174. return ret;
  1175. }
  1176. /* scm_c_read
  1177. *
  1178. * Used by an application to read arbitrary number of bytes from an
  1179. * SCM port. Same semantics as libc read, except that scm_c_read only
  1180. * returns less than SIZE bytes if at end-of-file.
  1181. *
  1182. * Warning: Doesn't update port line and column counts! */
  1183. /* This structure, and the following swap_buffer function, are used
  1184. for temporarily swapping a port's own read buffer, and the buffer
  1185. that the caller of scm_c_read provides. */
  1186. struct port_and_swap_buffer
  1187. {
  1188. scm_t_port *pt;
  1189. unsigned char *buffer;
  1190. size_t size;
  1191. };
  1192. static void
  1193. swap_buffer (void *data)
  1194. {
  1195. struct port_and_swap_buffer *psb = (struct port_and_swap_buffer *) data;
  1196. unsigned char *old_buf = psb->pt->read_buf;
  1197. size_t old_size = psb->pt->read_buf_size;
  1198. /* Make the port use (buffer, size) from the struct. */
  1199. psb->pt->read_pos = psb->pt->read_buf = psb->pt->read_end = psb->buffer;
  1200. psb->pt->read_buf_size = psb->size;
  1201. /* Save the port's old (buffer, size) in the struct. */
  1202. psb->buffer = old_buf;
  1203. psb->size = old_size;
  1204. }
  1205. static int scm_i_fill_input_unlocked (SCM port);
  1206. size_t
  1207. scm_c_read_unlocked (SCM port, void *buffer, size_t size)
  1208. #define FUNC_NAME "scm_c_read"
  1209. {
  1210. scm_t_port *pt;
  1211. scm_t_port_internal *pti;
  1212. size_t n_read = 0, n_available;
  1213. struct port_and_swap_buffer psb;
  1214. SCM_VALIDATE_OPINPORT (1, port);
  1215. pt = SCM_PTAB_ENTRY (port);
  1216. pti = SCM_PORT_GET_INTERNAL (port);
  1217. if (pt->rw_active == SCM_PORT_WRITE)
  1218. SCM_PORT_DESCRIPTOR (port)->flush (port);
  1219. if (pt->rw_random)
  1220. pt->rw_active = SCM_PORT_READ;
  1221. /* Take bytes first from the port's read buffer. */
  1222. if (pt->read_pos < pt->read_end)
  1223. {
  1224. n_available = min (size, pt->read_end - pt->read_pos);
  1225. memcpy (buffer, pt->read_pos, n_available);
  1226. buffer = (char *) buffer + n_available;
  1227. pt->read_pos += n_available;
  1228. n_read += n_available;
  1229. size -= n_available;
  1230. }
  1231. /* Avoid the scm_dynwind_* costs if we now have enough data. */
  1232. if (size == 0)
  1233. return n_read;
  1234. /* Now we will call scm_i_fill_input_unlocked repeatedly until we have
  1235. read the requested number of bytes. (Note that a single
  1236. scm_i_fill_input_unlocked call does not guarantee to fill the whole
  1237. of the port's read buffer.) */
  1238. if (pt->read_buf_size <= 1
  1239. && pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
  1240. {
  1241. /* The port that we are reading from is unbuffered - i.e. does not
  1242. have its own persistent buffer - but we have a buffer, provided
  1243. by our caller, that is the right size for the data that is
  1244. wanted. For the following scm_i_fill_input_unlocked calls,
  1245. therefore, we use the buffer in hand as the port's read buffer.
  1246. We need to make sure that the port's normal (1 byte) buffer is
  1247. reinstated in case one of the scm_i_fill_input_unlocked ()
  1248. calls throws an exception; we use the scm_dynwind_* API to
  1249. achieve that.
  1250. A consequence of this optimization is that the fill_input
  1251. functions can't unget characters. That'll push data to the
  1252. pushback buffer instead of this psb buffer. */
  1253. #if SCM_DEBUG == 1
  1254. unsigned char *pback = pt->putback_buf;
  1255. #endif
  1256. psb.pt = pt;
  1257. psb.buffer = buffer;
  1258. psb.size = size;
  1259. scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
  1260. scm_dynwind_rewind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
  1261. scm_dynwind_unwind_handler (swap_buffer, &psb, SCM_F_WIND_EXPLICITLY);
  1262. /* Call scm_i_fill_input_unlocked until we have all the bytes that
  1263. we need, or we hit EOF. */
  1264. while (pt->read_buf_size && (scm_i_fill_input_unlocked (port) != EOF))
  1265. {
  1266. pt->read_buf_size -= (pt->read_end - pt->read_pos);
  1267. pt->read_pos = pt->read_buf = pt->read_end;
  1268. }
  1269. #if SCM_DEBUG == 1
  1270. if (pback != pt->putback_buf
  1271. || pt->read_buf - (unsigned char *) buffer < 0)
  1272. scm_misc_error (FUNC_NAME,
  1273. "scm_c_read must not call a fill function that pushes "
  1274. "back characters onto an unbuffered port", SCM_EOL);
  1275. #endif
  1276. n_read += pt->read_buf - (unsigned char *) buffer;
  1277. /* Reinstate the port's normal buffer. */
  1278. scm_dynwind_end ();
  1279. }
  1280. else
  1281. {
  1282. /* The port has its own buffer. It is important that we use it,
  1283. even if it happens to be smaller than our caller's buffer, so
  1284. that a custom port implementation's entry points (in
  1285. particular, fill_input) can rely on the buffer always being
  1286. the same as they first set up. */
  1287. while (size && (scm_i_fill_input_unlocked (port) != EOF))
  1288. {
  1289. n_available = min (size, pt->read_end - pt->read_pos);
  1290. memcpy (buffer, pt->read_pos, n_available);
  1291. buffer = (char *) buffer + n_available;
  1292. pt->read_pos += n_available;
  1293. n_read += n_available;
  1294. size -= n_available;
  1295. }
  1296. }
  1297. return n_read;
  1298. }
  1299. #undef FUNC_NAME
  1300. size_t
  1301. scm_c_read (SCM port, void *buffer, size_t size)
  1302. {
  1303. scm_i_pthread_mutex_t *lock;
  1304. size_t ret;
  1305. scm_c_lock_port (port, &lock);
  1306. ret = scm_c_read_unlocked (port, buffer, size);
  1307. if (lock)
  1308. scm_i_pthread_mutex_unlock (lock);
  1309. return ret;
  1310. }
  1311. /* Update the line and column number of PORT after consumption of C. */
  1312. static inline void
  1313. update_port_lf (scm_t_wchar c, SCM port)
  1314. {
  1315. switch (c)
  1316. {
  1317. case '\a':
  1318. case EOF:
  1319. break;
  1320. case '\b':
  1321. SCM_DECCOL (port);
  1322. break;
  1323. case '\n':
  1324. SCM_INCLINE (port);
  1325. break;
  1326. case '\r':
  1327. SCM_ZEROCOL (port);
  1328. break;
  1329. case '\t':
  1330. SCM_TABCOL (port);
  1331. break;
  1332. default:
  1333. SCM_INCCOL (port);
  1334. break;
  1335. }
  1336. }
  1337. #define SCM_MBCHAR_BUF_SIZE (4)
  1338. /* Convert the SIZE-byte UTF-8 sequence in UTF8_BUF to a codepoint.
  1339. UTF8_BUF is assumed to contain a valid UTF-8 sequence. */
  1340. static scm_t_wchar
  1341. utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
  1342. {
  1343. scm_t_wchar codepoint;
  1344. if (utf8_buf[0] <= 0x7f)
  1345. {
  1346. assert (size == 1);
  1347. codepoint = utf8_buf[0];
  1348. }
  1349. else if ((utf8_buf[0] & 0xe0) == 0xc0)
  1350. {
  1351. assert (size == 2);
  1352. codepoint = ((scm_t_wchar) utf8_buf[0] & 0x1f) << 6UL
  1353. | (utf8_buf[1] & 0x3f);
  1354. }
  1355. else if ((utf8_buf[0] & 0xf0) == 0xe0)
  1356. {
  1357. assert (size == 3);
  1358. codepoint = ((scm_t_wchar) utf8_buf[0] & 0x0f) << 12UL
  1359. | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 6UL
  1360. | (utf8_buf[2] & 0x3f);
  1361. }
  1362. else
  1363. {
  1364. assert (size == 4);
  1365. codepoint = ((scm_t_wchar) utf8_buf[0] & 0x07) << 18UL
  1366. | ((scm_t_wchar) utf8_buf[1] & 0x3f) << 12UL
  1367. | ((scm_t_wchar) utf8_buf[2] & 0x3f) << 6UL
  1368. | (utf8_buf[3] & 0x3f);
  1369. }
  1370. return codepoint;
  1371. }
  1372. /* Read a UTF-8 sequence from PORT. On success, return 0 and set
  1373. *CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
  1374. representation, and set *LEN to the length in bytes. Return
  1375. `EILSEQ' on error. */
  1376. static int
  1377. get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
  1378. scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
  1379. {
  1380. #define ASSERT_NOT_EOF(b) \
  1381. if (SCM_UNLIKELY ((b) == EOF)) \
  1382. goto invalid_seq
  1383. #define CONSUME_PEEKED_BYTE() \
  1384. pt->read_pos++
  1385. int byte;
  1386. scm_t_port *pt;
  1387. *len = 0;
  1388. pt = SCM_PTAB_ENTRY (port);
  1389. byte = scm_get_byte_or_eof_unlocked (port);
  1390. if (byte == EOF)
  1391. {
  1392. *codepoint = EOF;
  1393. return 0;
  1394. }
  1395. buf[0] = (scm_t_uint8) byte;
  1396. *len = 1;
  1397. if (buf[0] <= 0x7f)
  1398. /* 1-byte form. */
  1399. *codepoint = buf[0];
  1400. else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
  1401. {
  1402. /* 2-byte form. */
  1403. byte = scm_peek_byte_or_eof_unlocked (port);
  1404. ASSERT_NOT_EOF (byte);
  1405. if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
  1406. goto invalid_seq;
  1407. CONSUME_PEEKED_BYTE ();
  1408. buf[1] = (scm_t_uint8) byte;
  1409. *len = 2;
  1410. *codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
  1411. | (buf[1] & 0x3f);
  1412. }
  1413. else if ((buf[0] & 0xf0) == 0xe0)
  1414. {
  1415. /* 3-byte form. */
  1416. byte = scm_peek_byte_or_eof_unlocked (port);
  1417. ASSERT_NOT_EOF (byte);
  1418. if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
  1419. || (buf[0] == 0xe0 && byte < 0xa0)
  1420. || (buf[0] == 0xed && byte > 0x9f)))
  1421. goto invalid_seq;
  1422. CONSUME_PEEKED_BYTE ();
  1423. buf[1] = (scm_t_uint8) byte;
  1424. *len = 2;
  1425. byte = scm_peek_byte_or_eof_unlocked (port);
  1426. ASSERT_NOT_EOF (byte);
  1427. if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
  1428. goto invalid_seq;
  1429. CONSUME_PEEKED_BYTE ();
  1430. buf[2] = (scm_t_uint8) byte;
  1431. *len = 3;
  1432. *codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
  1433. | ((scm_t_wchar) buf[1] & 0x3f) << 6UL
  1434. | (buf[2] & 0x3f);
  1435. }
  1436. else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
  1437. {
  1438. /* 4-byte form. */
  1439. byte = scm_peek_byte_or_eof_unlocked (port);
  1440. ASSERT_NOT_EOF (byte);
  1441. if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
  1442. || (buf[0] == 0xf0 && byte < 0x90)
  1443. || (buf[0] == 0xf4 && byte > 0x8f)))
  1444. goto invalid_seq;
  1445. CONSUME_PEEKED_BYTE ();
  1446. buf[1] = (scm_t_uint8) byte;
  1447. *len = 2;
  1448. byte = scm_peek_byte_or_eof_unlocked (port);
  1449. ASSERT_NOT_EOF (byte);
  1450. if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
  1451. goto invalid_seq;
  1452. CONSUME_PEEKED_BYTE ();
  1453. buf[2] = (scm_t_uint8) byte;
  1454. *len = 3;
  1455. byte = scm_peek_byte_or_eof_unlocked (port);
  1456. ASSERT_NOT_EOF (byte);
  1457. if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
  1458. goto invalid_seq;
  1459. CONSUME_PEEKED_BYTE ();
  1460. buf[3] = (scm_t_uint8) byte;
  1461. *len = 4;
  1462. *codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
  1463. | ((scm_t_wchar) buf[1] & 0x3f) << 12UL
  1464. | ((scm_t_wchar) buf[2] & 0x3f) << 6UL
  1465. | (buf[3] & 0x3f);
  1466. }
  1467. else
  1468. goto invalid_seq;
  1469. return 0;
  1470. invalid_seq:
  1471. /* Here we could choose the consume the faulty byte when it's not a
  1472. valid starting byte, but it's not a requirement. What Section 3.9
  1473. of Unicode 6.0.0 mandates, though, is to not consume a byte that
  1474. would otherwise be a valid starting byte. */
  1475. return EILSEQ;
  1476. #undef CONSUME_PEEKED_BYTE
  1477. #undef ASSERT_NOT_EOF
  1478. }
  1479. /* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
  1480. 0 and set *CODEPOINT to the codepoint that was read, fill BUF with
  1481. its UTF-8 representation, and set *LEN to the length in bytes.
  1482. Return `EILSEQ' on error. */
  1483. static int
  1484. get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
  1485. char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
  1486. {
  1487. *codepoint = scm_get_byte_or_eof_unlocked (port);
  1488. if (*codepoint == EOF)
  1489. *len = 0;
  1490. else
  1491. {
  1492. *len = 1;
  1493. buf[0] = *codepoint;
  1494. }
  1495. return 0;
  1496. }
  1497. /* Likewise, read a byte sequence from PORT, passing it through its
  1498. input conversion descriptor. */
  1499. static int
  1500. get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
  1501. char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
  1502. {
  1503. scm_t_iconv_descriptors *id;
  1504. scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
  1505. size_t input_size = 0;
  1506. id = scm_i_port_iconv_descriptors (port, SCM_PORT_READ);
  1507. for (;;)
  1508. {
  1509. int byte_read;
  1510. char *input, *output;
  1511. size_t input_left, output_left, done;
  1512. byte_read = scm_get_byte_or_eof_unlocked (port);
  1513. if (SCM_UNLIKELY (byte_read == EOF))
  1514. {
  1515. if (SCM_LIKELY (input_size == 0))
  1516. {
  1517. *codepoint = (scm_t_wchar) EOF;
  1518. *len = input_size;
  1519. return 0;
  1520. }
  1521. else
  1522. {
  1523. /* EOF found in the middle of a multibyte character. */
  1524. scm_i_set_pending_eof (port);
  1525. return EILSEQ;
  1526. }
  1527. }
  1528. buf[input_size++] = byte_read;
  1529. input = buf;
  1530. input_left = input_size;
  1531. output = (char *) utf8_buf;
  1532. output_left = sizeof (utf8_buf);
  1533. done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
  1534. if (done == (size_t) -1)
  1535. {
  1536. int err = errno;
  1537. if (SCM_LIKELY (err == EINVAL))
  1538. /* The input byte sequence did not form a complete
  1539. character. Read another byte and try again. */
  1540. continue;
  1541. else
  1542. return err;
  1543. }
  1544. else
  1545. {
  1546. size_t output_size = sizeof (utf8_buf) - output_left;
  1547. if (SCM_LIKELY (output_size > 0))
  1548. {
  1549. /* iconv generated output. Convert the UTF8_BUF sequence
  1550. to a Unicode code point. */
  1551. *codepoint = utf8_to_codepoint (utf8_buf, output_size);
  1552. *len = input_size;
  1553. return 0;
  1554. }
  1555. else
  1556. {
  1557. /* iconv consumed some bytes without producing any output.
  1558. Most likely this means that a Unicode byte-order mark
  1559. (BOM) was consumed, which should not be included in the
  1560. returned buf. Shift any remaining bytes to the beginning
  1561. of buf, and continue the loop. */
  1562. memmove (buf, input, input_left);
  1563. input_size = input_left;
  1564. continue;
  1565. }
  1566. }
  1567. }
  1568. }
  1569. /* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
  1570. with the byte representation of the codepoint in PORT's encoding, and
  1571. set *LEN to the length in bytes of that representation. Return 0 on
  1572. success and an errno value on error. */
  1573. static SCM_C_INLINE int
  1574. get_codepoint (SCM port, scm_t_wchar *codepoint,
  1575. char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
  1576. {
  1577. int err;
  1578. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  1579. scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
  1580. if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
  1581. err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
  1582. else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
  1583. err = get_latin1_codepoint (port, codepoint, buf, len);
  1584. else
  1585. err = get_iconv_codepoint (port, codepoint, buf, len);
  1586. if (SCM_LIKELY (err == 0))
  1587. {
  1588. if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
  1589. {
  1590. /* Record that we're no longer at stream start. */
  1591. pti->at_stream_start_for_bom_read = 0;
  1592. if (pt->rw_random)
  1593. pti->at_stream_start_for_bom_write = 0;
  1594. /* If we just read a BOM in an encoding that recognizes them,
  1595. then silently consume it and read another code point. */
  1596. if (SCM_UNLIKELY
  1597. (*codepoint == SCM_UNICODE_BOM
  1598. && (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
  1599. || strcmp (pt->encoding, "UTF-16") == 0
  1600. || strcmp (pt->encoding, "UTF-32") == 0)))
  1601. return get_codepoint (port, codepoint, buf, len);
  1602. }
  1603. update_port_lf (*codepoint, port);
  1604. }
  1605. else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
  1606. {
  1607. *codepoint = '?';
  1608. err = 0;
  1609. update_port_lf (*codepoint, port);
  1610. }
  1611. return err;
  1612. }
  1613. /* Read a codepoint from PORT and return it. */
  1614. scm_t_wchar
  1615. scm_getc_unlocked (SCM port)
  1616. #define FUNC_NAME "scm_getc"
  1617. {
  1618. int err;
  1619. size_t len;
  1620. scm_t_wchar codepoint;
  1621. char buf[SCM_MBCHAR_BUF_SIZE];
  1622. err = get_codepoint (port, &codepoint, buf, &len);
  1623. if (SCM_UNLIKELY (err != 0))
  1624. /* At this point PORT should point past the invalid encoding, as per
  1625. R6RS-lib Section 8.2.4. */
  1626. scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
  1627. return codepoint;
  1628. }
  1629. #undef FUNC_NAME
  1630. scm_t_wchar
  1631. scm_getc (SCM port)
  1632. {
  1633. scm_i_pthread_mutex_t *lock;
  1634. scm_t_wchar ret;
  1635. scm_c_lock_port (port, &lock);
  1636. ret = scm_getc_unlocked (port);
  1637. if (lock)
  1638. scm_i_pthread_mutex_unlock (lock);
  1639. return ret;
  1640. }
  1641. SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
  1642. (SCM port),
  1643. "Return the next character available from @var{port}, updating\n"
  1644. "@var{port} to point to the following character. If no more\n"
  1645. "characters are available, the end-of-file object is returned.\n"
  1646. "\n"
  1647. "When @var{port}'s data cannot be decoded according to its\n"
  1648. "character encoding, a @code{decoding-error} is raised and\n"
  1649. "@var{port} points past the erroneous byte sequence.\n")
  1650. #define FUNC_NAME s_scm_read_char
  1651. {
  1652. scm_t_wchar c;
  1653. if (SCM_UNBNDP (port))
  1654. port = scm_current_input_port ();
  1655. SCM_VALIDATE_OPINPORT (1, port);
  1656. c = scm_getc_unlocked (port);
  1657. if (EOF == c)
  1658. return SCM_EOF_VAL;
  1659. return SCM_MAKE_CHAR (c);
  1660. }
  1661. #undef FUNC_NAME
  1662. /* Pushback. */
  1663. static void
  1664. scm_i_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
  1665. #define FUNC_NAME "scm_unget_bytes"
  1666. {
  1667. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  1668. size_t old_len, new_len;
  1669. scm_i_clear_pending_eof (port);
  1670. if (pt->read_buf != pt->putback_buf)
  1671. /* switch to the put-back buffer. */
  1672. {
  1673. if (pt->putback_buf == NULL)
  1674. {
  1675. pt->putback_buf_size = (len > SCM_INITIAL_PUTBACK_BUF_SIZE
  1676. ? len : SCM_INITIAL_PUTBACK_BUF_SIZE);
  1677. pt->putback_buf
  1678. = (unsigned char *) scm_gc_malloc_pointerless
  1679. (pt->putback_buf_size, "putback buffer");
  1680. }
  1681. pt->saved_read_buf = pt->read_buf;
  1682. pt->saved_read_pos = pt->read_pos;
  1683. pt->saved_read_end = pt->read_end;
  1684. pt->saved_read_buf_size = pt->read_buf_size;
  1685. /* Put read_pos at the end of the buffer, so that ungets will not
  1686. have to shift the buffer contents each time. */
  1687. pt->read_buf = pt->putback_buf;
  1688. pt->read_pos = pt->read_end = pt->putback_buf + pt->putback_buf_size;
  1689. pt->read_buf_size = pt->putback_buf_size;
  1690. }
  1691. old_len = pt->read_end - pt->read_pos;
  1692. new_len = old_len + len;
  1693. if (new_len > pt->read_buf_size)
  1694. /* The putback buffer needs to be enlarged. */
  1695. {
  1696. size_t new_buf_size;
  1697. unsigned char *new_buf, *new_end, *new_pos;
  1698. new_buf_size = pt->read_buf_size * 2;
  1699. if (new_buf_size < new_len)
  1700. new_buf_size = new_len;
  1701. new_buf = (unsigned char *)
  1702. scm_gc_malloc_pointerless (new_buf_size, "putback buffer");
  1703. /* Put the bytes at the end of the buffer, so that future
  1704. ungets won't need to shift the buffer. */
  1705. new_end = new_buf + new_buf_size;
  1706. new_pos = new_end - old_len;
  1707. memcpy (new_pos, pt->read_pos, old_len);
  1708. pt->read_buf = pt->putback_buf = new_buf;
  1709. pt->read_pos = new_pos;
  1710. pt->read_end = new_end;
  1711. pt->read_buf_size = pt->putback_buf_size = new_buf_size;
  1712. }
  1713. else if (pt->read_buf + len < pt->read_pos)
  1714. /* If needed, shift the existing buffer contents up.
  1715. This should not happen unless some external code
  1716. manipulates the putback buffer pointers. */
  1717. {
  1718. unsigned char *new_end = pt->read_buf + pt->read_buf_size;
  1719. unsigned char *new_pos = new_end - old_len;
  1720. memmove (new_pos, pt->read_pos, old_len);
  1721. pt->read_pos = new_pos;
  1722. pt->read_end = new_end;
  1723. }
  1724. /* Move read_pos back and copy the bytes there. */
  1725. pt->read_pos -= len;
  1726. memcpy (pt->read_buf + (pt->read_pos - pt->read_buf), buf, len);
  1727. if (pt->rw_active == SCM_PORT_WRITE)
  1728. scm_flush (port);
  1729. if (pt->rw_random)
  1730. pt->rw_active = SCM_PORT_READ;
  1731. }
  1732. #undef FUNC_NAME
  1733. void
  1734. scm_unget_bytes_unlocked (const unsigned char *buf, size_t len, SCM port)
  1735. {
  1736. scm_i_unget_bytes_unlocked (buf, len, port);
  1737. }
  1738. void
  1739. scm_unget_byte_unlocked (int c, SCM port)
  1740. {
  1741. unsigned char byte = c;
  1742. scm_i_unget_bytes_unlocked (&byte, 1, port);
  1743. }
  1744. void
  1745. scm_unget_bytes (const unsigned char *buf, size_t len, SCM port)
  1746. {
  1747. scm_i_pthread_mutex_t *lock;
  1748. scm_c_lock_port (port, &lock);
  1749. scm_i_unget_bytes_unlocked (buf, len, port);
  1750. if (lock)
  1751. scm_i_pthread_mutex_unlock (lock);
  1752. }
  1753. void
  1754. scm_unget_byte (int c, SCM port)
  1755. {
  1756. unsigned char byte = c;
  1757. scm_i_pthread_mutex_t *lock;
  1758. scm_c_lock_port (port, &lock);
  1759. scm_i_unget_bytes_unlocked (&byte, 1, port);
  1760. if (lock)
  1761. scm_i_pthread_mutex_unlock (lock);
  1762. }
  1763. void
  1764. scm_ungetc_unlocked (scm_t_wchar c, SCM port)
  1765. #define FUNC_NAME "scm_ungetc"
  1766. {
  1767. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  1768. scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
  1769. char *result;
  1770. char result_buf[10];
  1771. size_t len;
  1772. len = sizeof (result_buf);
  1773. if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
  1774. {
  1775. if (c < 0x80)
  1776. {
  1777. result_buf[0] = (char) c;
  1778. result = result_buf;
  1779. len = 1;
  1780. }
  1781. else
  1782. result =
  1783. (char *) u32_to_u8 ((uint32_t *) &c, 1, (uint8_t *) result_buf, &len);
  1784. }
  1785. else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 && c <= 0xff)
  1786. {
  1787. result_buf[0] = (char) c;
  1788. result = result_buf;
  1789. len = 1;
  1790. }
  1791. else
  1792. result = u32_conv_to_encoding (pt->encoding,
  1793. (enum iconv_ilseq_handler) pt->ilseq_handler,
  1794. (uint32_t *) &c, 1, NULL,
  1795. result_buf, &len);
  1796. if (SCM_UNLIKELY (result == NULL || len == 0))
  1797. scm_encoding_error (FUNC_NAME, errno,
  1798. "conversion to port encoding failed",
  1799. SCM_BOOL_F, SCM_MAKE_CHAR (c));
  1800. scm_i_unget_bytes_unlocked ((unsigned char *) result, len, port);
  1801. if (SCM_UNLIKELY (result != result_buf))
  1802. free (result);
  1803. if (c == '\n')
  1804. SCM_LINUM (port) -= 1;
  1805. SCM_DECCOL (port);
  1806. }
  1807. #undef FUNC_NAME
  1808. void
  1809. scm_ungetc (scm_t_wchar c, SCM port)
  1810. {
  1811. scm_i_pthread_mutex_t *lock;
  1812. scm_c_lock_port (port, &lock);
  1813. scm_ungetc_unlocked (c, port);
  1814. if (lock)
  1815. scm_i_pthread_mutex_unlock (lock);
  1816. }
  1817. void
  1818. scm_ungets_unlocked (const char *s, int n, SCM port)
  1819. {
  1820. /* This is simple minded and inefficient, but unreading strings is
  1821. * probably not a common operation, and remember that line and
  1822. * column numbers have to be handled...
  1823. *
  1824. * Please feel free to write an optimized version!
  1825. */
  1826. while (n--)
  1827. scm_ungetc_unlocked (s[n], port);
  1828. }
  1829. void
  1830. scm_ungets (const char *s, int n, SCM port)
  1831. {
  1832. scm_i_pthread_mutex_t *lock;
  1833. scm_c_lock_port (port, &lock);
  1834. scm_ungets_unlocked (s, n, port);
  1835. if (lock)
  1836. scm_i_pthread_mutex_unlock (lock);
  1837. }
  1838. SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
  1839. (SCM port),
  1840. "Return the next character available from @var{port},\n"
  1841. "@emph{without} updating @var{port} to point to the following\n"
  1842. "character. If no more characters are available, the\n"
  1843. "end-of-file object is returned.\n"
  1844. "\n"
  1845. "The value returned by\n"
  1846. "a call to @code{peek-char} is the same as the value that would\n"
  1847. "have been returned by a call to @code{read-char} on the same\n"
  1848. "port. The only difference is that the very next call to\n"
  1849. "@code{read-char} or @code{peek-char} on that @var{port} will\n"
  1850. "return the value returned by the preceding call to\n"
  1851. "@code{peek-char}. In particular, a call to @code{peek-char} on\n"
  1852. "an interactive port will hang waiting for input whenever a call\n"
  1853. "to @code{read-char} would have hung.\n"
  1854. "\n"
  1855. "As for @code{read-char}, a @code{decoding-error} may be raised\n"
  1856. "if such a situation occurs. However, unlike with @code{read-char},\n"
  1857. "@var{port} still points at the beginning of the erroneous byte\n"
  1858. "sequence when the error is raised.\n")
  1859. #define FUNC_NAME s_scm_peek_char
  1860. {
  1861. int err;
  1862. SCM result;
  1863. scm_t_wchar c;
  1864. char bytes[SCM_MBCHAR_BUF_SIZE];
  1865. long column, line;
  1866. size_t len = 0;
  1867. if (SCM_UNBNDP (port))
  1868. port = scm_current_input_port ();
  1869. SCM_VALIDATE_OPINPORT (1, port);
  1870. column = SCM_COL (port);
  1871. line = SCM_LINUM (port);
  1872. err = get_codepoint (port, &c, bytes, &len);
  1873. scm_i_unget_bytes_unlocked ((unsigned char *) bytes, len, port);
  1874. SCM_COL (port) = column;
  1875. SCM_LINUM (port) = line;
  1876. if (SCM_UNLIKELY (err != 0))
  1877. {
  1878. scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
  1879. /* Shouldn't happen since `catch' always aborts to prompt. */
  1880. result = SCM_BOOL_F;
  1881. }
  1882. else if (c == EOF)
  1883. {
  1884. scm_i_set_pending_eof (port);
  1885. result = SCM_EOF_VAL;
  1886. }
  1887. else
  1888. result = SCM_MAKE_CHAR (c);
  1889. return result;
  1890. }
  1891. #undef FUNC_NAME
  1892. SCM_DEFINE (scm_unread_char, "unread-char", 1, 1, 0,
  1893. (SCM cobj, SCM port),
  1894. "Place character @var{cobj} in @var{port} so that it will be\n"
  1895. "read by the next read operation. If called multiple times, the\n"
  1896. "unread characters will be read again in last-in first-out\n"
  1897. "order. If @var{port} is not supplied, the current input port\n"
  1898. "is used.")
  1899. #define FUNC_NAME s_scm_unread_char
  1900. {
  1901. int c;
  1902. SCM_VALIDATE_CHAR (1, cobj);
  1903. if (SCM_UNBNDP (port))
  1904. port = scm_current_input_port ();
  1905. SCM_VALIDATE_OPINPORT (2, port);
  1906. c = SCM_CHAR (cobj);
  1907. scm_ungetc_unlocked (c, port);
  1908. return cobj;
  1909. }
  1910. #undef FUNC_NAME
  1911. SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
  1912. (SCM str, SCM port),
  1913. "Place the string @var{str} in @var{port} so that its characters will be\n"
  1914. "read in subsequent read operations. If called multiple times, the\n"
  1915. "unread characters will be read again in last-in first-out order. If\n"
  1916. "@var{port} is not supplied, the current-input-port is used.")
  1917. #define FUNC_NAME s_scm_unread_string
  1918. {
  1919. int n;
  1920. SCM_VALIDATE_STRING (1, str);
  1921. if (SCM_UNBNDP (port))
  1922. port = scm_current_input_port ();
  1923. SCM_VALIDATE_OPINPORT (2, port);
  1924. n = scm_i_string_length (str);
  1925. while (n--)
  1926. scm_ungetc_unlocked (scm_i_string_ref (str, n), port);
  1927. return str;
  1928. }
  1929. #undef FUNC_NAME
  1930. /* Manipulating the buffers. */
  1931. /* This routine does not take any locks, as it is usually called as part
  1932. of a port implementation. */
  1933. void
  1934. scm_port_non_buffer (scm_t_port *pt)
  1935. {
  1936. pt->read_pos = pt->read_buf = pt->read_end = &pt->shortbuf;
  1937. pt->write_buf = pt->write_pos = &pt->shortbuf;
  1938. pt->read_buf_size = pt->write_buf_size = 1;
  1939. pt->write_end = pt->write_buf + pt->write_buf_size;
  1940. }
  1941. /* this should only be called when the read buffer is empty. it
  1942. tries to refill the read buffer. it returns the first char from
  1943. the port, which is either EOF or *(pt->read_pos). */
  1944. static int
  1945. scm_i_fill_input_unlocked (SCM port)
  1946. {
  1947. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  1948. scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
  1949. assert (pt->read_pos == pt->read_end);
  1950. if (pti->pending_eof)
  1951. {
  1952. pti->pending_eof = 0;
  1953. return EOF;
  1954. }
  1955. if (pt->read_buf == pt->putback_buf)
  1956. {
  1957. /* finished reading put-back chars. */
  1958. pt->read_buf = pt->saved_read_buf;
  1959. pt->read_pos = pt->saved_read_pos;
  1960. pt->read_end = pt->saved_read_end;
  1961. pt->read_buf_size = pt->saved_read_buf_size;
  1962. if (pt->read_pos < pt->read_end)
  1963. return *(pt->read_pos);
  1964. }
  1965. return SCM_PORT_DESCRIPTOR (port)->fill_input (port);
  1966. }
  1967. int
  1968. scm_fill_input (SCM port)
  1969. {
  1970. scm_i_pthread_mutex_t *lock;
  1971. int ret;
  1972. scm_c_lock_port (port, &lock);
  1973. ret = scm_fill_input_unlocked (port);
  1974. if (lock)
  1975. scm_i_pthread_mutex_unlock (lock);
  1976. return ret;
  1977. }
  1978. /* Slow-path fallback for 'scm_get_byte_or_eof_unlocked' */
  1979. int
  1980. scm_slow_get_byte_or_eof_unlocked (SCM port)
  1981. {
  1982. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  1983. if (pt->rw_active == SCM_PORT_WRITE)
  1984. scm_flush_unlocked (port);
  1985. if (pt->rw_random)
  1986. pt->rw_active = SCM_PORT_READ;
  1987. if (pt->read_pos >= pt->read_end)
  1988. {
  1989. if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
  1990. return EOF;
  1991. }
  1992. return *pt->read_pos++;
  1993. }
  1994. /* Slow-path fallback for 'scm_peek_byte_or_eof_unlocked' */
  1995. int
  1996. scm_slow_peek_byte_or_eof_unlocked (SCM port)
  1997. {
  1998. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  1999. if (pt->rw_active == SCM_PORT_WRITE)
  2000. scm_flush_unlocked (port);
  2001. if (pt->rw_random)
  2002. pt->rw_active = SCM_PORT_READ;
  2003. if (pt->read_pos >= pt->read_end)
  2004. {
  2005. if (SCM_UNLIKELY (scm_i_fill_input_unlocked (port) == EOF))
  2006. {
  2007. scm_i_set_pending_eof (port);
  2008. return EOF;
  2009. }
  2010. }
  2011. return *pt->read_pos;
  2012. }
  2013. /* Move up to READ_LEN bytes from PORT's putback and/or read buffers
  2014. into memory starting at DEST. Return the number of bytes moved.
  2015. PORT's line/column numbers are left unchanged. */
  2016. size_t
  2017. scm_take_from_input_buffers (SCM port, char *dest, size_t read_len)
  2018. {
  2019. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  2020. size_t bytes_read = 0;
  2021. size_t from_buf = min (pt->read_end - pt->read_pos, read_len);
  2022. if (from_buf > 0)
  2023. {
  2024. memcpy (dest, pt->read_pos, from_buf);
  2025. pt->read_pos += from_buf;
  2026. bytes_read += from_buf;
  2027. read_len -= from_buf;
  2028. dest += from_buf;
  2029. }
  2030. /* if putback was active, try the real input buffer too. */
  2031. if (pt->read_buf == pt->putback_buf)
  2032. {
  2033. from_buf = min (pt->saved_read_end - pt->saved_read_pos, read_len);
  2034. if (from_buf > 0)
  2035. {
  2036. memcpy (dest, pt->saved_read_pos, from_buf);
  2037. pt->saved_read_pos += from_buf;
  2038. bytes_read += from_buf;
  2039. }
  2040. }
  2041. return bytes_read;
  2042. }
  2043. /* Clear a port's read buffers, returning the contents. */
  2044. SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
  2045. (SCM port),
  2046. "This procedure clears a port's input buffers, similar\n"
  2047. "to the way that force-output clears the output buffer. The\n"
  2048. "contents of the buffers are returned as a single string, e.g.,\n"
  2049. "\n"
  2050. "@lisp\n"
  2051. "(define p (open-input-file ...))\n"
  2052. "(drain-input p) => empty string, nothing buffered yet.\n"
  2053. "(unread-char (read-char p) p)\n"
  2054. "(drain-input p) => initial chars from p, up to the buffer size.\n"
  2055. "@end lisp\n\n"
  2056. "Draining the buffers may be useful for cleanly finishing\n"
  2057. "buffered I/O so that the file descriptor can be used directly\n"
  2058. "for further input.")
  2059. #define FUNC_NAME s_scm_drain_input
  2060. {
  2061. SCM result;
  2062. char *data;
  2063. scm_t_port *pt;
  2064. long count;
  2065. SCM_VALIDATE_OPINPORT (1, port);
  2066. pt = SCM_PTAB_ENTRY (port);
  2067. count = pt->read_end - pt->read_pos;
  2068. if (pt->read_buf == pt->putback_buf)
  2069. count += pt->saved_read_end - pt->saved_read_pos;
  2070. if (count)
  2071. {
  2072. result = scm_i_make_string (count, &data, 0);
  2073. scm_take_from_input_buffers (port, data, count);
  2074. }
  2075. else
  2076. result = scm_nullstr;
  2077. return result;
  2078. }
  2079. #undef FUNC_NAME
  2080. void
  2081. scm_end_input_unlocked (SCM port)
  2082. {
  2083. long offset;
  2084. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  2085. scm_i_clear_pending_eof (port);
  2086. if (pt->read_buf == pt->putback_buf)
  2087. {
  2088. offset = pt->read_end - pt->read_pos;
  2089. pt->read_buf = pt->saved_read_buf;
  2090. pt->read_pos = pt->saved_read_pos;
  2091. pt->read_end = pt->saved_read_end;
  2092. pt->read_buf_size = pt->saved_read_buf_size;
  2093. }
  2094. else
  2095. offset = 0;
  2096. SCM_PORT_DESCRIPTOR (port)->end_input (port, offset);
  2097. }
  2098. void
  2099. scm_end_input (SCM port)
  2100. {
  2101. scm_i_pthread_mutex_t *lock;
  2102. scm_c_lock_port (port, &lock);
  2103. scm_end_input_unlocked (port);
  2104. if (lock)
  2105. scm_i_pthread_mutex_unlock (lock);
  2106. }
  2107. SCM_DEFINE (scm_force_output, "force-output", 0, 1, 0,
  2108. (SCM port),
  2109. "Flush the specified output port, or the current output port if @var{port}\n"
  2110. "is omitted. The current output buffer contents are passed to the\n"
  2111. "underlying port implementation (e.g., in the case of fports, the\n"
  2112. "data will be written to the file and the output buffer will be cleared.)\n"
  2113. "It has no effect on an unbuffered port.\n\n"
  2114. "The return value is unspecified.")
  2115. #define FUNC_NAME s_scm_force_output
  2116. {
  2117. if (SCM_UNBNDP (port))
  2118. port = scm_current_output_port ();
  2119. else
  2120. {
  2121. port = SCM_COERCE_OUTPORT (port);
  2122. SCM_VALIDATE_OPOUTPORT (1, port);
  2123. }
  2124. scm_flush_unlocked (port);
  2125. return SCM_UNSPECIFIED;
  2126. }
  2127. #undef FUNC_NAME
  2128. void
  2129. scm_flush_unlocked (SCM port)
  2130. {
  2131. SCM_PORT_DESCRIPTOR (port)->flush (port);
  2132. }
  2133. void
  2134. scm_flush (SCM port)
  2135. {
  2136. scm_i_pthread_mutex_t *lock;
  2137. scm_c_lock_port (port, &lock);
  2138. scm_flush_unlocked (port);
  2139. if (lock)
  2140. scm_i_pthread_mutex_unlock (lock);
  2141. }
  2142. int
  2143. scm_fill_input_unlocked (SCM port)
  2144. {
  2145. return scm_i_fill_input_unlocked (port);
  2146. }
  2147. /* Output. */
  2148. void
  2149. scm_putc (char c, SCM port)
  2150. {
  2151. scm_i_pthread_mutex_t *lock;
  2152. scm_c_lock_port (port, &lock);
  2153. scm_putc_unlocked (c, port);
  2154. if (lock)
  2155. scm_i_pthread_mutex_unlock (lock);
  2156. }
  2157. void
  2158. scm_puts (const char *s, SCM port)
  2159. {
  2160. scm_i_pthread_mutex_t *lock;
  2161. scm_c_lock_port (port, &lock);
  2162. scm_puts_unlocked (s, port);
  2163. if (lock)
  2164. scm_i_pthread_mutex_unlock (lock);
  2165. }
  2166. /* scm_c_write
  2167. *
  2168. * Used by an application to write arbitrary number of bytes to an SCM
  2169. * port. Similar semantics as libc write. However, unlike libc
  2170. * write, scm_c_write writes the requested number of bytes and has no
  2171. * return value.
  2172. *
  2173. * Warning: Doesn't update port line and column counts!
  2174. */
  2175. void
  2176. scm_c_write_unlocked (SCM port, const void *ptr, size_t size)
  2177. #define FUNC_NAME "scm_c_write"
  2178. {
  2179. scm_t_port *pt;
  2180. scm_t_ptob_descriptor *ptob;
  2181. SCM_VALIDATE_OPOUTPORT (1, port);
  2182. pt = SCM_PTAB_ENTRY (port);
  2183. ptob = SCM_PORT_DESCRIPTOR (port);
  2184. if (pt->rw_active == SCM_PORT_READ)
  2185. scm_end_input_unlocked (port);
  2186. ptob->write (port, ptr, size);
  2187. if (pt->rw_random)
  2188. pt->rw_active = SCM_PORT_WRITE;
  2189. }
  2190. #undef FUNC_NAME
  2191. void
  2192. scm_c_write (SCM port, const void *ptr, size_t size)
  2193. {
  2194. scm_i_pthread_mutex_t *lock;
  2195. scm_c_lock_port (port, &lock);
  2196. scm_c_write_unlocked (port, ptr, size);
  2197. if (lock)
  2198. scm_i_pthread_mutex_unlock (lock);
  2199. }
  2200. /* scm_lfwrite
  2201. *
  2202. * This function differs from scm_c_write; it updates port line and
  2203. * column. */
  2204. void
  2205. scm_lfwrite_unlocked (const char *ptr, size_t size, SCM port)
  2206. {
  2207. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  2208. scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
  2209. if (pt->rw_active == SCM_PORT_READ)
  2210. scm_end_input_unlocked (port);
  2211. ptob->write (port, ptr, size);
  2212. for (; size; ptr++, size--)
  2213. update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
  2214. if (pt->rw_random)
  2215. pt->rw_active = SCM_PORT_WRITE;
  2216. }
  2217. void
  2218. scm_lfwrite (const char *ptr, size_t size, SCM port)
  2219. {
  2220. scm_i_pthread_mutex_t *lock;
  2221. scm_c_lock_port (port, &lock);
  2222. scm_lfwrite_unlocked (ptr, size, port);
  2223. if (lock)
  2224. scm_i_pthread_mutex_unlock (lock);
  2225. }
  2226. /* Write STR to PORT from START inclusive to END exclusive. */
  2227. void
  2228. scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
  2229. {
  2230. scm_t_port *pt = SCM_PTAB_ENTRY (port);
  2231. if (pt->rw_active == SCM_PORT_READ)
  2232. scm_end_input_unlocked (port);
  2233. if (end == (size_t) -1)
  2234. end = scm_i_string_length (str);
  2235. scm_i_display_substring (str, start, end, port);
  2236. if (pt->rw_random)
  2237. pt->rw_active = SCM_PORT_WRITE;
  2238. }
  2239. /* Querying and setting positions, and character availability. */
  2240. SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0,
  2241. (SCM port),
  2242. "Return @code{#t} if a character is ready on input @var{port}\n"
  2243. "and return @code{#f} otherwise. If @code{char-ready?} returns\n"
  2244. "@code{#t} then the next @code{read-char} operation on\n"
  2245. "@var{port} is guaranteed not to hang. If @var{port} is a file\n"
  2246. "port at end of file then @code{char-ready?} returns @code{#t}.\n"
  2247. "\n"
  2248. "@code{char-ready?} exists to make it possible for a\n"
  2249. "program to accept characters from interactive ports without\n"
  2250. "getting stuck waiting for input. Any input editors associated\n"
  2251. "with such ports must make sure that characters whose existence\n"
  2252. "has been asserted by @code{char-ready?} cannot be rubbed out.\n"
  2253. "If @code{char-ready?} were to return @code{#f} at end of file,\n"
  2254. "a port at end of file would be indistinguishable from an\n"
  2255. "interactive port that has no ready characters.")
  2256. #define FUNC_NAME s_scm_char_ready_p
  2257. {
  2258. scm_t_port *pt;
  2259. if (SCM_UNBNDP (port))
  2260. port = scm_current_input_port ();
  2261. /* It's possible to close the current input port, so validate even in
  2262. this case. */
  2263. SCM_VALIDATE_OPINPORT (1, port);
  2264. pt = SCM_PTAB_ENTRY (port);
  2265. /* if the current read buffer is filled, or the
  2266. last pushed-back char has been read and the saved buffer is
  2267. filled, result is true. */
  2268. if (pt->read_pos < pt->read_end
  2269. || (pt->read_buf == pt->putback_buf
  2270. && pt->saved_read_pos < pt->saved_read_end))
  2271. return SCM_BOOL_T;
  2272. else
  2273. {
  2274. scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (port);
  2275. if (ptob->input_waiting)
  2276. return scm_from_bool(ptob->input_waiting (port));
  2277. else
  2278. return SCM_BOOL_T;
  2279. }
  2280. }
  2281. #undef FUNC_NAME
  2282. SCM_DEFINE (scm_seek, "seek", 3, 0, 0,
  2283. (SCM fd_port, SCM offset, SCM whence),
  2284. "Sets the current position of @var{fd_port} to the integer\n"
  2285. "@var{offset}, which is interpreted according to the value of\n"
  2286. "@var{whence}.\n"
  2287. "\n"
  2288. "One of the following variables should be supplied for\n"
  2289. "@var{whence}:\n"
  2290. "@defvar SEEK_SET\n"
  2291. "Seek from the beginning of the file.\n"
  2292. "@end defvar\n"
  2293. "@defvar SEEK_CUR\n"
  2294. "Seek from the current position.\n"
  2295. "@end defvar\n"
  2296. "@defvar SEEK_END\n"
  2297. "Seek from the end of the file.\n"
  2298. "@end defvar\n"
  2299. "If @var{fd_port} is a file descriptor, the underlying system\n"
  2300. "call is @code{lseek}. @var{port} may be a string port.\n"
  2301. "\n"
  2302. "The value returned is the new position in the file. This means\n"
  2303. "that the current position of a port can be obtained using:\n"
  2304. "@lisp\n"
  2305. "(seek port 0 SEEK_CUR)\n"
  2306. "@end lisp")
  2307. #define FUNC_NAME s_scm_seek
  2308. {
  2309. int how;
  2310. fd_port = SCM_COERCE_OUTPORT (fd_port);
  2311. how = scm_to_int (whence);
  2312. if (how != SEEK_SET && how != SEEK_CUR && how != SEEK_END)
  2313. SCM_OUT_OF_RANGE (3, whence);
  2314. if (SCM_OPPORTP (fd_port))
  2315. {
  2316. scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (fd_port);
  2317. scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (fd_port);
  2318. off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
  2319. off_t_or_off64_t rv;
  2320. if (!ptob->seek)
  2321. SCM_MISC_ERROR ("port is not seekable",
  2322. scm_cons (fd_port, SCM_EOL));
  2323. else
  2324. rv = ptob->seek (fd_port, off, how);
  2325. /* Set stream-start flags according to new position. */
  2326. pti->at_stream_start_for_bom_read = (rv == 0);
  2327. pti->at_stream_start_for_bom_write = (rv == 0);
  2328. scm_i_clear_pending_eof (fd_port);
  2329. return scm_from_off_t_or_off64_t (rv);
  2330. }
  2331. else /* file descriptor?. */
  2332. {
  2333. off_t_or_off64_t off = scm_to_off_t_or_off64_t (offset);
  2334. off_t_or_off64_t rv;
  2335. rv = lseek_or_lseek64 (scm_to_int (fd_port), off, how);
  2336. if (rv == -1)
  2337. SCM_SYSERROR;
  2338. return scm_from_off_t_or_off64_t (rv);
  2339. }
  2340. }
  2341. #undef FUNC_NAME
  /* Fall back to 0 when the platform headers do not define O_BINARY, so
     that it can be or'ed into open flags unconditionally.  */
  #if !defined (O_BINARY)
  # define O_BINARY 0
  #endif
  /* Mingw has ftruncate(), perhaps implemented above using chsize, but
     doesn't have the filename version truncate(), hence this code.

     Opens FILE write-only, truncates it to LENGTH through the file
     descriptor, and closes it again.  Returns -1 on failure; when
     ftruncate fails, its errno is preserved across the close.  */
  #if HAVE_FTRUNCATE && ! HAVE_TRUNCATE
  static int
  truncate (const char *file, off_t length)
  {
    int fdes, saved_errno;

    fdes = open (file, O_BINARY | O_WRONLY);
    if (fdes == -1)
      return -1;

    if (ftruncate (fdes, length) != -1)
      return close (fdes);

    saved_errno = errno;
    close (fdes);
    errno = saved_errno;
    return -1;
  }
  #endif /* HAVE_FTRUNCATE && ! HAVE_TRUNCATE */
  2366. SCM_DEFINE (scm_truncate_file, "truncate-file", 1, 1, 0,
  2367. (SCM object, SCM length),
  2368. "Truncate file @var{object} to @var{length} bytes. @var{object}\n"
  2369. "can be a filename string, a port object, or an integer file\n"
  2370. "descriptor.\n"
  2371. "The return value is unspecified.\n"
  2372. "\n"
  2373. "For a port or file descriptor @var{length} can be omitted, in\n"
  2374. "which case the file is truncated at the current position (per\n"
  2375. "@code{ftell} above).\n"
  2376. "\n"
  2377. "On most systems a file can be extended by giving a length\n"
  2378. "greater than the current size, but this is not mandatory in the\n"
  2379. "POSIX standard.")
  2380. #define FUNC_NAME s_scm_truncate_file
  2381. {
  2382. int rv;
  2383. /* "object" can be a port, fdes or filename.
  2384. Negative "length" makes no sense, but it's left to truncate() or
  2385. ftruncate() to give back an error for that (normally EINVAL).
  2386. */
  2387. if (SCM_UNBNDP (length))
  2388. {
  2389. /* must supply length if object is a filename. */
  2390. if (scm_is_string (object))
  2391. SCM_MISC_ERROR("must supply length if OBJECT is a filename", SCM_EOL);
  2392. length = scm_seek (object, SCM_INUM0, scm_from_int (SEEK_CUR));
  2393. }
  2394. object = SCM_COERCE_OUTPORT (object);
  2395. if (scm_is_integer (object))
  2396. {
  2397. off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
  2398. SCM_SYSCALL (rv = ftruncate_or_ftruncate64 (scm_to_int (object),
  2399. c_length));
  2400. }
  2401. else if (SCM_OPOUTPORTP (object))
  2402. {
  2403. off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
  2404. scm_t_port *pt = SCM_PTAB_ENTRY (object);
  2405. scm_t_ptob_descriptor *ptob = SCM_PORT_DESCRIPTOR (object);
  2406. if (!ptob->truncate)
  2407. SCM_MISC_ERROR ("port is not truncatable", SCM_EOL);
  2408. scm_i_clear_pending_eof (object);
  2409. if (pt->rw_active == SCM_PORT_READ)
  2410. scm_end_input_unlocked (object);
  2411. else if (pt->rw_active == SCM_PORT_WRITE)
  2412. ptob->flush (object);
  2413. ptob->truncate (object, c_length);
  2414. rv = 0;
  2415. }
  2416. else
  2417. {
  2418. off_t_or_off64_t c_length = scm_to_off_t_or_off64_t (length);
  2419. char *str = scm_to_locale_string (object);
  2420. int eno;
  2421. SCM_SYSCALL (rv = truncate_or_truncate64 (str, c_length));
  2422. eno = errno;
  2423. free (str);
  2424. errno = eno;
  2425. }
  2426. if (rv == -1)
  2427. SCM_SYSERROR;
  2428. return SCM_UNSPECIFIED;
  2429. }
  2430. #undef FUNC_NAME
  2431. SCM_DEFINE (scm_port_line, "port-line", 1, 0, 0,
  2432. (SCM port),
  2433. "Return the current line number for @var{port}.\n"
  2434. "\n"
  2435. "The first line of a file is 0. But you might want to add 1\n"
  2436. "when printing line numbers, since starting from 1 is\n"
  2437. "traditional in error messages, and likely to be more natural to\n"
  2438. "non-programmers.")
  2439. #define FUNC_NAME s_scm_port_line
  2440. {
  2441. port = SCM_COERCE_OUTPORT (port);
  2442. SCM_VALIDATE_OPENPORT (1, port);
  2443. return scm_from_long (SCM_LINUM (port));
  2444. }
  2445. #undef FUNC_NAME
  2446. SCM_DEFINE (scm_set_port_line_x, "set-port-line!", 2, 0, 0,
  2447. (SCM port, SCM line),
  2448. "Set the current line number for @var{port} to @var{line}. The\n"
  2449. "first line of a file is 0.")
  2450. #define FUNC_NAME s_scm_set_port_line_x
  2451. {
  2452. port = SCM_COERCE_OUTPORT (port);
  2453. SCM_VALIDATE_OPENPORT (1, port);
  2454. SCM_PTAB_ENTRY (port)->line_number = scm_to_long (line);
  2455. return SCM_UNSPECIFIED;
  2456. }
  2457. #undef FUNC_NAME
  2458. SCM_DEFINE (scm_port_column, "port-column", 1, 0, 0,
  2459. (SCM port),
  2460. "Return the current column number of @var{port}.\n"
  2461. "If the number is\n"
  2462. "unknown, the result is #f. Otherwise, the result is a 0-origin integer\n"
  2463. "- i.e. the first character of the first line is line 0, column 0.\n"
  2464. "(However, when you display a file position, for example in an error\n"
  2465. "message, we recommend you add 1 to get 1-origin integers. This is\n"
  2466. "because lines and column numbers traditionally start with 1, and that is\n"
  2467. "what non-programmers will find most natural.)")
  2468. #define FUNC_NAME s_scm_port_column
  2469. {
  2470. port = SCM_COERCE_OUTPORT (port);
  2471. SCM_VALIDATE_OPENPORT (1, port);
  2472. return scm_from_int (SCM_COL (port));
  2473. }
  2474. #undef FUNC_NAME
  2475. SCM_DEFINE (scm_set_port_column_x, "set-port-column!", 2, 0, 0,
  2476. (SCM port, SCM column),
  2477. "Set the current column of @var{port}. Before reading the first\n"
  2478. "character on a line the column should be 0.")
  2479. #define FUNC_NAME s_scm_set_port_column_x
  2480. {
  2481. port = SCM_COERCE_OUTPORT (port);
  2482. SCM_VALIDATE_OPENPORT (1, port);
  2483. SCM_PTAB_ENTRY (port)->column_number = scm_to_int (column);
  2484. return SCM_UNSPECIFIED;
  2485. }
  2486. #undef FUNC_NAME
  2487. SCM_DEFINE (scm_port_filename, "port-filename", 1, 0, 0,
  2488. (SCM port),
  2489. "Return the filename associated with @var{port}, or @code{#f}\n"
  2490. "if no filename is associated with the port.")
  2491. #define FUNC_NAME s_scm_port_filename
  2492. {
  2493. port = SCM_COERCE_OUTPORT (port);
  2494. SCM_VALIDATE_OPENPORT (1, port);
  2495. return SCM_FILENAME (port);
  2496. }
  2497. #undef FUNC_NAME
  2498. SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
  2499. (SCM port, SCM filename),
  2500. "Change the filename associated with @var{port}, using the current input\n"
  2501. "port if none is specified. Note that this does not change the port's\n"
  2502. "source of data, but only the value that is returned by\n"
  2503. "@code{port-filename} and reported in diagnostic output.")
  2504. #define FUNC_NAME s_scm_set_port_filename_x
  2505. {
  2506. port = SCM_COERCE_OUTPORT (port);
  2507. SCM_VALIDATE_OPENPORT (1, port);
  2508. /* We allow the user to set the filename to whatever he likes. */
  2509. SCM_SET_FILENAME (port, filename);
  2510. return SCM_UNSPECIFIED;
  2511. }
  2512. #undef FUNC_NAME
  2513. /* Implementation helpers for port printing functions. */
  2514. void
  2515. scm_print_port_mode (SCM exp, SCM port)
  2516. {
  2517. scm_puts_unlocked (SCM_CLOSEDP (exp)
  2518. ? "closed: "
  2519. : (SCM_RDNG & SCM_CELL_WORD_0 (exp)
  2520. ? (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
  2521. ? "input-output: "
  2522. : "input: ")
  2523. : (SCM_WRTNG & SCM_CELL_WORD_0 (exp)
  2524. ? "output: "
  2525. : "bogus: ")),
  2526. port);
  2527. }
  2528. int
  2529. scm_port_print (SCM exp, SCM port, scm_print_state *pstate SCM_UNUSED)
  2530. {
  2531. char *type = SCM_PTOBNAME (SCM_PTOBNUM (exp));
  2532. if (!type)
  2533. type = "port";
  2534. scm_puts_unlocked ("#<", port);
  2535. scm_print_port_mode (exp, port);
  2536. scm_puts_unlocked (type, port);
  2537. scm_putc_unlocked (' ', port);
  2538. scm_uintprint (SCM_CELL_WORD_1 (exp), 16, port);
  2539. scm_putc_unlocked ('>', port);
  2540. return 1;
  2541. }
  2542. /* Iterating over all ports. */
  2543. struct for_each_data
  2544. {
  2545. void (*proc) (void *data, SCM p);
  2546. void *data;
  2547. };
  2548. static SCM
  2549. for_each_trampoline (void *data, SCM port, SCM result)
  2550. {
  2551. struct for_each_data *d = data;
  2552. d->proc (d->data, port);
  2553. return result;
  2554. }
  2555. void
  2556. scm_c_port_for_each (void (*proc)(void *data, SCM p), void *data)
  2557. {
  2558. struct for_each_data d;
  2559. d.proc = proc;
  2560. d.data = data;
  2561. scm_c_weak_set_fold (for_each_trampoline, &d, SCM_EOL,
  2562. scm_i_port_weak_set);
  2563. }
  2564. static void
  2565. scm_for_each_trampoline (void *data, SCM port)
  2566. {
  2567. scm_call_1 (SCM_PACK_POINTER (data), port);
  2568. }
  2569. SCM_DEFINE (scm_port_for_each, "port-for-each", 1, 0, 0,
  2570. (SCM proc),
  2571. "Apply @var{proc} to each port in the Guile port table\n"
  2572. "in turn. The return value is unspecified. More specifically,\n"
  2573. "@var{proc} is applied exactly once to every port that exists\n"
  2574. "in the system at the time @code{port-for-each} is invoked.\n"
  2575. "Changes to the port table while @code{port-for-each} is running\n"
  2576. "have no effect as far as @code{port-for-each} is concerned.")
  2577. #define FUNC_NAME s_scm_port_for_each
  2578. {
  2579. SCM_VALIDATE_PROC (1, proc);
  2580. scm_c_port_for_each (scm_for_each_trampoline, SCM_UNPACK_POINTER (proc));
  2581. return SCM_UNSPECIFIED;
  2582. }
  2583. #undef FUNC_NAME
  2584. static void
  2585. flush_output_port (void *closure, SCM port)
  2586. {
  2587. if (SCM_OPOUTPORTP (port))
  2588. scm_flush_unlocked (port);
  2589. }
  2590. SCM_DEFINE (scm_flush_all_ports, "flush-all-ports", 0, 0, 0,
  2591. (),
  2592. "Equivalent to calling @code{force-output} on\n"
  2593. "all open output ports. The return value is unspecified.")
  2594. #define FUNC_NAME s_scm_flush_all_ports
  2595. {
  2596. scm_c_port_for_each (&flush_output_port, NULL);
  2597. return SCM_UNSPECIFIED;
  2598. }
  2599. #undef FUNC_NAME
  2600. /* Void ports. */
  2601. scm_t_bits scm_tc16_void_port = 0;
  2602. static int fill_input_void_port (SCM port SCM_UNUSED)
  2603. {
  2604. return EOF;
  2605. }
  2606. static void
  2607. write_void_port (SCM port SCM_UNUSED,
  2608. const void *data SCM_UNUSED,
  2609. size_t size SCM_UNUSED)
  2610. {
  2611. }
  2612. static SCM
  2613. scm_i_void_port (long mode_bits)
  2614. {
  2615. SCM ret;
  2616. ret = scm_c_make_port (scm_tc16_void_port, mode_bits, 0);
  2617. scm_port_non_buffer (SCM_PTAB_ENTRY (ret));
  2618. return ret;
  2619. }
  2620. SCM
  2621. scm_void_port (char *mode_str)
  2622. {
  2623. return scm_i_void_port (scm_mode_bits (mode_str));
  2624. }
  2625. SCM_DEFINE (scm_sys_make_void_port, "%make-void-port", 1, 0, 0,
  2626. (SCM mode),
  2627. "Create and return a new void port. A void port acts like\n"
  2628. "@file{/dev/null}. The @var{mode} argument\n"
  2629. "specifies the input/output modes for this port: see the\n"
  2630. "documentation for @code{open-file} in @ref{File Ports}.")
  2631. #define FUNC_NAME s_scm_sys_make_void_port
  2632. {
  2633. return scm_i_void_port (scm_i_mode_bits (mode));
  2634. }
  2635. #undef FUNC_NAME
  2636. /* Initialization. */
  2637. void
  2638. scm_init_ports ()
  2639. {
  2640. /* lseek() symbols. */
  2641. scm_c_define ("SEEK_SET", scm_from_int (SEEK_SET));
  2642. scm_c_define ("SEEK_CUR", scm_from_int (SEEK_CUR));
  2643. scm_c_define ("SEEK_END", scm_from_int (SEEK_END));
  2644. scm_tc16_void_port = scm_make_port_type ("void", fill_input_void_port,
  2645. write_void_port);
  2646. cur_inport_fluid = scm_make_fluid ();
  2647. cur_outport_fluid = scm_make_fluid ();
  2648. cur_errport_fluid = scm_make_fluid ();
  2649. cur_warnport_fluid = scm_make_fluid ();
  2650. cur_loadport_fluid = scm_make_fluid ();
  2651. scm_i_port_weak_set = scm_c_make_weak_set (31);
  2652. #include "libguile/ports.x"
  2653. /* Use Latin-1 as the default port encoding. */
  2654. SCM_VARIABLE_SET (default_port_encoding_var,
  2655. scm_make_fluid_with_default (SCM_BOOL_F));
  2656. scm_port_encoding_init = 1;
  2657. SCM_VARIABLE_SET (default_conversion_strategy_var,
  2658. scm_make_fluid_with_default (sym_substitute));
  2659. scm_conversion_strategy_init = 1;
  2660. /* These bindings are used when boot-9 turns `current-input-port' et
  2661. al into parameters. They are then removed from the guile module. */
  2662. scm_c_define ("%current-input-port-fluid", cur_inport_fluid);
  2663. scm_c_define ("%current-output-port-fluid", cur_outport_fluid);
  2664. scm_c_define ("%current-error-port-fluid", cur_errport_fluid);
  2665. scm_c_define ("%current-warning-port-fluid", cur_warnport_fluid);
  2666. }
  2667. /*
  2668. Local Variables:
  2669. c-file-style: "gnu"
  2670. End:
  2671. */