html.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857
  1. /* html.c -- html-related utilities.
  2. $Id: html.c,v 1.43 2011-04-06 21:32:42 gray Exp $
  3. Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  4. Free Software Foundation, Inc.
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  15. #include "system.h"
  16. #include "cmds.h"
  17. #include "files.h"
  18. #include "html.h"
  19. #include "lang.h"
  20. #include "makeinfo.h"
  21. #include "node.h"
  22. #include "sectioning.h"
  /* Name of the file to which the list of index entries is written,
     and the stream that goes with it.  Both start out as NULL.  */
  char *internal_links_filename = NULL;
  FILE *internal_links_stream = NULL;
  /* A growable character buffer.  It is filled one character at a time
     by append_char, which appends CHAR to BUFFER, (re)allocating as
     necessary.  Null characters are not handled.  */
  typedef struct
  {
    unsigned size;    /* bytes allocated for BUFFER */
    unsigned length;  /* bytes used, not counting the terminating null */
    char *buffer;     /* the text itself; NULL while nothing was appended */
  } buffer_type;
  34. static buffer_type *
  35. init_buffer (void)
  36. {
  37. buffer_type *buf = xmalloc (sizeof (buffer_type));
  38. buf->length = 0;
  39. buf->size = 0;
  40. buf->buffer = NULL;
  41. return buf;
  42. }
  43. static void
  44. append_char (buffer_type *buf, int c)
  45. {
  46. buf->length++;
  47. if (buf->length >= buf->size)
  48. {
  49. buf->size += 100;
  50. buf->buffer = xrealloc (buf->buffer, buf->size);
  51. }
  52. buf->buffer[buf->length - 1] = c;
  53. buf->buffer[buf->length] = 0;
  54. }
  55. /* Read the cascading style-sheet file FILENAME. Write out any @import
  56. commands, which must come first, by the definition of css. If the
  57. file contains any actual css code following the @imports, return it;
  58. else return NULL. */
  59. static char *
  60. process_css_file (char *filename)
  61. {
  62. int c;
  63. int lastchar = 0;
  64. FILE *f;
  65. buffer_type *import_text = init_buffer ();
  66. buffer_type *inline_text = init_buffer ();
  67. unsigned lineno = 1;
  68. enum { null_state, comment_state, import_state, inline_state } state
  69. = null_state, prev_state;
  70. prev_state = null_state;
  71. /* read from stdin if `-' is the filename. */
  72. f = STREQ (filename, "-") ? stdin : fopen (filename, "r");
  73. if (!f)
  74. {
  75. error (_("%s: could not open --css-file: %s"), progname, filename);
  76. return NULL;
  77. }
  78. /* Read the file. The @import statements must come at the beginning,
  79. with only whitespace and comments allowed before any inline css code. */
  80. while ((c = getc (f)) >= 0)
  81. {
  82. if (c == '\n')
  83. lineno++;
  84. switch (state)
  85. {
  86. case null_state: /* between things */
  87. if (c == '@')
  88. { /* Only @import and @charset should switch into
  89. import_state, other @-commands, such as @media, should
  90. put us into inline_state. I don't think any other css
  91. @-commands start with `i' or `c', although of course
  92. this will break when such a command is defined. */
  93. int nextchar = getc (f);
  94. if (nextchar == 'i' || nextchar == 'c')
  95. {
  96. append_char (import_text, c);
  97. append_char (import_text, nextchar);
  98. state = import_state;
  99. }
  100. else
  101. {
  102. ungetc (nextchar, f); /* wasn't an @import */
  103. state = inline_state;
  104. }
  105. }
  106. else if (c == '/')
  107. { /* possible start of a comment */
  108. int nextchar = getc (f);
  109. if (nextchar == '*')
  110. state = comment_state;
  111. else
  112. {
  113. ungetc (nextchar, f); /* wasn't a comment */
  114. state = inline_state;
  115. }
  116. }
  117. else if (isspace (c))
  118. ; /* skip whitespace; maybe should use c_isspace? */
  119. else
  120. /* not an @import, not a comment, not whitespace: we must
  121. have started the inline text. */
  122. state = inline_state;
  123. if (state == inline_state)
  124. append_char (inline_text, c);
  125. if (state != null_state)
  126. prev_state = null_state;
  127. break;
  128. case comment_state:
  129. if (c == '/' && lastchar == '*')
  130. state = prev_state; /* end of comment */
  131. break; /* else ignore this comment char */
  132. case import_state:
  133. append_char (import_text, c); /* include this import char */
  134. if (c == ';')
  135. { /* done with @import */
  136. append_char (import_text, '\n'); /* make the output nice */
  137. state = null_state;
  138. prev_state = import_state;
  139. }
  140. break;
  141. case inline_state:
  142. /* No harm in writing out comments, so don't bother parsing
  143. them out, just append everything. */
  144. append_char (inline_text, c);
  145. break;
  146. }
  147. lastchar = c;
  148. }
  149. fclose (f); /* Even closing stdin should be ok, can't read it more
  150. than once? */
  151. /* Reached the end of the file. We should not be still in a comment. */
  152. if (state == comment_state)
  153. warning (_("%s:%d: --css-file ended in comment"), filename, lineno);
  154. /* Write the @import text, if any. */
  155. if (import_text->buffer)
  156. {
  157. add_word (import_text->buffer);
  158. free (import_text->buffer);
  159. free (import_text);
  160. }
  161. /* We're wasting the buffer struct memory, but so what. */
  162. return inline_text->buffer;
  163. }
  164. HSTACK *htmlstack = NULL;
  165. /* See html.h. */
  166. int html_title_written = 0;
  167. void
  168. html_output_head (void)
  169. {
  170. static const char *html_title = NULL;
  171. char *encoding = current_document_encoding ();
  172. /* The <title> should not have markup, so use text_expansion. */
  173. if (!html_title)
  174. html_title = escape_string (title ?
  175. text_expansion (title) : (char *) gdt("Untitled"));
  176. /* Make sure this is the very first string of the output document. */
  177. output_paragraph_offset = 0;
  178. add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n",
  179. language_table[language_code].abbrev);
  180. /* When splitting, add current node's name to title if it's available
  181. and not Top. */
  182. if (splitting && current_node && !STREQ (current_node, "Top"))
  183. add_word_args ("<title>%s - %s</title>\n",
  184. escape_string (xstrdup (current_node)), html_title);
  185. else
  186. add_word_args ("<title>%s</title>\n", html_title);
  187. add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html");
  188. if (encoding && *encoding)
  189. add_word_args ("; charset=%s", encoding);
  190. add_word ("\">\n");
  191. if (!document_description)
  192. document_description = html_title;
  193. add_word_args ("<meta name=\"description\" content=\"%s\">\n",
  194. document_description);
  195. add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n",
  196. VERSION);
  197. /* Navigation bar links. */
  198. if (!splitting)
  199. add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n");
  200. else if (tag_table)
  201. {
  202. /* Always put a top link. */
  203. add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n");
  204. /* We already have a top link, avoid duplication. */
  205. if (tag_table->up && !STREQ (tag_table->up, "Top"))
  206. add_link (tag_table->up, "rel=\"up\"");
  207. if (tag_table->prev)
  208. add_link (tag_table->prev, "rel=\"prev\"");
  209. if (tag_table->next)
  210. add_link (tag_table->next, "rel=\"next\"");
  211. /* fixxme: Look for a way to put links to various indices in the
  212. document. Also possible candidates to be added here are First and
  213. Last links. */
  214. }
  215. else
  216. {
  217. /* We are splitting, but we neither have a tag_table. So this must be
  218. index.html. So put a link to Top. */
  219. add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n");
  220. }
  221. add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \
  222. rel=\"generator-home\" title=\"Texinfo Homepage\">\n");
  223. if (copying_text)
  224. { /* It is not ideal that we include the html markup here within
  225. <head>, so we use text_expansion. */
  226. insert_string ("<!--\n");
  227. insert_string (text_expansion (copying_text));
  228. insert_string ("-->\n");
  229. }
  230. /* Put the style definitions in a comment for the sake of browsers
  231. that don't support <style>. */
  232. add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n");
  233. add_word ("<style type=\"text/css\"><!--\n");
  234. {
  235. char *css_inline = NULL;
  236. if (css_include)
  237. /* This writes out any @import commands from the --css-file,
  238. and returns any actual css code following the imports. */
  239. css_inline = process_css_file (css_include);
  240. /* This seems cleaner than adding <br>'s at the end of each line for
  241. these "roman" displays. It's hardly the end of the world if the
  242. browser doesn't do <style>s, in any case; they'll just come out in
  243. typewriter. */
  244. #define CSS_FONT_INHERIT "font-family:inherit"
  245. add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT);
  246. add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT);
  247. /* Alternatively, we could do <font size=-1> in insertion.c, but this
  248. way makes it easier to override. */
  249. #define CSS_FONT_SMALLER "font-size:smaller"
  250. add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT,
  251. CSS_FONT_SMALLER);
  252. add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT,
  253. CSS_FONT_SMALLER);
  254. add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER);
  255. add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER);
  256. /* Since HTML doesn't have a sc element, we use span with a bit of
  257. CSS spice instead. */
  258. #define CSS_FONT_SMALL_CAPS "font-variant:small-caps"
  259. add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS);
  260. /* Roman (default) font class, closest we can come. */
  261. #define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;"
  262. add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN);
  263. /* Sans serif font class. */
  264. #define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;"
  265. add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF);
  266. /* Write out any css code from the user's --css-file. */
  267. if (css_inline)
  268. insert_string (css_inline);
  269. add_word ("--></style>\n");
  270. }
  271. if (css_ref)
  272. add_word_args ("<link rel=\"stylesheet\" type=\"text/css\" href=\"%s\">\n",
  273. css_ref);
  274. add_word ("</head>\n<body>\n");
  275. if (title && !html_title_written && titlepage_cmd_present)
  276. {
  277. add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title);
  278. html_title_written = 1;
  279. }
  280. free (encoding);
  281. }
  /* Escape the HTML special characters `"', `&', `<' and `>' in STRING.
     If STRING contains none of them it is returned unchanged; otherwise
     a newly-allocated escaped copy is returned, so callers must compare
     the result with STRING before freeing it.  */
  char *
  escape_string (char *string)
  {
    const char *src;
    char *result;
    char *dst;
    int needed = 1;     /* bytes for the escaped text, with its null */
    int plain = 1;      /* bytes for the text as it is, with its null */

    /* Find how much to allocate.  */
    for (src = string; *src; src++)
      {
        plain++;
        if (*src == '"')
          needed += 6;  /* `&quot;' */
        else if (*src == '&')
          needed += 5;  /* `&amp;' */
        else if (*src == '<' || *src == '>')
          needed += 4;  /* `&lt;', `&gt;' */
        else
          needed++;
      }

    if (needed == plain)
      return string;    /* Already OK.  */

    result = xmalloc (needed);
    dst = result;

    for (src = string; *src; src++)
      {
        const char *entity = NULL;

        switch (*src)
          {
          case '"': entity = "&quot;"; break;
          case '&': entity = "&amp;";  break;
          case '<': entity = "&lt;";   break;
          case '>': entity = "&gt;";   break;
          default:                     break;
          }

        if (entity)
          {
            strcpy (dst, entity);
            dst += strlen (entity);
          }
        else
          *dst++ = *src;
      }
    *dst = '\0';

    return result;
  }
  340. /* Save current tag. */
  341. static void
  342. push_tag (char *tag, char *attribs)
  343. {
  344. HSTACK *newstack = xmalloc (sizeof (HSTACK));
  345. newstack->tag = tag;
  346. newstack->attribs = xstrdup (attribs);
  347. newstack->next = htmlstack;
  348. htmlstack = newstack;
  349. }
  350. /* Get last tag. */
  351. static void
  352. pop_tag (void)
  353. {
  354. HSTACK *tos = htmlstack;
  355. if (!tos)
  356. {
  357. line_error (_("[unexpected] no html tag to pop"));
  358. return;
  359. }
  360. free (htmlstack->attribs);
  361. htmlstack = htmlstack->next;
  362. free (tos);
  363. }
  364. /* Check if tag is an empty or a whitespace only element.
  365. If so, remove it, keeping whitespace intact. */
  366. int
  367. rollback_empty_tag (char *tag)
  368. {
  369. int check_position = output_paragraph_offset;
  370. int taglen = strlen (tag);
  371. int rollback_happened = 0;
  372. char *contents = ""; /* FIXME (ptr to constant, later
  373. assigned to malloc'd address).
  374. */
  375. char *contents_canon_white = "";
  376. /* If output_paragraph is empty, we cannot rollback :-\ */
  377. if (output_paragraph_offset <= 0)
  378. return 0;
  379. /* Find the end of the previous tag. */
  380. while (check_position > 0 && output_paragraph[check_position-1] != '>')
  381. check_position--;
  382. /* Save stuff between tag's end to output_paragraph's end. */
  383. if (check_position != output_paragraph_offset)
  384. {
  385. contents = xmalloc (output_paragraph_offset - check_position + 1);
  386. memcpy (contents, output_paragraph + check_position,
  387. output_paragraph_offset - check_position);
  388. contents[output_paragraph_offset - check_position] = '\0';
  389. contents_canon_white = xstrdup (contents);
  390. canon_white (contents_canon_white);
  391. }
  392. /* Find the start of the previous tag. */
  393. while (check_position > 0 && output_paragraph[check_position-1] != '<')
  394. check_position--;
  395. /* Check to see if this is the tag. */
  396. if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0
  397. && (whitespace (output_paragraph[check_position + taglen])
  398. || output_paragraph[check_position + taglen] == '>'))
  399. {
  400. if (!contents_canon_white || !*contents_canon_white)
  401. {
  402. /* Empty content after whitespace removal, so roll it back. */
  403. output_paragraph_offset = check_position - 1;
  404. rollback_happened = 1;
  405. /* Original contents may not be empty (whitespace.) */
  406. if (contents && *contents)
  407. {
  408. insert_string (contents);
  409. free (contents);
  410. }
  411. }
  412. }
  413. return rollback_happened;
  414. }
  415. /* Open or close TAG according to START_OR_END. */
  416. void
  417. insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...)
  418. {
  419. char *old_tag = NULL;
  420. char *old_attribs = NULL;
  421. char formatted_attribs[2000]; /* xx no fixed limits */
  422. int do_return = 0;
  423. extern int in_html_elt;
  424. if (start_or_end != START)
  425. pop_tag ();
  426. if (htmlstack)
  427. {
  428. old_tag = htmlstack->tag;
  429. old_attribs = htmlstack->attribs;
  430. }
  431. if (format)
  432. {
  433. va_list ap;
  434. va_start (ap, format);
  435. vsnprintf (formatted_attribs, sizeof (formatted_attribs), format, ap);
  436. va_end (ap);
  437. }
  438. else
  439. formatted_attribs[0] = '\0';
  440. /* Exception: can nest multiple spans. */
  441. if (htmlstack
  442. && STREQ (htmlstack->tag, tag)
  443. && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs)))
  444. do_return = 1;
  445. if (start_or_end == START)
  446. push_tag (tag, formatted_attribs);
  447. if (do_return)
  448. return;
  449. in_html_elt++;
  450. /* texinfo.tex doesn't support more than one font attribute
  451. at the same time. */
  452. if ((start_or_end == START) && old_tag && *old_tag
  453. && !STREQ (old_tag, "samp")
  454. && !rollback_empty_tag (old_tag))
  455. add_word_args ("</%s>", old_tag);
  456. if (*tag)
  457. {
  458. if (start_or_end == START)
  459. add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs);
  460. else if (STREQ (tag, "samp") || !rollback_empty_tag (tag))
  461. /* Insert close tag only if we didn't rollback,
  462. in which case the opening tag is removed. */
  463. add_word_args ("</%s>", tag);
  464. }
  465. if ((start_or_end != START) && old_tag && *old_tag && !STREQ (old_tag, "samp"))
  466. add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>",
  467. old_tag, old_attribs);
  468. in_html_elt--;
  469. }
  /* Open or close TAG, without any attributes, according to
     START_OR_END.  */
  void
  insert_html_tag (int start_or_end, char *tag)
  {
    char *no_attributes = NULL;

    insert_html_tag_with_attribute (start_or_end, tag, no_attributes);
  }
  /* Output an HTML <link> whose href is the anchor name for NODENAME
     and whose title is NODENAME with HTML special characters escaped.
     ATTRIBUTES is written as extra attributes right after `<link '.
     Nothing is output if NODENAME is NULL.  */
  void
  add_link (char *nodename, char *attributes)
  {
    char *title_text;

    if (!nodename)
      return;

    add_html_elt ("<link ");
    add_word_args ("%s", attributes);
    add_word_args (" href=\"");
    add_anchor_name (nodename, 1);

    title_text = escape_string (nodename);
    add_word_args ("\" title=\"%s\">\n", title_text);

    /* escape_string hands back NODENAME itself when nothing had to be
       escaped; only a separate copy is ours to free.  */
    if (title_text != nodename)
      free (title_text);
  }
  /* Return a newly-allocated copy of NAME with characters escaped as
     appropriate for an anchor name: whitespace becomes `-', and URL
     special characters are written with our _00hh convention.
     (See the manual for details on the new scheme.)  */
  char *
  escaped_anchor_name (const char *name)
  {
    /* The factor 5 in the next allocation allows all chars to be expanded.  */
    char *res = xmalloc (5 * strlen (name) + 1);
    char *out = res;
    const char *in;

    for (in = name; *in; in++)
      {
        if (cr_or_whitespace (*in))
          {
            *out++ = '-';
            continue;
          }

        if (URL_SAFE_CHAR (*in))
          {
            *out++ = *in;
            continue;
          }

        sprintf (out, "_00%x", (unsigned char) *in);
        /* Advance by hand rather than by sprintf's return value, since
           sprintf returns char * on SunOS 4 and other old systems.  */
        out += strlen (out);
      }

    *out = 0;
    return res;
  }
  /* Output NAME, after canonicalizing its whitespace in place, with
     characters escaped as appropriate for an anchor name.

     If OLD is zero, URL special characters are escaped with our _00hh
     convention.  (See the manual for details on the new scheme.)

     If OLD is nonzero, generate the node name with the 4.6-and-earlier
     convention of %hh (and more special characters output as-is, notably
     - and *).  This is only so that external references to old names can
     still work with HTML generated by the new makeinfo; the gcc folks
     needed this.  Our own HTML does not refer to these names.  */
  void
  add_escaped_anchor_name (char *name, int old)
  {
    char *expanded;

    canon_white (name);

    if (old)
      {
        char *p;

        for (p = name; *p; p++)
          {
            if (cr_or_whitespace (*p))
              add_char ('-');
            else if (URL_SAFE_CHAR (*p) || OLD_URL_SAFE_CHAR (*p))
              add_char (*p);
            else
              /* Cast so characters with the high bit set are treated as >128,
                 for example o-umlaut should be 246, not -10.  */
              add_word_args ("%%%x", (unsigned char) *p);
          }
        return;
      }

    /* XHTML does not allow anything but an ASCII letter to start an
       identifier.  Therefore kludge in this constant string if we
       have a nonletter.  */
    if (!strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
                 *name))
      add_word ("g_t");

    expanded = escaped_anchor_name (name);
    add_word (expanded);
    free (expanded);
  }
  557. /* Insert the text for the name of a reference in an HTML anchor
  558. appropriate for NODENAME.
  559. If HREF is zero, generate text for name= in the new node name
  560. conversion convention.
  561. If HREF is negative, generate text for name= in the old convention.
  562. If HREF is positive, generate the name for an href= attribute, i.e.,
  563. including the `#' if it's an internal reference. */
  564. void
  565. add_anchor_name (char *nodename, int href)
  566. {
  567. if (href > 0)
  568. {
  569. if (splitting)
  570. add_url_name (nodename, href);
  571. add_char ('#');
  572. }
  573. /* Always add NODENAME, so that the reference would pinpoint the
  574. exact node on its file. This is so several nodes could share the
  575. same file, in case of file-name clashes, but also for more
  576. accurate browser positioning. */
  577. if (mbscasecmp (nodename, "(dir)") == 0)
  578. /* Strip the parens, but keep the original letter-case. */
  579. add_word_args ("%.3s", nodename + 1);
  580. else if (mbscasecmp (nodename, "top") == 0)
  581. add_word ("Top");
  582. else
  583. add_escaped_anchor_name (nodename, href < 0);
  584. }
  /* Insert the text for the name of a reference in an HTML url,
     appropriate for NODENAME.  This simply hands NODENAME and HREF to
     add_nodename_to_filename.  */
  void
  add_url_name (char *nodename, int href)
  {
    add_nodename_to_filename (nodename, href);
  }
  592. /* Convert non [A-Za-z0-9] characters depending on the command line options given.
  593. If --transliterate-file-names is specified, these are replaced with their ASCII
  594. phonetic transliteration. Otherwise, _00xx notation is used, where xx means the
  595. hexadecimal representation of the ASCII character. Also convert spaces and
  596. newlines to dashes. */
  597. static void
  598. fix_filename (char *filename)
  599. {
  600. int i;
  601. int len = strlen (filename);
  602. char *oldname = xstrdup (filename);
  603. *filename = '\0';
  604. for (i = 0; i < len; i++)
  605. {
  606. const char *p = lang_transliterate_char (oldname[i]);
  607. if (p)
  608. strcat (filename, p);
  609. else if (cr_or_whitespace (oldname[i]))
  610. strcat (filename, "-");
  611. else if (URL_SAFE_CHAR (oldname[i]))
  612. strncat (filename, (char *) oldname + i, 1);
  613. else
  614. {
  615. char *hexchar = xmalloc (6 * sizeof (char));
  616. sprintf (hexchar, "_00%x", (unsigned char) oldname[i]);
  617. strcat (filename, hexchar);
  618. free (hexchar);
  619. }
  620. /* Check if we are nearing boundaries. */
  621. if (strlen (filename) >= PATH_MAX - 20)
  622. break;
  623. }
  624. free (oldname);
  625. }
  626. /* As we can't look-up a (forward-referenced) nodes' html filename
  627. from the tentry, we take the easy way out. We assume that
  628. nodenames are unique, and generate the html filename from the
  629. nodename, that's always known. */
  630. static char *
  631. nodename_to_filename_1 (char *nodename, int href)
  632. {
  633. char *p;
  634. char *filename;
  635. char dirname[PATH_MAX];
  636. if (mbscasecmp (nodename, "Top") == 0)
  637. {
  638. /* We want to convert references to the Top node into
  639. "index.html#Top". */
  640. if (href)
  641. filename = xstrdup ("index.html"); /* "#Top" is added by our callers */
  642. else
  643. filename = xstrdup ("Top");
  644. }
  645. else if (mbscasecmp (nodename, "(dir)") == 0)
  646. /* We want to convert references to the (dir) node into
  647. "../index.html". */
  648. filename = xstrdup ("../index.html");
  649. else
  650. {
  651. filename = xmalloc (PATH_MAX);
  652. dirname[0] = '\0';
  653. *filename = '\0';
  654. /* Check for external reference: ``(info-document)node-name''
  655. Assume this node lives at: ``../info-document/node-name.html''
  656. We need to handle the special case (sigh): ``(info-document)'',
  657. ie, an external top-node, which should translate to:
  658. ``../info-document/info-document.html'' */
  659. p = nodename;
  660. if (*nodename == '(')
  661. {
  662. int length;
  663. p = strchr (nodename, ')');
  664. if (p == NULL)
  665. {
  666. line_error (_("[unexpected] invalid node name: `%s'"), nodename);
  667. xexit (1);
  668. }
  669. length = p - nodename - 1;
  670. if (length > 5 &&
  671. FILENAME_CMPN (p - 5, ".info", 5) == 0)
  672. length -= 5;
  673. /* This is for DOS, and also for Windows and GNU/Linux
  674. systems that might have Info files copied from a DOS 8+3
  675. filesystem. */
  676. if (length > 4 &&
  677. FILENAME_CMPN (p - 4, ".inf", 4) == 0)
  678. length -= 4;
  679. strcpy (filename, "../");
  680. strncpy (dirname, nodename + 1, length);
  681. *(dirname + length) = '\0';
  682. fix_filename (dirname);
  683. strcat (filename, dirname);
  684. strcat (filename, "/");
  685. p++;
  686. }
  687. /* In the case of just (info-document), there will be nothing
  688. remaining, and we will refer to ../info-document/, which will
  689. work fine. */
  690. strcat (filename, p);
  691. if (*p)
  692. {
  693. /* Hmm */
  694. fix_filename (filename + strlen (filename) - strlen (p));
  695. strcat (filename, ".html");
  696. }
  697. }
  698. /* Produce a file name suitable for the underlying filesystem. */
  699. normalize_filename (filename);
  700. #if 0
  701. /* We add ``#Nodified-filename'' anchor to external references to be
  702. prepared for non-split HTML support. Maybe drop this. */
  703. if (href && *dirname)
  704. {
  705. strcat (filename, "#");
  706. strcat (filename, p);
  707. /* Hmm, again */
  708. fix_filename (filename + strlen (filename) - strlen (p));
  709. }
  710. #endif
  711. return filename;
  712. }
  /* Output the html file name that belongs to NODENAME; HREF is passed
     on to nodename_to_filename_1.  Ideally this would happen only if
     the current filename != filename of node, but for now there is no
     such check: the file name is always output.  */
  void
  add_nodename_to_filename (char *nodename, int href)
  {
    char *html_file = nodename_to_filename_1 (nodename, href);

    add_word (html_file);
    free (html_file);
  }
  /* Return the newly-allocated html file name for NODENAME.  */
  char *
  nodename_to_filename (char *nodename)
  {
    /* The callers of nodename_to_filename use the result to produce
       <a href=, so call nodename_to_filename_1 with last arg non-zero.  */
    const int for_href = 1;

    return nodename_to_filename_1 (nodename, for_href);
  }