info-utils.c 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060
  1. /* info-utils.c -- miscellaneous.
  2. $Id$
  3. Copyright 1993, 1998, 2003, 2004, 2007, 2008, 2009, 2011, 2012,
  4. 2013, 2014, 2015, 2016, 2017 Free Software Foundation, Inc.
  5. This program is free software: you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation, either version 3 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. Originally written by Brian Fox. */
  16. #include "info.h"
  17. #include "session.h"
  18. #include "info-utils.h"
  19. #include "tag.h"
  20. #include <langinfo.h>
  21. #if HAVE_ICONV
  22. # include <iconv.h>
  23. #endif
  24. #include <wchar.h>
  25. #ifdef __MINGW32__
  26. /* MinGW uses a replacement nl_langinfo, see pcterm.c. */
  27. # define nl_langinfo rpl_nl_langinfo
  28. extern char * rpl_nl_langinfo (nl_item);
  29. /* MinGW uses its own replacement wcwidth, see pcterm.c for the
  30. reasons. Since Gnulib's wchar.h might redirect wcwidth to
  31. rpl_wcwidth, we explicitly undo that here. */
  32. #undef wcwidth
  33. #endif
  34. #ifdef __hpux
  35. #define va_copy(ap1,ap2) memcpy((&ap1),(&ap2),sizeof(va_list))
  36. #endif
  37. /* Variable which holds the most recent filename parsed as a result of
  38. calling info_parse_xxx (). */
  39. char *info_parsed_filename = NULL;
  40. /* Variable which holds the most recent nodename parsed as a result of
  41. calling info_parse_xxx (). */
  42. char *info_parsed_nodename = NULL;
  43. /* Read a filename surrounded by "(" and ")", accounting for matching
  44. characters, and place it in *FILENAME if FILENAME is not null. Return
  45. length of read filename. On error, set *FILENAME to null and return 0. */
  46. int
  47. read_bracketed_filename (char *string, char **filename)
  48. {
  49. register int i = 0;
  50. int count = 0; /* Level of nesting. */
  51. int first_close = -1; /* First ")" encountered. */
  52. if (*string != '(')
  53. return 0;
  54. string++;
  55. count = 1;
  56. for (i = 0; string[i]; i++)
  57. {
  58. if (string[i] == '(')
  59. count++;
  60. else if (string[i] == ')')
  61. {
  62. if (first_close == -1)
  63. first_close = i;
  64. count--;
  65. if (count == 0)
  66. break;
  67. }
  68. }
  69. /* If string ended before brackets were balanced, take the first ")" as
  70. terminating the filename. */
  71. if (count > 0)
  72. {
  73. if (first_close == -1)
  74. {
  75. if (filename)
  76. *filename = 0;
  77. return 0;
  78. }
  79. i = first_close;
  80. }
  81. if (filename)
  82. {
  83. *filename = xcalloc (1, i + 1);
  84. memcpy (*filename, string, i);
  85. }
  86. return i + 2; /* Length of filename plus "(" and ")". */
  87. }
  88. /* Parse the filename and nodename out of STRING, saving in
  89. INFO_PARSED_FILENAME and INFO_PARSED_NODENAME. These variables should not
  90. be freed by calling code. If either is missing, the relevant variable is
  91. set to a null pointer. */
  92. void
  93. info_parse_node (char *string)
  94. {
  95. int nodename_len;
  96. free (info_parsed_filename);
  97. free (info_parsed_nodename);
  98. info_parsed_filename = 0;
  99. info_parsed_nodename = 0;
  100. /* Special case of nothing passed. Return nothing. */
  101. if (!string || !*string)
  102. return;
  103. string += skip_whitespace_and_newlines (string);
  104. string += read_bracketed_filename (string, &info_parsed_filename);
  105. /* Parse out nodename. */
  106. string += skip_whitespace_and_newlines (string);
  107. nodename_len = read_quoted_string (string, "", 0, &info_parsed_nodename);
  108. if (nodename_len != 0)
  109. {
  110. canonicalize_whitespace (info_parsed_nodename);
  111. }
  112. }
  113. /* Set *OUTPUT to a copy of the string starting at START and finishing at
  114. a character in TERMINATOR, unless START[0] == INFO_QUOTE, in which case
  115. copy string from START+1 until the next occurence of INFO_QUOTE. If
  116. TERMINATOR is an empty string, finish at a null character. LINES is
  117. the number of lines that the string can span. If LINES is zero, there is no
  118. limit. Return length of string including any quoting characters. Return
  119. 0 if input was invalid. */
  120. long
  121. read_quoted_string (char *start, char *terminator, int lines, char **output)
  122. {
  123. long len;
  124. char *nl = 0, saved_char;
  125. if (lines)
  126. {
  127. int i;
  128. nl = start;
  129. for (i = 0; i < lines; i++)
  130. {
  131. nl = strchr (nl, '\n');
  132. if (!nl)
  133. break; /* End of input string reached. */
  134. nl++;
  135. }
  136. if (nl)
  137. {
  138. saved_char = *nl;
  139. *nl = '\0';
  140. }
  141. }
  142. if (start[0] != '\177')
  143. {
  144. len = strcspn (start, terminator);
  145. if (*terminator && !start[len])
  146. {
  147. len = 0;
  148. *output = 0;
  149. }
  150. else
  151. {
  152. *output = xmalloc (len + 1);
  153. strncpy (*output, start, len);
  154. (*output)[len] = '\0';
  155. }
  156. }
  157. else
  158. {
  159. len = strcspn (start + 1, "\177");
  160. if (*terminator && !(start + 1)[len])
  161. {
  162. /* No closing 177 byte. */
  163. len = 0;
  164. *output = 0;
  165. }
  166. else
  167. {
  168. *output = xmalloc (len + 1);
  169. strncpy (*output, start + 1, len);
  170. (*output)[len] = '\0';
  171. len += 2; /* Count the two 177 bytes. */
  172. }
  173. }
  174. if (nl)
  175. *nl = saved_char;
  176. return len;
  177. }
  178. /* **************************************************************** */
  179. /* */
  180. /* Finding and Building Menus */
  181. /* */
  182. /* **************************************************************** */
  183. /* Get the entry associated with LABEL in the menu of NODE. Return a
  184. pointer to the ENTRY if found, or null. Return value should not
  185. be freed by caller. If SLOPPY, allow initial matches, like
  186. "Buffers" for a LABEL "buffer". */
  187. REFERENCE *
  188. info_get_menu_entry_by_label (NODE *node, char *label, int sloppy)
  189. {
  190. register int i;
  191. int best_guess = -1;
  192. REFERENCE *entry;
  193. REFERENCE **references = node->references;
  194. if (!references)
  195. return 0;
  196. for (i = 0; (entry = references[i]); i++)
  197. {
  198. if (entry->type != REFERENCE_MENU_ITEM)
  199. continue;
  200. if (mbscasecmp (label, entry->label) == 0)
  201. return entry; /* Exact, case-insensitive match. */
  202. else if (sloppy && best_guess == -1
  203. && (mbsncasecmp (entry->label, label, strlen (label)) == 0))
  204. best_guess = i;
  205. }
  206. if (sloppy && best_guess != -1)
  207. return references[best_guess];
  208. return 0;
  209. }
  210. /* A utility function for concatenating REFERENCE **. Returns a new
  211. REFERENCE ** which is the concatenation of REF1 and REF2. */
  212. REFERENCE **
  213. info_concatenate_references (REFERENCE **ref1, REFERENCE **ref2)
  214. {
  215. register int i, j;
  216. REFERENCE **result;
  217. int size = 0;
  218. /* Get the total size of the slots that we will need. */
  219. if (ref1)
  220. {
  221. for (i = 0; ref1[i]; i++);
  222. size += i;
  223. }
  224. if (ref2)
  225. {
  226. for (i = 0; ref2[i]; i++);
  227. size += i;
  228. }
  229. result = xmalloc ((1 + size) * sizeof (REFERENCE *));
  230. /* Copy the contents over. */
  231. j = 0;
  232. if (ref1)
  233. {
  234. for (i = 0; ref1[i]; i++)
  235. result[j++] = ref1[i];
  236. }
  237. if (ref2)
  238. {
  239. for (i = 0; ref2[i]; i++)
  240. result[j++] = ref2[i];
  241. }
  242. result[j] = NULL;
  243. return result;
  244. }
  245. /* Copy a reference structure. Copy each field into new memory. */
  246. REFERENCE *
  247. info_copy_reference (REFERENCE *src)
  248. {
  249. REFERENCE *dest = xmalloc (sizeof (REFERENCE));
  250. dest->label = src->label ? xstrdup (src->label) : NULL;
  251. dest->filename = src->filename ? xstrdup (src->filename) : NULL;
  252. dest->nodename = src->nodename ? xstrdup (src->nodename) : NULL;
  253. dest->start = src->start;
  254. dest->end = src->end;
  255. dest->line_number = src->line_number;
  256. dest->type = src->type;
  257. return dest;
  258. }
  259. /* Copy a list of references, copying in reference in turn with
  260. info_copy_reference. */
  261. REFERENCE **
  262. info_copy_references (REFERENCE **ref1)
  263. {
  264. int i;
  265. REFERENCE **result;
  266. int size;
  267. if (!ref1)
  268. return 0;
  269. /* Get the total size of the slots that we will need. */
  270. for (i = 0; ref1[i]; i++);
  271. size = i;
  272. result = xmalloc ((1 + size) * sizeof (REFERENCE *));
  273. /* Copy the contents over. */
  274. for (i = 0; ref1[i]; i++)
  275. result[i] = info_copy_reference (ref1[i]);
  276. result[i] = NULL;
  277. return result;
  278. }
  279. void
  280. info_reference_free (REFERENCE *ref)
  281. {
  282. if (ref)
  283. {
  284. free (ref->label);
  285. free (ref->filename);
  286. free (ref->nodename);
  287. free (ref);
  288. }
  289. }
  290. /* Free the data associated with REFERENCES. */
  291. void
  292. info_free_references (REFERENCE **references)
  293. {
  294. register int i;
  295. REFERENCE *entry;
  296. if (references)
  297. {
  298. for (i = 0; references && (entry = references[i]); i++)
  299. info_reference_free (entry);
  300. free (references);
  301. }
  302. }
  303. /* Return new REFERENCE with filename and nodename fields set. */
  304. REFERENCE *
  305. info_new_reference (char *filename, char *nodename)
  306. {
  307. REFERENCE *r = xmalloc (sizeof (REFERENCE));
  308. r->label = 0;
  309. r->filename = filename ? xstrdup (filename) : 0;
  310. r->nodename = nodename ? xstrdup (nodename) : 0;
  311. r->start = 0;
  312. r->end = 0;
  313. r->line_number = 0;
  314. r->type = 0;
  315. return r;
  316. }
  317. /* Search for sequences of whitespace or newlines in STRING, replacing
  318. all such sequences with just a single space. Remove whitespace from
  319. start and end of string. */
  320. void
  321. canonicalize_whitespace (char *string)
  322. {
  323. register int i, j;
  324. int len, whitespace_found, whitespace_loc = 0;
  325. char *temp;
  326. if (!string)
  327. return;
  328. len = strlen (string);
  329. temp = xmalloc (1 + len);
  330. /* Search for sequences of whitespace or newlines. Replace all such
  331. sequences in the string with just a single space. */
  332. whitespace_found = 0;
  333. for (i = 0, j = 0; string[i]; i++)
  334. {
  335. if (whitespace_or_newline (string[i]))
  336. {
  337. whitespace_found++;
  338. whitespace_loc = i;
  339. continue;
  340. }
  341. else
  342. {
  343. if (whitespace_found && whitespace_loc)
  344. {
  345. whitespace_found = 0;
  346. /* Suppress whitespace at start of string. */
  347. if (j)
  348. temp[j++] = ' ';
  349. }
  350. temp[j++] = string[i];
  351. }
  352. }
  353. /* Kill trailing whitespace. */
  354. if (j && whitespace (temp[j - 1]))
  355. j--;
  356. temp[j] = '\0';
  357. strcpy (string, temp);
  358. free (temp);
  359. }
  360. /* If ITER points to an ANSI escape sequence, process it, set PLEN to its
  361. length in bytes, and return 1.
  362. Otherwise, return 0.
  363. */
  364. int
  365. ansi_escape (mbi_iterator_t iter, size_t *plen)
  366. {
  367. if (raw_escapes_p && *mbi_cur_ptr (iter) == '\033' && mbi_avail (iter))
  368. {
  369. mbi_advance (iter);
  370. if (*mbi_cur_ptr (iter) == '[' && mbi_avail (iter))
  371. {
  372. ITER_SETBYTES (iter, 1);
  373. mbi_advance (iter);
  374. if (isdigit (*mbi_cur_ptr (iter)) && mbi_avail (iter))
  375. {
  376. ITER_SETBYTES (iter, 1);
  377. mbi_advance (iter);
  378. if (*mbi_cur_ptr (iter) == 'm')
  379. {
  380. *plen = 4;
  381. return 1;
  382. }
  383. else if (isdigit (*mbi_cur_ptr (iter)) && mbi_avail (iter))
  384. {
  385. ITER_SETBYTES (iter, 1);
  386. mbi_advance (iter);
  387. if (*mbi_cur_ptr (iter) == 'm')
  388. {
  389. *plen = 5;
  390. return 1;
  391. }
  392. }
  393. }
  394. }
  395. }
  396. return 0;
  397. }
  398. static struct text_buffer printed_rep = { 0 };
  399. /* Return pointer to string that is the printed representation of character
  400. (or other logical unit) at ITER if it were printed at screen column
  401. PL_CHARS. Use ITER_SETBYTES (info-utils.h) on ITER if we need to advance
  402. past a unit that the multibyte iteractor doesn't know about (like an ANSI
  403. escape sequence). If ITER points at an end-of-line character, set *DELIM to
  404. this character. *PCHARS gets the number of screen columns taken up by
  405. outputting the return value, and *PBYTES the number of bytes in returned
  406. string. Return value is not null-terminated. Return value must not be
  407. freed by caller. */
  408. char *
  409. printed_representation (mbi_iterator_t *iter, int *delim, size_t pl_chars,
  410. size_t *pchars, size_t *pbytes)
  411. {
  412. struct text_buffer *rep = &printed_rep;
  413. char *cur_ptr = (char *) mbi_cur_ptr (*iter);
  414. size_t cur_len = mb_len (mbi_cur (*iter));
  415. text_buffer_reset (&printed_rep);
  416. if (mb_isprint (mbi_cur (*iter)))
  417. {
  418. /* cur.wc gives a wchar_t object. See mbiter.h in the
  419. gnulib/lib directory. */
  420. *pchars = wcwidth ((*iter).cur.wc);
  421. *pbytes = cur_len;
  422. return cur_ptr;
  423. }
  424. else if (cur_len == 1)
  425. {
  426. if (*cur_ptr == '\n' || *cur_ptr == '\r')
  427. {
  428. /* If this is a CRLF line ending, ignore this character. */
  429. if (*cur_ptr == '\r' && cur_ptr[1] == '\n')
  430. {
  431. *pchars = 0;
  432. *pbytes = 0;
  433. return cur_ptr;
  434. }
  435. *pchars = 1;
  436. *pbytes = cur_len;
  437. *delim = *cur_ptr;
  438. text_buffer_add_char (rep, ' ');
  439. return cur_ptr;
  440. }
  441. else if (ansi_escape (*iter, &cur_len))
  442. {
  443. *pchars = 0;
  444. *pbytes = cur_len;
  445. ITER_SETBYTES (*iter, cur_len);
  446. return cur_ptr;
  447. }
  448. else if (*cur_ptr == '\t')
  449. {
  450. int i = 0;
  451. *pchars = ((pl_chars + 8) & 0xf8) - pl_chars;
  452. *pbytes = *pchars;
  453. /* We must output spaces instead of the tab because a tab may
  454. not clear characters already on the screen. */
  455. for (i = 0; i < *pbytes; i++)
  456. text_buffer_add_char (rep, ' ');
  457. return text_buffer_base (rep);
  458. }
  459. }
  460. /* Show CTRL-x as "^X". */
  461. if (iscntrl (*cur_ptr) && *(unsigned char *)cur_ptr < 127)
  462. {
  463. *pchars = 2;
  464. *pbytes = 2;
  465. text_buffer_add_char (rep, '^');
  466. text_buffer_add_char (rep, *cur_ptr | 0x40);
  467. return text_buffer_base (rep);
  468. }
  469. else if (*cur_ptr == DEL)
  470. {
  471. *pchars = 0;
  472. *pbytes = 0;
  473. return text_buffer_base (rep);
  474. }
  475. else
  476. {
  477. /* Original byte was not recognized as anything. Display its octal
  478. value. This could happen in the C locale for bytes above 128,
  479. or for bytes 128-159 in an ISO-8859-1 locale. Don't output the bytes
  480. as they are, because they could have special meaning to the
  481. terminal. */
  482. *pchars = 4;
  483. *pbytes = 4;
  484. text_buffer_printf (rep, "\\%o", *(unsigned char *)cur_ptr);
  485. return text_buffer_base (rep);
  486. }
  487. }
  488. /* **************************************************************** */
  489. /* */
  490. /* Scanning node */
  491. /* */
  492. /* **************************************************************** */
  493. /* Whether to strip syntax from the text of nodes. */
  494. int preprocess_nodes_p;
  495. /* Whether contents of nodes should be rewritten. */
  496. static int rewrite_p;
  497. /* inptr is moved forward through the body of a node. */
  498. static char *inptr;
  499. /* Pointer to first byte of node (after node separator). */
  500. static char *input_start;
  501. /* Number of bytes in node contents. */
  502. static size_t input_length;
  503. struct text_buffer output_buf;
  504. /* Pointer into a tags table for the file to the anchor we need to adjust as
  505. a result of byte counts changing due to character encoding conversion or
  506. inserted/deleted text. */
  507. static TAG **anchor_to_adjust;
  508. /* Offset within file buffer of first byte of node, used for anchor
  509. adjustment. */
  510. static int node_offset;
  511. /* Difference so far between the number of bytes input in the file and
  512. bytes output. Used to adjust the values of anchors in nodes. */
  513. static long int output_bytes_difference;
  514. /* Whether we are converting the character encoding of the file. */
  515. static int convert_encoding_p;
  516. #if HAVE_ICONV
  517. /* Whether text in file is encoded in UTF-8. */
  518. static int file_is_in_utf8;
  519. /* Used for conversion from file encoding to output encoding. */
  520. static iconv_t iconv_to_output;
  521. /* Conversion from file encoding to UTF-8. */
  522. static iconv_t iconv_to_utf8;
  523. #endif /* HAVE_ICONV */
  524. void
  525. init_conversion (FILE_BUFFER *fb)
  526. {
  527. char *target_encoding;
  528. convert_encoding_p = 0;
  529. /* Node being processed does not come from an Info file. */
  530. if (!fb)
  531. return;
  532. #if !HAVE_ICONV
  533. return;
  534. #else
  535. file_is_in_utf8 = 0;
  536. /* Don't process file if encoding is unknown. */
  537. if (!fb->encoding)
  538. return;
  539. /* Read name of character encoding from environment locale */
  540. target_encoding = nl_langinfo (CODESET);
  541. /* Don't convert the contents if the locale
  542. uses the same character encoding as the file */
  543. if (!strcasecmp(target_encoding, fb->encoding))
  544. return;
  545. /* Check if an iconv conversion from file locale to system
  546. locale exists */
  547. iconv_to_output = iconv_open (target_encoding, fb->encoding);
  548. if (iconv_to_output == (iconv_t) -1)
  549. return; /* Return if no conversion function implemented */
  550. if ( !strcasecmp ("UTF8", fb->encoding)
  551. || !strcasecmp ("UTF-8", fb->encoding))
  552. file_is_in_utf8 = 1;
  553. if (!file_is_in_utf8)
  554. {
  555. iconv_to_utf8 = iconv_open ("UTF-8", fb->encoding);
  556. if (iconv_to_utf8 == (iconv_t) -1)
  557. {
  558. /* Return if no conversion function implemented */
  559. iconv_close (iconv_to_output);
  560. return;
  561. }
  562. }
  563. convert_encoding_p = 1;
  564. rewrite_p = 1;
  565. #endif /* HAVE_ICONV */
  566. }
  567. void close_conversion (void)
  568. {
  569. #if HAVE_ICONV
  570. if (convert_encoding_p)
  571. {
  572. iconv_close (iconv_to_output);
  573. if (!file_is_in_utf8) iconv_close (iconv_to_utf8);
  574. }
  575. #endif
  576. }
  577. static void
  578. init_output_stream (FILE_BUFFER *fb)
  579. {
  580. init_conversion (fb);
  581. output_bytes_difference = 0;
  582. if (rewrite_p)
  583. text_buffer_init (&output_buf);
  584. }
  585. static size_t saved_offset;
  586. static char *saved_inptr;
  587. static long saved_difference;
  588. void
  589. save_conversion_state (void)
  590. {
  591. saved_offset = text_buffer_off (&output_buf);
  592. saved_inptr = inptr;
  593. saved_difference = output_bytes_difference;
  594. }
  595. /* Go back to the saved state of the output stream. */
  596. void
  597. reset_conversion (void)
  598. {
  599. text_buffer_off (&output_buf) = saved_offset;
  600. inptr = saved_inptr;
  601. output_bytes_difference = saved_difference;
  602. }
  603. /* Copy bytes from input to output with no encoding conversion. */
  604. static void
  605. copy_direct (long n)
  606. {
  607. text_buffer_add_string (&output_buf, inptr, n);
  608. inptr += n;
  609. }
  610. /* Read one character at *FROM and write out a sequence
  611. of bytes representing that character in ASCII. *FROM
  612. is advanced past the read character. */
  613. static int
  614. degrade_utf8 (char **from, size_t *from_left)
  615. {
  616. static struct encoding_replacement
  617. {
  618. char *from_string;
  619. char *to_string;
  620. } er[] = {
  621. {"\xE2\x80\x98","'"}, /* Opening single quote */
  622. {"\xE2\x80\x99","'"}, /* Closing single quote */
  623. {"\xE2\x80\x9C","\""},/* Opening double quote */
  624. {"\xE2\x80\x9D","\""},/* Closing double quote */
  625. {"\xC2\xA9","(C)"}, /* Copyright symbol */
  626. {"\xC2\xBB",">>"}, /* Closing double angle brackets */
  627. {"\xE2\x86\x92","->"},/* Right arrow */
  628. {"\xE2\x87\x92","=>"},/* Right double arrow */
  629. {"\xE2\x8A\xA3","-|"},/* Print symbol */
  630. {"\xE2\x98\x85","-!-"}, /* Point symbol */
  631. {"\xE2\x86\xA6","==>"}, /* Expansion symbol */
  632. {"\xE2\x80\x90","-"}, /* Hyphen */
  633. {"\xE2\x80\x91","-"}, /* Non-breaking hyphen */
  634. {"\xE2\x80\x92","-"}, /* Figure dash */
  635. {"\xE2\x80\x93","-"}, /* En dash */
  636. {"\xE2\x80\x94","--"}, /* Em dash */
  637. {"\xE2\x88\x92","-"}, /* Minus sign */
  638. {"\xE2\x80\xA6","..."}, /* Ellipsis */
  639. {"\xE2\x80\xA2","*"}, /* Bullet */
  640. {"\xC3\xA0","a`"}, /* Lower case letter a with grave accent */
  641. {"\xC3\xA2","a^"}, /* Lower case letter a with circumflex */
  642. {"\xC3\xA4","a\""}, /* Lower case letter a with diaeresis */
  643. {"\xC3\xA6","ae"}, /* Lower case letter ae ligature */
  644. {"\xC3\xA9","e'"}, /* Lower case letter e with acute accent */
  645. {"\xC3\xA8","e`"}, /* Lower case letter e with grave accent */
  646. {"\xC3\xAA","e^"}, /* Lower case letter e with circumflex */
  647. {"\xC3\xAB","e\""}, /* Lower case letter e with diaeresis */
  648. {"\xC3\xB6","o\""}, /* Lower case letter o with diaeresis */
  649. {"\xC3\xBC","u\""}, /* Lower case letter u with diaeresis */
  650. {"\xC3\x84", "A\""}, /* Upper case letter A with diaeresis. */
  651. {"\xC3\x96", "O\""}, /* Upper case letter O with diaeresis. */
  652. {"\xC3\x9c", "U\""}, /* Upper case letter U with diaeresis. */
  653. {"\xC3\xB1","n~"}, /* Lower case letter n with tilde */
  654. {"\xC3\x87","C,"}, /* Upper case letter C with cedilla */
  655. {"\xC3\xA7","c,"}, /* Lower case letter c with cedilla */
  656. {"\xC3\x9f","ss"}, /* Lower case letter sharp s */
  657. {0, 0}
  658. };
  659. struct encoding_replacement *erp;
  660. for (erp = er; erp->from_string != 0; erp++)
  661. {
  662. /* Avoid reading past end of input. */
  663. int width = strlen (erp->from_string);
  664. if (width > *from_left)
  665. continue;
  666. if (!strncmp (erp->from_string, *from, width))
  667. {
  668. text_buffer_add_string (&output_buf, erp->to_string,
  669. strlen(erp->to_string));
  670. *from += width;
  671. *from_left -= width;
  672. return 1;
  673. }
  674. }
  675. /* Failing this, just print a question mark. Maybe we should use SUB
  676. (^Z) (ASCII substitute character code) instead, or pass through the
  677. original bytes. */
  678. text_buffer_add_string (&output_buf, "?", 1);
  679. /* Ideally we would advance one UTF-8 character. This would
  680. require knowing its length in bytes. */
  681. (*from)++;
  682. (*from_left)--;
  683. return 0;
  684. }
  /* Convert N bytes of input at INPTR from the file's encoding to the output
     encoding, appending the result to OUTPUT_BUF.  INPTR is advanced as
     input is consumed.  Byte sequences that iconv rejects are handed to
     degrade_utf8 to be replaced, or passed to copy_direct if they cannot
     even be converted to UTF-8.  OUTPUT_BYTES_DIFFERENCE is increased by N
     minus the number of bytes appended to OUTPUT_BUF.

     Return the number of input bytes taken beyond N; this is non-zero only
     when the N bytes ended part-way through a multibyte sequence and more
     node contents were available to complete it.  When built without
     iconv, do nothing and return 0.  */
  static int
  copy_converting (long n)
  {
  #if !HAVE_ICONV
    return 0;
  #else
    size_t bytes_left, orig_bytes_left;
    int extra_at_end;
    size_t iconv_ret;
    long output_start;
    size_t utf8_char_free;
    char utf8_char[4]; /* Maximum 4 bytes in a UTF-8 character */
    char *utf8_char_ptr, *orig_inptr;
    size_t i;

    /* Use n as an estimate of how many bytes will be required
       in target encoding. */
    text_buffer_alloc (&output_buf, (size_t) n);

    output_start = text_buffer_off (&output_buf);
    bytes_left = n;
    extra_at_end = 0;

    while (1)
      {
        iconv_ret = text_buffer_iconv (&output_buf, iconv_to_output,
                                       (ICONV_CONST char **)&inptr, &bytes_left);

        /* Make sure libiconv flushes out the last converted character.
           This is required when the conversion is stateful, in which
           case libiconv might not output the last character, waiting to
           see whether it should be combined with the next one.  */
        if (iconv_ret != (size_t) -1
            && text_buffer_iconv (&output_buf, iconv_to_output,
                                  NULL, NULL) != (size_t) -1)
          /* Success: all of input converted. */
          break;

        /* There's been an error while converting. */
        switch (errno)
          {
          case E2BIG:
            /* Ran out of space in output buffer.  Allocate more
               and try again. */
            text_buffer_alloc (&output_buf, n);
            continue;
          case EINVAL:
            /* Incomplete byte sequence at end of input buffer.  Try to read
               more. */

            /* input_length - 2 is offset of last-but-one byte within input.
               This checks if there is at least one more byte within node
               contents. */
            if (inptr - input_start + (bytes_left - 1) <= input_length - 2)
              {
                bytes_left++;
                extra_at_end++;
              }
            else
              {
                copy_direct (bytes_left);
                bytes_left = 0;
              }
            continue;
          default: /* Unknown error */
            info_error (_("Error converting file character encoding."));

            /* Skip past current input and hope we don't get an
               error next time. */
            inptr += bytes_left;
            return 0;
          case EILSEQ:
            /* Byte sequence in input not recognized.  Degrade to ASCII.  */
            break;
          }

        /* Flush any waiting input in iconv_to_output and enter the
           default shift state. */
        text_buffer_iconv (&output_buf, iconv_to_output, NULL, NULL);

        if (file_is_in_utf8)
          {
            degrade_utf8 (&inptr, &bytes_left);
            continue;
          }

        /* If file is not in UTF-8, we degrade to ASCII in two steps:
           first convert the character to UTF-8, then look up a replacement
           string.  Note that mixing iconv_to_output and iconv_to_utf8
           on the same input may not work well if the input encoding
           is stateful.  We could deal with this by always converting to
           UTF-8 first; then we could mix conversions on the UTF-8 stream.  */

        /* We want to read exactly one character.  Do this by
           restricting size of output buffer. */
        utf8_char_ptr = utf8_char;
        orig_inptr = inptr;
        orig_bytes_left = bytes_left;
        for (i = 1; i <= 4; i++)
          {
            utf8_char_free = i;
            errno = 0;
            iconv_ret = iconv (iconv_to_utf8, (ICONV_CONST char **)&inptr,
                               &bytes_left, &utf8_char_ptr, &utf8_char_free);
            if ((iconv_ret == (size_t) -1 && errno != E2BIG)
                /* If we managed to convert a character: */
                || utf8_char_ptr > utf8_char)
              break;
          }

        /* errno == E2BIG if iconv ran out of output buffer,
           which is expected. */
        if (iconv_ret == (size_t) -1 && errno != E2BIG)
          {
            /* Character is not recognized.  Copy a single byte.  */
            inptr = orig_inptr; /* iconv might have incremented inptr  */
            copy_direct (1);
            bytes_left = orig_bytes_left - 1;
          }
        else
          {
            utf8_char_ptr = utf8_char;
            /* i is width of UTF-8 character */
            degrade_utf8 (&utf8_char_ptr, &i);

            /* If we are done, make sure iconv flushes the last character. */
            if (bytes_left <= 0)
              {
                utf8_char_ptr = utf8_char;
                /* Offer the whole scratch buffer to the flush.  The count
                   left over from the conversion above is normally zero,
                   which would leave iconv nowhere to put flushed bytes.  */
                utf8_char_free = sizeof utf8_char;
                iconv (iconv_to_utf8, NULL, NULL,
                       &utf8_char_ptr, &utf8_char_free);
                if (utf8_char_ptr > utf8_char)
                  {
                    /* Only the bytes iconv actually produced are valid.  */
                    i = utf8_char_ptr - utf8_char;
                    utf8_char_ptr = utf8_char;
                    degrade_utf8 (&utf8_char_ptr, &i);
                  }
              }
          }
      }

    /* Must cast because the difference between unsigned size_t is always
       positive. */
    output_bytes_difference +=
      n - ((signed long) text_buffer_off (&output_buf) - output_start);

    return extra_at_end;
  #endif /* HAVE_ICONV */
  }
  821. /* Functions below are named from the perspective of the preprocess_nodes_p
  822. flag being on. */
  823. /* Copy text from input node contents, possibly converting the
  824. character encoding and adjusting anchor offsets at the same time. */
  825. static void
  826. copy_input_to_output (long n)
  827. {
  828. if (rewrite_p)
  829. {
  830. long bytes_left;
  831. bytes_left = n;
  832. while (bytes_left > 0)
  833. {
  834. if (!convert_encoding_p)
  835. {
  836. copy_direct (bytes_left);
  837. bytes_left = 0;
  838. }
  839. else
  840. {
  841. long bytes_to_convert;
  842. long extra_written;
  843. bytes_to_convert = bytes_left;
  844. if (anchor_to_adjust)
  845. {
  846. char *first_anchor = input_start
  847. + (*anchor_to_adjust)->nodestart - node_offset;
  848. /* If there is an anchor in the input: */
  849. if (first_anchor < inptr + bytes_left)
  850. {
  851. /* Convert enough to pass the first anchor in input. */
  852. bytes_to_convert = first_anchor - inptr + 1;
  853. /* Shouldn't happen because we should have already
  854. have adjusted this anchor. */
  855. if (bytes_to_convert < 0)
  856. {
  857. anchor_to_adjust = 0; /* Abandon anchor adjustment.*/
  858. bytes_to_convert = bytes_left;
  859. }
  860. }
  861. }
  862. /* copy_converting may read more than bytes_to_convert
  863. bytes if its input ends in an incomplete byte sequence. */
  864. extra_written = copy_converting (bytes_to_convert);
  865. bytes_left -= bytes_to_convert + extra_written;
  866. }
  867. /* Check if we have gone past any anchors and
  868. adjust with output_bytes_difference. */
  869. if (anchor_to_adjust)
  870. while ((*anchor_to_adjust)->nodestart - node_offset
  871. <= inptr - input_start)
  872. {
  873. (*anchor_to_adjust)->nodestart_adjusted
  874. = (*anchor_to_adjust)->nodestart - output_bytes_difference;
  875. anchor_to_adjust++;
  876. if (!*anchor_to_adjust
  877. || (*anchor_to_adjust)->cache.nodelen != 0)
  878. {
  879. anchor_to_adjust = 0;
  880. break;
  881. }
  882. }
  883. }
  884. }
  885. else
  886. inptr += n;
  887. }
  888. static void
  889. skip_input (long n)
  890. {
  891. if (preprocess_nodes_p)
  892. {
  893. inptr += n;
  894. output_bytes_difference += n;
  895. }
  896. else if (rewrite_p)
  897. {
  898. /* We are expanding tags only. Do not skip input. */
  899. copy_input_to_output (n);
  900. }
  901. else
  902. {
  903. inptr += n;
  904. }
  905. }
  906. static void
  907. write_extra_bytes_to_output (char *input, long n)
  908. {
  909. if (preprocess_nodes_p)
  910. {
  911. text_buffer_add_string (&output_buf, input, n);
  912. output_bytes_difference -= n;
  913. }
  914. }
  915. /* Like write_extra_bytes_to_output, but writes bytes even when
  916. preprocess_nodes=Off. */
  917. static void
  918. write_tag_contents (char *input, long n)
  919. {
  920. if (rewrite_p)
  921. {
  922. text_buffer_add_string (&output_buf, input, n);
  923. output_bytes_difference -= n;
  924. }
  925. }
  926. /* Like skip_input, but skip even when !preprocess_nodes_p. */
  927. static void
  928. skip_tag_contents (long n)
  929. {
  930. if (rewrite_p)
  931. {
  932. inptr += n;
  933. output_bytes_difference += n;
  934. }
  935. }
  936. /* Read first line of node and set next, prev and up. */
  937. static void
  938. parse_top_node_line (NODE *node)
  939. {
  940. char **store_in = 0;
  941. char *nodename;
  942. char *ptr;
  943. int value_length;
  944. /* If the first line is empty, leave it in. This is the case
  945. in the index-apropos window. */
  946. if (*node->contents == '\n')
  947. return;
  948. node->next = node->prev = node->up = 0;
  949. ptr = node->contents;
  950. while (1)
  951. {
  952. store_in = 0;
  953. ptr += skip_whitespace (ptr);
  954. /* Check what field we are looking at */
  955. if (!strncasecmp (ptr, INFO_FILE_LABEL, strlen(INFO_FILE_LABEL)))
  956. {
  957. ptr += strlen (INFO_FILE_LABEL);
  958. }
  959. else if (!strncasecmp (ptr, INFO_NODE_LABEL, strlen(INFO_NODE_LABEL)))
  960. {
  961. ptr += strlen (INFO_NODE_LABEL);
  962. }
  963. else if (!strncasecmp (ptr, INFO_PREV_LABEL, strlen(INFO_PREV_LABEL)))
  964. {
  965. ptr += strlen (INFO_PREV_LABEL);
  966. store_in = &node->prev;
  967. }
  968. else if (!strncasecmp (ptr, INFO_ALTPREV_LABEL,
  969. strlen(INFO_ALTPREV_LABEL)))
  970. {
  971. ptr += strlen (INFO_ALTPREV_LABEL);
  972. store_in = &node->prev;
  973. }
  974. else if (!strncasecmp (ptr, INFO_NEXT_LABEL, strlen(INFO_NEXT_LABEL)))
  975. {
  976. ptr += strlen (INFO_NEXT_LABEL);
  977. store_in = &node->next;
  978. }
  979. else if (!strncasecmp (ptr, INFO_UP_LABEL, strlen(INFO_UP_LABEL)))
  980. {
  981. ptr += strlen (INFO_UP_LABEL);
  982. store_in = &node->up;
  983. }
  984. else
  985. {
  986. store_in = 0;
  987. /* Not recognized - code below will skip to next comma */
  988. }
  989. ptr += skip_whitespace (ptr);
  990. /* Get length of a bracketed filename component. */
  991. if (*ptr != '(')
  992. value_length = 0;
  993. else
  994. value_length = read_bracketed_filename (ptr, 0);
  995. /* Get length of node name, or filename if following "File:". Note
  996. that . is not included in the second argument here in order to
  997. support this character in file names. */
  998. value_length += read_quoted_string (ptr + value_length,
  999. "\n\r\t,", 1, &nodename);
  1000. if (store_in)
  1001. {
  1002. *store_in = xmalloc (value_length + 1);
  1003. strncpy (*store_in, ptr, value_length);
  1004. (*store_in)[value_length] = '\0';
  1005. }
  1006. free (nodename);
  1007. ptr += value_length;
  1008. if (*ptr == '\n' || !*ptr)
  1009. break;
  1010. ptr += 1; /* Point after field terminator */
  1011. }
  1012. }
  1013. /* Output, replace or hide text introducing a reference. INPTR starts on
  1014. the first byte of a sequence introducing a reference and finishes on the
  1015. first (non-whitespace) byte of the reference label. */
  1016. static int
  1017. scan_reference_marker (REFERENCE *entry, int in_parentheses)
  1018. {
  1019. /* When preprocess_nodes is Off, we position the cursor on
  1020. the "*" when moving between references. */
  1021. if (!preprocess_nodes_p)
  1022. {
  1023. if (rewrite_p)
  1024. entry->start = text_buffer_off(&output_buf);
  1025. else
  1026. entry->start = inptr - input_start;
  1027. }
  1028. /* Check what we found based on first character of match */
  1029. if (inptr[0] == '\n')
  1030. {
  1031. entry->type = REFERENCE_MENU_ITEM;
  1032. if (!preprocess_nodes_p)
  1033. entry->start++;
  1034. }
  1035. else
  1036. entry->type = REFERENCE_XREF;
  1037. if (entry->type == REFERENCE_MENU_ITEM)
  1038. copy_input_to_output (strlen ("\n* "));
  1039. else
  1040. {
  1041. /* Only match "*Note" if it is followed by a whitespace character so that
  1042. it will not be recognized if, e.g., it is surrounded in inverted
  1043. commas. */
  1044. if (!strchr (" \t\r\n", inptr[strlen ("*Note")]))
  1045. {
  1046. copy_input_to_output (strlen ("*Note:"));
  1047. return 0;
  1048. }
  1049. /* Cross-references can be generated by four different Texinfo
  1050. commands. @inforef and @xref output "*Note " in Info format,
  1051. and "See" in HTML and print. @ref and @pxref output "*note "
  1052. in Info format, and either nothing at all or "see" in HTML
  1053. and print. Unfortunately, there is no easy way to distinguish
  1054. between these latter two cases. */
  1055. /* TODO: Internationalize these strings, but only if we know the
  1056. language of the document. */
  1057. if (inptr[1] == 'N')
  1058. {
  1059. write_extra_bytes_to_output ("See", 3);
  1060. in_parentheses = 1;
  1061. }
  1062. else if (in_parentheses)
  1063. {
  1064. write_extra_bytes_to_output ("see", 3);
  1065. /* Only output the "see" for input like "(*note ...)", which
  1066. would have come from a use of @pxref. We used to output "see" for
  1067. "*note" in more circumstances, with a list of words where to
  1068. suppress it (to avoid "see *note" turning into "see see"), but
  1069. such a list can't be complete or reliable. It's better to remove
  1070. it with more enthusiasm, then if the document writer wants a "see"
  1071. to appear, they can add one themselves. */
  1072. }
  1073. skip_input (strlen ("*Note"));
  1074. if (!in_parentheses)
  1075. skip_input (skip_whitespace (inptr));
  1076. }
  1077. /* Copy any white space before label. */
  1078. copy_input_to_output (skip_whitespace_and_newlines (inptr));
  1079. return 1;
  1080. }
  1081. /* Output reference label and update ENTRY. INPTR should be on the first
  1082. non-whitespace byte of label when this function is called. It is left
  1083. at the first character after the colon terminating the label. Return 0 if
  1084. invalid syntax is encountered. */
  1085. static int
  1086. scan_reference_label (REFERENCE *entry, int in_index)
  1087. {
  1088. char *dummy;
  1089. int max_lines;
  1090. int len, label_len = 0;
  1091. /* Handle case of cross-reference like (FILE)^?NODE^?::. */
  1092. if (inptr[0] == '(')
  1093. label_len = read_bracketed_filename (inptr, 0);
  1094. /* Search forward to ":" to get label name. Cross-references may have
  1095. a newline in the middle. */
  1096. if (entry->type == REFERENCE_MENU_ITEM)
  1097. max_lines = 1;
  1098. else
  1099. max_lines = 2;
  1100. if (!in_index || inptr[label_len] == '\177')
  1101. {
  1102. len = read_quoted_string (inptr + label_len, ":", max_lines, &dummy);
  1103. free (dummy);
  1104. if (!len)
  1105. return 0; /* Input invalid. */
  1106. label_len += len;
  1107. }
  1108. else
  1109. {
  1110. /* If in an index node, go forward to the last colon on the line
  1111. (not preceded by a newline, NUL or DEL). This is in order to
  1112. support index entries containing colons. This should work fine
  1113. as long as the node name does not contain a colon as well. */
  1114. char *p;
  1115. int n, m = 0;
  1116. p = inptr + label_len;
  1117. while (1)
  1118. {
  1119. n = strcspn (p, ":\n\177");
  1120. if (p[n] == ':')
  1121. {
  1122. m += n + 1;
  1123. p += n + 1;
  1124. continue;
  1125. }
  1126. break;
  1127. }
  1128. if (m == 0)
  1129. return 0; /* no : found */
  1130. label_len += m - 1;
  1131. }
  1132. entry->label = xmalloc (label_len + 1);
  1133. memcpy (entry->label, inptr, label_len);
  1134. entry->label[label_len] = '\0';
  1135. canonicalize_whitespace (entry->label);
  1136. if (preprocess_nodes_p)
  1137. entry->start = text_buffer_off (&output_buf);
  1138. /* Write text of label. */
  1139. copy_input_to_output (label_len);
  1140. if (rewrite_p)
  1141. entry->end = text_buffer_off (&output_buf);
  1142. else
  1143. entry->end = inptr - input_start;
  1144. /* Colon after label. */
  1145. skip_input (1);
  1146. /* Don't mess up the margin of a menu description. */
  1147. if (entry->type == REFERENCE_MENU_ITEM)
  1148. write_extra_bytes_to_output (" ", 1);
  1149. return 1;
  1150. }
  1151. /* INPTR should be at the first character after the colon
  1152. terminating the label. Return 0 on syntax error. */
  1153. static int
  1154. scan_reference_target (REFERENCE *entry, NODE *node, int in_parentheses)
  1155. {
  1156. int i;
  1157. int label_len;
  1158. /* If this reference entry continues with another ':' then the target
  1159. of the reference is given by the label. */
  1160. if (*inptr == ':')
  1161. info_parse_node (entry->label);
  1162. label_len = strlen (entry->label);
  1163. if (label_len >= 2 && entry->label[label_len - 1] == 0177)
  1164. {
  1165. /* Remove the DEL bytes. We don't do this until after calling
  1166. info_parse_node so that ^?(FOO)BAR^?:: refers to a node called
  1167. "(FOO)BAR" within the current manual. */
  1168. char *p = strchr (entry->label, '\177');
  1169. memmove (p, p + 1, label_len - (p - entry->label) - 1);
  1170. entry->label[label_len - 2] = '\0';
  1171. }
  1172. if (*inptr == ':')
  1173. {
  1174. skip_input (1);
  1175. if (entry->type == REFERENCE_MENU_ITEM)
  1176. write_extra_bytes_to_output (" ", 1);
  1177. if (info_parsed_filename)
  1178. entry->filename = xstrdup (info_parsed_filename);
  1179. if (info_parsed_nodename)
  1180. entry->nodename = xstrdup (info_parsed_nodename);
  1181. return 1;
  1182. }
  1183. /* This entry continues with a specific target. Parse the
  1184. file name and node name from the specification. */
  1185. if (entry->type == REFERENCE_XREF)
  1186. {
  1187. int length = 0; /* Length of specification */
  1188. char *target_start = inptr;
  1189. char *nl_off = 0;
  1190. int space_at_start_of_line = 0;
  1191. length += skip_whitespace_and_newlines (inptr);
  1192. length += read_bracketed_filename (inptr + length, &entry->filename);
  1193. length += skip_whitespace_and_newlines (inptr + length);
  1194. /* Get the node name. */
  1195. length += read_quoted_string (inptr + length, ",.", 2, &entry->nodename);
  1196. skip_input (length);
  1197. /* Check if there is a newline in the target. */
  1198. nl_off = strchr (target_start, '\n');
  1199. if (nl_off)
  1200. {
  1201. if (nl_off < inptr)
  1202. space_at_start_of_line = skip_whitespace (nl_off + 1);
  1203. else
  1204. nl_off = 0;
  1205. }
  1206. canonicalize_whitespace (entry->nodename);
  1207. if (entry->filename)
  1208. {
  1209. /* Heuristic of whether it's worth outputing a newline before the
  1210. filename. This checks whether the newline appears more
  1211. than half way through the text, and therefore which side is
  1212. longer. */
  1213. if (nl_off
  1214. && nl_off < target_start + (length - space_at_start_of_line) / 2)
  1215. {
  1216. int i;
  1217. write_extra_bytes_to_output ("\n", 1);
  1218. for (i = 0; i < space_at_start_of_line; i++)
  1219. write_extra_bytes_to_output (" ", 1);
  1220. skip_input (strspn (inptr, " "));
  1221. nl_off = 0;
  1222. }
  1223. else
  1224. if (*inptr != '\n')
  1225. {
  1226. write_extra_bytes_to_output (" ", 1);
  1227. }
  1228. write_extra_bytes_to_output ("(", 1);
  1229. write_extra_bytes_to_output (entry->filename,
  1230. strlen (entry->filename));
  1231. write_extra_bytes_to_output (" manual)",
  1232. strlen (" manual)"));
  1233. }
  1234. /* Hide terminating punctuation if we are in a reference
  1235. like "(*note Label:(file)node.)". */
  1236. if (in_parentheses && inptr[0] == '.')
  1237. skip_input (1);
  1238. /* Copy any terminating punctuation before the optional newline. */
  1239. copy_input_to_output (strspn (inptr, ".),"));
  1240. /* Output a newline if one is needed. Don't do it at the end of
  1241. a paragraph. */
  1242. if (nl_off && *inptr != '\n')
  1243. {
  1244. int i;
  1245. write_extra_bytes_to_output ("\n", 1);
  1246. for (i = 0; i < space_at_start_of_line; i++)
  1247. write_extra_bytes_to_output (" ", 1);
  1248. skip_input (strspn (inptr, " "));
  1249. }
  1250. }
  1251. else /* entry->type == REFERENCE_MENU_ITEM */
  1252. {
  1253. int line_len;
  1254. int length = 0; /* Length of specification */
  1255. length = skip_whitespace (inptr);
  1256. length += read_bracketed_filename (inptr + length, &entry->filename);
  1257. length += strspn (inptr + length, " ");
  1258. /* Get the node name. */
  1259. length += read_quoted_string (inptr + length, ",.\t\n", 2,
  1260. &entry->nodename);
  1261. if (inptr[length] == '.') /* A '.' terminating the entry. */
  1262. length++;
  1263. if (node->flags & N_IsDir)
  1264. {
  1265. /* Set line_len to length of line so far. */
  1266. char *linestart;
  1267. linestart = memrchr (input_start, '\n', inptr - input_start);
  1268. if (!linestart)
  1269. linestart = input_start;
  1270. else
  1271. linestart++; /* Point to first character after newline. */
  1272. line_len = inptr - linestart;
  1273. }
  1274. if (node->flags & N_IsIndex)
  1275. /* Show the name of the node the index entry refers to. */
  1276. copy_input_to_output (length);
  1277. else
  1278. {
  1279. skip_input (length);
  1280. if ((node->flags & N_IsDir) && inptr[strspn (inptr, " ")] == '\n')
  1281. {
  1282. /* For a dir node, if there is no more text in this line,
  1283. check if there is a menu entry description in the next
  1284. line to the right of the end of the label, and display it
  1285. in this line. */
  1286. skip_input (strspn (inptr, " "));
  1287. if (line_len <= strspn (inptr + 1, " "))
  1288. skip_input (1 + line_len);
  1289. }
  1290. else
  1291. {
  1292. for (i = 0; i < length; i++)
  1293. write_extra_bytes_to_output (" ", 1);
  1294. }
  1295. }
  1296. /* Parse "(line ...)" part of menus, if any. */
  1297. {
  1298. char *lineptr = inptr;
  1299. /* Skip any whitespace first, and then a newline in case the item
  1300. was so long to contain the ``(line ...)'' string in the same
  1301. physical line. */
  1302. lineptr += skip_whitespace (inptr);
  1303. if (*lineptr == '\n')
  1304. lineptr += 1 + skip_whitespace (lineptr + 1);
  1305. if (!strncmp (lineptr, "(line ", strlen ("(line ")))
  1306. {
  1307. lineptr += strlen ("(line ");
  1308. entry->line_number = strtol (lineptr, 0, 0);
  1309. }
  1310. else
  1311. entry->line_number = 0;
  1312. }
  1313. }
  1314. return 1;
  1315. }
  /* Bounds-checked version of PTR[INDEX].  BASE is the start of the block of
     memory that PTR points into, and the block is at least LEN bytes long.
     If PTR + INDEX lies in the range BASE .. BASE + LEN - 1, return the
     character there; otherwise return 0.  */
  static char
  safe_string_index (char *ptr, long index, char *base, long len)
  {
    long position = (ptr - base) + index;   /* Offset of target from BASE.  */

    return (position >= 0 && position < len) ? ptr[index] : 0;
  }
  1328. /* Process an in index marker ("^@^H[index^@^H]") or an image marker
  1329. ("^@^H[image ...^@^H]"). */
  1330. static void
  1331. scan_info_tag (NODE *node, int *in_index, FILE_BUFFER *fb)
  1332. {
  1333. char *p, *p1;
  1334. struct text_buffer *expansion = xmalloc (sizeof (struct text_buffer));
  1335. p = inptr;
  1336. p1 = p;
  1337. text_buffer_init (expansion);
  1338. if (tag_expand (&p1, input_start + input_length, expansion, in_index))
  1339. {
  1340. if (*in_index)
  1341. node->flags |= N_IsIndex;
  1342. if (!rewrite_p)
  1343. {
  1344. rewrite_p = 1;
  1345. init_output_stream (fb);
  1346. /* Put inptr back to start so that
  1347. copy_input_to_output below gets all
  1348. preceding contents. */
  1349. inptr = node->contents;
  1350. }
  1351. /* Write out up to tag. */
  1352. copy_input_to_output (p - inptr);
  1353. write_tag_contents (text_buffer_base (expansion),
  1354. text_buffer_off (expansion));
  1355. /* Skip past body of tag. */
  1356. skip_tag_contents (p1 - inptr);
  1357. }
  1358. else
  1359. {
  1360. /* It was not a valid tag. */
  1361. copy_input_to_output (p - inptr + 1);
  1362. }
  1363. text_buffer_free (expansion);
  1364. free (expansion);
  1365. }
  1366. #define looking_at_string(contents, string) \
  1367. (!strncasecmp (contents, string, strlen (string)))
  1368. static char *
  1369. forward_to_info_syntax (char *contents)
  1370. {
  1371. /* Loop until just before the end of the input. The '- 3' prevents us
  1372. accessing memory after the end of the input, and none of the strings we
  1373. are looking for are shorter than 3 bytes. */
  1374. while (contents < input_start + input_length - 3)
  1375. {
  1376. /* Menu entry comes first to optimize for the case of looking through a
  1377. long index node. */
  1378. if (looking_at_string (contents, INFO_MENU_ENTRY_LABEL)
  1379. || looking_at_string (contents, INFO_XREF_LABEL)
  1380. || !memcmp (contents, "\0\b[", 3))
  1381. return contents;
  1382. contents++;
  1383. }
  1384. return 0;
  1385. }
  1386. /* Scan contents of NODE, recording cross-references and similar.
  1387. Convert character encoding of node contents to that of the user if the two
  1388. are known to be different. If PREPROCESS_NODES_P == 1, remove Info syntax
  1389. in contents.
  1390. If FB is non-null, it is the file containing the node, and TAG_PTR is an
  1391. offset into FB->tags. If the node contents are rewritten, adjust anchors
  1392. that occur in the node and store adjusted value as TAG->nodestart_adjusted,
  1393. otherwise simply copy TAG->nodestart to TAG->nodestart_adjusted for each
  1394. anchor in the node. */
  1395. void
  1396. scan_node_contents (NODE *node, FILE_BUFFER *fb, TAG **tag_ptr)
  1397. {
  1398. int in_menu = 0;
  1399. char *match;
  1400. REFERENCE **refs = NULL;
  1401. size_t refs_index = 0, refs_slots = 0;
  1402. /* Whether an index tag was seen. */
  1403. int in_index = 0;
  1404. rewrite_p = preprocess_nodes_p;
  1405. init_output_stream (fb);
  1406. if (fb)
  1407. {
  1408. char *file_contents;
  1409. /* Set anchor_to_adjust to first anchor in node, if any. */
  1410. anchor_to_adjust = tag_ptr + 1;
  1411. if (!*anchor_to_adjust)
  1412. anchor_to_adjust = 0;
  1413. else if (*anchor_to_adjust
  1414. && (*anchor_to_adjust)->cache.nodelen != 0)
  1415. anchor_to_adjust = 0;
  1416. if (!node->subfile)
  1417. file_contents = fb->contents;
  1418. else
  1419. {
  1420. FILE_BUFFER *f = info_find_subfile (node->subfile);
  1421. if (!f)
  1422. return; /* This shouldn't happen. */
  1423. file_contents = f->contents;
  1424. }
  1425. node_offset = (*tag_ptr)->nodestart
  1426. + skip_node_separator (file_contents + (*tag_ptr)->nodestart);
  1427. }
  1428. else
  1429. anchor_to_adjust = 0;
  1430. /* Initialize refs to point to array of one null pointer in case
  1431. there are no results. This way we know if refs has been initialized
  1432. even if it is empty. */
  1433. refs = calloc (1, sizeof *refs);
  1434. refs_slots = 1;
  1435. parse_top_node_line (node);
  1436. /* This should be the only time we assign to inptr in this function -
  1437. all other assignment should be done with the helper functions above. */
  1438. inptr = node->contents;
  1439. input_start = node->contents;
  1440. input_length = node->nodelen;
  1441. while ((match = forward_to_info_syntax (inptr))
  1442. && match < node->contents + node->nodelen)
  1443. {
  1444. int in_parentheses = 0;
  1445. REFERENCE *entry;
  1446. /* Write out up to match */
  1447. copy_input_to_output (match - inptr);
  1448. if ((in_menu && match[0] == '\n') || match[0] == '*')
  1449. {
  1450. /* Menu entry or cross reference. */
  1451. /* Create REFERENCE entity. */
  1452. entry = info_new_reference (0, 0);
  1453. if (safe_string_index (inptr, -1, input_start, input_length) == '('
  1454. && safe_string_index (inptr, 1, input_start, input_length) == 'n')
  1455. in_parentheses = 1;
  1456. save_conversion_state ();
  1457. if (!scan_reference_marker (entry, in_parentheses)
  1458. || !scan_reference_label (entry, in_index)
  1459. || !scan_reference_target (entry, node, in_parentheses))
  1460. {
  1461. /* This is not a menu entry or reference. Do not add to our
  1462. list. */
  1463. char *cur_inptr = inptr;
  1464. reset_conversion ();
  1465. copy_input_to_output (cur_inptr - inptr);
  1466. info_reference_free (entry);
  1467. continue;
  1468. }
  1469. add_pointer_to_array (entry, refs_index, refs, refs_slots, 50);
  1470. }
  1471. /* Was "* Menu:" seen? If so, search for menu entries hereafter. */
  1472. else if (!in_menu && !strncmp (match, INFO_MENU_LABEL,
  1473. strlen (INFO_MENU_LABEL)))
  1474. {
  1475. in_menu = 1;
  1476. skip_input (strlen ("\n* Menu:"));
  1477. if (*inptr == '\n')
  1478. skip_input (strspn (inptr, "\n") - 1); /* Keep one newline. */
  1479. }
  1480. else if (match[0] == '\0') /* Info tag */
  1481. {
  1482. scan_info_tag (node, &in_index, fb);
  1483. }
  1484. else
  1485. copy_input_to_output (1);
  1486. }
  1487. /* If we haven't accidentally gone past the end of the node, write
  1488. out the rest of it. */
  1489. if (inptr < node->contents + node->nodelen)
  1490. copy_input_to_output ((node->contents + node->nodelen) - inptr);
  1491. /* Null to terminate buffer. */
  1492. if (rewrite_p)
  1493. text_buffer_add_string (&output_buf, "\0", 1);
  1494. /* Free resources used in character encoding conversion. */
  1495. close_conversion ();
  1496. node->references = refs;
  1497. if (rewrite_p)
  1498. {
  1499. node->contents = text_buffer_base (&output_buf);
  1500. node->flags |= N_WasRewritten;
  1501. /* output_buf.off is the offset of the next character to be
  1502. written. Subtracting 1 gives the offset of our terminating
  1503. null, that is, the length. */
  1504. node->nodelen = text_buffer_off (&output_buf) - 1;
  1505. }
  1506. else if (fb && tag_ptr)
  1507. {
  1508. /* Set nodestart_adjusted for all of the anchors in this node. */
  1509. tag_ptr++;
  1510. while (*tag_ptr && (*tag_ptr)->cache.nodelen == 0)
  1511. {
  1512. (*tag_ptr)->nodestart_adjusted = (*tag_ptr)->nodestart
  1513. - output_bytes_difference;
  1514. tag_ptr++;
  1515. }
  1516. }
  1517. }
  1518. /* Various utility functions */
  1519. /* Return the file buffer which belongs to WINDOW's node. */
  1520. FILE_BUFFER *
  1521. file_buffer_of_window (WINDOW *window)
  1522. {
  1523. /* If this window has no node, then it has no file buffer. */
  1524. if (!window->node)
  1525. return NULL;
  1526. if (window->node->fullpath)
  1527. return info_find_file (window->node->fullpath);
  1528. return NULL;
  1529. }
  1530. /* Return "(FILENAME)NODENAME" for NODE, or just "NODENAME" if NODE's
  1531. filename is not set. Return value should not be freed. */
  1532. char *
  1533. node_printed_rep (NODE *node)
  1534. {
  1535. static char *rep;
  1536. if (node->fullpath)
  1537. {
  1538. char *filename = filename_non_directory (node->fullpath);
  1539. rep = xrealloc (rep, 1 + strlen (filename) + 1 + strlen (node->nodename) + 1);
  1540. sprintf (rep, "(%s)%s", filename, node->nodename);
  1541. return rep;
  1542. }
  1543. else
  1544. return node->nodename;
  1545. }
  /* Return a pointer into PATHNAME at the start of its final component, that
     is, just after the last character for which IS_SLASH is true.  When
     HAVE_DRIVE reports a drive prefix, the search does not go back into its
     two characters.  */
  char *
  filename_non_directory (char *pathname)
  {
    char *floor = pathname;                      /* Earliest allowed result.  */
    char *cursor = pathname + strlen (pathname); /* Start from the end.  */

    if (HAVE_DRIVE (pathname))
      floor += 2;

    /* Walk backwards until the preceding character is a separator.  */
    while (cursor > floor && !IS_SLASH (cursor[-1]))
      cursor--;

    return cursor;
  }
  1557. /* Return non-zero if NODE is one especially created by Info. */
  1558. int
  1559. internal_info_node_p (NODE *node)
  1560. {
  1561. return (node != NULL) && (node->flags & N_IsInternal);
  1562. }
  1563. /* Make NODE appear to be one especially created by Info. */
  1564. void
  1565. name_internal_node (NODE *node, char *name)
  1566. {
  1567. if (!node)
  1568. return;
  1569. node->fullpath = "";
  1570. node->subfile = 0;
  1571. node->nodename = name;
  1572. node->flags |= N_IsInternal;
  1573. }
  1574. /* Return the window displaying NAME, the name of an internally created
  1575. Info window. */
  1576. WINDOW *
  1577. get_internal_info_window (char *name)
  1578. {
  1579. WINDOW *win;
  1580. for (win = windows; win; win = win->next)
  1581. if (internal_info_node_p (win->node) &&
  1582. (strcmp (win->node->nodename, name) == 0))
  1583. break;
  1584. return win;
  1585. }
  1586. /* Flexible Text Buffer */
  1587. void
  1588. text_buffer_init (struct text_buffer *buf)
  1589. {
  1590. memset (buf, 0, sizeof *buf);
  1591. }
  1592. void
  1593. text_buffer_free (struct text_buffer *buf)
  1594. {
  1595. free (buf->base);
  1596. }
  1597. size_t
  1598. text_buffer_vprintf (struct text_buffer *buf, const char *format, va_list ap)
  1599. {
  1600. ssize_t n;
  1601. va_list ap_copy;
  1602. if (!buf->base)
  1603. {
  1604. if (buf->size == 0)
  1605. buf->size = MIN_TEXT_BUF_ALLOC; /* Initial allocation */
  1606. buf->base = xmalloc (buf->size);
  1607. }
  1608. for (;;)
  1609. {
  1610. va_copy (ap_copy, ap);
  1611. n = vsnprintf (buf->base + buf->off, buf->size - buf->off,
  1612. format, ap_copy);
  1613. va_end (ap_copy);
  1614. if (n < 0 || buf->off + n >= buf->size ||
  1615. !memchr (buf->base + buf->off, '\0', buf->size - buf->off + 1))
  1616. {
  1617. size_t newlen = buf->size * 2;
  1618. if (newlen < buf->size)
  1619. xalloc_die ();
  1620. buf->size = newlen;
  1621. buf->base = xrealloc (buf->base, buf->size);
  1622. }
  1623. else
  1624. {
  1625. buf->off += n;
  1626. break;
  1627. }
  1628. }
  1629. return n;
  1630. }
  1631. /* Make sure there are LEN free bytes at end of BUF. */
  1632. void
  1633. text_buffer_alloc (struct text_buffer *buf, size_t len)
  1634. {
  1635. if (buf->off + len > buf->size)
  1636. {
  1637. buf->size = buf->off + len;
  1638. if (buf->size < MIN_TEXT_BUF_ALLOC)
  1639. buf->size = MIN_TEXT_BUF_ALLOC;
  1640. buf->base = xrealloc (buf->base, buf->size);
  1641. }
  1642. }
  1643. /* Return number of bytes that can be written to text buffer without
  1644. reallocating the text buffer. */
  1645. size_t
  1646. text_buffer_space_left (struct text_buffer *buf)
  1647. {
  1648. /* buf->size is the offset of the first byte after the allocated space.
  1649. buf->off is the offset of the first byte to be written to. */
  1650. return buf->size - buf->off;
  1651. }
  #if HAVE_ICONV
  /* Run iconv with the unused space at the end of BUF as its output
     buffer.  ICONV_STATE, INBUF and INBYTESLEFT are handed to iconv
     unchanged.  Afterwards BUF's offset is set to wherever iconv left its
     output pointer.  The buffer is not enlarged here.  Return iconv's own
     return value. */
  size_t
  text_buffer_iconv (struct text_buffer *buf, iconv_t iconv_state,
                     ICONV_CONST char **inbuf, size_t *inbytesleft)
  {
    size_t room = text_buffer_space_left (buf);
    char *dest = text_buffer_base (buf) + text_buffer_off (buf);
    size_t result = iconv (iconv_state, inbuf, inbytesleft, &dest, &room);

    text_buffer_off (buf) = dest - text_buffer_base (buf);

    return result;
  }
  #endif /* HAVE_ICONV */
  1669. size_t
  1670. text_buffer_add_string (struct text_buffer *buf, const char *str, size_t len)
  1671. {
  1672. text_buffer_alloc (buf, len);
  1673. memcpy (buf->base + buf->off, str, len);
  1674. buf->off += len;
  1675. return len;
  1676. }
  1677. size_t
  1678. text_buffer_fill (struct text_buffer *buf, int c, size_t len)
  1679. {
  1680. char *p;
  1681. int i;
  1682. text_buffer_alloc (buf, len);
  1683. for (i = 0, p = buf->base + buf->off; i < len; i++)
  1684. *p++ = c;
  1685. buf->off += len;
  1686. return len;
  1687. }
  /* Append the single character C, converted to char, to BUF. */
  void
  text_buffer_add_char (struct text_buffer *buf, int c)
  {
    char single[1];

    single[0] = c;
    text_buffer_add_string (buf, single, 1);
  }
  /* Variadic front end to text_buffer_vprintf: append output formatted
     according to FORMAT and the arguments that follow it to BUF, and
     return whatever text_buffer_vprintf returns. */
  size_t
  text_buffer_printf (struct text_buffer *buf, const char *format, ...)
  {
    size_t written;
    va_list args;

    va_start (args, format);
    written = text_buffer_vprintf (buf, format, args);
    va_end (args);

    return written;
  }
  #if defined(__MSDOS__) || defined(__MINGW32__)
  /* Compare the file names FN1 and FN2, ignoring letter case and treating
     any two characters accepted by IS_SLASH as equal.
     Cannot use FILENAME_CMP here, since that does not consider forward-
     and back-slash characters equal.

     Return 0 when the names match, otherwise the difference between the
     lower-cased characters at the first mismatch. */
  int
  fncmp (const char *fn1, const char *fn2)
  {
    /* Read the names as unsigned char: tolower is only defined for EOF and
       values representable as unsigned char, and a plain char holding a
       byte of 0x80 or above may be negative. */
    const unsigned char *s1 = (const unsigned char *) fn1;
    const unsigned char *s2 = (const unsigned char *) fn2;

    while (tolower (*s1) == tolower (*s2)
           || (IS_SLASH (*s1) && IS_SLASH (*s2)))
      {
        /* Both strings ended together without a mismatch. */
        if (*s1 == 0)
          return 0;
        s1++;
        s2++;
      }

    return tolower (*s1) - tolower (*s2);
  }
  #endif
  /* One entry in a singly linked list of names. */
  struct info_namelist_entry
  {
    struct info_namelist_entry *next;
    /* Null-terminated name, stored inline directly after the link. */
    char name[];
  };

  /* Record NAME in the list whose head pointer is *PTOP.

     Return 1 without changing the list if an entry that compares equal to
     NAME under fncmp is already present.  Otherwise push a new entry
     holding a copy of NAME onto the front of the list and return 0. */
  int
  info_namelist_add (struct info_namelist_entry **ptop, const char *name)
  {
    struct info_namelist_entry *p;
    size_t name_size;

    for (p = *ptop; p; p = p->next)
      if (fncmp (p->name, name) == 0)
        return 1;

    /* Allocate the entry header plus room for the name and its terminating
       null byte, which the flexible array member does not account for. */
    name_size = strlen (name) + 1;
    p = xmalloc (sizeof *p + name_size);
    memcpy (p->name, name, name_size);

    p->next = *ptop;
    *ptop = p;
    return 0;
  }
  1740. void
  1741. info_namelist_free (struct info_namelist_entry *top)
  1742. {
  1743. while (top)
  1744. {
  1745. struct info_namelist_entry *next = top->next;
  1746. free (top);
  1747. top = next;
  1748. }
  1749. }