entities.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164
  1. /*
  2. * entities.c : implementation for the XML entities handling
  3. *
  4. * See Copyright for the status of this software.
  5. *
  6. * daniel@veillard.com
  7. */
  8. /* To avoid EBCDIC trouble when parsing on zOS */
  9. #if defined(__MVS__)
  10. #pragma convert("ISO8859-1")
  11. #endif
  12. #define IN_LIBXML
  13. #include "libxml.h"
  14. #include <string.h>
  15. #ifdef HAVE_STDLIB_H
  16. #include <stdlib.h>
  17. #endif
  18. #include <libxml/xmlmemory.h>
  19. #include <libxml/hash.h>
  20. #include <libxml/entities.h>
  21. #include <libxml/parser.h>
  22. #include <libxml/parserInternals.h>
  23. #include <libxml/xmlerror.h>
  24. #include <libxml/globals.h>
  25. #include <libxml/dict.h>
  26. #include "save.h"
  27. /*
  28. * The XML predefined entities.
  29. */
  30. static xmlEntity xmlEntityLt = {
  31. NULL, XML_ENTITY_DECL, BAD_CAST "lt",
  32. NULL, NULL, NULL, NULL, NULL, NULL,
  33. BAD_CAST "<", BAD_CAST "<", 1,
  34. XML_INTERNAL_PREDEFINED_ENTITY,
  35. NULL, NULL, NULL, NULL, 0, 1
  36. };
  37. static xmlEntity xmlEntityGt = {
  38. NULL, XML_ENTITY_DECL, BAD_CAST "gt",
  39. NULL, NULL, NULL, NULL, NULL, NULL,
  40. BAD_CAST ">", BAD_CAST ">", 1,
  41. XML_INTERNAL_PREDEFINED_ENTITY,
  42. NULL, NULL, NULL, NULL, 0, 1
  43. };
  44. static xmlEntity xmlEntityAmp = {
  45. NULL, XML_ENTITY_DECL, BAD_CAST "amp",
  46. NULL, NULL, NULL, NULL, NULL, NULL,
  47. BAD_CAST "&", BAD_CAST "&", 1,
  48. XML_INTERNAL_PREDEFINED_ENTITY,
  49. NULL, NULL, NULL, NULL, 0, 1
  50. };
  51. static xmlEntity xmlEntityQuot = {
  52. NULL, XML_ENTITY_DECL, BAD_CAST "quot",
  53. NULL, NULL, NULL, NULL, NULL, NULL,
  54. BAD_CAST "\"", BAD_CAST "\"", 1,
  55. XML_INTERNAL_PREDEFINED_ENTITY,
  56. NULL, NULL, NULL, NULL, 0, 1
  57. };
  58. static xmlEntity xmlEntityApos = {
  59. NULL, XML_ENTITY_DECL, BAD_CAST "apos",
  60. NULL, NULL, NULL, NULL, NULL, NULL,
  61. BAD_CAST "'", BAD_CAST "'", 1,
  62. XML_INTERNAL_PREDEFINED_ENTITY,
  63. NULL, NULL, NULL, NULL, 0, 1
  64. };
  65. /**
  66. * xmlEntitiesErrMemory:
  67. * @extra: extra information
  68. *
  69. * Handle an out of memory condition
  70. */
  71. static void
  72. xmlEntitiesErrMemory(const char *extra)
  73. {
  74. __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  75. }
  76. /**
  77. * xmlEntitiesErr:
  78. * @code: the error code
  79. * @msg: the message
  80. *
  81. * Handle an out of memory condition
  82. */
  83. static void LIBXML_ATTR_FORMAT(2,0)
  84. xmlEntitiesErr(xmlParserErrors code, const char *msg)
  85. {
  86. __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
  87. }
  88. /*
  89. * xmlFreeEntity : clean-up an entity record.
  90. */
  91. static void
  92. xmlFreeEntity(xmlEntityPtr entity)
  93. {
  94. xmlDictPtr dict = NULL;
  95. if (entity == NULL)
  96. return;
  97. if (entity->doc != NULL)
  98. dict = entity->doc->dict;
  99. if ((entity->children) && (entity->owner == 1) &&
  100. (entity == (xmlEntityPtr) entity->children->parent))
  101. xmlFreeNodeList(entity->children);
  102. if (dict != NULL) {
  103. if ((entity->name != NULL) && (!xmlDictOwns(dict, entity->name)))
  104. xmlFree((char *) entity->name);
  105. if ((entity->ExternalID != NULL) &&
  106. (!xmlDictOwns(dict, entity->ExternalID)))
  107. xmlFree((char *) entity->ExternalID);
  108. if ((entity->SystemID != NULL) &&
  109. (!xmlDictOwns(dict, entity->SystemID)))
  110. xmlFree((char *) entity->SystemID);
  111. if ((entity->URI != NULL) && (!xmlDictOwns(dict, entity->URI)))
  112. xmlFree((char *) entity->URI);
  113. if ((entity->content != NULL)
  114. && (!xmlDictOwns(dict, entity->content)))
  115. xmlFree((char *) entity->content);
  116. if ((entity->orig != NULL) && (!xmlDictOwns(dict, entity->orig)))
  117. xmlFree((char *) entity->orig);
  118. } else {
  119. if (entity->name != NULL)
  120. xmlFree((char *) entity->name);
  121. if (entity->ExternalID != NULL)
  122. xmlFree((char *) entity->ExternalID);
  123. if (entity->SystemID != NULL)
  124. xmlFree((char *) entity->SystemID);
  125. if (entity->URI != NULL)
  126. xmlFree((char *) entity->URI);
  127. if (entity->content != NULL)
  128. xmlFree((char *) entity->content);
  129. if (entity->orig != NULL)
  130. xmlFree((char *) entity->orig);
  131. }
  132. xmlFree(entity);
  133. }
  134. /*
  135. * xmlCreateEntity:
  136. *
  137. * internal routine doing the entity node structures allocations
  138. */
  139. static xmlEntityPtr
  140. xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
  141. const xmlChar *ExternalID, const xmlChar *SystemID,
  142. const xmlChar *content) {
  143. xmlEntityPtr ret;
  144. ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
  145. if (ret == NULL) {
  146. xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
  147. return(NULL);
  148. }
  149. memset(ret, 0, sizeof(xmlEntity));
  150. ret->type = XML_ENTITY_DECL;
  151. ret->checked = 0;
  152. /*
  153. * fill the structure.
  154. */
  155. ret->etype = (xmlEntityType) type;
  156. if (dict == NULL) {
  157. ret->name = xmlStrdup(name);
  158. if (ExternalID != NULL)
  159. ret->ExternalID = xmlStrdup(ExternalID);
  160. if (SystemID != NULL)
  161. ret->SystemID = xmlStrdup(SystemID);
  162. } else {
  163. ret->name = xmlDictLookup(dict, name, -1);
  164. if (ExternalID != NULL)
  165. ret->ExternalID = xmlDictLookup(dict, ExternalID, -1);
  166. if (SystemID != NULL)
  167. ret->SystemID = xmlDictLookup(dict, SystemID, -1);
  168. }
  169. if (content != NULL) {
  170. ret->length = xmlStrlen(content);
  171. if ((dict != NULL) && (ret->length < 5))
  172. ret->content = (xmlChar *)
  173. xmlDictLookup(dict, content, ret->length);
  174. else
  175. ret->content = xmlStrndup(content, ret->length);
  176. } else {
  177. ret->length = 0;
  178. ret->content = NULL;
  179. }
  180. ret->URI = NULL; /* to be computed by the layer knowing
  181. the defining entity */
  182. ret->orig = NULL;
  183. ret->owner = 0;
  184. return(ret);
  185. }
  186. /*
  187. * xmlAddEntity : register a new entity for an entities table.
  188. */
  189. static xmlEntityPtr
  190. xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
  191. const xmlChar *ExternalID, const xmlChar *SystemID,
  192. const xmlChar *content) {
  193. xmlDictPtr dict = NULL;
  194. xmlEntitiesTablePtr table = NULL;
  195. xmlEntityPtr ret, predef;
  196. if (name == NULL)
  197. return(NULL);
  198. if (dtd == NULL)
  199. return(NULL);
  200. if (dtd->doc != NULL)
  201. dict = dtd->doc->dict;
  202. switch (type) {
  203. case XML_INTERNAL_GENERAL_ENTITY:
  204. case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
  205. case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
  206. predef = xmlGetPredefinedEntity(name);
  207. if (predef != NULL) {
  208. int valid = 0;
  209. /* 4.6 Predefined Entities */
  210. if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
  211. (content != NULL)) {
  212. int c = predef->content[0];
  213. if (((content[0] == c) && (content[1] == 0)) &&
  214. ((c == '>') || (c == '\'') || (c == '"'))) {
  215. valid = 1;
  216. } else if ((content[0] == '&') && (content[1] == '#')) {
  217. if (content[2] == 'x') {
  218. xmlChar *hex = BAD_CAST "0123456789ABCDEF";
  219. xmlChar ref[] = "00;";
  220. ref[0] = hex[c / 16 % 16];
  221. ref[1] = hex[c % 16];
  222. if (xmlStrcasecmp(&content[3], ref) == 0)
  223. valid = 1;
  224. } else {
  225. xmlChar ref[] = "00;";
  226. ref[0] = '0' + c / 10 % 10;
  227. ref[1] = '0' + c % 10;
  228. if (xmlStrEqual(&content[2], ref))
  229. valid = 1;
  230. }
  231. }
  232. }
  233. if (!valid) {
  234. xmlEntitiesErr(XML_ERR_ENTITY_PROCESSING,
  235. "xmlAddEntity: invalid redeclaration of predefined"
  236. " entity");
  237. return(NULL);
  238. }
  239. }
  240. if (dtd->entities == NULL)
  241. dtd->entities = xmlHashCreateDict(0, dict);
  242. table = dtd->entities;
  243. break;
  244. case XML_INTERNAL_PARAMETER_ENTITY:
  245. case XML_EXTERNAL_PARAMETER_ENTITY:
  246. if (dtd->pentities == NULL)
  247. dtd->pentities = xmlHashCreateDict(0, dict);
  248. table = dtd->pentities;
  249. break;
  250. case XML_INTERNAL_PREDEFINED_ENTITY:
  251. return(NULL);
  252. }
  253. if (table == NULL)
  254. return(NULL);
  255. ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
  256. if (ret == NULL)
  257. return(NULL);
  258. ret->doc = dtd->doc;
  259. if (xmlHashAddEntry(table, name, ret)) {
  260. /*
  261. * entity was already defined at another level.
  262. */
  263. xmlFreeEntity(ret);
  264. return(NULL);
  265. }
  266. return(ret);
  267. }
  268. /**
  269. * xmlGetPredefinedEntity:
  270. * @name: the entity name
  271. *
  272. * Check whether this name is an predefined entity.
  273. *
  274. * Returns NULL if not, otherwise the entity
  275. */
  276. xmlEntityPtr
  277. xmlGetPredefinedEntity(const xmlChar *name) {
  278. if (name == NULL) return(NULL);
  279. switch (name[0]) {
  280. case 'l':
  281. if (xmlStrEqual(name, BAD_CAST "lt"))
  282. return(&xmlEntityLt);
  283. break;
  284. case 'g':
  285. if (xmlStrEqual(name, BAD_CAST "gt"))
  286. return(&xmlEntityGt);
  287. break;
  288. case 'a':
  289. if (xmlStrEqual(name, BAD_CAST "amp"))
  290. return(&xmlEntityAmp);
  291. if (xmlStrEqual(name, BAD_CAST "apos"))
  292. return(&xmlEntityApos);
  293. break;
  294. case 'q':
  295. if (xmlStrEqual(name, BAD_CAST "quot"))
  296. return(&xmlEntityQuot);
  297. break;
  298. default:
  299. break;
  300. }
  301. return(NULL);
  302. }
  303. /**
  304. * xmlAddDtdEntity:
  305. * @doc: the document
  306. * @name: the entity name
  307. * @type: the entity type XML_xxx_yyy_ENTITY
  308. * @ExternalID: the entity external ID if available
  309. * @SystemID: the entity system ID if available
  310. * @content: the entity content
  311. *
  312. * Register a new entity for this document DTD external subset.
  313. *
  314. * Returns a pointer to the entity or NULL in case of error
  315. */
  316. xmlEntityPtr
  317. xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
  318. const xmlChar *ExternalID, const xmlChar *SystemID,
  319. const xmlChar *content) {
  320. xmlEntityPtr ret;
  321. xmlDtdPtr dtd;
  322. if (doc == NULL) {
  323. xmlEntitiesErr(XML_DTD_NO_DOC,
  324. "xmlAddDtdEntity: document is NULL");
  325. return(NULL);
  326. }
  327. if (doc->extSubset == NULL) {
  328. xmlEntitiesErr(XML_DTD_NO_DTD,
  329. "xmlAddDtdEntity: document without external subset");
  330. return(NULL);
  331. }
  332. dtd = doc->extSubset;
  333. ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
  334. if (ret == NULL) return(NULL);
  335. /*
  336. * Link it to the DTD
  337. */
  338. ret->parent = dtd;
  339. ret->doc = dtd->doc;
  340. if (dtd->last == NULL) {
  341. dtd->children = dtd->last = (xmlNodePtr) ret;
  342. } else {
  343. dtd->last->next = (xmlNodePtr) ret;
  344. ret->prev = dtd->last;
  345. dtd->last = (xmlNodePtr) ret;
  346. }
  347. return(ret);
  348. }
  349. /**
  350. * xmlAddDocEntity:
  351. * @doc: the document
  352. * @name: the entity name
  353. * @type: the entity type XML_xxx_yyy_ENTITY
  354. * @ExternalID: the entity external ID if available
  355. * @SystemID: the entity system ID if available
  356. * @content: the entity content
  357. *
  358. * Register a new entity for this document.
  359. *
  360. * Returns a pointer to the entity or NULL in case of error
  361. */
  362. xmlEntityPtr
  363. xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
  364. const xmlChar *ExternalID, const xmlChar *SystemID,
  365. const xmlChar *content) {
  366. xmlEntityPtr ret;
  367. xmlDtdPtr dtd;
  368. if (doc == NULL) {
  369. xmlEntitiesErr(XML_DTD_NO_DOC,
  370. "xmlAddDocEntity: document is NULL");
  371. return(NULL);
  372. }
  373. if (doc->intSubset == NULL) {
  374. xmlEntitiesErr(XML_DTD_NO_DTD,
  375. "xmlAddDocEntity: document without internal subset");
  376. return(NULL);
  377. }
  378. dtd = doc->intSubset;
  379. ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
  380. if (ret == NULL) return(NULL);
  381. /*
  382. * Link it to the DTD
  383. */
  384. ret->parent = dtd;
  385. ret->doc = dtd->doc;
  386. if (dtd->last == NULL) {
  387. dtd->children = dtd->last = (xmlNodePtr) ret;
  388. } else {
  389. dtd->last->next = (xmlNodePtr) ret;
  390. ret->prev = dtd->last;
  391. dtd->last = (xmlNodePtr) ret;
  392. }
  393. return(ret);
  394. }
  395. /**
  396. * xmlNewEntity:
  397. * @doc: the document
  398. * @name: the entity name
  399. * @type: the entity type XML_xxx_yyy_ENTITY
  400. * @ExternalID: the entity external ID if available
  401. * @SystemID: the entity system ID if available
  402. * @content: the entity content
  403. *
  404. * Create a new entity, this differs from xmlAddDocEntity() that if
  405. * the document is NULL or has no internal subset defined, then an
  406. * unlinked entity structure will be returned, it is then the responsibility
  407. * of the caller to link it to the document later or free it when not needed
  408. * anymore.
  409. *
  410. * Returns a pointer to the entity or NULL in case of error
  411. */
  412. xmlEntityPtr
  413. xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
  414. const xmlChar *ExternalID, const xmlChar *SystemID,
  415. const xmlChar *content) {
  416. xmlEntityPtr ret;
  417. xmlDictPtr dict;
  418. if ((doc != NULL) && (doc->intSubset != NULL)) {
  419. return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
  420. }
  421. if (doc != NULL)
  422. dict = doc->dict;
  423. else
  424. dict = NULL;
  425. ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
  426. if (ret == NULL)
  427. return(NULL);
  428. ret->doc = doc;
  429. return(ret);
  430. }
  431. /**
  432. * xmlGetEntityFromTable:
  433. * @table: an entity table
  434. * @name: the entity name
  435. * @parameter: look for parameter entities
  436. *
  437. * Do an entity lookup in the table.
  438. * returns the corresponding parameter entity, if found.
  439. *
  440. * Returns A pointer to the entity structure or NULL if not found.
  441. */
  442. static xmlEntityPtr
  443. xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
  444. return((xmlEntityPtr) xmlHashLookup(table, name));
  445. }
  446. /**
  447. * xmlGetParameterEntity:
  448. * @doc: the document referencing the entity
  449. * @name: the entity name
  450. *
  451. * Do an entity lookup in the internal and external subsets and
  452. * returns the corresponding parameter entity, if found.
  453. *
  454. * Returns A pointer to the entity structure or NULL if not found.
  455. */
  456. xmlEntityPtr
  457. xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
  458. xmlEntitiesTablePtr table;
  459. xmlEntityPtr ret;
  460. if (doc == NULL)
  461. return(NULL);
  462. if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
  463. table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
  464. ret = xmlGetEntityFromTable(table, name);
  465. if (ret != NULL)
  466. return(ret);
  467. }
  468. if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
  469. table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
  470. return(xmlGetEntityFromTable(table, name));
  471. }
  472. return(NULL);
  473. }
  474. /**
  475. * xmlGetDtdEntity:
  476. * @doc: the document referencing the entity
  477. * @name: the entity name
  478. *
  479. * Do an entity lookup in the DTD entity hash table and
  480. * returns the corresponding entity, if found.
  481. * Note: the first argument is the document node, not the DTD node.
  482. *
  483. * Returns A pointer to the entity structure or NULL if not found.
  484. */
  485. xmlEntityPtr
  486. xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
  487. xmlEntitiesTablePtr table;
  488. if (doc == NULL)
  489. return(NULL);
  490. if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
  491. table = (xmlEntitiesTablePtr) doc->extSubset->entities;
  492. return(xmlGetEntityFromTable(table, name));
  493. }
  494. return(NULL);
  495. }
  496. /**
  497. * xmlGetDocEntity:
  498. * @doc: the document referencing the entity
  499. * @name: the entity name
  500. *
  501. * Do an entity lookup in the document entity hash table and
  502. * returns the corresponding entity, otherwise a lookup is done
  503. * in the predefined entities too.
  504. *
  505. * Returns A pointer to the entity structure or NULL if not found.
  506. */
  507. xmlEntityPtr
  508. xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
  509. xmlEntityPtr cur;
  510. xmlEntitiesTablePtr table;
  511. if (doc != NULL) {
  512. if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
  513. table = (xmlEntitiesTablePtr) doc->intSubset->entities;
  514. cur = xmlGetEntityFromTable(table, name);
  515. if (cur != NULL)
  516. return(cur);
  517. }
  518. if (doc->standalone != 1) {
  519. if ((doc->extSubset != NULL) &&
  520. (doc->extSubset->entities != NULL)) {
  521. table = (xmlEntitiesTablePtr) doc->extSubset->entities;
  522. cur = xmlGetEntityFromTable(table, name);
  523. if (cur != NULL)
  524. return(cur);
  525. }
  526. }
  527. }
  528. return(xmlGetPredefinedEntity(name));
  529. }
  /*
   * Macro used to grow the current buffer.
   *
   * It doubles buffer_size and reallocates buffer accordingly.  The
   * expansion site must provide the locals `buffer` and `buffer_size`
   * and a `mem_error` label, which is jumped to when the doubled size
   * wraps around or when xmlRealloc() fails; buffer is left untouched in
   * both cases.
   */
  #define growBufferReentrant() do {                                    \
      size_t grownSize = buffer_size * 2;                               \
      xmlChar *grownBuf;                                                \
      if (grownSize < buffer_size) goto mem_error;                      \
      grownBuf = (xmlChar *) xmlRealloc(buffer, grownSize);             \
      if (grownBuf == NULL) goto mem_error;                             \
      buffer = grownBuf;                                                \
      buffer_size = grownSize;                                          \
  } while (0)
  542. /**
  543. * xmlEncodeEntitiesInternal:
  544. * @doc: the document containing the string
  545. * @input: A string to convert to XML.
  546. * @attr: are we handling an attribute value
  547. *
  548. * Do a global encoding of a string, replacing the predefined entities
  549. * and non ASCII values with their entities and CharRef counterparts.
  550. * Contrary to xmlEncodeEntities, this routine is reentrant, and result
  551. * must be deallocated.
  552. *
  553. * Returns A newly allocated string with the substitution done.
  554. */
  555. static xmlChar *
  556. xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
  557. const xmlChar *cur = input;
  558. xmlChar *buffer = NULL;
  559. xmlChar *out = NULL;
  560. size_t buffer_size = 0;
  561. int html = 0;
  562. if (input == NULL) return(NULL);
  563. if (doc != NULL)
  564. html = (doc->type == XML_HTML_DOCUMENT_NODE);
  565. /*
  566. * allocate an translation buffer.
  567. */
  568. buffer_size = 1000;
  569. buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
  570. if (buffer == NULL) {
  571. xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
  572. return(NULL);
  573. }
  574. out = buffer;
  575. while (*cur != '\0') {
  576. size_t indx = out - buffer;
  577. if (indx + 100 > buffer_size) {
  578. growBufferReentrant();
  579. out = &buffer[indx];
  580. }
  581. /*
  582. * By default one have to encode at least '<', '>', '"' and '&' !
  583. */
  584. if (*cur == '<') {
  585. const xmlChar *end;
  586. /*
  587. * Special handling of server side include in HTML attributes
  588. */
  589. if (html && attr &&
  590. (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
  591. ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
  592. while (cur != end) {
  593. *out++ = *cur++;
  594. indx = out - buffer;
  595. if (indx + 100 > buffer_size) {
  596. growBufferReentrant();
  597. out = &buffer[indx];
  598. }
  599. }
  600. *out++ = *cur++;
  601. *out++ = *cur++;
  602. *out++ = *cur++;
  603. continue;
  604. }
  605. *out++ = '&';
  606. *out++ = 'l';
  607. *out++ = 't';
  608. *out++ = ';';
  609. } else if (*cur == '>') {
  610. *out++ = '&';
  611. *out++ = 'g';
  612. *out++ = 't';
  613. *out++ = ';';
  614. } else if (*cur == '&') {
  615. /*
  616. * Special handling of &{...} construct from HTML 4, see
  617. * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
  618. */
  619. if (html && attr && (cur[1] == '{') &&
  620. (strchr((const char *) cur, '}'))) {
  621. while (*cur != '}') {
  622. *out++ = *cur++;
  623. indx = out - buffer;
  624. if (indx + 100 > buffer_size) {
  625. growBufferReentrant();
  626. out = &buffer[indx];
  627. }
  628. }
  629. *out++ = *cur++;
  630. continue;
  631. }
  632. *out++ = '&';
  633. *out++ = 'a';
  634. *out++ = 'm';
  635. *out++ = 'p';
  636. *out++ = ';';
  637. } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
  638. (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
  639. /*
  640. * default case, just copy !
  641. */
  642. *out++ = *cur;
  643. } else if (*cur >= 0x80) {
  644. if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
  645. /*
  646. * Bjørn Reese <br@sseusa.com> provided the patch
  647. xmlChar xc;
  648. xc = (*cur & 0x3F) << 6;
  649. if (cur[1] != 0) {
  650. xc += *(++cur) & 0x3F;
  651. *out++ = xc;
  652. } else
  653. */
  654. *out++ = *cur;
  655. } else {
  656. /*
  657. * We assume we have UTF-8 input.
  658. * It must match either:
  659. * 110xxxxx 10xxxxxx
  660. * 1110xxxx 10xxxxxx 10xxxxxx
  661. * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  662. * That is:
  663. * cur[0] is 11xxxxxx
  664. * cur[1] is 10xxxxxx
  665. * cur[2] is 10xxxxxx if cur[0] is 111xxxxx
  666. * cur[3] is 10xxxxxx if cur[0] is 1111xxxx
  667. * cur[0] is not 11111xxx
  668. */
  669. char buf[11], *ptr;
  670. int val = 0, l = 1;
  671. if (((cur[0] & 0xC0) != 0xC0) ||
  672. ((cur[1] & 0xC0) != 0x80) ||
  673. (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
  674. (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
  675. (((cur[0] & 0xF8) == 0xF8))) {
  676. xmlEntitiesErr(XML_CHECK_NOT_UTF8,
  677. "xmlEncodeEntities: input not UTF-8");
  678. if (doc != NULL)
  679. doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
  680. snprintf(buf, sizeof(buf), "&#%d;", *cur);
  681. buf[sizeof(buf) - 1] = 0;
  682. ptr = buf;
  683. while (*ptr != 0) *out++ = *ptr++;
  684. cur++;
  685. continue;
  686. } else if (*cur < 0xE0) {
  687. val = (cur[0]) & 0x1F;
  688. val <<= 6;
  689. val |= (cur[1]) & 0x3F;
  690. l = 2;
  691. } else if (*cur < 0xF0) {
  692. val = (cur[0]) & 0x0F;
  693. val <<= 6;
  694. val |= (cur[1]) & 0x3F;
  695. val <<= 6;
  696. val |= (cur[2]) & 0x3F;
  697. l = 3;
  698. } else if (*cur < 0xF8) {
  699. val = (cur[0]) & 0x07;
  700. val <<= 6;
  701. val |= (cur[1]) & 0x3F;
  702. val <<= 6;
  703. val |= (cur[2]) & 0x3F;
  704. val <<= 6;
  705. val |= (cur[3]) & 0x3F;
  706. l = 4;
  707. }
  708. if ((l == 1) || (!IS_CHAR(val))) {
  709. xmlEntitiesErr(XML_ERR_INVALID_CHAR,
  710. "xmlEncodeEntities: char out of range\n");
  711. if (doc != NULL)
  712. doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
  713. snprintf(buf, sizeof(buf), "&#%d;", *cur);
  714. buf[sizeof(buf) - 1] = 0;
  715. ptr = buf;
  716. while (*ptr != 0) *out++ = *ptr++;
  717. cur++;
  718. continue;
  719. }
  720. /*
  721. * We could do multiple things here. Just save as a char ref
  722. */
  723. snprintf(buf, sizeof(buf), "&#x%X;", val);
  724. buf[sizeof(buf) - 1] = 0;
  725. ptr = buf;
  726. while (*ptr != 0) *out++ = *ptr++;
  727. cur += l;
  728. continue;
  729. }
  730. } else if (IS_BYTE_CHAR(*cur)) {
  731. char buf[11], *ptr;
  732. snprintf(buf, sizeof(buf), "&#%d;", *cur);
  733. buf[sizeof(buf) - 1] = 0;
  734. ptr = buf;
  735. while (*ptr != 0) *out++ = *ptr++;
  736. }
  737. cur++;
  738. }
  739. *out = 0;
  740. return(buffer);
  741. mem_error:
  742. xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
  743. xmlFree(buffer);
  744. return(NULL);
  745. }
  746. /**
  747. * xmlEncodeAttributeEntities:
  748. * @doc: the document containing the string
  749. * @input: A string to convert to XML.
  750. *
  751. * Do a global encoding of a string, replacing the predefined entities
  752. * and non ASCII values with their entities and CharRef counterparts for
  753. * attribute values.
  754. *
  755. * Returns A newly allocated string with the substitution done.
  756. */
  757. xmlChar *
  758. xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
  759. return xmlEncodeEntitiesInternal(doc, input, 1);
  760. }
  761. /**
  762. * xmlEncodeEntitiesReentrant:
  763. * @doc: the document containing the string
  764. * @input: A string to convert to XML.
  765. *
  766. * Do a global encoding of a string, replacing the predefined entities
  767. * and non ASCII values with their entities and CharRef counterparts.
  768. * Contrary to xmlEncodeEntities, this routine is reentrant, and result
  769. * must be deallocated.
  770. *
  771. * Returns A newly allocated string with the substitution done.
  772. */
  773. xmlChar *
  774. xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
  775. return xmlEncodeEntitiesInternal(doc, input, 0);
  776. }
  777. /**
  778. * xmlEncodeSpecialChars:
  779. * @doc: the document containing the string
  780. * @input: A string to convert to XML.
  781. *
  782. * Do a global encoding of a string, replacing the predefined entities
  783. * this routine is reentrant, and result must be deallocated.
  784. *
  785. * Returns A newly allocated string with the substitution done.
  786. */
  787. xmlChar *
  788. xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
  789. const xmlChar *cur = input;
  790. xmlChar *buffer = NULL;
  791. xmlChar *out = NULL;
  792. size_t buffer_size = 0;
  793. if (input == NULL) return(NULL);
  794. /*
  795. * allocate an translation buffer.
  796. */
  797. buffer_size = 1000;
  798. buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
  799. if (buffer == NULL) {
  800. xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
  801. return(NULL);
  802. }
  803. out = buffer;
  804. while (*cur != '\0') {
  805. size_t indx = out - buffer;
  806. if (indx + 10 > buffer_size) {
  807. growBufferReentrant();
  808. out = &buffer[indx];
  809. }
  810. /*
  811. * By default one have to encode at least '<', '>', '"' and '&' !
  812. */
  813. if (*cur == '<') {
  814. *out++ = '&';
  815. *out++ = 'l';
  816. *out++ = 't';
  817. *out++ = ';';
  818. } else if (*cur == '>') {
  819. *out++ = '&';
  820. *out++ = 'g';
  821. *out++ = 't';
  822. *out++ = ';';
  823. } else if (*cur == '&') {
  824. *out++ = '&';
  825. *out++ = 'a';
  826. *out++ = 'm';
  827. *out++ = 'p';
  828. *out++ = ';';
  829. } else if (*cur == '"') {
  830. *out++ = '&';
  831. *out++ = 'q';
  832. *out++ = 'u';
  833. *out++ = 'o';
  834. *out++ = 't';
  835. *out++ = ';';
  836. } else if (*cur == '\r') {
  837. *out++ = '&';
  838. *out++ = '#';
  839. *out++ = '1';
  840. *out++ = '3';
  841. *out++ = ';';
  842. } else {
  843. /*
  844. * Works because on UTF-8, all extended sequences cannot
  845. * result in bytes in the ASCII range.
  846. */
  847. *out++ = *cur;
  848. }
  849. cur++;
  850. }
  851. *out = 0;
  852. return(buffer);
  853. mem_error:
  854. xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
  855. xmlFree(buffer);
  856. return(NULL);
  857. }
  858. /**
  859. * xmlCreateEntitiesTable:
  860. *
  861. * create and initialize an empty entities hash table.
  862. * This really doesn't make sense and should be deprecated
  863. *
  864. * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
  865. */
  866. xmlEntitiesTablePtr
  867. xmlCreateEntitiesTable(void) {
  868. return((xmlEntitiesTablePtr) xmlHashCreate(0));
  869. }
  870. /**
  871. * xmlFreeEntityWrapper:
  872. * @entity: An entity
  873. * @name: its name
  874. *
  875. * Deallocate the memory used by an entities in the hash table.
  876. */
  877. static void
  878. xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
  879. if (entity != NULL)
  880. xmlFreeEntity((xmlEntityPtr) entity);
  881. }
  882. /**
  883. * xmlFreeEntitiesTable:
  884. * @table: An entity table
  885. *
  886. * Deallocate the memory used by an entities hash table.
  887. */
  888. void
  889. xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
  890. xmlHashFree(table, xmlFreeEntityWrapper);
  891. }
  892. #ifdef LIBXML_TREE_ENABLED
  893. /**
  894. * xmlCopyEntity:
  895. * @ent: An entity
  896. *
  897. * Build a copy of an entity
  898. *
  899. * Returns the new xmlEntitiesPtr or NULL in case of error.
  900. */
  901. static void *
  902. xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
  903. xmlEntityPtr ent = (xmlEntityPtr) payload;
  904. xmlEntityPtr cur;
  905. cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
  906. if (cur == NULL) {
  907. xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
  908. return(NULL);
  909. }
  910. memset(cur, 0, sizeof(xmlEntity));
  911. cur->type = XML_ENTITY_DECL;
  912. cur->etype = ent->etype;
  913. if (ent->name != NULL)
  914. cur->name = xmlStrdup(ent->name);
  915. if (ent->ExternalID != NULL)
  916. cur->ExternalID = xmlStrdup(ent->ExternalID);
  917. if (ent->SystemID != NULL)
  918. cur->SystemID = xmlStrdup(ent->SystemID);
  919. if (ent->content != NULL)
  920. cur->content = xmlStrdup(ent->content);
  921. if (ent->orig != NULL)
  922. cur->orig = xmlStrdup(ent->orig);
  923. if (ent->URI != NULL)
  924. cur->URI = xmlStrdup(ent->URI);
  925. return(cur);
  926. }
  927. /**
  928. * xmlCopyEntitiesTable:
  929. * @table: An entity table
  930. *
  931. * Build a copy of an entity table.
  932. *
  933. * Returns the new xmlEntitiesTablePtr or NULL in case of error.
  934. */
  935. xmlEntitiesTablePtr
  936. xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
  937. return(xmlHashCopy(table, xmlCopyEntity));
  938. }
  939. #endif /* LIBXML_TREE_ENABLED */
  940. #ifdef LIBXML_OUTPUT_ENABLED
  941. /**
  942. * xmlDumpEntityContent:
  943. * @buf: An XML buffer.
  944. * @content: The entity content.
  945. *
  946. * This will dump the quoted string value, taking care of the special
  947. * treatment required by %
  948. */
  949. static void
  950. xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
  951. if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
  952. if (xmlStrchr(content, '%')) {
  953. const xmlChar * base, *cur;
  954. xmlBufferCCat(buf, "\"");
  955. base = cur = content;
  956. while (*cur != 0) {
  957. if (*cur == '"') {
  958. if (base != cur)
  959. xmlBufferAdd(buf, base, cur - base);
  960. xmlBufferAdd(buf, BAD_CAST "&quot;", 6);
  961. cur++;
  962. base = cur;
  963. } else if (*cur == '%') {
  964. if (base != cur)
  965. xmlBufferAdd(buf, base, cur - base);
  966. xmlBufferAdd(buf, BAD_CAST "&#x25;", 6);
  967. cur++;
  968. base = cur;
  969. } else {
  970. cur++;
  971. }
  972. }
  973. if (base != cur)
  974. xmlBufferAdd(buf, base, cur - base);
  975. xmlBufferCCat(buf, "\"");
  976. } else {
  977. xmlBufferWriteQuotedString(buf, content);
  978. }
  979. }
  980. /**
  981. * xmlDumpEntityDecl:
  982. * @buf: An XML buffer.
  983. * @ent: An entity table
  984. *
  985. * This will dump the content of the entity table as an XML DTD definition
  986. */
  987. void
  988. xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
  989. if ((buf == NULL) || (ent == NULL)) return;
  990. switch (ent->etype) {
  991. case XML_INTERNAL_GENERAL_ENTITY:
  992. xmlBufferWriteChar(buf, "<!ENTITY ");
  993. xmlBufferWriteCHAR(buf, ent->name);
  994. xmlBufferWriteChar(buf, " ");
  995. if (ent->orig != NULL)
  996. xmlBufferWriteQuotedString(buf, ent->orig);
  997. else
  998. xmlDumpEntityContent(buf, ent->content);
  999. xmlBufferWriteChar(buf, ">\n");
  1000. break;
  1001. case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
  1002. xmlBufferWriteChar(buf, "<!ENTITY ");
  1003. xmlBufferWriteCHAR(buf, ent->name);
  1004. if (ent->ExternalID != NULL) {
  1005. xmlBufferWriteChar(buf, " PUBLIC ");
  1006. xmlBufferWriteQuotedString(buf, ent->ExternalID);
  1007. xmlBufferWriteChar(buf, " ");
  1008. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1009. } else {
  1010. xmlBufferWriteChar(buf, " SYSTEM ");
  1011. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1012. }
  1013. xmlBufferWriteChar(buf, ">\n");
  1014. break;
  1015. case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
  1016. xmlBufferWriteChar(buf, "<!ENTITY ");
  1017. xmlBufferWriteCHAR(buf, ent->name);
  1018. if (ent->ExternalID != NULL) {
  1019. xmlBufferWriteChar(buf, " PUBLIC ");
  1020. xmlBufferWriteQuotedString(buf, ent->ExternalID);
  1021. xmlBufferWriteChar(buf, " ");
  1022. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1023. } else {
  1024. xmlBufferWriteChar(buf, " SYSTEM ");
  1025. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1026. }
  1027. if (ent->content != NULL) { /* Should be true ! */
  1028. xmlBufferWriteChar(buf, " NDATA ");
  1029. if (ent->orig != NULL)
  1030. xmlBufferWriteCHAR(buf, ent->orig);
  1031. else
  1032. xmlBufferWriteCHAR(buf, ent->content);
  1033. }
  1034. xmlBufferWriteChar(buf, ">\n");
  1035. break;
  1036. case XML_INTERNAL_PARAMETER_ENTITY:
  1037. xmlBufferWriteChar(buf, "<!ENTITY % ");
  1038. xmlBufferWriteCHAR(buf, ent->name);
  1039. xmlBufferWriteChar(buf, " ");
  1040. if (ent->orig == NULL)
  1041. xmlDumpEntityContent(buf, ent->content);
  1042. else
  1043. xmlBufferWriteQuotedString(buf, ent->orig);
  1044. xmlBufferWriteChar(buf, ">\n");
  1045. break;
  1046. case XML_EXTERNAL_PARAMETER_ENTITY:
  1047. xmlBufferWriteChar(buf, "<!ENTITY % ");
  1048. xmlBufferWriteCHAR(buf, ent->name);
  1049. if (ent->ExternalID != NULL) {
  1050. xmlBufferWriteChar(buf, " PUBLIC ");
  1051. xmlBufferWriteQuotedString(buf, ent->ExternalID);
  1052. xmlBufferWriteChar(buf, " ");
  1053. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1054. } else {
  1055. xmlBufferWriteChar(buf, " SYSTEM ");
  1056. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1057. }
  1058. xmlBufferWriteChar(buf, ">\n");
  1059. break;
  1060. default:
  1061. xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
  1062. "xmlDumpEntitiesDecl: internal: unknown type entity type");
  1063. }
  1064. }
  1065. /**
  1066. * xmlDumpEntityDeclScan:
  1067. * @ent: An entity table
  1068. * @buf: An XML buffer.
  1069. *
  1070. * When using the hash table scan function, arguments need to be reversed
  1071. */
  1072. static void
  1073. xmlDumpEntityDeclScan(void *ent, void *buf,
  1074. const xmlChar *name ATTRIBUTE_UNUSED) {
  1075. xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
  1076. }
  1077. /**
  1078. * xmlDumpEntitiesTable:
  1079. * @buf: An XML buffer.
  1080. * @table: An entity table
  1081. *
  1082. * This will dump the content of the entity table as an XML DTD definition
  1083. */
  1084. void
  1085. xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
  1086. xmlHashScan(table, xmlDumpEntityDeclScan, buf);
  1087. }
  1088. #endif /* LIBXML_OUTPUT_ENABLED */
  1089. #define bottom_entities
  1090. #include "elfgcchack.h"