mozTXTToHTMLConv.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428
  1. /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "mozTXTToHTMLConv.h"
  6. #include "nsNetUtil.h"
  7. #include "nsUnicharUtils.h"
  8. #include "nsCRT.h"
  9. #include "nsIExternalProtocolHandler.h"
  10. #include "nsIIOService.h"
  11. #include "nsIURI.h"
  12. #include <algorithm>
  13. #ifdef DEBUG_BenB_Perf
  14. #include "prtime.h"
  15. #include "prinrval.h"
  16. #endif
  // File-level floating-point constant. It is not referenced anywhere in the
  // visible part of this file.
  // NOTE(review): presumably a buffer growth factor used by code further
  // down - confirm against its users.
  17. const double growthRate = 1.2;
  18. // Bug 183111, editor now replaces multiple spaces with leading
  19. // 0xA0's and a single ending space, so need to treat 0xA0's as spaces.
  20. // 0xA0 is the Latin1/Unicode character for "non-breaking space (nbsp)"
  21. // Also recognize the Japanese ideographic space 0x3000 as a space.
  22. static inline bool IsSpace(const char16_t aChar)
  23. {
  24. return (nsCRT::IsAsciiSpace(aChar) || aChar == 0xA0 || aChar == 0x3000);
  25. }
  26. // Escape Char will take ch, escape it and append the result to
  27. // aStringToAppendTo
  28. void
  29. mozTXTToHTMLConv::EscapeChar(const char16_t ch, nsString& aStringToAppendTo,
  30. bool inAttribute)
  31. {
  32. switch (ch)
  33. {
  34. case '<':
  35. aStringToAppendTo.AppendLiteral("&lt;");
  36. break;
  37. case '>':
  38. aStringToAppendTo.AppendLiteral("&gt;");
  39. break;
  40. case '&':
  41. aStringToAppendTo.AppendLiteral("&amp;");
  42. break;
  43. case '"':
  44. if (inAttribute)
  45. {
  46. aStringToAppendTo.AppendLiteral("&quot;");
  47. break;
  48. }
  49. // else fall through
  50. MOZ_FALLTHROUGH;
  51. default:
  52. aStringToAppendTo += ch;
  53. }
  54. return;
  55. }
  56. // EscapeStr takes the passed in string and
  57. // escapes it IN PLACE.
  58. void
  59. mozTXTToHTMLConv::EscapeStr(nsString& aInString, bool inAttribute)
  60. {
  61. // the replace substring routines
  62. // don't seem to work if you have a character
  63. // in the in string that is also in the replacement
  64. // string! =(
  65. //aInString.ReplaceSubstring("&", "&amp;");
  66. //aInString.ReplaceSubstring("<", "&lt;");
  67. //aInString.ReplaceSubstring(">", "&gt;");
  68. for (uint32_t i = 0; i < aInString.Length();)
  69. {
  70. switch (aInString[i])
  71. {
  72. case '<':
  73. aInString.Cut(i, 1);
  74. aInString.Insert(NS_LITERAL_STRING("&lt;"), i);
  75. i += 4; // skip past the integers we just added
  76. break;
  77. case '>':
  78. aInString.Cut(i, 1);
  79. aInString.Insert(NS_LITERAL_STRING("&gt;"), i);
  80. i += 4; // skip past the integers we just added
  81. break;
  82. case '&':
  83. aInString.Cut(i, 1);
  84. aInString.Insert(NS_LITERAL_STRING("&amp;"), i);
  85. i += 5; // skip past the integers we just added
  86. break;
  87. case '"':
  88. if (inAttribute)
  89. {
  90. aInString.Cut(i, 1);
  91. aInString.Insert(NS_LITERAL_STRING("&quot;"), i);
  92. i += 6;
  93. break;
  94. }
  95. // else fall through
  96. MOZ_FALLTHROUGH;
  97. default:
  98. i++;
  99. }
  100. }
  101. }
  102. void
  103. mozTXTToHTMLConv::UnescapeStr(const char16_t * aInString, int32_t aStartPos, int32_t aLength, nsString& aOutString)
  104. {
  105. const char16_t * subString = nullptr;
  106. for (uint32_t i = aStartPos; int32_t(i) - aStartPos < aLength;)
  107. {
  108. int32_t remainingChars = i - aStartPos;
  109. if (aInString[i] == '&')
  110. {
  111. subString = &aInString[i];
  112. if (!nsCRT::strncmp(subString, u"&lt;", std::min(4, aLength - remainingChars)))
  113. {
  114. aOutString.Append(char16_t('<'));
  115. i += 4;
  116. }
  117. else if (!nsCRT::strncmp(subString, u"&gt;", std::min(4, aLength - remainingChars)))
  118. {
  119. aOutString.Append(char16_t('>'));
  120. i += 4;
  121. }
  122. else if (!nsCRT::strncmp(subString, u"&amp;", std::min(5, aLength - remainingChars)))
  123. {
  124. aOutString.Append(char16_t('&'));
  125. i += 5;
  126. }
  127. else if (!nsCRT::strncmp(subString, u"&quot;", std::min(6, aLength - remainingChars)))
  128. {
  129. aOutString.Append(char16_t('"'));
  130. i += 6;
  131. }
  132. else
  133. {
  134. aOutString += aInString[i];
  135. i++;
  136. }
  137. }
  138. else
  139. {
  140. aOutString += aInString[i];
  141. i++;
  142. }
  143. }
  144. }
  145. void
  146. mozTXTToHTMLConv::CompleteAbbreviatedURL(const char16_t * aInString, int32_t aInLength,
  147. const uint32_t pos, nsString& aOutString)
  148. {
  149. NS_ASSERTION(int32_t(pos) < aInLength, "bad args to CompleteAbbreviatedURL, see bug #190851");
  150. if (int32_t(pos) >= aInLength)
  151. return;
  152. if (aInString[pos] == '@')
  153. {
  154. // only pre-pend a mailto url if the string contains a .domain in it..
  155. //i.e. we want to linkify johndoe@foo.com but not "let's meet @8pm"
  156. nsDependentString inString(aInString, aInLength);
  157. if (inString.FindChar('.', pos) != kNotFound) // if we have a '.' after the @ sign....
  158. {
  159. aOutString.AssignLiteral("mailto:");
  160. aOutString += aInString;
  161. }
  162. }
  163. else if (aInString[pos] == '.')
  164. {
  165. if (ItMatchesDelimited(aInString, aInLength,
  166. u"www.", 4, LT_IGNORE, LT_IGNORE))
  167. {
  168. aOutString.AssignLiteral("http://");
  169. aOutString += aInString;
  170. }
  171. else if (ItMatchesDelimited(aInString,aInLength, u"ftp.", 4, LT_IGNORE, LT_IGNORE))
  172. {
  173. aOutString.AssignLiteral("ftp://");
  174. aOutString += aInString;
  175. }
  176. }
  177. }
  178. bool
  179. mozTXTToHTMLConv::FindURLStart(const char16_t * aInString, int32_t aInLength,
  180. const uint32_t pos, const modetype check,
  181. uint32_t& start)
  182. {
  183. switch(check)
  184. { // no breaks, because end of blocks is never reached
  185. case RFC1738:
  186. {
  187. if (!nsCRT::strncmp(&aInString[std::max(int32_t(pos - 4), 0)], u"<URL:", 5))
  188. {
  189. start = pos + 1;
  190. return true;
  191. }
  192. else
  193. return false;
  194. }
  195. case RFC2396E:
  196. {
  197. nsString temp(aInString, aInLength);
  198. int32_t i = pos <= 0 ? kNotFound : temp.RFindCharInSet(u"<>\"", pos - 1);
  199. if (i != kNotFound && (temp[uint32_t(i)] == '<' ||
  200. temp[uint32_t(i)] == '"'))
  201. {
  202. start = uint32_t(++i);
  203. return start < pos;
  204. }
  205. else
  206. return false;
  207. }
  208. case freetext:
  209. {
  210. int32_t i = pos - 1;
  211. for (; i >= 0 && (
  212. nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
  213. nsCRT::IsAsciiDigit(aInString[uint32_t(i)]) ||
  214. aInString[uint32_t(i)] == '+' ||
  215. aInString[uint32_t(i)] == '-' ||
  216. aInString[uint32_t(i)] == '.'
  217. ); i--)
  218. ;
  219. if (++i >= 0 && uint32_t(i) < pos && nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]))
  220. {
  221. start = uint32_t(i);
  222. return true;
  223. }
  224. else
  225. return false;
  226. }
  227. case abbreviated:
  228. {
  229. int32_t i = pos - 1;
  230. // This disallows non-ascii-characters for email.
  231. // Currently correct, but revisit later after standards changed.
  232. bool isEmail = aInString[pos] == (char16_t)'@';
  233. // These chars mark the start of the URL
  234. for (; i >= 0
  235. && aInString[uint32_t(i)] != '>' && aInString[uint32_t(i)] != '<'
  236. && aInString[uint32_t(i)] != '"' && aInString[uint32_t(i)] != '\''
  237. && aInString[uint32_t(i)] != '`' && aInString[uint32_t(i)] != ','
  238. && aInString[uint32_t(i)] != '{' && aInString[uint32_t(i)] != '['
  239. && aInString[uint32_t(i)] != '(' && aInString[uint32_t(i)] != '|'
  240. && aInString[uint32_t(i)] != '\\'
  241. && !IsSpace(aInString[uint32_t(i)])
  242. && (!isEmail || nsCRT::IsAscii(aInString[uint32_t(i)]))
  243. ; i--)
  244. ;
  245. if
  246. (
  247. ++i >= 0 && uint32_t(i) < pos
  248. &&
  249. (
  250. nsCRT::IsAsciiAlpha(aInString[uint32_t(i)]) ||
  251. nsCRT::IsAsciiDigit(aInString[uint32_t(i)])
  252. )
  253. )
  254. {
  255. start = uint32_t(i);
  256. return true;
  257. }
  258. else
  259. return false;
  260. }
  261. default:
  262. return false;
  263. } //switch
  264. }
  265. bool
  266. mozTXTToHTMLConv::FindURLEnd(const char16_t * aInString, int32_t aInStringLength, const uint32_t pos,
  267. const modetype check, const uint32_t start, uint32_t& end)
  268. {
  269. switch(check)
  270. { // no breaks, because end of blocks is never reached
  271. case RFC1738:
  272. case RFC2396E:
  273. {
  274. nsString temp(aInString, aInStringLength);
  275. int32_t i = temp.FindCharInSet(u"<>\"", pos + 1);
  276. if (i != kNotFound && temp[uint32_t(i--)] ==
  277. (check == RFC1738 || temp[start - 1] == '<' ? '>' : '"'))
  278. {
  279. end = uint32_t(i);
  280. return end > pos;
  281. }
  282. return false;
  283. }
  284. case freetext:
  285. case abbreviated:
  286. {
  287. uint32_t i = pos + 1;
  288. bool isEmail = aInString[pos] == (char16_t)'@';
  289. bool seenOpeningParenthesis = false; // there is a '(' earlier in the URL
  290. bool seenOpeningSquareBracket = false; // there is a '[' earlier in the URL
  291. for (; int32_t(i) < aInStringLength; i++)
  292. {
  293. // These chars mark the end of the URL
  294. if (aInString[i] == '>' || aInString[i] == '<' ||
  295. aInString[i] == '"' || aInString[i] == '`' ||
  296. aInString[i] == '}' || aInString[i] == '{' ||
  297. (aInString[i] == ')' && !seenOpeningParenthesis) ||
  298. (aInString[i] == ']' && !seenOpeningSquareBracket) ||
  299. // Allow IPv6 adresses like http://[1080::8:800:200C:417A]/foo.
  300. (aInString[i] == '[' && i > 2 &&
  301. (aInString[i - 1] != '/' || aInString[i - 2] != '/')) ||
  302. IsSpace(aInString[i]))
  303. break;
  304. // Disallow non-ascii-characters for email.
  305. // Currently correct, but revisit later after standards changed.
  306. if (isEmail && (
  307. aInString[i] == '(' || aInString[i] == '\'' ||
  308. !nsCRT::IsAscii(aInString[i])))
  309. break;
  310. if (aInString[i] == '(')
  311. seenOpeningParenthesis = true;
  312. if (aInString[i] == '[')
  313. seenOpeningSquareBracket = true;
  314. }
  315. // These chars are allowed in the middle of the URL, but not at end.
  316. // Technically they are, but are used in normal text after the URL.
  317. while (--i > pos && (
  318. aInString[i] == '.' || aInString[i] == ',' || aInString[i] == ';' ||
  319. aInString[i] == '!' || aInString[i] == '?' || aInString[i] == '-' ||
  320. aInString[i] == ':' || aInString[i] == '\''
  321. ))
  322. ;
  323. if (i > pos)
  324. {
  325. end = i;
  326. return true;
  327. }
  328. return false;
  329. }
  330. default:
  331. return false;
  332. } //switch
  333. }
  334. void
  335. mozTXTToHTMLConv::CalculateURLBoundaries(const char16_t * aInString, int32_t aInStringLength,
  336. const uint32_t pos, const uint32_t whathasbeendone,
  337. const modetype check, const uint32_t start, const uint32_t end,
  338. nsString& txtURL, nsString& desc,
  339. int32_t& replaceBefore, int32_t& replaceAfter)
  340. {
  341. uint32_t descstart = start;
  342. switch(check)
  343. {
  344. case RFC1738:
  345. {
  346. descstart = start - 5;
  347. desc.Append(&aInString[descstart], end - descstart + 2); // include "<URL:" and ">"
  348. replaceAfter = end - pos + 1;
  349. } break;
  350. case RFC2396E:
  351. {
  352. descstart = start - 1;
  353. desc.Append(&aInString[descstart], end - descstart + 2); // include brackets
  354. replaceAfter = end - pos + 1;
  355. } break;
  356. case freetext:
  357. case abbreviated:
  358. {
  359. descstart = start;
  360. desc.Append(&aInString[descstart], end - start + 1); // don't include brackets
  361. replaceAfter = end - pos;
  362. } break;
  363. default: break;
  364. } //switch
  365. EscapeStr(desc, false);
  366. txtURL.Append(&aInString[start], end - start + 1);
  367. txtURL.StripWhitespace();
  368. // FIX ME
  369. nsAutoString temp2;
  370. ScanTXT(&aInString[descstart], pos - descstart, ~kURLs /*prevents loop*/ & whathasbeendone, temp2);
  371. replaceBefore = temp2.Length();
  372. return;
  373. }
  374. bool mozTXTToHTMLConv::ShouldLinkify(const nsCString& aURL)
  375. {
  376. if (!mIOService)
  377. return false;
  378. nsAutoCString scheme;
  379. nsresult rv = mIOService->ExtractScheme(aURL, scheme);
  380. if(NS_FAILED(rv))
  381. return false;
  382. // Get the handler for this scheme.
  383. nsCOMPtr<nsIProtocolHandler> handler;
  384. rv = mIOService->GetProtocolHandler(scheme.get(), getter_AddRefs(handler));
  385. if(NS_FAILED(rv))
  386. return false;
  387. // Is it an external protocol handler? If not, linkify it.
  388. nsCOMPtr<nsIExternalProtocolHandler> externalHandler = do_QueryInterface(handler);
  389. if (!externalHandler)
  390. return true; // handler is built-in, linkify it!
  391. // If external app exists for the scheme then linkify it.
  392. bool exists;
  393. rv = externalHandler->ExternalAppExistsForScheme(scheme, &exists);
  394. return(NS_SUCCEEDED(rv) && exists);
  395. }
  396. bool
  397. mozTXTToHTMLConv::CheckURLAndCreateHTML(
  398. const nsString& txtURL, const nsString& desc, const modetype mode,
  399. nsString& outputHTML)
  400. {
  401. // Create *uri from txtURL
  402. nsCOMPtr<nsIURI> uri;
  403. nsresult rv;
  404. // Lazily initialize mIOService
  405. if (!mIOService)
  406. {
  407. mIOService = do_GetIOService();
  408. if (!mIOService)
  409. return false;
  410. }
  411. // See if the url should be linkified.
  412. NS_ConvertUTF16toUTF8 utf8URL(txtURL);
  413. if (!ShouldLinkify(utf8URL))
  414. return false;
  415. // it would be faster if we could just check to see if there is a protocol
  416. // handler for the url and return instead of actually trying to create a url...
  417. rv = mIOService->NewURI(utf8URL, nullptr, nullptr, getter_AddRefs(uri));
  418. // Real work
  419. if (NS_SUCCEEDED(rv) && uri)
  420. {
  421. outputHTML.AssignLiteral("<a class=\"moz-txt-link-");
  422. switch(mode)
  423. {
  424. case RFC1738:
  425. outputHTML.AppendLiteral("rfc1738");
  426. break;
  427. case RFC2396E:
  428. outputHTML.AppendLiteral("rfc2396E");
  429. break;
  430. case freetext:
  431. outputHTML.AppendLiteral("freetext");
  432. break;
  433. case abbreviated:
  434. outputHTML.AppendLiteral("abbreviated");
  435. break;
  436. default: break;
  437. }
  438. nsAutoString escapedURL(txtURL);
  439. EscapeStr(escapedURL, true);
  440. outputHTML.AppendLiteral("\" href=\"");
  441. outputHTML += escapedURL;
  442. outputHTML.AppendLiteral("\">");
  443. outputHTML += desc;
  444. outputHTML.AppendLiteral("</a>");
  445. return true;
  446. }
  447. else
  448. return false;
  449. }
  450. NS_IMETHODIMP mozTXTToHTMLConv::FindURLInPlaintext(const char16_t * aInString, int32_t aInLength, int32_t aPos, int32_t * aStartPos, int32_t * aEndPos)
  451. {
  452. // call FindURL on the passed in string
  453. nsAutoString outputHTML; // we'll ignore the generated output HTML
  454. *aStartPos = -1;
  455. *aEndPos = -1;
  456. FindURL(aInString, aInLength, aPos, kURLs, outputHTML, *aStartPos, *aEndPos);
  457. return NS_OK;
  458. }
  459. bool
  460. mozTXTToHTMLConv::FindURL(const char16_t * aInString, int32_t aInLength, const uint32_t pos,
  461. const uint32_t whathasbeendone,
  462. nsString& outputHTML, int32_t& replaceBefore, int32_t& replaceAfter)
  463. {
  464. enum statetype {unchecked, invalid, startok, endok, success};
  465. static const modetype ranking[] = {RFC1738, RFC2396E, freetext, abbreviated};
  466. statetype state[mozTXTToHTMLConv_lastMode + 1]; // 0(=unknown)..lastMode
  467. /* I don't like this abuse of enums as index for the array,
  468. but I don't know a better method */
  469. // Define, which modes to check
  470. /* all modes but abbreviated are checked for text[pos] == ':',
  471. only abbreviated for '.', RFC2396E and abbreviated for '@' */
  472. for (modetype iState = unknown; iState <= mozTXTToHTMLConv_lastMode;
  473. iState = modetype(iState + 1))
  474. state[iState] = aInString[pos] == ':' ? unchecked : invalid;
  475. switch (aInString[pos])
  476. {
  477. case '@':
  478. state[RFC2396E] = unchecked;
  479. MOZ_FALLTHROUGH;
  480. case '.':
  481. state[abbreviated] = unchecked;
  482. break;
  483. case ':':
  484. state[abbreviated] = invalid;
  485. break;
  486. default:
  487. break;
  488. }
  489. // Test, first successful mode wins, sequence defined by |ranking|
  490. int32_t iCheck = 0; // the currently tested modetype
  491. modetype check = ranking[iCheck];
  492. for (; iCheck < mozTXTToHTMLConv_numberOfModes && state[check] != success;
  493. iCheck++)
  494. /* check state from last run.
  495. If this is the first, check this one, which isn't = success yet */
  496. {
  497. check = ranking[iCheck];
  498. uint32_t start, end;
  499. if (state[check] == unchecked)
  500. if (FindURLStart(aInString, aInLength, pos, check, start))
  501. state[check] = startok;
  502. if (state[check] == startok)
  503. if (FindURLEnd(aInString, aInLength, pos, check, start, end))
  504. state[check] = endok;
  505. if (state[check] == endok)
  506. {
  507. nsAutoString txtURL, desc;
  508. int32_t resultReplaceBefore, resultReplaceAfter;
  509. CalculateURLBoundaries(aInString, aInLength, pos, whathasbeendone, check, start, end,
  510. txtURL, desc,
  511. resultReplaceBefore, resultReplaceAfter);
  512. if (aInString[pos] != ':')
  513. {
  514. nsAutoString temp = txtURL;
  515. txtURL.SetLength(0);
  516. CompleteAbbreviatedURL(temp.get(),temp.Length(), pos - start, txtURL);
  517. }
  518. if (!txtURL.IsEmpty() && CheckURLAndCreateHTML(txtURL, desc, check,
  519. outputHTML))
  520. {
  521. replaceBefore = resultReplaceBefore;
  522. replaceAfter = resultReplaceAfter;
  523. state[check] = success;
  524. }
  525. } // if
  526. } // for
  527. return state[check] == success;
  528. }
  529. bool
  530. mozTXTToHTMLConv::ItMatchesDelimited(const char16_t * aInString,
  531. int32_t aInLength, const char16_t* rep, int32_t aRepLen,
  532. LIMTYPE before, LIMTYPE after)
  533. {
  534. // this little method gets called a LOT. I found we were spending a
  535. // lot of time just calculating the length of the variable "rep"
  536. // over and over again every time we called it. So we're now passing
  537. // an integer in here.
  538. int32_t textLen = aInLength;
  539. if
  540. (
  541. ((before == LT_IGNORE && (after == LT_IGNORE || after == LT_DELIMITER))
  542. && textLen < aRepLen) ||
  543. ((before != LT_IGNORE || (after != LT_IGNORE && after != LT_DELIMITER))
  544. && textLen < aRepLen + 1) ||
  545. (before != LT_IGNORE && after != LT_IGNORE && after != LT_DELIMITER
  546. && textLen < aRepLen + 2)
  547. )
  548. return false;
  549. char16_t text0 = aInString[0];
  550. char16_t textAfterPos = aInString[aRepLen + (before == LT_IGNORE ? 0 : 1)];
  551. if
  552. (
  553. (before == LT_ALPHA
  554. && !nsCRT::IsAsciiAlpha(text0)) ||
  555. (before == LT_DIGIT
  556. && !nsCRT::IsAsciiDigit(text0)) ||
  557. (before == LT_DELIMITER
  558. &&
  559. (
  560. nsCRT::IsAsciiAlpha(text0) ||
  561. nsCRT::IsAsciiDigit(text0) ||
  562. text0 == *rep
  563. )) ||
  564. (after == LT_ALPHA
  565. && !nsCRT::IsAsciiAlpha(textAfterPos)) ||
  566. (after == LT_DIGIT
  567. && !nsCRT::IsAsciiDigit(textAfterPos)) ||
  568. (after == LT_DELIMITER
  569. &&
  570. (
  571. nsCRT::IsAsciiAlpha(textAfterPos) ||
  572. nsCRT::IsAsciiDigit(textAfterPos) ||
  573. textAfterPos == *rep
  574. )) ||
  575. !Substring(Substring(aInString, aInString+aInLength),
  576. (before == LT_IGNORE ? 0 : 1),
  577. aRepLen).Equals(Substring(rep, rep+aRepLen),
  578. nsCaseInsensitiveStringComparator())
  579. )
  580. return false;
  581. return true;
  582. }
  583. uint32_t
  584. mozTXTToHTMLConv::NumberOfMatches(const char16_t * aInString, int32_t aInStringLength,
  585. const char16_t* rep, int32_t aRepLen, LIMTYPE before, LIMTYPE after)
  586. {
  587. uint32_t result = 0;
  588. for (int32_t i = 0; i < aInStringLength; i++)
  589. {
  590. const char16_t * indexIntoString = &aInString[i];
  591. if (ItMatchesDelimited(indexIntoString, aInStringLength - i, rep, aRepLen, before, after))
  592. result++;
  593. }
  594. return result;
  595. }
  596. // NOTE: the converted html for the phrase is appended to aOutString
  597. // tagHTML and attributeHTML are plain ASCII (literal strings, in fact)
  598. bool
  599. mozTXTToHTMLConv::StructPhraseHit(const char16_t * aInString, int32_t aInStringLength, bool col0,
  600. const char16_t* tagTXT, int32_t aTagTXTLen,
  601. const char* tagHTML, const char* attributeHTML,
  602. nsString& aOutString, uint32_t& openTags)
  603. {
  604. /* We're searching for the following pattern:
  605. LT_DELIMITER - "*" - ALPHA -
  606. [ some text (maybe more "*"-pairs) - ALPHA ] "*" - LT_DELIMITER.
  607. <strong> is only inserted, if existence of a pair could be verified
  608. We use the first opening/closing tag, if we can choose */
  609. const char16_t * newOffset = aInString;
  610. int32_t newLength = aInStringLength;
  611. if (!col0) // skip the first element?
  612. {
  613. newOffset = &aInString[1];
  614. newLength = aInStringLength - 1;
  615. }
  616. // opening tag
  617. if
  618. (
  619. ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen,
  620. (col0 ? LT_IGNORE : LT_DELIMITER), LT_ALPHA) // is opening tag
  621. && NumberOfMatches(newOffset, newLength, tagTXT, aTagTXTLen,
  622. LT_ALPHA, LT_DELIMITER) // remaining closing tags
  623. > openTags
  624. )
  625. {
  626. openTags++;
  627. aOutString.Append('<');
  628. aOutString.AppendASCII(tagHTML);
  629. aOutString.Append(char16_t(' '));
  630. aOutString.AppendASCII(attributeHTML);
  631. aOutString.AppendLiteral("><span class=\"moz-txt-tag\">");
  632. aOutString.Append(tagTXT);
  633. aOutString.AppendLiteral("</span>");
  634. return true;
  635. }
  636. // closing tag
  637. else if (openTags > 0
  638. && ItMatchesDelimited(aInString, aInStringLength, tagTXT, aTagTXTLen, LT_ALPHA, LT_DELIMITER))
  639. {
  640. openTags--;
  641. aOutString.AppendLiteral("<span class=\"moz-txt-tag\">");
  642. aOutString.Append(tagTXT);
  643. aOutString.AppendLiteral("</span></");
  644. aOutString.AppendASCII(tagHTML);
  645. aOutString.Append(char16_t('>'));
  646. return true;
  647. }
  648. return false;
  649. }
  650. bool
  651. mozTXTToHTMLConv::SmilyHit(const char16_t * aInString, int32_t aLength, bool col0,
  652. const char* tagTXT, const char* imageName,
  653. nsString& outputHTML, int32_t& glyphTextLen)
  654. {
  655. if ( !aInString || !tagTXT || !imageName )
  656. return false;
  657. int32_t tagLen = strlen(tagTXT);
  658. uint32_t delim = (col0 ? 0 : 1) + tagLen;
  659. if
  660. (
  661. (col0 || IsSpace(aInString[0]))
  662. &&
  663. (
  664. aLength <= int32_t(delim) ||
  665. IsSpace(aInString[delim]) ||
  666. (aLength > int32_t(delim + 1)
  667. &&
  668. (
  669. aInString[delim] == '.' ||
  670. aInString[delim] == ',' ||
  671. aInString[delim] == ';' ||
  672. aInString[delim] == '8' ||
  673. aInString[delim] == '>' ||
  674. aInString[delim] == '!' ||
  675. aInString[delim] == '?'
  676. )
  677. && IsSpace(aInString[delim + 1]))
  678. )
  679. && ItMatchesDelimited(aInString, aLength, NS_ConvertASCIItoUTF16(tagTXT).get(), tagLen,
  680. col0 ? LT_IGNORE : LT_DELIMITER, LT_IGNORE)
  681. // Note: tests at different pos for LT_IGNORE and LT_DELIMITER
  682. )
  683. {
  684. if (!col0)
  685. {
  686. outputHTML.Truncate();
  687. outputHTML.Append(char16_t(' '));
  688. }
  689. outputHTML.AppendLiteral("<span class=\""); // <span class="
  690. AppendASCIItoUTF16(imageName, outputHTML); // e.g. smiley-frown
  691. outputHTML.AppendLiteral("\" title=\""); // " title="
  692. AppendASCIItoUTF16(tagTXT, outputHTML); // smiley tooltip
  693. outputHTML.AppendLiteral("\"><span>"); // "><span>
  694. AppendASCIItoUTF16(tagTXT, outputHTML); // original text
  695. outputHTML.AppendLiteral("</span></span>"); // </span></span>
  696. glyphTextLen = (col0 ? 0 : 1) + tagLen;
  697. return true;
  698. }
  699. return false;
  700. }
  701. // the glyph is appended to aOutputString instead of the original string...
  702. bool
  703. mozTXTToHTMLConv::GlyphHit(const char16_t * aInString, int32_t aInLength, bool col0,
  704. nsString& aOutputString, int32_t& glyphTextLen)
  705. {
  706. char16_t text0 = aInString[0];
  707. char16_t text1 = aInString[1];
  708. char16_t firstChar = (col0 ? text0 : text1);
  709. // temporary variable used to store the glyph html text
  710. nsAutoString outputHTML;
  711. bool bTestSmilie;
  712. bool bArg = false;
  713. int i;
  714. // refactor some of this mess to avoid code duplication and speed execution a bit
  715. // there are two cases that need to be tried one after another. To avoid a lot of
  716. // duplicate code, rolling into a loop
  717. i = 0;
  718. while ( i < 2 )
  719. {
  720. bTestSmilie = false;
  721. if ( !i && (firstChar == ':' || firstChar == ';' || firstChar == '=' || firstChar == '>' || firstChar == '8' || firstChar == 'O'))
  722. {
  723. // first test passed
  724. bTestSmilie = true;
  725. bArg = col0;
  726. }
  727. if ( i && col0 && ( text1 == ':' || text1 == ';' || text1 == '=' || text1 == '>' || text1 == '8' || text1 == 'O' ) )
  728. {
  729. // second test passed
  730. bTestSmilie = true;
  731. bArg = false;
  732. }
  733. if ( bTestSmilie && (
  734. SmilyHit(aInString, aInLength, bArg,
  735. ":-)",
  736. "moz-smiley-s1", // smile
  737. outputHTML, glyphTextLen) ||
  738. SmilyHit(aInString, aInLength, bArg,
  739. ":)",
  740. "moz-smiley-s1", // smile
  741. outputHTML, glyphTextLen) ||
  742. SmilyHit(aInString, aInLength, bArg,
  743. ":-D",
  744. "moz-smiley-s5", // laughing
  745. outputHTML, glyphTextLen) ||
  746. SmilyHit(aInString, aInLength, bArg,
  747. ":-(",
  748. "moz-smiley-s2", // frown
  749. outputHTML, glyphTextLen) ||
  750. SmilyHit(aInString, aInLength, bArg,
  751. ":(",
  752. "moz-smiley-s2", // frown
  753. outputHTML, glyphTextLen) ||
  754. SmilyHit(aInString, aInLength, bArg,
  755. ":-[",
  756. "moz-smiley-s6", // embarassed
  757. outputHTML, glyphTextLen) ||
  758. SmilyHit(aInString, aInLength, bArg,
  759. ";-)",
  760. "moz-smiley-s3", // wink
  761. outputHTML, glyphTextLen) ||
  762. SmilyHit(aInString, aInLength, col0,
  763. ";)",
  764. "moz-smiley-s3", // wink
  765. outputHTML, glyphTextLen) ||
  766. SmilyHit(aInString, aInLength, bArg,
  767. ":-\\",
  768. "moz-smiley-s7", // undecided
  769. outputHTML, glyphTextLen) ||
  770. SmilyHit(aInString, aInLength, bArg,
  771. ":-P",
  772. "moz-smiley-s4", // tongue
  773. outputHTML, glyphTextLen) ||
  774. SmilyHit(aInString, aInLength, bArg,
  775. ";-P",
  776. "moz-smiley-s4", // tongue
  777. outputHTML, glyphTextLen) ||
  778. SmilyHit(aInString, aInLength, bArg,
  779. "=-O",
  780. "moz-smiley-s8", // surprise
  781. outputHTML, glyphTextLen) ||
  782. SmilyHit(aInString, aInLength, bArg,
  783. ":-*",
  784. "moz-smiley-s9", // kiss
  785. outputHTML, glyphTextLen) ||
  786. SmilyHit(aInString, aInLength, bArg,
  787. ">:o",
  788. "moz-smiley-s10", // yell
  789. outputHTML, glyphTextLen) ||
  790. SmilyHit(aInString, aInLength, bArg,
  791. ">:-o",
  792. "moz-smiley-s10", // yell
  793. outputHTML, glyphTextLen) ||
  794. SmilyHit(aInString, aInLength, bArg,
  795. "8-)",
  796. "moz-smiley-s11", // cool
  797. outputHTML, glyphTextLen) ||
  798. SmilyHit(aInString, aInLength, bArg,
  799. ":-$",
  800. "moz-smiley-s12", // money
  801. outputHTML, glyphTextLen) ||
  802. SmilyHit(aInString, aInLength, bArg,
  803. ":-!",
  804. "moz-smiley-s13", // foot
  805. outputHTML, glyphTextLen) ||
  806. SmilyHit(aInString, aInLength, bArg,
  807. "O:-)",
  808. "moz-smiley-s14", // innocent
  809. outputHTML, glyphTextLen) ||
  810. SmilyHit(aInString, aInLength, bArg,
  811. ":'(",
  812. "moz-smiley-s15", // cry
  813. outputHTML, glyphTextLen) ||
  814. SmilyHit(aInString, aInLength, bArg,
  815. ":-X",
  816. "moz-smiley-s16", // sealed
  817. outputHTML, glyphTextLen)
  818. )
  819. )
  820. {
  821. aOutputString.Append(outputHTML);
  822. return true;
  823. }
  824. i++;
  825. }
  826. if (text0 == '\f')
  827. {
  828. aOutputString.AppendLiteral("<span class='moz-txt-formfeed'></span>");
  829. glyphTextLen = 1;
  830. return true;
  831. }
  832. if (text0 == '+' || text1 == '+')
  833. {
  834. if (ItMatchesDelimited(aInString, aInLength,
  835. u" +/-", 4,
  836. LT_IGNORE, LT_IGNORE))
  837. {
  838. aOutputString.AppendLiteral(" &plusmn;");
  839. glyphTextLen = 4;
  840. return true;
  841. }
  842. if (col0 && ItMatchesDelimited(aInString, aInLength,
  843. u"+/-", 3,
  844. LT_IGNORE, LT_IGNORE))
  845. {
  846. aOutputString.AppendLiteral("&plusmn;");
  847. glyphTextLen = 3;
  848. return true;
  849. }
  850. }
  851. // x^2 => x<sup>2</sup>, also handle powers x^-2, x^0.5
  852. // implement regular expression /[\dA-Za-z\)\]}]\^-?\d+(\.\d+)*[^\dA-Za-z]/
  853. if
  854. (
  855. text1 == '^'
  856. &&
  857. (
  858. nsCRT::IsAsciiDigit(text0) || nsCRT::IsAsciiAlpha(text0) ||
  859. text0 == ')' || text0 == ']' || text0 == '}'
  860. )
  861. &&
  862. (
  863. (2 < aInLength && nsCRT::IsAsciiDigit(aInString[2])) ||
  864. (3 < aInLength && aInString[2] == '-' && nsCRT::IsAsciiDigit(aInString[3]))
  865. )
  866. )
  867. {
  868. // Find first non-digit
  869. int32_t delimPos = 3; // skip "^" and first digit (or '-')
  870. for (; delimPos < aInLength
  871. &&
  872. (
  873. nsCRT::IsAsciiDigit(aInString[delimPos]) ||
  874. (aInString[delimPos] == '.' && delimPos + 1 < aInLength &&
  875. nsCRT::IsAsciiDigit(aInString[delimPos + 1]))
  876. );
  877. delimPos++)
  878. ;
  879. if (delimPos < aInLength && nsCRT::IsAsciiAlpha(aInString[delimPos]))
  880. {
  881. return false;
  882. }
  883. outputHTML.Truncate();
  884. outputHTML += text0;
  885. outputHTML.AppendLiteral(
  886. "<sup class=\"moz-txt-sup\">"
  887. "<span style=\"display:inline-block;width:0;height:0;overflow:hidden\">"
  888. "^</span>");
  889. aOutputString.Append(outputHTML);
  890. aOutputString.Append(&aInString[2], delimPos - 2);
  891. aOutputString.AppendLiteral("</sup>");
  892. glyphTextLen = delimPos /* - 1 + 1 */ ;
  893. return true;
  894. }
  /*
    The following strings are not substituted:
    |TXT   |HTML     |Reason
    +------+---------+----------
      ->     &rarr;    Bug #454
      =>     &rArr;    ditto
      <-     &larr;    ditto
      <=     &lArr;    ditto
      (tm)   &trade;   ditto
      1/4    &frac14;  is triggered by 1/4 Part 1, 2/4 Part 2, ...
      3/4    &frac34;  ditto
      1/2    &frac12;  similar
  */
  908. return false;
  909. }
  910. /***************************************************************************
  911. Library-internal Interface
  912. ****************************************************************************/
  913. mozTXTToHTMLConv::mozTXTToHTMLConv()
  914. {
  915. }
  916. mozTXTToHTMLConv::~mozTXTToHTMLConv()
  917. {
  918. }
  919. NS_IMPL_ISUPPORTS(mozTXTToHTMLConv,
  920. mozITXTToHTMLConv,
  921. nsIStreamConverter,
  922. nsIStreamListener,
  923. nsIRequestObserver)
  924. int32_t
  925. mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line,
  926. uint32_t& logLineStart)
  927. {
  928. int32_t result = 0;
  929. int32_t lineLength = NS_strlen(line);
  930. bool moreCites = true;
  931. while (moreCites)
  932. {
  933. /* E.g. the following lines count as quote:
  934. > text
  935. //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
  936. >text
  937. //#ifdef QUOTE_RECOGNITION_AGGRESSIVE
  938. > text
  939. ] text
  940. USER> text
  941. USER] text
  942. //#endif
  943. logLineStart is the position of "t" in this example
  944. */
  945. uint32_t i = logLineStart;
  946. #ifdef QUOTE_RECOGNITION_AGGRESSIVE
  947. for (; int32_t(i) < lineLength && IsSpace(line[i]); i++)
  948. ;
  949. for (; int32_t(i) < lineLength && nsCRT::IsAsciiAlpha(line[i])
  950. && nsCRT::IsUpper(line[i]) ; i++)
  951. ;
  952. if (int32_t(i) < lineLength && (line[i] == '>' || line[i] == ']'))
  953. #else
  954. if (int32_t(i) < lineLength && line[i] == '>')
  955. #endif
  956. {
  957. i++;
  958. if (int32_t(i) < lineLength && line[i] == ' ')
  959. i++;
  960. // sendmail/mbox
  961. // Placed here for performance increase
  962. const char16_t * indexString = &line[logLineStart];
  963. // here, |logLineStart < lineLength| is always true
  964. uint32_t minlength = std::min(uint32_t(6), NS_strlen(indexString));
  965. if (Substring(indexString,
  966. indexString+minlength).Equals(Substring(NS_LITERAL_STRING(">From "), 0, minlength),
  967. nsCaseInsensitiveStringComparator()))
  968. //XXX RFC2646
  969. moreCites = false;
  970. else
  971. {
  972. result++;
  973. logLineStart = i;
  974. }
  975. }
  976. else
  977. moreCites = false;
  978. }
  979. return result;
  980. }
  981. void
  982. mozTXTToHTMLConv::ScanTXT(const char16_t * aInString, int32_t aInStringLength, uint32_t whattodo, nsString& aOutString)
  983. {
  984. bool doURLs = 0 != (whattodo & kURLs);
  985. bool doGlyphSubstitution = 0 != (whattodo & kGlyphSubstitution);
  986. bool doStructPhrase = 0 != (whattodo & kStructPhrase);
  987. uint32_t structPhrase_strong = 0; // Number of currently open tags
  988. uint32_t structPhrase_underline = 0;
  989. uint32_t structPhrase_italic = 0;
  990. uint32_t structPhrase_code = 0;
  991. nsAutoString outputHTML; // moved here for performance increase
  992. for(uint32_t i = 0; int32_t(i) < aInStringLength;)
  993. {
  994. if (doGlyphSubstitution)
  995. {
  996. int32_t glyphTextLen;
  997. if (GlyphHit(&aInString[i], aInStringLength - i, i == 0, aOutString, glyphTextLen))
  998. {
  999. i += glyphTextLen;
  1000. continue;
  1001. }
  1002. }
  1003. if (doStructPhrase)
  1004. {
  1005. const char16_t * newOffset = aInString;
  1006. int32_t newLength = aInStringLength;
  1007. if (i > 0 ) // skip the first element?
  1008. {
  1009. newOffset = &aInString[i-1];
  1010. newLength = aInStringLength - i + 1;
  1011. }
  1012. switch (aInString[i]) // Performance increase
  1013. {
  1014. case '*':
  1015. if (StructPhraseHit(newOffset, newLength, i == 0,
  1016. u"*", 1,
  1017. "b", "class=\"moz-txt-star\"",
  1018. aOutString, structPhrase_strong))
  1019. {
  1020. i++;
  1021. continue;
  1022. }
  1023. break;
  1024. case '/':
  1025. if (StructPhraseHit(newOffset, newLength, i == 0,
  1026. u"/", 1,
  1027. "i", "class=\"moz-txt-slash\"",
  1028. aOutString, structPhrase_italic))
  1029. {
  1030. i++;
  1031. continue;
  1032. }
  1033. break;
  1034. case '_':
  1035. if (StructPhraseHit(newOffset, newLength, i == 0,
  1036. u"_", 1,
  1037. "span" /* <u> is deprecated */,
  1038. "class=\"moz-txt-underscore\"",
  1039. aOutString, structPhrase_underline))
  1040. {
  1041. i++;
  1042. continue;
  1043. }
  1044. break;
  1045. case '|':
  1046. if (StructPhraseHit(newOffset, newLength, i == 0,
  1047. u"|", 1,
  1048. "code", "class=\"moz-txt-verticalline\"",
  1049. aOutString, structPhrase_code))
  1050. {
  1051. i++;
  1052. continue;
  1053. }
  1054. break;
  1055. }
  1056. }
  1057. if (doURLs)
  1058. {
  1059. switch (aInString[i])
  1060. {
  1061. case ':':
  1062. case '@':
  1063. case '.':
  1064. if ( (i == 0 || ((i > 0) && aInString[i - 1] != ' ')) && aInString[i +1] != ' ') // Performance increase
  1065. {
  1066. int32_t replaceBefore;
  1067. int32_t replaceAfter;
  1068. if (FindURL(aInString, aInStringLength, i, whattodo,
  1069. outputHTML, replaceBefore, replaceAfter)
  1070. && structPhrase_strong + structPhrase_italic +
  1071. structPhrase_underline + structPhrase_code == 0
  1072. /* workaround for bug #19445 */ )
  1073. {
  1074. aOutString.Cut(aOutString.Length() - replaceBefore, replaceBefore);
  1075. aOutString += outputHTML;
  1076. i += replaceAfter + 1;
  1077. continue;
  1078. }
  1079. }
  1080. break;
  1081. } //switch
  1082. }
  1083. switch (aInString[i])
  1084. {
  1085. // Special symbols
  1086. case '<':
  1087. case '>':
  1088. case '&':
  1089. EscapeChar(aInString[i], aOutString, false);
  1090. i++;
  1091. break;
  1092. // Normal characters
  1093. default:
  1094. aOutString += aInString[i];
  1095. i++;
  1096. break;
  1097. }
  1098. }
  1099. }
  1100. void
  1101. mozTXTToHTMLConv::ScanHTML(nsString& aInString, uint32_t whattodo, nsString &aOutString)
  1102. {
  1103. // some common variables we were recalculating
  1104. // every time inside the for loop...
  1105. int32_t lengthOfInString = aInString.Length();
  1106. const char16_t * uniBuffer = aInString.get();
  1107. #ifdef DEBUG_BenB_Perf
  1108. PRTime parsing_start = PR_IntervalNow();
  1109. #endif
  1110. // Look for simple entities not included in a tags and scan them.
  1111. // Skip all tags ("<[...]>") and content in an a link tag ("<a [...]</a>"),
  1112. // comment tag ("<!--[...]-->"), style tag, script tag or head tag.
  1113. // Unescape the rest (text between tags) and pass it to ScanTXT.
  1114. nsAutoCString canFollow(" \f\n\r\t>");
  1115. for (int32_t i = 0; i < lengthOfInString;)
  1116. {
  1117. if (aInString[i] == '<') // html tag
  1118. {
  1119. int32_t start = i;
  1120. if (i + 2 < lengthOfInString &&
  1121. nsCRT::ToLower(aInString[i + 1]) == 'a' &&
  1122. canFollow.FindChar(aInString[i + 2]) != kNotFound)
  1123. // if a tag, skip until </a>.
  1124. // Make sure there's a white-space character after, not to match "abbr".
  1125. {
  1126. i = aInString.Find("</a>", true, i);
  1127. if (i == kNotFound)
  1128. i = lengthOfInString;
  1129. else
  1130. i += 4;
  1131. }
  1132. else if (Substring(aInString, i + 1, 3).LowerCaseEqualsASCII("!--"))
  1133. // if out-commended code, skip until -->
  1134. {
  1135. i = aInString.Find("-->", false, i);
  1136. if (i == kNotFound)
  1137. i = lengthOfInString;
  1138. else
  1139. i += 3;
  1140. }
  1141. else if (i + 6 < lengthOfInString &&
  1142. Substring(aInString, i + 1, 5).LowerCaseEqualsASCII("style") &&
  1143. canFollow.FindChar(aInString[i + 6]) != kNotFound)
  1144. // if style tag, skip until </style>
  1145. {
  1146. i = aInString.Find("</style>", true, i);
  1147. if (i == kNotFound)
  1148. i = lengthOfInString;
  1149. else
  1150. i += 8;
  1151. }
  1152. else if (i + 7 < lengthOfInString &&
  1153. Substring(aInString, i + 1, 6).LowerCaseEqualsASCII("script") &&
  1154. canFollow.FindChar(aInString[i + 7]) != kNotFound)
  1155. // if script tag, skip until </script>
  1156. {
  1157. i = aInString.Find("</script>", true, i);
  1158. if (i == kNotFound)
  1159. i = lengthOfInString;
  1160. else
  1161. i += 9;
  1162. }
  1163. else if (i + 5 < lengthOfInString &&
  1164. Substring(aInString, i + 1, 4).LowerCaseEqualsASCII("head") &&
  1165. canFollow.FindChar(aInString[i + 5]) != kNotFound)
  1166. // if head tag, skip until </head>
  1167. // Make sure not to match <header>.
  1168. {
  1169. i = aInString.Find("</head>", true, i);
  1170. if (i == kNotFound)
  1171. i = lengthOfInString;
  1172. else
  1173. i += 7;
  1174. }
  1175. else // just skip tag (attributes etc.)
  1176. {
  1177. i = aInString.FindChar('>', i);
  1178. if (i == kNotFound)
  1179. i = lengthOfInString;
  1180. else
  1181. i++;
  1182. }
  1183. aOutString.Append(&uniBuffer[start], i - start);
  1184. }
  1185. else
  1186. {
  1187. uint32_t start = uint32_t(i);
  1188. i = aInString.FindChar('<', i);
  1189. if (i == kNotFound)
  1190. i = lengthOfInString;
  1191. nsString tempString;
  1192. tempString.SetCapacity(uint32_t((uint32_t(i) - start) * growthRate));
  1193. UnescapeStr(uniBuffer, start, uint32_t(i) - start, tempString);
  1194. ScanTXT(tempString.get(), tempString.Length(), whattodo, aOutString);
  1195. }
  1196. }
  1197. #ifdef DEBUG_BenB_Perf
  1198. printf("ScanHTML time: %d ms\n", PR_IntervalToMilliseconds(PR_IntervalNow() - parsing_start));
  1199. #endif
  1200. }
  1201. /****************************************************************************
  1202. XPCOM Interface
  1203. *****************************************************************************/
  1204. NS_IMETHODIMP
  1205. mozTXTToHTMLConv::Convert(nsIInputStream *aFromStream,
  1206. const char *aFromType,
  1207. const char *aToType,
  1208. nsISupports *aCtxt, nsIInputStream **_retval)
  1209. {
  1210. return NS_ERROR_NOT_IMPLEMENTED;
  1211. }
  1212. NS_IMETHODIMP
  1213. mozTXTToHTMLConv::AsyncConvertData(const char *aFromType,
  1214. const char *aToType,
  1215. nsIStreamListener *aListener, nsISupports *aCtxt) {
  1216. return NS_ERROR_NOT_IMPLEMENTED;
  1217. }
  1218. NS_IMETHODIMP
  1219. mozTXTToHTMLConv::OnDataAvailable(nsIRequest* request, nsISupports *ctxt,
  1220. nsIInputStream *inStr, uint64_t sourceOffset,
  1221. uint32_t count)
  1222. {
  1223. return NS_ERROR_NOT_IMPLEMENTED;
  1224. }
  1225. NS_IMETHODIMP
  1226. mozTXTToHTMLConv::OnStartRequest(nsIRequest* request, nsISupports *ctxt)
  1227. {
  1228. return NS_ERROR_NOT_IMPLEMENTED;
  1229. }
  1230. NS_IMETHODIMP
  1231. mozTXTToHTMLConv::OnStopRequest(nsIRequest* request, nsISupports *ctxt,
  1232. nsresult aStatus)
  1233. {
  1234. return NS_ERROR_NOT_IMPLEMENTED;
  1235. }
  1236. NS_IMETHODIMP
  1237. mozTXTToHTMLConv::CiteLevelTXT(const char16_t *line, uint32_t *logLineStart,
  1238. uint32_t *_retval)
  1239. {
  1240. if (!logLineStart || !_retval || !line)
  1241. return NS_ERROR_NULL_POINTER;
  1242. *_retval = CiteLevelTXT(line, *logLineStart);
  1243. return NS_OK;
  1244. }
  1245. NS_IMETHODIMP
  1246. mozTXTToHTMLConv::ScanTXT(const char16_t *text, uint32_t whattodo,
  1247. char16_t **_retval)
  1248. {
  1249. NS_ENSURE_ARG(text);
  1250. // FIX ME!!!
  1251. nsString outString;
  1252. int32_t inLength = NS_strlen(text);
  1253. // by setting a large capacity up front, we save time
  1254. // when appending characters to the output string because we don't
  1255. // need to reallocate and re-copy the characters already in the out String.
  1256. NS_ASSERTION(inLength, "ScanTXT passed 0 length string");
  1257. if (inLength == 0) {
  1258. *_retval = NS_strdup(text);
  1259. return NS_OK;
  1260. }
  1261. outString.SetCapacity(uint32_t(inLength * growthRate));
  1262. ScanTXT(text, inLength, whattodo, outString);
  1263. *_retval = ToNewUnicode(outString);
  1264. return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1265. }
  1266. NS_IMETHODIMP
  1267. mozTXTToHTMLConv::ScanHTML(const char16_t *text, uint32_t whattodo,
  1268. char16_t **_retval)
  1269. {
  1270. NS_ENSURE_ARG(text);
  1271. // FIX ME!!!
  1272. nsString outString;
  1273. nsString inString (text); // look at this nasty extra copy of the entire input buffer!
  1274. outString.SetCapacity(uint32_t(inString.Length() * growthRate));
  1275. ScanHTML(inString, whattodo, outString);
  1276. *_retval = ToNewUnicode(outString);
  1277. return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
  1278. }
  1279. nsresult
  1280. MOZ_NewTXTToHTMLConv(mozTXTToHTMLConv** aConv)
  1281. {
  1282. NS_PRECONDITION(aConv != nullptr, "null ptr");
  1283. if (!aConv)
  1284. return NS_ERROR_NULL_POINTER;
  1285. *aConv = new mozTXTToHTMLConv();
  1286. if (!*aConv)
  1287. return NS_ERROR_OUT_OF_MEMORY;
  1288. NS_ADDREF(*aConv);
  1289. // return (*aConv)->Init();
  1290. return NS_OK;
  1291. }