// tedi2lang.cpp
  1. /*
  2. * tedi2lang main file
  3. * Copyright (C) <2022> <alkeon> [alkeon@autistici.org]
  4. * Texdi is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * Texdi is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with tedi2lang. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include <iostream>
  18. #include <string>
  19. #include <fstream>
  20. #include <sstream>
  21. #include "tedi2lang.h"
  22. #include "exception.h"
  23. using namespace std;
  24. #define BLOCK 2
  25. #define IMAGE 1
  26. #define LINK 0
  27. #define NO_TAG -1
  28. tedi2lang::tedi2lang(tags_definition td):
  29. _start_heading_first_level_tag(td.start_heading_first_level_tag),
  30. _start_heading_second_level_tag(td.start_heading_second_level_tag),
  31. _start_heading_third_level_tag(td.start_heading_third_level_tag),
  32. _start_heading_fourth_level_tag(td.start_heading_fourth_level_tag),
  33. _end_heading_first_level_tag(td.end_heading_first_level_tag),
  34. _end_heading_second_level_tag(td.end_heading_second_level_tag),
  35. _end_heading_third_level_tag(td.end_heading_third_level_tag),
  36. _end_heading_fourth_level_tag(td.end_heading_fourth_level_tag),
  37. _start_list_tag(td.start_list_tag),
  38. _list_item_tag(td.list_item_tag),
  39. _end_list_tag(td.end_list_tag),
  40. _start_container_tag(td.start_container_tag),
  41. _middle_container_tag(td.middle_container_tag),
  42. _end_container_tag(td.end_container_tag),
  43. _start_link_tag(td.start_link_tag),
  44. _middle_link_tag(td.middle_link_tag),
  45. _end_link_tag(td.end_link_tag),
  46. _start_image_tag(td.start_image_tag),
  47. _middle_image_tag(td.middle_image_tag),
  48. _end_image_tag(td.end_image_tag),
  49. _start_table_tag(td.start_table_tag),
  50. _end_table_tag(td.end_table_tag),
  51. _start_table_row_tag(td.start_table_row_tag),
  52. _end_table_row_tag(td.end_table_row_tag),
  53. _start_table_data_tag(td.start_table_data_tag),
  54. _end_table_data_tag(td.end_table_data_tag),
  55. _end_paragraph_tag(td.end_paragraph_tag) {}
  56. /*
  57. * Add header, iterate through text and finish with footer
  58. *
  59. */
  60. string tedi2lang::convert(string text, string header, string footer) {
  61. stringstream index(text);
  62. string line, return_text = header;
  63. _open_brackets = 0;
  64. _is_converting_table = false;
  65. _is_unordered_list = 0;
  66. while(getline(index, line)) {
  67. _has_block = false;
  68. return_text += convert_line(line);
  69. }
  70. if(_open_brackets > 0)
  71. throw Invalid("Missing '}' in document.", "End of file");
  72. if(_is_converting_table) return_text += _end_table_tag + "\n";
  73. return_text += footer;
  74. return return_text;
  75. }
  76. /*
  77. * Main logic converting line.
  78. *
  79. */
  80. string tedi2lang::convert_line(string& line) {
  81. if(line[0] != '<') {
  82. string end_table = convert_end_table(line);
  83. size_t hash = line.find('#');
  84. if(found(hash) && is_first_tag(line, hash))
  85. return end_table + convert_line_heading(line, hash);
  86. else {
  87. size_t ul = line.find("__");
  88. if(found(ul))
  89. return end_table + convert_line_list_start(line);
  90. else {
  91. size_t li = line.find("--");
  92. if(found(li) && _is_unordered_list)
  93. convert_line_list_item(line, li);
  94. size_t ul_end = line.find(",,");
  95. if(found(ul_end) && _is_unordered_list)
  96. return end_table + convert_line_list_end(line);
  97. else {
  98. if(line[0] == '"')
  99. convert_line_quote(line);
  100. else
  101. convert_unquoted_tags(line);
  102. size_t first_pipe = line.find("|");
  103. if(found(first_pipe))
  104. return convert_line_table(line, first_pipe);
  105. else
  106. return end_table + convert_line_ending(line);
  107. }
  108. }
  109. }
  110. } else
  111. return convert_end_table_control_tags(line) + convert_line_control_tags(line);
  112. }
  113. /*
  114. * Deletes heading tag and insert new heading tag
  115. *
  116. */
  117. string tedi2lang::convert_line_heading(string& line, size_t hash_position) {
  118. int level = 0;
  119. while(found(hash_position) && line[hash_position + level] == '#')
  120. ++level;
  121. if(hash_position + level >= line.size())
  122. throw Invalid("Unexpected heading size", line);
  123. else
  124. line = line.substr(hash_position + level, line.size() - 1);
  125. line = strip_escaping(line);
  126. switch(level) {
  127. case 1: line = _start_heading_first_level_tag + line + _end_heading_first_level_tag; break;
  128. case 2: line = _start_heading_second_level_tag + line + _end_heading_second_level_tag; break;
  129. case 3: line = _start_heading_third_level_tag + line + _end_heading_third_level_tag; break;
  130. case 4: line = _start_heading_fourth_level_tag + line + _end_heading_fourth_level_tag; break;
  131. default: line = _start_heading_fourth_level_tag + line + _end_heading_fourth_level_tag; break;
  132. }
  133. return (line + "\n");
  134. }
  135. // List tags
  136. string tedi2lang::convert_line_list_start(string& line) {
  137. ++_is_unordered_list;
  138. return _start_list_tag + "\n";
  139. }
  140. void tedi2lang::convert_line_list_item(string& line, size_t li) {
  141. line = line.erase(li, 2);
  142. line = line.insert(li, _list_item_tag);
  143. }
  144. string tedi2lang::convert_line_list_end(string& line) {
  145. --_is_unordered_list;
  146. return _end_list_tag + "\n";
  147. }
  148. void tedi2lang::convert_line_quote(string& line) {
  149. if(line[line.size() - 1] == '"')
  150. line = strip_escaping(line.substr(1, line.size() - 2));
  151. else
  152. throw Invalid("Missing end quotes.", line);
  153. }
  154. /*
  155. * Convert every unquoted tag (image, link and block)
  156. *
  157. */
  158. void tedi2lang::convert_unquoted_tags(string& line) {
  159. int tag = main_tag(line);
  160. while(tag != NO_TAG) {
  161. switch(tag) {
  162. case LINK : convert_line_link(line); break;
  163. case IMAGE : convert_line_image(line); break;
  164. case BLOCK : convert_line_block(line); break;
  165. }
  166. tag = main_tag(line);
  167. }
  168. check_ending_container(line);
  169. }
  170. /*
  171. * Convert tedi link to the lang link tag
  172. *
  173. */
  174. void tedi2lang::convert_line_link(string& line) {
  175. size_t start_tag = get_not_escaped_tag(line, "[(");
  176. if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
  177. line = line.erase(start_tag, 2);
  178. line = line.insert(start_tag, _start_link_tag);
  179. size_t end_parenthesis = get_not_escaped_tag(line, ") ");
  180. if(found(end_parenthesis) && !is_tag_escaped(line, end_parenthesis)) {
  181. line = line.erase(end_parenthesis, 2);
  182. line = line.insert(end_parenthesis, _middle_link_tag);
  183. int bracket = correct_position(line, end_parenthesis + _middle_link_tag.size(), '[', ']');
  184. if(found(bracket)) {
  185. line = line.erase(bracket, 1);
  186. line = line.insert(bracket, _end_link_tag);
  187. } else
  188. throw Invalid("Missing ']' in link tag.", line);
  189. } else
  190. throw Invalid("Missing ')' in link tag.", line);
  191. } else
  192. throw Invalid("Missing link tag.", line);
  193. }
  194. /*
  195. * Deletes heading tag and insert image tag
  196. *
  197. */
  198. void tedi2lang::convert_line_image(string& line) {
  199. size_t start_tag = get_not_escaped_tag(line, "([");
  200. if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
  201. line = line.erase(start_tag, 2);
  202. line = line.insert(start_tag, _start_image_tag);
  203. size_t square_bracket = get_not_escaped_tag(line, "] ");
  204. if(found(square_bracket) && !is_tag_escaped(line, square_bracket)) {
  205. line = line.erase(square_bracket, 2);
  206. line = line.insert(square_bracket, _middle_image_tag);
  207. int last_bracket = correct_position(line, square_bracket + _middle_image_tag.size(), '(', ')');
  208. if(found(last_bracket)) {
  209. line = line.erase(last_bracket, 1);
  210. line = line.insert(last_bracket, _end_image_tag);
  211. } else
  212. throw Invalid("Missing ')' in images tag.",line);
  213. } else
  214. throw Invalid("Missing ']' in images tag.",line);
  215. } else
  216. throw Invalid("Missing images tag.",line);
  217. }
  218. /*
  219. * Deletes block tag and insert block tag
  220. *
  221. */
  222. void tedi2lang::convert_line_block(string& line) {
  223. size_t start_tag = get_not_escaped_tag(line, "{(");
  224. if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
  225. line = line.erase(start_tag, 2);
  226. line = line.insert(start_tag, _start_container_tag);
  227. start_tag = get_not_escaped_tag(line, ") ");
  228. if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
  229. line = line.erase(start_tag, 2);
  230. line = line.insert(start_tag, _middle_container_tag);
  231. size_t end_tag = correct_position(line, start_tag + _middle_container_tag.size(), '{', '}');
  232. if(found(end_tag)) {
  233. line = line.erase(end_tag, 1);
  234. line = line.insert(end_tag, _end_container_tag);
  235. } else
  236. ++_open_brackets;
  237. } else
  238. throw Invalid("Missing ')' in block tag.",line);
  239. } else
  240. throw Invalid("Missing block tag.",line);
  241. _has_block = true;
  242. }
  243. /*
  244. * Start or continue table conversion
  245. *
  246. */
  247. string tedi2lang::convert_line_table(string& line, size_t first_pipe) {
  248. string return_text;
  249. if(!_is_converting_table) {
  250. if(first_pipe != line.rfind("|")) {
  251. return_text += _start_table_tag + "\n";
  252. _is_converting_table = true;
  253. return return_text + convert_line_table_row(line);
  254. } else
  255. return convert_line_ending(line);
  256. }else
  257. return return_text + convert_line_table_row(line);
  258. return return_text;
  259. }
  260. /*
  261. * Convert every cell of one table row
  262. *
  263. */
  264. string tedi2lang::convert_line_table_row(string& line) {
  265. string return_text = _start_table_row_tag + "\n";
  266. size_t pipe = line.find("|");
  267. if(found(pipe)) {
  268. ++pipe;
  269. line = line.substr(pipe, line.size() - 1);
  270. pipe = line.find("|");
  271. if(found(pipe)) {
  272. int size = line.size() - 1;
  273. if(line[size] == '|') {
  274. while(found(pipe) && line[size] == '|') {
  275. return_text += _start_table_data_tag + strip_escaping(line.substr(0, pipe)) + _end_table_data_tag + "\n";
  276. ++pipe;
  277. line = line.substr(pipe, size);
  278. pipe = line.find("|");
  279. size = line.size() - 1;
  280. }
  281. return return_text + _end_table_row_tag + "\n";
  282. } else
  283. throw Invalid("Table not correctly written\n"
  284. "Maybe there's a whitespace at end of line", line);
  285. } else
  286. throw Invalid("Expected '|' in table", line);
  287. } else
  288. throw Invalid("Expected '|' in table", line);
  289. }
  290. /*
  291. * Checks if there is a non-space character (every char except \n \t \r)
  292. * before the given position
  293. *
  294. */
  295. bool tedi2lang::is_first_tag(string& line, int position) {
  296. int i = 0;
  297. while(i < position && isspace(line[i]))
  298. ++i;
  299. return (i == position);
  300. }
  301. /*
  302. * End table if there isn't a pipe char and there's a open
  303. * table tag
  304. *
  305. */
  306. string tedi2lang::convert_end_table(string& line) {
  307. size_t pipe = line.find("|");
  308. if(pipe == string::npos && _is_converting_table) {
  309. _is_converting_table = false;
  310. return _end_table_tag + "\n";
  311. }
  312. return "";
  313. }
  314. /*
  315. *
  316. * End table if there's a open table tag
  317. * (Added for control tags that
  318. */
  319. string tedi2lang::convert_end_table_control_tags(string& line) {
  320. if(_is_converting_table) {
  321. _is_converting_table = false;
  322. return _end_table_tag + "\n";
  323. }
  324. return "";
  325. }
  326. /*
  327. * Check line ending and convert depending ending whitespace
  328. *
  329. */
  330. string tedi2lang::convert_line_ending(string& line) {
  331. if(line[line.size() - 1] == ' ')
  332. return strip_escaping(line) + _end_paragraph_tag + "\n";
  333. else
  334. return strip_escaping(line) + "\n";
  335. }
  336. /*
  337. * Control tags:
  338. * <!, <>, <+. <
  339. */
  340. string tedi2lang::convert_line_control_tags(string& line) {
  341. if(line[1] != '!') {
  342. if(line[1] == '>') {
  343. line = line.erase(0,2);
  344. return line + "\n";
  345. } else if(line[1] == '+') {
  346. line = line.erase(0,2);
  347. return line + _end_paragraph_tag + "\n";
  348. } else {
  349. line = line.erase(0,1);
  350. return line + "\n";
  351. }
  352. } else
  353. return "";
  354. }
  355. /*
  356. * Get next unquoted tag (link, image, container) from line
  357. *
  358. */
  359. int tedi2lang::main_tag(string& line) {
  360. int tag = NO_TAG;
  361. size_t line_size = line.size();
  362. size_t first_bracket = get_not_escaped_tag(line, "{(");
  363. size_t first_parenthesis = get_not_escaped_tag(line, "([");
  364. size_t first_square_bracket = get_not_escaped_tag(line, "[(");
  365. if(found(first_bracket) && first_bracket < line_size && !is_tag_escaped(line, first_bracket)) {
  366. tag = BLOCK;
  367. line_size = first_bracket;
  368. }
  369. if(found(first_square_bracket) && first_square_bracket < line_size && !is_tag_escaped(line, first_square_bracket)) {
  370. line_size = first_square_bracket;
  371. tag = LINK;
  372. }
  373. if(found(first_parenthesis) && first_parenthesis < line_size && !is_tag_escaped(line, first_parenthesis))
  374. tag = IMAGE;
  375. return tag;
  376. }
  377. /*
  378. * Get ending char position or -1 if doesn't exist
  379. *
  380. */
  381. size_t tedi2lang::correct_position(string line, unsigned position, char starting_char, char ending_char) {
  382. int char_founds = 1;
  383. while(char_founds != 0 && line.size() > position) {
  384. if(line[position] == starting_char)
  385. ++char_founds;
  386. else if(line[position] == ending_char && position == 0)
  387. --char_founds;
  388. else if(line[position] == ending_char && position > 0 && !is_tag_escaped(line, position))
  389. --char_founds;
  390. ++position;
  391. }
  392. if(line.size() == position && line[position - 1] != ending_char)
  393. return string::npos;
  394. else
  395. return position - 1;
  396. }
  397. /*
  398. * Convert every container ending if there are open container tags.
  399. *
  400. */
  401. void tedi2lang::check_ending_container(string& line) {
  402. size_t first_bracket_closed = line.find("}");
  403. size_t first_bracket_open = line.find("{");
  404. if(found(first_bracket_closed) && !is_tag_escaped(line, first_bracket_closed) && !found(first_bracket_open)) {
  405. if(_open_brackets == 0)
  406. throw Invalid("Missing '{' in block tag.", line);
  407. --_open_brackets;
  408. _has_block = true;
  409. line = line.erase(first_bracket_closed, 1);
  410. line = line.insert(first_bracket_closed, _end_container_tag);
  411. }
  412. }
  413. /*
  414. * Check if position is escaped with '\' char
  415. *
  416. */
  417. bool tedi2lang::is_tag_escaped(string& line, size_t position) {
  418. bool is_escaped = false;
  419. if(position > 0 && line[position - 1] == '\\')
  420. is_escaped = true;
  421. return is_escaped;
  422. }
  423. /*
  424. * Check if position is escaped with '\' char
  425. *
  426. */
  427. size_t tedi2lang::get_not_escaped_tag(string& line, string find_character, size_t position) {
  428. if(!found(position))
  429. position = line.find(find_character);
  430. while(found(position) && is_tag_escaped(line, position))
  431. position = line.find(find_character, position + 1);
  432. return position;
  433. }
  434. /*
  435. * Convert escaped characters to their equivalents
  436. *
  437. */
  438. string tedi2lang::strip_escaping(string line) {
  439. if(line.size() > 1) {
  440. replace(line, "\\\\", "\\");
  441. replace(line, "\\[", "[");
  442. replace(line, "\\]", "]");
  443. replace(line, "\\{", "{");
  444. replace(line, "\\}", "}");
  445. replace(line, "\\)", ")");
  446. replace(line, "\\(", "(");
  447. }
  448. return line;
  449. }
  450. /*
  451. * Easy to use replace method
  452. *
  453. */
  454. int tedi2lang::replace(string& source, const string& find, const string& replace) {
  455. int num = 0;
  456. size_t fLen = find.size();
  457. size_t rLen = replace.size();
  458. for (size_t pos = 0; (pos = source.find(find, pos)) != string::npos; pos += rLen) {
  459. num++;
  460. source.replace(pos, fLen, replace);
  461. }
  462. return num;
  463. }