123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552 |
- /*
- * tedi2lang main file
- * Copyright (C) <2022> <alkeon> [alkeon@autistici.org]
-
- * Texdi is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Texdi is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with tedi2lang. If not, see <http://www.gnu.org/licenses/>.
- */
- #include <iostream>
- #include <string>
- #include <fstream>
- #include <sstream>
- #include "tedi2lang.h"
- #include "exception.h"
- using namespace std;
/*
 * Kinds of inline tag that main_tag() can report.
 * Typed constants instead of macros: they obey scope, cannot silently
 * rewrite unrelated identifiers and are still usable as case labels.
 */
namespace {
const int BLOCK = 2;
const int IMAGE = 1;
const int LINK = 0;
const int NO_TAG = -1;
}
- tedi2lang::tedi2lang(tags_definition td):
-
- _start_heading_first_level_tag(td.start_heading_first_level_tag),
- _start_heading_second_level_tag(td.start_heading_second_level_tag),
- _start_heading_third_level_tag(td.start_heading_third_level_tag),
- _start_heading_fourth_level_tag(td.start_heading_fourth_level_tag),
-
- _end_heading_first_level_tag(td.end_heading_first_level_tag),
- _end_heading_second_level_tag(td.end_heading_second_level_tag),
- _end_heading_third_level_tag(td.end_heading_third_level_tag),
- _end_heading_fourth_level_tag(td.end_heading_fourth_level_tag),
-
- _start_list_tag(td.start_list_tag),
- _list_item_tag(td.list_item_tag),
- _end_list_tag(td.end_list_tag),
-
- _start_container_tag(td.start_container_tag),
- _middle_container_tag(td.middle_container_tag),
- _end_container_tag(td.end_container_tag),
-
- _start_link_tag(td.start_link_tag),
- _middle_link_tag(td.middle_link_tag),
- _end_link_tag(td.end_link_tag),
-
- _start_image_tag(td.start_image_tag),
- _middle_image_tag(td.middle_image_tag),
- _end_image_tag(td.end_image_tag),
-
- _start_table_tag(td.start_table_tag),
- _end_table_tag(td.end_table_tag),
- _start_table_row_tag(td.start_table_row_tag),
- _end_table_row_tag(td.end_table_row_tag),
- _start_table_data_tag(td.start_table_data_tag),
- _end_table_data_tag(td.end_table_data_tag),
- _end_paragraph_tag(td.end_paragraph_tag) {}
- /*
- * Add header, iterate through text and finish with footer
- *
- */
- string tedi2lang::convert(string text, string header, string footer) {
- stringstream index(text);
- string line, return_text = header;
- _open_brackets = 0;
- _is_converting_table = false;
- _is_unordered_list = 0;
- while(getline(index, line)) {
- _has_block = false;
- return_text += convert_line(line);
- }
- if(_open_brackets > 0)
- throw Invalid("Missing '}' in document.", "End of file");
- if(_is_converting_table) return_text += _end_table_tag + "\n";
- return_text += footer;
- return return_text;
- }
- /*
- * Main logic converting line.
- *
- */
- string tedi2lang::convert_line(string& line) {
- if(line[0] != '<') {
- string end_table = convert_end_table(line);
- size_t hash = line.find('#');
- if(found(hash) && is_first_tag(line, hash))
- return end_table + convert_line_heading(line, hash);
- else {
- size_t ul = line.find("__");
- if(found(ul))
- return end_table + convert_line_list_start(line);
- else {
- size_t li = line.find("--");
- if(found(li) && _is_unordered_list)
- convert_line_list_item(line, li);
-
- size_t ul_end = line.find(",,");
- if(found(ul_end) && _is_unordered_list)
- return end_table + convert_line_list_end(line);
- else {
- if(line[0] == '"')
- convert_line_quote(line);
- else
- convert_unquoted_tags(line);
-
- size_t first_pipe = line.find("|");
- if(found(first_pipe))
- return convert_line_table(line, first_pipe);
- else
- return end_table + convert_line_ending(line);
-
- }
- }
- }
- } else
- return convert_end_table_control_tags(line) + convert_line_control_tags(line);
-
- }
- /*
- * Deletes heading tag and insert new heading tag
- *
- */
- string tedi2lang::convert_line_heading(string& line, size_t hash_position) {
- int level = 0;
- while(found(hash_position) && line[hash_position + level] == '#')
- ++level;
-
- if(hash_position + level >= line.size())
- throw Invalid("Unexpected heading size", line);
- else
- line = line.substr(hash_position + level, line.size() - 1);
-
- line = strip_escaping(line);
- switch(level) {
- case 1: line = _start_heading_first_level_tag + line + _end_heading_first_level_tag; break;
- case 2: line = _start_heading_second_level_tag + line + _end_heading_second_level_tag; break;
- case 3: line = _start_heading_third_level_tag + line + _end_heading_third_level_tag; break;
- case 4: line = _start_heading_fourth_level_tag + line + _end_heading_fourth_level_tag; break;
- default: line = _start_heading_fourth_level_tag + line + _end_heading_fourth_level_tag; break;
- }
-
- return (line + "\n");
- }
- // List tags
- string tedi2lang::convert_line_list_start(string& line) {
- ++_is_unordered_list;
- return _start_list_tag + "\n";
-
- }
- void tedi2lang::convert_line_list_item(string& line, size_t li) {
- line = line.erase(li, 2);
- line = line.insert(li, _list_item_tag);
-
- }
- string tedi2lang::convert_line_list_end(string& line) {
- --_is_unordered_list;
- return _end_list_tag + "\n";
-
- }
- void tedi2lang::convert_line_quote(string& line) {
- if(line[line.size() - 1] == '"')
- line = strip_escaping(line.substr(1, line.size() - 2));
- else
- throw Invalid("Missing end quotes.", line);
- }
- /*
- * Convert every unquoted tag (image, link and block)
- *
- */
- void tedi2lang::convert_unquoted_tags(string& line) {
- int tag = main_tag(line);
- while(tag != NO_TAG) {
- switch(tag) {
- case LINK : convert_line_link(line); break;
- case IMAGE : convert_line_image(line); break;
- case BLOCK : convert_line_block(line); break;
- }
- tag = main_tag(line);
- }
- check_ending_container(line);
- }
- /*
- * Convert tedi link to the lang link tag
- *
- */
- void tedi2lang::convert_line_link(string& line) {
- size_t start_tag = get_not_escaped_tag(line, "[(");
- if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
- line = line.erase(start_tag, 2);
- line = line.insert(start_tag, _start_link_tag);
- size_t end_parenthesis = get_not_escaped_tag(line, ") ");
- if(found(end_parenthesis) && !is_tag_escaped(line, end_parenthesis)) {
- line = line.erase(end_parenthesis, 2);
- line = line.insert(end_parenthesis, _middle_link_tag);
- int bracket = correct_position(line, end_parenthesis + _middle_link_tag.size(), '[', ']');
- if(found(bracket)) {
- line = line.erase(bracket, 1);
- line = line.insert(bracket, _end_link_tag);
- } else
- throw Invalid("Missing ']' in link tag.", line);
- } else
- throw Invalid("Missing ')' in link tag.", line);
- } else
- throw Invalid("Missing link tag.", line);
- }
- /*
- * Deletes heading tag and insert image tag
- *
- */
- void tedi2lang::convert_line_image(string& line) {
- size_t start_tag = get_not_escaped_tag(line, "([");
- if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
- line = line.erase(start_tag, 2);
- line = line.insert(start_tag, _start_image_tag);
- size_t square_bracket = get_not_escaped_tag(line, "] ");
- if(found(square_bracket) && !is_tag_escaped(line, square_bracket)) {
- line = line.erase(square_bracket, 2);
- line = line.insert(square_bracket, _middle_image_tag);
- int last_bracket = correct_position(line, square_bracket + _middle_image_tag.size(), '(', ')');
- if(found(last_bracket)) {
- line = line.erase(last_bracket, 1);
- line = line.insert(last_bracket, _end_image_tag);
- } else
- throw Invalid("Missing ')' in images tag.",line);
- } else
- throw Invalid("Missing ']' in images tag.",line);
- } else
- throw Invalid("Missing images tag.",line);
- }
- /*
- * Deletes block tag and insert block tag
- *
- */
- void tedi2lang::convert_line_block(string& line) {
- size_t start_tag = get_not_escaped_tag(line, "{(");
- if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
- line = line.erase(start_tag, 2);
- line = line.insert(start_tag, _start_container_tag);
- start_tag = get_not_escaped_tag(line, ") ");
- if(found(start_tag) && !is_tag_escaped(line, start_tag)) {
- line = line.erase(start_tag, 2);
- line = line.insert(start_tag, _middle_container_tag);
- size_t end_tag = correct_position(line, start_tag + _middle_container_tag.size(), '{', '}');
- if(found(end_tag)) {
- line = line.erase(end_tag, 1);
- line = line.insert(end_tag, _end_container_tag);
- } else
- ++_open_brackets;
-
- } else
- throw Invalid("Missing ')' in block tag.",line);
- } else
- throw Invalid("Missing block tag.",line);
- _has_block = true;
- }
- /*
- * Start or continue table conversion
- *
- */
- string tedi2lang::convert_line_table(string& line, size_t first_pipe) {
- string return_text;
- if(!_is_converting_table) {
- if(first_pipe != line.rfind("|")) {
- return_text += _start_table_tag + "\n";
- _is_converting_table = true;
- return return_text + convert_line_table_row(line);
- } else
- return convert_line_ending(line);
- }else
- return return_text + convert_line_table_row(line);
-
- return return_text;
- }
- /*
- * Convert every cell of one table row
- *
- */
- string tedi2lang::convert_line_table_row(string& line) {
- string return_text = _start_table_row_tag + "\n";
- size_t pipe = line.find("|");
- if(found(pipe)) {
- ++pipe;
- line = line.substr(pipe, line.size() - 1);
- pipe = line.find("|");
- if(found(pipe)) {
- int size = line.size() - 1;
- if(line[size] == '|') {
- while(found(pipe) && line[size] == '|') {
- return_text += _start_table_data_tag + strip_escaping(line.substr(0, pipe)) + _end_table_data_tag + "\n";
- ++pipe;
- line = line.substr(pipe, size);
- pipe = line.find("|");
- size = line.size() - 1;
- }
- return return_text + _end_table_row_tag + "\n";
- } else
- throw Invalid("Table not correctly written\n"
- "Maybe there's a whitespace at end of line", line);
- } else
- throw Invalid("Expected '|' in table", line);
- } else
- throw Invalid("Expected '|' in table", line);
- }
- /*
- * Checks if there is a non-space character (every char except \n \t \r)
- * before the given position
- *
- */
- bool tedi2lang::is_first_tag(string& line, int position) {
- int i = 0;
- while(i < position && isspace(line[i]))
- ++i;
- return (i == position);
- }
- /*
- * End table if there isn't a pipe char and there's a open
- * table tag
- *
- */
- string tedi2lang::convert_end_table(string& line) {
- size_t pipe = line.find("|");
- if(pipe == string::npos && _is_converting_table) {
- _is_converting_table = false;
- return _end_table_tag + "\n";
- }
- return "";
- }
- /*
- *
- * End table if there's a open table tag
- * (Added for control tags that
- */
- string tedi2lang::convert_end_table_control_tags(string& line) {
- if(_is_converting_table) {
- _is_converting_table = false;
- return _end_table_tag + "\n";
- }
- return "";
- }
- /*
- * Check line ending and convert depending ending whitespace
- *
- */
- string tedi2lang::convert_line_ending(string& line) {
- if(line[line.size() - 1] == ' ')
- return strip_escaping(line) + _end_paragraph_tag + "\n";
- else
- return strip_escaping(line) + "\n";
-
- }
- /*
- * Control tags:
- * <!, <>, <+. <
- */
- string tedi2lang::convert_line_control_tags(string& line) {
- if(line[1] != '!') {
- if(line[1] == '>') {
- line = line.erase(0,2);
- return line + "\n";
- } else if(line[1] == '+') {
- line = line.erase(0,2);
- return line + _end_paragraph_tag + "\n";
- } else {
- line = line.erase(0,1);
- return line + "\n";
- }
- } else
- return "";
-
- }
- /*
- * Get next unquoted tag (link, image, container) from line
- *
- */
- int tedi2lang::main_tag(string& line) {
- int tag = NO_TAG;
- size_t line_size = line.size();
- size_t first_bracket = get_not_escaped_tag(line, "{(");
- size_t first_parenthesis = get_not_escaped_tag(line, "([");
- size_t first_square_bracket = get_not_escaped_tag(line, "[(");
- if(found(first_bracket) && first_bracket < line_size && !is_tag_escaped(line, first_bracket)) {
- tag = BLOCK;
- line_size = first_bracket;
- }
- if(found(first_square_bracket) && first_square_bracket < line_size && !is_tag_escaped(line, first_square_bracket)) {
- line_size = first_square_bracket;
- tag = LINK;
- }
- if(found(first_parenthesis) && first_parenthesis < line_size && !is_tag_escaped(line, first_parenthesis))
- tag = IMAGE;
- return tag;
- }
- /*
- * Get ending char position or -1 if doesn't exist
- *
- */
- size_t tedi2lang::correct_position(string line, unsigned position, char starting_char, char ending_char) {
- int char_founds = 1;
- while(char_founds != 0 && line.size() > position) {
- if(line[position] == starting_char)
- ++char_founds;
- else if(line[position] == ending_char && position == 0)
- --char_founds;
- else if(line[position] == ending_char && position > 0 && !is_tag_escaped(line, position))
- --char_founds;
- ++position;
- }
- if(line.size() == position && line[position - 1] != ending_char)
- return string::npos;
- else
- return position - 1;
- }
- /*
- * Convert every container ending if there are open container tags.
- *
- */
- void tedi2lang::check_ending_container(string& line) {
- size_t first_bracket_closed = line.find("}");
- size_t first_bracket_open = line.find("{");
- if(found(first_bracket_closed) && !is_tag_escaped(line, first_bracket_closed) && !found(first_bracket_open)) {
- if(_open_brackets == 0)
- throw Invalid("Missing '{' in block tag.", line);
- --_open_brackets;
- _has_block = true;
- line = line.erase(first_bracket_closed, 1);
- line = line.insert(first_bracket_closed, _end_container_tag);
- }
- }
- /*
- * Check if position is escaped with '\' char
- *
- */
- bool tedi2lang::is_tag_escaped(string& line, size_t position) {
- bool is_escaped = false;
- if(position > 0 && line[position - 1] == '\\')
- is_escaped = true;
-
- return is_escaped;
- }
- /*
- * Check if position is escaped with '\' char
- *
- */
- size_t tedi2lang::get_not_escaped_tag(string& line, string find_character, size_t position) {
- if(!found(position))
- position = line.find(find_character);
- while(found(position) && is_tag_escaped(line, position))
- position = line.find(find_character, position + 1);
-
- return position;
- }
-
- /*
- * Convert escaped characters to their equivalents
- *
- */
- string tedi2lang::strip_escaping(string line) {
- if(line.size() > 1) {
- replace(line, "\\\\", "\\");
- replace(line, "\\[", "[");
- replace(line, "\\]", "]");
- replace(line, "\\{", "{");
- replace(line, "\\}", "}");
- replace(line, "\\)", ")");
- replace(line, "\\(", "(");
- }
- return line;
- }
- /*
- * Easy to use replace method
- *
- */
- int tedi2lang::replace(string& source, const string& find, const string& replace) {
- int num = 0;
- size_t fLen = find.size();
- size_t rLen = replace.size();
- for (size_t pos = 0; (pos = source.find(find, pos)) != string::npos; pos += rLen) {
- num++;
- source.replace(pos, fLen, replace);
- }
- return num;
- }
|