transtbl.cc 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. // Copyright (C) 2003 Mooffie <mooffie@typo.co.il>
  2. //
  3. // This program is free software; you can redistribute it and/or modify
  4. // it under the terms of the GNU General Public License as published by
  5. // the Free Software Foundation; either version 2 of the License, or
  6. // (at your option) any later version.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
  16. #include <config.h>
  17. #include <stdio.h>
  18. #include <errno.h>
  19. #include "transtbl.h"
  20. #include "io.h" // set_last_error
  21. #include "dbg.h"
  22. // Most of the code below deals with parsing a TranslationTable
  23. // file. Such files consist of lines of the form:
  24. //
  25. // <character-from> <character-to>
  26. //
  27. // that map character-from to character-to.
  28. //
  29. // <character-xxx> can be in one of three forms:
  30. //
  31. // 1. ' literal-character '
  32. // 2. decimal-number .
  33. // 3. hex-number
  34. //
  35. // Examples:
  36. //
  37. // 'a' 5d0 # maps 'a' to Hebrew letter Alef
  38. // 'a' 1488. # the same
  39. // 'a' 'b' # maps 'a' to 'b'
  40. //
  41. // literal-character is UTF-8 encoded.
  42. // parse_next_char() - parses the next <character> token. (this is a
  43. // misnomer, because one might think we mean C's "char".)
  44. //
  45. // If there was no lexical error, returns a pointer to the end of the
  46. // token (so one can continue to parse the next token); else returns
  47. // NULL.
  48. static char *parse_next_char(char *s, unichar &ch)
  49. {
  50. while (*s == ' ' || *s == '\t')
  51. s++;
  52. if (!*s)
  53. return NULL;
  54. if (*s == '\'') {
  55. s++;
  56. char *end = strchr(s + 1, '\'');
  57. if (!end)
  58. return NULL;
  59. unistring us;
  60. us.init_from_utf8(s, end - s);
  61. if (us.size() != 1)
  62. return false;
  63. ch = us[0];
  64. return end + 1;
  65. } else {
  66. char *end;
  67. errno = 0;
  68. int val = strtol(s, &end, 16);
  69. if (*end == '.') {
  70. *end = ' ';
  71. val = strtol(s, &end, 10);
  72. }
  73. if (errno || (*end != '\0' && *end != ' ' && *end != '\t'))
  74. return NULL;
  75. ch = (unichar)val;
  76. return end;
  77. }
  78. }
  79. // load(filename) - loads--that is, parse--a file. It reads the file line by
  80. // line and for each line calls parse_next_char() to parse the two
  81. // <character> tokens. It then adds the mapping to the map table.
  82. bool TranslationTable::load(const char *filename)
  83. {
  84. #define MAX_LINE_LEN 1024
  85. charmap.clear();
  86. FILE *fp = fopen(filename, "r");
  87. if (!fp) {
  88. set_last_error(errno);
  89. return false;
  90. }
  91. DBG(1, ("Reading translation table %s\n", filename));
  92. char line[MAX_LINE_LEN];
  93. while (fgets(line, MAX_LINE_LEN, fp)) {
  94. int len = strlen(line);
  95. if (len && line[len-1] == '\n')
  96. line[len-1] = 0;
  97. if (strchr(line, '#')) // remove comment
  98. *(strchr(line, '#')) = '\0';
  99. unichar ch1, ch2;
  100. char *s = line;
  101. if ((s = parse_next_char(s, ch1)))
  102. if ((s = parse_next_char(s, ch2)))
  103. charmap[ch1] = ch2;
  104. }
  105. fclose(fp);
  106. return true;
  107. #undef MAX_LINE_LEN
  108. }
  109. // translate_char() - matches a character with another, in-place. returns
  110. // false if no match exists.
  111. bool TranslationTable::translate_char(unichar &ch) const
  112. {
  113. std::map<unichar, unichar>::const_iterator
  114. it = charmap.find(ch);
  115. if (it != charmap.end()) {
  116. ch = it->second;
  117. return true;
  118. } else
  119. return false;
  120. }