transtbl.cc 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. // Copyright (C) 2003 Mooffie <mooffie@typo.co.il>
  2. //
  3. // This program is free software; you can redistribute it and/or modify
  4. // it under the terms of the GNU General Public License as published by
  5. // the Free Software Foundation; either version 2 of the License, or
  6. // (at your option) any later version.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
  16. #include <config.h>
  17. #include <stdio.h>
  18. #include <errno.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include "transtbl.h"
  22. #include "io.h" // set_last_error
  23. #include "dbg.h"
  24. // Most of the code below deals with parsing a TranslationTable
  25. // file. Such files consist of lines of the form:
  26. //
  27. // <character-from> <character-to>
  28. //
  29. // that map character-from to character-to.
  30. //
  31. // <character-xxx> can be in one of three forms:
  32. //
  33. // 1. ' literal-character '
  34. // 2. decimal-number .
  35. // 3. hex-number
  36. //
  37. // Examples:
  38. //
  39. // 'a' 5d0 # maps 'a' to Hebrew letter Alef
  40. // 'a' 1488. # the same
  41. // 'a' 'b' # maps 'a' to 'b'
  42. //
  43. // literal-character is UTF-8 encoded.
  44. // parse_next_char() - parses the next <character> token. (this is a
  45. // misnomer, because one might think we mean C's "char".)
  46. //
  47. // If there was no lexical error, returns a pointer to the end of the
  48. // token (so one can continue to parse the next token); else returns
  49. // NULL.
  50. static char *parse_next_char(char *s, unichar &ch)
  51. {
  52. while (*s == ' ' || *s == '\t')
  53. s++;
  54. if (!*s)
  55. return NULL;
  56. if (*s == '\'') {
  57. s++;
  58. char *end = strchr(s + 1, '\'');
  59. if (!end)
  60. return NULL;
  61. unistring us;
  62. us.init_from_utf8(s, end - s);
  63. if (us.size() != 1)
  64. return NULL;
  65. ch = us[0];
  66. return end + 1;
  67. } else {
  68. char *end;
  69. errno = 0;
  70. int val = strtol(s, &end, 16);
  71. if (*end == '.') {
  72. *end = ' ';
  73. val = strtol(s, &end, 10);
  74. }
  75. if (errno || (*end != '\0' && *end != ' ' && *end != '\t'))
  76. return NULL;
  77. ch = (unichar)val;
  78. return end;
  79. }
  80. }
  81. // load(filename) - loads--that is, parse--a file. It reads the file line by
  82. // line and for each line calls parse_next_char() to parse the two
  83. // <character> tokens. It then adds the mapping to the map table.
  84. bool TranslationTable::load(const char *filename)
  85. {
  86. #define MAX_LINE_LEN 1024
  87. charmap.clear();
  88. FILE *fp = fopen(filename, "r");
  89. if (!fp) {
  90. set_last_error(errno);
  91. return false;
  92. }
  93. DBG(1, ("Reading translation table %s\n", filename));
  94. char line[MAX_LINE_LEN];
  95. while (fgets(line, MAX_LINE_LEN, fp)) {
  96. int len = strlen(line);
  97. if (len && line[len-1] == '\n')
  98. line[len-1] = 0;
  99. if (strchr(line, '#')) // remove comment
  100. *(strchr(line, '#')) = '\0';
  101. unichar ch1, ch2;
  102. char *s = line;
  103. if ((s = parse_next_char(s, ch1)))
  104. if ((s = parse_next_char(s, ch2)))
  105. charmap[ch1] = ch2;
  106. }
  107. fclose(fp);
  108. return true;
  109. #undef MAX_LINE_LEN
  110. }
  111. // translate_char() - matches a character with another, in-place. returns
  112. // false if no match exists.
  113. bool TranslationTable::translate_char(unichar &ch) const
  114. {
  115. std::map<unichar, unichar>::const_iterator
  116. it = charmap.find(ch);
  117. if (it != charmap.end()) {
  118. ch = it->second;
  119. return true;
  120. } else
  121. return false;
  122. }