translation_loader_po.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. /**************************************************************************/
  2. /* translation_loader_po.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "translation_loader_po.h"
  31. #include "core/io/file_access.h"
  32. #include "core/string/translation.h"
  33. #include "core/string/translation_po.h"
  34. Ref<Resource> TranslationLoaderPO::load_translation(Ref<FileAccess> f, Error *r_error) {
  35. if (r_error) {
  36. *r_error = ERR_FILE_CORRUPT;
  37. }
  38. const String path = f->get_path();
  39. Ref<TranslationPO> translation = Ref<TranslationPO>(memnew(TranslationPO));
  40. String config;
  41. uint32_t magic = f->get_32();
  42. if (magic == 0x950412de) {
  43. // Load binary MO file.
  44. uint16_t version_maj = f->get_16();
  45. uint16_t version_min = f->get_16();
  46. ERR_FAIL_COND_V_MSG(version_maj > 1, Ref<Resource>(), vformat("Unsupported MO file %s, version %d.%d.", path, version_maj, version_min));
  47. uint32_t num_strings = f->get_32();
  48. uint32_t id_table_offset = f->get_32();
  49. uint32_t trans_table_offset = f->get_32();
  50. // Read string tables.
  51. for (uint32_t i = 0; i < num_strings; i++) {
  52. String msg_id;
  53. String msg_id_plural;
  54. String msg_context;
  55. // Read id strings and context.
  56. {
  57. Vector<uint8_t> data;
  58. f->seek(id_table_offset + i * 8);
  59. uint32_t str_start = 0;
  60. uint32_t str_len = f->get_32();
  61. uint32_t str_offset = f->get_32();
  62. data.resize(str_len + 1);
  63. f->seek(str_offset);
  64. f->get_buffer(data.ptrw(), str_len);
  65. data.write[str_len] = 0;
  66. bool is_plural = false;
  67. for (uint32_t j = 0; j < str_len + 1; j++) {
  68. if (data[j] == 0x04) {
  69. msg_context.parse_utf8((const char *)data.ptr(), j);
  70. str_start = j + 1;
  71. }
  72. if (data[j] == 0x00) {
  73. if (is_plural) {
  74. msg_id_plural.parse_utf8((const char *)(data.ptr() + str_start), j - str_start);
  75. } else {
  76. msg_id.parse_utf8((const char *)(data.ptr() + str_start), j - str_start);
  77. is_plural = true;
  78. }
  79. str_start = j + 1;
  80. }
  81. }
  82. }
  83. // Read translated strings.
  84. {
  85. Vector<uint8_t> data;
  86. f->seek(trans_table_offset + i * 8);
  87. uint32_t str_len = f->get_32();
  88. uint32_t str_offset = f->get_32();
  89. data.resize(str_len + 1);
  90. f->seek(str_offset);
  91. f->get_buffer(data.ptrw(), str_len);
  92. data.write[str_len] = 0;
  93. if (msg_id.is_empty()) {
  94. config = String::utf8((const char *)data.ptr(), str_len);
  95. // Record plural rule.
  96. int p_start = config.find("Plural-Forms");
  97. if (p_start != -1) {
  98. int p_end = config.find("\n", p_start);
  99. translation->set_plural_rule(config.substr(p_start, p_end - p_start));
  100. }
  101. } else {
  102. uint32_t str_start = 0;
  103. Vector<String> plural_msg;
  104. for (uint32_t j = 0; j < str_len + 1; j++) {
  105. if (data[j] == 0x00) {
  106. if (msg_id_plural.is_empty()) {
  107. translation->add_message(msg_id, String::utf8((const char *)(data.ptr() + str_start), j - str_start), msg_context);
  108. } else {
  109. plural_msg.push_back(String::utf8((const char *)(data.ptr() + str_start), j - str_start));
  110. }
  111. str_start = j + 1;
  112. }
  113. }
  114. if (!plural_msg.is_empty()) {
  115. translation->add_plural_message(msg_id, plural_msg, msg_context);
  116. }
  117. }
  118. }
  119. }
  120. } else {
  121. // Try to load as text PO file.
  122. f->seek(0);
  123. enum Status {
  124. STATUS_NONE,
  125. STATUS_READING_ID,
  126. STATUS_READING_STRING,
  127. STATUS_READING_CONTEXT,
  128. STATUS_READING_PLURAL,
  129. };
  130. Status status = STATUS_NONE;
  131. String msg_id;
  132. String msg_str;
  133. String msg_context;
  134. Vector<String> msgs_plural;
  135. if (r_error) {
  136. *r_error = ERR_FILE_CORRUPT;
  137. }
  138. int line = 1;
  139. int plural_forms = 0;
  140. int plural_index = -1;
  141. bool entered_context = false;
  142. bool skip_this = false;
  143. bool skip_next = false;
  144. bool is_eof = false;
  145. while (!is_eof) {
  146. String l = f->get_line().strip_edges();
  147. is_eof = f->eof_reached();
  148. // If we reached last line and it's not a content line, break, otherwise let processing that last loop
  149. if (is_eof && l.is_empty()) {
  150. if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || (status == STATUS_READING_PLURAL && plural_index != plural_forms - 1)) {
  151. ERR_FAIL_V_MSG(Ref<Resource>(), "Unexpected EOF while reading PO file at: " + path + ":" + itos(line));
  152. } else {
  153. break;
  154. }
  155. }
  156. if (l.begins_with("msgctxt")) {
  157. ERR_FAIL_COND_V_MSG(status != STATUS_READING_STRING && status != STATUS_READING_PLURAL, Ref<Resource>(), "Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));
  158. // In PO file, "msgctxt" appears before "msgid". If we encounter a "msgctxt", we add what we have read
  159. // and set "entered_context" to true to prevent adding twice.
  160. if (!skip_this && !msg_id.is_empty()) {
  161. if (status == STATUS_READING_STRING) {
  162. translation->add_message(msg_id, msg_str, msg_context);
  163. } else if (status == STATUS_READING_PLURAL) {
  164. ERR_FAIL_COND_V_MSG(plural_index != plural_forms - 1, Ref<Resource>(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
  165. translation->add_plural_message(msg_id, msgs_plural, msg_context);
  166. }
  167. }
  168. msg_context = "";
  169. l = l.substr(7, l.length()).strip_edges();
  170. status = STATUS_READING_CONTEXT;
  171. entered_context = true;
  172. }
  173. if (l.begins_with("msgid_plural")) {
  174. if (plural_forms == 0) {
  175. ERR_FAIL_V_MSG(Ref<Resource>(), "PO file uses 'msgid_plural' but 'Plural-Forms' is invalid or missing in header: " + path + ":" + itos(line));
  176. } else if (status != STATUS_READING_ID) {
  177. ERR_FAIL_V_MSG(Ref<Resource>(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
  178. }
  179. // We don't record the message in "msgid_plural" itself as tr_n(), TTRN(), RTRN() interfaces provide the plural string already.
  180. // We just have to reset variables related to plurals for "msgstr[]" later on.
  181. l = l.substr(12, l.length()).strip_edges();
  182. plural_index = -1;
  183. msgs_plural.clear();
  184. msgs_plural.resize(plural_forms);
  185. status = STATUS_READING_PLURAL;
  186. } else if (l.begins_with("msgid")) {
  187. ERR_FAIL_COND_V_MSG(status == STATUS_READING_ID, Ref<Resource>(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));
  188. if (!msg_id.is_empty()) {
  189. if (!skip_this && !entered_context) {
  190. if (status == STATUS_READING_STRING) {
  191. translation->add_message(msg_id, msg_str, msg_context);
  192. } else if (status == STATUS_READING_PLURAL) {
  193. ERR_FAIL_COND_V_MSG(plural_index != plural_forms - 1, Ref<Resource>(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
  194. translation->add_plural_message(msg_id, msgs_plural, msg_context);
  195. }
  196. }
  197. } else if (config.is_empty()) {
  198. config = msg_str;
  199. // Record plural rule.
  200. int p_start = config.find("Plural-Forms");
  201. if (p_start != -1) {
  202. int p_end = config.find("\n", p_start);
  203. translation->set_plural_rule(config.substr(p_start, p_end - p_start));
  204. plural_forms = translation->get_plural_forms();
  205. }
  206. }
  207. l = l.substr(5, l.length()).strip_edges();
  208. status = STATUS_READING_ID;
  209. // If we did not encounter msgctxt, we reset context to empty to reset it.
  210. if (!entered_context) {
  211. msg_context = "";
  212. }
  213. msg_id = "";
  214. msg_str = "";
  215. skip_this = skip_next;
  216. skip_next = false;
  217. entered_context = false;
  218. }
  219. if (l.begins_with("msgstr[")) {
  220. ERR_FAIL_COND_V_MSG(status != STATUS_READING_PLURAL, Ref<Resource>(), "Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
  221. plural_index++; // Increment to add to the next slot in vector msgs_plural.
  222. l = l.substr(9, l.length()).strip_edges();
  223. } else if (l.begins_with("msgstr")) {
  224. ERR_FAIL_COND_V_MSG(status != STATUS_READING_ID, Ref<Resource>(), "Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
  225. l = l.substr(6, l.length()).strip_edges();
  226. status = STATUS_READING_STRING;
  227. }
  228. if (l.is_empty() || l.begins_with("#")) {
  229. if (l.contains("fuzzy")) {
  230. skip_next = true;
  231. }
  232. line++;
  233. continue; // Nothing to read or comment.
  234. }
  235. ERR_FAIL_COND_V_MSG(!l.begins_with("\"") || status == STATUS_NONE, Ref<Resource>(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));
  236. l = l.substr(1, l.length());
  237. // Find final quote, ignoring escaped ones (\").
  238. // The escape_next logic is necessary to properly parse things like \\"
  239. // where the backslash is the one being escaped, not the quote.
  240. int end_pos = -1;
  241. bool escape_next = false;
  242. for (int i = 0; i < l.length(); i++) {
  243. if (l[i] == '\\' && !escape_next) {
  244. escape_next = true;
  245. continue;
  246. }
  247. if (l[i] == '"' && !escape_next) {
  248. end_pos = i;
  249. break;
  250. }
  251. escape_next = false;
  252. }
  253. ERR_FAIL_COND_V_MSG(end_pos == -1, Ref<Resource>(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));
  254. l = l.substr(0, end_pos);
  255. l = l.c_unescape();
  256. if (status == STATUS_READING_ID) {
  257. msg_id += l;
  258. } else if (status == STATUS_READING_STRING) {
  259. msg_str += l;
  260. } else if (status == STATUS_READING_CONTEXT) {
  261. msg_context += l;
  262. } else if (status == STATUS_READING_PLURAL && plural_index >= 0) {
  263. ERR_FAIL_COND_V_MSG(plural_index >= plural_forms, Ref<Resource>(), "Unexpected plural form while parsing: " + path + ":" + itos(line));
  264. msgs_plural.write[plural_index] = msgs_plural[plural_index] + l;
  265. }
  266. line++;
  267. }
  268. // Add the last set of data from last iteration.
  269. if (status == STATUS_READING_STRING) {
  270. if (!msg_id.is_empty()) {
  271. if (!skip_this) {
  272. translation->add_message(msg_id, msg_str, msg_context);
  273. }
  274. } else if (config.is_empty()) {
  275. config = msg_str;
  276. }
  277. } else if (status == STATUS_READING_PLURAL) {
  278. if (!skip_this && !msg_id.is_empty()) {
  279. ERR_FAIL_COND_V_MSG(plural_index != plural_forms - 1, Ref<Resource>(), "Number of 'msgstr[]' doesn't match with number of plural forms: " + path + ":" + itos(line));
  280. translation->add_plural_message(msg_id, msgs_plural, msg_context);
  281. }
  282. }
  283. }
  284. ERR_FAIL_COND_V_MSG(config.is_empty(), Ref<Resource>(), "No config found in file: " + path + ".");
  285. Vector<String> configs = config.split("\n");
  286. for (int i = 0; i < configs.size(); i++) {
  287. String c = configs[i].strip_edges();
  288. int p = c.find(":");
  289. if (p == -1) {
  290. continue;
  291. }
  292. String prop = c.substr(0, p).strip_edges();
  293. String value = c.substr(p + 1, c.length()).strip_edges();
  294. if (prop == "X-Language" || prop == "Language") {
  295. translation->set_locale(value);
  296. }
  297. }
  298. if (r_error) {
  299. *r_error = OK;
  300. }
  301. return translation;
  302. }
  303. Ref<Resource> TranslationLoaderPO::load(const String &p_path, const String &p_original_path, Error *r_error, bool p_use_sub_threads, float *r_progress, CacheMode p_cache_mode) {
  304. if (r_error) {
  305. *r_error = ERR_CANT_OPEN;
  306. }
  307. Ref<FileAccess> f = FileAccess::open(p_path, FileAccess::READ);
  308. ERR_FAIL_COND_V_MSG(f.is_null(), Ref<Resource>(), "Cannot open file '" + p_path + "'.");
  309. return load_translation(f, r_error);
  310. }
  311. void TranslationLoaderPO::get_recognized_extensions(List<String> *p_extensions) const {
  312. p_extensions->push_back("po");
  313. p_extensions->push_back("mo");
  314. }
  315. bool TranslationLoaderPO::handles_type(const String &p_type) const {
  316. return (p_type == "Translation");
  317. }
  318. String TranslationLoaderPO::get_resource_type(const String &p_path) const {
  319. if (p_path.get_extension().to_lower() == "po" || p_path.get_extension().to_lower() == "mo") {
  320. return "Translation";
  321. }
  322. return "";
  323. }