pspec2po 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # Copyright (C) 2018, Suleyman POYRAZ (Zaryob)
  5. #
  6. # This program is free software; you can redistribute it and/or modify it
  7. # under the terms of the GNU General Public License as published by the
  8. # Free Software Foundation; either version 2 of the License, or (at your
  9. # option) any later version. Please read the COPYING file.
  10. import os
  11. import re
  12. import sys
  13. import xml.dom.minidom as dom
# Default PO header, used by Po.__init__ when no header is supplied.
# The string is not raw, so every "\\n" below ends up in the file as a literal
# backslash-n inside the quoted header field. All fields are left as
# placeholders except POT-Creation-Date, which is hard-coded.
po_header_tmpl = """# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"Report-Msgid-Bugs-To: \\n"
"POT-Creation-Date: 2018-01-01 12:58+0200\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"""
  31. class Message:
  32. def __init__(self):
  33. self.reference = None
  34. self.flags = []
  35. self.msgid = None
  36. self.msgstr = None
  37. class Po:
  38. def __init__(self, messages = None, header = None):
  39. self.messages = messages or []
  40. self.header = header or po_header_tmpl
  41. @staticmethod
  42. def _escape(str):
  43. if not str:
  44. return '""'
  45. str = re.sub('"', '\\"', str)
  46. parts = str.split("\n")
  47. if len(parts) == 1:
  48. return '"{}"'.format(parts[0])
  49. if str.endswith("\n"):
  50. parts = parts[:-1]
  51. ret = '""' + "".join(map(lambda x: '\n"{}\\n"'.format(x), parts))
  52. return ret
  53. def save(self, filename):
  54. f = open(filename, "w")
  55. f.write(self.header)
  56. for msg in self.messages:
  57. po_entry = "\n#: {}\n".format(msg.reference)
  58. if msg.flags:
  59. po_entry += "#, " + ", ".join(msg.flags) + "\n"
  60. po_entry += "msgid {}\n".format(self._escape(msg.msgid))
  61. po_entry += "msgstr {}\n".format(self._escape(msg.msgstr))
  62. f.write(po_entry)
  63. f.close()
  64. @staticmethod
  65. def _unescape(str):
  66. str = re.sub('\\\\"', '"', str)
  67. str = re.sub('\\\\n', '\n', str)
  68. return str
  69. def load(self, filename):
  70. sHeader, sSkip, sComment, sId, sMsg = range(5)
  71. self.messages = []
  72. self.header = ""
  73. msg = None
  74. state = sHeader
  75. # Silly state machines are easier to code than dealing with regexps
  76. for line in open(filename):
  77. line = line.rstrip("\n")
  78. if state == sHeader:
  79. if len(line.split()) == 0:
  80. state = sSkip
  81. else:
  82. self.header += line + "\n"
  83. continue
  84. if state == sSkip:
  85. if len(line.split()) != 0:
  86. msg = Message()
  87. state = sComment
  88. else:
  89. continue
  90. if state == sComment:
  91. if line.startswith("#: "):
  92. msg.reference = line[3:]
  93. continue
  94. elif line.startswith("#, "):
  95. msg.flags = line[3:].split(',')
  96. continue
  97. elif line.startswith("msgid "):
  98. state = sId
  99. else:
  100. continue
  101. if state == sId:
  102. if line.startswith("msgstr "):
  103. state = sMsg
  104. else:
  105. if line == 'msgid ""':
  106. continue
  107. if not msg.msgid:
  108. msg.msgid = ""
  109. msg.msgid += line[line.find('"')+1:line.rfind('"')]
  110. if state == sMsg:
  111. if len(line.split()) == 0:
  112. msg.msgid = self._unescape(msg.msgid)
  113. msg.msgstr = self._unescape(msg.msgstr)
  114. self.messages.append(msg)
  115. state = sSkip
  116. else:
  117. if not msg.msgstr:
  118. msg.msgstr = ""
  119. if line == 'msgstr ""':
  120. continue
  121. msg.msgstr += line[line.find('"')+1:line.rfind('"')]
  122. if msg:
  123. msg.msgid = self._unescape(msg.msgid)
  124. msg.msgstr = self._unescape(msg.msgstr)
  125. self.messages.append(msg)
  126. def find_packages(path):
  127. paks = []
  128. for root, dirs, files in os.walk(path):
  129. if "pspec.xml" in files:
  130. paks.append(root)
  131. # dont walk into the versioned stuff
  132. if ".svn" in dirs:
  133. dirs.remove(".svn")
  134. return paks
  135. def getDataByTagName(elem, tag):
  136. try:
  137. return elem.getElementsByTagName(tag)[0].firstChild.data
  138. except:
  139. pass
  140. def extract_from_translationsxmls(path, language, old_messages=None):
  141. if old_messages is None:
  142. old_messages = []
  143. if not path.endswith('/'):
  144. path += '/'
  145. messages = []
  146. paks = find_packages(path)
  147. # For duplicate msgid detection
  148. old_msg_ids = [omsg.msgid for omsg in old_messages]
  149. print("Number of old translations: {}".format(len(old_msg_ids)))
  150. def strp(msg):
  151. if msg:
  152. return msg.strip()
  153. return ""
  154. def set_fuzzy_flag(msg):
  155. for old_msg in old_messages:
  156. if old_msg.reference == msg.reference:
  157. if len(old_msg.msgstr) and old_msg.msgstr == msg.msgstr:
  158. if ('fuzzy' in old_msg.flags) or (strp(old_msg.msgid) != strp(msg.msgid)):
  159. msg.flags.append("fuzzy")
  160. if (old_msg.msgid == msg.msgid) and (strp(old_msg.msgstr) != strp(msg.msgstr)):
  161. msg.msgstr = old_msg.msgstr
  162. return msg
  163. def get_translation(pak, section, tag, name):
  164. if not os.path.exists(pak + "/translations.xml"):
  165. return ""
  166. try:
  167. translations = dom.parse(pak + "/translations.xml")
  168. for t_node in translations.getElementsByTagName(section):
  169. if getDataByTagName(t_node, "Name") == name:
  170. for t_item in t_node.getElementsByTagName(tag):
  171. t_lang = t_item.getAttribute("xml:lang")
  172. if t_lang == language:
  173. return t_item.firstChild.data
  174. return ""
  175. except:
  176. print("* Failed parsing {}/translations.xml".format(pak))
  177. # function starts here.
  178. paklen = 0
  179. msgidlen = 0
  180. for pak in paks:
  181. paklen += 1
  182. spec = None
  183. try:
  184. spec = dom.parse(pak + "/pspec.xml")
  185. except:
  186. print("* Failed parsing {}/pspec.xml".format(pak))
  187. continue
  188. for section in ["Package", "Source"]:
  189. for node in spec.getElementsByTagName(section):
  190. for tag in ["Summary", "Description"]:
  191. msg = Message()
  192. for item in node.getElementsByTagName(tag):
  193. lang = item.getAttribute("xml:lang")
  194. if not lang or lang == "en":
  195. msgidlen += 1
  196. msg.msgid = item.firstChild.data
  197. msg.msgstr = get_translation(pak, section, tag, getDataByTagName(node, "Name"))
  198. if section == "Package":
  199. msg.reference = pak[len(path):] + ":" + getDataByTagName(node, "Name") + ":" + tag.lower()
  200. else:
  201. msg.reference = pak[len(path):] + "::" + tag.lower()
  202. if msg.msgid and (msg.msgid not in old_msg_ids):
  203. msg = set_fuzzy_flag(msg)
  204. messages.append(msg)
  205. old_msg_ids.append(msg.msgid)
  206. print("packages: {0}, msgids: {1}".format(paklen, msgidlen))
  207. return messages
def update_translationsxmls(path, language, po):
    """Write the translations of *po* into the packages' translations.xml.

    Messages without msgstr and messages flagged ``fuzzy`` are skipped. Each
    message reference is split on ``:`` into exactly three parts (package
    directory, package name or "" for the source, tag); a reference of any
    other shape makes the unpacking raise ValueError. For every package
    directory that exists below *path*, its ``translations.xml`` is loaded (or
    a new document is started), updated with all messages of that package and
    written back.
    """
    # Package directories whose translations.xml has already been rewritten.
    finished_packages = []

    def get_sourcepackage(type, name):
        """Build a <type> element containing a <Name> child with *name*."""
        source_node = Element(type)
        name_node = Element("Name")
        name_txt = Text(name)
        name_node.appendChild(name_txt)
        source_node.appendChild(name_node)
        return source_node

    def get_sumdesc(type, content):
        """Build a <type xml:lang=language> element holding *content*."""
        node = Element(type)
        node.setAttribute("xml:lang", language)
        cnt = Text(content)
        node.appendChild(cnt)
        return node

    def has_item(doc, item, package_name):
        """Tell whether *doc* has an <item> element named *package_name*."""
        if len(doc.getElementsByTagName(item)) == 0:
            return False
        for i in doc.getElementsByTagName(item):
            if getDataByTagName(i, "Name") == package_name:
                return True
        return False

    def has_lang(item, tag, language):
        """Tell whether *item* has a <tag> element for *language*."""
        for s in item.getElementsByTagName(tag):
            if s.getAttribute("xml:lang") == language:
                return True
        return False

    # Node factories; each call creates the node from a fresh Document.
    Element = lambda x: dom.Document().createElement(x)
    Text = lambda x: dom.Document().createTextNode(x)

    # Outer pass: pick each package directory once.
    for msg in po.messages:
        if not msg.msgstr:
            continue
        if "fuzzy" in msg.flags:
            continue
        current_name, type_flag, tag = msg.reference.split(':')
        tag = tag.title()
        if not os.path.exists(os.path.join(path, current_name)):
            # The package may have been moved elsewhere or deleted from the
            # repository.
            continue
        if current_name in finished_packages:
            continue
        else:
            finished_packages.append(current_name)
        translations = os.path.join(path, current_name, "translations.xml")
        if os.path.exists(translations):
            doc = dom.parse(translations)
            pisi = doc.getElementsByTagName("INARY")[0]
        else:
            # No translations.xml yet: start a new document with an INARY root.
            doc = dom.Document()
            pisi = Element("INARY")

        # Inner pass: apply every usable message that belongs to this package.
        for msg in po.messages:
            if not msg.msgstr:
                continue
            if "fuzzy" in msg.flags:
                continue
            name, type_flag, tag = msg.reference.split(':')
            tag = tag.title()
            if name != current_name:
                continue
            # An empty middle field marks the source entry; otherwise it is the
            # binary package name.
            if not type_flag:
                item = "Source"
                package_name = os.path.basename(name)
            else:
                item = "Package"
                package_name = type_flag
            if os.path.exists(translations):
                if has_item(doc, item, package_name):
                    for i in doc.getElementsByTagName(item):
                        if getDataByTagName(i, "Name") == package_name:
                            if has_lang(i, tag, language):
                                # The entry already has this tag in this
                                # language: overwrite its text.
                                for s in i.getElementsByTagName(tag):
                                    if s.getAttribute("xml:lang") == language:
                                        s.firstChild.data = msg.msgstr
                            else:
                                # The entry exists but lacks the tag for this
                                # language: add a new tag element instead of
                                # changing an existing one.
                                sumdesc = get_sumdesc(tag, msg.msgstr)
                                i.appendChild(sumdesc)
                else:
                    # translations.xml has no entry for this item yet, so
                    # create the entry first.
                    entry = get_sourcepackage(item, package_name)
                    sumdesc = get_sumdesc(tag, msg.msgstr)
                    entry.appendChild(sumdesc)
                    pisi.appendChild(entry)
            else:
                # No translations.xml on disk: every message gets a freshly
                # created entry.
                entry = get_sourcepackage(item, package_name)
                sumdesc = get_sumdesc(tag, msg.msgstr)
                entry.appendChild(sumdesc)
                pisi.appendChild(entry)
        doc.appendChild(pisi)
        # we can't use .toprettyxml() here. Because it also adds newlines and tabs
        # before and after of sum/desc data. Result looks like this:
        #
        # (...)
        # <Description xml:lang="mi">
        # Lorem ipsum dolor sit amet..
        # </Description>
        # (...)
        #
        # and we don't want that.
        xml = doc.toxml()
        # First strip the existing line breaks/indentation in front of tags,
        # then re-insert a fixed layout.
        # NOTE(review): several of the literals below are identical in this
        # copy of the file; presumably they originally differed in the amount
        # of whitespace - confirm against the upstream script.
        xml = xml.replace("\n <", "<")
        xml = xml.replace("\n\n <", "<")
        xml = xml.replace("\n <", "<")
        xml = xml.replace("\n <", "<")
        xml = xml.replace("\n<", "<")
        xml = xml.replace("\n\n<", "<")
        xml = xml.replace("<INARY>", "\n<INARY>")
        xml = xml.replace("</INARY>", "\n</INARY>")
        xml = xml.replace("<Source>", "\n <Source>")
        xml = xml.replace("<Package>", "\n\n <Package>")
        xml = xml.replace("</Source>", "\n </Source>")
        xml = xml.replace("</Package>", "\n </Package>")
        xml = xml.replace("<Name>", "\n <Name>")
        xml = xml.replace("<Description ", "\n <Description ")
        xml = xml.replace("<Summary ", "\n <Summary ")
        # Make sure the file ends with a newline.
        if xml[-1] != "\n":
            xml += "\n"
        open(translations, "w").write(xml)
  331. def extract(path, language, pofile):
  332. if os.path.exists(pofile):
  333. old_po = Po()
  334. old_po.load(pofile)
  335. po = Po(header = old_po.header)
  336. po.messages = extract_from_translationsxmls(path, language, old_po.messages)
  337. po.save(pofile)
  338. else:
  339. po = Po()
  340. po.messages = extract_from_translationsxmls(path, language)
  341. po.save(pofile)
  342. def update(path, language, pofile):
  343. po = Po()
  344. po.load(pofile)
  345. update_translationsxmls(path, language, po)
  346. def usage():
  347. print("Extract translatable strings into a po file:")
  348. print(" pspec2po extract repopath language output_po_file")
  349. print("Update pspec translations from a po file:")
  350. print(" pspec2po update repopath language input_po_file")
  351. if __name__ == "__main__":
  352. if len(sys.argv) == 1 or sys.argv[1] == "help":
  353. usage()
  354. elif len(sys.argv) == 5 and sys.argv[1] == "extract":
  355. extract(sys.argv[2], sys.argv[3], sys.argv[4])
  356. elif len(sys.argv) == 5 and sys.argv[1] == "update":
  357. update(sys.argv[2], sys.argv[3], sys.argv[4])
  358. else:
  359. usage()