WordWrap.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # COPYRIGHT: Openmoko Inc. 2010
  4. # LICENSE: GPL Version 3 or later
  5. # DESCRIPTION: Word wrap text strings
  6. # AUTHORS: Sean Moss-Pultz <sean@openmoko.com>
  7. # Christopher Hall <hsw@openmoko.com>
  8. import sys
  9. import os
  10. import unicodedata
  11. import PrintLog
  12. class WordWrap():
  13. """word wrapping class"""
  14. CJK = ['CJK', 'HIRAGANA', 'KATAKANA', 'HANGUL', 'BOPOMOFO']
  15. PUNCTUATION = ['IDEOGRAPHIC', 'FULLWIDTH']
  16. def __init__(self, char_width_funtion):
  17. self.buffer = []
  18. self.char_width_funtion = char_width_funtion
  19. def append(self, text, face, url):
  20. if type(text) != unicode:
  21. text = unicode(text, 'utf8')
  22. leading_space = text[0] == ' '
  23. trailing_space = text[-1] == ' '
  24. words = text.split()
  25. space_len = self.char_width_funtion(' ', face)
  26. space = (' ', face, url, space_len, [space_len])
  27. if leading_space:
  28. try:
  29. if ' ' == self.buffer[-1][0]:
  30. leading_space = False
  31. except IndexError:
  32. pass
  33. if leading_space:
  34. self.buffer.append(space)
  35. for one_word in words:
  36. for w in self.partition(one_word):
  37. word_len = [self.char_width_funtion(c, face) for c in w]
  38. self.buffer.append((w, face, url, sum(word_len), word_len))
  39. self.buffer.append(space)
  40. if self.buffer != [] and not trailing_space and self.buffer[-1][0] == ' ':
  41. del self.buffer[-1]
  42. def partition(self, text):
  43. """private method - simulate zero width spaces for Japanese"""
  44. l = []
  45. r = ''
  46. last_n = ''
  47. for c in text:
  48. try:
  49. n = unicodedata.name(c).split()[0]
  50. except ValueError:
  51. n = 'NoName'
  52. PrintLog.message(u'No unicode name for: "{0:s}"'.format(c))
  53. if n in self.CJK:
  54. if '' != r:
  55. l.append(r)
  56. r = c
  57. last_n = n
  58. elif last_n in self.CJK:
  59. if n in self.PUNCTUATION:
  60. l.append(r + c)
  61. r = ''
  62. last_n = ''
  63. else:
  64. l.append(r)
  65. r = c
  66. last_n = n
  67. else:
  68. r += c
  69. last_n = n
  70. if '' != r:
  71. l.append(r)
  72. return l
  73. def AppendImage(self, width, height, data, url):
  74. self.buffer.append(('@', (width, height, data), url, width, [width]))
  75. def dump(self):
  76. print('B: {0:s}'.format(self.buffer))
  77. def out(self):
  78. try:
  79. if ' ' == self.buffer[-1][0]:
  80. del self.buffer[-1]
  81. except IndexError:
  82. pass
  83. sys.stdout.write('"')
  84. for b in self.buffer:
  85. sys.stdout.write(b[0].encode('utf-8'))
  86. sys.stdout.write('"\n')
  87. def split(self, item, width):
  88. # do not attempt to split a single character
  89. # this should not occur, but handle it anyway
  90. if len(item[0]) == 1:
  91. return (item, None)
  92. text = item[0]
  93. lengths = item[4]
  94. i = 0
  95. sum = 0
  96. for w in lengths:
  97. if sum + w > width:
  98. break
  99. sum += w
  100. i += 1
  101. return ((text[:i], item[1], item[2], sum, lengths[:i]),
  102. (text[i:], item[1], item[2], item[3] - sum, lengths[i:]))
  103. def wrap(self, width):
  104. if [] == self.buffer:
  105. return []
  106. while self.buffer[0][0] == ' ':
  107. del self.buffer[0]
  108. if self.buffer == []:
  109. return []
  110. if self.buffer[0][3] >= width and len(self.buffer[0][0]) > 1:
  111. (r, self.buffer[0]) = self.split(self.buffer[0], width - 1)
  112. return [r]
  113. result = [self.buffer[0]]
  114. w = result[0][3]
  115. i = 1
  116. for word in self.buffer[1:]:
  117. w += word[3]
  118. if w >= width:
  119. break
  120. result.append(word)
  121. i += 1
  122. if i > 0:
  123. del self.buffer[:i]
  124. if result[-1][0] == ' ':
  125. del result[-1]
  126. return result
  127. def have(self):
  128. return self.buffer != []
  129. # some testing
  130. def main():
  131. def make_link(url, x0, x1):
  132. print('\033[1;33mLink: "{0:s}" {1:d} -> {2:d} => {3:d}\033[0m'.format(url, x0, x1, (x1 - x0)))
  133. def cwidth(c, face):
  134. if unicode != type(c):
  135. c = unicode(c, 'utf-8')
  136. if unicodedata.name(c).split()[0] in ['CJK', 'HIRAGANA', 'KATAKANA', 'IDEOGRAPHIC', 'FULLWIDTH']:
  137. return 2
  138. return 1
  139. b = WordWrap(cwidth)
  140. default = '\033[0m'
  141. grey = '\033[1;30m'
  142. pink = '\033[1;31m'
  143. green = '\033[1;32m'
  144. yellow = '\033[1;33m'
  145. blue = '\033[1;34m'
  146. purple = '\033[1;35m'
  147. cyan = '\033[1;36m'
  148. white = '\033[1;37m'
  149. red = '\033[1;38m'
  150. colours = {
  151. 'n': default,
  152. 'b': green,
  153. 'i': blue,
  154. 'bi': purple,
  155. None: red,
  156. }
  157. b.append(' hello world I am some text', 'n', None)
  158. b.append(' this is another bit ', 'n', None)
  159. b.append('that is also add', 'n', 'A Link')
  160. b.append('ed ', 'i', None)
  161. b.append('in ', 'n', None)
  162. b.append(' 1234567890abcdefghijklmnopqrstuvwxyz ', 'bi', None)
  163. b.append(' and another line here is: 1234567890abcdefghijklmnopqrstuvwxyz & more', 'n', None)
  164. b.append(' some bold text ', 'b', None)
  165. b.append(' and ', 'n', None)
  166. b.append(' micro', 'n', 'micro')
  167. b.append('SDHC', 'n', 'SD card')
  168. b.append(' a VeryVeryVery', 'n', None)
  169. b.append('LongLinkWithoutSpacesThatIsCutIntoSeveralLines', 'n', 'nothing')
  170. b.append('VeryVeryVeryNothingNothingNothing', 'n', None)
  171. b.append('The expression list is evaluated once; it should yield an iterable ', 'n', None)
  172. b.append('object. An iterator is created for the result of the ', 'n', None)
  173. b.append('expression_list. The suite is then executed once for each item ', 'n', None)
  174. b.append('provided by the iterator, in the order of ascending indices. Each ', 'n', 'sdfdsf')
  175. b.append('item in turn is assigned to the target list using the standard rules ', 'n', None)
  176. b.append('for assignments, and then the suite is executed. When the items are ', 'n', None)
  177. b.append('exhausted (which is immediately when the sequence is empty), the ', 'n', None)
  178. b.append('suite in the else clause, if present, is executed, and the loop ', 'n', None)
  179. b.append('and yes it is. this ', 'n', None)
  180. b.AppendImage(1, 7, '@@@@', None)
  181. b.AppendImage(101, 7, '@@@@', None)
  182. b.AppendImage(102, 7, '@@@@', None)
  183. b.append(' is an image', 'n', None)
  184. b.append('------------------------------', 'n', None)
  185. b.append('振り子(ふりこ)は一点で支えられた棒、ひもなどの先に重りを付けたもの。', 'n', None)
  186. b.append('地上など', 'n', None)
  187. b.append('重力', 'n', None)
  188. b.append('のあるところで一回力を加えると揺れを繰り返す。 支点での摩擦や空気抵抗の無い理想の環境では永久に揺れ続ける。', 'n', None)
  189. b.append('------------------------------', 'n', None)
  190. b.append('振り子についての最初の研究記録は10世紀頃のアラビア人の天文学者イブン・ユーヌスによる。さらに 17世紀、ガリレオにはじまる物理学者らよる観測の結果、等時性が発見され時計に使用されるようになった。', 'n', None)
  191. b.append('------------------------------', 'n', None)
  192. b.append('振り子についてのab cd最初の研究記録は10世紀頃のアラビア人の、天文学者イブン・ユーヌスによる。さらに 17世紀、ガリレオにはじまる物理学者らよる観測の結果、等時性が発見され時計に使用されるようになった。', 'n', None)
  193. b.dump()
  194. b.out()
  195. ruler = 0
  196. while b.have():
  197. url = None
  198. x = 0
  199. url_x0 = 0
  200. l = b.wrap(30)
  201. t = default
  202. for i in l:
  203. if url != i[2]:
  204. if url != None:
  205. make_link(url, url_x0, x)
  206. t += default
  207. url = i[2]
  208. if url != None:
  209. url_x0 = x
  210. t += red
  211. if url == None:
  212. if tuple == type(i[1]):
  213. (width, height, data) = i[1]
  214. t += "[{0:d}.{1:d}:{2:s}]".format(width, height, data)
  215. else:
  216. t += colours[i[1]]
  217. t += i[0]
  218. x += i[3]
  219. if url != None:
  220. make_link(url, url_x0, x)
  221. t += default
  222. if 0 == ruler:
  223. print(' " 1 2 3"')
  224. print(' "123456789012345678901234567890"')
  225. print((u'Wrap: "{0:s}"' + default).format(t))
  226. ruler += 1
  227. if ruler > 10:
  228. ruler = 0
  229. # for c in u'imageや空気抵振り子(ふりこ)は一点でアラた。testing(x,y)の、・':
  230. # print u'{0:s} : {1:s} '.format(c, unicodedata.name(c).split())
  231. # run the program
  232. if __name__ == "__main__":
  233. main()