jadedctrl
/
wrdk
mirror da https://github.com/nzmichaelh/wrdk


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
							#! /usr/bin/env python
# -*- coding: utf-8 -*-
# COPYRIGHT: Openmoko Inc. 2010
# LICENSE: GPL Version 3 or later
# DESCRIPTION: Word wrap a tex strings
# AUTHORS: Sean Moss-Pultz <sean@openmoko.com>
#          Christopher Hall <hsw@openmoko.com>

import sys
import os
import unicodedata
import PrintLog


class WordWrap():
    """word wrapping class"""

    CJK = ['CJK', 'HIRAGANA', 'KATAKANA', 'HANGUL', 'BOPOMOFO']

    PUNCTUATION = ['IDEOGRAPHIC', 'FULLWIDTH']

    def __init__(self, char_width_funtion):
        self.buffer = []
        self.char_width_funtion = char_width_funtion


    def append(self, text, face, url):
        if type(text) != unicode:
            text = unicode(text, 'utf8')
        leading_space = text[0] == ' '
        trailing_space = text[-1] == ' '
        words = text.split()
        space_len = self.char_width_funtion(' ', face)
        space = (' ', face, url, space_len, [space_len])
        if leading_space:
            try:
                if ' ' == self.buffer[-1][0]:
                    leading_space = False
            except IndexError:
                pass

        if leading_space:
            self.buffer.append(space)

        for one_word in words:
            for w in self.partition(one_word):
                word_len = [self.char_width_funtion(c, face) for c in w]
                self.buffer.append((w, face, url, sum(word_len), word_len))
            self.buffer.append(space)

        if self.buffer != [] and not trailing_space and self.buffer[-1][0] == ' ':
            del self.buffer[-1]


    def partition(self, text):
        """private method - simulate zero width spaces for Japanese"""
        l = []
        r = ''
        last_n = ''
        for c in text:
            try:
                n = unicodedata.name(c).split()[0]
            except ValueError:
                n = 'NoName'
                PrintLog.message(u'No unicode name for: "{0:s}"'.format(c))

            if n in self.CJK:
                if '' != r:
                    l.append(r)
                r = c
                last_n = n

            elif last_n in self.CJK:
                if n in self.PUNCTUATION:
                    l.append(r + c)
                    r = ''
                    last_n = ''
                else:
                    l.append(r)
                    r = c
                    last_n = n
            else:
                r += c
                last_n = n

        if '' != r:
            l.append(r)
        return l


    def AppendImage(self, width, height, data, url):
        self.buffer.append(('@', (width, height, data), url, width, [width]))


    def dump(self):
        print('B: {0:s}'.format(self.buffer))


    def out(self):
        try:
            if ' ' == self.buffer[-1][0]:
                del self.buffer[-1]
        except IndexError:
            pass
        sys.stdout.write('"')
        for b in self.buffer:
            sys.stdout.write(b[0].encode('utf-8'))
        sys.stdout.write('"\n')


    def split(self, item, width):
        # do not attempt to split a single character
        # this should not occur, but handle it anyway
        if len(item[0]) == 1:
            return (item, None)

        text = item[0]
        lengths = item[4]
        i = 0
        sum = 0
        for w in lengths:
           if sum + w > width:
               break
           sum += w
           i += 1
        return ((text[:i], item[1], item[2], sum, lengths[:i]),
                (text[i:], item[1], item[2], item[3] - sum, lengths[i:]))


    def wrap(self, width):
        if [] == self.buffer:
            return []

        while self.buffer[0][0] == ' ':
            del self.buffer[0]
            if self.buffer == []:
                return []

        if self.buffer[0][3] >= width and len(self.buffer[0][0]) > 1:
            (r, self.buffer[0]) = self.split(self.buffer[0], width - 1)
            return [r]

        result = [self.buffer[0]]
        w = result[0][3]
        i = 1
        for word in self.buffer[1:]:
            w += word[3]
            if w >= width:
                break
            result.append(word)
            i += 1

        if i > 0:
            del self.buffer[:i]

        if result[-1][0] == ' ':
            del result[-1]

        return result


    def have(self):
        return self.buffer != []


# some testing
def main():

    def make_link(url, x0, x1):
        print('\033[1;33mLink: "{0:s}" {1:d} -> {2:d} => {3:d}\033[0m'.format(url, x0, x1, (x1 - x0)))

    def cwidth(c, face):
        if unicode != type(c):
            c = unicode(c, 'utf-8')
        if unicodedata.name(c).split()[0] in ['CJK', 'HIRAGANA', 'KATAKANA', 'IDEOGRAPHIC', 'FULLWIDTH']:
            return 2
        return 1

    b = WordWrap(cwidth)
    default = '\033[0m'
    grey = '\033[1;30m'
    pink = '\033[1;31m'
    green = '\033[1;32m'
    yellow = '\033[1;33m'
    blue = '\033[1;34m'
    purple = '\033[1;35m'
    cyan = '\033[1;36m'
    white = '\033[1;37m'
    red = '\033[1;38m'

    colours = {
        'n': default,
        'b': green,
        'i': blue,
        'bi': purple,
        None: red,
        }
    b.append('     hello world I am some text', 'n', None)
    b.append('   this is another bit ', 'n', None)
    b.append('that is also add', 'n', 'A Link')
    b.append('ed ', 'i', None)
    b.append('in ', 'n', None)
    b.append('     1234567890abcdefghijklmnopqrstuvwxyz ', 'bi', None)
    b.append('    and another line here is: 1234567890abcdefghijklmnopqrstuvwxyz & more', 'n', None)
    b.append(' some bold text ', 'b', None)
    b.append(' and ', 'n', None)
    b.append(' micro', 'n', 'micro')
    b.append('SDHC', 'n', 'SD card')
    b.append(' a VeryVeryVery', 'n', None)
    b.append('LongLinkWithoutSpacesThatIsCutIntoSeveralLines', 'n', 'nothing')
    b.append('VeryVeryVeryNothingNothingNothing', 'n', None)

    b.append('The expression list is evaluated once; it should yield an iterable ', 'n', None)
    b.append('object. An iterator is created for the result of the ', 'n', None)
    b.append('expression_list. The suite is then executed once for each item ', 'n', None)
    b.append('provided by the iterator, in the order of ascending indices. Each ', 'n', 'sdfdsf')
    b.append('item in turn is assigned to the target list using the standard rules ', 'n', None)
    b.append('for assignments, and then the suite is executed. When the items are ', 'n', None)
    b.append('exhausted (which is immediately when the sequence is empty), the ', 'n', None)
    b.append('suite in the else clause, if present, is executed, and the loop ', 'n', None)
    b.append('and yes it is. this ', 'n', None)
    b.AppendImage(1, 7, '@@@@', None)
    b.AppendImage(101, 7, '@@@@', None)
    b.AppendImage(102, 7, '@@@@', None)
    b.append(' is an image', 'n', None)

    b.append('------------------------------', 'n', None)
    b.append('振り子（ふりこ）は一点で支えられた棒、ひもなどの先に重りを付けたもの。', 'n', None)
    b.append('地上など', 'n', None)
    b.append('重力', 'n', None)
    b.append('のあるところで一回力を加えると揺れを繰り返す。 支点での摩擦や空気抵抗の無い理想の環境では永久に揺れ続ける。', 'n', None)

    b.append('------------------------------', 'n', None)
    b.append('振り子についての最初の研究記録は10世紀頃のアラビア人の天文学者イブン・ユーヌスによる。さらに 17世紀、ガリレオにはじまる物理学者らよる観測の結果、等時性が発見され時計に使用されるようになった。', 'n', None)

    b.append('------------------------------', 'n', None)
    b.append('振り子についてのab cd最初の研究記録は10世紀頃のアラビア人の、天文学者イブン・ユーヌスによる。さらに 17世紀、ガリレオにはじまる物理学者らよる観測の結果、等時性が発見され時計に使用されるようになった。', 'n', None)

    b.dump()
    b.out()

    ruler = 0
    while b.have():
        url = None
        x = 0
        url_x0 = 0
        l = b.wrap(30)
        t = default
        for i in l:
            if url != i[2]:
                if url != None:
                    make_link(url, url_x0, x)
                    t += default
                url = i[2]
                if url != None:
                    url_x0 = x
                    t += red
            if url == None:
                if tuple == type(i[1]):
                    (width, height, data) = i[1]
                    t += "[{0:d}.{1:d}:{2:s}]".format(width, height, data)
                else:
                    t += colours[i[1]]
                    t += i[0]
            x += i[3]
        if url != None:
            make_link(url, url_x0, x)
            t += default

        if 0 == ruler:
            print('      "         1         2         3"')
            print('      "123456789012345678901234567890"')
        print((u'Wrap: "{0:s}"' + default).format(t))
        ruler += 1
        if ruler > 10:
            ruler = 0


#    for c in u'imageや空気抵振り子（ふりこ）は一点でアラた。testing(x,y)の、・':
#        print u'{0:s} : {1:s} '.format(c, unicodedata.name(c).split())


# run the program
if __name__ == "__main__":
    main()