jadedctrl
/
wrdk
镜像自地址 https://github.com/nzmichaelh/wrdk


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
							#! /usr/bin/env python
# -*- coding: utf-8 -*-
# COPYRIGHT: Openmoko Inc. 2010
# LICENSE: GPL Version 3 or later
# DESCRIPTION: compress the fnd file - just a test (this version only decodes and prints the index entries)
# AUTHORS: Christopher Hall <hsw@openmoko.com>

import sys, os
import struct
import os.path
import pylzma
import getopt
import PrintLog
import pylzma
import locale


# Adopt the locale from the user's environment; the 'n' number format
# code used in the messages printed by main() formats according to the
# current locale setting.
locale.setlocale(locale.LC_ALL, '')

# Module-wide verbosity flag; main() sets it when -v/--verbose is given.
verbose = False


def usage(message):
    """Print an optional error line followed by the usage text, then exit.

    message -- error text, or any object convertible with str() (main()
               passes a getopt.GetoptError); None suppresses the error
               line so that only the usage text is shown.

    This function never returns: it always terminates the process with
    exit status 1, including when invoked for --help.
    """
    if message is not None:
        # Convert explicitly: the 's' format code is only accepted for
        # real strings on Python 3.4+, and an exception object would
        # otherwise raise TypeError there.
        print('error: {0:s}'.format(str(message)))
    print('usage: {0:s} <options> <fnd-file>'.format(os.path.basename(__file__)))
    print('       --help                  This message')
    print('       --verbose               Enable verbose output')
    # sys.exit rather than the bare exit() helper, which is only injected
    # by the site module and is absent when Python runs with -S.
    sys.exit(1)


def main():
    """Dump every index entry of the fnd file named on the command line.

    The file is read as:
      * 128 two-byte bigrams, stored for the codes 128..255
      * a sequence of entries, each consisting of a 5 byte header
        (little-endian uint32 article number plus one byte) followed by
        two NUL-terminated titles

    A title whose first character is below ' ' is prefix-compressed: that
    character's code plus one gives the number of leading characters to
    copy from the corresponding title of the previous entry.

    For each entry the article number, file offset, stored/expanded title
    lengths, the first title (raw and with bigrams expanded) and the second
    title are printed through PrintLog.message; a total count follows.

    Exits through usage() on bad options or a missing file argument.
    A partially present header (1..4 bytes) raises struct.error.
    """
    global verbose

    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   'hv',
                                   ['help',
                                    'verbose',
                                    ])
    except getopt.GetoptError as err:
        usage(err)

    verbose = False
    uint32_size = 4

    for opt, arg in opts:
        if opt in ('-v', '--verbose'):
            verbose = True
        elif opt in ('-h', '--help'):
            usage(None)
        else:
            usage('unhandled option: ' + opt)

    if len(args) != 1:
        usage('missing argument')

    total_entries = 0

    # 'with' guarantees the file is closed even if decoding raises
    with open(args[0], "rb") as fnd_file:

        # the bigram table comes first: one 2-byte string per code 128..255
        bigram_table = {}
        for i in range(128, 256):
            bigram_table[i] = fnd_file.read(2)

        previous_title1 = ''
        previous_title2 = ''

        while True:
            fnd_offset = fnd_file.tell()
            header = fnd_file.read(uint32_size + 1)

            # an empty read means a clean end of file
            if 0 == len(header):
                break

            # the second header byte is unpacked but not otherwise used
            article_number, nul_byte = struct.unpack('<IB', header)

            title1 = get_title(fnd_file)
            title2 = get_title(fnd_file)
            total_entries += 1

            # lengths as stored in the file, before prefix expansion
            length1 = len(title1)
            length2 = len(title2)

            title1 = _restore_prefix(title1, previous_title1)
            title2 = _restore_prefix(title2, previous_title2)

            # lengths after the shared prefix has been restored
            full_length1 = len(title1)
            full_length2 = len(title2)

            # expand each code found in the bigram table to its two bytes
            decoded_title1 = ''.join(bigram_table.get(ord(c), c) for c in title1)

            # the last line reports l2/fl2 (stored/expanded length of the
            # second title), mirroring l1/fl1 on the first title line
            PrintLog.message(u'Index: {an:13n} @ Offset: {of:13n} [0x{of:08x}]\n'
                             u'{pad1:s}[{l1:3d}/{fl1:3d}]:{t1!r:s}\n'
                             u'{pad1:s}{pad2}{dt1!r:s}\n'
                             u'{pad1:s}[{l2:3d}/{fl2:3d}]:"{t2:s}"\n'
                             .format(of = fnd_offset, an = article_number,
                                     l1 = length1, fl1 = full_length1, t1 = title1, dt1 = decoded_title1,
                                     pad1 = ' ' * 2, pad2 = ' ' * (2 * 3 + 4),
                                     l2 = length2, fl2 = full_length2, t2 = truncated_utf8(title2)))

            # the next entry may be prefix-compressed against these
            previous_title1 = title1
            previous_title2 = title2

    PrintLog.message(u'Total entries  = {0:13n}'.format(total_entries))


def _restore_prefix(title, previous_title):
    """Return title with its shared prefix copied back from previous_title.

    A title is treated as prefix-compressed when it is non-empty and its
    first character sorts below ' '; any other title is returned unchanged.
    """
    if 0 != len(title) and title[0] < ' ':
        prefix_length = ord(title[0]) + 1
        return previous_title[:prefix_length] + title[1:]
    return title


def truncated_utf8(text):
    """Convert a UTF-8 byte string to unicode even if the string is truncated.

    text -- byte string (str on Python 2, bytes on Python 3) holding UTF-8
            data that may have been cut off in the middle of a multi-byte
            sequence.

    Trailing bytes are dropped one at a time until the remainder decodes
    cleanly; the decoded text is returned, or an empty unicode string if
    nothing decodable is left.
    """
    while len(text) > 0:
        try:
            # .decode() is available on byte strings in both Python 2 and
            # Python 3, unlike the Python 2-only unicode() builtin
            return text.decode('utf-8')
        except UnicodeDecodeError:
            # discard the last byte and retry with the shorter string
            text = text[:-1]
    return u''


def get_title(f):
    """Read one NUL-terminated title from the file object f.

    Characters are read one at a time up to and including the terminating
    '\\0'; the terminator is consumed but not returned.

    If the end of the file is reached before a terminator is seen, the
    characters read so far are returned (an empty read would otherwise
    never compare equal to '\\0' and the loop would not terminate).
    """
    chars = []
    while True:
        c = f.read(1)
        # stop on end of file (empty read) or on the terminator
        if not c or '\0' == c:
            break
        chars.append(c)
    return ''.join(chars)


# run the program only when this file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()