12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939 |
/**
 * Parse an XML string into a DOM document.
 *
 * @param {string} str XML source text.
 * @returns {Document} Result of DOMParser#parseFromString with MIME type 'text/xml'.
 */
var parseXml = function (str) {
  var parser = new DOMParser();
  return parser.parseFromString(str, 'text/xml');
};
- var MParser = (function (){
-
- var UNDEFINED = void 0; // canonical undefined value, used for strict comparisons in this module
-
- // Shared UTF-16LE decoder; Scanner.readUTF16 uses it to decode raw bytes.
- var UTF_16LE = new TextDecoder('utf-16le');
-
-
/**
 * Return the first argument and ignore the rest.
 *
 * Callers use it as `conseq(valueToReturn, sideEffect())`: all arguments are
 * evaluated left to right before the call, so the side effect runs after the
 * value has been computed.
 *
 * @returns {*} The first argument (undefined when called without arguments).
 */
function conseq() {
  var first = arguments[0];
  return first;
}
-
/**
 * Decrypt a buffer in place.
 *
 * The key is first digested with ripemd128(). Each byte is then nibble-swapped
 * and XOR-ed with the previous *encrypted* byte (seed 0x36), the low 8 bits of
 * its index and the digest byte at `index % digest.length`.
 *
 * @param {Uint8Array} buf Encrypted bytes; overwritten with the decrypted bytes.
 * @param {*} key Key material handed to ripemd128().
 * @returns {Uint8Array} The same `buf` instance.
 */
function decrypt(buf, key) {
  var digest = ripemd128(key);
  var digestLen = digest.length;
  var total = buf.length;
  var previous = 0x36;
  var idx = 0;
  while (idx < total) {
    var cipher = buf[idx];
    // Swap the high and low nibble; bits above 8 are dropped when stored back.
    var swapped = (cipher >> 4) | (cipher << 4);
    buf[idx] = swapped ^ previous ^ (idx & 0xFF) ^ digest[idx % digestLen];
    previous = cipher;
    idx += 1;
  }
  return buf;
}
-
-
/**
 * Pack the arguments into an array flagged with `_spreadus_ = true`.
 *
 * sliceThen().exec() checks that flag to decide whether a result is already
 * an argument list for a following `.spread()` or must be wrapped in one.
 *
 * @returns {Array} The arguments as a real array carrying the `_spreadus_` flag.
 */
function spreadus() {
  var packed = Array.prototype.slice.call(arguments);
  packed._spreadus_ = true;
  return packed;
}
-
-
/**
 * Read a byte range of a file asynchronously.
 *
 * @param {Blob} file Source file; must support `slice(start, end)`.
 * @param {number} offset Start offset in bytes.
 * @param {number} len Number of bytes to read.
 * @returns {Promise} Promise of the ArrayBuffer read. It is rejected with the
 *   reader's error when the read fails or is aborted (previously it stayed
 *   pending forever). The promise carries an extra `exec(proc, ...args)`
 *   method: it calls `proc(data, ...args)` once the data is available and
 *   resolves with an argument list suitable for `.spread()` - the result
 *   itself when it was built by spreadus(), otherwise `[result]`.
 */
function sliceThen(file, offset, len) {
  var p = new Promise(function(_resolve, _reject) {
    var reader = new FileReader();
    reader.onload = function() { _resolve(reader.result); };
    // Without these handlers a failed or aborted read would never settle.
    reader.onerror = function() { _reject(reader.error); };
    reader.onabort = function() { _reject(reader.error); };
    reader.readAsArrayBuffer(file.slice(offset, offset + len));
  });

  p.exec = function(proc /*, extra arguments for proc */) {
    var extra = Array.prototype.slice.call(arguments, 1);
    return p.then(function(data) {
      var args = [data];
      Array.prototype.push.apply(args, extra);
      var ret = proc.apply(null, args);
      // `ret` may legitimately be null; only inspect the flag on real values.
      var isSpread = ret !== UNDEFINED && ret !== null && ret._spreadus_;
      return resolve(isSpread ? ret : [ret]);
    });
  };

  return p;
}
-
-
/**
 * Shorthand for Promise.resolve().
 *
 * @param {*} value Value (or thenable) to wrap.
 * @returns {Promise} Promise resolved with `value`.
 */
function resolve(value) {
  var settled = Promise.resolve(value);
  return settled;
}
-
-
/**
 * Shorthand for Promise.reject().
 *
 * @param {*} reason Rejection reason.
 * @returns {Promise} Promise rejected with `reason`.
 */
function reject(reason) {
  var failed = Promise.reject(reason);
  return failed;
}
-
-
/**
 * Wait for every outcome to settle and collect the results.
 *
 * @param {Array} outcomes Promises (or plain values) to wait for.
 * @returns {Promise<Array>} Resolves with the fulfilled values when at least
 *   one outcome was fulfilled, otherwise with the list of rejection reasons.
 *   Rejects with "** NOT FOUND **" when `outcomes` is empty.
 */
function harvest(outcomes) {
  return Promise.settle(outcomes).then(function(results) {
    if (results.length === 0) {
      return reject("** NOT FOUND **");
    }

    var solved = [], failed = [];
    results.forEach(function(inspection) {
      // isFulfilled(), not isResolved(): after settle() every inspection is
      // "resolved" (fulfilled OR rejected), and value() throws on a rejected one.
      if (inspection.isFulfilled()) {
        solved.push(inspection.value());
      } else {
        failed.push(inspection.reason());
      }
    });
    return solved.length ? solved : failed;
  });
}
-
-
/**
 * Create a lookup table of record blocks.
 *
 * The table is a flat Uint32Array of (compressed offset, decompressed offset)
 * pairs. Entries must be put in ascending order, and the final pair is the end
 * sentinel (offsets just past the last block) so block sizes can be computed
 * as differences of neighbouring pairs.
 *
 * @returns {{alloc: function, put: function, find: function}}
 */
function createRecordBlockTable() {
  var cursor = 0;   // next free slot in `table`
  var table;        // allocated by alloc()

  /** Allocate room for `len` (comp, decomp) pairs. */
  function alloc(len) {
    table = new Uint32Array(len * 2);
  }

  /** Append one (compressed offset, decompressed offset) pair. */
  function put(offset_comp, offset_decomp) {
    table[cursor++] = offset_comp;
    table[cursor++] = offset_decomp;
  }

  /**
   * Binary-search the block whose decompressed range contains `keyAt`.
   * Returns undefined when `keyAt` is negative, beyond the last decompressed
   * offset, or when the table holds fewer than two pairs.
   */
  function find(keyAt) {
    var high = (table.length >> 1) - 1;
    var low = 0;
    if (keyAt > table[(high << 1) + 1] || keyAt < 0) {
      return;
    }
    while (high - low > 1) {
      var probe = (low + high) >> 1;
      if (keyAt < table[(probe << 1) + 1]) {
        high = probe;
      } else {
        low = probe;
      }
    }
    var blockNo = (low + high) >> 1;
    if (blockNo >= high) {
      return;
    }
    var base = blockNo << 1;
    return {
      block_no: blockNo,
      comp_offset: table[base],
      comp_size: table[base + 2] - table[base],
      decomp_offset: table[base + 1],
      decomp_size: table[base + 3] - table[base + 1]
    };
  }

  return { alloc: alloc, put: put, find: find };
}
-
-
/**
 * Interpret a header attribute as a boolean.
 *
 * @param {*} v Attribute value; falsy values count as false.
 * @returns {boolean} True when the value, converted to a lower-case string,
 *   is 'yes' or 'true'.
 */
function isTrue(v) {
  var text = ((v || false) + '').toLowerCase();
  switch (text) {
    case 'yes':
    case 'true':
      return true;
    default:
      return false;
  }
}
-
-
- function parse_mdict(file, ext) {
- var KEY_INDEX, // keyword index (one entry per key block); assigned in the load chain below
- RECORD_BLOCK_TABLE = createRecordBlockTable(); // offsets of record blocks, filled by read_record_block()
- var attrs = {}, // header attributes, filled by read_header_sect()
- _v2, // set to true by config() when GeneratedByEngineVersion >= 2
- _bpu, // bytes per text unit: 2 for UTF-16, otherwise 1 (set by config())
- _tail, // extra bytes skipped after a sized text: _bpu for v2, 0 otherwise
- _decoder, // TextDecoder for attrs.Encoding (set by config())
- _decryptors = [false, false],
- // _decryptors[1] is replaced with decrypt() by config() when (attrs.Encrypted & 0x02) is set.
- // _searchTextLen(dv, offset): byte length of the NUL-terminated text at offset; chosen by config().
- _searchTextLen,
- // Reads the length prefix of a sized text: one byte by default, two bytes for v2.
- _readShort = function(scanner) { return scanner.readUint8(); },
- // Reads a number: a 4-byte integer by default; v2 skips 4 bytes first, then reads 4.
- _readNum = function(scanner) { return scanner.readInt(); },
-
- // No-op by default; for v2, config() makes it skip a 4-byte checksum.
- _checksum_v2 = function() {},
-
- // Normalizes a key before comparison; identity until config() installs the real rule.
- _adaptKey = function(key) { return key; },
-
- // sliceThen() bound to this file: _slice(offset, len).
- _slice = sliceThen.bind(null, file);
-
-
-
/**
 * Derive the parsing strategy from the header attributes in `attrs`.
 *
 * Sets the text decoder and unit size, switches the number/short/checksum
 * readers to their v2 variants when GeneratedByEngineVersion >= 2, enables
 * key-index decryption when bit 0x02 of attrs.Encrypted is set, and installs
 * the key normalizer `_adaptKey` according to KeyCaseSensitive / StripKey.
 */
function config() {
  attrs.Encoding = attrs.Encoding || 'UTF-16';
  var isUtf16 = (attrs.Encoding === 'UTF-16');

  // Length in bytes of the NUL-terminated text starting at `offset`.
  if (isUtf16) {
    _searchTextLen = function(dv, offset) {
      var start = offset;
      while (dv.getUint16(offset)) {
        offset += _bpu;
      }
      return offset - start;
    };
  } else {
    _searchTextLen = function(dv, offset) {
      var start = offset;
      while (dv.getUint8(offset++)) { }
      return offset - start - 1;
    };
  }

  _decoder = new TextDecoder(attrs.Encoding || 'UTF-16LE');
  _bpu = isUtf16 ? 2 : 1;

  if (parseInt(attrs.GeneratedByEngineVersion, 10) >= 2.0) {
    _v2 = true;
    _tail = _bpu;

    // v2 numbers are read as 4 skipped bytes followed by a 4-byte integer.
    _readNum = function(scanner) {
      scanner.forward(4);
      return scanner.readInt();
    };
    _readShort = function(scanner) { return scanner.readUint16(); };
    _checksum_v2 = function(scanner) { return scanner.checksum(); };
  } else {
    _tail = 0;
  }

  if (attrs.Encrypted & 0x02) {
    _decryptors[1] = decrypt;
  }

  var regexp = MCommon.REGEXP_STRIPKEY[ext];
  var caseSensitive = isTrue(attrs.KeyCaseSensitive);
  // When StripKey is absent, case-insensitive v1 dictionaries default to stripping.
  var strip = caseSensitive
    ? isTrue(attrs.StripKey)
    : isTrue(attrs.StripKey || (_v2 ? '' : 'yes'));

  if (caseSensitive) {
    if (strip) {
      _adaptKey = function(key) { return key.replace(regexp, '$1'); };
    } else {
      _adaptKey = function(key) { return key; };
    }
  } else if (strip) {
    _adaptKey = function(key) { return key.toLowerCase().replace(regexp, '$1'); };
  } else {
    _adaptKey = function(key) { return key.toLowerCase(); };
  }
}
-
-
/**
 * Create a sequential reader over an ArrayBuffer.
 *
 * All multi-byte integers are read big-endian. Every read advances an internal
 * offset. Version-dependent reads (readShort, readNum, checksum_v2) and text
 * decoding are delegated to the strategy variables set up by config().
 *
 * @param {ArrayBuffer} buf Data to scan.
 * @param {number} [len] Logical size reported by size(); defaults to buf.byteLength.
 * @returns {Object} Scanner object (methods live on its prototype).
 */
function Scanner(buf, len) {
  var offset = 0, dv = new DataView(buf);
  var methods = {
    /** Logical size of the scanned data in bytes. */
    size: function() { return len || buf.byteLength; },

    /** Advance the offset by `len` bytes and return the new offset. */
    forward: function(len) { return offset += len; },

    /** Current offset in bytes. */
    offset: function() { return offset; },

    // Fixed-size big-endian reads; conseq() returns the value after advancing.
    readInt: function() { return conseq(dv.getUint32(offset, false), this.forward(4)); },
    readUint16: function() { return conseq(dv.getUint16(offset, false), this.forward(2)); },
    readUint8: function() { return conseq(dv.getUint8(offset, false), this.forward(1)); },

    /** Read a text-length prefix (width depends on the engine version). */
    readShort: function() { return _readShort(this); },

    /** Read a number (width depends on the engine version). */
    readNum: function() { return _readNum(this); },

    /** Decode `len` bytes as UTF-16LE text. */
    readUTF16: function(len) { return conseq(UTF_16LE.decode(new Uint8Array(buf, offset, len)), this.forward(len)); },

    /** Read NUL-terminated text in the dictionary encoding and skip the terminator. */
    readText: function() {
      var len = _searchTextLen(dv, offset);
      return conseq(_decoder.decode(new Uint8Array(buf, offset, len)), this.forward(len + _bpu));
    },

    /** Read text of `len` units (not bytes) and skip the version-dependent tail. */
    readTextSized: function(len) {
      len *= _bpu;
      return conseq(_decoder.decode(new Uint8Array(buf, offset, len)), this.forward(len + _tail));
    },

    /** Skip a 4-byte checksum. */
    checksum: function() { this.forward(4); },

    /** Skip the checksum only present in v2 files. */
    checksum_v2: function() { return _checksum_v2(this); },

    /**
     * Read a block of `len` bytes. The first byte is the compression type:
     * 0 means uncompressed (this scanner is returned; for v2 the 8-byte block
     * header is skipped first), 2 means zlib (pako.inflate), anything else
     * goes to lzo.decompress. For compressed blocks a new Scanner over the
     * decompressed data is returned and this scanner moves past the block.
     *
     * @param {number} len Size of the block including its 8-byte header.
     * @param {number} expectedBufSize Decompressed size, passed to lzo.decompress.
     * @param {function} [decryptor] Optional decrypt(bytes, key), applied in place before decompression.
     */
    readBlock: function(len, expectedBufSize, decryptor) {
      var comp_type = dv.getUint8(offset, false);
      if (comp_type === 0) {
        if (_v2) {
          this.forward(8);
        }
        return this;
      } else {
        // Skip the 8-byte block header (compression type + checksum).
        offset += 8; len -= 8;
        var tmp = new Uint8Array(buf, offset, len);
        if (decryptor) {
          // Key = the 4 header bytes preceding the payload + fixed suffix.
          var passkey = new Uint8Array(8);
          passkey.set(new Uint8Array(buf, offset - 4, 4));
          passkey.set([0x95, 0x36, 0x00, 0x00], 4);
          tmp = decryptor(tmp, passkey);
        }

        tmp = comp_type === 2 ? pako.inflate(tmp) : lzo.decompress(tmp, expectedBufSize, 1308672);
        this.forward(len);
        return Scanner(tmp.buffer, tmp.length);
      }
    },

    /**
     * Return a view of the next `len` raw bytes; without `len`, everything up
     * to the end of the buffer. The offset is advanced past the returned bytes.
     */
    readRaw: function(len) {
      var bytes = new Uint8Array(buf, offset, len);
      // `buf` is an ArrayBuffer: its size is byteLength (`length` is undefined
      // and would turn the offset into NaN).
      this.forward(len === UNDEFINED ? buf.byteLength - offset : len);
      return bytes;
    }
  };
  return Object.create(methods);
}
-
-
/**
 * Read the first integer of the file (used by the loader as the header length).
 *
 * @param {ArrayBuffer} input The first 4 bytes of the file.
 * @returns {number} The integer returned by Scanner#readInt.
 */
function read_file_head(input) {
  var scanner = Scanner(input);
  return scanner.readInt();
}
-
/**
 * Parse the XML header section and configure the parser from it.
 *
 * Copies every attribute of the <Dictionary> / <Library_Data> element into
 * `attrs`, normalizes attrs.Encrypted to an integer (0 when missing or not
 * numeric), logs the attributes and calls config().
 *
 * @param {ArrayBuffer} input Buffer starting at the header text.
 * @param {number} len Length of the header text in bytes.
 * @returns {Array} spreadus(len + 4, input): the offset just past the header
 *   plus 4 more bytes, and the same buffer for the next step.
 */
function read_header_sect(input, len) {
  var raw = Scanner(input).readUTF16(len);
  var header_str = raw.replace(/\0$/, '');

  var root = parseXml(header_str).querySelector('Dictionary, Library_Data');
  var nodes = root.attributes;
  var i = 0;
  while (i < nodes.length) {
    var item = nodes.item(i);
    attrs[item.nodeName] = item.nodeValue;
    i += 1;
  }

  attrs.Encrypted = parseInt(attrs.Encrypted, 10) || 0;
  MCommon.log('dictionary attributes: ', attrs);
  config();
  return spreadus(len + 4, input);
}
-
/**
 * Read the keyword summary that follows the header.
 *
 * @param {ArrayBuffer} input Buffer that contains the summary.
 * @param {number} offset Byte offset of the summary inside `input`.
 * @returns {Object} Summary fields in file order; `key_index_decomp_len` is
 *   only read for v2 files (otherwise it holds the falsy `_v2` value), and
 *   `len` is the number of bytes consumed.
 */
function read_keyword_summary(input, offset) {
  var scanner = Scanner(input);
  scanner.forward(offset);

  // Fields must be read in exactly this order.
  var summary = {};
  summary.num_blocks = scanner.readNum();
  summary.num_entries = scanner.readNum();
  summary.key_index_decomp_len = _v2 && scanner.readNum();
  summary.key_index_comp_len = scanner.readNum();
  summary.key_blocks_len = scanner.readNum();
  summary.chksum = scanner.checksum_v2();
  summary.len = scanner.offset() - offset;
  return summary;
}
-
/**
 * Read the keyword index: one descriptor per key block.
 *
 * @param {ArrayBuffer} input The (possibly compressed/encrypted) key index section.
 * @param {Object} keyword_summary Result of read_keyword_summary().
 * @returns {Array} spreadus(keyword_summary, keyword_index), where each index
 *   entry holds num_entries, first_word, last_word, comp_size, decomp_size,
 *   the block's offset (running sum of the preceding comp_size values) and
 *   its position `index`.
 */
function read_keyword_index(input, keyword_summary) {
  var scanner = Scanner(input).readBlock(
    keyword_summary.key_index_comp_len,
    keyword_summary.key_index_decomp_len,
    _decryptors[1]
  );
  var keyword_index = Array(keyword_summary.num_blocks);
  var runningOffset = 0;

  for (var i = 0; i < keyword_summary.num_blocks; i++) {
    // Each word is preceded by its length; the read order below is the file layout.
    var numEntries = scanner.readNum();
    var firstLen = scanner.readShort();
    var firstWord = scanner.readTextSized(firstLen);
    var lastLen = scanner.readShort();
    var lastWord = scanner.readTextSized(lastLen);
    var compSize = scanner.readNum();
    var decompSize = scanner.readNum();

    keyword_index[i] = {
      num_entries: numEntries,
      first_word: firstWord,
      last_word: lastWord,
      comp_size: compSize,
      decomp_size: decompSize,
      offset: runningOffset,
      index: i
    };
    runningOffset += compSize;
  }
  return spreadus(keyword_summary, keyword_index);
}
-
/**
 * Walk through one key block, reading (and discarding) each entry.
 *
 * @param {Object} scanner Scanner positioned at the start of the key block.
 * @param {Object} kdx Keyword index entry (comp_size, decomp_size, num_entries).
 */
function read_key_block(scanner, kdx) {
  var block = scanner.readBlock(kdx.comp_size, kdx.decomp_size);
  var seen = 0;
  while (seen < kdx.num_entries) {
    // Each entry is a record offset followed by the key text.
    block.readNum();
    block.readText();
    seen += 1;
  }
}
-
-
/**
 * After a delay, scan every key block once (reads all keys and discards them).
 *
 * @param {Promise} slicedKeyBlock Promise of the key block section; must support `.delay()`.
 * @param {number} num_entries Unused.
 * @param {Array} keyword_index Keyword index entries, one per key block.
 * @param {number} delay Delay in milliseconds before scanning starts.
 */
function willScanKeyTable(slicedKeyBlock, num_entries, keyword_index, delay) {
  var delayed = slicedKeyBlock.delay(delay);
  delayed.then(function (input) {
    MCommon.log('scan key table...');
    var scanner = Scanner(input);
    var total = keyword_index.length;
    var at = 0;
    while (at < total) {
      read_key_block(scanner, keyword_index[at]);
      at += 1;
    }
    MCommon.log('KEY_TABLE loaded.');
  });
}
-
/**
 * Read the record summary.
 *
 * @param {ArrayBuffer} input Buffer starting at the record summary.
 * @param {number} pos Absolute file offset of the record summary.
 * @returns {Object} num_blocks, num_entries, index_len, blocks_len, `len`
 *   (bytes consumed) and `block_pos`, the absolute file offset of the first
 *   record block (pos + index_len + len).
 */
function read_record_summary(input, pos) {
  var scanner = Scanner(input);
  var record_summary = {};
  record_summary.num_blocks = scanner.readNum();
  record_summary.num_entries = scanner.readNum();
  record_summary.index_len = scanner.readNum();
  record_summary.blocks_len = scanner.readNum();
  record_summary.len = scanner.offset();

  // Record blocks start right after the summary and the record index.
  record_summary.block_pos = pos + record_summary.index_len + record_summary.len;
  return record_summary;
}
-
/**
 * Read the record index and fill RECORD_BLOCK_TABLE with the running
 * compressed/decompressed offsets of every record block.
 *
 * @param {ArrayBuffer} input The record index section.
 * @param {Object} record_summary Result of read_record_summary().
 */
function read_record_block(input, record_summary) {
  var scanner = Scanner(input);
  var blockCount = record_summary.num_blocks;
  var record_index = Array(blockCount);
  var compAt = record_summary.block_pos;   // absolute file offset of the current block
  var decompAt = 0;                        // offset inside the decompressed record stream

  // One extra pair for the end sentinel.
  RECORD_BLOCK_TABLE.alloc(blockCount + 1);
  for (var n = 0; n < blockCount; n++) {
    var entry = {
      comp_size: scanner.readNum(),
      decomp_size: scanner.readNum()
    };
    record_index[n] = entry;
    RECORD_BLOCK_TABLE.put(compAt, decompAt);
    compAt += entry.comp_size;
    decompAt += entry.decomp_size;
  }
  RECORD_BLOCK_TABLE.put(compAt, decompAt);
}
-
-
/**
 * Read the definition text of one key from its record block.
 *
 * @param {ArrayBuffer} input The raw record block.
 * @param {Object} block Block descriptor from RECORD_BLOCK_TABLE.find().
 * @param {Object} keyinfo Key with an `offset` into the decompressed record stream.
 * @returns {string} The NUL-terminated text found at the key's offset.
 */
function read_definition(input, block, keyinfo) {
  var records = Scanner(input).readBlock(block.comp_size, block.decomp_size);
  var relative = keyinfo.offset - block.decomp_offset;
  records.forward(relative);
  return records.readText();
}
-
-
/**
 * Resolve a redirecting definition.
 *
 * @param {string} definition Definition text.
 * @param {function} lookup Called with the link target when the definition
 *   starts with '@@@LINK='.
 * @returns {*} The definition itself, or whatever `lookup(target)` returns.
 */
function followLink(definition, lookup) {
  var isLink = definition.substring(0, 8) === '@@@LINK=';
  if (!isLink) {
    return definition;
  }
  return lookup(definition.substring(8));
}
-
/**
 * Read the raw bytes of one resource from its record block.
 *
 * @param {ArrayBuffer} input The raw record block.
 * @param {Object} block Block descriptor from RECORD_BLOCK_TABLE.find().
 * @param {Object} keyinfo Key with `offset` and `size`.
 * @returns {Uint8Array} The resource bytes.
 * @throws {string} '* OUT OF FILE RANGE * ...' when `input` is empty.
 */
function read_object(input, block, keyinfo) {
  if (!(input.byteLength > 0)) {
    throw '* OUT OF FILE RANGE * ' + keyinfo + ' @offset=' + block.comp_offset;
  }
  var records = Scanner(input).readBlock(block.comp_size, block.decomp_size);
  records.forward(keyinfo.offset - block.decomp_offset);
  return records.readRaw(keyinfo.size);
}
-
-
/**
 * Load the definition of one key.
 *
 * @param {Object} keyinfo Key (String object) carrying the record `offset`.
 * @returns {Promise} Promise of the definition; '@@@LINK=' redirects are
 *   followed through LOOKUP.mdx. Rejected when no record block covers the
 *   key's offset (previously a synchronous TypeError, which aborted the whole
 *   lookup instead of failing just this entry).
 */
function findWord(keyinfo) {
  var block = RECORD_BLOCK_TABLE.find(keyinfo.offset);
  if (!block) {
    return reject('* RECORD BLOCK NOT FOUND * ' + keyinfo + ' @offset=' + keyinfo.offset);
  }
  return _slice(block.comp_offset, block.comp_size)
    .exec(read_definition, block, keyinfo)
    .spread(function (definition) { return resolve(followLink(definition, LOOKUP.mdx)); });
}
-
-
/**
 * Load the raw bytes of one resource.
 *
 * @param {Object} keyinfo Key (String object) carrying the record `offset` and `size`.
 * @returns {Promise} Promise of the resource bytes. Rejected when no record
 *   block covers the key's offset (previously a synchronous TypeError).
 */
function findResource(keyinfo) {
  var block = RECORD_BLOCK_TABLE.find(keyinfo.offset);
  if (!block) {
    return reject('* RECORD BLOCK NOT FOUND * ' + keyinfo + ' @offset=' + keyinfo.offset);
  }
  return _slice(block.comp_offset, block.comp_size)
    .exec(read_object, block, keyinfo)
    .spread(function (blob) { return resolve(blob); });
}
-
-
-
-
- var slicedKeyBlock, // promise of the raw key-block section; assigned in the load chain
- _cached_keys, // last key list loaded by loadKeys(): {list, pilot} where pilot is the block's first_word
- _trail, // progress of the current matchKeys() search (phrase, block, offset, pos, count, total, exhausted)
- mutual_ticket = 0; // incremented by each matchKeys() call; appendMore() aborts when its ticket is stale
-
-
/**
 * Binary-search the keyword index for the block that may contain `phrase`.
 *
 * @param {Array} arr Keyword index entries, ordered by their `last_word`.
 * @param {string} phrase Key, already normalized with _adaptKey.
 * @returns {Object|undefined} The first entry whose normalized last_word is
 *   not smaller than `phrase`, or the last entry when there is none;
 *   undefined for an empty index.
 */
function reduce(arr, phrase) {
  var from = 0;
  var to = arr.length;   // exclusive
  while (to - from > 1) {
    var half = (to - from) >> 1;
    if (phrase > _adaptKey(arr[from + half - 1].last_word)) {
      from += half;
    } else {
      to = from + half;
    }
  }
  return arr[from];
}
-
-
/**
 * Binary-search a key list for the position of `phrase`.
 *
 * @param {Array} arr Keys of one block in ascending (normalized) order. An
 *   optional numeric `arr.pos` is added to the result as a base position.
 * @param {string} phrase Key, already normalized with _adaptKey.
 * @returns {number} Index of the candidate key, or that index + 1 when
 *   `phrase` is greater than the candidate.
 */
function shrink(arr, phrase) {
  var base = arr.pos || 0;
  var from = 0;
  var to = arr.length;   // exclusive
  while (to - from > 1) {
    var half = (to - from) >> 1;
    if (phrase < _adaptKey(arr[from + half])) {
      to = from + half;
    } else {
      from += half;
    }
  }
  var step = (phrase <= _adaptKey(arr[from])) ? 0 : 1;
  return base + from + step;
}
-
/**
 * Load all keys of one key block (with a one-block cache).
 *
 * @param {Object} kdx Keyword index entry of the block.
 * @returns {Promise<Array>} Promise of the key list. Every key is a String
 *   object carrying `offset` (record offset) and - for all but the last key
 *   of the block - `size` (distance to the next key's offset).
 */
function loadKeys(kdx) {
  // The cache is keyed by the block's first word.
  var cacheHit = _cached_keys && _cached_keys.pilot === kdx.first_word;
  if (cacheHit) {
    return resolve(_cached_keys.list);
  }

  return slicedKeyBlock.then(function(input) {
    var section = Scanner(input);
    var list = Array(kdx.num_entries);
    section.forward(kdx.offset);
    var block = section.readBlock(kdx.comp_size, kdx.decomp_size);

    var previous = null;
    for (var n = 0; n < kdx.num_entries; n++) {
      var recordOffset = block.readNum();
      // A String object (not a primitive) so properties can be attached.
      var entry = new Object(block.readText());
      entry.offset = recordOffset;
      list[n] = entry;
      if (previous) {
        previous.size = recordOffset - previous.offset;
      }
      previous = entry;
    }
    _cached_keys = { list: list, pilot: kdx.first_word };
    return list;
  });
}
-
-
/**
 * Locate the first key that matches (or follows) `phrase`.
 *
 * @param {string} phrase Key to look for; normalized with _adaptKey first.
 * @returns {Promise<Array>} Promise of [kdx, idx, list]: the key block entry,
 *   the index of the first candidate inside that block (clamped to the last
 *   key) and the block's key list.
 */
function seekVanguard(phrase) {
  var target = _adaptKey(phrase);
  var kdx = reduce(KEY_INDEX, target);

  if (target <= _adaptKey(kdx.last_word)) {
    // Step back over preceding blocks that end with the same (normalized) word.
    for (var index = kdx.index - 1; ; index--) {
      var prev = KEY_INDEX[index];
      if (!prev || _adaptKey(prev.last_word) !== _adaptKey(kdx.last_word)) {
        break;
      }
      kdx = prev;
    }
  }

  return loadKeys(kdx).then(function (list) {
    var idx = shrink(list, target);
    // Rewind to the first of several keys that normalize to the same value.
    while (idx > 0 && _adaptKey(list[idx - 1]) === _adaptKey(target)) {
      idx--;
    }
    return [kdx, Math.min(idx, list.length - 1), list];
  });
}
-
-
- /**
-  * Extend `list` with keys from the following key blocks until enough results
-  * are collected, updating the shared `_trail` progress record on the way.
-  * With `filter` (wildcard search) it continues while fewer than expectedSize
-  * keys matched and the next block's first word still starts with `word`;
-  * without it, it appends keys from the next blocks until `list` holds
-  * expectedSize entries. Returns a promise of the resulting list.
-  * Throws 'force terminated' when `ticket` is no longer the current mutual_ticket.
-  */
- function appendMore(word, list, nextKdx, expectedSize, filter, ticket) {
- if (ticket !== mutual_ticket) { // a newer matchKeys() call has started; abandon this chain
- throw 'force terminated';
- }
- if (filter) {
- if (_trail.count < expectedSize && nextKdx && nextKdx.first_word.substr(0, word.length) === word) {
- return loadKeys(nextKdx).delay(30).then(function(more) {
- MCommon.log(nextKdx);
- _trail.offset = 0;
- _trail.block = nextKdx.index;
- Array.prototype.push.apply(list, more.filter(filter, _trail)); // filter counts matches in _trail
- return appendMore(word, list, KEY_INDEX[nextKdx.index + 1], expectedSize, filter, ticket);
- });
- } else {
- if (list.length === 0) {
- _trail.exhausted = true; // nothing matched at all: later follow-up calls return [] immediately
- }
- return resolve(list);
- }
- } else {
- var shortage = expectedSize - list.length;
- if (shortage > 0 && nextKdx) {
- console.log('go next', nextKdx);
- _trail.block = nextKdx.index;
- return loadKeys(nextKdx).then(function(more) {
- _trail.offset = 0;
- _trail.pos = Math.min(shortage, more.length); // number of keys taken from this block
- Array.prototype.push.apply(list, more.slice(0, shortage));
- console.log('$$ ' + more[shortage - 1], shortage);
- return appendMore(word, list, KEY_INDEX[nextKdx.index + 1], expectedSize, filter, ticket);
- });
- } else {
- if (_trail.pos > expectedSize) {
- _trail.pos = expectedSize;
- }
- list = list.slice(0, expectedSize); // trim surplus keys taken from the starting block
- _trail.count = list.length;
- _trail.total += _trail.count;
- return resolve(list);
- }
- }
- }
-
/**
 * Resume a search at the position recorded in `_trail`.
 *
 * @returns {Promise<Array>} Promise of [kdx, idx, list] in the same shape as
 *   seekVanguard(): the block recorded in _trail.block, the index
 *   _trail.offset + _trail.pos (clamped to the last key) and the block's keys.
 */
function followUp() {
  var kdx = KEY_INDEX[_trail.block];
  return loadKeys(kdx).then(function (list) {
    var resumeAt = _trail.offset + _trail.pos;
    var lastIndex = list.length - 1;
    return [kdx, Math.min(resumeAt, lastIndex), list];
  });
}
-
/**
 * List keys that start at (or, with wildcards, match) a phrase.
 *
 * `*` matches any run of characters and `?` a single character; every other
 * regular-expression metacharacter in the phrase is taken literally. A
 * wildcard search starts at the literal prefix before the first wildcard.
 * Unless the phrase ends with a space, wildcard matches containing a space
 * are skipped.
 *
 * @param {string} phrase Phrase to search for.
 * @param {number} [expectedSize] Maximum number of keys to return (at least 10).
 * @param {boolean} [follow] Continue the previous search for the same phrase
 *   instead of starting over.
 * @returns {Promise<Array>} Promise of the matching keys; an empty list when a
 *   followed search is already exhausted.
 */
function matchKeys(phrase, expectedSize, follow) {
  expectedSize = Math.max(expectedSize || 0, 10);
  var str = phrase.trim().toLowerCase(),
      m = /([^?*]+)[?*]+/.exec(str),
      word,
      filter;
  if (m) {
    word = m[1];
    // Escape every regex metacharacter except the wildcards * and ?.
    // `{`, `}` and `|` are included: left unescaped, a phrase such as "a|b*"
    // would turn into an alternation and match unrelated keys.
    var wildcard = new RegExp('^' + str.replace(/([.\\+\[\^\]$(){}|])/g, '\\$1').replace(/\*+/g, '.*').replace(/\?/g, '.') + '$'),
        tester = phrase[phrase.length - 1] === ' '
          ? function(s) { return wildcard.test(s); }
          : function(s) { return wildcard.test(s) && !/ /.test(s); };
    // Used with Array#filter; counts accepted keys in the shared _trail.
    filter = function (s, i) {
      if (_trail.count < expectedSize && tester(s)) {
        _trail.count++;
        _trail.total++;
        _trail.pos = i + 1;
        return true;
      }
      return false;
    };
  } else {
    word = phrase.trim();
  }

  // A different phrase always starts a fresh search.
  if (_trail && _trail.phrase !== phrase) {
    follow = false;
  }

  if (follow && _trail && _trail.exhausted) {
    return resolve([]);
  }

  var startFrom = follow && _trail ? followUp() : seekVanguard(word);

  return startFrom.spread(function(kdx, idx, list) {
    console.log('start ', kdx);
    list = list.slice(idx);
    _trail = {phrase: phrase,
              block: kdx.index,
              offset: idx,
              pos: list.length,
              count: 0,
              total: follow ? _trail && _trail.total || 0 : 0
             };
    if (filter) {
      list = list.filter(filter, _trail);
    }
    // Incrementing mutual_ticket invalidates any appendMore() chain still running.
    return appendMore(word, list, KEY_INDEX[kdx.index + 1], expectedSize, filter, ++mutual_ticket)
      .then(function(result) {
        if (_trail.block === KEY_INDEX.length - 1) {
          if (_trail.offset + _trail.pos >= KEY_INDEX[_trail.block].num_entries) {
            _trail.exhausted = true;
            console.log('EXHAUSTED!!!!');
          }
        }
        console.log('trail: ', _trail);
        return result;
      });
  });
}
-
-
-
/**
 * Pick the first key with the given record offset.
 *
 * @param {Array} list Keys carrying an `offset` property.
 * @param {number} offset Record offset to look for.
 * @returns {Array} A one-element array with the first matching key, or an
 *   empty array when no key matches.
 */
function matchOffset(list, offset) {
  var count = list.length;
  for (var n = 0; n < count; n++) {
    if (!(n in list)) {
      continue;   // skip holes
    }
    if (list[n].offset === offset) {
      return [list[n]];
    }
  }
  return [];
}
-
-
/**
 * Lookup entry points, one per file type.
 */
var LOOKUP = {
  /**
   * Look up a dictionary (.mdx) entry.
   *
   * @param {string|String|Object} query Either a word (a String object may
   *   carry an `offset` to select one exact entry), or a
   *   `{phrase, max, follow}` object to list matching keys via matchKeys().
   * @returns {Promise} Promise of the definitions (word query) or of the
   *   matching keys (object query).
   */
  mdx: function(query) {
    var isWord = (typeof query === 'string' || query instanceof String);
    if (!isWord) {
      return matchKeys(query.phrase, query.max, query.follow);
    }

    // A direct lookup discards the state of any key-listing search.
    _trail = null;
    var word = query.trim().toLowerCase();
    var offset = query.offset;
    return seekVanguard(word).spread(function(kdx, idx, list) {
      var candidates = list.slice(idx);
      var hits = (offset !== UNDEFINED)
        ? matchOffset(candidates, offset)
        : candidates.filter(function(el) { return el.toLowerCase() === word; });
      return harvest(hits.map(findWord));
    });
  },

  /**
   * Look up a resource (.mdd) by path.
   *
   * The path is lower-cased, one leading or trailing slash is removed, a
   * backslash is prepended and remaining forward slashes become backslashes.
   *
   * @param {string} phrase Resource path.
   * @returns {Promise} Promise of the resource bytes; rejected with
   *   '*RESOURCE NOT FOUND* ...' when no key matches.
   */
  mdd: function(phrase) {
    var word = phrase.trim().toLowerCase();
    word = '\\' + word.replace(/(^[/\\])|([/]$)/, '');
    word = word.replace(/\//g, '\\');
    return seekVanguard(word).spread(function(kdx, idx, list) {
      var candidates = list.slice(idx);
      return candidates.filter(function(one) {
        return one.toLowerCase() === word;
      });
    }).then(function(candidates) {
      if (candidates.length === 0) {
        throw '*RESOURCE NOT FOUND* ' + phrase;
      }
      return findResource(candidates[0]);
    });
  }
};
-
-
-
-
- MCommon.log('start to load ' + file.name);
- // Absolute file offset of the next section to read; advanced by every step below.
- var pos = 0;
- // Sections are read strictly in file order; the chain resolves with the lookup function for `ext`.
- return _slice(pos, 4).exec(read_file_head).spread(function(len) {
- pos += 4;
- return _slice(pos, len + 48) // header text plus 48 more bytes, so the keyword summary can be read from the same buffer
- .exec(read_header_sect, len);
- }).spread(function(header_remain_len, input) {
- pos += header_remain_len;
- return read_keyword_summary(input, header_remain_len);
- }).then(function(keyword_summary) { MCommon.log(keyword_summary);
- pos += keyword_summary.len;
- return _slice(pos, keyword_summary.key_index_comp_len)
- .exec(read_keyword_index, keyword_summary);
- }).spread(function (keyword_summary, keyword_index) {
- pos += keyword_summary.key_index_comp_len;
- slicedKeyBlock = _slice(pos, keyword_summary.key_blocks_len);
- // The key blocks are only sliced here; loadKeys() decodes individual blocks on demand.
-
- pos += keyword_summary.key_blocks_len;
-
- KEY_INDEX = keyword_index;
-
- }).then(function () {
- return _slice(pos, 32) // 32 bytes requested for the record summary (four numbers)
- .exec(read_record_summary, pos);
-
- }).spread(function (record_summary) { MCommon.log(record_summary);
- pos += record_summary.len;
- return _slice(pos, record_summary.index_len)
- .exec(read_record_block, record_summary);
- }).spread(function() { MCommon.log('-- parse done --', file.name);
- // Expose the dictionary description on the lookup function handed to the caller.
- LOOKUP[ext].description = attrs.Description;
- return resolve(LOOKUP[ext]);
- });
- };
-
-
-
-
-
-
- return function load(files) {
- var resources = [];
- Array.prototype.forEach.call(files, function(f) {
- var ext = MCommon.getExtension(f.name, 'mdx');
-
- resources.push(resources[ext] = parse_mdict(f, ext));
- });
-
- return Promise.all(resources)
- .then(function() { return resolve(resources); });
- };
-
- }());
|