123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369 |
- // Copyright (C) 2003 Mooffie <mooffie@typo.co.il>
- //
- // This program is free software; you can redistribute it and/or modify
- // it under the terms of the GNU General Public License as published by
- // the Free Software Foundation; either version 2 of the License, or
- // (at your option) any later version.
- //
- // This program is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- //
- // You should have received a copy of the GNU General Public License
- // along with this program; if not, write to the Free Software
- // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- #include <config.h>
- #include "shaping.h"
- #include "bidi.h"
- #include "univalues.h"
- struct charinfo {
- unichar ccode;
- char cclass;
- unichar isolated;
- unichar final;
- unichar initial;
- unichar medial;
- };
- static charinfo infos[] = {
- { 0x0621, 'U', 0xFE80, 0x0000, 0x0000, 0x0000 },
- { 0x0622, 'R', 0xFE81, 0xFE82, 0x0000, 0x0000 },
- { 0x0623, 'R', 0xFE83, 0xFE84, 0x0000, 0x0000 },
- { 0x0624, 'R', 0xFE85, 0xFE86, 0x0000, 0x0000 },
- { 0x0625, 'R', 0xFE87, 0xFE88, 0x0000, 0x0000 },
- { 0x0626, 'D', 0xFE89, 0xFE8A, 0xFE8B, 0xFE8C },
- { 0x0627, 'R', 0xFE8D, 0xFE8E, 0x0000, 0x0000 },
- { 0x0628, 'D', 0xFE8F, 0xFE90, 0xFE91, 0xFE92 },
- { 0x0629, 'R', 0xFE93, 0xFE94, 0x0000, 0x0000 },
- { 0x062A, 'D', 0xFE95, 0xFE96, 0xFE97, 0xFE98 },
- { 0x062B, 'D', 0xFE99, 0xFE9A, 0xFE9B, 0xFE9C },
- { 0x062C, 'D', 0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0 },
- { 0x062D, 'D', 0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4 },
- { 0x062E, 'D', 0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8 },
- { 0x062F, 'R', 0xFEA9, 0xFEAA, 0x0000, 0x0000 },
- { 0x0630, 'R', 0xFEAB, 0xFEAC, 0x0000, 0x0000 },
- { 0x0631, 'R', 0xFEAD, 0xFEAE, 0x0000, 0x0000 },
- { 0x0632, 'R', 0xFEAF, 0xFEB0, 0x0000, 0x0000 },
- { 0x0633, 'D', 0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4 },
- { 0x0634, 'D', 0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8 },
- { 0x0635, 'D', 0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC },
- { 0x0636, 'D', 0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0 },
- { 0x0637, 'D', 0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4 },
- { 0x0638, 'D', 0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8 },
- { 0x0639, 'D', 0xFEC9, 0xFECA, 0xFECB, 0xFECC },
- { 0x063A, 'D', 0xFECD, 0xFECE, 0xFECF, 0xFED0 },
- { 0x063B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x063C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x063D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x063E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x063F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0640, 'D', 0x0640, 0x0640, 0x0640, 0x0640 },
- { 0x0641, 'D', 0xFED1, 0xFED2, 0xFED3, 0xFED4 },
- { 0x0642, 'D', 0xFED5, 0xFED6, 0xFED7, 0xFED8 },
- { 0x0643, 'D', 0xFED9, 0xFEDA, 0xFEDB, 0xFEDC },
- { 0x0644, 'D', 0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0 },
- { 0x0645, 'D', 0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4 },
- { 0x0646, 'D', 0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8 },
- { 0x0647, 'D', 0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC },
- { 0x0648, 'R', 0xFEED, 0xFEEE, 0x0000, 0x0000 },
- { 0x0649, 'D', 0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9 },
- { 0x064A, 'D', 0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4 },
- { 0x064B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x064C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x064D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x064E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x064F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0650, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0651, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0652, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0653, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0654, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0655, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0656, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0657, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0658, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0659, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x065A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x065B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x065C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x065D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x065E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x065F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0660, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0661, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0662, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0663, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0664, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0665, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0666, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0667, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0668, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0669, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x066A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x066B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x066C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x066D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x066E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x066F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0670, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0671, 'R', 0xFB50, 0xFB51, 0x0000, 0x0000 },
- { 0x0672, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0673, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0674, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0675, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0676, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0677, 'R', 0xFBDD, 0xFFFD, 0x0000, 0x0000 },
- { 0x0678, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0679, 'D', 0xFB66, 0xFB67, 0xFB68, 0xFB69 },
- { 0x067A, 'D', 0xFB5E, 0xFB5F, 0xFB60, 0xFB61 },
- { 0x067B, 'D', 0xFB52, 0xFB53, 0xFB54, 0xFB55 },
- { 0x067C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x067D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x067E, 'D', 0xFB56, 0xFB57, 0xFB58, 0xFB59 },
- { 0x067F, 'D', 0xFB62, 0xFB63, 0xFB64, 0xFB65 },
- { 0x0680, 'D', 0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D },
- { 0x0681, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0682, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0683, 'D', 0xFB76, 0xFB77, 0xFB78, 0xFB79 },
- { 0x0684, 'D', 0xFB72, 0xFB73, 0xFB74, 0xFB75 },
- { 0x0685, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0686, 'D', 0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D },
- { 0x0687, 'D', 0xFB7E, 0xFB7F, 0xFB80, 0xFB81 },
- { 0x0688, 'R', 0xFB88, 0xFB89, 0x0000, 0x0000 },
- { 0x0689, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x068A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x068B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x068C, 'R', 0xFB84, 0xFB85, 0x0000, 0x0000 },
- { 0x068D, 'R', 0xFB82, 0xFB83, 0x0000, 0x0000 },
- { 0x068E, 'R', 0xFB86, 0xFB87, 0x0000, 0x0000 },
- { 0x068F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0690, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0691, 'R', 0xFB8C, 0xFB8D, 0x0000, 0x0000 },
- { 0x0692, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0693, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0694, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0695, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0696, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0697, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x0698, 'R', 0xFB8A, 0xFB8B, 0x0000, 0x0000 },
- { 0x0699, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x069A, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x069B, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x069C, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x069D, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x069E, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x069F, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A0, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A1, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A3, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A4, 'D', 0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D },
- { 0x06A5, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A6, 'D', 0xFB6E, 0xFB6F, 0xFB70, 0xFB71 },
- { 0x06A7, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A8, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06A9, 'D', 0xFB8E, 0xFB8F, 0xFB90, 0xFB91 },
- { 0x06AA, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06AB, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06AC, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06AD, 'D', 0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6 },
- { 0x06AE, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06AF, 'D', 0xFB92, 0xFB93, 0xFB94, 0xFB95 },
- { 0x06B0, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B1, 'D', 0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D },
- { 0x06B2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B3, 'D', 0xFB96, 0xFB97, 0xFB98, 0xFB99 },
- { 0x06B4, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B5, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B6, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B7, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B8, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06B9, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06BA, 'D', 0xFB9E, 0xFB9F, 0xFFFD, 0xFFFD },
- { 0x06BB, 'D', 0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3 },
- { 0x06BC, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06BD, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06BE, 'D', 0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD },
- { 0x06BF, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06C0, 'R', 0xFBA4, 0xFBA5, 0x0000, 0x0000 },
- { 0x06C1, 'D', 0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9 },
- { 0x06C2, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06C3, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06C4, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06C5, 'R', 0xFBE0, 0xFBE1, 0x0000, 0x0000 },
- { 0x06C6, 'R', 0xFBD9, 0xFBDA, 0x0000, 0x0000 },
- { 0x06C7, 'R', 0xFBD7, 0xFBD8, 0x0000, 0x0000 },
- { 0x06C8, 'R', 0xFBDB, 0xFBDC, 0x0000, 0x0000 },
- { 0x06C9, 'R', 0xFBE2, 0xFBE3, 0x0000, 0x0000 },
- { 0x06CA, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06CB, 'R', 0xFBDE, 0xFBDF, 0x0000, 0x0000 },
- { 0x06CC, 'D', 0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF },
- { 0x06CD, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06CE, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06CF, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06D0, 'D', 0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7 },
- { 0x06D1, 'X', 0x0000, 0x0000, 0x0000, 0x0000 },
- { 0x06D2, 'R', 0xFBAE, 0xFBAF, 0x0000, 0x0000 },
- { 0x06D3, 'R', 0xFBB0, 0xFBB1, 0x0000, 0x0000 }
- };
- #define TBLMIN 0x0621
- #define TBLMAX 0x06D3
- static inline bool is_d(unichar ch) {
- if (ch >= TBLMIN && ch <= TBLMAX) {
- return (infos[ch - TBLMIN].cclass == 'D');
- }
- return false;
- }
- static inline bool is_r(unichar ch) {
- if (ch >= TBLMIN && ch <= TBLMAX) {
- return (infos[ch - TBLMIN].cclass == 'R');
- }
- return false;
- }
- static inline bool is_rjc(unichar ch) {
- if (ch == UNI_ZWJ)
- return true;
- if (ch >= TBLMIN && ch <= TBLMAX) {
- return (infos[ch - TBLMIN].cclass == 'D');
- }
- return false;
- }
- static inline bool is_ljc(unichar ch) {
- if (ch == UNI_ZWJ)
- return true;
- if (ch >= TBLMIN && ch <= TBLMAX) {
- return (infos[ch - TBLMIN].cclass == 'D'
- || infos[ch - TBLMIN].cclass == 'R');
- }
- return false;
- }
- static inline charinfo *get_info(unichar ch) {
- if (ch >= TBLMIN && ch <= TBLMAX) {
- return &infos[ch - TBLMIN];
- }
- return NULL;
- }
- bool is_shaping_transparent(unichar ch) {
- return BiDi::is_nsm(ch);
- }
- // shape() - this is a temporary and a very inefficient implementation
- // of Arabic joining described in section 8.2 of the Unicode standard.
- //
- // :TODO: optimize.
- int shape(unichar *s, int len, attribute_t *attributes)
- {
- if (!len)
- return len;
- unichar a = 0, b = 0, c = 0;
- for (int i = len - 1; i >= 0; i--) {
- b = s[i];
- unichar &chref = s[i];
- c = 0;
- while (i > 0 && is_shaping_transparent(s[i-1]))
- i--;
- if (i > 0)
- c = s[i-1];
- if (is_r(b)) {
- if (is_rjc(a)) {
- chref = get_info(b)->final;
- }
- } else if (is_d(b)) {
- if (is_rjc(a) && is_ljc(c)) {
- chref = get_info(b)->medial;
- } else if (is_rjc(a) && !is_ljc(c)) {
- chref = get_info(b)->final;
- } else if (!is_rjc(a) && is_ljc(c)) {
- chref = get_info(b)->initial;
- }
- }
- a = b;
- }
-
- return ligate(s, len, attributes);
- }
- // ligate() - do LAM-ALEF ligatures. returns the new length of the string.
- int ligate(unichar *s, int len, attribute_t *attributes)
- {
- #define LAM_L 0xFEDF
- #define LAM_M 0xFEE0
- #define ALEF_MADDA_R 0xFE82
- #define ALEF_HAMZA_ABOVE_R 0xFE84
- #define ALEF_HAMZA_BELOW_R 0xFE88
- #define ALEF_R 0xFE8E
- #define LAMALEF_MADDA_I 0xFEF5
- #define LAMALEF_MADDA_R 0xFEF6
- #define LAMALEF_HAMZA_ABOVE_I 0xFEF7
- #define LAMALEF_HAMZA_ABOVE_R 0xFEF8
- #define LAMALEF_HAMZA_BELOW_I 0xFEF9
- #define LAMALEF_HAMZA_BELOW_R 0xFEFA
- #define LAMALEF_I 0xFEFB
- #define LAMALEF_R 0xFEFC
- int new_len = len;
- bool may_start = false;
- int lig_start = 0; // silence the compiler
- for (int i = len - 1; i >= 0; i--) {
- if (i > 0 && (s[i] == LAM_L || s[i] == LAM_M)) {
- lig_start = i;
- may_start = true;
- }
- else if (may_start) {
- if (s[i] == ALEF_MADDA_R ||
- s[i] == ALEF_HAMZA_ABOVE_R ||
- s[i] == ALEF_HAMZA_BELOW_R ||
- s[i] == ALEF_R)
- {
- int rlig = (s[lig_start] == LAM_M) ? 1 : 0;
- switch (s[i]) {
- case ALEF_MADDA_R:
- s[i] = LAMALEF_MADDA_I + rlig; break;
- case ALEF_HAMZA_ABOVE_R:
- s[i] = LAMALEF_HAMZA_ABOVE_I + rlig; break;
- case ALEF_HAMZA_BELOW_R:
- s[i] = LAMALEF_HAMZA_BELOW_I + rlig; break;
- case ALEF_R:
- s[i] = LAMALEF_I + rlig; break;
- }
- for (int j = lig_start; j < new_len - 1; j++)
- s[j] = s[j+1];
- if (attributes) {
- for (int j = lig_start; j < new_len - 1; j++)
- attributes[j] = attributes[j+1];
- }
-
- new_len--; // we deleted a LAM
- } else {
- if (!is_shaping_transparent(s[i]))
- may_start = false;
- }
- }
- }
- return new_len;
- }
|