# gb180304.awk
  1. # Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
  2. # National Institute of Advanced Industrial Science and Technology (AIST)
  3. # Registration Number H13PRO009
  4. # This file is part of GNU Emacs.
  5. # GNU Emacs is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. # GNU Emacs is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. # You should have received a copy of the GNU General Public License
  14. # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  # Fill the tohex lookup table before any input is read.  Each
  # hexadecimal letter, in upper and lower case, maps to its numeric
  # value (10 through 15); decode_hex consults this table for every
  # character that is not a decimal digit.
  BEGIN {
    tohex["A"] = tohex["a"] = 10;
    tohex["B"] = tohex["b"] = 11;
    tohex["C"] = tohex["c"] = 12;
    tohex["D"] = tohex["d"] = 13;
    tohex["E"] = tohex["e"] = 14;
    tohex["F"] = tohex["f"] = 15;
  }
  # Convert a string of hexadecimal digits to its numeric value.
  #
  #   str - the digits to decode, most significant first.  "0"-"9" are
  #         converted directly; every other character is looked up in
  #         the global tohex table, so a character that is absent from
  #         tohex contributes 0 for its position.
  #
  # Returns the decoded number; an empty string yields 0.
  #
  # The names after the wide gap in the parameter list are local
  # variables (the usual awk idiom); callers pass only str.  Keeping
  # them local stops each call from overwriting same-named globals
  # such as a caller's loop counter.
  function decode_hex(str,    value, len, pos, digit) {
    value = 0;
    len = length(str);
    for (pos = 1; pos <= len; pos++) {
      digit = substr(str, pos, 1);
      if (digit >= "0" && digit <= "9")
        value = value * 16 + (digit - "0");
      else
        value = value * 16 + tohex[digit];
    }
    return value;
  }
  # Map a two-byte code, given as a single number, to a linear index.
  #
  #   gb - numeric code whose high byte is int(gb / 256) and whose low
  #        byte is gb % 256.
  #
  # Returns (high - 129) * 191 + (low - 64): each high byte owns a run
  # of 191 consecutive slots, and index 0 corresponds to high byte 129
  # (0x81) with low byte 64 (0x40).
  #
  # NOTE(review): the original carried a disabled adjustment
  # ("if (b1 >= 127) idx--") that would have removed the slot for low
  # byte 0x7F.  It stays disabled here, so 0x7F still occupies a slot;
  # confirm this is intended before relying on exact index values.
  #
  # lead and trail are local variables (declared as extra parameters)
  # so that calls do not overwrite globals of the same name.
  function gb_to_index(gb,    lead, trail) {
    lead = int(gb / 256);
    trail = gb % 256;
    return (lead - 129) * 191 + trail - 64;
  }
  # Convert a linear index into the eight-hex-digit text of a
  # four-byte code (presumably a GB18030 four-byte sequence, going by
  # the file name).
  #
  #   idx - non-negative linear index; 0 yields "81308130".
  #
  # The index is split like a mixed-radix number, least significant
  # byte first:
  #   byte 3: 10 values starting at 48  (0x30)
  #   byte 2: 126 values starting at 129 (0x81)
  #   byte 1: 10 values starting at 48  (0x30)
  #   byte 0: whatever remains, offset by 129 (0x81)
  #
  # Returns the four bytes formatted as upper-case hex, two digits each.
  #
  # b0..b3 are local variables (declared as extra parameters) so that
  # calls do not overwrite globals of the same name.
  function index_to_gb(idx,    b0, b1, b2, b3) {
    b3 = (idx % 10) + 48;
    idx = int(idx / 10);
    b2 = (idx % 126) + 129;
    idx = int(idx / 126);
    b1 = (idx % 10) + 48;
    b0 = int(idx / 10) + 129;
    return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
  }
  # Pass comment lines (those beginning with "#") through to the output
  # unchanged and skip the remaining rules for them.
  # "#" is an ordinary character inside a regular expression, so it is
  # written without a backslash: escaping an ordinary character is
  # undefined in POSIX extended regular expressions and some awk
  # implementations may warn about it.
  /^#/ {
    print;
    next;
  }
  # Range line: the first field looks like 0xHHHH-0xHHHH and the second
  # like 0xHHHH.  The four hex digits at columns 3-6 and 10-13 of $1 are
  # the first and last codes of the range; columns 3-6 of $2 give the
  # Unicode value paired with the first code.  Every Unicode value
  # covered by the range is recorded in table, one per index step from
  # the first code to the last.
  /0x....-0x..../ {
    gb_from = gb_to_index(decode_hex(substr($1, 3, 4)));
    gb_to = gb_to_index(decode_hex(substr($1, 10, 4)));
    for (unicode = decode_hex(substr($2, 3, 4)); gb_from <= gb_to; gb_from++)
      table[unicode++] = 1;
    next;
  }
  # Any other line is treated as a single mapping: the four hex digits
  # at columns 3-6 of the second field are the Unicode value, which is
  # recorded in table.  The code in the first field is not needed for
  # that, so it is no longer decoded (the original stored it in a
  # variable that was never read).
  {
    unicode = decode_hex(substr($2, 3, 4));
    table[unicode] = 1;
  }
  # After all input is read, walk the Unicode values 128..65536 and
  # print one line for every maximal run of values that never appeared
  # in table.  Each such value consumes the next linear index, and the
  # run is printed as the four-byte code(s) produced by index_to_gb
  # followed by the first Unicode value of the run.
  END {
    run_first = -1;       # index at which the open run began; -1 = no open run
    next_index = 0;       # next linear index to hand out
    run_unicode = 0;      # Unicode value at which the open run began
    # Sentinel: mark one value past the end so that a run still open at
    # 65535 is flushed by the loop itself.
    table[65536] = 1;
    for (cp = 128; cp <= 65536; cp++) {
      if (table[cp] == 0) {
        # Values 55296..57343 (0xD800..0xDFFF) neither consume an index
        # nor end an open run.
        if (cp >= 55296 && cp < 57344)
          continue;
        if (run_first < 0) {
          run_first = next_index;
          run_unicode = cp;
        }
        next_index++;
        continue;
      }
      # This value was seen in the input: close the open run, if any.
      if (run_first < 0)
        continue;
      if (next_index - run_first == 1)
        printf "0x%s\t\t0x%04X\n", index_to_gb(run_first), run_unicode;
      else
        printf "0x%s-0x%s\t0x%04X\n", index_to_gb(run_first), index_to_gb(next_index - 1), run_unicode;
      run_first = -1;
    }
  }