FindAllRedirects.php 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. <?php
  2. // COPYRIGHT: Openmoko Inc. 2010
  3. // LICENSE: GPL Version 3 or later
  4. // DESCRIPTION: Compute a list of all possible redirect words
  5. // by processing the MediaWiki language files
  6. // AUTHORS: Christopher Hall <hsw@openmoko.com>
  7. // check that some files are given
  8. if (sizeof($argv) < 2) {
  9. echo "usage: $argv[0] list_of_files\n";
  10. exit(1);
  11. }
  12. // remove the program name from the list of files
  13. $list = array_slice($argv, 1);
  14. // key of this array will be the redirect word
  15. $redirects = array();
  16. // Read each of the language files and extract: $magicWords['redirect']
  17. foreach ($list as $file) {
  18. if ('file' === filetype($file)) {
  19. //echo "Processing: $file\n";
  20. $magicWords = array();
  21. $magicWords['redirect'] = array();
  22. include $file;
  23. if (is_array($magicWords)) {
  24. $m = $magicWords['redirect'];
  25. if (is_array($m)) {
  26. foreach ($m as $value) {
  27. if ("0" !== $value && 0 !== $value) {
  28. $value = mb_strtolower($value, 'UTF-8');
  29. $redirects[$value] = $value;
  30. //echo $value, "\n";
  31. }
  32. }
  33. } elseif (NULL !== $m) {
  34. echo '$magicWords[\'redirect\'] is not an array for: ', $file, "\n";
  35. echo '$magicWords[\'redirect\'] is: ', gettype($m), "\n";
  36. }
  37. } else {
  38. echo '$magicWords is not an array for: ', $file, "\n";
  39. }
  40. }
  41. }
  42. // output the values
  43. sort($redirects);
  44. ?>
  45. #! /usr/bin/env python
  46. # -*- coding: utf-8 -*-
  47. #
  48. # *** WARNING: Generated file do not modify
  49. #
  50. # Generated on: <?php echo date('Y-m-d H:i:s'), ' by ', $argv[0]; ?>
  51. import os, sys
  52. import re
  53. redirect_string = r'('
  54. <?php
  55. $flag = false;
  56. foreach ($redirects as $value) {
  57. echo 'redirect_string += \'';
  58. if ($flag) {
  59. echo '|';
  60. } else {
  61. $flag = true;
  62. }
  63. echo $value, "'\n";
  64. }
  65. ?>
  66. redirect_string += r')'
  67. start_string = r'\s*'
  68. end_string = r'[^\[]*\[\[(.*?)([#|].*?)?\]\]'
  69. regex = re.compile(start_string + redirect_string + end_string, re.IGNORECASE)
  70. def main():
  71. tests = [
  72. r'#redirect[[Just Testing]]',
  73. r'#айдау[[Just Testing]]',
  74. r'#リダイレクト[[Just Testing]]',
  75. r'#転送[[Just Testing]]',
  76. r'#転送[[Just Testing]]',
  77. ]
  78. for s in tests:
  79. m = regex.match(s)
  80. if m:
  81. print('matched: {0:s} : {1:s} => {3:s}'.format(s, m.group(1), m.group(2)))
  82. else:
  83. print('no match: {0:s}'.format(s))
  84. # run the program
  85. if __name__ == "__main__":
  86. main()