phpaliases.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #!/usr/bin/env python
  2. """This script searches files for functions that are just aliases in
PHP source code. The check is not 100% reliable, so it should not be
automated, but it is useful to run once in a while and confirm by hand
that none of the matches it reports is a real use of an alias.
Usage:
phpaliases.py <name of alias file> [PHP source code filename]...
  8. """
  9. import sys
  10. # Fetch this URL to get the file that is parsed into the aliases list
  11. alias_url = 'http://www.zend.com/phpfunc/all_aliases.php'
  12. header_tok = '<!-- END OF HEADER -->';
  13. footer_tok = '<!-- FOOTER -->';
  14. # Example line of the table that we parse:
  15. # '<tr bgcolor="#EFEFFF"><td><a href="function.bzclose.php">bzclose</a></td><td><a href="http://lxr.php.net/source/php-src/ext/bz2/bz2.c#48">php-src/ext/bz2/bz2.c</a></td><td><a href="function.fclose.php">fclose</a></td></tr>'
  16. import re
  17. line_re = re.compile(r'''
  18. \A
  19. <tr\ bgcolor="[^">]+">
  20. <td><a\ href="[^>"]+\.php">([^<>]+)</a></td>
  21. <td><a\ href="[^">]+">[^<>]+</a></td>
  22. <td>
  23. (?:
  24. <a\ href="[^">]+\.php">
  25. ( [^<>]+ )
  26. </a>
  27. | ( [^<>]+ )
  28. )
  29. </td>
  30. </tr>
  31. \Z
  32. ''', re.VERBOSE)
  33. def parseString(s):
  34. _, rest = s.split(header_tok, 1)
  35. body, _ = rest.split(footer_tok, 1)
  36. lines = body.split('\n')
  37. assert [s.strip() for s in lines[-2:]] == ['</table>', '']
  38. assert lines[0].strip().startswith('<table')
  39. del lines[0], lines[-2:]
  40. aliases = {}
  41. for line in lines:
  42. mo = line_re.match(line)
  43. assert mo, line
  44. alias, master1, master2 = mo.groups()
  45. if master1:
  46. master = master1
  47. else:
  48. assert master2
  49. master = master2
  50. aliases[alias] = master
  51. return aliases
  52. def parseFile(f):
  53. return parseString(f.read())
  54. def parseFileName(fn):
  55. return parseFile(file(fn, 'r'))
  56. def parseURL(url):
  57. return parseFile(urllib2.urlopen(url))
  58. def getAliasRE(aliases):
  59. return re.compile(r'(->|\$|)\s*\b(%s)\b' % ('|'.join(aliases.keys())))
  60. def checkAliasesFile(alias_re, f):
  61. found = []
  62. line_num = 1
  63. for line in f:
  64. for mo in alias_re.finditer(line):
  65. if mo.group(1):
  66. continue
  67. alias = mo.group(2)
  68. found.append((line_num, alias))
  69. line_num += 1
  70. return found
  71. def checkAliases(alias_re, filename):
  72. return checkAliasesFile(alias_re, file(filename, 'r'))
  73. def checkAliasesFiles(alias_re, filenames):
  74. found = []
  75. for filename in filenames:
  76. file_found = checkAliases(alias_re, filename)
  77. found.extend([(filename, n, a) for (n, a) in file_found])
  78. return found
  79. def dumpResults(aliases, found, out=sys.stdout):
  80. for filename, n, a in found:
  81. print >>out, "%s:%d %s -> %s" % (filename, n, a, aliases[a])
  82. def main(alias_file, *filenames):
  83. aliases = parseFileName(alias_file)
  84. alias_re = getAliasRE(aliases)
  85. found = checkAliasesFiles(alias_re, filenames)
  86. dumpResults(aliases, found)
  87. return found
  88. if __name__ == '__main__':
  89. found = main(*sys.argv[1:])
  90. if found:
  91. sys.exit(1)