check-porn.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. #!/usr/bin/env python
  2. from __future__ import unicode_literals
  3. """
  4. This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
  5. if we are not 'age_limit' tagging some porn site
  6. A second approach implemented relies on a list of porn domains, to activate it
  7. pass the list filename as the only argument
  8. """
  9. # Allow direct execution
  10. import os
  11. import sys
  12. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  13. from test.helper import gettestcases
  14. from youtube_dl.utils import compat_urllib_parse_urlparse
  15. from youtube_dl.utils import compat_urllib_request
  16. if len(sys.argv) > 1:
  17. METHOD = 'LIST'
  18. LIST = open(sys.argv[1]).read().decode('utf8').strip()
  19. else:
  20. METHOD = 'EURISTIC'
  21. for test in gettestcases():
  22. if METHOD == 'EURISTIC':
  23. try:
  24. webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
  25. except Exception:
  26. print('\nFail: {0}'.format(test['name']))
  27. continue
  28. webpage = webpage.decode('utf8', 'replace')
  29. RESULT = 'porn' in webpage.lower()
  30. elif METHOD == 'LIST':
  31. domain = compat_urllib_parse_urlparse(test['url']).netloc
  32. if not domain:
  33. print('\nFail: {0}'.format(test['name']))
  34. continue
  35. domain = '.'.join(domain.split('.')[-2:])
  36. RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
  37. if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
  38. or test['info_dict']['age_limit'] != 18):
  39. print('\nPotential missing age_limit check: {0}'.format(test['name']))
  40. elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
  41. and test['info_dict']['age_limit'] == 18):
  42. print('\nPotential false negative: {0}'.format(test['name']))
  43. else:
  44. sys.stdout.write('.')
  45. sys.stdout.flush()
  46. print()