make_lazy_extractors.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import shutil
  5. import sys
  6. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  7. from inspect import getsource
  8. from devscripts.utils import get_filename_args, read_file, write_file
  9. NO_ATTR = object()
  10. STATIC_CLASS_PROPERTIES = [
  11. 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching
  12. '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions
  13. 'age_limit', # Used for --age-limit (evaluated)
  14. '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated)
  15. ]
  16. CLASS_METHODS = [
  17. 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching
  18. 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation
  19. 'description', # Used for --extractor-descriptions
  20. 'is_suitable', # Used for --age-limit
  21. 'supports_login', 'is_single_video', # Accessed in CLI only with instance
  22. ]
  23. IE_TEMPLATE = '''
  24. class {name}({bases}):
  25. _module = {module!r}
  26. '''
  27. MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')
  28. def main():
  29. lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
  30. if os.path.exists(lazy_extractors_filename):
  31. os.remove(lazy_extractors_filename)
  32. _ALL_CLASSES = get_all_ies() # Must be before import
  33. import yt_dlp.plugins
  34. from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
  35. # Filter out plugins
  36. _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]
  37. DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
  38. module_src = '\n'.join((
  39. MODULE_TEMPLATE,
  40. ' _module = None',
  41. *extra_ie_code(DummyInfoExtractor),
  42. '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
  43. *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
  44. ))
  45. write_file(lazy_extractors_filename, f'{module_src}\n')
  46. def get_all_ies():
  47. PLUGINS_DIRNAME = 'ytdlp_plugins'
  48. BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
  49. if os.path.exists(PLUGINS_DIRNAME):
  50. # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
  51. shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
  52. try:
  53. from yt_dlp.extractor.extractors import _ALL_CLASSES
  54. finally:
  55. if os.path.exists(BLOCKED_DIRNAME):
  56. shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
  57. return _ALL_CLASSES
  58. def extra_ie_code(ie, base=None):
  59. for var in STATIC_CLASS_PROPERTIES:
  60. val = getattr(ie, var)
  61. if val != (getattr(base, var) if base else NO_ATTR):
  62. yield f' {var} = {val!r}'
  63. yield ''
  64. for name in CLASS_METHODS:
  65. f = getattr(ie, name)
  66. if not base or f.__func__ != getattr(base, name).__func__:
  67. yield getsource(f)
  68. def build_ies(ies, bases, attr_base):
  69. names = []
  70. for ie in sort_ies(ies, bases):
  71. yield build_lazy_ie(ie, ie.__name__, attr_base)
  72. if ie in ies:
  73. names.append(ie.__name__)
  74. yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
  75. def sort_ies(ies, ignored_bases):
  76. """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
  77. classes, returned_classes = ies[:-1], set()
  78. assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
  79. while classes:
  80. for c in classes[:]:
  81. bases = set(c.__bases__) - {object, *ignored_bases}
  82. restart = False
  83. for b in sorted(bases, key=lambda x: x.__name__):
  84. if b not in classes and b not in returned_classes:
  85. assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
  86. classes.insert(0, b)
  87. restart = True
  88. if restart:
  89. break
  90. if bases <= returned_classes:
  91. yield c
  92. returned_classes.add(c)
  93. classes.remove(c)
  94. break
  95. yield ies[-1]
  96. def build_lazy_ie(ie, name, attr_base):
  97. bases = ', '.join({
  98. 'InfoExtractor': 'LazyLoadExtractor',
  99. 'SearchInfoExtractor': 'LazyLoadSearchExtractor',
  100. }.get(base.__name__, base.__name__) for base in ie.__bases__)
  101. s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
  102. return s + '\n'.join(extra_ie_code(ie, attr_base))
  103. if __name__ == '__main__':
  104. main()