make_lazy_extractors.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  6. from inspect import getsource
  7. from devscripts.utils import get_filename_args, read_file, write_file
  8. from yt_dlp.extractor import import_extractors
  9. from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
  10. from yt_dlp.globals import extractors
  11. NO_ATTR = object()
  12. STATIC_CLASS_PROPERTIES = [
  13. 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching
  14. '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions
  15. 'age_limit', # Used for --age-limit (evaluated)
  16. '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated)
  17. ]
  18. CLASS_METHODS = [
  19. 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching
  20. 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation
  21. 'description', # Used for --extractor-descriptions
  22. 'is_suitable', # Used for --age-limit
  23. 'supports_login', 'is_single_video', # Accessed in CLI only with instance
  24. ]
  25. IE_TEMPLATE = '''
  26. class {name}({bases}):
  27. _module = {module!r}
  28. '''
  29. MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')
  30. def main():
  31. os.environ['YTDLP_NO_PLUGINS'] = 'true'
  32. os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true'
  33. lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
  34. import_extractors()
  35. DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
  36. module_src = '\n'.join((
  37. MODULE_TEMPLATE,
  38. ' _module = None',
  39. *extra_ie_code(DummyInfoExtractor),
  40. '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
  41. *build_ies(list(extractors.value.values()), (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
  42. ))
  43. write_file(lazy_extractors_filename, f'{module_src}\n')
  44. def extra_ie_code(ie, base=None):
  45. for var in STATIC_CLASS_PROPERTIES:
  46. val = getattr(ie, var)
  47. if val != (getattr(base, var) if base else NO_ATTR):
  48. yield f' {var} = {val!r}'
  49. yield ''
  50. for name in CLASS_METHODS:
  51. f = getattr(ie, name)
  52. if not base or f.__func__ != getattr(base, name).__func__:
  53. yield getsource(f)
  54. def build_ies(ies, bases, attr_base):
  55. names = []
  56. for ie in sort_ies(ies, bases):
  57. yield build_lazy_ie(ie, ie.__name__, attr_base)
  58. if ie in ies:
  59. names.append(ie.__name__)
  60. yield '\n_CLASS_LOOKUP = {%s}' % ', '.join(f'{name!r}: {name}' for name in names)
  61. def sort_ies(ies, ignored_bases):
  62. """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
  63. classes, returned_classes = ies[:-1], set()
  64. assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
  65. while classes:
  66. for c in classes[:]:
  67. bases = set(c.__bases__) - {object, *ignored_bases}
  68. restart = False
  69. for b in sorted(bases, key=lambda x: x.__name__):
  70. if b not in classes and b not in returned_classes:
  71. assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
  72. classes.insert(0, b)
  73. restart = True
  74. if restart:
  75. break
  76. if bases <= returned_classes:
  77. yield c
  78. returned_classes.add(c)
  79. classes.remove(c)
  80. break
  81. yield ies[-1]
  82. def build_lazy_ie(ie, name, attr_base):
  83. bases = ', '.join({
  84. 'InfoExtractor': 'LazyLoadExtractor',
  85. 'SearchInfoExtractor': 'LazyLoadSearchExtractor',
  86. }.get(base.__name__, base.__name__) for base in ie.__bases__)
  87. s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
  88. return s + '\n'.join(extra_ie_code(ie, attr_base))
  89. if __name__ == '__main__':
  90. main()