twitter.py 50 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215
  1. import json
  2. import re
  3. import urllib.error
  4. from .common import InfoExtractor
  5. from .periscope import PeriscopeBaseIE, PeriscopeIE
  6. from ..compat import functools # isort: split
  7. from ..compat import (
  8. compat_parse_qs,
  9. compat_urllib_parse_unquote,
  10. compat_urllib_parse_urlparse,
  11. )
  12. from ..utils import (
  13. ExtractorError,
  14. dict_get,
  15. float_or_none,
  16. format_field,
  17. int_or_none,
  18. make_archive_id,
  19. str_or_none,
  20. strip_or_none,
  21. traverse_obj,
  22. try_call,
  23. try_get,
  24. unified_timestamp,
  25. update_url_query,
  26. url_or_none,
  27. xpath_text,
  28. )
  29. class TwitterBaseIE(InfoExtractor):
  30. _API_BASE = 'https://api.twitter.com/1.1/'
  31. _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  32. _TOKENS = {
  33. 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
  34. 'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
  35. }
  36. _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  37. def _extract_variant_formats(self, variant, video_id):
  38. variant_url = variant.get('url')
  39. if not variant_url:
  40. return [], {}
  41. elif '.m3u8' in variant_url:
  42. return self._extract_m3u8_formats_and_subtitles(
  43. variant_url, video_id, 'mp4', 'm3u8_native',
  44. m3u8_id='hls', fatal=False)
  45. else:
  46. tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
  47. f = {
  48. 'url': variant_url,
  49. 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
  50. 'tbr': tbr,
  51. }
  52. self._search_dimensions_in_video_url(f, variant_url)
  53. return [f], {}
  54. def _extract_formats_from_vmap_url(self, vmap_url, video_id):
  55. vmap_url = url_or_none(vmap_url)
  56. if not vmap_url:
  57. return [], {}
  58. vmap_data = self._download_xml(vmap_url, video_id)
  59. formats = []
  60. subtitles = {}
  61. urls = []
  62. for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
  63. video_variant.attrib['url'] = compat_urllib_parse_unquote(
  64. video_variant.attrib['url'])
  65. urls.append(video_variant.attrib['url'])
  66. fmts, subs = self._extract_variant_formats(
  67. video_variant.attrib, video_id)
  68. formats.extend(fmts)
  69. subtitles = self._merge_subtitles(subtitles, subs)
  70. video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
  71. if video_url not in urls:
  72. fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
  73. formats.extend(fmts)
  74. subtitles = self._merge_subtitles(subtitles, subs)
  75. return formats, subtitles
  76. @staticmethod
  77. def _search_dimensions_in_video_url(a_format, video_url):
  78. m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
  79. if m:
  80. a_format.update({
  81. 'width': int(m.group('width')),
  82. 'height': int(m.group('height')),
  83. })
  84. @functools.cached_property
  85. def is_logged_in(self):
  86. return bool(self._get_cookies(self._API_BASE).get('auth_token'))
  87. def _call_api(self, path, video_id, query={}, graphql=False):
  88. cookies = self._get_cookies(self._API_BASE)
  89. headers = {}
  90. csrf_cookie = cookies.get('ct0')
  91. if csrf_cookie:
  92. headers['x-csrf-token'] = csrf_cookie.value
  93. if self.is_logged_in:
  94. headers.update({
  95. 'x-twitter-auth-type': 'OAuth2Session',
  96. 'x-twitter-client-language': 'en',
  97. 'x-twitter-active-user': 'yes',
  98. })
  99. last_error = None
  100. for bearer_token in self._TOKENS:
  101. for first_attempt in (True, False):
  102. headers['Authorization'] = f'Bearer {bearer_token}'
  103. if not self.is_logged_in:
  104. if not self._TOKENS[bearer_token]:
  105. headers.pop('x-guest-token', None)
  106. guest_token_response = self._download_json(
  107. self._API_BASE + 'guest/activate.json', video_id,
  108. 'Downloading guest token', data=b'', headers=headers)
  109. self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
  110. if not self._TOKENS[bearer_token]:
  111. raise ExtractorError('Could not retrieve guest token')
  112. headers['x-guest-token'] = self._TOKENS[bearer_token]
  113. try:
  114. allowed_status = {400, 403, 404} if graphql else {403}
  115. result = self._download_json(
  116. (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
  117. video_id, headers=headers, query=query, expected_status=allowed_status)
  118. except ExtractorError as e:
  119. if last_error:
  120. raise last_error
  121. if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
  122. raise
  123. last_error = e
  124. self.report_warning(
  125. 'Twitter API gave 404 response, retrying with deprecated auth token. '
  126. 'Only one media item can be extracted')
  127. break # continue outer loop with next bearer_token
  128. if result.get('errors'):
  129. errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
  130. if first_attempt and any('bad guest token' in error.lower() for error in errors):
  131. self.to_screen('Guest token has expired. Refreshing guest token')
  132. self._TOKENS[bearer_token] = None
  133. continue
  134. error_message = ', '.join(set(errors)) or 'Unknown error'
  135. raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
  136. return result
  137. def _build_graphql_query(self, media_id):
  138. raise NotImplementedError('Method must be implemented to support GraphQL')
  139. def _call_graphql_api(self, endpoint, media_id):
  140. data = self._build_graphql_query(media_id)
  141. query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
  142. return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
  143. class TwitterCardIE(InfoExtractor):
  144. IE_NAME = 'twitter:card'
  145. _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
  146. _TESTS = [
  147. {
  148. 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
  149. # MD5 checksums are different in different places
  150. 'info_dict': {
  151. 'id': '560070131976392705',
  152. 'ext': 'mp4',
  153. 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
  154. 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
  155. 'uploader': 'Twitter',
  156. 'uploader_id': 'Twitter',
  157. 'thumbnail': r're:^https?://.*\.jpg',
  158. 'duration': 30.033,
  159. 'timestamp': 1422366112,
  160. 'upload_date': '20150127',
  161. 'age_limit': 0,
  162. 'comment_count': int,
  163. 'tags': [],
  164. 'repost_count': int,
  165. 'like_count': int,
  166. 'display_id': '560070183650213889',
  167. 'uploader_url': 'https://twitter.com/Twitter',
  168. },
  169. },
  170. {
  171. 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
  172. 'md5': '7137eca597f72b9abbe61e5ae0161399',
  173. 'info_dict': {
  174. 'id': '623160978427936768',
  175. 'ext': 'mp4',
  176. 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
  177. 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
  178. 'uploader': 'NASA',
  179. 'uploader_id': 'NASA',
  180. 'timestamp': 1437408129,
  181. 'upload_date': '20150720',
  182. 'uploader_url': 'https://twitter.com/NASA',
  183. 'age_limit': 0,
  184. 'comment_count': int,
  185. 'like_count': int,
  186. 'repost_count': int,
  187. 'tags': ['PlutoFlyby'],
  188. },
  189. 'params': {'format': '[protocol=https]'}
  190. },
  191. {
  192. 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
  193. 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
  194. 'info_dict': {
  195. 'id': 'dq4Oj5quskI',
  196. 'ext': 'mp4',
  197. 'title': 'Ubuntu 11.10 Overview',
  198. 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
  199. 'upload_date': '20111013',
  200. 'uploader': 'OMG! UBUNTU!',
  201. 'uploader_id': 'omgubuntu',
  202. 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
  203. 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
  204. 'channel_follower_count': int,
  205. 'chapters': 'count:8',
  206. 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
  207. 'duration': 138,
  208. 'categories': ['Film & Animation'],
  209. 'age_limit': 0,
  210. 'comment_count': int,
  211. 'availability': 'public',
  212. 'like_count': int,
  213. 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
  214. 'view_count': int,
  215. 'tags': 'count:12',
  216. 'channel': 'OMG! UBUNTU!',
  217. 'playable_in_embed': True,
  218. },
  219. 'add_ie': ['Youtube'],
  220. },
  221. {
  222. 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
  223. 'info_dict': {
  224. 'id': 'iBb2x00UVlv',
  225. 'ext': 'mp4',
  226. 'upload_date': '20151113',
  227. 'uploader_id': '1189339351084113920',
  228. 'uploader': 'ArsenalTerje',
  229. 'title': 'Vine by ArsenalTerje',
  230. 'timestamp': 1447451307,
  231. 'alt_title': 'Vine by ArsenalTerje',
  232. 'comment_count': int,
  233. 'like_count': int,
  234. 'thumbnail': r're:^https?://[^?#]+\.jpg',
  235. 'view_count': int,
  236. 'repost_count': int,
  237. },
  238. 'add_ie': ['Vine'],
  239. 'params': {'skip_download': 'm3u8'},
  240. },
  241. {
  242. 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
  243. 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
  244. 'info_dict': {
  245. 'id': '705235433198714880',
  246. 'ext': 'mp4',
  247. 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
  248. 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
  249. 'uploader': 'Brent Yarina',
  250. 'uploader_id': 'BTNBrentYarina',
  251. 'timestamp': 1456976204,
  252. 'upload_date': '20160303',
  253. },
  254. 'skip': 'This content is no longer available.',
  255. },
  256. {
  257. 'url': 'https://twitter.com/i/videos/752274308186120192',
  258. 'only_matching': True,
  259. },
  260. ]
  261. def _real_extract(self, url):
  262. status_id = self._match_id(url)
  263. return self.url_result(
  264. 'https://twitter.com/statuses/' + status_id,
  265. TwitterIE.ie_key(), status_id)
  266. class TwitterIE(TwitterBaseIE):
  267. IE_NAME = 'twitter'
  268. _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
  269. _TESTS = [{
  270. 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
  271. 'info_dict': {
  272. 'id': '643211870443208704',
  273. 'display_id': '643211948184596480',
  274. 'ext': 'mp4',
  275. 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
  276. 'thumbnail': r're:^https?://.*\.jpg',
  277. 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
  278. 'uploader': 'FREE THE NIPPLE',
  279. 'uploader_id': 'freethenipple',
  280. 'duration': 12.922,
  281. 'timestamp': 1442188653,
  282. 'upload_date': '20150913',
  283. 'uploader_url': 'https://twitter.com/freethenipple',
  284. 'comment_count': int,
  285. 'repost_count': int,
  286. 'like_count': int,
  287. 'tags': [],
  288. 'age_limit': 18,
  289. },
  290. }, {
  291. 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
  292. 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
  293. 'info_dict': {
  294. 'id': '657991469417025536',
  295. 'ext': 'mp4',
  296. 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
  297. 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
  298. 'thumbnail': r're:^https?://.*\.png',
  299. 'uploader': 'Gifs',
  300. 'uploader_id': 'giphz',
  301. },
  302. 'expected_warnings': ['height', 'width'],
  303. 'skip': 'Account suspended',
  304. }, {
  305. 'url': 'https://twitter.com/starwars/status/665052190608723968',
  306. 'info_dict': {
  307. 'id': '665052190608723968',
  308. 'display_id': '665052190608723968',
  309. 'ext': 'mp4',
  310. 'title': 'md5:55fef1d5b811944f1550e91b44abb82e',
  311. 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
  312. 'uploader_id': 'starwars',
  313. 'uploader': r're:Star Wars.*',
  314. 'timestamp': 1447395772,
  315. 'upload_date': '20151113',
  316. 'uploader_url': 'https://twitter.com/starwars',
  317. 'comment_count': int,
  318. 'repost_count': int,
  319. 'like_count': int,
  320. 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
  321. 'age_limit': 0,
  322. },
  323. }, {
  324. 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
  325. 'info_dict': {
  326. 'id': '705235433198714880',
  327. 'ext': 'mp4',
  328. 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
  329. 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
  330. 'uploader_id': 'BTNBrentYarina',
  331. 'uploader': 'Brent Yarina',
  332. 'timestamp': 1456976204,
  333. 'upload_date': '20160303',
  334. 'uploader_url': 'https://twitter.com/BTNBrentYarina',
  335. 'comment_count': int,
  336. 'repost_count': int,
  337. 'like_count': int,
  338. 'tags': [],
  339. 'age_limit': 0,
  340. },
  341. 'params': {
  342. # The same video as https://twitter.com/i/videos/tweet/705235433198714880
  343. # Test case of TwitterCardIE
  344. 'skip_download': True,
  345. },
  346. 'skip': 'Dead external link',
  347. }, {
  348. 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
  349. 'info_dict': {
  350. 'id': '700207414000242688',
  351. 'display_id': '700207533655363584',
  352. 'ext': 'mp4',
  353. 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
  354. 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
  355. 'thumbnail': r're:^https?://.*\.jpg',
  356. 'uploader': 'jaydin donte geer',
  357. 'uploader_id': 'jaydingeer',
  358. 'duration': 30.0,
  359. 'timestamp': 1455777459,
  360. 'upload_date': '20160218',
  361. 'uploader_url': 'https://twitter.com/jaydingeer',
  362. 'comment_count': int,
  363. 'repost_count': int,
  364. 'like_count': int,
  365. 'tags': ['Damndaniel'],
  366. 'age_limit': 0,
  367. },
  368. }, {
  369. 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
  370. 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
  371. 'info_dict': {
  372. 'id': 'MIOxnrUteUd',
  373. 'ext': 'mp4',
  374. 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
  375. 'uploader': 'TAKUMA',
  376. 'uploader_id': '1004126642786242560',
  377. 'timestamp': 1402826626,
  378. 'upload_date': '20140615',
  379. 'thumbnail': r're:^https?://.*\.jpg',
  380. 'alt_title': 'Vine by TAKUMA',
  381. 'comment_count': int,
  382. 'repost_count': int,
  383. 'like_count': int,
  384. 'view_count': int,
  385. },
  386. 'add_ie': ['Vine'],
  387. }, {
  388. 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
  389. 'info_dict': {
  390. 'id': '717462543795523584',
  391. 'display_id': '719944021058060289',
  392. 'ext': 'mp4',
  393. 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
  394. 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
  395. 'uploader_id': 'CaptainAmerica',
  396. 'uploader': 'Captain America',
  397. 'duration': 3.17,
  398. 'timestamp': 1460483005,
  399. 'upload_date': '20160412',
  400. 'uploader_url': 'https://twitter.com/CaptainAmerica',
  401. 'thumbnail': r're:^https?://.*\.jpg',
  402. 'comment_count': int,
  403. 'repost_count': int,
  404. 'like_count': int,
  405. 'tags': [],
  406. 'age_limit': 0,
  407. },
  408. }, {
  409. 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
  410. 'info_dict': {
  411. 'id': '1zqKVVlkqLaKB',
  412. 'ext': 'mp4',
  413. 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
  414. 'upload_date': '20160923',
  415. 'uploader_id': '1PmKqpJdOJQoY',
  416. 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
  417. 'timestamp': 1474613214,
  418. 'thumbnail': r're:^https?://.*\.jpg',
  419. },
  420. 'add_ie': ['Periscope'],
  421. }, {
  422. # has mp4 formats via mobile API
  423. 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
  424. 'info_dict': {
  425. 'id': '852138619213144067',
  426. 'ext': 'mp4',
  427. 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
  428. 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
  429. 'uploader': 'عالم الأخبار',
  430. 'uploader_id': 'news_al3alm',
  431. 'duration': 277.4,
  432. 'timestamp': 1492000653,
  433. 'upload_date': '20170412',
  434. },
  435. 'skip': 'Account suspended',
  436. }, {
  437. 'url': 'https://twitter.com/i/web/status/910031516746514432',
  438. 'info_dict': {
  439. 'id': '910030238373089285',
  440. 'display_id': '910031516746514432',
  441. 'ext': 'mp4',
  442. 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
  443. 'thumbnail': r're:^https?://.*\.jpg',
  444. 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
  445. 'uploader': 'Préfet de Guadeloupe',
  446. 'uploader_id': 'Prefet971',
  447. 'duration': 47.48,
  448. 'timestamp': 1505803395,
  449. 'upload_date': '20170919',
  450. 'uploader_url': 'https://twitter.com/Prefet971',
  451. 'comment_count': int,
  452. 'repost_count': int,
  453. 'like_count': int,
  454. 'tags': ['Maria'],
  455. 'age_limit': 0,
  456. },
  457. 'params': {
  458. 'skip_download': True, # requires ffmpeg
  459. },
  460. }, {
  461. # card via api.twitter.com/1.1/videos/tweet/config
  462. 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
  463. 'info_dict': {
  464. 'id': '1001551417340022785',
  465. 'display_id': '1001551623938805763',
  466. 'ext': 'mp4',
  467. 'title': 're:.*?Shep is on a roll today.*?',
  468. 'thumbnail': r're:^https?://.*\.jpg',
  469. 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
  470. 'uploader': 'Lis Power',
  471. 'uploader_id': 'LisPower1',
  472. 'duration': 111.278,
  473. 'timestamp': 1527623489,
  474. 'upload_date': '20180529',
  475. 'uploader_url': 'https://twitter.com/LisPower1',
  476. 'comment_count': int,
  477. 'repost_count': int,
  478. 'like_count': int,
  479. 'tags': [],
  480. 'age_limit': 0,
  481. },
  482. 'params': {
  483. 'skip_download': True, # requires ffmpeg
  484. },
  485. }, {
  486. 'url': 'https://twitter.com/foobar/status/1087791357756956680',
  487. 'info_dict': {
  488. 'id': '1087791272830607360',
  489. 'display_id': '1087791357756956680',
  490. 'ext': 'mp4',
  491. 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
  492. 'thumbnail': r're:^https?://.*\.jpg',
  493. 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
  494. 'uploader': 'Twitter',
  495. 'uploader_id': 'Twitter',
  496. 'duration': 61.567,
  497. 'timestamp': 1548184644,
  498. 'upload_date': '20190122',
  499. 'uploader_url': 'https://twitter.com/Twitter',
  500. 'comment_count': int,
  501. 'repost_count': int,
  502. 'like_count': int,
  503. 'tags': [],
  504. 'age_limit': 0,
  505. },
  506. }, {
  507. # not available in Periscope
  508. 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
  509. 'info_dict': {
  510. 'id': '1vOGwqejwoWxB',
  511. 'ext': 'mp4',
  512. 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
  513. 'uploader': 'Vivi',
  514. 'uploader_id': '1eVjYOLGkGrQL',
  515. 'thumbnail': r're:^https?://.*\.jpg',
  516. 'tags': ['EduTECH2019'],
  517. 'view_count': int,
  518. },
  519. 'add_ie': ['TwitterBroadcast'],
  520. }, {
  521. # unified card
  522. 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
  523. 'info_dict': {
  524. 'id': '1349774757969989634',
  525. 'display_id': '1349794411333394432',
  526. 'ext': 'mp4',
  527. 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
  528. 'thumbnail': r're:^https?://.*\.jpg',
  529. 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
  530. 'uploader': 'Brooklyn Nets',
  531. 'uploader_id': 'BrooklynNets',
  532. 'duration': 324.484,
  533. 'timestamp': 1610651040,
  534. 'upload_date': '20210114',
  535. 'uploader_url': 'https://twitter.com/BrooklynNets',
  536. 'comment_count': int,
  537. 'repost_count': int,
  538. 'like_count': int,
  539. 'tags': [],
  540. 'age_limit': 0,
  541. },
  542. 'params': {
  543. 'skip_download': True,
  544. },
  545. }, {
  546. 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
  547. 'info_dict': {
  548. 'id': '1577855447914409984',
  549. 'display_id': '1577855540407197696',
  550. 'ext': 'mp4',
  551. 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
  552. 'description': 'md5:b9c3699335447391d11753ab21c70a74',
  553. 'upload_date': '20221006',
  554. 'uploader': 'oshtru',
  555. 'uploader_id': 'oshtru',
  556. 'uploader_url': 'https://twitter.com/oshtru',
  557. 'thumbnail': r're:^https?://.*\.jpg',
  558. 'duration': 30.03,
  559. 'timestamp': 1665025050,
  560. 'comment_count': int,
  561. 'repost_count': int,
  562. 'like_count': int,
  563. 'tags': [],
  564. 'age_limit': 0,
  565. },
  566. 'params': {'skip_download': True},
  567. }, {
  568. 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
  569. 'info_dict': {
  570. 'id': '1577719286659006464',
  571. 'title': 'Ultima | #\u0432\u029f\u043c - Test',
  572. 'description': 'Test https://t.co/Y3KEZD7Dad',
  573. 'uploader': 'Ultima | #\u0432\u029f\u043c',
  574. 'uploader_id': 'UltimaShadowX',
  575. 'uploader_url': 'https://twitter.com/UltimaShadowX',
  576. 'upload_date': '20221005',
  577. 'timestamp': 1664992565,
  578. 'comment_count': int,
  579. 'repost_count': int,
  580. 'like_count': int,
  581. 'tags': [],
  582. 'age_limit': 0,
  583. },
  584. 'playlist_count': 4,
  585. 'params': {'skip_download': True},
  586. }, {
  587. 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
  588. 'info_dict': {
  589. 'id': '1575559336759263233',
  590. 'display_id': '1575560063510810624',
  591. 'ext': 'mp4',
  592. 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
  593. 'thumbnail': r're:^https?://.*\.jpg',
  594. 'description': 'md5:95aea692fda36a12081b9629b02daa92',
  595. 'uploader': 'Max Olson',
  596. 'uploader_id': 'MesoMax919',
  597. 'uploader_url': 'https://twitter.com/MesoMax919',
  598. 'duration': 21.321,
  599. 'timestamp': 1664477766,
  600. 'upload_date': '20220929',
  601. 'comment_count': int,
  602. 'repost_count': int,
  603. 'like_count': int,
  604. 'tags': ['HurricaneIan'],
  605. 'age_limit': 0,
  606. },
  607. }, {
  608. # Adult content, uses old token
  609. # Fails if not logged in (GraphQL)
  610. 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
  611. 'info_dict': {
  612. 'id': '1575199163847000068',
  613. 'display_id': '1575199173472927762',
  614. 'ext': 'mp4',
  615. 'title': str,
  616. 'description': str,
  617. 'uploader': str,
  618. 'uploader_id': 'Rizdraws',
  619. 'uploader_url': 'https://twitter.com/Rizdraws',
  620. 'upload_date': '20220928',
  621. 'timestamp': 1664391723,
  622. 'thumbnail': 're:^https?://.*\\.jpg',
  623. 'like_count': int,
  624. 'repost_count': int,
  625. 'comment_count': int,
  626. 'age_limit': 18,
  627. 'tags': []
  628. },
  629. 'expected_warnings': ['404'],
  630. }, {
  631. # Description is missing one https://t.co url (GraphQL)
  632. 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
  633. 'playlist_mincount': 2,
  634. 'info_dict': {
  635. 'id': '1395079556562706435',
  636. 'title': str,
  637. 'tags': [],
  638. 'uploader': str,
  639. 'like_count': int,
  640. 'upload_date': '20210519',
  641. 'age_limit': 0,
  642. 'repost_count': int,
  643. 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
  644. 'uploader_id': 'Srirachachau',
  645. 'comment_count': int,
  646. 'uploader_url': 'https://twitter.com/Srirachachau',
  647. 'timestamp': 1621447860,
  648. },
  649. }, {
  650. # Description is missing one https://t.co url (GraphQL)
  651. 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
  652. 'playlist_mincount': 2,
  653. 'info_dict': {
  654. 'id': '1578353380363501568',
  655. 'title': str,
  656. 'uploader_id': 'DavidToons_',
  657. 'repost_count': int,
  658. 'like_count': int,
  659. 'uploader': str,
  660. 'timestamp': 1665143744,
  661. 'uploader_url': 'https://twitter.com/DavidToons_',
  662. 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
  663. 'tags': [],
  664. 'comment_count': int,
  665. 'upload_date': '20221007',
  666. 'age_limit': 0,
  667. },
  668. }, {
  669. 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
  670. 'playlist_count': 2,
  671. 'info_dict': {
  672. 'id': '1578401165338976258',
  673. 'title': str,
  674. 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
  675. 'uploader': str,
  676. 'uploader_id': 'primevideouk',
  677. 'timestamp': 1665155137,
  678. 'upload_date': '20221007',
  679. 'age_limit': 0,
  680. 'uploader_url': 'https://twitter.com/primevideouk',
  681. 'comment_count': int,
  682. 'repost_count': int,
  683. 'like_count': int,
  684. 'tags': ['TheRingsOfPower'],
  685. },
  686. }, {
  687. # Twitter Spaces
  688. 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
  689. 'info_dict': {
  690. 'id': '1lPJqmBeeNAJb',
  691. 'ext': 'm4a',
  692. 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
  693. 'uploader': r're:Monique Camarra.+?',
  694. 'uploader_id': 'MoniqueCamarra',
  695. 'live_status': 'was_live',
  696. 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
  697. 'timestamp': 1658407771464,
  698. },
  699. 'add_ie': ['TwitterSpaces'],
  700. 'params': {'skip_download': 'm3u8'},
  701. }, {
  702. # onion route
  703. 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
  704. 'only_matching': True,
  705. }, {
  706. # Twitch Clip Embed
  707. 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
  708. 'only_matching': True,
  709. }, {
  710. # promo_video_website card
  711. 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
  712. 'only_matching': True,
  713. }, {
  714. # promo_video_convo card
  715. 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
  716. 'only_matching': True,
  717. }, {
  718. # appplayer card
  719. 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
  720. 'only_matching': True,
  721. }, {
  722. # video_direct_message card
  723. 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
  724. 'only_matching': True,
  725. }, {
  726. # poll2choice_video card
  727. 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
  728. 'only_matching': True,
  729. }, {
  730. # poll3choice_video card
  731. 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
  732. 'only_matching': True,
  733. }, {
  734. # poll4choice_video card
  735. 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
  736. 'only_matching': True,
  737. }]
  738. def _graphql_to_legacy(self, data, twid):
  739. result = traverse_obj(data, (
  740. 'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
  741. lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
  742. 'tweet_results', 'result'
  743. ), expected_type=dict, default={}, get_all=False)
  744. if 'tombstone' in result:
  745. cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
  746. raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
  747. status = result.get('legacy', {})
  748. status.update(traverse_obj(result, {
  749. 'user': ('core', 'user_results', 'result', 'legacy'),
  750. 'card': ('card', 'legacy'),
  751. 'quoted_status': ('quoted_status_result', 'result', 'legacy'),
  752. }, expected_type=dict, default={}))
  753. # extra transformation is needed since result does not match legacy format
  754. binding_values = {
  755. binding_value.get('key'): binding_value.get('value')
  756. for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
  757. }
  758. if binding_values:
  759. status['card']['binding_values'] = binding_values
  760. return status
  761. def _build_graphql_query(self, media_id):
  762. return {
  763. 'variables': {
  764. 'focalTweetId': media_id,
  765. 'includePromotedContent': True,
  766. 'with_rux_injections': False,
  767. 'withBirdwatchNotes': True,
  768. 'withCommunity': True,
  769. 'withDownvotePerspective': False,
  770. 'withQuickPromoteEligibilityTweetFields': True,
  771. 'withReactionsMetadata': False,
  772. 'withReactionsPerspective': False,
  773. 'withSuperFollowsTweetFields': True,
  774. 'withSuperFollowsUserFields': True,
  775. 'withV2Timeline': True,
  776. 'withVoice': True,
  777. },
  778. 'features': {
  779. 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
  780. 'interactive_text_enabled': True,
  781. 'responsive_web_edit_tweet_api_enabled': True,
  782. 'responsive_web_enhance_cards_enabled': True,
  783. 'responsive_web_graphql_timeline_navigation_enabled': False,
  784. 'responsive_web_text_conversations_enabled': False,
  785. 'responsive_web_uc_gql_enabled': True,
  786. 'standardized_nudges_misinfo': True,
  787. 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
  788. 'tweetypie_unmention_optimization_enabled': True,
  789. 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
  790. 'verified_phone_label_enabled': False,
  791. 'vibe_api_enabled': True,
  792. },
  793. }
  794. def _real_extract(self, url):
  795. twid = self._match_id(url)
  796. if self.is_logged_in or self._configuration_arg('force_graphql'):
  797. self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
  798. result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
  799. status = self._graphql_to_legacy(result, twid)
  800. else:
  801. status = self._call_api(f'statuses/show/{twid}.json', twid, {
  802. 'cards_platform': 'Web-12',
  803. 'include_cards': 1,
  804. 'include_reply_count': 1,
  805. 'include_user_entities': 0,
  806. 'tweet_mode': 'extended',
  807. })
  808. title = description = status['full_text'].replace('\n', ' ')
  809. # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
  810. title = re.sub(r'\s+(https?://[^ ]+)', '', title)
  811. user = status.get('user') or {}
  812. uploader = user.get('name')
  813. if uploader:
  814. title = f'{uploader} - {title}'
  815. uploader_id = user.get('screen_name')
  816. tags = []
  817. for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
  818. hashtag_text = hashtag.get('text')
  819. if not hashtag_text:
  820. continue
  821. tags.append(hashtag_text)
  822. info = {
  823. 'id': twid,
  824. 'title': title,
  825. 'description': description,
  826. 'uploader': uploader,
  827. 'timestamp': unified_timestamp(status.get('created_at')),
  828. 'uploader_id': uploader_id,
  829. 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
  830. 'like_count': int_or_none(status.get('favorite_count')),
  831. 'repost_count': int_or_none(status.get('retweet_count')),
  832. 'comment_count': int_or_none(status.get('reply_count')),
  833. 'age_limit': 18 if status.get('possibly_sensitive') else 0,
  834. 'tags': tags,
  835. }
  836. def extract_from_video_info(media):
  837. media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
  838. self.write_debug(f'Extracting from video info: {media_id}')
  839. video_info = media.get('video_info') or {}
  840. formats = []
  841. subtitles = {}
  842. for variant in video_info.get('variants', []):
  843. fmts, subs = self._extract_variant_formats(variant, twid)
  844. subtitles = self._merge_subtitles(subtitles, subs)
  845. formats.extend(fmts)
  846. thumbnails = []
  847. media_url = media.get('media_url_https') or media.get('media_url')
  848. if media_url:
  849. def add_thumbnail(name, size):
  850. thumbnails.append({
  851. 'id': name,
  852. 'url': update_url_query(media_url, {'name': name}),
  853. 'width': int_or_none(size.get('w') or size.get('width')),
  854. 'height': int_or_none(size.get('h') or size.get('height')),
  855. })
  856. for name, size in media.get('sizes', {}).items():
  857. add_thumbnail(name, size)
  858. add_thumbnail('orig', media.get('original_info') or {})
  859. return {
  860. 'id': media_id,
  861. 'formats': formats,
  862. 'subtitles': subtitles,
  863. 'thumbnails': thumbnails,
  864. 'duration': float_or_none(video_info.get('duration_millis'), 1000),
  865. # The codec of http formats are unknown
  866. '_format_sort_fields': ('res', 'br', 'size', 'proto'),
  867. }
  868. def extract_from_card_info(card):
  869. if not card:
  870. return
  871. self.write_debug(f'Extracting from card info: {card.get("url")}')
  872. binding_values = card['binding_values']
  873. def get_binding_value(k):
  874. o = binding_values.get(k) or {}
  875. return try_get(o, lambda x: x[x['type'].lower() + '_value'])
  876. card_name = card['name'].split(':')[-1]
  877. if card_name == 'player':
  878. yield {
  879. '_type': 'url',
  880. 'url': get_binding_value('player_url'),
  881. }
  882. elif card_name == 'periscope_broadcast':
  883. yield {
  884. '_type': 'url',
  885. 'url': get_binding_value('url') or get_binding_value('player_url'),
  886. 'ie_key': PeriscopeIE.ie_key(),
  887. }
  888. elif card_name == 'broadcast':
  889. yield {
  890. '_type': 'url',
  891. 'url': get_binding_value('broadcast_url'),
  892. 'ie_key': TwitterBroadcastIE.ie_key(),
  893. }
  894. elif card_name == 'audiospace':
  895. yield {
  896. '_type': 'url',
  897. 'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
  898. 'ie_key': TwitterSpacesIE.ie_key(),
  899. }
  900. elif card_name == 'summary':
  901. yield {
  902. '_type': 'url',
  903. 'url': get_binding_value('card_url'),
  904. }
  905. elif card_name == 'unified_card':
  906. unified_card = self._parse_json(get_binding_value('unified_card'), twid)
  907. yield from map(extract_from_video_info, traverse_obj(
  908. unified_card, ('media_entities', ...), expected_type=dict))
  909. # amplify, promo_video_website, promo_video_convo, appplayer,
  910. # video_direct_message, poll2choice_video, poll3choice_video,
  911. # poll4choice_video, ...
  912. else:
  913. is_amplify = card_name == 'amplify'
  914. vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
  915. content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
  916. formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
  917. thumbnails = []
  918. for suffix in ('_small', '', '_large', '_x_large', '_original'):
  919. image = get_binding_value('player_image' + suffix) or {}
  920. image_url = image.get('url')
  921. if not image_url or '/player-placeholder' in image_url:
  922. continue
  923. thumbnails.append({
  924. 'id': suffix[1:] if suffix else 'medium',
  925. 'url': image_url,
  926. 'width': int_or_none(image.get('width')),
  927. 'height': int_or_none(image.get('height')),
  928. })
  929. yield {
  930. 'formats': formats,
  931. 'subtitles': subtitles,
  932. 'thumbnails': thumbnails,
  933. 'duration': int_or_none(get_binding_value(
  934. 'content_duration_seconds')),
  935. }
  936. media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
  937. videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
  938. cards = extract_from_card_info(status.get('card'))
  939. entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
  940. if not entries:
  941. expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
  942. if not expanded_url or expanded_url == url:
  943. raise ExtractorError('No video could be found in this tweet', expected=True)
  944. return self.url_result(expanded_url, display_id=twid, **info)
  945. entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
  946. if len(entries) == 1:
  947. return entries[0]
  948. for index, entry in enumerate(entries, 1):
  949. entry['title'] += f' #{index}'
  950. return self.playlist_result(entries, **info)
  951. class TwitterAmplifyIE(TwitterBaseIE):
  952. IE_NAME = 'twitter:amplify'
  953. _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
  954. _TEST = {
  955. 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
  956. 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
  957. 'info_dict': {
  958. 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
  959. 'ext': 'mp4',
  960. 'title': 'Twitter Video',
  961. 'thumbnail': 're:^https?://.*',
  962. },
  963. 'params': {'format': '[protocol=https]'},
  964. }
  965. def _real_extract(self, url):
  966. video_id = self._match_id(url)
  967. webpage = self._download_webpage(url, video_id)
  968. vmap_url = self._html_search_meta(
  969. 'twitter:amplify:vmap', webpage, 'vmap url')
  970. formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)
  971. thumbnails = []
  972. thumbnail = self._html_search_meta(
  973. 'twitter:image:src', webpage, 'thumbnail', fatal=False)
  974. def _find_dimension(target):
  975. w = int_or_none(self._html_search_meta(
  976. 'twitter:%s:width' % target, webpage, fatal=False))
  977. h = int_or_none(self._html_search_meta(
  978. 'twitter:%s:height' % target, webpage, fatal=False))
  979. return w, h
  980. if thumbnail:
  981. thumbnail_w, thumbnail_h = _find_dimension('image')
  982. thumbnails.append({
  983. 'url': thumbnail,
  984. 'width': thumbnail_w,
  985. 'height': thumbnail_h,
  986. })
  987. video_w, video_h = _find_dimension('player')
  988. formats[0].update({
  989. 'width': video_w,
  990. 'height': video_h,
  991. })
  992. return {
  993. 'id': video_id,
  994. 'title': 'Twitter Video',
  995. 'formats': formats,
  996. 'thumbnails': thumbnails,
  997. }
  998. class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
  999. IE_NAME = 'twitter:broadcast'
  1000. _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
  1001. _TEST = {
  1002. # untitled Periscope video
  1003. 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
  1004. 'info_dict': {
  1005. 'id': '1yNGaQLWpejGj',
  1006. 'ext': 'mp4',
  1007. 'title': 'Andrea May Sahouri - Periscope Broadcast',
  1008. 'uploader': 'Andrea May Sahouri',
  1009. 'uploader_id': '1PXEdBZWpGwKe',
  1010. 'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
  1011. 'view_count': int,
  1012. },
  1013. }
  1014. def _real_extract(self, url):
  1015. broadcast_id = self._match_id(url)
  1016. broadcast = self._call_api(
  1017. 'broadcasts/show.json', broadcast_id,
  1018. {'ids': broadcast_id})['broadcasts'][broadcast_id]
  1019. info = self._parse_broadcast_data(broadcast, broadcast_id)
  1020. media_key = broadcast['media_key']
  1021. source = self._call_api(
  1022. f'live_video_stream/status/{media_key}', media_key)['source']
  1023. m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
  1024. if '/live_video_stream/geoblocked/' in m3u8_url:
  1025. self.raise_geo_restricted()
  1026. m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
  1027. m3u8_url).query).get('type', [None])[0]
  1028. state, width, height = self._extract_common_format_info(broadcast)
  1029. info['formats'] = self._extract_pscp_m3u8_formats(
  1030. m3u8_url, broadcast_id, m3u8_id, state, width, height)
  1031. return info
  1032. class TwitterSpacesIE(TwitterBaseIE):
  1033. IE_NAME = 'twitter:spaces'
  1034. _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
  1035. _TESTS = [{
  1036. 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
  1037. 'info_dict': {
  1038. 'id': '1RDxlgyvNXzJL',
  1039. 'ext': 'm4a',
  1040. 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
  1041. 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
  1042. 'uploader': r're:Lucio Di Gaetano.*?',
  1043. 'uploader_id': 'luciodigaetano',
  1044. 'live_status': 'was_live',
  1045. 'timestamp': 1659877956397,
  1046. },
  1047. 'params': {'skip_download': 'm3u8'},
  1048. }]
  1049. SPACE_STATUS = {
  1050. 'notstarted': 'is_upcoming',
  1051. 'ended': 'was_live',
  1052. 'running': 'is_live',
  1053. 'timedout': 'post_live',
  1054. }
  1055. def _build_graphql_query(self, space_id):
  1056. return {
  1057. 'variables': {
  1058. 'id': space_id,
  1059. 'isMetatagsQuery': True,
  1060. 'withDownvotePerspective': False,
  1061. 'withReactionsMetadata': False,
  1062. 'withReactionsPerspective': False,
  1063. 'withReplays': True,
  1064. 'withSuperFollowsUserFields': True,
  1065. 'withSuperFollowsTweetFields': True,
  1066. },
  1067. 'features': {
  1068. 'dont_mention_me_view_api_enabled': True,
  1069. 'interactive_text_enabled': True,
  1070. 'responsive_web_edit_tweet_api_enabled': True,
  1071. 'responsive_web_enhance_cards_enabled': True,
  1072. 'responsive_web_uc_gql_enabled': True,
  1073. 'spaces_2022_h2_clipping': True,
  1074. 'spaces_2022_h2_spaces_communities': False,
  1075. 'standardized_nudges_misinfo': True,
  1076. 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
  1077. 'vibe_api_enabled': True,
  1078. },
  1079. }
  1080. def _real_extract(self, url):
  1081. space_id = self._match_id(url)
  1082. space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
  1083. if not space_data:
  1084. raise ExtractorError('Twitter Space not found', expected=True)
  1085. metadata = space_data['metadata']
  1086. live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
  1087. formats = []
  1088. if live_status == 'is_upcoming':
  1089. self.raise_no_formats('Twitter Space not started yet', expected=True)
  1090. elif live_status == 'post_live':
  1091. self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
  1092. else:
  1093. source = self._call_api(
  1094. f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']
  1095. # XXX: Native downloader does not work
  1096. formats = self._extract_m3u8_formats(
  1097. traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
  1098. metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
  1099. headers={'Referer': 'https://twitter.com/'})
  1100. for fmt in formats:
  1101. fmt.update({'vcodec': 'none', 'acodec': 'aac'})
  1102. participants = ', '.join(traverse_obj(
  1103. space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
  1104. return {
  1105. 'id': space_id,
  1106. 'title': metadata.get('title'),
  1107. 'description': f'Twitter Space participated by {participants}',
  1108. 'uploader': traverse_obj(
  1109. metadata, ('creator_results', 'result', 'legacy', 'name')),
  1110. 'uploader_id': traverse_obj(
  1111. metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
  1112. 'live_status': live_status,
  1113. 'timestamp': metadata.get('created_at'),
  1114. 'formats': formats,
  1115. }
  1116. class TwitterShortenerIE(TwitterBaseIE):
  1117. IE_NAME = 'twitter:shortener'
  1118. _VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
  1119. _BASE_URL = 'https://t.co/'
  1120. def _real_extract(self, url):
  1121. mobj = self._match_valid_url(url)
  1122. eid, id = mobj.group('eid', 'id')
  1123. if eid:
  1124. id = eid
  1125. url = self._BASE_URL + id
  1126. new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).geturl()
  1127. __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link="
  1128. if new_url.startswith(__UNSAFE_LINK):
  1129. new_url = new_url.replace(__UNSAFE_LINK, "")
  1130. return self.url_result(new_url)