generic.py 116 KB

  1. import os
  2. import re
  3. import types
  4. import urllib.parse
  5. import xml.etree.ElementTree
  6. from .common import InfoExtractor # isort: split
  7. from .commonprotocols import RtmpIE
  8. from .youtube import YoutubeIE
  9. from ..compat import compat_etree_fromstring
  10. from ..utils import (
  11. KNOWN_EXTENSIONS,
  12. MEDIA_EXTENSIONS,
  13. ExtractorError,
  14. UnsupportedError,
  15. determine_ext,
  16. dict_get,
  17. format_field,
  18. int_or_none,
  19. is_html,
  20. js_to_json,
  21. merge_dicts,
  22. mimetype2ext,
  23. orderedSet,
  24. parse_duration,
  25. parse_resolution,
  26. smuggle_url,
  27. str_or_none,
  28. traverse_obj,
  29. try_call,
  30. unescapeHTML,
  31. unified_timestamp,
  32. unsmuggle_url,
  33. url_or_none,
  34. variadic,
  35. xpath_attr,
  36. xpath_text,
  37. xpath_with_ns,
  38. )
  39. class GenericIE(InfoExtractor):
  40. IE_DESC = 'Generic downloader that works on some sites'
  41. _VALID_URL = r'.*'
  42. IE_NAME = 'generic'
  43. _NETRC_MACHINE = False # Suppress username warning
  44. _TESTS = [
  45. # Direct link to a video
  46. {
  47. 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
  48. 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
  49. 'info_dict': {
  50. 'id': 'trailer',
  51. 'ext': 'mp4',
  52. 'title': 'trailer',
  53. 'upload_date': '20100513',
  54. }
  55. },
  56. # Direct link to media delivered compressed (until Accept-Encoding is *)
  57. {
  58. 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
  59. 'md5': '128c42e68b13950268b648275386fc74',
  60. 'info_dict': {
  61. 'id': 'FictionJunction-Parallel_Hearts',
  62. 'ext': 'flac',
  63. 'title': 'FictionJunction-Parallel_Hearts',
  64. 'upload_date': '20140522',
  65. },
  66. 'expected_warnings': [
  67. 'URL could be a direct video link, returning it as such.'
  68. ],
  69. 'skip': 'URL invalid',
  70. },
  71. # Direct download with broken HEAD
  72. {
  73. 'url': 'http://ai-radio.org:8000/radio.opus',
  74. 'info_dict': {
  75. 'id': 'radio',
  76. 'ext': 'opus',
  77. 'title': 'radio',
  78. },
  79. 'params': {
  80. 'skip_download': True, # infinite live stream
  81. },
  82. 'expected_warnings': [
  83. r'501.*Not Implemented',
  84. r'400.*Bad Request',
  85. ],
  86. },
  87. # Direct link with incorrect MIME type
  88. {
  89. 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  90. 'md5': '4ccbebe5f36706d85221f204d7eb5913',
  91. 'info_dict': {
  92. 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
  93. 'id': '5_Lennart_Poettering_-_Systemd',
  94. 'ext': 'webm',
  95. 'title': '5_Lennart_Poettering_-_Systemd',
  96. 'upload_date': '20141120',
  97. },
  98. 'expected_warnings': [
  99. 'URL could be a direct video link, returning it as such.'
  100. ]
  101. },
  102. # RSS feed
  103. {
  104. 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
  105. 'info_dict': {
  106. 'id': 'https://phihag.de/2014/youtube-dl/rss2.xml',
  107. 'title': 'Zero Punctuation',
  108. 'description': 're:.*groundbreaking video review series.*'
  109. },
  110. 'playlist_mincount': 11,
  111. },
  112. # RSS feed with enclosure
  113. {
  114. 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
  115. 'info_dict': {
  116. 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
  117. 'title': 'MSNBC Rachel Maddow (video)',
  118. 'description': 're:.*her unique approach to storytelling.*',
  119. },
  120. 'playlist': [{
  121. 'info_dict': {
  122. 'ext': 'mov',
  123. 'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
  124. 'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
  125. 'description': 're:.*her unique approach to storytelling.*',
  126. 'upload_date': '20201204',
  127. },
  128. }],
  129. },
  130. # RSS feed with item with description and thumbnails
  131. {
  132. 'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
  133. 'info_dict': {
  134. 'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
  135. 'title': 're:.*100% Hydrogen.*',
  136. 'description': 're:.*In this episode.*',
  137. },
  138. 'playlist': [{
  139. 'info_dict': {
  140. 'ext': 'm4a',
  141. 'id': 'c1c879525ce2cb640b344507e682c36d',
  142. 'title': 're:Hydrogen!',
  143. 'description': 're:.*In this episode we are going.*',
  144. 'timestamp': 1567977776,
  145. 'upload_date': '20190908',
  146. 'duration': 459,
  147. 'thumbnail': r're:^https?://.*\.jpg$',
  148. 'episode_number': 1,
  149. 'season_number': 1,
  150. 'age_limit': 0,
  151. 'season': 'Season 1',
  152. 'direct': True,
  153. 'episode': 'Episode 1',
  154. },
  155. }],
  156. 'params': {
  157. 'skip_download': True,
  158. },
  159. },
  160. # RSS feed with enclosures and unsupported link URLs
  161. {
  162. 'url': 'http://www.hellointernet.fm/podcast?format=rss',
  163. 'info_dict': {
  164. 'id': 'http://www.hellointernet.fm/podcast?format=rss',
  165. 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
  166. 'title': 'Hello Internet',
  167. },
  168. 'playlist_mincount': 100,
  169. },
  170. # RSS feed with guid
  171. {
  172. 'url': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
  173. 'info_dict': {
  174. 'id': 'https://www.omnycontent.com/d/playlist/a7b4f8fe-59d9-4afc-a79a-a90101378abf/bf2c1d80-3656-4449-9d00-a903004e8f84/efbff746-e7c1-463a-9d80-a903004e8f8f/podcast.rss',
  175. 'description': 'md5:be809a44b63b0c56fb485caf68685520',
  176. 'title': 'The Little Red Podcast',
  177. },
  178. 'playlist_mincount': 76,
  179. },
  180. # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
  181. {
  182. 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
  183. 'info_dict': {
  184. 'id': 'smil',
  185. 'ext': 'mp4',
  186. 'title': 'Automatics, robotics and biocybernetics',
  187. 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
  188. 'upload_date': '20130627',
  189. 'formats': 'mincount:16',
  190. 'subtitles': 'mincount:1',
  191. },
  192. 'params': {
  193. 'force_generic_extractor': True,
  194. 'skip_download': True,
  195. },
  196. },
  197. # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
  198. {
  199. 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
  200. 'info_dict': {
  201. 'id': 'hds',
  202. 'ext': 'flv',
  203. 'title': 'hds',
  204. 'formats': 'mincount:1',
  205. },
  206. 'params': {
  207. 'skip_download': True,
  208. },
  209. },
  210. # SMIL from https://www.restudy.dk/video/play/id/1637
  211. {
  212. 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
  213. 'info_dict': {
  214. 'id': 'video_1637',
  215. 'ext': 'flv',
  216. 'title': 'video_1637',
  217. 'formats': 'mincount:3',
  218. },
  219. 'params': {
  220. 'skip_download': True,
  221. },
  222. },
  223. # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
  224. {
  225. 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
  226. 'info_dict': {
  227. 'id': 'smil-service',
  228. 'ext': 'flv',
  229. 'title': 'smil-service',
  230. 'formats': 'mincount:1',
  231. },
  232. 'params': {
  233. 'skip_download': True,
  234. },
  235. },
  236. # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
  237. {
  238. 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
  239. 'info_dict': {
  240. 'id': '4719370',
  241. 'ext': 'mp4',
  242. 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
  243. 'formats': 'mincount:3',
  244. },
  245. 'params': {
  246. 'skip_download': True,
  247. },
  248. },
  249. # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
  250. {
  251. 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
  252. 'info_dict': {
  253. 'id': 'mZlp2ctYIUEB',
  254. 'ext': 'mp4',
  255. 'title': 'Tikibad ontruimd wegens brand',
  256. 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
  257. 'thumbnail': r're:^https?://.*\.jpg$',
  258. 'duration': 33,
  259. },
  260. 'params': {
  261. 'skip_download': True,
  262. },
  263. },
  264. # MPD from http://dash-mse-test.appspot.com/media.html
  265. {
  266. 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
  267. 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
  268. 'info_dict': {
  269. 'id': 'car-20120827-manifest',
  270. 'ext': 'mp4',
  271. 'title': 'car-20120827-manifest',
  272. 'formats': 'mincount:9',
  273. 'upload_date': '20130904',
  274. },
  275. },
  276. # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
  277. {
  278. 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
  279. 'info_dict': {
  280. 'id': 'content',
  281. 'ext': 'mp4',
  282. 'title': 'content',
  283. 'formats': 'mincount:8',
  284. },
  285. 'params': {
  286. # m3u8 downloads
  287. 'skip_download': True,
  288. },
  289. 'skip': 'video gone',
  290. },
  291. # m3u8 served with Content-Type: text/plain
  292. {
  293. 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
  294. 'info_dict': {
  295. 'id': 'index',
  296. 'ext': 'mp4',
  297. 'title': 'index',
  298. 'upload_date': '20140720',
  299. 'formats': 'mincount:11',
  300. },
  301. 'params': {
  302. # m3u8 downloads
  303. 'skip_download': True,
  304. },
  305. 'skip': 'video gone',
  306. },
  307. # google redirect
  308. {
  309. 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
  310. 'info_dict': {
  311. 'id': 'cmQHVoWB5FY',
  312. 'ext': 'mp4',
  313. 'upload_date': '20130224',
  314. 'uploader_id': 'TheVerge',
  315. 'description': r're:^Chris Ziegler takes a look at the\.*',
  316. 'uploader': 'The Verge',
  317. 'title': 'First Firefox OS phones side-by-side',
  318. },
  319. 'params': {
  320. 'skip_download': False,
  321. }
  322. },
  323. {
  324. # redirect in Refresh HTTP header
  325. 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
  326. 'info_dict': {
  327. 'id': 'pO8h3EaFRdo',
  328. 'ext': 'mp4',
  329. 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
  330. 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
  331. 'upload_date': '20150917',
  332. 'uploader_id': 'brtvofficial',
  333. 'uploader': 'Boiler Room',
  334. },
  335. 'params': {
  336. 'skip_download': False,
  337. },
  338. },
  339. {
  340. 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
  341. 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
  342. 'info_dict': {
  343. 'id': '13601338388002',
  344. 'ext': 'mp4',
  345. 'uploader': 'www.hodiho.fr',
  346. 'title': 'R\u00e9gis plante sa Jeep',
  347. }
  348. },
  349. # bandcamp page with custom domain
  350. {
  351. 'add_ie': ['Bandcamp'],
  352. 'url': 'http://bronyrock.com/track/the-pony-mash',
  353. 'info_dict': {
  354. 'id': '3235767654',
  355. 'ext': 'mp3',
  356. 'title': 'The Pony Mash',
  357. 'uploader': 'M_Pallante',
  358. },
  359. 'skip': 'There is a limit of 200 free downloads / month for the test song',
  360. },
  361. # ooyala video
  362. {
  363. 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
  364. 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
  365. 'info_dict': {
  366. 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
  367. 'ext': 'mp4',
  368. 'title': '2cc213299525360.mov', # that's what we get
  369. 'duration': 238.231,
  370. },
  371. 'add_ie': ['Ooyala'],
  372. },
  373. {
  374. # ooyala video embedded with http://player.ooyala.com/iframe.js
  375. 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
  376. 'info_dict': {
  377. 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
  378. 'ext': 'mp4',
  379. 'title': '"Steve Jobs: Man in the Machine" trailer',
  380. 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
  381. 'duration': 135.427,
  382. },
  383. 'params': {
  384. 'skip_download': True,
  385. },
  386. 'skip': 'movie expired',
  387. },
  388. # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
  389. {
  390. 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
  391. 'info_dict': {
  392. 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
  393. 'ext': 'mp4',
  394. 'title': 'Steampunk Fest Comes to Honesdale',
  395. 'duration': 43.276,
  396. },
  397. 'params': {
  398. 'skip_download': True,
  399. }
  400. },
  401. # embed.ly video
  402. {
  403. 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
  404. 'info_dict': {
  405. 'id': '9ODmcdjQcHQ',
  406. 'ext': 'mp4',
  407. 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
  408. 'upload_date': '20140225',
  409. 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
  410. 'uploader': 'Tested',
  411. 'uploader_id': 'testedcom',
  412. },
  413. # No need to test YoutubeIE here
  414. 'params': {
  415. 'skip_download': True,
  416. },
  417. },
  418. # funnyordie embed
  419. {
  420. 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
  421. 'info_dict': {
  422. 'id': '18e820ec3f',
  423. 'ext': 'mp4',
  424. 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
  425. 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
  426. },
  427. # HEAD requests lead to endless 301, while GET is OK
  428. 'expected_warnings': ['301'],
  429. },
  430. # RUTV embed
  431. {
  432. 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
  433. 'info_dict': {
  434. 'id': '776940',
  435. 'ext': 'mp4',
  436. 'title': 'Охотское море стало целиком российским',
  437. 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
  438. },
  439. 'params': {
  440. # m3u8 download
  441. 'skip_download': True,
  442. },
  443. },
  444. # TVC embed
  445. {
  446. 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
  447. 'info_dict': {
  448. 'id': '55304',
  449. 'ext': 'mp4',
  450. 'title': 'Дошкольное воспитание',
  451. },
  452. },
  453. # SportBox embed
  454. {
  455. 'url': 'http://www.vestifinance.ru/articles/25753',
  456. 'info_dict': {
  457. 'id': '25753',
  458. 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
  459. },
  460. 'playlist': [{
  461. 'info_dict': {
  462. 'id': '370908',
  463. 'title': 'Госзаказ. День 3',
  464. 'ext': 'mp4',
  465. }
  466. }, {
  467. 'info_dict': {
  468. 'id': '370905',
  469. 'title': 'Госзаказ. День 2',
  470. 'ext': 'mp4',
  471. }
  472. }, {
  473. 'info_dict': {
  474. 'id': '370902',
  475. 'title': 'Госзаказ. День 1',
  476. 'ext': 'mp4',
  477. }
  478. }],
  479. 'params': {
  480. # m3u8 download
  481. 'skip_download': True,
  482. },
  483. },
  484. # Myvi.ru embed
  485. {
  486. 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
  487. 'info_dict': {
  488. 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
  489. 'ext': 'mp4',
  490. 'title': 'Ужастики, русский трейлер (2015)',
  491. 'thumbnail': r're:^https?://.*\.jpg$',
  492. 'duration': 153,
  493. }
  494. },
  495. # XHamster embed
  496. {
  497. 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
  498. 'info_dict': {
  499. 'id': 'showthread',
  500. 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
  501. },
  502. 'playlist_mincount': 7,
  503. # This forum does not allow <iframe> syntaxes anymore
  504. # Now HTML tags are displayed as-is
  505. 'skip': 'No videos on this page',
  506. },
  507. # Embedded TED video
  508. {
  509. 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
  510. 'md5': '65fdff94098e4a607385a60c5177c638',
  511. 'info_dict': {
  512. 'id': '1969',
  513. 'ext': 'mp4',
  514. 'title': 'Hidden miracles of the natural world',
  515. 'uploader': 'Louie Schwartzberg',
  516. 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
  517. }
  518. },
  519. # nowvideo embed hidden behind percent encoding
  520. {
  521. 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
  522. 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
  523. 'info_dict': {
  524. 'id': '06e53103ca9aa',
  525. 'ext': 'flv',
  526. 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
  527. 'description': 'No description',
  528. },
  529. },
  530. # arte embed
  531. {
  532. 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
  533. 'md5': '7653032cbb25bf6c80d80f217055fa43',
  534. 'info_dict': {
  535. 'id': '048195-004_PLUS7-F',
  536. 'ext': 'flv',
  537. 'title': 'X:enius',
  538. 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
  539. 'upload_date': '20140320',
  540. },
  541. 'params': {
  542. 'skip_download': 'Requires rtmpdump'
  543. },
  544. 'skip': 'video gone',
  545. },
  546. # francetv embed
  547. {
  548. 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
  549. 'info_dict': {
  550. 'id': 'EV_30231',
  551. 'ext': 'mp4',
  552. 'title': 'Alcaline, le concert avec Calogero',
  553. 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
  554. 'upload_date': '20150226',
  555. 'timestamp': 1424989860,
  556. 'duration': 5400,
  557. },
  558. 'params': {
  559. # m3u8 downloads
  560. 'skip_download': True,
  561. },
  562. 'expected_warnings': [
  563. 'Forbidden'
  564. ]
  565. },
  566. # Condé Nast embed
  567. {
  568. 'url': 'http://www.wired.com/2014/04/honda-asimo/',
  569. 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
  570. 'info_dict': {
  571. 'id': '53501be369702d3275860000',
  572. 'ext': 'mp4',
  573. 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
  574. }
  575. },
  576. # Dailymotion embed
  577. {
  578. 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
  579. 'md5': '441aeeb82eb72c422c7f14ec533999cd',
  580. 'info_dict': {
  581. 'id': 'k2mm4bCdJ6CQ2i7c8o2',
  582. 'ext': 'mp4',
  583. 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
  584. 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
  585. 'uploader': 'Spi0n',
  586. 'uploader_id': 'xgditw',
  587. 'upload_date': '20140425',
  588. 'timestamp': 1398441542,
  589. },
  590. 'add_ie': ['Dailymotion'],
  591. },
  592. # DailyMail embed
  593. {
  594. 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
  595. 'info_dict': {
  596. 'id': '1495629',
  597. 'ext': 'mp4',
  598. 'title': 'Care worker punches elderly dementia patient in head 11 times',
  599. 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
  600. },
  601. 'add_ie': ['DailyMail'],
  602. 'params': {
  603. 'skip_download': True,
  604. },
  605. },
  606. # YouTube embed
  607. {
  608. 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
  609. 'info_dict': {
  610. 'id': 'FXRb4ykk4S0',
  611. 'ext': 'mp4',
  612. 'title': 'The NBL Auction 2014',
  613. 'uploader': 'BADMINTON England',
  614. 'uploader_id': 'BADMINTONEvents',
  615. 'upload_date': '20140603',
  616. 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
  617. },
  618. 'add_ie': ['Youtube'],
  619. 'params': {
  620. 'skip_download': True,
  621. }
  622. },
  623. # MTVServices embed
  624. {
  625. 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
  626. 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
  627. 'info_dict': {
  628. 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
  629. 'ext': 'mp4',
  630. 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
  631. 'description': 'Two valets share their love for movie star Liam Neesons.',
  632. 'timestamp': 1349922600,
  633. 'upload_date': '20121011',
  634. },
  635. },
  636. # YouTube embed via <data-embed-url="">
  637. {
  638. 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
  639. 'info_dict': {
  640. 'id': '4vAffPZIT44',
  641. 'ext': 'mp4',
  642. 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
  643. 'uploader': 'Gameloft',
  644. 'uploader_id': 'gameloft',
  645. 'upload_date': '20140828',
  646. 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
  647. },
  648. 'params': {
  649. 'skip_download': True,
  650. }
  651. },
  652. # Flowplayer
  653. {
  654. 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
  655. 'md5': '9d65602bf31c6e20014319c7d07fba27',
  656. 'info_dict': {
  657. 'id': '5123ea6d5e5a7',
  658. 'ext': 'mp4',
  659. 'age_limit': 18,
  660. 'uploader': 'www.handjobhub.com',
  661. 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
  662. }
  663. },
  664. # MLB embed
  665. {
  666. 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
  667. 'md5': '96f09a37e44da40dd083e12d9a683327',
  668. 'info_dict': {
  669. 'id': '33322633',
  670. 'ext': 'mp4',
  671. 'title': 'Ump changes call to ball',
  672. 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
  673. 'duration': 48,
  674. 'timestamp': 1401537900,
  675. 'upload_date': '20140531',
  676. 'thumbnail': r're:^https?://.*\.jpg$',
  677. },
  678. },
  679. # Wistia standard embed (async)
  680. {
  681. 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
  682. 'info_dict': {
  683. 'id': '807fafadvk',
  684. 'ext': 'mp4',
  685. 'title': 'Drip Brennan Dunn Workshop',
  686. 'description': 'a JV Webinars video from getdrip-1',
  687. 'duration': 4986.95,
  688. 'timestamp': 1463607249,
  689. 'upload_date': '20160518',
  690. },
  691. 'params': {
  692. 'skip_download': True,
  693. },
  694. 'skip': 'webpage 404 not found',
  695. },
  696. # Soundcloud embed
  697. {
  698. 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
  699. 'info_dict': {
  700. 'id': '174391317',
  701. 'ext': 'mp3',
  702. 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
  703. 'uploader': 'Sophos Security',
  704. 'title': 'Chet Chat 171 - Oct 29, 2014',
  705. 'upload_date': '20141029',
  706. }
  707. },
  708. # Soundcloud multiple embeds
  709. {
  710. 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
  711. 'info_dict': {
  712. 'id': '52809',
  713. 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
  714. },
  715. 'playlist_mincount': 7,
  716. },
  717. # TuneIn station embed
  718. {
  719. 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
  720. 'info_dict': {
  721. 'id': '204146',
  722. 'ext': 'mp3',
  723. 'title': 'CNRV',
  724. 'location': 'Paris, France',
  725. 'is_live': True,
  726. },
  727. 'params': {
  728. # Live stream
  729. 'skip_download': True,
  730. },
  731. },
  732. # Livestream embed
  733. {
  734. 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
  735. 'info_dict': {
  736. 'id': '67864563',
  737. 'ext': 'flv',
  738. 'upload_date': '20141112',
  739. 'title': 'Rosetta #CometLanding webcast HL 10',
  740. }
  741. },
  742. # Another Livestream embed, without 'new.' in URL
  743. {
  744. 'url': 'https://www.freespeech.org/',
  745. 'info_dict': {
  746. 'id': '123537347',
  747. 'ext': 'mp4',
  748. 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  749. },
  750. 'params': {
  751. # Live stream
  752. 'skip_download': True,
  753. },
  754. },
  755. # LazyYT
  756. {
  757. 'url': 'https://skiplagged.com/',
  758. 'info_dict': {
  759. 'id': 'skiplagged',
  760. 'title': 'Skiplagged: The smart way to find cheap flights',
  761. },
  762. 'playlist_mincount': 1,
  763. 'add_ie': ['Youtube'],
  764. },
  765. # Cinchcast embed
  766. {
  767. 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
  768. 'info_dict': {
  769. 'id': '7141703',
  770. 'ext': 'mp3',
  771. 'upload_date': '20141126',
  772. 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
  773. }
  774. },
  775. # Cinerama player
  776. {
  777. 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
  778. 'info_dict': {
  779. 'id': '730m_DandD_1901_512k',
  780. 'ext': 'mp4',
  781. 'uploader': 'www.abc.net.au',
  782. 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
  783. }
  784. },
  785. # embedded viddler video
  786. {
  787. 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
  788. 'info_dict': {
  789. 'id': '4d03aad9',
  790. 'ext': 'mp4',
  791. 'uploader': 'deadspin',
  792. 'title': 'WALL-TO-GORTAT',
  793. 'timestamp': 1422285291,
  794. 'upload_date': '20150126',
  795. },
  796. 'add_ie': ['Viddler'],
  797. },
  798. # Libsyn embed
  799. {
  800. 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
  801. 'info_dict': {
  802. 'id': '3377616',
  803. 'ext': 'mp3',
  804. 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
  805. 'description': 'md5:601cb790edd05908957dae8aaa866465',
  806. 'upload_date': '20150220',
  807. },
  808. 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
  809. },
  810. # jwplayer YouTube
  811. {
  812. 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
  813. 'info_dict': {
  814. 'id': 'Mrj4DVp2zeA',
  815. 'ext': 'mp4',
  816. 'upload_date': '20150212',
  817. 'uploader': 'The National Archives UK',
  818. 'description': 'md5:8078af856dca76edc42910b61273dbbf',
  819. 'uploader_id': 'NationalArchives08',
  820. 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
  821. },
  822. },
  823. # jwplayer rtmp
  824. {
  825. 'url': 'http://www.suffolk.edu/sjc/live.php',
  826. 'info_dict': {
  827. 'id': 'live',
  828. 'ext': 'flv',
  829. 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
  830. 'uploader': 'www.suffolk.edu',
  831. },
  832. 'params': {
  833. 'skip_download': True,
  834. },
  835. 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
  836. },
  837. # jwplayer with only the json URL
  838. {
  839. 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454',
  840. 'info_dict': {
  841. 'id': 'TljWkvWH',
  842. 'ext': 'mp4',
  843. 'upload_date': '20180306',
  844. 'title': 'md5:91eb1862f6526415214f62c00b453936',
  845. 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa',
  846. 'timestamp': 1520367225,
  847. },
  848. 'params': {
  849. 'skip_download': True,
  850. },
  851. },
  852. # Complex jwplayer
  853. {
  854. 'url': 'http://www.indiedb.com/games/king-machine/videos',
  855. 'info_dict': {
  856. 'id': 'videos',
  857. 'ext': 'mp4',
  858. 'title': 'king machine trailer 1',
  859. 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
  860. 'thumbnail': r're:^https?://.*\.jpg$',
  861. },
  862. },
  863. {
  864. # JWPlayer config passed as variable
  865. 'url': 'http://www.txxx.com/videos/3326530/ariele/',
  866. 'info_dict': {
  867. 'id': '3326530_hq',
  868. 'ext': 'mp4',
  869. 'title': 'ARIELE | Tube Cup',
  870. 'uploader': 'www.txxx.com',
  871. 'age_limit': 18,
  872. },
  873. 'params': {
  874. 'skip_download': True,
  875. }
  876. },
  877. {
  878. # Video.js embed, multiple formats
  879. 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
  880. 'info_dict': {
  881. 'id': 'yygqldloqIk',
  882. 'ext': 'mp4',
  883. 'title': 'SolidWorks. Урок 6 Настройка чертежа',
  884. 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
  885. 'upload_date': '20130314',
  886. 'uploader': 'PROстое3D',
  887. 'uploader_id': 'PROstoe3D',
  888. },
  889. 'params': {
  890. 'skip_download': True,
  891. },
  892. },
  893. {
  894. # Video.js embed, single format
  895. 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
  896. 'info_dict': {
  897. 'id': 'watch',
  898. 'ext': 'mp4',
  899. 'title': 'Step 1 - Good Foundation',
  900. 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
  901. },
  902. 'params': {
  903. 'skip_download': True,
  904. },
  905. },
  906. # rtl.nl embed
  907. {
  908. 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
  909. 'playlist_mincount': 5,
  910. 'info_dict': {
  911. 'id': 'aanslagen-kopenhagen',
  912. 'title': 'Aanslagen Kopenhagen',
  913. }
  914. },
  915. # Zapiks embed
  916. {
  917. 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
  918. 'info_dict': {
  919. 'id': '118046',
  920. 'ext': 'mp4',
  921. 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
  922. }
  923. },
  924. # Kaltura embed (different embed code)
  925. {
  926. 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
  927. 'info_dict': {
  928. 'id': '1_a52wc67y',
  929. 'ext': 'flv',
  930. 'upload_date': '20150127',
  931. 'uploader_id': 'PremierMedia',
  932. 'timestamp': int,
  933. 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
  934. },
  935. },
  936. # Kaltura embed with single quotes
  937. {
  938. 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
  939. 'info_dict': {
  940. 'id': '0_izeg5utt',
  941. 'ext': 'mp4',
  942. 'title': '35871',
  943. 'timestamp': 1355743100,
  944. 'upload_date': '20121217',
  945. 'uploader_id': 'cplapp@learn360.com',
  946. },
  947. 'add_ie': ['Kaltura'],
  948. },
  949. {
  950. # Kaltura embedded via quoted entry_id
  951. 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
  952. 'info_dict': {
  953. 'id': '0_utuok90b',
  954. 'ext': 'mp4',
  955. 'title': '06_matthew_brender_raj_dutt',
  956. 'timestamp': 1466638791,
  957. 'upload_date': '20160622',
  958. },
  959. 'add_ie': ['Kaltura'],
  960. 'expected_warnings': [
  961. 'Could not send HEAD request'
  962. ],
  963. 'params': {
  964. 'skip_download': True,
  965. }
  966. },
  967. {
  968. # Kaltura embedded, some fileExt broken (#11480)
  969. 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
  970. 'info_dict': {
  971. 'id': '1_sgtvehim',
  972. 'ext': 'mp4',
  973. 'title': 'Our "Standard Models" of particle physics and cosmology',
  974. 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
  975. 'timestamp': 1321158993,
  976. 'upload_date': '20111113',
  977. 'uploader_id': 'kps1',
  978. },
  979. 'add_ie': ['Kaltura'],
  980. },
  981. {
  982. # Kaltura iframe embed
  983. 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
  984. 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
  985. 'info_dict': {
  986. 'id': '0_f2cfbpwy',
  987. 'ext': 'mp4',
  988. 'title': 'I. M. Pei: A Centennial Celebration',
  989. 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
  990. 'upload_date': '20170403',
  991. 'uploader_id': 'batchUser',
  992. 'timestamp': 1491232186,
  993. },
  994. 'add_ie': ['Kaltura'],
  995. },
  996. {
  997. # Kaltura iframe embed, more sophisticated
  998. 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
  999. 'info_dict': {
  1000. 'id': '1_9gzouybz',
  1001. 'ext': 'mp4',
  1002. 'title': 'lecture-05sep2017',
  1003. 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
  1004. 'upload_date': '20170913',
  1005. 'uploader_id': 'eps2',
  1006. 'timestamp': 1505340777,
  1007. },
  1008. 'params': {
  1009. 'skip_download': True,
  1010. },
  1011. 'add_ie': ['Kaltura'],
  1012. },
  1013. {
  1014. # meta twitter:player
  1015. 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
  1016. 'info_dict': {
  1017. 'id': '0_01b42zps',
  1018. 'ext': 'mp4',
  1019. 'title': 'Main Twerk (Video)',
  1020. 'upload_date': '20171208',
  1021. 'uploader_id': 'sebastian.salinas@thechive.com',
  1022. 'timestamp': 1512713057,
  1023. },
  1024. 'params': {
  1025. 'skip_download': True,
  1026. },
  1027. 'add_ie': ['Kaltura'],
  1028. },
  1029. # referrer protected EaglePlatform embed
  1030. {
  1031. 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
  1032. 'info_dict': {
  1033. 'id': '582306',
  1034. 'ext': 'mp4',
  1035. 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
  1036. 'thumbnail': r're:^https?://.*\.jpg$',
  1037. 'duration': 3382,
  1038. 'view_count': int,
  1039. },
  1040. 'params': {
  1041. 'skip_download': True,
  1042. },
  1043. },
  1044. # ClipYou (EaglePlatform) embed (custom URL)
  1045. {
  1046. 'url': 'http://muz-tv.ru/play/7129/',
  1047. # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
  1048. 'info_dict': {
  1049. 'id': '12820',
  1050. 'ext': 'mp4',
  1051. 'title': "'O Sole Mio",
  1052. 'thumbnail': r're:^https?://.*\.jpg$',
  1053. 'duration': 216,
  1054. 'view_count': int,
  1055. },
  1056. 'params': {
  1057. 'skip_download': True,
  1058. },
  1059. 'skip': 'This video is unavailable.',
  1060. },
  1061. # Pladform embed
  1062. {
  1063. 'url': 'http://muz-tv.ru/kinozal/view/7400/',
  1064. 'info_dict': {
  1065. 'id': '100183293',
  1066. 'ext': 'mp4',
  1067. 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
  1068. 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
  1069. 'thumbnail': r're:^https?://.*\.jpg$',
  1070. 'duration': 694,
  1071. 'age_limit': 0,
  1072. },
  1073. 'skip': 'HTTP Error 404: Not Found',
  1074. },
  1075. # Playwire embed
  1076. {
  1077. 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
  1078. 'info_dict': {
  1079. 'id': '3519514',
  1080. 'ext': 'mp4',
  1081. 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
  1082. 'thumbnail': r're:^https?://.*\.png$',
  1083. 'duration': 45.115,
  1084. },
  1085. },
  1086. # Crooks and Liars embed
  1087. {
  1088. 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
  1089. 'info_dict': {
  1090. 'id': '8RUoRhRi',
  1091. 'ext': 'mp4',
  1092. 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
  1093. 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
  1094. 'timestamp': 1428207000,
  1095. 'upload_date': '20150405',
  1096. 'uploader': 'Heather',
  1097. },
  1098. },
  1099. # Crooks and Liars external embed
  1100. {
  1101. 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
  1102. 'info_dict': {
  1103. 'id': 'MTE3MjUtMzQ2MzA',
  1104. 'ext': 'mp4',
  1105. 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
  1106. 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
  1107. 'timestamp': 1265032391,
  1108. 'upload_date': '20100201',
  1109. 'uploader': 'Heather',
  1110. },
  1111. },
  1112. # NBC Sports vplayer embed
  1113. {
  1114. 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
  1115. 'info_dict': {
  1116. 'id': 'ln7x1qSThw4k',
  1117. 'ext': 'flv',
  1118. 'title': "PFT Live: New leader in the 'new-look' defense",
  1119. 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
  1120. 'uploader': 'NBCU-SPORTS',
  1121. 'upload_date': '20140107',
  1122. 'timestamp': 1389118457,
  1123. },
  1124. 'skip': 'Invalid Page URL',
  1125. },
  1126. # NBC News embed
  1127. {
  1128. 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
  1129. 'md5': '1aa589c675898ae6d37a17913cf68d66',
  1130. 'info_dict': {
  1131. 'id': 'x_dtl_oa_LettermanliftPR_160608',
  1132. 'ext': 'mp4',
  1133. 'title': 'David Letterman: A Preview',
  1134. 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
  1135. 'upload_date': '20160609',
  1136. 'timestamp': 1465431544,
  1137. 'uploader': 'NBCU-NEWS',
  1138. },
  1139. },
  1140. # UDN embed
  1141. {
  1142. 'url': 'https://video.udn.com/news/300346',
  1143. 'md5': 'fd2060e988c326991037b9aff9df21a6',
  1144. 'info_dict': {
  1145. 'id': '300346',
  1146. 'ext': 'mp4',
  1147. 'title': '中一中男師變性 全校師生力挺',
  1148. 'thumbnail': r're:^https?://.*\.jpg$',
  1149. },
  1150. 'params': {
  1151. # m3u8 download
  1152. 'skip_download': True,
  1153. },
  1154. 'expected_warnings': ['Failed to parse JSON Expecting value'],
  1155. },
  1156. # Kinja embed
  1157. {
  1158. 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
  1159. 'info_dict': {
  1160. 'id': '106351',
  1161. 'ext': 'mp4',
  1162. 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
  1163. 'description': 'Migrated from OnionStudios',
  1164. 'thumbnail': r're:^https?://.*\.jpe?g$',
  1165. 'uploader': 'clickhole',
  1166. 'upload_date': '20150527',
  1167. 'timestamp': 1432744860,
  1168. }
  1169. },
  1170. # SnagFilms embed
  1171. {
  1172. 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
  1173. 'info_dict': {
  1174. 'id': '74849a00-85a9-11e1-9660-123139220831',
  1175. 'ext': 'mp4',
  1176. 'title': '#whilewewatch',
  1177. }
  1178. },
  1179. # AdobeTVVideo embed
  1180. {
  1181. 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
  1182. 'md5': '43662b577c018ad707a63766462b1e87',
  1183. 'info_dict': {
  1184. 'id': '2456',
  1185. 'ext': 'mp4',
  1186. 'title': 'New experience with Acrobat DC',
  1187. 'description': 'New experience with Acrobat DC',
  1188. 'duration': 248.667,
  1189. },
  1190. },
  1191. # Another form of arte.tv embed
  1192. {
  1193. 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
  1194. 'md5': '850bfe45417ddf221288c88a0cffe2e2',
  1195. 'info_dict': {
  1196. 'id': '030273-562_PLUS7-F',
  1197. 'ext': 'mp4',
  1198. 'title': 'ARTE Reportage - Nulle part, en France',
  1199. 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
  1200. 'upload_date': '20160409',
  1201. },
  1202. },
  1203. # Duplicated embedded video URLs
  1204. {
  1205. 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
  1206. 'info_dict': {
  1207. 'id': '149298443_480_16c25b74_2',
  1208. 'ext': 'mp4',
  1209. 'title': 'vs. Blue Orange Spring Game',
  1210. 'uploader': 'www.hudl.com',
  1211. },
  1212. },
  1213. # twitter:player:stream embed
  1214. {
  1215. 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
  1216. 'info_dict': {
  1217. 'id': 'master',
  1218. 'ext': 'mp4',
  1219. 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
  1220. 'uploader': 'www.rtl.be',
  1221. },
  1222. 'params': {
  1223. # m3u8 downloads
  1224. 'skip_download': True,
  1225. },
  1226. },
  1227. # twitter:player embed
  1228. {
  1229. 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
  1230. 'md5': 'a3e0df96369831de324f0778e126653c',
  1231. 'info_dict': {
  1232. 'id': '4909620399001',
  1233. 'ext': 'mp4',
  1234. 'title': 'What Do Black Holes Sound Like?',
  1235. 'description': 'what do black holes sound like',
  1236. 'upload_date': '20160524',
  1237. 'uploader_id': '29913724001',
  1238. 'timestamp': 1464107587,
  1239. 'uploader': 'TheAtlantic',
  1240. },
  1241. 'skip': 'Private Youtube video',
  1242. },
  1243. # Facebook <iframe> embed
  1244. {
  1245. 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
  1246. 'md5': 'fbcde74f534176ecb015849146dd3aee',
  1247. 'info_dict': {
  1248. 'id': '599637780109885',
  1249. 'ext': 'mp4',
  1250. 'title': 'Facebook video #599637780109885',
  1251. },
  1252. },
  1253. # Facebook <iframe> embed, plugin video
  1254. {
  1255. 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
  1256. 'info_dict': {
  1257. 'id': '1754168231264132',
  1258. 'ext': 'mp4',
  1259. 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
  1260. 'uploader': 'Tariq Ramadan (official)',
  1261. 'timestamp': 1496758379,
  1262. 'upload_date': '20170606',
  1263. },
  1264. 'params': {
  1265. 'skip_download': True,
  1266. },
  1267. },
  1268. # Facebook API embed
  1269. {
  1270. 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
  1271. 'md5': 'a47372ee61b39a7b90287094d447d94e',
  1272. 'info_dict': {
  1273. 'id': '10153467542406923',
  1274. 'ext': 'mp4',
  1275. 'title': 'Facebook video #10153467542406923',
  1276. },
  1277. },
  1278. # Wordpress "YouTube Video Importer" plugin
  1279. {
  1280. 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
  1281. 'md5': 'd16797741b560b485194eddda8121b48',
  1282. 'info_dict': {
  1283. 'id': 'HNTXWDXV9Is',
  1284. 'ext': 'mp4',
  1285. 'title': 'Blue Devils Drumline Stanford lot 2016',
  1286. 'upload_date': '20160627',
  1287. 'uploader_id': 'GENOCIDE8GENERAL10',
  1288. 'uploader': 'cylus cyrus',
  1289. },
  1290. },
  1291. {
  1292. # video stored on custom kaltura server
  1293. 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
  1294. 'md5': '537617d06e64dfed891fa1593c4b30cc',
  1295. 'info_dict': {
  1296. 'id': '0_1iotm5bh',
  1297. 'ext': 'mp4',
  1298. 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
  1299. 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
  1300. 'uploader_id': 'videos.expansion@el-mundo.net',
  1301. 'upload_date': '20150429',
  1302. 'timestamp': 1430303472,
  1303. },
  1304. 'add_ie': ['Kaltura'],
  1305. },
  1306. {
  1307. # multiple kaltura embeds, nsfw
  1308. 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
  1309. 'info_dict': {
  1310. 'id': 'kamila-avec-video-jaime-sadomie',
  1311. 'title': "Kamila avec vídeo “J'aime sadomie”",
  1312. },
  1313. 'playlist_count': 8,
  1314. },
  1315. {
  1316. # Non-standard Vimeo embed
  1317. 'url': 'https://openclassrooms.com/courses/understanding-the-web',
  1318. 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
  1319. 'info_dict': {
  1320. 'id': '148867247',
  1321. 'ext': 'mp4',
  1322. 'title': 'Understanding the web - Teaser',
  1323. 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
  1324. 'upload_date': '20151214',
  1325. 'uploader': 'OpenClassrooms',
  1326. 'uploader_id': 'openclassrooms',
  1327. },
  1328. 'add_ie': ['Vimeo'],
  1329. },
  1330. {
  1331. # generic vimeo embed that requires original URL passed as Referer
  1332. 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
  1333. 'only_matching': True,
  1334. },
  1335. {
  1336. 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
  1337. 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
  1338. 'info_dict': {
  1339. 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
  1340. 'ext': 'mp4',
  1341. 'title': 'Big Buck Bunny',
  1342. 'description': 'Royalty free test video',
  1343. 'timestamp': 1432816365,
  1344. 'upload_date': '20150528',
  1345. 'is_live': False,
  1346. },
  1347. 'params': {
  1348. 'skip_download': True,
  1349. },
  1350. 'add_ie': ['Arkena'],
  1351. },
  1352. {
  1353. 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
  1354. 'info_dict': {
  1355. 'id': '1c7141f46c',
  1356. 'ext': 'mp4',
  1357. 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
  1358. },
  1359. 'params': {
  1360. 'skip_download': True,
  1361. },
  1362. 'add_ie': ['Vbox7'],
  1363. },
  1364. {
  1365. # DBTV embeds
  1366. 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
  1367. 'info_dict': {
  1368. 'id': '43254897',
  1369. 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
  1370. },
  1371. 'playlist_mincount': 3,
  1372. },
  1373. {
  1374. # Videa embeds
  1375. 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
  1376. 'info_dict': {
  1377. 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
  1378. 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
  1379. },
  1380. 'playlist_mincount': 2,
  1381. },
  1382. {
  1383. # 20 minuten embed
  1384. 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
  1385. 'info_dict': {
  1386. 'id': '523629',
  1387. 'ext': 'mp4',
  1388. 'title': 'So kommen Sie bei Eis und Schnee sicher an',
  1389. 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
  1390. },
  1391. 'params': {
  1392. 'skip_download': True,
  1393. },
  1394. 'add_ie': ['TwentyMinuten'],
  1395. },
  1396. {
  1397. # VideoPress embed
  1398. 'url': 'https://en.support.wordpress.com/videopress/',
  1399. 'info_dict': {
  1400. 'id': 'OcobLTqC',
  1401. 'ext': 'm4v',
  1402. 'title': 'IMG_5786',
  1403. 'timestamp': 1435711927,
  1404. 'upload_date': '20150701',
  1405. },
  1406. 'params': {
  1407. 'skip_download': True,
  1408. },
  1409. 'add_ie': ['VideoPress'],
  1410. },
  1411. {
  1412. # Rutube embed
  1413. 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
  1414. 'info_dict': {
  1415. 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
  1416. 'ext': 'flv',
  1417. 'title': 'Магаззино: Казань 2',
  1418. 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
  1419. 'uploader': 'Магаззино',
  1420. 'upload_date': '20170228',
  1421. 'uploader_id': '996642',
  1422. },
  1423. 'params': {
  1424. 'skip_download': True,
  1425. },
  1426. 'add_ie': ['Rutube'],
  1427. },
  1428. {
  1429. # glomex:embed
  1430. 'url': 'https://www.skai.gr/news/world/iatrikos-syllogos-tourkias-to-turkovac-aplo-dialyma-erntogan-eiste-apateones-kai-pseytes',
  1431. 'info_dict': {
  1432. 'id': 'v-ch2nkhcirwc9-sf',
  1433. 'ext': 'mp4',
  1434. 'title': 'md5:786e1e24e06c55993cee965ef853a0c1',
  1435. 'description': 'md5:8b517a61d577efe7e36fde72fd535995',
  1436. 'timestamp': 1641885019,
  1437. 'upload_date': '20220111',
  1438. 'duration': 460000,
  1439. 'thumbnail': 'https://i3thumbs.glomex.com/dC1idjJwdndiMjRzeGwvMjAyMi8wMS8xMS8wNy8xMF8zNV82MWRkMmQ2YmU5ZTgyLmpwZw==/profile:player-960x540',
  1440. },
  1441. },
  1442. {
  1443. # megatvcom:embed
  1444. 'url': 'https://www.in.gr/2021/12/18/greece/apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize/',
  1445. 'info_dict': {
  1446. 'id': 'apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize',
  1447. 'title': 'md5:5e569cf996ec111057c2764ec272848f',
  1448. },
  1449. 'playlist': [{
  1450. 'md5': '1afa26064ff00ccb91617957dbc73dc1',
  1451. 'info_dict': {
  1452. 'ext': 'mp4',
  1453. 'id': '564916',
  1454. 'display_id': 'md5:6cdf22d3a2e7bacb274b7295089a1770',
  1455. 'title': 'md5:33b9dd39584685b62873043670eb52a6',
  1456. 'description': 'md5:c1db7310f390518ac36dd69d947ef1a1',
  1457. 'timestamp': 1639753145,
  1458. 'upload_date': '20211217',
  1459. 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/prezerakos-1024x597.jpg',
  1460. },
  1461. }, {
  1462. 'md5': '4a1c220695f1ef865a8b7966a53e2474',
  1463. 'info_dict': {
  1464. 'ext': 'mp4',
  1465. 'id': '564905',
  1466. 'display_id': 'md5:ead15695e485e649aed2b81ebd699b88',
  1467. 'title': 'md5:2b71fd54249a3ca34609fe39ae31c47b',
  1468. 'description': 'md5:c42e12f638d0a97d6de4508e2c4df982',
  1469. 'timestamp': 1639753047,
  1470. 'upload_date': '20211217',
  1471. 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg',
  1472. },
  1473. }]
  1474. },
  1475. {
  1476. 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
  1477. 'info_dict': {
  1478. 'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
  1479. 'ext': 'mp4',
  1480. 'title': 'md5:df64f5b61c06d0e9556c0cdd5cf14464',
  1481. 'thumbnail': 'https://www.ert.gr/themata/photos/2021/20220114-apotis6-gouales-pita.jpg',
  1482. },
  1483. },
  1484. {
  1485. # ThePlatform embedded with whitespaces in URLs
  1486. 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
  1487. 'only_matching': True,
  1488. },
  1489. {
  1490. # Senate ISVP iframe https
  1491. 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
  1492. 'md5': 'fb8c70b0b515e5037981a2492099aab8',
  1493. 'info_dict': {
  1494. 'id': 'govtaff020316',
  1495. 'ext': 'mp4',
  1496. 'title': 'Integrated Senate Video Player',
  1497. },
  1498. 'add_ie': ['SenateISVP'],
  1499. },
  1500. {
  1501. # Limelight embeds (1 channel embed + 4 media embeds)
  1502. 'url': 'http://www.sedona.com/FacilitatorTraining2017',
  1503. 'info_dict': {
  1504. 'id': 'FacilitatorTraining2017',
  1505. 'title': 'Facilitator Training 2017',
  1506. },
  1507. 'playlist_mincount': 5,
  1508. },
  1509. {
  1510. # Limelight embed (LimelightPlayerUtil.embed)
  1511. 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
  1512. 'info_dict': {
  1513. 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
  1514. 'ext': 'mp4',
  1515. 'title': '07448641',
  1516. 'timestamp': 1499890639,
  1517. 'upload_date': '20170712',
  1518. },
  1519. 'params': {
  1520. 'skip_download': True,
  1521. },
  1522. 'add_ie': ['LimelightMedia'],
  1523. },
  1524. {
  1525. 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
  1526. 'info_dict': {
  1527. 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
  1528. 'title': 'Standoff with Walnut Creek murder suspect ends',
  1529. 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
  1530. },
  1531. 'playlist_mincount': 4,
  1532. },
  1533. {
  1534. # WashingtonPost embed
  1535. 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
  1536. 'info_dict': {
  1537. 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
  1538. 'ext': 'mp4',
  1539. 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
  1540. 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
  1541. 'timestamp': 1455216756,
  1542. 'uploader': 'The Washington Post',
  1543. 'upload_date': '20160211',
  1544. },
  1545. 'add_ie': ['WashingtonPost'],
  1546. },
  1547. {
  1548. # Mediaset embed
  1549. 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
  1550. 'info_dict': {
  1551. 'id': '720642',
  1552. 'ext': 'mp4',
  1553. 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
  1554. },
  1555. 'params': {
  1556. 'skip_download': True,
  1557. },
  1558. 'add_ie': ['Mediaset'],
  1559. },
  1560. {
  1561. # JOJ.sk embeds
  1562. 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
  1563. 'info_dict': {
  1564. 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
  1565. 'title': 'Slovenskom sa prehnala vlna silných búrok',
  1566. },
  1567. 'playlist_mincount': 5,
  1568. 'add_ie': ['Joj'],
  1569. },
  1570. {
  1571. # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
  1572. 'url': 'https://tvrain.ru/amp/418921/',
  1573. 'md5': 'cc00413936695987e8de148b67d14f1d',
  1574. 'info_dict': {
  1575. 'id': '418921',
  1576. 'ext': 'mp4',
  1577. 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
  1578. },
  1579. },
  1580. {
  1581. # vzaar embed
  1582. 'url': 'http://help.vzaar.com/article/165-embedding-video',
  1583. 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
  1584. 'info_dict': {
  1585. 'id': '8707641',
  1586. 'ext': 'mp4',
  1587. 'title': 'Building A Business Online: Principal Chairs Q & A',
  1588. },
  1589. },
  1590. {
  1591. # multiple HTML5 videos on one page
  1592. 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
  1593. 'info_dict': {
  1594. 'id': 'keyscenarios',
  1595. 'title': 'Rescue Kit 14 Free Edition - Getting started',
  1596. },
  1597. 'playlist_count': 4,
  1598. },
  1599. {
  1600. # vshare embed
  1601. 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
  1602. 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
  1603. 'info_dict': {
  1604. 'id': '0f64ce6',
  1605. 'title': 'vl14062007715967',
  1606. 'ext': 'mp4',
  1607. }
  1608. },
  1609. {
  1610. 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
  1611. 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
  1612. 'info_dict': {
  1613. 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
  1614. 'ext': 'mp4',
  1615. 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
  1616. 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
  1617. 'timestamp': 1474354800,
  1618. 'upload_date': '20160920',
  1619. }
  1620. },
  1621. {
  1622. 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
  1623. 'info_dict': {
  1624. 'id': '1731611',
  1625. 'ext': 'mp4',
  1626. 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
  1627. 'description': 'md5:eb5f23826a027ba95277d105f248b825',
  1628. 'timestamp': 1516100691,
  1629. 'upload_date': '20180116',
  1630. },
  1631. 'params': {
  1632. 'skip_download': True,
  1633. },
  1634. 'add_ie': ['SpringboardPlatform'],
  1635. },
  1636. {
  1637. 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
  1638. 'info_dict': {
  1639. 'id': 'vMDE4NzI1Mjgt690b',
  1640. 'ext': 'mp4',
  1641. 'title': 'Котята',
  1642. },
  1643. 'add_ie': ['YapFiles'],
  1644. 'params': {
  1645. 'skip_download': True,
  1646. },
  1647. },
  1648. {
  1649. # CloudflareStream embed
  1650. 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
  1651. 'info_dict': {
  1652. 'id': '31c9291ab41fac05471db4e73aa11717',
  1653. 'ext': 'mp4',
  1654. 'title': '31c9291ab41fac05471db4e73aa11717',
  1655. },
  1656. 'add_ie': ['CloudflareStream'],
  1657. 'params': {
  1658. 'skip_download': True,
  1659. },
  1660. },
  1661. {
  1662. # PeerTube embed
  1663. 'url': 'https://joinpeertube.org/fr/home/',
  1664. 'info_dict': {
  1665. 'id': 'home',
  1666. 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
  1667. },
  1668. 'playlist_count': 2,
  1669. },
  1670. {
  1671. # Indavideo embed
  1672. 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
  1673. 'info_dict': {
  1674. 'id': '1693903',
  1675. 'ext': 'mp4',
  1676. 'title': 'Így kell otthon hamburgert sütni',
  1677. 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
  1678. 'timestamp': 1426330212,
  1679. 'upload_date': '20150314',
  1680. 'uploader': 'StreetKitchen',
  1681. 'uploader_id': '546363',
  1682. },
  1683. 'add_ie': ['IndavideoEmbed'],
  1684. 'params': {
  1685. 'skip_download': True,
  1686. },
  1687. },
  1688. {
  1689. # APA embed via JWPlatform embed
  1690. 'url': 'http://www.vol.at/blue-man-group/5593454',
  1691. 'info_dict': {
  1692. 'id': 'jjv85FdZ',
  1693. 'ext': 'mp4',
  1694. 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
  1695. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  1696. 'thumbnail': r're:^https?://.*\.jpg$',
  1697. 'duration': 254,
  1698. 'timestamp': 1519211149,
  1699. 'upload_date': '20180221',
  1700. },
  1701. 'params': {
  1702. 'skip_download': True,
  1703. },
  1704. },
  1705. {
  1706. 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
  1707. 'md5': 'b68d276de422ab07ee1d49388103f457',
  1708. 'info_dict': {
  1709. 'id': '83645793',
  1710. 'title': 'Lock up and get excited',
  1711. 'ext': 'mp4'
  1712. },
  1713. 'skip': 'TODO: fix nested playlists processing in tests',
  1714. },
  1715. {
  1716. # Viqeo embeds
  1717. 'url': 'https://viqeo.tv/',
  1718. 'info_dict': {
  1719. 'id': 'viqeo',
  1720. 'title': 'All-new video platform',
  1721. },
  1722. 'playlist_count': 6,
  1723. },
  1724. # {
  1725. # # Zype embed
  1726. # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
  1727. # 'info_dict': {
  1728. # 'id': '5b400b834b32992a310622b9',
  1729. # 'ext': 'mp4',
  1730. # 'title': 'Smoky Barbecue Favorites',
  1731. # 'thumbnail': r're:^https?://.*\.jpe?g',
  1732. # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
  1733. # 'upload_date': '20170909',
  1734. # 'timestamp': 1504915200,
  1735. # },
  1736. # 'add_ie': [ZypeIE.ie_key()],
  1737. # 'params': {
  1738. # 'skip_download': True,
  1739. # },
  1740. # },
  1741. {
  1742. # videojs embed
  1743. 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
  1744. 'info_dict': {
  1745. 'id': 'shell',
  1746. 'ext': 'mp4',
  1747. 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
  1748. 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
  1749. 'thumbnail': r're:^https?://.*\.jpg$',
  1750. },
  1751. 'params': {
  1752. 'skip_download': True,
  1753. },
  1754. 'expected_warnings': ['Failed to download MPD manifest'],
  1755. },
  1756. {
  1757. # DailyMotion embed with DM.player
  1758. 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
  1759. 'info_dict': {
  1760. 'id': 'k6aKkGHd9FJs4mtJN39',
  1761. 'ext': 'mp4',
  1762. 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
  1763. 'description': 'This video is private.',
  1764. 'uploader_id': 'x1jf30l',
  1765. 'uploader': 'beIN SPORTS USA',
  1766. 'upload_date': '20190528',
  1767. 'timestamp': 1559062971,
  1768. },
  1769. 'params': {
  1770. 'skip_download': True,
  1771. },
  1772. },
  1773. {
  1774. # tvopengr:embed
  1775. 'url': 'https://www.ethnos.gr/World/article/190604/hparosiaxekinoynoisynomiliessthgeneyhmethskiatoypolemoypanoapothnoykrania',
  1776. 'md5': 'eb0c3995d0a6f18f6538c8e057865d7d',
  1777. 'info_dict': {
  1778. 'id': '101119',
  1779. 'ext': 'mp4',
  1780. 'display_id': 'oikarpoitondiapragmateyseonhparosias',
  1781. 'title': 'md5:b979f4d640c568617d6547035528a149',
  1782. 'description': 'md5:e54fc1977c7159b01cc11cd7d9d85550',
  1783. 'timestamp': 1641772800,
  1784. 'upload_date': '20220110',
  1785. 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg',
  1786. }
  1787. },
  1788. {
  1789. # blogger embed
  1790. 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
  1791. 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
  1792. 'info_dict': {
  1793. 'id': 'BLOGGER-video-3c740e3a49197e16-796',
  1794. 'ext': 'mp4',
  1795. 'title': 'Blogger',
  1796. 'thumbnail': r're:^https?://.*',
  1797. },
  1798. },
  1799. # {
  1800. # # TODO: find another test
  1801. # # http://schema.org/VideoObject
  1802. # 'url': 'https://flipagram.com/f/nyvTSJMKId',
  1803. # 'md5': '888dcf08b7ea671381f00fab74692755',
  1804. # 'info_dict': {
  1805. # 'id': 'nyvTSJMKId',
  1806. # 'ext': 'mp4',
  1807. # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
  1808. # 'description': '#love for cats.',
  1809. # 'timestamp': 1461244995,
  1810. # 'upload_date': '20160421',
  1811. # },
  1812. # 'params': {
  1813. # 'force_generic_extractor': True,
  1814. # },
  1815. # },
  1816. {
  1817. # VHX Embed
  1818. 'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
  1819. 'info_dict': {
  1820. 'id': '858208',
  1821. 'ext': 'mp4',
  1822. 'title': 'Untitled',
  1823. 'uploader_id': 'user80538407',
  1824. 'uploader': 'OTT Videos',
  1825. },
  1826. },
  1827. {
  1828. # ArcPublishing PoWa video player
  1829. 'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
  1830. 'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
  1831. 'info_dict': {
  1832. 'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
  1833. 'ext': 'mp4',
  1834. 'title': 'Senate candidates wave to voters on Anchorage streets',
  1835. 'description': 'md5:91f51a6511f090617353dc720318b20e',
  1836. 'timestamp': 1604378735,
  1837. 'upload_date': '20201103',
  1838. 'duration': 1581,
  1839. },
  1840. },
  1841. {
  1842. # MyChannels SDK embed
  1843. # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
  1844. 'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
  1845. 'md5': '90c0699c37006ef18e198c032d81739c',
  1846. 'info_dict': {
  1847. 'id': '194165',
  1848. 'ext': 'mp4',
  1849. 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
  1850. 'timestamp': 1611740340,
  1851. 'upload_date': '20210127',
  1852. 'duration': 159,
  1853. },
  1854. },
  1855. {
  1856. # Simplecast player embed
  1857. 'url': 'https://www.bio.org/podcast',
  1858. 'info_dict': {
  1859. 'id': 'podcast',
  1860. 'title': 'I AM BIO Podcast | BIO',
  1861. },
  1862. 'playlist_mincount': 52,
  1863. },
  1864. {
  1865. # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
  1866. 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
  1867. 'only_matching': True,
  1868. }, {
  1869. # WimTv embed player
  1870. 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
  1871. 'info_dict': {
  1872. 'id': 'wearefmi-pt-2-2021',
  1873. 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV',
  1874. },
  1875. 'playlist_count': 1,
  1876. }, {
  1877. # KVS Player
  1878. 'url': 'https://www.kvs-demo.com/videos/105/kelis-4th-of-july/',
  1879. 'info_dict': {
  1880. 'id': '105',
  1881. 'display_id': 'kelis-4th-of-july',
  1882. 'ext': 'mp4',
  1883. 'title': 'Kelis - 4th Of July',
  1884. 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
  1885. },
  1886. 'params': {
  1887. 'skip_download': True,
  1888. },
  1889. }, {
  1890. # KVS Player
  1891. 'url': 'https://www.kvs-demo.com/embed/105/',
  1892. 'info_dict': {
  1893. 'id': '105',
  1894. 'display_id': 'kelis-4th-of-july',
  1895. 'ext': 'mp4',
  1896. 'title': 'Kelis - 4th Of July / Embed Player',
  1897. 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
  1898. },
  1899. 'params': {
  1900. 'skip_download': True,
  1901. },
  1902. }, {
  1903. # KVS Player
  1904. 'url': 'https://thisvid.com/videos/french-boy-pantsed/',
  1905. 'md5': '3397979512c682f6b85b3b04989df224',
  1906. 'info_dict': {
  1907. 'id': '2400174',
  1908. 'display_id': 'french-boy-pantsed',
  1909. 'ext': 'mp4',
  1910. 'title': 'French Boy Pantsed - ThisVid.com',
  1911. 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
  1912. }
  1913. }, {
  1914. # KVS Player
  1915. 'url': 'https://thisvid.com/embed/2400174/',
  1916. 'md5': '3397979512c682f6b85b3b04989df224',
  1917. 'info_dict': {
  1918. 'id': '2400174',
  1919. 'display_id': 'french-boy-pantsed',
  1920. 'ext': 'mp4',
  1921. 'title': 'French Boy Pantsed - ThisVid.com',
  1922. 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
  1923. }
  1924. }, {
  1925. # KVS Player
  1926. 'url': 'https://youix.com/video/leningrad-zoj/',
  1927. 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
  1928. 'info_dict': {
  1929. 'id': '18485',
  1930. 'display_id': 'leningrad-zoj',
  1931. 'ext': 'mp4',
  1932. 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
  1933. 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
  1934. }
  1935. }, {
  1936. # KVS Player
  1937. 'url': 'https://youix.com/embed/18485',
  1938. 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
  1939. 'info_dict': {
  1940. 'id': '18485',
  1941. 'display_id': 'leningrad-zoj',
  1942. 'ext': 'mp4',
  1943. 'title': 'Ленинград - ЗОЖ',
  1944. 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg',
  1945. }
  1946. }, {
  1947. # KVS Player
  1948. 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
  1949. 'md5': '94166bdb26b4cb1fb9214319a629fc51',
  1950. 'info_dict': {
  1951. 'id': '21217',
  1952. 'display_id': '40-nochey-40-nights-2016',
  1953. 'ext': 'mp4',
  1954. 'title': '40 ночей (2016) - BogMedia.org',
  1955. 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
  1956. }
  1957. },
  1958. {
  1959. # KVS Player (for sites that serve kt_player.js via non-https urls)
  1960. 'url': 'http://www.camhub.world/embed/389508',
  1961. 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
  1962. 'info_dict': {
  1963. 'id': '389508',
  1964. 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
  1965. 'ext': 'mp4',
  1966. 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
  1967. 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg',
  1968. }
  1969. },
  1970. {
  1971. # Reddit-hosted video that will redirect and be processed by RedditIE
  1972. # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
  1973. 'url': 'https://v.redd.it/zv89llsvexdz',
  1974. 'md5': '87f5f02f6c1582654146f830f21f8662',
  1975. 'info_dict': {
  1976. 'id': 'zv89llsvexdz',
  1977. 'ext': 'mp4',
  1978. 'timestamp': 1501941939.0,
  1979. 'title': 'That small heart attack.',
  1980. 'upload_date': '20170805',
  1981. 'uploader': 'Antw87'
  1982. }
  1983. },
  1984. {
  1985. # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
  1986. 'url': 'https://v.redd.it/33hgok7dfbz71/',
  1987. 'md5': '7a1d587940242c9bb3bd6eb320b39258',
  1988. 'info_dict': {
  1989. 'id': '33hgok7dfbz71',
  1990. 'ext': 'mp4',
  1991. 'title': "The game Didn't want me to Knife that Guy I guess",
  1992. 'uploader': 'paraf1ve',
  1993. 'timestamp': 1636788683.0,
  1994. 'upload_date': '20211113'
  1995. }
  1996. },
  1997. {
  1998. # MainStreaming player
  1999. 'url': 'https://www.lactv.it/2021/10/03/lac-news24-la-settimana-03-10-2021/',
  2000. 'info_dict': {
  2001. 'id': 'EUlZfGWkGpOd',
  2002. 'title': 'La Settimana ',
  2003. 'description': '03 Ottobre ore 02:00',
  2004. 'ext': 'mp4',
  2005. 'live_status': 'not_live',
  2006. 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
  2007. 'duration': 1512
  2008. }
  2009. },
  2010. {
  2011. # Multiple gfycat iframe embeds
  2012. 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422',
  2013. 'info_dict': {
  2014. 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다',
  2015. 'id': 'board'
  2016. },
  2017. 'playlist_count': 8,
  2018. },
  2019. {
  2020. # Multiple gfycat gifs (direct links)
  2021. 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199',
  2022. 'info_dict': {
  2023. 'title': '옳게 된 크롭 니트 스테이씨 아이사',
  2024. 'id': 'board'
  2025. },
  2026. 'playlist_count': 6
  2027. },
  2028. {
  2029. # Multiple gfycat embeds, with uppercase "IFR" in urls
  2030. 'url': 'https://kkzz.kr/?vid=2295',
  2031. 'info_dict': {
  2032. 'title': '지방시 앰버서더 에스파 카리나 움짤',
  2033. 'id': '?vid=2295'
  2034. },
  2035. 'playlist_count': 9
  2036. },
  2037. {
  2038. # Panopto embeds
  2039. 'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
  2040. 'info_dict': {
  2041. 'ext': 'mp4',
  2042. 'id': '0bd3f16c-824a-436a-8486-ac5900693aef',
  2043. 'title': 'Quizzes in Panopto',
  2044. },
  2045. },
  2046. {
  2047. # Ruutu embed
  2048. 'url': 'https://www.nelonen.fi/ohjelmat/madventures-suomi/2160731-riku-ja-tunna-lahtevat-peurajahtiin-tv-sta-tutun-biologin-kanssa---metsastysreissu-huipentuu-kasvissyojan-painajaiseen',
  2049. 'md5': 'a2513a98d3496099e6eced40f7e6a14b',
  2050. 'info_dict': {
  2051. 'id': '4044426',
  2052. 'ext': 'mp4',
  2053. 'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!',
  2054. 'thumbnail': r're:^https?://.+\.jpg$',
  2055. 'duration': 108,
  2056. 'series': 'Madventures Suomi',
  2057. 'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381',
  2058. 'categories': ['Matkailu', 'Elämäntyyli'],
  2059. 'age_limit': 0,
  2060. 'upload_date': '20220308',
  2061. },
  2062. },
  2063. {
  2064. # Multiple Ruutu embeds
  2065. 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html',
  2066. 'info_dict': {
  2067. 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä',
  2068. 'id': 'art-2000008762560'
  2069. },
  2070. 'playlist_count': 3
  2071. },
  2072. {
  2073. # Ruutu embed in hs.fi with a single video
  2074. 'url': 'https://www.hs.fi/kotimaa/art-2000008793421.html',
  2075. 'md5': 'f8964e65d8fada6e8a562389bf366bb4',
  2076. 'info_dict': {
  2077. 'id': '4081841',
  2078. 'ext': 'mp4',
  2079. 'title': 'Puolustusvoimat siirsi panssariajoneuvoja harjoituksiin Niinisaloon 2.5.2022',
  2080. 'thumbnail': r're:^https?://.+\.jpg$',
  2081. 'duration': 138,
  2082. 'age_limit': 0,
  2083. 'upload_date': '20220504',
  2084. },
  2085. },
  2086. {
  2087. # Webpage contains double BOM
  2088. 'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
  2089. 'md5': 'df02cadc719dcc63d43288366f037754',
  2090. 'info_dict': {
  2091. 'id': 'paris-d-moll',
  2092. 'ext': 'mp4',
  2093. 'upload_date': '20220518',
  2094. 'title': 'Paris d-moll',
  2095. 'description': 'md5:319e37ea5542293db37e1e13072fe330',
  2096. 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
  2097. 'timestamp': 1652833414,
  2098. 'age_limit': 0,
  2099. }
  2100. },
  2101. {
  2102. 'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
  2103. 'md5': '198bde8bed23d0b23c70725c83c9b6d9',
  2104. 'info_dict': {
  2105. 'id': '53602801',
  2106. 'ext': 'mpga',
  2107. 'title': 'Interstellar',
  2108. 'description': 'Listen now | Episode One',
  2109. 'thumbnail': 'md5:c30d9c83f738e16d8551d7219d321538',
  2110. 'uploader': 'Molly Movie Club',
  2111. 'uploader_id': '839621',
  2112. },
  2113. },
  2114. {
  2115. 'url': 'https://www.blockedandreported.org/p/episode-117-lets-talk-about-depp?s=r',
  2116. 'md5': 'c0cc44ee7415daeed13c26e5b56d6aa0',
  2117. 'info_dict': {
  2118. 'id': '57962052',
  2119. 'ext': 'mpga',
  2120. 'title': 'md5:855b2756f0ee10f6723fa00b16266f8d',
  2121. 'description': 'md5:fe512a5e94136ad260c80bde00ea4eef',
  2122. 'thumbnail': 'md5:2218f27dfe517bb5ac16c47d0aebac59',
  2123. 'uploader': 'Blocked and Reported',
  2124. 'uploader_id': '500230',
  2125. },
  2126. },
  2127. {
  2128. 'url': 'https://www.skimag.com/video/ski-people-1980/',
  2129. 'md5': '022a7e31c70620ebec18deeab376ee03',
  2130. 'info_dict': {
  2131. 'id': 'YTmgRiNU',
  2132. 'ext': 'mp4',
  2133. 'title': '1980 Ski People',
  2134. 'timestamp': 1610407738,
  2135. 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
  2136. 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
  2137. 'duration': 5688.0,
  2138. 'upload_date': '20210111',
  2139. }
  2140. },
  2141. {
  2142. 'note': 'JSON LD with multiple @type',
  2143. 'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
  2144. 'md5': 'c7949f34f57273013fb7ccb1156393db',
  2145. 'info_dict': {
  2146. 'id': 'ipy2AcGL',
  2147. 'ext': 'mp4',
  2148. 'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d',
  2149. 'thumbnail': r're:https://media\.nu\.nl/m/.+\.jpg',
  2150. 'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen',
  2151. 'timestamp': 1586577474,
  2152. 'upload_date': '20200411',
  2153. 'age_limit': 0,
  2154. 'duration': 111.0,
  2155. }
  2156. },
  2157. {
  2158. 'note': 'JSON LD with unexpected data type',
  2159. 'url': 'https://www.autoweek.nl/autotests/artikel/porsche-911-gt3-rs-rij-impressie-2/',
  2160. 'info_dict': {
  2161. 'id': 'porsche-911-gt3-rs-rij-impressie-2',
  2162. 'ext': 'mp4',
  2163. 'title': 'Test: Porsche 911 GT3 RS',
  2164. 'description': 'Je ziet het niet, maar het is er wel. Downforce, hebben we het dan over. En in de nieuwe Porsche 911 GT3 RS is er zelfs heel veel downforce.',
  2165. 'timestamp': 1664920902,
  2166. 'upload_date': '20221004',
  2167. 'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$',
  2168. 'age_limit': 0,
  2169. 'direct': True,
  2170. }
  2171. }
  2172. ]
  def report_following_redirect(self, new_url):
      """Print a ``[redirect]`` notice saying that ``new_url`` is being followed."""
      message = '[redirect] Following redirect to %s' % new_url
      self._downloader.to_screen(message)
  def report_detected(self, name, num=1, note=None):
      """Write a debug line announcing what has been identified.

      ``name`` gets a trailing "s" when ``num`` is greater than one.  When
      ``num`` is falsy (e.g. 0) nothing is written and the method returns
      early.  In every other case the count is replaced by the article "a".
      ``note`` is rendered through ``format_field`` with the ``"; %s"``
      template and appended to the message.
      """
      if num > 1:
          name = name + 's'
      elif num:
          num = 'a'
      else:
          # Nothing was detected, so there is nothing to report
          return

      self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
  2184. def _fragment_query(self, url):
  2185. if self._configuration_arg('fragment_query'):
  2186. query_string = urllib.parse.urlparse(url).query
  2187. if query_string:
  2188. return {'extra_param_to_segment_url': query_string}
  2189. return {}
  def _extract_rss(self, url, video_id, doc):
      """Turn a parsed RSS document into a playlist of ``url_transparent`` entries.

      ``doc`` is the parsed XML root (only ``find``/``findall`` are used on it).
      Each ``./channel/item`` contributes one entry pointing at the ``url``
      attribute of its first ``<enclosure>``; the item's ``<link>`` text is used
      only when the item has no ``<enclosure>`` at all.  Items that end up
      without a URL are skipped.  A non-empty ``<guid>`` is smuggled into the
      entry URL as ``force_videoid``.

      ``video_id`` is accepted but not used; the playlist id is ``url``.
      """
      ns_map = {
          'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
      }
      explicit_to_age_limit = {'true': 18, 'yes': 18, 'false': 0, 'no': 0}

      entries = []
      for item in doc.findall('./channel/item'):
          enclosure_urls = (enc.attrib.get('url') for enc in item.findall('./enclosure'))
          link_fallback = xpath_text(item, 'link', fatal=False)
          # The fallback applies only when there is no <enclosure>; a first
          # enclosure lacking a url attribute yields None and the item is skipped.
          media_url = next(enclosure_urls, link_fallback)
          if not media_url:
              continue

          guid = try_call(lambda: item.find('guid').text)
          if guid:
              media_url = smuggle_url(media_url, {'force_videoid': guid})

          def itunes(key):
              # Text of the item's <itunes:key> child, or None when absent
              return xpath_text(item, xpath_with_ns(f'./itunes:{key}', ns_map), default=None)

          explicit_flag = (itunes('explicit') or '').lower()
          thumbnail_path = xpath_with_ns('./itunes:image', ns_map)

          entry = {
              '_type': 'url_transparent',
              'url': media_url,
              'title': try_call(lambda: item.find('title').text),
              'description': xpath_text(item, 'description', default=None),
              'timestamp': unified_timestamp(xpath_text(item, 'pubDate', default=None)),
              'duration': parse_duration(itunes('duration')),
              'thumbnail': url_or_none(xpath_attr(item, thumbnail_path, 'href')),
              'episode': itunes('title'),
              'episode_number': int_or_none(itunes('episode')),
              'season_number': int_or_none(itunes('season')),
              'age_limit': explicit_to_age_limit.get(explicit_flag),
          }
          entries.append(entry)

      channel_title = try_call(lambda: doc.find('./channel/title').text)
      channel_description = try_call(lambda: doc.find('./channel/description').text)
      return {
          '_type': 'playlist',
          'id': url,
          'title': channel_title,
          'description': channel_description,
          'entries': entries,
      }
  2226. def _kvs_getrealurl(self, video_url, license_code):
  2227. if not video_url.startswith('function/0/'):
  2228. return video_url # not obfuscated
  2229. url_path, _, url_query = video_url.partition('?')
  2230. urlparts = url_path.split('/')[2:]
  2231. license = self._kvs_getlicensetoken(license_code)
  2232. newmagic = urlparts[5][:32]
  2233. for o in range(len(newmagic) - 1, -1, -1):
  2234. new = ''
  2235. l = (o + sum(int(n) for n in license[o:])) % 32
  2236. for i in range(0, len(newmagic)):
  2237. if i == o:
  2238. new += newmagic[l]
  2239. elif i == l:
  2240. new += newmagic[o]
  2241. else:
  2242. new += newmagic[i]
  2243. newmagic = new
  2244. urlparts[5] = newmagic + urlparts[5][32:]
  2245. return '/'.join(urlparts) + '?' + url_query
  2246. def _kvs_getlicensetoken(self, license):
  2247. modlicense = license.replace('$', '').replace('0', '1')
  2248. center = int(len(modlicense) / 2)
  2249. fronthalf = int(modlicense[:center + 1])
  2250. backhalf = int(modlicense[center:])
  2251. modlicense = str(4 * abs(fronthalf - backhalf))
  2252. retval = ''
  2253. for o in range(0, center + 1):
  2254. for i in range(1, 5):
  2255. retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
  2256. return retval
  def _real_extract(self, url):
      """Extract media information from an arbitrary URL.

      The steps run in this order, and the first one that applies returns:

      1. Protocol-relative (``//...``) and scheme-less input is turned into a
         proper URL or a search query, according to the ``default_search``
         param, and handed back through ``url_result``.
      2. The URL is requested.  A redirect is reported and re-dispatched through
         ``url_result``, unless the final URL equals the requested one with the
         scheme set to ``https``.
      3. A ``Content-Type`` matching the audio/video/playlist pattern is treated
         as a direct media or manifest link.
      4. The first 512 bytes are inspected: an ``#EXTM3U`` header is handled as
         an M3U playlist, anything that is not HTML is returned as a direct link.
      5. The page is parsed as XML to detect RSS, ISM, SMIL, XSPF, DASH and F4M
         documents.
      6. The page is scanned for embeds: one embed is merged into the generic
         info dict, several become a playlist.

      Raises ``ExtractorError`` for scheme-less input that cannot be resolved
      and ``UnsupportedError`` when no step produced a result.
      """
      if url.startswith('//'):
          return self.url_result(self.http_scheme() + url)

      parsed_url = urllib.parse.urlparse(url)
      if not parsed_url.scheme:
          default_search = self.get_param('default_search')
          if default_search is None:
              default_search = 'fixup_error'

          if default_search in ('auto', 'auto_warning', 'fixup_error'):
              # Input shaped like "host.tld/..." is assumed to be a URL missing its scheme
              if re.match(r'^[^\s/]+\.[^\s/]+/', url):
                  self.report_warning('The url doesn\'t specify the protocol, trying with http')
                  return self.url_result('http://' + url)
              elif default_search != 'fixup_error':
                  if default_search == 'auto_warning':
                      # The literal word "url" looks like an unreplaced placeholder
                      if re.match(r'^(?:url|URL)$', url):
                          raise ExtractorError(
                              'Invalid URL: %r . Call hypervideo like this: hypervideo -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
                              expected=True)
                      else:
                          self.report_warning(
                              'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
                  return self.url_result('ytsearch:' + url)

          if default_search in ('error', 'fixup_error'):
              raise ExtractorError(
                  '%r is not a valid URL. '
                  'Set --default-search "ytsearch" (or run hypervideo "ytsearch:%s" ) to search YouTube'
                  % (url, url), expected=True)
          else:
              # Any other value is used as a search prefix, e.g. "ytsearch" -> "ytsearch:<input>"
              if ':' not in default_search:
                  default_search += ':'
              return self.url_result(default_search + url)

      # Keep the smuggled form; it is what gets passed to _extract_embeds below
      original_url = url
      url, smuggled_data = unsmuggle_url(url, {})
      force_videoid = None
      is_intentional = smuggled_data.get('to_generic')
      if 'force_videoid' in smuggled_data:
          force_videoid = smuggled_data['force_videoid']
          video_id = force_videoid
      else:
          video_id = self._generic_id(url)

      # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
      # making it impossible to download only a chunk of the file (yet we only need the first
      # 512 bytes to test whether it's HTML or not). With the default Accept-Encoding this
      # would always result in downloading the whole file, which is not desirable.
      # Therefore, for the extraction pass we override Accept-Encoding to accept anything,
      # so that we receive raw bytes and are able to download only a chunk.
      # It would probably be better to solve this by checking Content-Type for
      # application/octet-stream after a HEAD request, but it is unclear whether we can rely on that.
      full_response = self._request_webpage(url, video_id, headers={
          'Accept-Encoding': '*',
          **smuggled_data.get('http_headers', {})
      })
      new_url = full_response.geturl()
      # A final URL equal to the requested one with an https scheme is adopted
      # silently; any other change is treated as a redirect and re-dispatched.
      if new_url == urllib.parse.urlparse(url)._replace(scheme='https').geturl():
          url = new_url
      elif url != new_url:
          self.report_following_redirect(new_url)
          if force_videoid:
              # Carry the forced id over to the redirect target
              new_url = smuggle_url(new_url, {'force_videoid': force_videoid})
          return self.url_result(new_url)

      info_dict = {
          'id': video_id,
          'title': self._generic_title(url),
          'timestamp': unified_timestamp(full_response.headers.get('Last-Modified'))
      }

      # Check for direct link to a video
      content_type = full_response.headers.get('Content-Type', '').lower()
      # Matches audio/*, video/*, application/ogg and the application/*mpegurl variants
      m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
      if m:
          self.report_detected('direct video link')
          headers = smuggled_data.get('http_headers', {})
          format_id = str(m.group('format_id'))
          subtitles = {}
          if format_id.endswith('mpegurl'):
              formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
              info_dict.update(self._fragment_query(url))
          elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
              formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
              info_dict.update(self._fragment_query(url))
          elif format_id == 'f4m':
              formats = self._extract_f4m_formats(url, video_id, headers=headers)
          else:
              # Plain media file: expose the URL itself as the only format
              formats = [{
                  'format_id': format_id,
                  'url': url,
                  'vcodec': 'none' if m.group('type') == 'audio' else None
              }]
              info_dict['direct'] = True
          info_dict.update({
              'formats': formats,
              'subtitles': subtitles,
              'http_headers': headers,
          })
          return info_dict

      # The fallback warning is skipped in test mode and when the URL was smuggled with 'to_generic'
      if not self.get_param('test', False) and not is_intentional:
          force = self.get_param('force_generic_extractor', False)
          self.report_warning('%s generic information extractor' % ('Forcing' if force else 'Falling back on'))

      # Read only a small prefix of the body for sniffing
      first_bytes = full_response.read(512)

      # Is it an M3U playlist?
      if first_bytes.startswith(b'#EXTM3U'):
          self.report_detected('M3U playlist')
          info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
          info_dict.update(self._fragment_query(url))
          return info_dict

      # Maybe it's a direct link to a video?
      # Be careful not to download the whole thing!
      if not is_html(first_bytes):
          self.report_warning(
              'URL could be a direct video link, returning it as such.')
          info_dict.update({
              'direct': True,
              'url': url,
          })
          return info_dict

      # Read the rest of the body, passing the already-consumed prefix along
      webpage = self._webpage_read_content(
          full_response, url, video_id, prefix=first_bytes)

      # A page with this exact title triggers a fresh download via _download_webpage
      if '<title>DPG Media Privacy Gate</title>' in webpage:
          webpage = self._download_webpage(url, video_id)

      self.report_extraction(video_id)

      # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
      try:
          try:
              doc = compat_etree_fromstring(webpage)
          except xml.etree.ElementTree.ParseError:
              # Retry with the UTF-8 encoded bytes before giving up on XML
              doc = compat_etree_fromstring(webpage.encode('utf-8'))
          if doc.tag == 'rss':
              self.report_detected('RSS feed')
              return self._extract_rss(url, video_id, doc)
          elif doc.tag == 'SmoothStreamingMedia':
              info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
              self.report_detected('ISM manifest')
              return info_dict
          elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
              smil = self._parse_smil(doc, url, video_id)
              self.report_detected('SMIL file')
              return smil
          elif doc.tag == '{http://xspf.org/ns/0/}playlist':
              self.report_detected('XSPF playlist')
              return self.playlist_result(
                  self._parse_xspf(
                      doc, video_id, xspf_url=url,
                      xspf_base_url=full_response.geturl()),
                  video_id)
          elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
              info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
                  doc,
                  mpd_base_url=full_response.geturl().rpartition('/')[0],
                  mpd_url=url)
              info_dict.update(self._fragment_query(url))
              self.report_detected('DASH manifest')
              return info_dict
          elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
              info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
              self.report_detected('F4M manifest')
              return info_dict
      except xml.etree.ElementTree.ParseError:
          # Not well-formed XML: carry on and treat the page as HTML
          pass

      info_dict.update({
          # it's tempting to parse this further, but you would
          # have to take into account all the variations like
          # Video Title - Site Name
          # Site Name | Video Title
          # Video Title - Tagline | Site Name
          # and so on and so forth; it's just not practical
          'title': self._generic_title('', webpage, default='video'),
          'description': self._og_search_description(webpage, default=None),
          'thumbnail': self._og_search_thumbnail(webpage, default=None),
          'age_limit': self._rta_search(webpage),
      })

      self._downloader.write_debug('Looking for embeds')
      embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
      if len(embeds) == 1:
          # A single embed overrides the generic page-level fields
          return {**info_dict, **embeds[0]}
      elif embeds:
          return self.playlist_result(embeds, **info_dict)
      raise UnsupportedError(url)
  2433. def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
  2434. """Returns an iterator of video entries"""
  2435. info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation
  2436. video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
  2437. url, smuggled_data = unsmuggle_url(url, {})
  2438. actual_url = urlh.geturl() if urlh else url
  2439. # Sometimes embedded video player is hidden behind percent encoding
  2440. # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
  2441. # Unescaping the whole page allows to handle those cases in a generic way
  2442. # FIXME: unescaping the whole page may break URLs, commenting out for now.
  2443. # There probably should be a second run of generic extractor on unescaped webpage.
  2444. # webpage = urllib.parse.unquote(webpage)
  2445. embeds = []
  2446. for ie in self._downloader._ies.values():
  2447. if ie.ie_key() in smuggled_data.get('block_ies', []):
  2448. continue
  2449. gen = ie.extract_from_webpage(self._downloader, url, webpage)
  2450. current_embeds = []
  2451. try:
  2452. while True:
  2453. current_embeds.append(next(gen))
  2454. except self.StopExtraction:
  2455. self.report_detected(f'{ie.IE_NAME} exclusive embed', len(current_embeds),
  2456. embeds and 'discarding other embeds')
  2457. return current_embeds
  2458. except StopIteration:
  2459. self.report_detected(f'{ie.IE_NAME} embed', len(current_embeds))
  2460. embeds.extend(current_embeds)
  2461. if embeds:
  2462. return embeds
  2463. jwplayer_data = self._find_jwplayer_data(
  2464. webpage, video_id, transform_source=js_to_json)
  2465. if jwplayer_data:
  2466. if isinstance(jwplayer_data.get('playlist'), str):
  2467. self.report_detected('JW Player playlist')
  2468. return [self.url_result(jwplayer_data['playlist'], 'JWPlatform')]
  2469. try:
  2470. info = self._parse_jwplayer_data(
  2471. jwplayer_data, video_id, require_title=False, base_url=url)
  2472. if traverse_obj(info, 'formats', ('entries', ..., 'formats')):
  2473. self.report_detected('JW Player data')
  2474. return [info]
  2475. except ExtractorError:
  2476. # See https://github.com/ytdl-org/youtube-dl/pull/16735
  2477. pass
  2478. # Video.js embed
  2479. mobj = re.search(
  2480. r'(?s)\bvideojs\s*\(.+?([a-zA-Z0-9_$]+)\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
  2481. webpage)
  2482. if mobj is not None:
  2483. varname = mobj.group(1)
  2484. sources = variadic(self._parse_json(
  2485. mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
  2486. formats = []
  2487. subtitles = {}
  2488. for source in sources:
  2489. src = source.get('src')
  2490. if not src or not isinstance(src, str):
  2491. continue
  2492. src = urllib.parse.urljoin(url, src)
  2493. src_type = source.get('type')
  2494. if isinstance(src_type, str):
  2495. src_type = src_type.lower()
  2496. ext = determine_ext(src).lower()
  2497. if src_type == 'video/youtube':
  2498. return [self.url_result(src, YoutubeIE.ie_key())]
  2499. if src_type == 'application/dash+xml' or ext == 'mpd':
  2500. fmts, subs = self._extract_mpd_formats_and_subtitles(
  2501. src, video_id, mpd_id='dash', fatal=False)
  2502. formats.extend(fmts)
  2503. self._merge_subtitles(subs, target=subtitles)
  2504. elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
  2505. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  2506. src, video_id, 'mp4', entry_protocol='m3u8_native',
  2507. m3u8_id='hls', fatal=False)
  2508. formats.extend(fmts)
  2509. self._merge_subtitles(subs, target=subtitles)
  2510. for fmt in formats:
  2511. fmt.update(self._fragment_query(src))
  2512. if not formats:
  2513. formats.append({
  2514. 'url': src,
  2515. 'ext': (mimetype2ext(src_type)
  2516. or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
  2517. 'http_headers': {
  2518. 'Referer': actual_url,
  2519. },
  2520. })
  2521. # https://docs.videojs.com/player#addRemoteTextTrack
  2522. # https://html.spec.whatwg.org/multipage/media.html#htmltrackelement
  2523. for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
  2524. sub = self._parse_json(
  2525. sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
  2526. src = str_or_none(sub.get('src'))
  2527. if not src:
  2528. continue
  2529. subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
  2530. 'url': urllib.parse.urljoin(url, src),
  2531. 'name': sub.get('label'),
  2532. 'http_headers': {
  2533. 'Referer': actual_url,
  2534. },
  2535. })
  2536. if formats or subtitles:
  2537. self.report_detected('video.js embed')
  2538. return [{'formats': formats, 'subtitles': subtitles}]
  2539. # Looking for http://schema.org/VideoObject
  2540. json_ld = self._search_json_ld(webpage, video_id, default={})
  2541. if json_ld.get('url') not in (url, None):
  2542. self.report_detected('JSON LD')
  2543. is_direct = json_ld.get('ext') not in (None, *MEDIA_EXTENSIONS.manifests)
  2544. return [merge_dicts({
  2545. '_type': 'video' if is_direct else 'url_transparent',
  2546. 'url': smuggle_url(json_ld['url'], {
  2547. 'force_videoid': video_id,
  2548. 'to_generic': True,
  2549. 'http_headers': {'Referer': url},
  2550. }),
  2551. }, json_ld)]
  2552. def check_video(vurl):
  2553. if YoutubeIE.suitable(vurl):
  2554. return True
  2555. if RtmpIE.suitable(vurl):
  2556. return True
  2557. vpath = urllib.parse.urlparse(vurl).path
  2558. vext = determine_ext(vpath, None)
  2559. return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
  2560. def filter_video(urls):
  2561. return list(filter(check_video, urls))
  2562. # Start with something easy: JW Player in SWFObject
  2563. found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
  2564. if found:
  2565. self.report_detected('JW Player in SFWObject')
  2566. else:
  2567. # Look for gorilla-vid style embedding
  2568. found = filter_video(re.findall(r'''(?sx)
  2569. (?:
  2570. jw_plugins|
  2571. JWPlayerOptions|
  2572. jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
  2573. )
  2574. .*?
  2575. ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
  2576. if found:
  2577. self.report_detected('JW Player embed')
  2578. if not found:
  2579. # Look for generic KVS player
  2580. found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
  2581. if found:
  2582. self.report_detected('KWS Player')
  2583. if found.group('maj_ver') not in ['4', '5']:
  2584. self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
  2585. flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
  2586. flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
  2587. # extract the part after the last / as the display_id from the
  2588. # canonical URL.
  2589. display_id = self._search_regex(
  2590. r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
  2591. r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
  2592. webpage, 'display_id', fatal=False
  2593. )
  2594. title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
  2595. thumbnail = flashvars['preview_url']
  2596. if thumbnail.startswith('//'):
  2597. protocol, _, _ = url.partition('/')
  2598. thumbnail = protocol + thumbnail
  2599. url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys()))
  2600. formats = []
  2601. for key in url_keys:
  2602. if '/get_file/' not in flashvars[key]:
  2603. continue
  2604. format_id = flashvars.get(f'{key}_text', key)
  2605. formats.append({
  2606. 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
  2607. 'format_id': format_id,
  2608. 'ext': 'mp4',
  2609. **(parse_resolution(format_id) or parse_resolution(flashvars[key]))
  2610. })
  2611. if not formats[-1].get('height'):
  2612. formats[-1]['quality'] = 1
  2613. return [{
  2614. 'id': flashvars['video_id'],
  2615. 'display_id': display_id,
  2616. 'title': title,
  2617. 'thumbnail': thumbnail,
  2618. 'formats': formats,
  2619. }]
  2620. if not found:
  2621. # Broaden the search a little bit
  2622. found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
  2623. if found:
  2624. self.report_detected('video file')
  2625. if not found:
  2626. # Broaden the findall a little bit: JWPlayer JS loader
  2627. found = filter_video(re.findall(
  2628. r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
  2629. if found:
  2630. self.report_detected('JW Player JS loader')
  2631. if not found:
  2632. # Flow player
  2633. found = filter_video(re.findall(r'''(?xs)
  2634. flowplayer\("[^"]+",\s*
  2635. \{[^}]+?\}\s*,
  2636. \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
  2637. ["']?url["']?\s*:\s*["']([^"']+)["']
  2638. ''', webpage))
  2639. if found:
  2640. self.report_detected('Flow Player')
  2641. if not found:
  2642. # Cinerama player
  2643. found = re.findall(
  2644. r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
  2645. if found:
  2646. self.report_detected('Cinerama player')
  2647. if not found:
  2648. # Try to find twitter cards info
  2649. # twitter:player:stream should be checked before twitter:player since
  2650. # it is expected to contain a raw stream (see
  2651. # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
  2652. found = filter_video(re.findall(
  2653. r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
  2654. if found:
  2655. self.report_detected('Twitter card')
  2656. if not found:
  2657. # We look for Open Graph info:
  2658. # We have to match any number spaces between elements, some sites try to align them, e.g.: statigr.am
  2659. m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
  2660. # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
  2661. if m_video_type is not None:
  2662. found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
  2663. if found:
  2664. self.report_detected('Open Graph video info')
  2665. if not found:
  2666. REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
  2667. found = re.search(
  2668. r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
  2669. r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
  2670. webpage)
  2671. if not found:
  2672. # Look also in Refresh HTTP header
  2673. refresh_header = urlh and urlh.headers.get('Refresh')
  2674. if refresh_header:
  2675. found = re.search(REDIRECT_REGEX, refresh_header)
  2676. if found:
  2677. new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
  2678. if new_url != url:
  2679. self.report_following_redirect(new_url)
  2680. return [self.url_result(new_url)]
  2681. else:
  2682. found = None
  2683. if not found:
  2684. # twitter:player is a https URL to iframe player that may or may not
  2685. # be supported by hypervideo thus this is checked the very last (see
  2686. # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
  2687. embed_url = self._html_search_meta('twitter:player', webpage, default=None)
  2688. if embed_url and embed_url != url:
  2689. self.report_detected('twitter:player iframe')
  2690. return [self.url_result(embed_url)]
  2691. if not found:
  2692. return []
  2693. domain_name = self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader', default=None)
  2694. entries = []
  2695. for video_url in orderedSet(found):
  2696. video_url = unescapeHTML(video_url)
  2697. video_url = video_url.replace('\\/', '/')
  2698. video_url = urllib.parse.urljoin(url, video_url)
  2699. video_id = urllib.parse.unquote(os.path.basename(video_url))
  2700. # Sometimes, jwplayer extraction will result in a YouTube URL
  2701. if YoutubeIE.suitable(video_url):
  2702. entries.append(self.url_result(video_url, 'Youtube'))
  2703. continue
  2704. video_id = os.path.splitext(video_id)[0]
  2705. headers = {
  2706. 'referer': actual_url
  2707. }
  2708. entry_info_dict = {
  2709. 'id': video_id,
  2710. 'uploader': domain_name,
  2711. 'title': info_dict['title'],
  2712. 'age_limit': info_dict['age_limit'],
  2713. 'http_headers': headers,
  2714. }
  2715. if RtmpIE.suitable(video_url):
  2716. entry_info_dict.update({
  2717. '_type': 'url_transparent',
  2718. 'ie_key': RtmpIE.ie_key(),
  2719. 'url': video_url,
  2720. })
  2721. entries.append(entry_info_dict)
  2722. continue
  2723. ext = determine_ext(video_url)
  2724. if ext == 'smil':
  2725. entry_info_dict = {**self._extract_smil_info(video_url, video_id), **entry_info_dict}
  2726. elif ext == 'xspf':
  2727. return [self._extract_xspf_playlist(video_url, video_id)]
  2728. elif ext == 'm3u8':
  2729. entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
  2730. entry_info_dict.update(self._fragment_query(video_url))
  2731. elif ext == 'mpd':
  2732. entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
  2733. entry_info_dict.update(self._fragment_query(video_url))
  2734. elif ext == 'f4m':
  2735. entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
  2736. elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
  2737. # Just matching .ism/manifest is not enough to be reliably sure
  2738. # whether it's actually an ISM manifest or some other streaming
  2739. # manifest since there are various streaming URL formats
  2740. # possible (see [1]) as well as some other shenanigans like
  2741. # .smil/manifest URLs that actually serve an ISM (see [2]) and
  2742. # so on.
  2743. # Thus the most reasonable way to solve this is to delegate
  2744. # to generic extractor in order to look into the contents of
  2745. # the manifest itself.
  2746. # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
  2747. # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
  2748. entry_info_dict = self.url_result(
  2749. smuggle_url(video_url, {'to_generic': True}),
  2750. GenericIE.ie_key())
  2751. else:
  2752. entry_info_dict['url'] = video_url
  2753. entries.append(entry_info_dict)
  2754. if len(entries) > 1:
  2755. for num, e in enumerate(entries, start=1):
  2756. # 'url' results don't have a title
  2757. if e.get('title') is not None:
  2758. e['title'] = '%s (%d)' % (e['title'], num)
  2759. return entries