youtube.py 313 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
7677867796780678167826783678467856786678767886789679067916792679367946795679667976798679968006801680268036804680568066807680868096810681168126813681468156816681768186819682068216822682368246825682668276828682968306831683268336834
  1. import base64
  2. import calendar
  3. import collections
  4. import copy
  5. import datetime
  6. import enum
  7. import hashlib
  8. import itertools
  9. import json
  10. import math
  11. import os.path
  12. import random
  13. import re
  14. import sys
  15. import threading
  16. import time
  17. import traceback
  18. import urllib.error
  19. import urllib.parse
  20. from .common import InfoExtractor, SearchInfoExtractor
  21. from .openload import PhantomJSwrapper
  22. from ..compat import functools
  23. from ..jsinterp import JSInterpreter
  24. from ..utils import (
  25. NO_DEFAULT,
  26. ExtractorError,
  27. LazyList,
  28. UserNotLive,
  29. bug_reports_message,
  30. classproperty,
  31. clean_html,
  32. datetime_from_str,
  33. dict_get,
  34. filter_dict,
  35. float_or_none,
  36. format_field,
  37. get_first,
  38. int_or_none,
  39. is_html,
  40. join_nonempty,
  41. js_to_json,
  42. mimetype2ext,
  43. network_exceptions,
  44. orderedSet,
  45. parse_codecs,
  46. parse_count,
  47. parse_duration,
  48. parse_iso8601,
  49. parse_qs,
  50. qualities,
  51. remove_start,
  52. smuggle_url,
  53. str_or_none,
  54. str_to_int,
  55. strftime_or_none,
  56. traverse_obj,
  57. try_get,
  58. unescapeHTML,
  59. unified_strdate,
  60. unified_timestamp,
  61. unsmuggle_url,
  62. update_url_query,
  63. url_or_none,
  64. urljoin,
  65. variadic,
  66. )
  67. # any clients starting with _ cannot be explicitly requested by the user
  68. INNERTUBE_CLIENTS = {
  69. 'web': {
  70. 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  71. 'INNERTUBE_CONTEXT': {
  72. 'client': {
  73. 'clientName': 'WEB',
  74. 'clientVersion': '2.20220801.00.00',
  75. }
  76. },
  77. 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
  78. },
  79. 'web_embedded': {
  80. 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  81. 'INNERTUBE_CONTEXT': {
  82. 'client': {
  83. 'clientName': 'WEB_EMBEDDED_PLAYER',
  84. 'clientVersion': '1.20220731.00.00',
  85. },
  86. },
  87. 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
  88. },
  89. 'web_music': {
  90. 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
  91. 'INNERTUBE_HOST': 'music.youtube.com',
  92. 'INNERTUBE_CONTEXT': {
  93. 'client': {
  94. 'clientName': 'WEB_REMIX',
  95. 'clientVersion': '1.20220727.01.00',
  96. }
  97. },
  98. 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
  99. },
  100. 'web_creator': {
  101. 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
  102. 'INNERTUBE_CONTEXT': {
  103. 'client': {
  104. 'clientName': 'WEB_CREATOR',
  105. 'clientVersion': '1.20220726.00.00',
  106. }
  107. },
  108. 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
  109. },
  110. 'android': {
  111. 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
  112. 'INNERTUBE_CONTEXT': {
  113. 'client': {
  114. 'clientName': 'ANDROID',
  115. 'clientVersion': '17.31.35',
  116. 'androidSdkVersion': 30,
  117. 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
  118. }
  119. },
  120. 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
  121. 'REQUIRE_JS_PLAYER': False
  122. },
  123. 'android_embedded': {
  124. 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
  125. 'INNERTUBE_CONTEXT': {
  126. 'client': {
  127. 'clientName': 'ANDROID_EMBEDDED_PLAYER',
  128. 'clientVersion': '17.31.35',
  129. 'androidSdkVersion': 30,
  130. 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
  131. },
  132. },
  133. 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
  134. 'REQUIRE_JS_PLAYER': False
  135. },
  136. 'android_music': {
  137. 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
  138. 'INNERTUBE_CONTEXT': {
  139. 'client': {
  140. 'clientName': 'ANDROID_MUSIC',
  141. 'clientVersion': '5.16.51',
  142. 'androidSdkVersion': 30,
  143. 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
  144. }
  145. },
  146. 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
  147. 'REQUIRE_JS_PLAYER': False
  148. },
  149. 'android_creator': {
  150. 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
  151. 'INNERTUBE_CONTEXT': {
  152. 'client': {
  153. 'clientName': 'ANDROID_CREATOR',
  154. 'clientVersion': '22.30.100',
  155. 'androidSdkVersion': 30,
  156. 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
  157. },
  158. },
  159. 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
  160. 'REQUIRE_JS_PLAYER': False
  161. },
  162. # iOS clients have HLS live streams. Setting device model to get 60fps formats.
  163. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
  164. 'ios': {
  165. 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
  166. 'INNERTUBE_CONTEXT': {
  167. 'client': {
  168. 'clientName': 'IOS',
  169. 'clientVersion': '17.33.2',
  170. 'deviceModel': 'iPhone14,3',
  171. 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
  172. }
  173. },
  174. 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
  175. 'REQUIRE_JS_PLAYER': False
  176. },
  177. 'ios_embedded': {
  178. 'INNERTUBE_CONTEXT': {
  179. 'client': {
  180. 'clientName': 'IOS_MESSAGES_EXTENSION',
  181. 'clientVersion': '17.33.2',
  182. 'deviceModel': 'iPhone14,3',
  183. 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
  184. },
  185. },
  186. 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
  187. 'REQUIRE_JS_PLAYER': False
  188. },
  189. 'ios_music': {
  190. 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
  191. 'INNERTUBE_CONTEXT': {
  192. 'client': {
  193. 'clientName': 'IOS_MUSIC',
  194. 'clientVersion': '5.21',
  195. 'deviceModel': 'iPhone14,3',
  196. 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
  197. },
  198. },
  199. 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
  200. 'REQUIRE_JS_PLAYER': False
  201. },
  202. 'ios_creator': {
  203. 'INNERTUBE_CONTEXT': {
  204. 'client': {
  205. 'clientName': 'IOS_CREATOR',
  206. 'clientVersion': '22.33.101',
  207. 'deviceModel': 'iPhone14,3',
  208. 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
  209. },
  210. },
  211. 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
  212. 'REQUIRE_JS_PLAYER': False
  213. },
  214. # mweb has 'ultralow' formats
  215. # See: https://github.com/hypervideo/hypervideo/pull/557
  216. 'mweb': {
  217. 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  218. 'INNERTUBE_CONTEXT': {
  219. 'client': {
  220. 'clientName': 'MWEB',
  221. 'clientVersion': '2.20220801.00.00',
  222. }
  223. },
  224. 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
  225. },
  226. # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
  227. # See: https://github.com/zerodytrash/YouTube-Internal-Clients
  228. 'tv_embedded': {
  229. 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  230. 'INNERTUBE_CONTEXT': {
  231. 'client': {
  232. 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
  233. 'clientVersion': '2.0',
  234. },
  235. },
  236. 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
  237. },
  238. }
  239. def _split_innertube_client(client_name):
  240. variant, *base = client_name.rsplit('.', 1)
  241. if base:
  242. return variant, base[0], variant
  243. base, *variant = client_name.split('_', 1)
  244. return client_name, base, variant[0] if variant else None
  def build_innertube_clients():
      """Fill in defaults, priorities and derived entries of ``INNERTUBE_CLIENTS``.

      Each configured client gets default values for the API key, the host,
      ``REQUIRE_JS_PLAYER`` and the ``hl`` context field, and a ``priority``
      equal to ten times the rank that ``qualities`` assigns to its base
      client within the reversed base-client tuple.  The priority is then
      adjusted according to the variant part of the client name:

      * no variant: a deep copy is registered as ``<client>_embedscreen`` with
        ``clientScreen`` set to ``'EMBED'``, a ``thirdParty`` context and a
        priority 3 below the original (the original keeps its priority);
      * ``embedded``: a ``thirdParty`` context is attached and the priority
        is lowered by 2;
      * any other variant: the priority is lowered by 3.
      """
      third_party = {
          'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
      }
      base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
      rank = qualities(base_clients[::-1])
      fallbacks = (
          ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
          ('INNERTUBE_HOST', 'www.youtube.com'),
          ('REQUIRE_JS_PLAYER', True),
      )

      # Iterate over a snapshot: the loop registers new entries in the mapping.
      for name, cfg in list(INNERTUBE_CLIENTS.items()):
          for key, value in fallbacks:
              cfg.setdefault(key, value)
          cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

          base_client, variant = _split_innertube_client(name)[1:]
          cfg['priority'] = 10 * rank(base_client)

          if variant == 'embedded':
              cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
              cfg['priority'] -= 2
          elif variant:
              cfg['priority'] -= 3
          else:
              # Derive the embed-screen flavour from the fully defaulted config.
              clone = copy.deepcopy(cfg)
              clone['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
              clone['INNERTUBE_CONTEXT']['thirdParty'] = third_party
              clone['priority'] -= 3
              INNERTUBE_CLIENTS[f'{name}_embedscreen'] = clone
  268. build_innertube_clients()
  class BadgeType(enum.Enum):
      """Kinds of badge recognised on renderers.

      The numeric values are the ones ``enum.auto()`` assigns for this
      declaration order (1 through 6).
      """

      # Availability of the item
      AVAILABILITY_UNLISTED = 1
      AVAILABILITY_PRIVATE = 2
      AVAILABILITY_PUBLIC = 3
      AVAILABILITY_PREMIUM = 4
      AVAILABILITY_SUBSCRIPTION = 5
      # Live state
      LIVE_NOW = 6
  276. class YoutubeBaseInfoExtractor(InfoExtractor):
  277. """Provide base functions for Youtube extractors"""
  278. _RESERVED_NAMES = (
  279. r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
  280. r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
  281. r'browse|oembed|get_video_info|iframe_api|s/player|source|'
  282. r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
  283. _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  284. # _NETRC_MACHINE = 'youtube'
  285. # If True it will raise an error if no login info is provided
  286. _LOGIN_REQUIRED = False
  287. _INVIDIOUS_SITES = (
  288. # invidious-redirect websites
  289. r'(?:www\.)?redirect\.invidious\.io',
  290. r'(?:(?:www|dev)\.)?invidio\.us',
  291. # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
  292. r'(?:www\.)?invidious\.pussthecat\.org',
  293. r'(?:www\.)?invidious\.zee\.li',
  294. r'(?:www\.)?invidious\.ethibox\.fr',
  295. r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
  296. r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
  297. r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
  298. # youtube-dl invidious instances list
  299. r'(?:(?:www|no)\.)?invidiou\.sh',
  300. r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
  301. r'(?:www\.)?invidious\.kabi\.tk',
  302. r'(?:www\.)?invidious\.mastodon\.host',
  303. r'(?:www\.)?invidious\.zapashcanon\.fr',
  304. r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
  305. r'(?:www\.)?invidious\.tinfoil-hat\.net',
  306. r'(?:www\.)?invidious\.himiko\.cloud',
  307. r'(?:www\.)?invidious\.reallyancient\.tech',
  308. r'(?:www\.)?invidious\.tube',
  309. r'(?:www\.)?invidiou\.site',
  310. r'(?:www\.)?invidious\.site',
  311. r'(?:www\.)?invidious\.xyz',
  312. r'(?:www\.)?invidious\.nixnet\.xyz',
  313. r'(?:www\.)?invidious\.048596\.xyz',
  314. r'(?:www\.)?invidious\.drycat\.fr',
  315. r'(?:www\.)?inv\.skyn3t\.in',
  316. r'(?:www\.)?tube\.poal\.co',
  317. r'(?:www\.)?tube\.connect\.cafe',
  318. r'(?:www\.)?vid\.wxzm\.sx',
  319. r'(?:www\.)?vid\.mint\.lgbt',
  320. r'(?:www\.)?vid\.puffyan\.us',
  321. r'(?:www\.)?yewtu\.be',
  322. r'(?:www\.)?yt\.elukerio\.org',
  323. r'(?:www\.)?yt\.lelux\.fi',
  324. r'(?:www\.)?invidious\.ggc-project\.de',
  325. r'(?:www\.)?yt\.maisputain\.ovh',
  326. r'(?:www\.)?ytprivate\.com',
  327. r'(?:www\.)?invidious\.13ad\.de',
  328. r'(?:www\.)?invidious\.toot\.koeln',
  329. r'(?:www\.)?invidious\.fdn\.fr',
  330. r'(?:www\.)?watch\.nettohikari\.com',
  331. r'(?:www\.)?invidious\.namazso\.eu',
  332. r'(?:www\.)?invidious\.silkky\.cloud',
  333. r'(?:www\.)?invidious\.exonip\.de',
  334. r'(?:www\.)?invidious\.riverside\.rocks',
  335. r'(?:www\.)?invidious\.blamefran\.net',
  336. r'(?:www\.)?invidious\.moomoo\.de',
  337. r'(?:www\.)?ytb\.trom\.tf',
  338. r'(?:www\.)?yt\.cyberhost\.uk',
  339. r'(?:www\.)?kgg2m7yk5aybusll\.onion',
  340. r'(?:www\.)?qklhadlycap4cnod\.onion',
  341. r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
  342. r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
  343. r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
  344. r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
  345. r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
  346. r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
  347. r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
  348. r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
  349. r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
  350. r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
  351. # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
  352. r'(?:www\.)?piped\.kavin\.rocks',
  353. r'(?:www\.)?piped\.tokhmi\.xyz',
  354. r'(?:www\.)?piped\.syncpundit\.io',
  355. r'(?:www\.)?piped\.mha\.fi',
  356. r'(?:www\.)?watch\.whatever\.social',
  357. r'(?:www\.)?piped\.garudalinux\.org',
  358. r'(?:www\.)?piped\.rivo\.lol',
  359. r'(?:www\.)?piped-libre\.kavin\.rocks',
  360. r'(?:www\.)?yt\.jae\.fi',
  361. r'(?:www\.)?piped\.mint\.lgbt',
  362. r'(?:www\.)?il\.ax',
  363. r'(?:www\.)?piped\.esmailelbob\.xyz',
  364. r'(?:www\.)?piped\.projectsegfau\.lt',
  365. r'(?:www\.)?piped\.privacydev\.net',
  366. r'(?:www\.)?piped\.palveluntarjoaja\.eu',
  367. r'(?:www\.)?piped\.smnz\.de',
  368. r'(?:www\.)?piped\.adminforge\.de',
  369. r'(?:www\.)?watch\.whatevertinfoil\.de',
  370. r'(?:www\.)?piped\.qdi\.fi',
  371. r'(?:www\.)?piped\.video',
  372. r'(?:www\.)?piped\.aeong\.one',
  373. )
  374. # extracted from account/account_menu ep
  375. # XXX: These are the supported YouTube UI and API languages,
  376. # which is slightly different from languages supported for translation in YouTube studio
  377. _SUPPORTED_LANG_CODES = [
  378. 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
  379. 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
  380. 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
  381. 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
  382. 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
  383. 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
  384. ]
  385. _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
  @functools.cached_property
  def _preferred_lang(self):
      """
      Language code requested through the ``lang`` extractor argument.

      Returns None when no language was requested.
      Raises ExtractorError (expected) when the requested code is not one of
      ``_SUPPORTED_LANG_CODES``; emits a warning for any language other than 'en'.
      """
      lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
      if not lang:
          return None

      if lang in self._SUPPORTED_LANG_CODES:
          if lang != 'en':
              self.report_warning(
                  f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
          return lang

      supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
      raise ExtractorError(
          f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
          expected=True)
  403. def _initialize_consent(self):
  404. cookies = self._get_cookies('https://www.youtube.com/')
  405. if cookies.get('__Secure-3PSID'):
  406. return
  407. consent_id = None
  408. consent = cookies.get('CONSENT')
  409. if consent:
  410. if 'YES' in consent.value:
  411. return
  412. consent_id = self._search_regex(
  413. r'PENDING\+(\d+)', consent.value, 'consent', default=None)
  414. if not consent_id:
  415. consent_id = random.randint(100, 999)
  416. self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
  417. def _initialize_pref(self):
  418. cookies = self._get_cookies('https://www.youtube.com/')
  419. pref_cookie = cookies.get('PREF')
  420. pref = {}
  421. if pref_cookie:
  422. try:
  423. pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
  424. except ValueError:
  425. self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
  426. pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
  427. self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
  428. def _real_initialize(self):
  429. self._initialize_pref()
  430. self._initialize_consent()
  431. self._check_login_required()
  432. def _check_login_required(self):
  433. if self._LOGIN_REQUIRED and not self._cookies_passed:
  434. self.raise_login_required('Login details are needed to download this content', method='cookies')
  435. _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
  436. _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
  def _get_default_ytcfg(self, client='web'):
      """Return an independent deep copy of the built-in config for *client*."""
      template = INNERTUBE_CLIENTS[client]
      return copy.deepcopy(template)
  def _get_innertube_host(self, client='web'):
      """Return the ``INNERTUBE_HOST`` configured for *client*."""
      client_config = INNERTUBE_CLIENTS[client]
      return client_config['INNERTUBE_HOST']
  def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
      """try_get on *ytcfg*, falling back to the default config of *default_client*.

      The fallback is used whenever the value found in *ytcfg* is falsy.
      """
      found = try_get(ytcfg, getter, expected_type)
      if found:
          return found
      return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
  445. def _extract_client_name(self, ytcfg, default_client='web'):
  446. return self._ytcfg_get_safe(
  447. ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
  448. lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
  449. def _extract_client_version(self, ytcfg, default_client='web'):
  450. return self._ytcfg_get_safe(
  451. ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
  452. lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
  def _select_api_hostname(self, req_api_hostname, default_client=None):
      """Pick the API hostname to use.

      Precedence: the ``innertube_host`` extractor argument, then
      *req_api_hostname*, then the host of *default_client* (or 'web').
      """
      configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
      if configured:
          return configured
      if req_api_hostname:
          return req_api_hostname
      return self._get_innertube_host(default_client or 'web')
  456. def _extract_api_key(self, ytcfg=None, default_client='web'):
  457. return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
  def _extract_context(self, ytcfg=None, default_client='web'):
      """Return the ``INNERTUBE_CONTEXT`` dict with language and timezone enforced.

      The context is looked up in *ytcfg* first and in the default config of
      *default_client* second.  The ``client`` dict obtained from it is updated
      with ``hl`` (preferred language or 'en'), ``timeZone`` and
      ``utcOffsetMinutes``.
      """
      sources = (ytcfg, self._get_default_ytcfg(default_client))
      context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
      # Enforce language and tz for extraction
      client_section = traverse_obj(context, 'client', expected_type=dict, default={})
      client_section.update(
          hl=self._preferred_lang or 'en', timeZone='UTC', utcOffsetMinutes=0)
      return context
  465. _SAPISID = None
  466. def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
  467. time_now = round(time.time())
  468. if self._SAPISID is None:
  469. yt_cookies = self._get_cookies('https://www.youtube.com')
  470. # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
  471. # See: https://github.com/hypervideo/hypervideo/issues/393
  472. sapisid_cookie = dict_get(
  473. yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
  474. if sapisid_cookie and sapisid_cookie.value:
  475. self._SAPISID = sapisid_cookie.value
  476. self.write_debug('Extracted SAPISID cookie')
  477. # SAPISID cookie is required if not already present
  478. if not yt_cookies.get('SAPISID'):
  479. self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
  480. self._set_cookie(
  481. '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
  482. else:
  483. self._SAPISID = False
  484. if not self._SAPISID:
  485. return None
  486. # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
  487. sapisidhash = hashlib.sha1(
  488. f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
  489. return f'SAPISIDHASH {time_now}_{sapisidhash}'
  def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                note='Downloading API JSON', errnote='Unable to download API page',
                context=None, api_key=None, api_hostname=None, default_client='web'):
      """POST *query* to the ``/youtubei/v1/<ep>`` endpoint and return the JSON result.

      @param context        request context; extracted for *default_client* when falsy
      @param headers        extra headers merged over the generated API headers
      @param api_key        used unless the ``innertube_key`` extractor argument is set;
                            extracted for *default_client* when neither is given
      @param api_hostname   passed to ``_select_api_hostname`` to choose the host
      """
      payload = {'context': context or self._extract_context(default_client=default_client)}
      payload.update(query)

      request_headers = self.generate_api_headers(default_client=default_client)
      request_headers['content-type'] = 'application/json'
      if headers:
          request_headers.update(headers)

      configured_key = self._configuration_arg(
          'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
      key = configured_key or api_key or self._extract_api_key(default_client=default_client)

      host = self._select_api_hostname(api_hostname, default_client)
      return self._download_json(
          f'https://{host}/youtubei/v1/{ep}',
          video_id=video_id, fatal=fatal, note=note, errnote=errnote,
          data=json.dumps(payload).encode('utf8'), headers=request_headers,
          query={'key': key, 'prettyPrint': 'false'})
  def extract_yt_initial_data(self, item_id, webpage, fatal=True):
      """Search *webpage* for the ytInitialData JSON via ``_search_json`` and return it."""
      pattern = self._YT_INITIAL_DATA_RE
      return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
  @staticmethod
  def _extract_session_index(*data):
      """
      Index of current account in account list.
      See: https://github.com/hypervideo/hypervideo/pull/519

      Returns the first integer ``SESSION_INDEX`` found among *data*, else None.
      """
      indices = (
          int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
          for ytcfg in data)
      return next((index for index in indices if index is not None), None)
  # Deprecated?
  def _extract_identity_token(self, ytcfg=None, webpage=None):
      """Return the identity token from *ytcfg* (``ID_TOKEN``) or, failing that, from *webpage*.

      Returns None when neither source yields a token.
      """
      token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
      if token:
          return token
      if not webpage:
          return None
      return self._search_regex(
          r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
          'identity token', default=None, fatal=False)
  @staticmethod
  def _extract_account_syncid(*args):
      """
      Extract syncId required to download private playlists of secondary channels
      @params response and/or ytcfg
      Returns None when no argument provides one.
      """
      datasync_getters = (
          lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
          lambda x: x['DATASYNC_ID'],
      )
      for source in args:
          # ytcfg includes channel_syncid if on secondary channel
          delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
          if delegated:
              return delegated
          parts = (try_get(source, datasync_getters, str) or '').split('||')
          # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
          # and just "user_syncid||" for primary channel. We only want the channel_syncid
          if len(parts) >= 2 and parts[1]:
              return parts[0]
      return None
  @staticmethod
  def _extract_visitor_data(*args):
      """
      Extracts visitorData from an API response or ytcfg
      Appears to be used to track session state
      """
      visitor_paths = [(
          'VISITOR_DATA',
          ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
          ('responseContext', 'visitorData'),
      )]
      return get_first(args, visitor_paths, expected_type=str)
  @functools.cached_property
  def is_authenticated(self):
      """True when a SAPISIDHASH authorization header can be generated."""
      header = self._generate_sapisidhash_header()
      return True if header else False
  def extract_ytcfg(self, video_id, webpage):
      """Parse the object passed to ``ytcfg.set(...)`` in *webpage*.

      Returns an empty dict when *webpage* is empty, when no ``ytcfg.set`` call
      is found, or when the parsed result is falsy.
      """
      if not webpage:
          return {}
      raw_config = self._search_regex(
          r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
          default='{}')
      parsed = self._parse_json(raw_config, video_id, fatal=False)
      return parsed or {}
  def generate_api_headers(
          self, *, ytcfg=None, account_syncid=None, session_index=None,
          visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
      """Assemble the HTTP headers for an API request.

      Explicit arguments win over values extracted from *ytcfg*.
      ``X-Goog-AuthUser`` is added when a session index is known or an
      *account_syncid* was given (falling back to 0).  ``Authorization`` and
      ``X-Origin`` are added when a SAPISIDHASH header can be generated.
      The result is passed through ``filter_dict`` before being returned.
      """
      origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

      def from_ytcfg(getter):
          return self._ytcfg_get_safe(ytcfg, getter, default_client=default_client)

      headers = {
          'X-YouTube-Client-Name': str(from_ytcfg(lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'])),
          'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
          'Origin': origin,
          'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
          'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
          'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
          'User-Agent': from_ytcfg(lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent']),
      }

      if session_index is None:
          session_index = self._extract_session_index(ytcfg)
      if session_index is not None:
          headers['X-Goog-AuthUser'] = session_index
      elif account_syncid:
          headers['X-Goog-AuthUser'] = 0

      auth = self._generate_sapisidhash_header(origin)
      if auth is not None:
          headers.update({'Authorization': auth, 'X-Origin': origin})
      return filter_dict(headers)
  588. def _download_ytcfg(self, client, video_id):
  589. url = {
  590. 'web': 'https://www.youtube.com',
  591. 'web_music': 'https://music.youtube.com',
  592. 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
  593. }.get(client)
  594. if not url:
  595. return {}
  596. webpage = self._download_webpage(
  597. url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
  598. return self.extract_ytcfg(video_id, webpage) or {}
  599. @staticmethod
  600. def _build_api_continuation_query(continuation, ctp=None):
  601. query = {
  602. 'continuation': continuation
  603. }
  604. # TODO: Inconsistency with clickTrackingParams.
  605. # Currently we have a fixed ctp contained within context (from ytcfg)
  606. # and a ctp in root query for continuation.
  607. if ctp:
  608. query['clickTracking'] = {'clickTrackingParams': ctp}
  609. return query
  @classmethod
  def _extract_next_continuation_data(cls, renderer):
      """Build a continuation query from ``nextContinuationData`` or ``reloadContinuationData``.

      Returns None when *renderer* has neither, or when the data has no
      ``continuation`` token.
      """
      data_getters = (
          lambda x: x['continuations'][0]['nextContinuationData'],
          lambda x: x['continuation']['reloadContinuationData'],
      )
      data = try_get(renderer, data_getters, dict)
      token = data.get('continuation') if data else None
      if not token:
          return None
      return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
  @classmethod
  def _extract_continuation_ep_data(cls, continuation_ep: dict):
      """Build a continuation query from a continuation endpoint dict.

      Returns None when *continuation_ep* is not a dict or carries no
      ``continuationCommand`` token.
      """
      if not isinstance(continuation_ep, dict):
          return None
      token = try_get(
          continuation_ep, lambda x: x['continuationCommand']['token'], str)
      if not token:
          return None
      return cls._build_api_continuation_query(
          token, continuation_ep.get('clickTrackingParams'))
  @classmethod
  def _extract_continuation(cls, renderer):
      """Return a continuation query for *renderer*.

      The next/reload continuation data is tried first; otherwise the
      ``continuationItemRenderer`` entries under ``contents``, ``items`` or
      ``rows`` are searched for a usable endpoint or button command.
      """
      item_path = (
          ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
          ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
      )
      return cls._extract_next_continuation_data(renderer) or traverse_obj(
          renderer, item_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
  @classmethod
  def _extract_alerts(cls, data):
      """Yield ``(alert_type, message)`` pairs for the alerts listed in *data*.

      Entries of ``data['alerts']`` that are not dicts, alert renderers that
      are not dicts, and alerts lacking a type or a text are skipped.
      """
      for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
          if not isinstance(alert_dict, dict):
              continue
          for alert in alert_dict.values():
              # Malformed renderers must not abort alert extraction
              if not isinstance(alert, dict):
                  continue
              alert_type = alert.get('type')
              if not alert_type:
                  continue
              message = cls._get_text(alert, 'text')
              if message:
                  yield alert_type, message
  652. def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
  653. errors, warnings = [], []
  654. for alert_type, alert_message in alerts:
  655. if alert_type.lower() == 'error' and fatal:
  656. errors.append([alert_type, alert_message])
  657. elif alert_message not in self._IGNORED_WARNINGS:
  658. warnings.append([alert_type, alert_message])
  659. for alert_type, alert_message in (warnings + errors[:-1]):
  660. self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
  661. if errors:
  662. raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
  663. def _extract_and_report_alerts(self, data, *args, **kwargs):
  664. return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
  def _extract_badges(self, renderer: dict):
      """Return the recognised badges of *renderer* as ``{'type': BadgeType}`` dicts.

      Each ``metadataBadgeRenderer`` is classified by its icon type, then by
      its style, and finally by an English substring match on its label.
      Badges that match none of these are omitted.
      """
      privacy_icon_map = {
          'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
          'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
          'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
      }

      badge_style_map = {
          'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
          'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
          'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
      }

      label_map = {
          'unlisted': BadgeType.AVAILABILITY_UNLISTED,
          'private': BadgeType.AVAILABILITY_PRIVATE,
          'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
          'live': BadgeType.LIVE_NOW,
          'premium': BadgeType.AVAILABILITY_PREMIUM
      }

      badges = []
      for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
          badge_type = (
              privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
              or badge_style_map.get(traverse_obj(badge, 'style'))
          )
          if badge_type:
              badges.append({'type': badge_type})
              continue

          # fallback, won't work in some languages
          label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
          for match, label_badge_type in label_map.items():
              if match in label:
                  # Record the type found through the label (badge_type is empty
                  # here) and stop at the first match so a badge is added once.
                  badges.append({'type': label_badge_type})
                  break

      return badges
  @staticmethod
  def _has_badge(badges, badge_type):
      """Tell whether *badges* contains an entry whose ``type`` equals *badge_type*."""
      def is_wanted(_, badge):
          return badge['type'] == badge_type

      return bool(traverse_obj(badges, is_wanted))
  @staticmethod
  def _get_text(data, *path_list, max_runs=None):
      """Return the first non-empty text found in *data* at any of *path_list*.

      For every candidate object, ``simpleText`` is preferred; otherwise the
      ``text`` fields of its ``runs`` (or of the object itself when it is a
      list) are concatenated, limited to *max_runs* runs when given.
      Without any path, *data* itself is the candidate.
      Returns None when no text is found.
      """
      def is_branching(path):
          # Paths with `...` or nested list/tuple keys are treated as multi-result
          return any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))

      for path in path_list or [None]:
          if path is None:
              candidates = [data]
          else:
              candidates = traverse_obj(data, path, default=[])
              if not is_branching(path):
                  candidates = [candidates]

          for candidate in candidates:
              simple_text = try_get(candidate, lambda x: x['simpleText'], str)
              if simple_text:
                  return simple_text

              runs = try_get(candidate, lambda x: x['runs'], list) or []
              if isinstance(candidate, list) and not runs:
                  runs = candidate
              limit = min(len(runs), max_runs or len(runs))
              joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
              if joined:
                  return joined
  def _get_count(self, data, *path_list):
      """Parse a count from the text found in *data* at *path_list*.

      ``parse_count`` is tried first; when it yields None, the leading run of
      digits and commas (after removing whitespace) is converted instead.
      """
      text = self._get_text(data, *path_list) or ''
      parsed = parse_count(text)
      if parsed is not None:
          return parsed
      leading_digits = self._search_regex(
          r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
      return str_to_int(leading_digits)
  @staticmethod
  def _extract_thumbnails(data, *path_list):
      """
      Collect thumbnail dicts (url, height, width) from thumbnails dicts
      @param path_list: path list to level that contains 'thumbnails' key
      Entries without a valid URL are skipped.
      """
      collected = []
      for path in path_list or [()]:
          entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
          for entry in entries:
              url = url_or_none(entry.get('url'))
              if url:
                  # Sometimes youtube gives a wrong thumbnail URL. See:
                  # https://github.com/hypervideo/hypervideo/issues/233
                  # https://github.com/ytdl-org/youtube-dl/issues/28023
                  if 'maxresdefault' in url:
                      url = url.split('?')[0]
                  collected.append({
                      'url': url,
                      'height': int_or_none(entry.get('height')),
                      'width': int_or_none(entry.get('width')),
                  })
      return collected
  752. @staticmethod
  753. def extract_relative_time(relative_time_text):
  754. """
  755. Extracts a relative time from string and converts to dt object
  756. e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
  757. """
  758. mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
  759. if mobj:
  760. start = mobj.group('start')
  761. if start:
  762. return datetime_from_str(start)
  763. try:
  764. return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
  765. except ValueError:
  766. return None
  767. def _parse_time_text(self, text):
  768. if not text:
  769. return
  770. dt = self.extract_relative_time(text)
  771. timestamp = None
  772. if isinstance(dt, datetime.datetime):
  773. timestamp = calendar.timegm(dt.timetuple())
  774. if timestamp is None:
  775. timestamp = (
  776. unified_timestamp(text) or unified_timestamp(
  777. self._search_regex(
  778. (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
  779. text.lower(), 'time text', default=None)))
  780. if text and timestamp is None and self._preferred_lang in (None, 'en'):
  781. self.report_warning(
  782. f'Cannot parse localized time text "{text}"', only_once=True)
  783. return timestamp
  def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                        ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                        default_client='web'):
      """Call the API endpoint *ep* under ``RetryManager`` and return the response.

      An attempt is retried when:
        * the request failed with a network error that is not an HTTP error,
        * the request failed with an HTTP error whose code is neither 403 nor 429,
        * the response carries an alert containing 'unknown error',
        * ``traverse_obj(response, *variadic(check_get_keys))`` is falsy.
      Any other failure is passed to ``_error_or_warning`` together with
      *fatal*, and that call's result is returned.

      @param ytcfg            used to extract the request context and API key
      @param check_get_keys   path(s) that must be present for the data to be complete
      """
      for retry in self.RetryManager():
          try:
              # The call itself is always fatal so that failures surface as
              # ExtractorError and can be classified below.
              response = self._call_api(
                  ep=ep, fatal=True, headers=headers,
                  video_id=item_id, query=query, note=note,
                  context=self._extract_context(ytcfg, default_client),
                  api_key=self._extract_api_key(ytcfg, default_client),
                  api_hostname=api_hostname, default_client=default_client)
          except ExtractorError as e:
              if not isinstance(e.cause, network_exceptions):
                  return self._error_or_warning(e, fatal=fatal)
              elif not isinstance(e.cause, urllib.error.HTTPError):
                  retry.error = e
                  continue

              # HTTP error: a non-HTML body may hold a JSON error message to report
              first_bytes = e.cause.read(512)
              if not is_html(first_bytes):
                  yt_error = try_get(
                      self._parse_json(
                          self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                      lambda x: x['error']['message'], str)
                  if yt_error:
                      self._report_alerts([('ERROR', yt_error)], fatal=False)
              # Downloading page may result in intermittent 5xx HTTP error
              # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
              # We also want to catch all other network exceptions since errors in later pages can be troublesome
              # See https://github.com/hypervideo/hypervideo/issues/507#issuecomment-880188210
              if e.cause.code not in (403, 429):
                  retry.error = e
                  continue
              return self._error_or_warning(e, fatal=fatal)

          try:
              self._extract_and_report_alerts(response, only_once=True)
          except ExtractorError as e:
              # YouTube servers may return errors we want to retry on in a 200 OK response
              # See: https://github.com/hypervideo/hypervideo/issues/839
              if 'unknown error' in e.msg.lower():
                  retry.error = e
                  continue
              return self._error_or_warning(e, fatal=fatal)
          # Youtube sometimes sends incomplete data
          # See: https://github.com/ytdl-org/youtube-dl/issues/28194
          if not traverse_obj(response, *variadic(check_get_keys)):
              retry.error = ExtractorError('Incomplete data received', expected=True)
              continue

          return response
  832. @staticmethod
  833. def is_music_url(url):
  834. return re.match(r'https?://music\.youtube\.com/', url) is not None
  def _extract_video(self, renderer):
      """Build a ``url``-type result dict for a single video renderer.

      *renderer* is read with ``.get()`` and ``traverse_obj``; presumably it
      is a dict taken from a YouTube API response -- confirm against callers.

      The returned dict always has ``_type`` set to ``'url'`` and ``ie_key``
      set to ``YoutubeIE``'s key, plus id, url, title, description, duration,
      channel data, thumbnails, timestamps, availability, live status and a
      view count. The view count is stored under ``concurrent_view_count``
      for live/upcoming entries and under ``view_count`` otherwise.
      """
      video_id = renderer.get('videoId')

      # Nested reel header renderer; used below as a fallback source for
      # the title, channel id, channel name and timestamp text.
      reel_header = traverse_obj(renderer, (
          'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
          'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

      title = self._get_text(renderer, 'title', 'headline')
      if not title:
          title = self._get_text(reel_header, 'reelTitleText')
      description = self._get_text(renderer, 'descriptionSnippet')

      # Duration: try the numeric field, then the length text, then the
      # accessibility label of the title as a last resort.
      duration = int_or_none(renderer.get('lengthSeconds'))
      if duration is None:
          length_text = self._get_text(
              renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
          duration = parse_duration(length_text)
          if duration is None:
              # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
              duration_text = self._search_regex(
                  r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
                  traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
                  video_id, default=None, group='duration')
              duration = parse_duration(duration_text)

      channel_id = traverse_obj(
          renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
          expected_type=str, get_all=False) or traverse_obj(
          reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

      overlay_kind = traverse_obj(
          renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
          get_all=False, expected_type=str)
      badges = self._extract_badges(renderer)

      relative_nav_url = traverse_obj(
          renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
          expected_type=str)
      nav_url = urljoin('https://www.youtube.com/', relative_nav_url) or ''

      # Use the /shorts/ URL when either the overlay style or the
      # navigation URL marks the entry as a short.
      if overlay_kind == 'SHORTS' or '/shorts/' in nav_url:
          url = f'https://www.youtube.com/shorts/{video_id}'
      else:
          url = f'https://www.youtube.com/watch?v={video_id}'

      time_text = self._get_text(renderer, 'publishedTimeText', 'videoInfo')
      if not time_text:
          time_text = self._get_text(reel_header, 'timestampText') or ''
      scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

      # First matching rule wins: a scheduled start time beats the
      # "streamed" time text, which beats the LIVE overlay/badge.
      if scheduled_timestamp is not None:
          live_status = 'is_upcoming'
      elif 'streamed' in time_text.lower():
          live_status = 'was_live'
      elif overlay_kind == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
          live_status = 'is_live'
      else:
          live_status = None

      # videoInfo is a string like '50K views • 10 years ago'.
      views_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
      if 'no views' in views_text.lower():
          view_count = 0
      else:
          view_count = self._get_count({'simpleText': views_text})
      if live_status in ('is_live', 'is_upcoming'):
          view_count_field = 'concurrent_view_count'
      else:
          view_count_field = 'view_count'

      # The remaining values are computed in the same order in which they
      # appear in the result dict.
      extractor_key = YoutubeIE.ie_key()

      channel_name = self._get_text(renderer, 'ownerText', 'shortBylineText')
      if not channel_name:
          channel_name = self._get_text(reel_header, 'channelTitleText')

      channel_url = None
      if channel_id:
          channel_url = f'https://www.youtube.com/channel/{channel_id}'

      thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

      # The time text is only parsed into a timestamp when the
      # 'approximate_date' extractor argument is set.
      timestamp = None
      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
          timestamp = self._parse_time_text(time_text)

      if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
          availability = 'public'
      else:
          # "or None" turns a missing badge into None instead of False
          availability = self._availability(
              is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
              needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
              needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
              is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

      return {
          '_type': 'url',
          'ie_key': extractor_key,
          'id': video_id,
          'url': url,
          'title': title,
          'description': description,
          'duration': duration,
          'channel_id': channel_id,
          'channel': channel_name,
          'channel_url': channel_url,
          'thumbnails': thumbnails,
          'timestamp': timestamp,
          'release_timestamp': scheduled_timestamp,
          'availability': availability,
          view_count_field: view_count,
          'live_status': live_status,
      }
  907. class YoutubeIE(YoutubeBaseInfoExtractor):
  908. IE_DESC = 'YouTube'
  909. _VALID_URL = r"""(?x)^
  910. (
  911. (?:https?://|//) # http(s):// or protocol-independent URL
  912. (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
  913. (?:www\.)?deturl\.com/www\.youtube\.com|
  914. (?:www\.)?pwnyoutube\.com|
  915. (?:www\.)?hooktube\.com|
  916. (?:www\.)?yourepeat\.com|
  917. tube\.majestyc\.net|
  918. %(invidious)s|
  919. youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
  920. (?:.*?\#/)? # handle anchor (#/) redirect urls
  921. (?: # the various things that can precede the ID:
  922. (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
  923. |(?: # or the v= param in all its forms
  924. (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
  925. (?:\?|\#!?) # the params delimiter ? or # or #!
  926. (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
  927. v=
  928. )
  929. ))
  930. |(?:
  931. youtu\.be| # just youtu.be/xxxx
  932. vid\.plus| # or vid.plus/xxxx
  933. zwearz\.com/watch| # or zwearz.com/watch/xxxx
  934. %(invidious)s
  935. )/
  936. |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
  937. )
  938. )? # all until now is optional -> you can pass the naked ID
  939. (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
  940. (?(1).+)? # if we found the ID, everything can follow
  941. (?:\#|$)""" % {
  942. 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
  943. }
  944. _EMBED_REGEX = [
  945. r'''(?x)
  946. (?:
  947. <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
  948. data-video-url=|
  949. <embed[^>]+?src=|
  950. embedSWF\(?:\s*|
  951. <object[^>]+data=|
  952. new\s+SWFObject\(
  953. )
  954. (["\'])
  955. (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
  956. (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
  957. \1''',
  958. # https://wordpress.org/plugins/lazy-load-for-videos/
  959. r'''(?xs)
  960. <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
  961. \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
  962. ]
  963. _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
  964. _PLAYER_INFO_RE = (
  965. r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
  966. r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
  967. r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
  968. )
  969. _formats = {
  970. '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
  971. '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
  972. '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
  973. '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
  974. '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
  975. '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
  976. '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  977. '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  978. # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
  979. '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
  980. '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
  981. '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
  982. '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
  983. '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
  984. '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
  985. '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
  986. '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  987. '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  988. # 3D videos
  989. '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
  990. '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
  991. '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
  992. '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
  993. '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
  994. '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
  995. '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
  996. # Apple HTTP Live Streaming
  997. '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
  998. '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
  999. '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
  1000. '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
  1001. '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
  1002. '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
  1003. '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
  1004. '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
  1005. # DASH mp4 video
  1006. '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1007. '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1008. '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1009. '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1010. '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1011. '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
  1012. '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1013. '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1014. '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1015. '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
  1016. '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
  1017. '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
  1018. # Dash mp4 audio
  1019. '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
  1020. '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
  1021. '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
  1022. '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
  1023. '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
  1024. '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
  1025. '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
  1026. # Dash webm
  1027. '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
  1028. '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
  1029. '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
  1030. '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
  1031. '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
  1032. '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
  1033. '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
  1034. '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1035. '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1036. '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1037. '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1038. '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1039. '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1040. '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1041. '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1042. # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
  1043. '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1044. '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
  1045. '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
  1046. '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
  1047. '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
  1048. '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
  1049. # Dash webm audio
  1050. '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
  1051. '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
  1052. # Dash webm audio with opus inside
  1053. '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
  1054. '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
  1055. '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
  1056. # RTMP (unnamed)
  1057. '_rtmp': {'protocol': 'rtmp'},
  1058. # av01 video only formats sometimes served with "unknown" codecs
  1059. '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
  1060. '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
  1061. '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
  1062. '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
  1063. '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
  1064. '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
  1065. '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
  1066. '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
  1067. }
  1068. _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
  1069. _GEO_BYPASS = False
  1070. IE_NAME = 'youtube'
  1071. _TESTS = [
  1072. {
  1073. 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
  1074. 'info_dict': {
  1075. 'id': 'BaW_jenozKc',
  1076. 'ext': 'mp4',
  1077. 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
  1078. 'uploader': 'Philipp Hagemeister',
  1079. 'uploader_id': 'phihag',
  1080. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
  1081. 'channel': 'Philipp Hagemeister',
  1082. 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
  1083. 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
  1084. 'upload_date': '20121002',
  1085. 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
  1086. 'categories': ['Science & Technology'],
  1087. 'tags': ['youtube-dl'],
  1088. 'duration': 10,
  1089. 'view_count': int,
  1090. 'like_count': int,
  1091. 'availability': 'public',
  1092. 'playable_in_embed': True,
  1093. 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
  1094. 'live_status': 'not_live',
  1095. 'age_limit': 0,
  1096. 'start_time': 1,
  1097. 'end_time': 9,
  1098. 'comment_count': int,
  1099. 'channel_follower_count': int
  1100. }
  1101. },
  1102. {
  1103. 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
  1104. 'note': 'Embed-only video (#1746)',
  1105. 'info_dict': {
  1106. 'id': 'yZIXLfi8CZQ',
  1107. 'ext': 'mp4',
  1108. 'upload_date': '20120608',
  1109. 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
  1110. 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
  1111. 'uploader': 'SET India',
  1112. 'uploader_id': 'setindia',
  1113. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
  1114. 'age_limit': 18,
  1115. },
  1116. 'skip': 'Private video',
  1117. },
  1118. {
  1119. 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
  1120. 'note': 'Use the first video ID in the URL',
  1121. 'info_dict': {
  1122. 'id': 'BaW_jenozKc',
  1123. 'ext': 'mp4',
  1124. 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
  1125. 'uploader': 'Philipp Hagemeister',
  1126. 'uploader_id': 'phihag',
  1127. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
  1128. 'channel': 'Philipp Hagemeister',
  1129. 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
  1130. 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
  1131. 'upload_date': '20121002',
  1132. 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
  1133. 'categories': ['Science & Technology'],
  1134. 'tags': ['youtube-dl'],
  1135. 'duration': 10,
  1136. 'view_count': int,
  1137. 'like_count': int,
  1138. 'availability': 'public',
  1139. 'playable_in_embed': True,
  1140. 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
  1141. 'live_status': 'not_live',
  1142. 'age_limit': 0,
  1143. 'comment_count': int,
  1144. 'channel_follower_count': int
  1145. },
  1146. 'params': {
  1147. 'skip_download': True,
  1148. },
  1149. },
  1150. {
  1151. 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
  1152. 'note': '256k DASH audio (format 141) via DASH manifest',
  1153. 'info_dict': {
  1154. 'id': 'a9LDPn-MO4I',
  1155. 'ext': 'm4a',
  1156. 'upload_date': '20121002',
  1157. 'uploader_id': '8KVIDEO',
  1158. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
  1159. 'description': '',
  1160. 'uploader': '8KVIDEO',
  1161. 'title': 'UHDTV TEST 8K VIDEO.mp4'
  1162. },
  1163. 'params': {
  1164. 'youtube_include_dash_manifest': True,
  1165. 'format': '141',
  1166. },
  1167. 'skip': 'format 141 not served anymore',
  1168. },
  1169. # DASH manifest with encrypted signature
  1170. {
  1171. 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
  1172. 'info_dict': {
  1173. 'id': 'IB3lcPjvWLA',
  1174. 'ext': 'm4a',
  1175. 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
  1176. 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
  1177. 'duration': 244,
  1178. 'uploader': 'AfrojackVEVO',
  1179. 'uploader_id': 'AfrojackVEVO',
  1180. 'upload_date': '20131011',
  1181. 'abr': 129.495,
  1182. 'like_count': int,
  1183. 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
  1184. 'playable_in_embed': True,
  1185. 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
  1186. 'view_count': int,
  1187. 'track': 'The Spark',
  1188. 'live_status': 'not_live',
  1189. 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
  1190. 'channel': 'Afrojack',
  1191. 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
  1192. 'tags': 'count:19',
  1193. 'availability': 'public',
  1194. 'categories': ['Music'],
  1195. 'age_limit': 0,
  1196. 'alt_title': 'The Spark',
  1197. 'channel_follower_count': int
  1198. },
  1199. 'params': {
  1200. 'youtube_include_dash_manifest': True,
  1201. 'format': '141/bestaudio[ext=m4a]',
  1202. },
  1203. },
  1204. # Age-gate videos. See https://github.com/hypervideo/hypervideo/pull/575#issuecomment-888837000
  1205. {
  1206. 'note': 'Embed allowed age-gate video',
  1207. 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
  1208. 'info_dict': {
  1209. 'id': 'HtVdAasjOgU',
  1210. 'ext': 'mp4',
  1211. 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
  1212. 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
  1213. 'duration': 142,
  1214. 'uploader': 'The Witcher',
  1215. 'uploader_id': 'WitcherGame',
  1216. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
  1217. 'upload_date': '20140605',
  1218. 'age_limit': 18,
  1219. 'categories': ['Gaming'],
  1220. 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
  1221. 'availability': 'needs_auth',
  1222. 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
  1223. 'like_count': int,
  1224. 'channel': 'The Witcher',
  1225. 'live_status': 'not_live',
  1226. 'tags': 'count:17',
  1227. 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
  1228. 'playable_in_embed': True,
  1229. 'view_count': int,
  1230. 'channel_follower_count': int
  1231. },
  1232. },
  1233. {
  1234. 'note': 'Age-gate video with embed allowed in public site',
  1235. 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
  1236. 'info_dict': {
  1237. 'id': 'HsUATh_Nc2U',
  1238. 'ext': 'mp4',
  1239. 'title': 'Godzilla 2 (Official Video)',
  1240. 'description': 'md5:bf77e03fcae5529475e500129b05668a',
  1241. 'upload_date': '20200408',
  1242. 'uploader_id': 'FlyingKitty900',
  1243. 'uploader': 'FlyingKitty',
  1244. 'age_limit': 18,
  1245. 'availability': 'needs_auth',
  1246. 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
  1247. 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
  1248. 'channel': 'FlyingKitty',
  1249. 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
  1250. 'view_count': int,
  1251. 'categories': ['Entertainment'],
  1252. 'live_status': 'not_live',
  1253. 'tags': ['Flyingkitty', 'godzilla 2'],
  1254. 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
  1255. 'like_count': int,
  1256. 'duration': 177,
  1257. 'playable_in_embed': True,
  1258. 'channel_follower_count': int
  1259. },
  1260. },
  1261. {
  1262. 'note': 'Age-gate video embedable only with clientScreen=EMBED',
  1263. 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
  1264. 'info_dict': {
  1265. 'id': 'Tq92D6wQ1mg',
  1266. 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
  1267. 'ext': 'mp4',
  1268. 'upload_date': '20191228',
  1269. 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
  1270. 'uploader': 'Projekt Melody',
  1271. 'description': 'md5:17eccca93a786d51bc67646756894066',
  1272. 'age_limit': 18,
  1273. 'like_count': int,
  1274. 'availability': 'needs_auth',
  1275. 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
  1276. 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
  1277. 'view_count': int,
  1278. 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
  1279. 'channel': 'Projekt Melody',
  1280. 'live_status': 'not_live',
  1281. 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
  1282. 'playable_in_embed': True,
  1283. 'categories': ['Entertainment'],
  1284. 'duration': 106,
  1285. 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
  1286. 'comment_count': int,
  1287. 'channel_follower_count': int
  1288. },
  1289. },
  1290. {
  1291. 'note': 'Non-Agegated non-embeddable video',
  1292. 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
  1293. 'info_dict': {
  1294. 'id': 'MeJVWBSsPAY',
  1295. 'ext': 'mp4',
  1296. 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
  1297. 'uploader': 'Herr Lurik',
  1298. 'uploader_id': 'st3in234',
  1299. 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
  1300. 'upload_date': '20130730',
  1301. 'track': 'Such mich find mich',
  1302. 'age_limit': 0,
  1303. 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
  1304. 'like_count': int,
  1305. 'playable_in_embed': False,
  1306. 'creator': 'OOMPH!',
  1307. 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
  1308. 'view_count': int,
  1309. 'alt_title': 'Such mich find mich',
  1310. 'duration': 210,
  1311. 'channel': 'Herr Lurik',
  1312. 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
  1313. 'categories': ['Music'],
  1314. 'availability': 'public',
  1315. 'uploader_url': 'http://www.youtube.com/user/st3in234',
  1316. 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
  1317. 'live_status': 'not_live',
  1318. 'artist': 'OOMPH!',
  1319. 'channel_follower_count': int
  1320. },
  1321. },
  1322. {
  1323. 'note': 'Non-bypassable age-gated video',
  1324. 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
  1325. 'only_matching': True,
  1326. },
  1327. # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
  1328. # YouTube Red ad is not captured for creator
  1329. {
  1330. 'url': '__2ABJjxzNo',
  1331. 'info_dict': {
  1332. 'id': '__2ABJjxzNo',
  1333. 'ext': 'mp4',
  1334. 'duration': 266,
  1335. 'upload_date': '20100430',
  1336. 'uploader_id': 'deadmau5',
  1337. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
  1338. 'creator': 'deadmau5',
  1339. 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
  1340. 'uploader': 'deadmau5',
  1341. 'title': 'Deadmau5 - Some Chords (HD)',
  1342. 'alt_title': 'Some Chords',
  1343. 'availability': 'public',
  1344. 'tags': 'count:14',
  1345. 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
  1346. 'view_count': int,
  1347. 'live_status': 'not_live',
  1348. 'channel': 'deadmau5',
  1349. 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
  1350. 'like_count': int,
  1351. 'track': 'Some Chords',
  1352. 'artist': 'deadmau5',
  1353. 'playable_in_embed': True,
  1354. 'age_limit': 0,
  1355. 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
  1356. 'categories': ['Music'],
  1357. 'album': 'Some Chords',
  1358. 'channel_follower_count': int
  1359. },
  1360. 'expected_warnings': [
  1361. 'DASH manifest missing',
  1362. ]
  1363. },
  1364. # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
  1365. {
  1366. 'url': 'lqQg6PlCWgI',
  1367. 'info_dict': {
  1368. 'id': 'lqQg6PlCWgI',
  1369. 'ext': 'mp4',
  1370. 'duration': 6085,
  1371. 'upload_date': '20150827',
  1372. 'uploader_id': 'olympic',
  1373. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
  1374. 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
  1375. 'uploader': 'Olympics',
  1376. 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
  1377. 'like_count': int,
  1378. 'release_timestamp': 1343767800,
  1379. 'playable_in_embed': True,
  1380. 'categories': ['Sports'],
  1381. 'release_date': '20120731',
  1382. 'channel': 'Olympics',
  1383. 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
  1384. 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
  1385. 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
  1386. 'age_limit': 0,
  1387. 'availability': 'public',
  1388. 'live_status': 'was_live',
  1389. 'view_count': int,
  1390. 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
  1391. 'channel_follower_count': int
  1392. },
  1393. 'params': {
  1394. 'skip_download': 'requires avconv',
  1395. }
  1396. },
  1397. # Non-square pixels
  1398. {
  1399. 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
  1400. 'info_dict': {
  1401. 'id': '_b-2C3KPAM0',
  1402. 'ext': 'mp4',
  1403. 'stretched_ratio': 16 / 9.,
  1404. 'duration': 85,
  1405. 'upload_date': '20110310',
  1406. 'uploader_id': 'AllenMeow',
  1407. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
  1408. 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
  1409. 'uploader': '孫ᄋᄅ',
  1410. 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
  1411. 'playable_in_embed': True,
  1412. 'channel': '孫ᄋᄅ',
  1413. 'age_limit': 0,
  1414. 'tags': 'count:11',
  1415. 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
  1416. 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
  1417. 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
  1418. 'view_count': int,
  1419. 'categories': ['People & Blogs'],
  1420. 'like_count': int,
  1421. 'live_status': 'not_live',
  1422. 'availability': 'unlisted',
  1423. 'comment_count': int,
  1424. 'channel_follower_count': int
  1425. },
  1426. },
  1427. # url_encoded_fmt_stream_map is empty string
  1428. {
  1429. 'url': 'qEJwOuvDf7I',
  1430. 'info_dict': {
  1431. 'id': 'qEJwOuvDf7I',
  1432. 'ext': 'webm',
  1433. 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
  1434. 'description': '',
  1435. 'upload_date': '20150404',
  1436. 'uploader_id': 'spbelect',
  1437. 'uploader': 'Наблюдатели Петербурга',
  1438. },
  1439. 'params': {
  1440. 'skip_download': 'requires avconv',
  1441. },
  1442. 'skip': 'This live event has ended.',
  1443. },
  1444. # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
  1445. {
  1446. 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
  1447. 'info_dict': {
  1448. 'id': 'FIl7x6_3R5Y',
  1449. 'ext': 'webm',
  1450. 'title': 'md5:7b81415841e02ecd4313668cde88737a',
  1451. 'description': 'md5:116377fd2963b81ec4ce64b542173306',
  1452. 'duration': 220,
  1453. 'upload_date': '20150625',
  1454. 'uploader_id': 'dorappi2000',
  1455. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
  1456. 'uploader': 'dorappi2000',
  1457. 'formats': 'mincount:31',
  1458. },
  1459. 'skip': 'not actual anymore',
  1460. },
  1461. # DASH manifest with segment_list
  1462. {
  1463. 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
  1464. 'md5': '8ce563a1d667b599d21064e982ab9e31',
  1465. 'info_dict': {
  1466. 'id': 'CsmdDsKjzN8',
  1467. 'ext': 'mp4',
  1468. 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
  1469. 'uploader': 'Airtek',
  1470. 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
  1471. 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
  1472. 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
  1473. },
  1474. 'params': {
  1475. 'youtube_include_dash_manifest': True,
  1476. 'format': '135', # bestvideo
  1477. },
  1478. 'skip': 'This live event has ended.',
  1479. },
  1480. {
  1481. # Multifeed videos (multiple cameras), URL can be of any Camera
  1482. 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
  1483. 'info_dict': {
  1484. 'id': 'zaPI8MvL8pg',
  1485. 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
  1486. 'description': 'md5:563ccbc698b39298481ca3c571169519',
  1487. },
  1488. 'playlist': [{
  1489. 'info_dict': {
  1490. 'id': 'j5yGuxZ8lLU',
  1491. 'ext': 'mp4',
  1492. 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
  1493. 'uploader': 'WiiLikeToPlay',
  1494. 'description': 'md5:563ccbc698b39298481ca3c571169519',
  1495. 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
  1496. 'duration': 10120,
  1497. 'channel_follower_count': int,
  1498. 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
  1499. 'availability': 'public',
  1500. 'playable_in_embed': True,
  1501. 'upload_date': '20131105',
  1502. 'uploader_id': 'WiiRikeToPray',
  1503. 'categories': ['Gaming'],
  1504. 'live_status': 'was_live',
  1505. 'tags': 'count:24',
  1506. 'release_timestamp': 1383701910,
  1507. 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
  1508. 'comment_count': int,
  1509. 'age_limit': 0,
  1510. 'like_count': int,
  1511. 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
  1512. 'channel': 'WiiLikeToPlay',
  1513. 'view_count': int,
  1514. 'release_date': '20131106',
  1515. },
  1516. }, {
  1517. 'info_dict': {
  1518. 'id': 'zaPI8MvL8pg',
  1519. 'ext': 'mp4',
  1520. 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
  1521. 'uploader_id': 'WiiRikeToPray',
  1522. 'availability': 'public',
  1523. 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
  1524. 'channel': 'WiiLikeToPlay',
  1525. 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
  1526. 'channel_follower_count': int,
  1527. 'description': 'md5:563ccbc698b39298481ca3c571169519',
  1528. 'duration': 10108,
  1529. 'age_limit': 0,
  1530. 'like_count': int,
  1531. 'tags': 'count:24',
  1532. 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
  1533. 'uploader': 'WiiLikeToPlay',
  1534. 'release_timestamp': 1383701915,
  1535. 'comment_count': int,
  1536. 'upload_date': '20131105',
  1537. 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
  1538. 'release_date': '20131106',
  1539. 'playable_in_embed': True,
  1540. 'live_status': 'was_live',
  1541. 'categories': ['Gaming'],
  1542. 'view_count': int,
  1543. },
  1544. }, {
  1545. 'info_dict': {
  1546. 'id': 'R7r3vfO7Hao',
  1547. 'ext': 'mp4',
  1548. 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
  1549. 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
  1550. 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
  1551. 'like_count': int,
  1552. 'availability': 'public',
  1553. 'playable_in_embed': True,
  1554. 'upload_date': '20131105',
  1555. 'description': 'md5:563ccbc698b39298481ca3c571169519',
  1556. 'uploader_id': 'WiiRikeToPray',
  1557. 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
  1558. 'channel_follower_count': int,
  1559. 'tags': 'count:24',
  1560. 'release_date': '20131106',
  1561. 'uploader': 'WiiLikeToPlay',
  1562. 'comment_count': int,
  1563. 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
  1564. 'channel': 'WiiLikeToPlay',
  1565. 'categories': ['Gaming'],
  1566. 'release_timestamp': 1383701914,
  1567. 'live_status': 'was_live',
  1568. 'age_limit': 0,
  1569. 'duration': 10128,
  1570. 'view_count': int,
  1571. },
  1572. }],
  1573. 'params': {'skip_download': True},
  1574. },
  1575. {
  1576. # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
  1577. 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
  1578. 'info_dict': {
  1579. 'id': 'gVfLd0zydlo',
  1580. 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
  1581. },
  1582. 'playlist_count': 2,
  1583. 'skip': 'Not multifeed anymore',
  1584. },
  1585. {
  1586. 'url': 'https://vid.plus/FlRa-iH7PGw',
  1587. 'only_matching': True,
  1588. },
  1589. {
  1590. 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
  1591. 'only_matching': True,
  1592. },
  1593. {
  1594. # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
  1595. # Also tests cut-off URL expansion in video description (see
  1596. # https://github.com/ytdl-org/youtube-dl/issues/1892,
  1597. # https://github.com/ytdl-org/youtube-dl/issues/8164)
  1598. 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
  1599. 'info_dict': {
  1600. 'id': 'lsguqyKfVQg',
  1601. 'ext': 'mp4',
  1602. 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
  1603. 'alt_title': 'Dark Walk',
  1604. 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
  1605. 'duration': 133,
  1606. 'upload_date': '20151119',
  1607. 'uploader_id': 'IronSoulElf',
  1608. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
  1609. 'uploader': 'IronSoulElf',
  1610. 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
  1611. 'track': 'Dark Walk',
  1612. 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
  1613. 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
  1614. 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
  1615. 'categories': ['Film & Animation'],
  1616. 'view_count': int,
  1617. 'live_status': 'not_live',
  1618. 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
  1619. 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
  1620. 'tags': 'count:13',
  1621. 'availability': 'public',
  1622. 'channel': 'IronSoulElf',
  1623. 'playable_in_embed': True,
  1624. 'like_count': int,
  1625. 'age_limit': 0,
  1626. 'channel_follower_count': int
  1627. },
  1628. 'params': {
  1629. 'skip_download': True,
  1630. },
  1631. },
  1632. {
  1633. # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
  1634. 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
  1635. 'only_matching': True,
  1636. },
  1637. {
  1638. # Video with yt:stretch=17:0
  1639. 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
  1640. 'info_dict': {
  1641. 'id': 'Q39EVAstoRM',
  1642. 'ext': 'mp4',
  1643. 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
  1644. 'description': 'md5:ee18a25c350637c8faff806845bddee9',
  1645. 'upload_date': '20151107',
  1646. 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
  1647. 'uploader': 'CH GAMER DROID',
  1648. },
  1649. 'params': {
  1650. 'skip_download': True,
  1651. },
  1652. 'skip': 'This video does not exist.',
  1653. },
  1654. {
  1655. # Video with incomplete 'yt:stretch=16:'
  1656. 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
  1657. 'only_matching': True,
  1658. },
  1659. {
  1660. # Video licensed under Creative Commons
  1661. 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
  1662. 'info_dict': {
  1663. 'id': 'M4gD1WSo5mA',
  1664. 'ext': 'mp4',
  1665. 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
  1666. 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
  1667. 'duration': 721,
  1668. 'upload_date': '20150128',
  1669. 'uploader_id': 'BerkmanCenter',
  1670. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
  1671. 'uploader': 'The Berkman Klein Center for Internet & Society',
  1672. 'license': 'Creative Commons Attribution license (reuse allowed)',
  1673. 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
  1674. 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
  1675. 'like_count': int,
  1676. 'age_limit': 0,
  1677. 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
  1678. 'channel': 'The Berkman Klein Center for Internet & Society',
  1679. 'availability': 'public',
  1680. 'view_count': int,
  1681. 'categories': ['Education'],
  1682. 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
  1683. 'live_status': 'not_live',
  1684. 'playable_in_embed': True,
  1685. 'comment_count': int,
  1686. 'channel_follower_count': int,
  1687. 'chapters': list,
  1688. },
  1689. 'params': {
  1690. 'skip_download': True,
  1691. },
  1692. },
  1693. {
  1694. # Channel-like uploader_url
  1695. 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
  1696. 'info_dict': {
  1697. 'id': 'eQcmzGIKrzg',
  1698. 'ext': 'mp4',
  1699. 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
  1700. 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
  1701. 'duration': 4060,
  1702. 'upload_date': '20151120',
  1703. 'uploader': 'Bernie Sanders',
  1704. 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
  1705. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
  1706. 'license': 'Creative Commons Attribution license (reuse allowed)',
  1707. 'playable_in_embed': True,
  1708. 'tags': 'count:12',
  1709. 'like_count': int,
  1710. 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
  1711. 'age_limit': 0,
  1712. 'availability': 'public',
  1713. 'categories': ['News & Politics'],
  1714. 'channel': 'Bernie Sanders',
  1715. 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
  1716. 'view_count': int,
  1717. 'live_status': 'not_live',
  1718. 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
  1719. 'comment_count': int,
  1720. 'channel_follower_count': int,
  1721. 'chapters': list,
  1722. },
  1723. 'params': {
  1724. 'skip_download': True,
  1725. },
  1726. },
  1727. {
  1728. 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
  1729. 'only_matching': True,
  1730. },
  1731. {
  1732. # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
  1733. 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
  1734. 'only_matching': True,
  1735. },
  1736. {
  1737. # Rental video preview
  1738. 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
  1739. 'info_dict': {
  1740. 'id': 'uGpuVWrhIzE',
  1741. 'ext': 'mp4',
  1742. 'title': 'Piku - Trailer',
  1743. 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
  1744. 'upload_date': '20150811',
  1745. 'uploader': 'FlixMatrix',
  1746. 'uploader_id': 'FlixMatrixKaravan',
  1747. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
  1748. 'license': 'Standard YouTube License',
  1749. },
  1750. 'params': {
  1751. 'skip_download': True,
  1752. },
  1753. 'skip': 'This video is not available.',
  1754. },
  1755. {
  1756. # YouTube Red video with episode data
  1757. 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
  1758. 'info_dict': {
  1759. 'id': 'iqKdEhx-dD4',
  1760. 'ext': 'mp4',
  1761. 'title': 'Isolation - Mind Field (Ep 1)',
  1762. 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
  1763. 'duration': 2085,
  1764. 'upload_date': '20170118',
  1765. 'uploader': 'Vsauce',
  1766. 'uploader_id': 'Vsauce',
  1767. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
  1768. 'series': 'Mind Field',
  1769. 'season_number': 1,
  1770. 'episode_number': 1,
  1771. 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
  1772. 'tags': 'count:12',
  1773. 'view_count': int,
  1774. 'availability': 'public',
  1775. 'age_limit': 0,
  1776. 'channel': 'Vsauce',
  1777. 'episode': 'Episode 1',
  1778. 'categories': ['Entertainment'],
  1779. 'season': 'Season 1',
  1780. 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
  1781. 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
  1782. 'like_count': int,
  1783. 'playable_in_embed': True,
  1784. 'live_status': 'not_live',
  1785. 'channel_follower_count': int
  1786. },
  1787. 'params': {
  1788. 'skip_download': True,
  1789. },
  1790. 'expected_warnings': [
  1791. 'Skipping DASH manifest',
  1792. ],
  1793. },
  1794. {
  1795. # The following content has been identified by the YouTube community
  1796. # as inappropriate or offensive to some audiences.
  1797. 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
  1798. 'info_dict': {
  1799. 'id': '6SJNVb0GnPI',
  1800. 'ext': 'mp4',
  1801. 'title': 'Race Differences in Intelligence',
  1802. 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
  1803. 'duration': 965,
  1804. 'upload_date': '20140124',
  1805. 'uploader': 'New Century Foundation',
  1806. 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
  1807. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
  1808. },
  1809. 'params': {
  1810. 'skip_download': True,
  1811. },
  1812. 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
  1813. },
  1814. {
  1815. # itag 212
  1816. 'url': '1t24XAntNCY',
  1817. 'only_matching': True,
  1818. },
  1819. {
  1820. # geo restricted to JP
  1821. 'url': 'sJL6WA-aGkQ',
  1822. 'only_matching': True,
  1823. },
  1824. {
  1825. 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
  1826. 'only_matching': True,
  1827. },
  1828. {
  1829. 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
  1830. 'only_matching': True,
  1831. },
  1832. {
  1833. # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
  1834. 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
  1835. 'only_matching': True,
  1836. },
  1837. {
  1838. # DRM protected
  1839. 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
  1840. 'only_matching': True,
  1841. },
  1842. {
  1843. # Video with unsupported adaptive stream type formats
  1844. 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
  1845. 'info_dict': {
  1846. 'id': 'Z4Vy8R84T1U',
  1847. 'ext': 'mp4',
  1848. 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
  1849. 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  1850. 'duration': 433,
  1851. 'upload_date': '20130923',
  1852. 'uploader': 'Amelia Putri Harwita',
  1853. 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
  1854. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
  1855. 'formats': 'maxcount:10',
  1856. },
  1857. 'params': {
  1858. 'skip_download': True,
  1859. 'youtube_include_dash_manifest': False,
  1860. },
  1861. 'skip': 'not actual anymore',
  1862. },
  1863. {
  1864. # YouTube Music auto-generated description
  1865. 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
  1866. 'info_dict': {
  1867. 'id': 'MgNrAu2pzNs',
  1868. 'ext': 'mp4',
  1869. 'title': 'Voyeur Girl',
  1870. 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
  1871. 'upload_date': '20190312',
  1872. 'uploader': 'Stephen - Topic',
  1873. 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
  1874. 'artist': 'Stephen',
  1875. 'track': 'Voyeur Girl',
  1876. 'album': 'it\'s too much love to know my dear',
  1877. 'release_date': '20190313',
  1878. 'release_year': 2019,
  1879. 'alt_title': 'Voyeur Girl',
  1880. 'view_count': int,
  1881. 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
  1882. 'playable_in_embed': True,
  1883. 'like_count': int,
  1884. 'categories': ['Music'],
  1885. 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
  1886. 'channel': 'Stephen',
  1887. 'availability': 'public',
  1888. 'creator': 'Stephen',
  1889. 'duration': 169,
  1890. 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
  1891. 'age_limit': 0,
  1892. 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
  1893. 'tags': 'count:11',
  1894. 'live_status': 'not_live',
  1895. 'channel_follower_count': int
  1896. },
  1897. 'params': {
  1898. 'skip_download': True,
  1899. },
  1900. },
  1901. {
  1902. 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
  1903. 'only_matching': True,
  1904. },
  1905. {
  1906. # invalid -> valid video id redirection
  1907. 'url': 'DJztXj2GPfl',
  1908. 'info_dict': {
  1909. 'id': 'DJztXj2GPfk',
  1910. 'ext': 'mp4',
  1911. 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
  1912. 'description': 'md5:bf577a41da97918e94fa9798d9228825',
  1913. 'upload_date': '20090125',
  1914. 'uploader': 'Prochorowka',
  1915. 'uploader_id': 'Prochorowka',
  1916. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
  1917. 'artist': 'Panjabi MC',
  1918. 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
  1919. 'album': 'Beware of the Boys (Mundian To Bach Ke)',
  1920. },
  1921. 'params': {
  1922. 'skip_download': True,
  1923. },
  1924. 'skip': 'Video unavailable',
  1925. },
  1926. {
  1927. # empty description results in an empty string
  1928. 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
  1929. 'info_dict': {
  1930. 'id': 'x41yOUIvK2k',
  1931. 'ext': 'mp4',
  1932. 'title': 'IMG 3456',
  1933. 'description': '',
  1934. 'upload_date': '20170613',
  1935. 'uploader_id': 'ElevageOrVert',
  1936. 'uploader': 'ElevageOrVert',
  1937. 'view_count': int,
  1938. 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
  1939. 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
  1940. 'like_count': int,
  1941. 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
  1942. 'tags': [],
  1943. 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
  1944. 'availability': 'public',
  1945. 'age_limit': 0,
  1946. 'categories': ['Pets & Animals'],
  1947. 'duration': 7,
  1948. 'playable_in_embed': True,
  1949. 'live_status': 'not_live',
  1950. 'channel': 'ElevageOrVert',
  1951. 'channel_follower_count': int
  1952. },
  1953. 'params': {
  1954. 'skip_download': True,
  1955. },
  1956. },
  1957. {
  1958. # with '};' inside yt initial data (see [1])
  1959. # see [2] for an example with '};' inside ytInitialPlayerResponse
  1960. # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
  1961. # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
  1962. 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
  1963. 'info_dict': {
  1964. 'id': 'CHqg6qOn4no',
  1965. 'ext': 'mp4',
  1966. 'title': 'Part 77 Sort a list of simple types in c#',
  1967. 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
  1968. 'upload_date': '20130831',
  1969. 'uploader_id': 'kudvenkat',
  1970. 'uploader': 'kudvenkat',
  1971. 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
  1972. 'like_count': int,
  1973. 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
  1974. 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
  1975. 'live_status': 'not_live',
  1976. 'categories': ['Education'],
  1977. 'availability': 'public',
  1978. 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
  1979. 'tags': 'count:12',
  1980. 'playable_in_embed': True,
  1981. 'age_limit': 0,
  1982. 'view_count': int,
  1983. 'duration': 522,
  1984. 'channel': 'kudvenkat',
  1985. 'comment_count': int,
  1986. 'channel_follower_count': int,
  1987. 'chapters': list,
  1988. },
  1989. 'params': {
  1990. 'skip_download': True,
  1991. },
  1992. },
  1993. {
  1994. # another example of '};' in ytInitialData
  1995. 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
  1996. 'only_matching': True,
  1997. },
  1998. {
  1999. 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
  2000. 'only_matching': True,
  2001. },
  2002. {
  2003. # https://github.com/ytdl-org/youtube-dl/pull/28094
  2004. 'url': 'OtqTfy26tG0',
  2005. 'info_dict': {
  2006. 'id': 'OtqTfy26tG0',
  2007. 'ext': 'mp4',
  2008. 'title': 'Burn Out',
  2009. 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
  2010. 'upload_date': '20141120',
  2011. 'uploader': 'The Cinematic Orchestra - Topic',
  2012. 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
  2013. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
  2014. 'artist': 'The Cinematic Orchestra',
  2015. 'track': 'Burn Out',
  2016. 'album': 'Every Day',
  2017. 'like_count': int,
  2018. 'live_status': 'not_live',
  2019. 'alt_title': 'Burn Out',
  2020. 'duration': 614,
  2021. 'age_limit': 0,
  2022. 'view_count': int,
  2023. 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
  2024. 'creator': 'The Cinematic Orchestra',
  2025. 'channel': 'The Cinematic Orchestra',
  2026. 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
  2027. 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
  2028. 'availability': 'public',
  2029. 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
  2030. 'categories': ['Music'],
  2031. 'playable_in_embed': True,
  2032. 'channel_follower_count': int
  2033. },
  2034. 'params': {
  2035. 'skip_download': True,
  2036. },
  2037. },
  2038. {
  2039. # controversial video, only works with bpctr when authenticated with cookies
  2040. 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
  2041. 'only_matching': True,
  2042. },
  2043. {
  2044. # controversial video, requires bpctr/contentCheckOk
  2045. 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
  2046. 'info_dict': {
  2047. 'id': 'SZJvDhaSDnc',
  2048. 'ext': 'mp4',
  2049. 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
  2050. 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
  2051. 'uploader': 'CBS Mornings',
  2052. 'uploader_id': 'CBSThisMorning',
  2053. 'upload_date': '20140716',
  2054. 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
  2055. 'duration': 170,
  2056. 'categories': ['News & Politics'],
  2057. 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
  2058. 'view_count': int,
  2059. 'channel': 'CBS Mornings',
  2060. 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
  2061. 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
  2062. 'age_limit': 18,
  2063. 'availability': 'needs_auth',
  2064. 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
  2065. 'like_count': int,
  2066. 'live_status': 'not_live',
  2067. 'playable_in_embed': True,
  2068. 'channel_follower_count': int
  2069. }
  2070. },
  2071. {
  2072. # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
  2073. 'url': 'cBvYw8_A0vQ',
  2074. 'info_dict': {
  2075. 'id': 'cBvYw8_A0vQ',
  2076. 'ext': 'mp4',
  2077. 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
  2078. 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
  2079. 'upload_date': '20201120',
  2080. 'uploader': 'Walk around Japan',
  2081. 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
  2082. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
  2083. 'duration': 1456,
  2084. 'categories': ['Travel & Events'],
  2085. 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
  2086. 'view_count': int,
  2087. 'channel': 'Walk around Japan',
  2088. 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
  2089. 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
  2090. 'age_limit': 0,
  2091. 'availability': 'public',
  2092. 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
  2093. 'live_status': 'not_live',
  2094. 'playable_in_embed': True,
  2095. 'channel_follower_count': int
  2096. },
  2097. 'params': {
  2098. 'skip_download': True,
  2099. },
  2100. }, {
  2101. # Has multiple audio streams
  2102. 'url': 'WaOKSUlf4TM',
  2103. 'only_matching': True
  2104. }, {
  2105. # Requires Premium: has format 141 when requested using YTM url
  2106. 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
  2107. 'only_matching': True
  2108. }, {
  2109. # multiple subtitles with same lang_code
  2110. 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
  2111. 'only_matching': True,
  2112. }, {
  2113. # Force use of the android client fallback
  2114. 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
  2115. 'info_dict': {
  2116. 'id': 'YOelRv7fMxY',
  2117. 'title': 'DIGGING A SECRET TUNNEL Part 1',
  2118. 'ext': '3gp',
  2119. 'upload_date': '20210624',
  2120. 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
  2121. 'uploader': 'colinfurze',
  2122. 'uploader_id': 'colinfurze',
  2123. 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
  2124. 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
  2125. 'duration': 596,
  2126. 'categories': ['Entertainment'],
  2127. 'uploader_url': 'http://www.youtube.com/user/colinfurze',
  2128. 'view_count': int,
  2129. 'channel': 'colinfurze',
  2130. 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
  2131. 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
  2132. 'age_limit': 0,
  2133. 'availability': 'public',
  2134. 'like_count': int,
  2135. 'live_status': 'not_live',
  2136. 'playable_in_embed': True,
  2137. 'channel_follower_count': int,
  2138. 'chapters': list,
  2139. },
  2140. 'params': {
  2141. 'format': '17', # 3gp format available on android
  2142. 'extractor_args': {'youtube': {'player_client': ['android']}},
  2143. },
  2144. },
  2145. {
  2146. # Skip download of additional client configs (remix client config in this case)
  2147. 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
  2148. 'only_matching': True,
  2149. 'params': {
  2150. 'extractor_args': {'youtube': {'player_skip': ['configs']}},
  2151. },
  2152. }, {
  2153. # shorts
  2154. 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
  2155. 'only_matching': True,
  2156. }, {
  2157. 'note': 'Storyboards',
  2158. 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
  2159. 'info_dict': {
  2160. 'id': '5KLPxDtMqe8',
  2161. 'ext': 'mhtml',
  2162. 'format_id': 'sb0',
  2163. 'title': 'Your Brain is Plastic',
  2164. 'uploader_id': 'scishow',
  2165. 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
  2166. 'upload_date': '20140324',
  2167. 'uploader': 'SciShow',
  2168. 'like_count': int,
  2169. 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
  2170. 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
  2171. 'view_count': int,
  2172. 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
  2173. 'playable_in_embed': True,
  2174. 'tags': 'count:12',
  2175. 'uploader_url': 'http://www.youtube.com/user/scishow',
  2176. 'availability': 'public',
  2177. 'channel': 'SciShow',
  2178. 'live_status': 'not_live',
  2179. 'duration': 248,
  2180. 'categories': ['Education'],
  2181. 'age_limit': 0,
  2182. 'channel_follower_count': int,
  2183. 'chapters': list,
  2184. }, 'params': {'format': 'mhtml', 'skip_download': True}
  2185. }, {
  2186. # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
  2187. 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
  2188. 'info_dict': {
  2189. 'id': '2NUZ8W2llS4',
  2190. 'ext': 'mp4',
  2191. 'title': 'The NP that test your phone performance 🙂',
  2192. 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
  2193. 'uploader': 'Leon Nguyen',
  2194. 'uploader_id': 'VNSXIII',
  2195. 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
  2196. 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
  2197. 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
  2198. 'duration': 21,
  2199. 'view_count': int,
  2200. 'age_limit': 0,
  2201. 'categories': ['Gaming'],
  2202. 'tags': 'count:23',
  2203. 'playable_in_embed': True,
  2204. 'live_status': 'not_live',
  2205. 'upload_date': '20220103',
  2206. 'like_count': int,
  2207. 'availability': 'public',
  2208. 'channel': 'Leon Nguyen',
  2209. 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
  2210. 'comment_count': int,
  2211. 'channel_follower_count': int
  2212. }
  2213. }, {
  2214. # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
  2215. 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
  2216. 'info_dict': {
  2217. 'id': '2NUZ8W2llS4',
  2218. 'ext': 'mp4',
  2219. 'title': 'The NP that test your phone performance 🙂',
  2220. 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
  2221. 'uploader': 'Leon Nguyen',
  2222. 'uploader_id': 'VNSXIII',
  2223. 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
  2224. 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
  2225. 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
  2226. 'duration': 21,
  2227. 'view_count': int,
  2228. 'age_limit': 0,
  2229. 'categories': ['Gaming'],
  2230. 'tags': 'count:23',
  2231. 'playable_in_embed': True,
  2232. 'live_status': 'not_live',
  2233. 'upload_date': '20220102',
  2234. 'like_count': int,
  2235. 'availability': 'public',
  2236. 'channel': 'Leon Nguyen',
  2237. 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
  2238. 'comment_count': int,
  2239. 'channel_follower_count': int
  2240. },
  2241. 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
  2242. }, {
  2243. # Date text is for a premiered video; ensure upload date is in UTC (published 1641172509)
  2244. 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
  2245. 'info_dict': {
  2246. 'id': 'mzZzzBU6lrM',
  2247. 'ext': 'mp4',
  2248. 'title': 'I Met GeorgeNotFound In Real Life...',
  2249. 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
  2250. 'uploader': 'Quackity',
  2251. 'uploader_id': 'QuackityHQ',
  2252. 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
  2253. 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
  2254. 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
  2255. 'duration': 955,
  2256. 'view_count': int,
  2257. 'age_limit': 0,
  2258. 'categories': ['Entertainment'],
  2259. 'tags': 'count:26',
  2260. 'playable_in_embed': True,
  2261. 'live_status': 'not_live',
  2262. 'release_timestamp': 1641172509,
  2263. 'release_date': '20220103',
  2264. 'upload_date': '20220103',
  2265. 'like_count': int,
  2266. 'availability': 'public',
  2267. 'channel': 'Quackity',
  2268. 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
  2269. 'channel_follower_count': int
  2270. }
  2271. },
  2272. { # continuous livestream. Microformat upload date should be preferred.
  2273. # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
  2274. 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
  2275. 'info_dict': {
  2276. 'id': 'kgx4WGK0oNU',
  2277. 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
  2278. 'ext': 'mp4',
  2279. 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
  2280. 'availability': 'public',
  2281. 'age_limit': 0,
  2282. 'release_timestamp': 1637975704,
  2283. 'upload_date': '20210619',
  2284. 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
  2285. 'live_status': 'is_live',
  2286. 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
  2287. 'uploader': '阿鲍Abao',
  2288. 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
  2289. 'channel': 'Abao in Tokyo',
  2290. 'channel_follower_count': int,
  2291. 'release_date': '20211127',
  2292. 'tags': 'count:39',
  2293. 'categories': ['People & Blogs'],
  2294. 'like_count': int,
  2295. 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
  2296. 'view_count': int,
  2297. 'playable_in_embed': True,
  2298. 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
  2299. 'concurrent_view_count': int,
  2300. },
  2301. 'params': {'skip_download': True}
  2302. }, {
  2303. # Story. Requires specific player params to work.
  2304. 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
  2305. 'info_dict': {
  2306. 'id': 'vv8qTUWmulI',
  2307. 'ext': 'mp4',
  2308. 'availability': 'unlisted',
  2309. 'view_count': int,
  2310. 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
  2311. 'upload_date': '20220526',
  2312. 'categories': ['Education'],
  2313. 'title': 'Story',
  2314. 'channel': 'IT\'S HISTORY',
  2315. 'description': '',
  2316. 'uploader_id': 'BlastfromthePast',
  2317. 'duration': 12,
  2318. 'uploader': 'IT\'S HISTORY',
  2319. 'playable_in_embed': True,
  2320. 'age_limit': 0,
  2321. 'live_status': 'not_live',
  2322. 'tags': [],
  2323. 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
  2324. 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
  2325. 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
  2326. },
  2327. 'skip': 'stories get removed after some period of time',
  2328. }, {
  2329. 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
  2330. 'info_dict': {
  2331. 'id': 'tjjjtzRLHvA',
  2332. 'ext': 'mp4',
  2333. 'title': 'ハッシュタグ無し };if window.ytcsi',
  2334. 'upload_date': '20220323',
  2335. 'like_count': int,
  2336. 'availability': 'unlisted',
  2337. 'channel': 'nao20010128nao',
  2338. 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
  2339. 'age_limit': 0,
  2340. 'uploader': 'nao20010128nao',
  2341. 'uploader_id': 'nao20010128nao',
  2342. 'categories': ['Music'],
  2343. 'view_count': int,
  2344. 'description': '',
  2345. 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
  2346. 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
  2347. 'live_status': 'not_live',
  2348. 'playable_in_embed': True,
  2349. 'channel_follower_count': int,
  2350. 'duration': 6,
  2351. 'tags': [],
  2352. 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
  2353. }
  2354. }, {
  2355. # Prefer primary title+description language metadata by default
  2356. # Do not prefer translated description if primary is empty
  2357. 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
  2358. 'info_dict': {
  2359. 'id': 'el3E4MbxRqQ',
  2360. 'ext': 'mp4',
  2361. 'title': 'dlp test video 2 - primary sv no desc',
  2362. 'description': '',
  2363. 'channel': 'cole-dlp-test-acc',
  2364. 'tags': [],
  2365. 'view_count': int,
  2366. 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  2367. 'like_count': int,
  2368. 'playable_in_embed': True,
  2369. 'availability': 'unlisted',
  2370. 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
  2371. 'age_limit': 0,
  2372. 'duration': 5,
  2373. 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
  2374. 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  2375. 'live_status': 'not_live',
  2376. 'upload_date': '20220908',
  2377. 'categories': ['People & Blogs'],
  2378. 'uploader': 'cole-dlp-test-acc',
  2379. 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
  2380. },
  2381. 'params': {'skip_download': True}
  2382. }, {
  2383. # Extractor argument: prefer translated title+description
  2384. 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
  2385. 'info_dict': {
  2386. 'id': 'gHKT4uU8Zng',
  2387. 'ext': 'mp4',
  2388. 'channel': 'cole-dlp-test-acc',
  2389. 'tags': [],
  2390. 'duration': 5,
  2391. 'live_status': 'not_live',
  2392. 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
  2393. 'upload_date': '20220728',
  2394. 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
  2395. 'view_count': int,
  2396. 'categories': ['People & Blogs'],
  2397. 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
  2398. 'title': 'dlp test video title translated (fr)',
  2399. 'availability': 'public',
  2400. 'uploader': 'cole-dlp-test-acc',
  2401. 'age_limit': 0,
  2402. 'description': 'dlp test video description translated (fr)',
  2403. 'playable_in_embed': True,
  2404. 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  2405. 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  2406. },
  2407. 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
  2408. 'expected_warnings': [r'Preferring "fr" translated fields'],
  2409. }, {
  2410. 'note': '6 channel audio',
  2411. 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
  2412. 'only_matching': True,
  2413. }, {
  2414. 'note': 'Multiple HLS formats with same itag',
  2415. 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
  2416. 'info_dict': {
  2417. 'id': 'kX3nB4PpJko',
  2418. 'ext': 'mp4',
  2419. 'categories': ['Entertainment'],
  2420. 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
  2421. 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
  2422. 'live_status': 'not_live',
  2423. 'duration': 937,
  2424. 'channel_follower_count': int,
  2425. 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
  2426. 'title': 'Last To Take Hand Off Jet, Keeps It!',
  2427. 'channel': 'MrBeast',
  2428. 'playable_in_embed': True,
  2429. 'view_count': int,
  2430. 'upload_date': '20221112',
  2431. 'uploader': 'MrBeast',
  2432. 'uploader_id': 'MrBeast6000',
  2433. 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
  2434. 'age_limit': 0,
  2435. 'availability': 'public',
  2436. 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
  2437. 'like_count': int,
  2438. 'tags': [],
  2439. },
  2440. 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
  2441. }
  2442. ]
# Test cases for third-party pages that embed a YouTube video: each entry
# names the page URL and the metadata expected for the embedded video.
# NOTE(review): presumably consumed by the project's extractor test harness - confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the hash used for
        # 'description' below - verify that it is intended as a file checksum
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            # Only metadata is checked; the media itself is not downloaded
            'skip_download': True,
        }
    },
]
  2478. @classmethod
  2479. def suitable(cls, url):
  2480. from ..utils import parse_qs
  2481. qs = parse_qs(url)
  2482. if qs.get('list', [None])[0]:
  2483. return False
  2484. return super().suitable(url)
  2485. def __init__(self, *args, **kwargs):
  2486. super().__init__(*args, **kwargs)
  2487. self._code_cache = {}
  2488. self._player_cache = {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach fragment generators to the formats flagged with "is_from_start".

    The matching format dicts are modified in place: "is_live" and
    "fragments" are set on each of them. Nothing is returned.

    @param formats          List of format dicts; only entries with a truthy
                            "is_from_start" are processed
    @param live_start_time  Passed through to _live_dash_fragments
    @param is_live          Whether the video is live at the time of the call
    """
    # Serializes manifest refetches requested through mpd_feed
    lock = threading.Lock()
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses and formats once `delay` seconds have passed since the last fetch"""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # Replaces the format list seen by mpd_feed with the refreshed one
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = is_live
        # When not live, a copy of the format is handed over as the "manifestless" original format
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            # The generator callable itself is stored; it is invoked later with a ctx dict
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # Fragments are produced lazily from a generator started with an empty ctx
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
  2531. def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
  2532. FETCH_SPAN, MAX_DURATION = 5, 432000
  2533. mpd_url, stream_number, is_live = None, None, True
  2534. begin_index = 0
  2535. download_start_time = ctx.get('start') or time.time()
  2536. lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
  2537. if lack_early_segments:
  2538. self.report_warning(bug_reports_message(
  2539. 'Starting download from the last 120 hours of the live stream since '
  2540. 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
  2541. lack_early_segments = True
  2542. known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
  2543. fragments, fragment_base_url = None, None
  2544. def _extract_sequence_from_mpd(refresh_sequence, immediate):
  2545. nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
  2546. # Obtain from MPD's maximum seq value
  2547. old_mpd_url = mpd_url
  2548. last_error = ctx.pop('last_error', None)
  2549. expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
  2550. mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
  2551. or (mpd_url, stream_number, False))
  2552. if not refresh_sequence:
  2553. if expire_fast and not is_live:
  2554. return False, last_seq
  2555. elif old_mpd_url == mpd_url:
  2556. return True, last_seq
  2557. if manifestless_orig_fmt:
  2558. fmt_info = manifestless_orig_fmt
  2559. else:
  2560. try:
  2561. fmts, _ = self._extract_mpd_formats_and_subtitles(
  2562. mpd_url, None, note=False, errnote=False, fatal=False)
  2563. except ExtractorError:
  2564. fmts = None
  2565. if not fmts:
  2566. no_fragment_score += 2
  2567. return False, last_seq
  2568. fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
  2569. fragments = fmt_info['fragments']
  2570. fragment_base_url = fmt_info['fragment_base_url']
  2571. assert fragment_base_url
  2572. _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
  2573. return True, _last_seq
  2574. self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
  2575. while is_live:
  2576. fetch_time = time.time()
  2577. if no_fragment_score > 30:
  2578. return
  2579. if last_segment_url:
  2580. # Obtain from "X-Head-Seqnum" header value from each segment
  2581. try:
  2582. urlh = self._request_webpage(
  2583. last_segment_url, None, note=False, errnote=False, fatal=False)
  2584. except ExtractorError:
  2585. urlh = None
  2586. last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
  2587. if last_seq is None:
  2588. no_fragment_score += 2
  2589. last_segment_url = None
  2590. continue
  2591. else:
  2592. should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
  2593. no_fragment_score += 2
  2594. if not should_continue:
  2595. continue
  2596. if known_idx > last_seq:
  2597. last_segment_url = None
  2598. continue
  2599. last_seq += 1
  2600. if begin_index < 0 and known_idx < 0:
  2601. # skip from the start when it's negative value
  2602. known_idx = last_seq + begin_index
  2603. if lack_early_segments:
  2604. known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
  2605. try:
  2606. for idx in range(known_idx, last_seq):
  2607. # do not update sequence here or you'll get skipped some part of it
  2608. should_continue, _ = _extract_sequence_from_mpd(False, False)
  2609. if not should_continue:
  2610. known_idx = idx - 1
  2611. raise ExtractorError('breaking out of outer loop')
  2612. last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
  2613. yield {
  2614. 'url': last_segment_url,
  2615. 'fragment_count': last_seq,
  2616. }
  2617. if known_idx == last_seq:
  2618. no_fragment_score += 5
  2619. else:
  2620. no_fragment_score = 0
  2621. known_idx = last_seq
  2622. except ExtractorError:
  2623. continue
  2624. if manifestless_orig_fmt:
  2625. # Stop at the first iteration if running for post-live manifestless;
  2626. # fragment count no longer increase since it starts
  2627. break
  2628. time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
  2629. def _extract_player_url(self, *ytcfgs, webpage=None):
  2630. player_url = traverse_obj(
  2631. ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
  2632. get_all=False, expected_type=str)
  2633. if not player_url:
  2634. return
  2635. return urljoin('https://www.youtube.com', player_url)
  2636. def _download_player_url(self, video_id, fatal=False):
  2637. res = self._download_webpage(
  2638. 'https://www.youtube.com/iframe_api',
  2639. note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
  2640. if res:
  2641. player_version = self._search_regex(
  2642. r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
  2643. if player_version:
  2644. return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
  2645. def _signature_cache_id(self, example_sig):
  2646. """ Return a string representation of a signature """
  2647. return '.'.join(str(len(part)) for part in example_sig.split('.'))
  2648. @classmethod
  2649. def _extract_player_info(cls, player_url):
  2650. for player_re in cls._PLAYER_INFO_RE:
  2651. id_m = re.search(player_re, player_url)
  2652. if id_m:
  2653. break
  2654. else:
  2655. raise ExtractorError('Cannot identify player %r' % player_url)
  2656. return id_m.group('id')
  2657. def _load_player(self, video_id, player_url, fatal=True):
  2658. player_id = self._extract_player_info(player_url)
  2659. if player_id not in self._code_cache:
  2660. code = self._download_webpage(
  2661. player_url, video_id, fatal=fatal,
  2662. note='Downloading player ' + player_id,
  2663. errnote='Download of %s failed' % player_url)
  2664. if code:
  2665. self._code_cache[player_id] = code
  2666. return self._code_cache.get(player_id)
  2667. def _extract_signature_function(self, video_id, player_url, example_sig):
  2668. player_id = self._extract_player_info(player_url)
  2669. # Read from filesystem cache
  2670. func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
  2671. assert os.path.basename(func_id) == func_id
  2672. self.write_debug(f'Extracting signature function {func_id}')
  2673. cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
  2674. if not cache_spec:
  2675. code = self._load_player(video_id, player_url)
  2676. if code:
  2677. res = self._parse_sig_js(code)
  2678. test_string = ''.join(map(chr, range(len(example_sig))))
  2679. cache_spec = [ord(c) for c in res(test_string)]
  2680. self.cache.store('youtube-sigfuncs', func_id, cache_spec)
  2681. return lambda s: ''.join(s[i] for i in cache_spec)
  2682. def _print_sig_code(self, func, example_sig):
  2683. if not self.get_param('youtube_print_sig_code'):
  2684. return
  2685. def gen_sig_code(idxs):
  2686. def _genslice(start, end, step):
  2687. starts = '' if start == 0 else str(start)
  2688. ends = (':%d' % (end + step)) if end + step >= 0 else ':'
  2689. steps = '' if step == 1 else (':%d' % step)
  2690. return f's[{starts}{ends}{steps}]'
  2691. step = None
  2692. # Quelch pyflakes warnings - start will be set when step is set
  2693. start = '(Never used)'
  2694. for i, prev in zip(idxs[1:], idxs[:-1]):
  2695. if step is not None:
  2696. if i - prev == step:
  2697. continue
  2698. yield _genslice(start, prev, step)
  2699. step = None
  2700. continue
  2701. if i - prev in [-1, 1]:
  2702. step = i - prev
  2703. start = prev
  2704. continue
  2705. else:
  2706. yield 's[%d]' % prev
  2707. if step is None:
  2708. yield 's[%d]' % i
  2709. else:
  2710. yield _genslice(start, i, step)
  2711. test_string = ''.join(map(chr, range(len(example_sig))))
  2712. cache_res = func(test_string)
  2713. cache_spec = [ord(c) for c in cache_res]
  2714. expr_code = ' + '.join(gen_sig_code(cache_spec))
  2715. signature_id_tuple = '(%s)' % (
  2716. ', '.join(str(len(p)) for p in example_sig.split('.')))
  2717. code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
  2718. ' return %s\n') % (signature_id_tuple, expr_code)
  2719. self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it as a Python callable.

    @param jscode  Player JavaScript source
    @returns       Callable taking the signature string and returning the
                   result of the interpreted JS function
    """
    # The patterns are passed as one tuple to _search_regex; the named group
    # "sig" captures the function name.
    # NOTE(review): the order presumably encodes priority (newest first) - confirm
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next two patterns are identical to each other
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The interpreted function receives its arguments as a list
    return lambda s: initial_function([s])
  2742. def _cached(self, func, *cache_id):
  2743. def inner(*args, **kwargs):
  2744. if cache_id not in self._player_cache:
  2745. try:
  2746. self._player_cache[cache_id] = func(*args, **kwargs)
  2747. except ExtractorError as e:
  2748. self._player_cache[cache_id] = e
  2749. except Exception as e:
  2750. self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
  2751. ret = self._player_cache[cache_id]
  2752. if isinstance(ret, Exception):
  2753. raise ret
  2754. return ret
  2755. return inner
  2756. def _decrypt_signature(self, s, video_id, player_url):
  2757. """Turn the encrypted s field into a working signature"""
  2758. extract_sig = self._cached(
  2759. self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
  2760. func = extract_sig(video_id, player_url, s)
  2761. self._print_sig_code(func, s)
  2762. return func(s)
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The native JS interpreter is tried first; if it raises
    JSInterpreter.Exception, the function is executed with PhantomJS instead.
    Raises ExtractorError when player_url is None or the function code
    cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The extracted function is cached per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback could be set up: surface the interpreter error
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is an (argument names, function body) pair
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
  2792. def _extract_n_function_name(self, jscode):
  2793. funcname, idx = self._search_regex(
  2794. r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
  2795. jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
  2796. if not idx:
  2797. return funcname
  2798. return json.loads(js_to_json(self._search_regex(
  2799. rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
  2800. f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
  2801. def _extract_n_function_code(self, video_id, player_url):
  2802. player_id = self._extract_player_info(player_url)
  2803. func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
  2804. jscode = func_code or self._load_player(video_id, player_url)
  2805. jsi = JSInterpreter(jscode)
  2806. if func_code:
  2807. return jsi, player_id, func_code
  2808. func_name = self._extract_n_function_name(jscode)
  2809. # For redundancy
  2810. func_code = self._search_regex(
  2811. r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
  2812. # NB: The end of the regex is intentionally kept strict
  2813. {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
  2814. jscode, 'nsig function', group=('var', 'code'), default=None)
  2815. if func_code:
  2816. func_code = ([func_code[0]], func_code[1])
  2817. else:
  2818. self.write_debug('Extracting nsig function with jsinterp')
  2819. func_code = jsi.extract_function_code(func_name)
  2820. self.cache.store('youtube-nsig', player_id, func_code)
  2821. return jsi, player_id, func_code
  2822. def _extract_n_function_from_code(self, jsi, func_code):
  2823. func = jsi.extract_function_from_code(*func_code)
  2824. def extract_nsig(s):
  2825. try:
  2826. ret = func([s])
  2827. except JSInterpreter.Exception:
  2828. raise
  2829. except Exception as e:
  2830. raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
  2831. if ret.startswith('enhanced_except_'):
  2832. raise JSInterpreter.Exception('Signature function returned an exception')
  2833. return ret
  2834. return extract_nsig
  2835. def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
  2836. """
  2837. Extract signatureTimestamp (sts)
  2838. Required to tell API what sig/player version is in use.
  2839. """
  2840. sts = None
  2841. if isinstance(ytcfg, dict):
  2842. sts = int_or_none(ytcfg.get('STS'))
  2843. if not sts:
  2844. # Attempt to extract from player
  2845. if player_url is None:
  2846. error_msg = 'Cannot extract signature timestamp without player_url.'
  2847. if fatal:
  2848. raise ExtractorError(error_msg)
  2849. self.report_warning(error_msg)
  2850. return
  2851. code = self._load_player(video_id, player_url, fatal=fatal)
  2852. if code:
  2853. sts = int_or_none(self._search_regex(
  2854. r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
  2855. 'JS player signature timestamp', group='sts', fatal=fatal))
  2856. return sts
  2857. def _mark_watched(self, video_id, player_responses):
  2858. for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
  2859. label = 'fully ' if is_full else ''
  2860. url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
  2861. expected_type=url_or_none)
  2862. if not url:
  2863. self.report_warning(f'Unable to mark {label}watched')
  2864. return
  2865. parsed_url = urllib.parse.urlparse(url)
  2866. qs = urllib.parse.parse_qs(parsed_url.query)
  2867. # cpn generation algorithm is reverse engineered from base.js.
  2868. # In fact it works even with dummy cpn.
  2869. CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
  2870. cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
  2871. # # more consistent results setting it to right before the end
  2872. video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
  2873. qs.update({
  2874. 'ver': ['2'],
  2875. 'cpn': [cpn],
  2876. 'cmt': video_length,
  2877. 'el': 'detailpage', # otherwise defaults to "shorts"
  2878. })
  2879. if is_full:
  2880. # these seem to mark watchtime "history" in the real world
  2881. # they're required, so send in a single value
  2882. qs.update({
  2883. 'st': 0,
  2884. 'et': video_length,
  2885. })
  2886. url = urllib.parse.urlunparse(
  2887. parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
  2888. self._download_webpage(
  2889. url, video_id, f'Marking {label}watched',
  2890. 'Unable to mark watched', fatal=False)
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url_result entries for YouTube videos referenced by a webpage.

    A matching <link rel="alternate"> is yielded alone and ends the
    extraction via cls.StopExtraction; otherwise the parent class results
    are yielded, followed by lazyYT and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/hypervideo/hypervideo/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    mobj = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if mobj:
        yield cls.url_result(mobj.group('url'), cls)
        # Nothing else on such a page is considered
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    # (verbose-mode regex: the line breaks inside the pattern are not significant)
    for m in re.findall(r'''(?x)<div[^>]+
class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # findall yields one tuple per match; the last group is the video id
        yield cls.url_result(m[-1], cls, m[-1])
  2911. @classmethod
  2912. def extract_id(cls, url):
  2913. video_id = cls.get_temp_id(url)
  2914. if not video_id:
  2915. raise ExtractorError(f'Invalid URL: {url}')
  2916. return video_id
  2917. def _extract_chapters_from_json(self, data, duration):
  2918. chapter_list = traverse_obj(
  2919. data, (
  2920. 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
  2921. 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
  2922. ), expected_type=list)
  2923. return self._extract_chapters(
  2924. chapter_list,
  2925. chapter_time=lambda chapter: float_or_none(
  2926. traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
  2927. chapter_title=lambda chapter: traverse_obj(
  2928. chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
  2929. duration=duration)
  2930. def _extract_chapters_from_engagement_panel(self, data, duration):
  2931. content_list = traverse_obj(
  2932. data,
  2933. ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
  2934. expected_type=list, default=[])
  2935. chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
  2936. chapter_title = lambda chapter: self._get_text(chapter, 'title')
  2937. return next(filter(None, (
  2938. self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
  2939. chapter_time, chapter_title, duration)
  2940. for contents in content_list)), [])
  2941. def _extract_chapters_from_description(self, description, duration):
  2942. duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
  2943. sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
  2944. return self._extract_chapters(
  2945. re.findall(sep_re % (duration_re, r'.+?'), description or ''),
  2946. chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
  2947. duration=duration, strict=False) or self._extract_chapters(
  2948. re.findall(sep_re % (r'.+?', duration_re), description or ''),
  2949. chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
  2950. duration=duration, strict=False)
  2951. def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
  2952. if not duration:
  2953. return
  2954. chapter_list = [{
  2955. 'start_time': chapter_time(chapter),
  2956. 'title': chapter_title(chapter),
  2957. } for chapter in chapter_list or []]
  2958. if not strict:
  2959. chapter_list.sort(key=lambda c: c['start_time'] or 0)
  2960. chapters = [{'start_time': 0}]
  2961. for idx, chapter in enumerate(chapter_list):
  2962. if chapter['start_time'] is None:
  2963. self.report_warning(f'Incomplete chapter {idx}')
  2964. elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
  2965. chapters.append(chapter)
  2966. elif chapter not in chapters:
  2967. self.report_warning(
  2968. f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
  2969. return chapters[1:]
  2970. def _extract_comment(self, comment_renderer, parent=None):
  2971. comment_id = comment_renderer.get('commentId')
  2972. if not comment_id:
  2973. return
  2974. text = self._get_text(comment_renderer, 'contentText')
  2975. # Timestamp is an estimate calculated from the current time and time_text
  2976. time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
  2977. timestamp = self._parse_time_text(time_text)
  2978. author = self._get_text(comment_renderer, 'authorText')
  2979. author_id = try_get(comment_renderer,
  2980. lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
  2981. votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
  2982. lambda x: x['likeCount']), str)) or 0
  2983. author_thumbnail = try_get(comment_renderer,
  2984. lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
  2985. author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
  2986. is_favorited = 'creatorHeart' in (try_get(
  2987. comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
  2988. return {
  2989. 'id': comment_id,
  2990. 'text': text,
  2991. 'timestamp': timestamp,
  2992. 'time_text': time_text,
  2993. 'like_count': votes,
  2994. 'is_favorited': is_favorited,
  2995. 'author': author,
  2996. 'author_id': author_id,
  2997. 'author_thumbnail': author_thumbnail,
  2998. 'author_is_uploader': author_is_uploader,
  2999. 'parent': parent or 'root'
  3000. }
  def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
      """
      Generator yielding comment dicts by following comment continuations.

      Without `parent` this walks the top-level comment section: the first
      response is only used to read the header (estimated count and the
      continuation of the requested sort order), later responses are parsed
      for comment threads. Reply threads are fetched through recursive calls
      with `parent` set to the id of the parent comment.

      @param root_continuation_data  Renderer dict the first continuation is extracted from
      @param ytcfg                   Passed on to header generation and the API request
      @param video_id                Used for the generated continuation and for warnings
      @param parent                  Id of the parent comment for reply threads, None at top level
      @param tracker                 Dict of counters shared across the recursive calls;
                                     created here when not given
      Raises self.CommentsDisabled when a top-level call produced no comments
      and root_continuation_data carries a message text.
      """
      get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

      def extract_header(contents):
          """Read the comment count and return the continuation for the chosen sort order"""
          _continuation = None
          for content in contents:
              comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
              expected_comment_count = self._get_count(
                  comments_header_renderer, 'countText', 'commentsCount')

              if expected_comment_count:
                  tracker['est_total'] = expected_comment_count
                  self.to_screen(f'Downloading ~{expected_comment_count} comments')
              comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

              sort_menu_item = try_get(
                  comments_header_renderer,
                  lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
              sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

              _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
              if not _continuation:
                  continue

              sort_text = str_or_none(sort_menu_item.get('title'))
              if not sort_text:
                  sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
              self.to_screen('Sorting comments by %s' % sort_text.lower())
              break
          return _continuation

      def extract_thread(contents):
          """Yield the comments of one page; a bare `yield` (None) tells the caller to stop"""
          if not parent:
              tracker['current_page_thread'] = 0
          for content in contents:
              if not parent and tracker['total_parent_comments'] >= max_parents:
                  # Sentinel: the consumer below returns when it receives a falsy entry
                  yield
              comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
              comment_renderer = get_first(
                  (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                  expected_type=dict, default={})

              comment = self._extract_comment(comment_renderer, parent)
              if not comment:
                  continue

              tracker['running_total'] += 1
              tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
              yield comment

              # Attempt to get the replies
              comment_replies_renderer = try_get(
                  comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

              if comment_replies_renderer:
                  tracker['current_page_thread'] += 1
                  comment_entries_iter = self._comment_entries(
                      comment_replies_renderer, ytcfg, video_id,
                      parent=comment.get('id'), tracker=tracker)
                  # Cap replies both per thread and by the remaining overall reply budget
                  yield from itertools.islice(comment_entries_iter, min(
                      max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

      # Keeps track of counts across recursive calls
      if not tracker:
          tracker = dict(
              running_total=0,
              est_total=0,
              current_page_thread=0,
              total_parent_comments=0,
              total_reply_comments=0)

      # TODO: Deprecated
      # YouTube comments have a max depth of 2
      max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
      if max_depth:
          self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                              'Set max replies in the max-comments extractor argument instead')
      if max_depth == 1 and parent:
          return

      # Missing or non-numeric values fall back to sys.maxsize (i.e. no limit).
      # max_comments itself is not used in this method.
      max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
          lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

      continuation = self._extract_continuation(root_continuation_data)

      response = None
      is_forced_continuation = False
      is_first_continuation = parent is None
      if is_first_continuation and not continuation:
          # Sometimes you can get comments by generating the continuation yourself,
          # even if YouTube initially reports them being disabled - e.g. stories comments.
          # Note: if the comment section is actually disabled, YouTube may return a response with
          # required check_get_keys missing. So we will disable that check initially in this case.
          continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
          is_forced_continuation = True

      for page_num in itertools.count(0):
          if not continuation:
              break
          headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
          comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
          if page_num == 0:
              if is_first_continuation:
                  note_prefix = 'Downloading comment section API JSON'
              else:
                  note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                      tracker['current_page_thread'], comment_prog_str)
          else:
              note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                  ' ' if parent else '', ' replies' if parent else '',
                  page_num, comment_prog_str)
          try:
              response = self._extract_response(
                  item_id=None, query=continuation,
                  ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                  check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
          except ExtractorError as e:
              # Ignore incomplete data error for replies if retries didn't work.
              # This is to allow any other parent comments and comment threads to be downloaded.
              # See: https://github.com/hypervideo/hypervideo/issues/4669
              if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                  self.report_warning(
                      'Received incomplete data for a comment reply thread and retrying did not help. '
                      'Ignoring to let other comments be downloaded.')
                  # Give up on this reply thread. Falling through would re-process the
                  # previous page's `response` (duplicating its comments) and request
                  # the same failing continuation again.
                  return
              else:
                  raise
          is_forced_continuation = False
          continuation_contents = traverse_obj(
              response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

          continuation = None
          for continuation_section in continuation_contents:
              continuation_items = traverse_obj(
                  continuation_section,
                  (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                  get_all=False, expected_type=list) or []
              if is_first_continuation:
                  # The first top-level response only provides the header
                  continuation = extract_header(continuation_items)
                  is_first_continuation = False
                  if continuation:
                      break
                  continue

              for entry in extract_thread(continuation_items):
                  if not entry:
                      return
                  yield entry
              continuation = self._extract_continuation({'contents': continuation_items})
              if continuation:
                  break

      message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
      if message and not parent and tracker['running_total'] == 0:
          self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
          raise self.CommentsDisabled
  3137. @staticmethod
  3138. def _generate_comment_continuation(video_id):
  3139. """
  3140. Generates initial comment section continuation token from given video id
  3141. """
  3142. token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
  3143. return base64.b64encode(token.encode()).decode()
  def _get_comments(self, ytcfg, video_id, contents, webpage):
      """
      Entry for comment extraction.

      Returns a lazy iterator over the comments of the section identified as
      'comment-item-section', truncated to the first value of the
      `max_comments` extractor argument (no truncation when it is not a number).
      """

      def _real_comment_extract(contents):
          # Pick the first item section that is the comment section (None if absent)
          section_renderers = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
          renderer = None
          for candidate in section_renderers:
              if candidate.get('sectionIdentifier') == 'comment-item-section':
                  renderer = candidate
                  break
          yield from self._comment_entries(renderer, ytcfg, video_id)

      limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
      return itertools.islice(_real_comment_extract(contents), 0, limit)
  3153. @staticmethod
  3154. def _get_checkok_params():
  3155. return {'contentCheckOk': True, 'racyCheckOk': True}
  3156. @classmethod
  3157. def _generate_player_context(cls, sts=None):
  3158. context = {
  3159. 'html5Preference': 'HTML5_PREF_WANTS',
  3160. }
  3161. if sts is not None:
  3162. context['signatureTimestamp'] = sts
  3163. return {
  3164. 'playbackContext': {
  3165. 'contentPlaybackContext': context
  3166. },
  3167. **cls._get_checkok_params()
  3168. }
  @staticmethod
  def _is_agegated(player_response):
      """
      Tell whether a player response indicates an age gate.

      True when `playabilityStatus.desktopLegacyAgeGateReason` is set, or when
      the playability `status` or `reason` contains one of the known age gate
      markers (compared case-insensitively).
      """
      if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
          return True

      reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
      AGE_GATE_REASONS = (
          'confirm your age', 'age-restricted', 'inappropriate',  # reason
          'age_verification_required', 'age_check_required',  # status
      )
      # The markers are lower-case, while statuses are compared in upper-case
      # elsewhere in this class (e.g. 'UNPLAYABLE'), so normalise before matching.
      # Non-string entries are skipped instead of raising on the `in` test.
      normalized = [reason.lower() for reason in reasons or () if isinstance(reason, str)]
      return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
  @staticmethod
  def _is_unplayable(player_response):
      """Return whether `playabilityStatus.status` of the response equals 'UNPLAYABLE'."""
      status = traverse_obj(player_response, ('playabilityStatus', 'status'))
      return status == 'UNPLAYABLE'
  # Sent as `params` in the player API query (for stories and for the android
  # client) and as `pp` in the watch page query for stories.
  _STORY_PLAYER_PARAMS = '8AEB'
  def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
      """
      Request the `player` endpoint for one client.

      @param client        Innertube client name, used as default client and in the progress note
      @param player_url    When falsy, no signature timestamp is extracted or sent
      @param initial_pr    Only used for extracting the account sync id
      @param smuggled_data Dict; a truthy `is_story` adds the story player params
      @returns             The response of _extract_response, or None if it is falsy
      """
      session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
      syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

      if player_url:
          signature_timestamp = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
      else:
          signature_timestamp = None

      headers = self.generate_api_headers(
          ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

      player_query = {'videoId': video_id}
      wants_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
      if wants_story_params:
          player_query['params'] = self._STORY_PLAYER_PARAMS
      player_query.update(self._generate_player_context(signature_timestamp))

      progress_note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
      player_response = self._extract_response(
          item_id=video_id, ep='player', query=player_query,
          ytcfg=player_ytcfg, headers=headers, fatal=True,
          default_client=client,
          note=progress_note)
      return player_response or None
  def _get_requested_clients(self, url, smuggled_data):
      """
      Resolve the `player_client` extractor argument into an ordered client list.

      'default' expands to android and web, 'all' to every non-underscore
      client of INNERTUBE_CLIENTS sorted by descending priority; unknown names
      are skipped with a warning. With no usable client the default is used.
      For music URLs the existing `<client>_music` variants are appended.

      @returns  orderedSet() of the collected client names
      """
      requested_clients = []
      default = ['android', 'web']
      allowed_clients = sorted(
          (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
          key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
      for client in self._configuration_arg('player_client'):
          if client in allowed_clients:
              requested_clients.append(client)
          elif client == 'default':
              requested_clients.extend(default)
          elif client == 'all':
              requested_clients.extend(allowed_clients)
          else:
              self.report_warning(f'Skipping unsupported client {client}')
      if not requested_clients:
          # Copy, so that later extension does not modify `default` through an alias
          requested_clients = list(default)

      if smuggled_data.get('is_music_url') or self.is_music_url(url):
          # Build the additions first: extending a list from a generator that
          # iterates the same list would also visit the freshly appended names
          music_clients = [
              f'{client}_music' for client in requested_clients
              if f'{client}_music' in INNERTUBE_CLIENTS]
          requested_clients.extend(music_clients)

      return orderedSet(requested_clients)
  def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
      """
      Collect player responses for the requested clients.

      Clients are tried in the given order; further clients may be queued
      while iterating when a response is unplayable or age gated.

      @returns  Tuple (list of player responses, player url)
      Raises the last ExtractorError when no player response was collected;
      otherwise errors are only reported as warnings.
      """
      initial_pr = None
      if webpage:
          initial_pr = self._search_json(
              self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

      all_clients = set(clients)
      # Reversed so that pop() below hands out the clients in their original order
      clients = clients[::-1]
      collected = []

      def append_client(*client_names):
          """ Append the first client name that exists but not already used """
          for candidate in client_names:
              resolved = _split_innertube_client(candidate)[0]
              if resolved not in INNERTUBE_CLIENTS or resolved in all_clients:
                  continue
              clients.append(candidate)
              all_clients.add(resolved)
              return

      # Android player_response does not have microFormats which are needed for
      # extraction of some data. So we return the initial_pr with formats
      # stripped out even if not requested by the user
      # See: https://github.com/hypervideo/hypervideo/issues/501
      if initial_pr:
          stripped_pr = dict(initial_pr)
          stripped_pr['streamingData'] = None
          collected.append(stripped_pr)

      last_error = None
      tried_iframe_fallback = False
      player_url = None
      while clients:
          client, base_client, variant = _split_innertube_client(clients.pop())
          player_ytcfg = master_ytcfg if client == 'web' else {}
          if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
              player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

          player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
          require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
          if 'js' in self._configuration_arg('player_skip'):
              require_js_player = False
              player_url = None

          # The fallback download of the player url is attempted at most once
          if not player_url and not tried_iframe_fallback and require_js_player:
              player_url = self._download_player_url(video_id)
              tried_iframe_fallback = True

          try:
              if client == 'web' and initial_pr:
                  response = initial_pr
              else:
                  response = self._extract_player_response(
                      client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                      player_url if require_js_player else None, initial_pr, smuggled_data)
          except ExtractorError as e:
              # Only the most recent error is kept; the one it replaces becomes a warning
              if last_error:
                  self.report_warning(last_error)
              last_error = e
              continue

          if response:
              # YouTube may return a different video player response than expected.
              # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
              response_video_id = traverse_obj(response, ('videoDetails', 'videoId'))
              if response_video_id and response_video_id != video_id:
                  self.report_warning(
                      f'Skipping player response from {client} client (got player response for video "{response_video_id}" instead of "{video_id}")' + bug_reports_message())
              else:
                  collected.append(response)

          # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
          if variant == 'embedded' and self._is_unplayable(response) and self.is_authenticated:
              append_client(f'{base_client}_creator')
          elif self._is_agegated(response):
              if variant == 'tv_embedded':
                  append_client(f'{base_client}_embedded')
              elif not variant:
                  append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

      if last_error:
          if not collected:
              raise last_error
          self.report_warning(last_error)
      return collected, player_url
  3293. def _needs_live_processing(self, live_status, duration):
  3294. if (live_status == 'is_live' and self.get_param('live_from_start')
  3295. or live_status == 'post_live' and (duration or 0) > 4 * 3600):
  3296. return live_status
  def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
      """
      Generator over the formats of all player responses, followed by subtitles.

      First yields one format dict per usable entry of `formats` and
      `adaptiveFormats`, then the formats of the HLS and DASH manifests that
      are not skipped. The LAST yielded item is the merged subtitles dict of
      the manifests, not a format.

      @param streaming_data  List of `streamingData` dicts
      @param player_url      Needed for signature and nsig decryption; formats
                             with a signature cipher are dropped without it
      @param live_status     Live status string used to decide which manifests to skip
      @param duration        Video duration; used to spot formats with a much smaller duration
      """
      itags, stream_ids = collections.defaultdict(set), []
      itag_qualities, res_qualities = {}, {0: None}
      q = qualities([
          # Normally tiny is the smallest video-only formats. But
          # audio-only formats with unknown quality may get tagged as tiny
          'tiny',
          'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
          'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
      ])
      streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

      for fmt in streaming_formats:
          if fmt.get('targetDurationSec'):
              continue

          itag = str_or_none(fmt.get('itag'))
          audio_track = fmt.get('audioTrack') or {}
          stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
          if stream_id in stream_ids:
              continue

          quality = fmt.get('quality')
          height = int_or_none(fmt.get('height'))
          if quality == 'tiny' or not quality:
              # `or ''` also covers an explicit null, which a .get() default would not
              quality = (fmt.get('audioQuality') or '').lower() or quality
          # The 3gp format (17) in android client has a quality of "small",
          # but is actually worse than other formats
          if itag == '17':
              quality = 'tiny'
          if quality:
              if itag:
                  itag_qualities[itag] = quality
              if height:
                  res_qualities[height] = quality
          # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
          # (adding `&sq=0` to the URL) and parsing emsg box to determine the
          # number of fragment that would subsequently requested with (`&sq=N`)
          if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
              continue

          fmt_url = fmt.get('url')
          if not fmt_url:
              # No plain url: it has to be rebuilt from the signature cipher
              sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
              fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
              encrypted_sig = try_get(sc, lambda x: x['s'][0])
              if not all((sc, fmt_url, player_url, encrypted_sig)):
                  continue
              try:
                  fmt_url += '&%s=%s' % (
                      traverse_obj(sc, ('sp', -1)) or 'signature',
                      self._decrypt_signature(encrypted_sig, video_id, player_url)
                  )
              except ExtractorError as e:
                  self.report_warning('Signature extraction failed: Some formats may be missing',
                                      video_id=video_id, only_once=True)
                  self.write_debug(e, only_once=True)
                  continue

          query = parse_qs(fmt_url)
          throttled = False
          if query.get('n'):
              try:
                  decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                  fmt_url = update_url_query(fmt_url, {
                      'n': decrypt_nsig(query['n'][0], video_id, player_url)
                  })
              except ExtractorError as e:
                  phantomjs_hint = ''
                  if isinstance(e, JSInterpreter.Exception):
                      phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                        f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                  if player_url:
                      self.report_warning(
                          f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                          f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                      self.write_debug(e, only_once=True)
                  else:
                      self.report_warning(
                          'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                          video_id=video_id, only_once=True)
                  # The format is kept, but marked and deprioritized below
                  throttled = True

          tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
          language_preference = (
              10 if audio_track.get('audioIsDefault')
              else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
              else -1)
          # Some formats may have much smaller duration than others (possibly damaged during encoding)
          # E.g. 2-nOtRESiUc Ref: https://github.com/hypervideo/hypervideo/issues/2823
          # Make sure to avoid false positives with small duration differences.
          # E.g. __2ABJjxzNo, ySuUZEjARPY
          is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
          if is_damaged:
              self.report_warning(
                  f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
          dct = {
              'asr': int_or_none(fmt.get('audioSampleRate')),
              'filesize': int_or_none(fmt.get('contentLength')),
              'format_id': itag,
              'format_note': join_nonempty(
                  '%s%s' % (audio_track.get('displayName') or '',
                            ' (default)' if language_preference > 0 else ''),
                  # quality is None when the format has neither quality nor audioQuality
                  fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                  try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                  try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                  throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
              # Format 22 is likely to be damaged. See https://github.com/hypervideo/hypervideo/issues/3372
              'source_preference': -10 if throttled else -5 if itag == '22' else -1,
              'fps': int_or_none(fmt.get('fps')) or None,
              'audio_channels': fmt.get('audioChannels'),
              'height': height,
              'quality': q(quality),
              'has_drm': bool(fmt.get('drmFamilies')),
              'tbr': tbr,
              'url': fmt_url,
              'width': int_or_none(fmt.get('width')),
              'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                        'desc' if language_preference < -1 else ''),
              'language_preference': language_preference,
              # Strictly de-prioritize damaged and 3gp formats
              'preference': -10 if is_damaged else -2 if itag == '17' else None,
          }
          mime_mobj = re.match(
              r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
          if mime_mobj:
              dct['ext'] = mimetype2ext(mime_mobj.group(1))
              dct.update(parse_codecs(mime_mobj.group(2)))
          no_audio = dct.get('acodec') == 'none'
          no_video = dct.get('vcodec') == 'none'
          if no_audio:
              dct['vbr'] = tbr
          if no_video:
              dct['abr'] = tbr
          if no_audio or no_video:
              dct['downloader_options'] = {
                  # Youtube throttles chunks >~10M
                  'http_chunk_size': 10485760,
              }
              if dct.get('ext'):
                  dct['container'] = dct['ext'] + '_dash'

          if itag:
              # Remember what was emitted, so that duplicates from other
              # responses and from the manifests can be recognised
              itags[itag].add(('https', dct.get('language')))
              stream_ids.append(stream_id)
          yield dct

      needs_live_processing = self._needs_live_processing(live_status, duration)
      skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

      skip_manifests = set(self._configuration_arg('skip'))
      if (not self.get_param('youtube_include_hls_manifest', True)
              or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
              or needs_live_processing and skip_bad_formats):
          skip_manifests.add('hls')

      if not self.get_param('youtube_include_dash_manifest', True):
          skip_manifests.add('dash')
      if self._configuration_arg('include_live_dash'):
          self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                              'Use include_incomplete_formats extractor argument instead')
      elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
          skip_manifests.add('dash')

      def process_manifest_format(f, proto, itag):
          """Set format_id and quality of a manifest format; False if it is a duplicate"""
          key = (proto, f.get('language'))
          if key in itags[itag]:
              return False
          itags[itag].add(key)

          # Disambiguate the id when the same itag is available over several protocols
          if any(p != proto for p, _ in itags[itag]):
              f['format_id'] = f'{itag}-{proto}'
          elif itag:
              f['format_id'] = itag

          f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
          if f['quality'] == -1 and f.get('height'):
              # Unknown itag: borrow the quality of the closest known height
              f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
          return True

      subtitles = {}
      for sd in streaming_data:
          hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
          if hls_manifest_url:
              fmts, subs = self._extract_m3u8_formats_and_subtitles(
                  hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
              subtitles = self._merge_subtitles(subs, subtitles)
              for f in fmts:
                  if process_manifest_format(f, 'hls', self._search_regex(
                          r'/itag/(\d+)', f['url'], 'itag', default=None)):
                      yield f

          dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
          if dash_manifest_url:
              formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
              subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
              for f in formats:
                  if process_manifest_format(f, 'dash', f['format_id']):
                      f['filesize'] = int_or_none(self._search_regex(
                          r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                      if needs_live_processing:
                          f['is_from_start'] = True

                      yield f
      yield subtitles
  def _extract_storyboard(self, player_responses, duration):
      """
      Generator over storyboard formats built from the storyboard spec.

      The spec is a '|' separated string: a base url followed by one
      '#' separated entry per storyboard level. Malformed entries are
      reported with a warning and skipped.

      @param player_responses  Player responses searched for the storyboard spec
      @param duration          Video duration; without it nothing is yielded, since
                               fps and fragment durations are derived from it
      """
      spec = get_first(
          player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
      # The list is reversed, so pop() takes the base url that came first in the spec
      base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
      if not base_url:
          return
      if not duration:
          # A missing or zero duration would raise in the divisions below
          return
      L = len(spec) - 1
      for i, args in enumerate(spec):
          args = args.split('#')
          counts = list(map(int_or_none, args[:5]))
          if len(args) != 8 or not all(counts):
              self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
              continue
          width, height, frame_count, cols, rows = counts
          N, sigh = args[6:]

          url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
          # Each fragment is one sheet holding cols * rows frames
          fragment_count = frame_count / (cols * rows)
          fragment_duration = duration / fragment_count
          yield {
              'format_id': f'sb{i}',
              'format_note': 'storyboard',
              'ext': 'mhtml',
              'protocol': 'mhtml',
              'acodec': 'none',
              'vcodec': 'none',
              'url': url,
              'width': width,
              'height': height,
              'fps': frame_count / duration,
              'rows': rows,
              'columns': cols,
              'fragments': [{
                  'url': url.replace('$M', str(j)),
                  # The last sheet may be partially filled and therefore shorter
                  'duration': min(fragment_duration, duration - (j * fragment_duration)),
              } for j in range(math.ceil(fragment_count))],
          }
  def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
      """
      Download the watch page (unless skipped) and the player responses.

      The page is not requested when 'webpage' is listed in the `player_skip`
      extractor argument; `webpage` is None in that case.

      @returns  Tuple (webpage, master_ytcfg, player_responses, player_url)
      """
      skip_webpage = 'webpage' in self._configuration_arg('player_skip')
      webpage = None
      if not skip_webpage:
          page_query = {'bpctr': '9999999999', 'has_verified': '1'}
          if smuggled_data.get('is_story'):
              page_query['pp'] = self._STORY_PLAYER_PARAMS
          webpage = self._download_webpage(
              webpage_url, video_id, fatal=False, query=page_query)

      # Fall back to the default config when none can be extracted from the page
      master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

      requested_clients = self._get_requested_clients(url, smuggled_data)
      player_responses, player_url = self._extract_player_responses(
          requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

      return webpage, master_ytcfg, player_responses, player_url
  def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
      """
      Determine the live status and collect formats and subtitles.

      @returns  Tuple (live_broadcast_details, live_status, streaming_data, formats, subtitles);
                live_status is one of 'post_live', 'is_live', 'is_upcoming',
                'was_live', 'not_live' or None
      """
      live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

      is_live = get_first(video_details, 'isLive')
      if is_live is None:
          is_live = get_first(live_broadcast_details, 'isLiveNow')
      live_content = get_first(video_details, 'isLiveContent')
      is_upcoming = get_first(video_details, 'isUpcoming')
      post_live = get_first(video_details, 'isPostLiveDvr')

      # Checked in order of precedence
      if post_live:
          live_status = 'post_live'
      elif is_live:
          live_status = 'is_live'
      elif is_upcoming:
          live_status = 'is_upcoming'
      elif live_content:
          live_status = 'was_live'
      elif False in (is_live, live_content):
          # At least one of the flags is explicitly false (rather than missing)
          live_status = 'not_live'
      else:
          live_status = None

      streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
      # The generator yields the subtitles dict as its final item
      *formats, subtitles = self._extract_formats_and_subtitles(
          streaming_data, video_id, player_url, live_status, duration)

      return live_broadcast_details, live_status, streaming_data, formats, subtitles
  3552. def _real_extract(self, url):
  3553. url, smuggled_data = unsmuggle_url(url, {})
  3554. video_id = self._match_id(url)
  3555. base_url = self.http_scheme() + '//www.youtube.com/'
  3556. webpage_url = base_url + 'watch?v=' + video_id
  3557. webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
  3558. playability_statuses = traverse_obj(
  3559. player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
  3560. trailer_video_id = get_first(
  3561. playability_statuses,
  3562. ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
  3563. expected_type=str)
  3564. if trailer_video_id:
  3565. return self.url_result(
  3566. trailer_video_id, self.ie_key(), trailer_video_id)
  3567. search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
  3568. if webpage else (lambda x: None))
  3569. video_details = traverse_obj(
  3570. player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
  3571. microformats = traverse_obj(
  3572. player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
  3573. expected_type=dict, default=[])
  3574. translated_title = self._get_text(microformats, (..., 'title'))
  3575. video_title = (self._preferred_lang and translated_title
  3576. or get_first(video_details, 'title') # primary
  3577. or translated_title
  3578. or search_meta(['og:title', 'twitter:title', 'title']))
  3579. translated_description = self._get_text(microformats, (..., 'description'))
  3580. original_description = get_first(video_details, 'shortDescription')
  3581. video_description = (
  3582. self._preferred_lang and translated_description
  3583. # If original description is blank, it will be an empty string.
  3584. # Do not prefer translated description in this case.
  3585. or original_description if original_description is not None else translated_description)
  3586. multifeed_metadata_list = get_first(
  3587. player_responses,
  3588. ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
  3589. expected_type=str)
  3590. if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
  3591. if self.get_param('noplaylist'):
  3592. self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
  3593. else:
  3594. entries = []
  3595. feed_ids = []
  3596. for feed in multifeed_metadata_list.split(','):
  3597. # Unquote should take place before split on comma (,) since textual
  3598. # fields may contain comma as well (see
  3599. # https://github.com/ytdl-org/youtube-dl/issues/8536)
  3600. feed_data = urllib.parse.parse_qs(
  3601. urllib.parse.unquote_plus(feed))
  3602. def feed_entry(name):
  3603. return try_get(
  3604. feed_data, lambda x: x[name][0], str)
  3605. feed_id = feed_entry('id')
  3606. if not feed_id:
  3607. continue
  3608. feed_title = feed_entry('title')
  3609. title = video_title
  3610. if feed_title:
  3611. title += ' (%s)' % feed_title
  3612. entries.append({
  3613. '_type': 'url_transparent',
  3614. 'ie_key': 'Youtube',
  3615. 'url': smuggle_url(
  3616. '%swatch?v=%s' % (base_url, feed_data['id'][0]),
  3617. {'force_singlefeed': True}),
  3618. 'title': title,
  3619. })
  3620. feed_ids.append(feed_id)
  3621. self.to_screen(
  3622. 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
  3623. % (', '.join(feed_ids), video_id))
  3624. return self.playlist_result(
  3625. entries, video_id, video_title, video_description)
  3626. duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
  3627. or int_or_none(get_first(microformats, 'lengthSeconds'))
  3628. or parse_duration(search_meta('duration')) or None)
  3629. live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
  3630. self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
  3631. if live_status == 'post_live':
  3632. self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
  3633. if not formats:
  3634. if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
  3635. self.report_drm(video_id)
  3636. pemr = get_first(
  3637. playability_statuses,
  3638. ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
  3639. reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
  3640. subreason = clean_html(self._get_text(pemr, 'subreason') or '')
  3641. if subreason:
  3642. if subreason == 'The uploader has not made this video available in your country.':
  3643. countries = get_first(microformats, 'availableCountries')
  3644. if not countries:
  3645. regions_allowed = search_meta('regionsAllowed')
  3646. countries = regions_allowed.split(',') if regions_allowed else None
  3647. self.raise_geo_restricted(subreason, countries, metadata_available=True)
  3648. reason += f'. {subreason}'
  3649. if reason:
  3650. self.raise_no_formats(reason, expected=True)
  3651. keywords = get_first(video_details, 'keywords', expected_type=list) or []
  3652. if not keywords and webpage:
  3653. keywords = [
  3654. unescapeHTML(m.group('content'))
  3655. for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
  3656. for keyword in keywords:
  3657. if keyword.startswith('yt:stretch='):
  3658. mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
  3659. if mobj:
  3660. # NB: float is intentional for forcing float division
  3661. w, h = (float(v) for v in mobj.groups())
  3662. if w > 0 and h > 0:
  3663. ratio = w / h
  3664. for f in formats:
  3665. if f.get('vcodec') != 'none':
  3666. f['stretched_ratio'] = ratio
  3667. break
  3668. thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
  3669. thumbnail_url = search_meta(['og:image', 'twitter:image'])
  3670. if thumbnail_url:
  3671. thumbnails.append({
  3672. 'url': thumbnail_url,
  3673. })
  3674. original_thumbnails = thumbnails.copy()
  3675. # The best resolution thumbnails sometimes does not appear in the webpage
  3676. # See: https://github.com/hypervideo/hypervideo/issues/340
  3677. # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
  3678. thumbnail_names = [
  3679. # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
  3680. # in resolution, these are not the custom thumbnail. So de-prioritize them
  3681. 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
  3682. 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
  3683. ]
  3684. n_thumbnail_names = len(thumbnail_names)
  3685. thumbnails.extend({
  3686. 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
  3687. video_id=video_id, name=name, ext=ext,
  3688. webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
  3689. } for name in thumbnail_names for ext in ('webp', 'jpg'))
  3690. for thumb in thumbnails:
  3691. i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
  3692. thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
  3693. self._remove_duplicate_formats(thumbnails)
  3694. self._downloader._sort_thumbnails(original_thumbnails)
  3695. category = get_first(microformats, 'category') or search_meta('genre')
  3696. channel_id = str_or_none(
  3697. get_first(video_details, 'channelId')
  3698. or get_first(microformats, 'externalChannelId')
  3699. or search_meta('channelId'))
  3700. owner_profile_url = get_first(microformats, 'ownerProfileUrl')
  3701. live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
  3702. live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
  3703. if not duration and live_end_time and live_start_time:
  3704. duration = live_end_time - live_start_time
  3705. needs_live_processing = self._needs_live_processing(live_status, duration)
  3706. def is_bad_format(fmt):
  3707. if needs_live_processing and not fmt.get('is_from_start'):
  3708. return True
  3709. elif (live_status == 'is_live' and needs_live_processing != 'is_live'
  3710. and fmt.get('protocol') == 'http_dash_segments'):
  3711. return True
  3712. for fmt in filter(is_bad_format, formats):
  3713. fmt['preference'] = (fmt.get('preference') or -1) - 10
  3714. fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
  3715. if needs_live_processing:
  3716. self._prepare_live_from_start_formats(
  3717. formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
  3718. formats.extend(self._extract_storyboard(player_responses, duration))
  3719. info = {
  3720. 'id': video_id,
  3721. 'title': video_title,
  3722. 'formats': formats,
  3723. 'thumbnails': thumbnails,
  3724. # The best thumbnail that we are sure exists. Prevents unnecessary
  3725. # URL checking if user don't care about getting the best possible thumbnail
  3726. 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
  3727. 'description': video_description,
  3728. 'uploader': get_first(video_details, 'author'),
  3729. 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
  3730. 'uploader_url': owner_profile_url,
  3731. 'channel_id': channel_id,
  3732. 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
  3733. 'duration': duration,
  3734. 'view_count': int_or_none(
  3735. get_first((video_details, microformats), (..., 'viewCount'))
  3736. or search_meta('interactionCount')),
  3737. 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
  3738. 'age_limit': 18 if (
  3739. get_first(microformats, 'isFamilySafe') is False
  3740. or search_meta('isFamilyFriendly') == 'false'
  3741. or search_meta('og:restrictions:age') == '18+') else 0,
  3742. 'webpage_url': webpage_url,
  3743. 'categories': [category] if category else None,
  3744. 'tags': keywords,
  3745. 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
  3746. 'live_status': live_status,
  3747. 'release_timestamp': live_start_time,
  3748. '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
  3749. 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
  3750. }
  3751. subtitles = {}
  3752. pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
  3753. if pctr:
  3754. def get_lang_code(track):
  3755. return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
  3756. or track.get('languageCode'))
  3757. # Converted into dicts to remove duplicates
  3758. captions = {
  3759. get_lang_code(sub): sub
  3760. for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
  3761. translation_languages = {
  3762. lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
  3763. for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
  3764. def process_language(container, base_url, lang_code, sub_name, query):
  3765. lang_subs = container.setdefault(lang_code, [])
  3766. for fmt in self._SUBTITLE_FORMATS:
  3767. query.update({
  3768. 'fmt': fmt,
  3769. })
  3770. lang_subs.append({
  3771. 'ext': fmt,
  3772. 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
  3773. 'name': sub_name,
  3774. })
  3775. # NB: Constructing the full subtitle dictionary is slow
  3776. get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
  3777. self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
  3778. for lang_code, caption_track in captions.items():
  3779. base_url = caption_track.get('baseUrl')
  3780. orig_lang = parse_qs(base_url).get('lang', [None])[-1]
  3781. if not base_url:
  3782. continue
  3783. lang_name = self._get_text(caption_track, 'name', max_runs=1)
  3784. if caption_track.get('kind') != 'asr':
  3785. if not lang_code:
  3786. continue
  3787. process_language(
  3788. subtitles, base_url, lang_code, lang_name, {})
  3789. if not caption_track.get('isTranslatable'):
  3790. continue
  3791. for trans_code, trans_name in translation_languages.items():
  3792. if not trans_code:
  3793. continue
  3794. orig_trans_code = trans_code
  3795. if caption_track.get('kind') != 'asr' and trans_code != 'und':
  3796. if not get_translated_subs:
  3797. continue
  3798. trans_code += f'-{lang_code}'
  3799. trans_name += format_field(lang_name, None, ' from %s')
  3800. # Add an "-orig" label to the original language so that it can be distinguished.
  3801. # The subs are returned without "-orig" as well for compatibility
  3802. if lang_code == f'a-{orig_trans_code}':
  3803. process_language(
  3804. automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
  3805. # Setting tlang=lang returns damaged subtitles.
  3806. process_language(automatic_captions, base_url, trans_code, trans_name,
  3807. {} if orig_lang == orig_trans_code else {'tlang': trans_code})
  3808. info['automatic_captions'] = automatic_captions
  3809. info['subtitles'] = subtitles
  3810. parsed_url = urllib.parse.urlparse(url)
  3811. for component in [parsed_url.fragment, parsed_url.query]:
  3812. query = urllib.parse.parse_qs(component)
  3813. for k, v in query.items():
  3814. for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
  3815. d_k += '_time'
  3816. if d_k not in info and k in s_ks:
  3817. info[d_k] = parse_duration(query[k][0])
  3818. # Youtube Music Auto-generated description
  3819. if video_description:
  3820. mobj = re.search(
  3821. r'''(?xs)
  3822. (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
  3823. (?P<album>[^\n]+)
  3824. (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
  3825. (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
  3826. (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
  3827. .+\nAuto-generated\ by\ YouTube\.\s*$
  3828. ''', video_description)
  3829. if mobj:
  3830. release_year = mobj.group('release_year')
  3831. release_date = mobj.group('release_date')
  3832. if release_date:
  3833. release_date = release_date.replace('-', '')
  3834. if not release_year:
  3835. release_year = release_date[:4]
  3836. info.update({
  3837. 'album': mobj.group('album'.strip()),
  3838. 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
  3839. 'track': mobj.group('track').strip(),
  3840. 'release_date': release_date,
  3841. 'release_year': int_or_none(release_year),
  3842. })
  3843. initial_data = None
  3844. if webpage:
  3845. initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
  3846. if not initial_data:
  3847. query = {'videoId': video_id}
  3848. query.update(self._get_checkok_params())
  3849. initial_data = self._extract_response(
  3850. item_id=video_id, ep='next', fatal=False,
  3851. ytcfg=master_ytcfg, query=query,
  3852. headers=self.generate_api_headers(ytcfg=master_ytcfg),
  3853. note='Downloading initial data API JSON')
  3854. info['comment_count'] = traverse_obj(initial_data, (
  3855. 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
  3856. 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
  3857. ), (
  3858. 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
  3859. 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
  3860. ), expected_type=int_or_none, get_all=False)
  3861. try: # This will error if there is no livechat
  3862. initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
  3863. except (KeyError, IndexError, TypeError):
  3864. pass
  3865. else:
  3866. info.setdefault('subtitles', {})['live_chat'] = [{
  3867. # url is needed to set cookies
  3868. 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
  3869. 'video_id': video_id,
  3870. 'ext': 'json',
  3871. 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
  3872. else 'youtube_live_chat_replay'),
  3873. }]
  3874. if initial_data:
  3875. info['chapters'] = (
  3876. self._extract_chapters_from_json(initial_data, duration)
  3877. or self._extract_chapters_from_engagement_panel(initial_data, duration)
  3878. or self._extract_chapters_from_description(video_description, duration)
  3879. or None)
  3880. contents = traverse_obj(
  3881. initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
  3882. expected_type=list, default=[])
  3883. vpir = get_first(contents, 'videoPrimaryInfoRenderer')
  3884. if vpir:
  3885. stl = vpir.get('superTitleLink')
  3886. if stl:
  3887. stl = self._get_text(stl)
  3888. if try_get(
  3889. vpir,
  3890. lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
  3891. info['location'] = stl
  3892. else:
  3893. mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
  3894. if mobj:
  3895. info.update({
  3896. 'series': mobj.group(1),
  3897. 'season_number': int(mobj.group(2)),
  3898. 'episode_number': int(mobj.group(3)),
  3899. })
  3900. for tlb in (try_get(
  3901. vpir,
  3902. lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
  3903. list) or []):
  3904. tbrs = variadic(
  3905. traverse_obj(
  3906. tlb, 'toggleButtonRenderer',
  3907. ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
  3908. default=[]))
  3909. for tbr in tbrs:
  3910. for getter, regex in [(
  3911. lambda x: x['defaultText']['accessibility']['accessibilityData'],
  3912. r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
  3913. lambda x: x['accessibility'],
  3914. lambda x: x['accessibilityData']['accessibilityData'],
  3915. ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
  3916. label = (try_get(tbr, getter, dict) or {}).get('label')
  3917. if label:
  3918. mobj = re.match(regex, label)
  3919. if mobj:
  3920. info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
  3921. break
  3922. sbr_tooltip = try_get(
  3923. vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
  3924. if sbr_tooltip:
  3925. like_count, dislike_count = sbr_tooltip.split(' / ')
  3926. info.update({
  3927. 'like_count': str_to_int(like_count),
  3928. 'dislike_count': str_to_int(dislike_count),
  3929. })
  3930. vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
  3931. if vcr:
  3932. vc = self._get_count(vcr, 'viewCount')
  3933. # Upcoming premieres with waiting count are treated as live here
  3934. if vcr.get('isLive'):
  3935. info['concurrent_view_count'] = vc
  3936. elif info.get('view_count') is None:
  3937. info['view_count'] = vc
  3938. vsir = get_first(contents, 'videoSecondaryInfoRenderer')
  3939. if vsir:
  3940. vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
  3941. info.update({
  3942. 'channel': self._get_text(vor, 'title'),
  3943. 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
  3944. rows = try_get(
  3945. vsir,
  3946. lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
  3947. list) or []
  3948. multiple_songs = False
  3949. for row in rows:
  3950. if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
  3951. multiple_songs = True
  3952. break
  3953. for row in rows:
  3954. mrr = row.get('metadataRowRenderer') or {}
  3955. mrr_title = mrr.get('title')
  3956. if not mrr_title:
  3957. continue
  3958. mrr_title = self._get_text(mrr, 'title')
  3959. mrr_contents_text = self._get_text(mrr, ('contents', 0))
  3960. if mrr_title == 'License':
  3961. info['license'] = mrr_contents_text
  3962. elif not multiple_songs:
  3963. if mrr_title == 'Album':
  3964. info['album'] = mrr_contents_text
  3965. elif mrr_title == 'Artist':
  3966. info['artist'] = mrr_contents_text
  3967. elif mrr_title == 'Song':
  3968. info['track'] = mrr_contents_text
  3969. fallbacks = {
  3970. 'channel': 'uploader',
  3971. 'channel_id': 'uploader_id',
  3972. 'channel_url': 'uploader_url',
  3973. }
  3974. # The upload date for scheduled, live and past live streams / premieres in microformats
  3975. # may be different from the stream date. Although not in UTC, we will prefer it in this case.
  3976. # See: https://github.com/hypervideo/hypervideo/pull/2223#issuecomment-1008485139
  3977. upload_date = (
  3978. unified_strdate(get_first(microformats, 'uploadDate'))
  3979. or unified_strdate(search_meta('uploadDate')))
  3980. if not upload_date or (
  3981. live_status in ('not_live', None)
  3982. and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
  3983. ):
  3984. upload_date = strftime_or_none(
  3985. self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
  3986. info['upload_date'] = upload_date
  3987. for to, frm in fallbacks.items():
  3988. if not info.get(to):
  3989. info[to] = info.get(frm)
  3990. for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
  3991. v = info.get(s_k)
  3992. if v:
  3993. info[d_k] = v
  3994. badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
  3995. is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
  3996. or get_first(video_details, 'isPrivate', expected_type=bool))
  3997. info['availability'] = (
  3998. 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
  3999. else self._availability(
  4000. is_private=is_private,
  4001. needs_premium=(
  4002. self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
  4003. or False if initial_data and is_private is not None else None),
  4004. needs_subscription=(
  4005. self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
  4006. or False if initial_data and is_private is not None else None),
  4007. needs_auth=info['age_limit'] >= 18,
  4008. is_unlisted=None if is_private is None else (
  4009. self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
  4010. or get_first(microformats, 'isUnlisted', expected_type=bool))))
  4011. info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
  4012. self.mark_watched(video_id, player_responses)
  4013. return info
  4014. class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
  4015. @staticmethod
  4016. def passthrough_smuggled_data(func):
  4017. def _smuggle(info, smuggled_data):
  4018. if info.get('_type') not in ('url', 'url_transparent'):
  4019. return info
  4020. if smuggled_data.get('is_music_url'):
  4021. parsed_url = urllib.parse.urlparse(info['url'])
  4022. if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
  4023. smuggled_data.pop('is_music_url')
  4024. info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
  4025. if smuggled_data:
  4026. info['url'] = smuggle_url(info['url'], smuggled_data)
  4027. return info
  4028. @functools.wraps(func)
  4029. def wrapper(self, url):
  4030. url, smuggled_data = unsmuggle_url(url, {})
  4031. if self.is_music_url(url):
  4032. smuggled_data['is_music_url'] = True
  4033. info_dict = func(self, url, smuggled_data)
  4034. if smuggled_data:
  4035. _smuggle(info_dict, smuggled_data)
  4036. if info_dict.get('entries'):
  4037. info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
  4038. return info_dict
  4039. return wrapper
  4040. def _extract_channel_id(self, webpage):
  4041. channel_id = self._html_search_meta(
  4042. 'channelId', webpage, 'channel id', default=None)
  4043. if channel_id:
  4044. return channel_id
  4045. channel_url = self._html_search_meta(
  4046. ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
  4047. 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
  4048. 'twitter:app:url:googleplay'), webpage, 'channel url')
  4049. return self._search_regex(
  4050. r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
  4051. channel_url, 'channel id')
  4052. @staticmethod
  4053. def _extract_basic_item_renderer(item):
  4054. # Modified from _extract_grid_item_renderer
  4055. known_basic_renderers = (
  4056. 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
  4057. )
  4058. for key, renderer in item.items():
  4059. if not isinstance(renderer, dict):
  4060. continue
  4061. elif key in known_basic_renderers:
  4062. return renderer
  4063. elif key.startswith('grid') and key.endswith('Renderer'):
  4064. return renderer
  4065. def _grid_entries(self, grid_renderer):
  4066. for item in grid_renderer['items']:
  4067. if not isinstance(item, dict):
  4068. continue
  4069. renderer = self._extract_basic_item_renderer(item)
  4070. if not isinstance(renderer, dict):
  4071. continue
  4072. title = self._get_text(renderer, 'title')
  4073. # playlist
  4074. playlist_id = renderer.get('playlistId')
  4075. if playlist_id:
  4076. yield self.url_result(
  4077. 'https://www.youtube.com/playlist?list=%s' % playlist_id,
  4078. ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
  4079. video_title=title)
  4080. continue
  4081. # video
  4082. video_id = renderer.get('videoId')
  4083. if video_id:
  4084. yield self._extract_video(renderer)
  4085. continue
  4086. # channel
  4087. channel_id = renderer.get('channelId')
  4088. if channel_id:
  4089. yield self.url_result(
  4090. 'https://www.youtube.com/channel/%s' % channel_id,
  4091. ie=YoutubeTabIE.ie_key(), video_title=title)
  4092. continue
  4093. # generic endpoint URL support
  4094. ep_url = urljoin('https://www.youtube.com/', try_get(
  4095. renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
  4096. str))
  4097. if ep_url:
  4098. for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
  4099. if ie.suitable(ep_url):
  4100. yield self.url_result(
  4101. ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
  4102. break
  4103. def _music_reponsive_list_entry(self, renderer):
  4104. video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
  4105. if video_id:
  4106. return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
  4107. ie=YoutubeIE.ie_key(), video_id=video_id)
  4108. playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
  4109. if playlist_id:
  4110. video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
  4111. if video_id:
  4112. return self.url_result(f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}',
  4113. ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  4114. return self.url_result(f'https://music.youtube.com/playlist?list={playlist_id}',
  4115. ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  4116. browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
  4117. if browse_id:
  4118. return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
  4119. ie=YoutubeTabIE.ie_key(), video_id=browse_id)
  4120. def _shelf_entries_from_content(self, shelf_renderer):
  4121. content = shelf_renderer.get('content')
  4122. if not isinstance(content, dict):
  4123. return
  4124. renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
  4125. if renderer:
  4126. # TODO: add support for nested playlists so each shelf is processed
  4127. # as separate playlist
  4128. # TODO: this includes only first N items
  4129. yield from self._grid_entries(renderer)
  4130. renderer = content.get('horizontalListRenderer')
  4131. if renderer:
  4132. # TODO
  4133. pass
  4134. def _shelf_entries(self, shelf_renderer, skip_channels=False):
  4135. ep = try_get(
  4136. shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
  4137. str)
  4138. shelf_url = urljoin('https://www.youtube.com', ep)
  4139. if shelf_url:
  4140. # Skipping links to another channels, note that checking for
  4141. # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
  4142. # will not work
  4143. if skip_channels and '/channels?' in shelf_url:
  4144. return
  4145. title = self._get_text(shelf_renderer, 'title')
  4146. yield self.url_result(shelf_url, video_title=title)
  4147. # Shelf may not contain shelf URL, fallback to extraction from content
  4148. yield from self._shelf_entries_from_content(shelf_renderer)
  4149. def _playlist_entries(self, video_list_renderer):
  4150. for content in video_list_renderer['contents']:
  4151. if not isinstance(content, dict):
  4152. continue
  4153. renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
  4154. if not isinstance(renderer, dict):
  4155. continue
  4156. video_id = renderer.get('videoId')
  4157. if not video_id:
  4158. continue
  4159. yield self._extract_video(renderer)
  4160. def _rich_entries(self, rich_grid_renderer):
  4161. renderer = traverse_obj(
  4162. rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
  4163. video_id = renderer.get('videoId')
  4164. if not video_id:
  4165. return
  4166. yield self._extract_video(renderer)
  4167. def _video_entry(self, video_renderer):
  4168. video_id = video_renderer.get('videoId')
  4169. if video_id:
  4170. return self._extract_video(video_renderer)
  4171. def _hashtag_tile_entry(self, hashtag_tile_renderer):
  4172. url = urljoin('https://youtube.com', traverse_obj(
  4173. hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
  4174. if url:
  4175. return self.url_result(
  4176. url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
  4177. def _post_thread_entries(self, post_thread_renderer):
  4178. post_renderer = try_get(
  4179. post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
  4180. if not post_renderer:
  4181. return
  4182. # video attachment
  4183. video_renderer = try_get(
  4184. post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
  4185. video_id = video_renderer.get('videoId')
  4186. if video_id:
  4187. entry = self._extract_video(video_renderer)
  4188. if entry:
  4189. yield entry
  4190. # playlist attachment
  4191. playlist_id = try_get(
  4192. post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
  4193. if playlist_id:
  4194. yield self.url_result(
  4195. 'https://www.youtube.com/playlist?list=%s' % playlist_id,
  4196. ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  4197. # inline video links
  4198. runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
  4199. for run in runs:
  4200. if not isinstance(run, dict):
  4201. continue
  4202. ep_url = try_get(
  4203. run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
  4204. if not ep_url:
  4205. continue
  4206. if not YoutubeIE.suitable(ep_url):
  4207. continue
  4208. ep_video_id = YoutubeIE._match_id(ep_url)
  4209. if video_id == ep_video_id:
  4210. continue
  4211. yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
  4212. def _post_thread_continuation_entries(self, post_thread_continuation):
  4213. contents = post_thread_continuation.get('contents')
  4214. if not isinstance(contents, list):
  4215. return
  4216. for content in contents:
  4217. renderer = content.get('backstagePostThreadRenderer')
  4218. if isinstance(renderer, dict):
  4219. yield from self._post_thread_entries(renderer)
  4220. continue
  4221. renderer = content.get('videoRenderer')
  4222. if isinstance(renderer, dict):
  4223. yield self._video_entry(renderer)
  4224. r''' # unused
  4225. def _rich_grid_entries(self, contents):
  4226. for content in contents:
  4227. video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
  4228. if video_renderer:
  4229. entry = self._video_entry(video_renderer)
  4230. if entry:
  4231. yield entry
  4232. '''
  4233. def _report_history_entries(self, renderer):
  4234. for url in traverse_obj(renderer, (
  4235. 'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
  4236. 'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
  4237. 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')):
  4238. yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
  4239. def _extract_entries(self, parent_renderer, continuation_list):
  4240. # continuation_list is modified in-place with continuation_list = [continuation_token]
  4241. continuation_list[:] = [None]
  4242. contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
  4243. for content in contents:
  4244. if not isinstance(content, dict):
  4245. continue
  4246. is_renderer = traverse_obj(
  4247. content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
  4248. expected_type=dict)
  4249. if not is_renderer:
  4250. if content.get('richItemRenderer'):
  4251. for entry in self._rich_entries(content['richItemRenderer']):
  4252. yield entry
  4253. continuation_list[0] = self._extract_continuation(parent_renderer)
  4254. elif content.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory
  4255. table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
  4256. yield from self._report_history_entries(table)
  4257. continuation_list[0] = self._extract_continuation(table)
  4258. continue
  4259. isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
  4260. for isr_content in isr_contents:
  4261. if not isinstance(isr_content, dict):
  4262. continue
  4263. known_renderers = {
  4264. 'playlistVideoListRenderer': self._playlist_entries,
  4265. 'gridRenderer': self._grid_entries,
  4266. 'reelShelfRenderer': self._grid_entries,
  4267. 'shelfRenderer': self._shelf_entries,
  4268. 'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
  4269. 'backstagePostThreadRenderer': self._post_thread_entries,
  4270. 'videoRenderer': lambda x: [self._video_entry(x)],
  4271. 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
  4272. 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
  4273. 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
  4274. }
  4275. for key, renderer in isr_content.items():
  4276. if key not in known_renderers:
  4277. continue
  4278. for entry in known_renderers[key](renderer):
  4279. if entry:
  4280. yield entry
  4281. continuation_list[0] = self._extract_continuation(renderer)
  4282. break
  4283. if not continuation_list[0]:
  4284. continuation_list[0] = self._extract_continuation(is_renderer)
  4285. if not continuation_list[0]:
  4286. continuation_list[0] = self._extract_continuation(parent_renderer)
  4287. def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
  4288. continuation_list = [None]
  4289. extract_entries = lambda x: self._extract_entries(x, continuation_list)
  4290. tab_content = try_get(tab, lambda x: x['content'], dict)
  4291. if not tab_content:
  4292. return
  4293. parent_renderer = (
  4294. try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
  4295. or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
  4296. yield from extract_entries(parent_renderer)
  4297. continuation = continuation_list[0]
  4298. for page_num in itertools.count(1):
  4299. if not continuation:
  4300. break
  4301. headers = self.generate_api_headers(
  4302. ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
  4303. response = self._extract_response(
  4304. item_id=f'{item_id} page {page_num}',
  4305. query=continuation, headers=headers, ytcfg=ytcfg,
  4306. check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
  4307. if not response:
  4308. break
  4309. # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
  4310. # See: https://github.com/ytdl-org/youtube-dl/issues/28702
  4311. visitor_data = self._extract_visitor_data(response) or visitor_data
  4312. known_renderers = {
  4313. 'videoRenderer': (self._grid_entries, 'items'), # for membership tab
  4314. 'gridPlaylistRenderer': (self._grid_entries, 'items'),
  4315. 'gridVideoRenderer': (self._grid_entries, 'items'),
  4316. 'gridChannelRenderer': (self._grid_entries, 'items'),
  4317. 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
  4318. 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
  4319. 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
  4320. 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
  4321. 'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
  4322. 'playlistVideoListContinuation': (self._playlist_entries, None),
  4323. 'gridContinuation': (self._grid_entries, None),
  4324. 'itemSectionContinuation': (self._post_thread_continuation_entries, None),
  4325. 'sectionListContinuation': (extract_entries, None), # for feeds
  4326. }
  4327. continuation_items = traverse_obj(response, (
  4328. ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
  4329. 'appendContinuationItemsAction', 'continuationItems'
  4330. ), 'continuationContents', get_all=False)
  4331. continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
  4332. video_items_renderer = None
  4333. for key in continuation_item.keys():
  4334. if key not in known_renderers:
  4335. continue
  4336. func, parent_key = known_renderers[key]
  4337. video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
  4338. continuation_list = [None]
  4339. yield from func(video_items_renderer)
  4340. continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
  4341. if not video_items_renderer:
  4342. break
  4343. @staticmethod
  4344. def _extract_selected_tab(tabs, fatal=True):
  4345. for tab_renderer in tabs:
  4346. if tab_renderer.get('selected'):
  4347. return tab_renderer
  4348. if fatal:
  4349. raise ExtractorError('Unable to find selected tab')
  4350. @staticmethod
  4351. def _extract_tab_renderers(response):
  4352. return traverse_obj(
  4353. response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict)
  4354. def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
  4355. metadata = self._extract_metadata_from_tabs(item_id, data)
  4356. selected_tab = self._extract_selected_tab(tabs)
  4357. metadata['title'] += format_field(selected_tab, 'title', ' - %s')
  4358. metadata['title'] += format_field(selected_tab, 'expandedText', ' - %s')
  4359. return self.playlist_result(
  4360. self._entries(
  4361. selected_tab, metadata['id'], ytcfg,
  4362. self._extract_account_syncid(ytcfg, data),
  4363. self._extract_visitor_data(data, ytcfg)),
  4364. **metadata)
  4365. def _extract_metadata_from_tabs(self, item_id, data):
  4366. info = {'id': item_id}
  4367. metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
  4368. if metadata_renderer:
  4369. info.update({
  4370. 'uploader': metadata_renderer.get('title'),
  4371. 'uploader_id': metadata_renderer.get('externalId'),
  4372. 'uploader_url': metadata_renderer.get('channelUrl'),
  4373. })
  4374. if info['uploader_id']:
  4375. info['id'] = info['uploader_id']
  4376. else:
  4377. metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
  4378. # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
  4379. # See: https://github.com/hypervideo/hypervideo/issues/2237#issuecomment-1013694714
  4380. def _get_uncropped(url):
  4381. return url_or_none((url or '').split('=')[0] + '=s0')
  4382. avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
  4383. if avatar_thumbnails:
  4384. uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
  4385. if uncropped_avatar:
  4386. avatar_thumbnails.append({
  4387. 'url': uncropped_avatar,
  4388. 'id': 'avatar_uncropped',
  4389. 'preference': 1
  4390. })
  4391. channel_banners = self._extract_thumbnails(
  4392. data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
  4393. for banner in channel_banners:
  4394. banner['preference'] = -10
  4395. if channel_banners:
  4396. uncropped_banner = _get_uncropped(channel_banners[0]['url'])
  4397. if uncropped_banner:
  4398. channel_banners.append({
  4399. 'url': uncropped_banner,
  4400. 'id': 'banner_uncropped',
  4401. 'preference': -5
  4402. })
  4403. # Deprecated - remove primary_sidebar_renderer when layout discontinued
  4404. primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
  4405. playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)
  4406. primary_thumbnails = self._extract_thumbnails(
  4407. primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
  4408. playlist_thumbnails = self._extract_thumbnails(
  4409. playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
  4410. info.update({
  4411. 'title': (traverse_obj(metadata_renderer, 'title')
  4412. or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
  4413. or info['id']),
  4414. 'availability': self._extract_availability(data),
  4415. 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
  4416. 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
  4417. 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
  4418. 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
  4419. })
  4420. # Playlist stats is a text runs array containing [video count, view count, last updated].
  4421. # last updated or (view count and last updated) may be missing.
  4422. playlist_stats = get_first(
  4423. (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))
  4424. last_updated_unix = self._parse_time_text(
  4425. self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
  4426. or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
  4427. info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
  4428. info['view_count'] = self._get_count(playlist_stats, 1)
  4429. if info['view_count'] is None: # 0 is allowed
  4430. info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
  4431. info['playlist_count'] = self._get_count(playlist_stats, 0)
  4432. if info['playlist_count'] is None: # 0 is allowed
  4433. info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
  4434. if not info.get('uploader_id'):
  4435. owner = traverse_obj(playlist_header_renderer, 'ownerText')
  4436. if not owner: # Deprecated
  4437. owner = traverse_obj(
  4438. self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
  4439. ('videoOwner', 'videoOwnerRenderer', 'title'))
  4440. owner_text = self._get_text(owner)
  4441. browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
  4442. info.update({
  4443. 'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
  4444. 'uploader_id': browse_ep.get('browseId'),
  4445. 'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
  4446. })
  4447. info.update({
  4448. 'channel': info['uploader'],
  4449. 'channel_id': info['uploader_id'],
  4450. 'channel_url': info['uploader_url']
  4451. })
  4452. return info
  4453. def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
  4454. first_id = last_id = response = None
  4455. for page_num in itertools.count(1):
  4456. videos = list(self._playlist_entries(playlist))
  4457. if not videos:
  4458. return
  4459. start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
  4460. if start >= len(videos):
  4461. return
  4462. yield from videos[start:]
  4463. first_id = first_id or videos[0]['id']
  4464. last_id = videos[-1]['id']
  4465. watch_endpoint = try_get(
  4466. playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
  4467. headers = self.generate_api_headers(
  4468. ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
  4469. visitor_data=self._extract_visitor_data(response, data, ytcfg))
  4470. query = {
  4471. 'playlistId': playlist_id,
  4472. 'videoId': watch_endpoint.get('videoId') or last_id,
  4473. 'index': watch_endpoint.get('index') or len(videos),
  4474. 'params': watch_endpoint.get('params') or 'OAE%3D'
  4475. }
  4476. response = self._extract_response(
  4477. item_id='%s page %d' % (playlist_id, page_num),
  4478. query=query, ep='next', headers=headers, ytcfg=ytcfg,
  4479. check_get_keys='contents'
  4480. )
  4481. playlist = try_get(
  4482. response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
  4483. def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
  4484. title = playlist.get('title') or try_get(
  4485. data, lambda x: x['titleText']['simpleText'], str)
  4486. playlist_id = playlist.get('playlistId') or item_id
  4487. # Delegating everything except mix playlists to regular tab-based playlist URL
  4488. playlist_url = urljoin(url, try_get(
  4489. playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
  4490. str))
  4491. # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
  4492. # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
  4493. is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)
  4494. if playlist_url and playlist_url != url and not is_known_unviewable:
  4495. return self.url_result(
  4496. playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
  4497. video_title=title)
  4498. return self.playlist_result(
  4499. self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
  4500. playlist_id=playlist_id, playlist_title=title)
  4501. def _extract_availability(self, data):
  4502. """
  4503. Gets the availability of a given playlist/tab.
  4504. Note: Unless YouTube tells us explicitly, we do not assume it is public
  4505. @param data: response
  4506. """
  4507. sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
  4508. playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
  4509. player_header_privacy = playlist_header_renderer.get('privacy')
  4510. badges = self._extract_badges(sidebar_renderer)
  4511. # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
  4512. privacy_setting_icon = get_first(
  4513. (playlist_header_renderer, sidebar_renderer),
  4514. ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
  4515. lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
  4516. expected_type=str)
  4517. microformats_is_unlisted = traverse_obj(
  4518. data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)
  4519. return (
  4520. 'public' if (
  4521. self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
  4522. or player_header_privacy == 'PUBLIC'
  4523. or privacy_setting_icon == 'PRIVACY_PUBLIC')
  4524. else self._availability(
  4525. is_private=(
  4526. self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
  4527. or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
  4528. else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
  4529. is_unlisted=(
  4530. self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
  4531. or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
  4532. else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
  4533. else microformats_is_unlisted if microformats_is_unlisted is not None else None),
  4534. needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
  4535. needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
  4536. needs_auth=False))
  4537. @staticmethod
  4538. def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
  4539. sidebar_renderer = try_get(
  4540. data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
  4541. for item in sidebar_renderer:
  4542. renderer = try_get(item, lambda x: x[info_renderer], expected_type)
  4543. if renderer:
  4544. return renderer
  4545. def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
  4546. """
  4547. Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)
  4548. """
  4549. is_playlist = bool(traverse_obj(
  4550. data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer')))
  4551. if not is_playlist:
  4552. return
  4553. headers = self.generate_api_headers(
  4554. ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
  4555. visitor_data=self._extract_visitor_data(data, ytcfg))
  4556. query = {
  4557. 'params': 'wgYCCAA=',
  4558. 'browseId': f'VL{item_id}'
  4559. }
  4560. return self._extract_response(
  4561. item_id=item_id, headers=headers, query=query,
  4562. check_get_keys='contents', fatal=False, ytcfg=ytcfg,
  4563. note='Redownloading playlist API JSON with unavailable videos')
  4564. @functools.cached_property
  4565. def skip_webpage(self):
  4566. return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
  4567. def _extract_webpage(self, url, item_id, fatal=True):
  4568. webpage, data = None, None
  4569. for retry in self.RetryManager(fatal=fatal):
  4570. try:
  4571. webpage = self._download_webpage(url, item_id, note='Downloading webpage')
  4572. data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
  4573. except ExtractorError as e:
  4574. if isinstance(e.cause, network_exceptions):
  4575. if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
  4576. retry.error = e
  4577. continue
  4578. self._error_or_warning(e, fatal=fatal)
  4579. break
  4580. try:
  4581. self._extract_and_report_alerts(data)
  4582. except ExtractorError as e:
  4583. self._error_or_warning(e, fatal=fatal)
  4584. break
  4585. # Sometimes youtube returns a webpage with incomplete ytInitialData
  4586. # See: https://github.com/hypervideo/hypervideo/issues/116
  4587. if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
  4588. retry.error = ExtractorError('Incomplete yt initial data received')
  4589. continue
  4590. return webpage, data
  4591. def _report_playlist_authcheck(self, ytcfg, fatal=True):
  4592. """Use if failed to extract ytcfg (and data) from initial webpage"""
  4593. if not ytcfg and self.is_authenticated:
  4594. msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
  4595. if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
  4596. raise ExtractorError(
  4597. f'{msg}. If you are not downloading private content, or '
  4598. 'your cookies are only for the first account and channel,'
  4599. ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
  4600. expected=True)
  4601. self.report_warning(msg, only_once=True)
  4602. def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
  4603. data = None
  4604. if not self.skip_webpage:
  4605. webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
  4606. ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
  4607. # Reject webpage data if redirected to home page without explicitly requesting
  4608. selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
  4609. if (url != 'https://www.youtube.com/feed/recommended'
  4610. and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
  4611. and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
  4612. msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
  4613. if fatal:
  4614. raise ExtractorError(msg, expected=True)
  4615. self.report_warning(msg, only_once=True)
  4616. if not data:
  4617. self._report_playlist_authcheck(ytcfg, fatal=fatal)
  4618. data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
  4619. return data, ytcfg
  4620. def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
  4621. headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
  4622. resolve_response = self._extract_response(
  4623. item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
  4624. ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
  4625. endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
  4626. for ep_key, ep in endpoints.items():
  4627. params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
  4628. if params:
  4629. return self._extract_response(
  4630. item_id=item_id, query=params, ep=ep, headers=headers,
  4631. ytcfg=ytcfg, fatal=fatal, default_client=default_client,
  4632. check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))
  4633. err_note = 'Failed to resolve url (does the playlist exist?)'
  4634. if fatal:
  4635. raise ExtractorError(err_note, expected=True)
  4636. self.report_warning(err_note, item_id)
  4637. _SEARCH_PARAMS = None
  4638. def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
  4639. data = {'query': query}
  4640. if params is NO_DEFAULT:
  4641. params = self._SEARCH_PARAMS
  4642. if params:
  4643. data['params'] = params
  4644. content_keys = (
  4645. ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
  4646. ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
  4647. # ytmusic search
  4648. ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
  4649. ('continuationContents', ),
  4650. )
  4651. display_id = f'query "{query}"'
  4652. check_get_keys = tuple({keys[0] for keys in content_keys})
  4653. ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
  4654. self._report_playlist_authcheck(ytcfg, fatal=False)
  4655. continuation_list = [None]
  4656. search = None
  4657. for page_num in itertools.count(1):
  4658. data.update(continuation_list[0] or {})
  4659. headers = self.generate_api_headers(
  4660. ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
  4661. search = self._extract_response(
  4662. item_id=f'{display_id} page {page_num}', ep='search', query=data,
  4663. default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
  4664. slr_contents = traverse_obj(search, *content_keys)
  4665. yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
  4666. if not continuation_list[0]:
  4667. break
  4668. class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
  4669. IE_DESC = 'YouTube Tabs'
  4670. _VALID_URL = r'''(?x:
  4671. https?://
  4672. (?:\w+\.)?
  4673. (?:
  4674. youtube(?:kids)?\.com|
  4675. %(invidious)s
  4676. )/
  4677. (?:
  4678. (?P<channel_type>channel|c|user|browse)/|
  4679. (?P<not_channel>
  4680. feed/|hashtag/|
  4681. (?:playlist|watch)\?.*?\blist=
  4682. )|
  4683. (?!(?:%(reserved_names)s)\b) # Direct URLs
  4684. )
  4685. (?P<id>[^/?\#&]+)
  4686. )''' % {
  4687. 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
  4688. 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
  4689. }
  4690. IE_NAME = 'youtube:tab'
  4691. _TESTS = [{
  4692. 'note': 'playlists, multipage',
  4693. 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
  4694. 'playlist_mincount': 94,
  4695. 'info_dict': {
  4696. 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
  4697. 'title': 'Igor Kleiner - Playlists',
  4698. 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
  4699. 'uploader': 'Igor Kleiner',
  4700. 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
  4701. 'channel': 'Igor Kleiner',
  4702. 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
  4703. 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
  4704. 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
  4705. 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
  4706. 'channel_follower_count': int
  4707. },
  4708. }, {
  4709. 'note': 'playlists, multipage, different order',
  4710. 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
  4711. 'playlist_mincount': 94,
  4712. 'info_dict': {
  4713. 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
  4714. 'title': 'Igor Kleiner - Playlists',
  4715. 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
  4716. 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
  4717. 'uploader': 'Igor Kleiner',
  4718. 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
  4719. 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
  4720. 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
  4721. 'channel': 'Igor Kleiner',
  4722. 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
  4723. 'channel_follower_count': int
  4724. },
  4725. }, {
  4726. 'note': 'playlists, series',
  4727. 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
  4728. 'playlist_mincount': 5,
  4729. 'info_dict': {
  4730. 'id': 'UCYO_jab_esuFRV4b17AJtAw',
  4731. 'title': '3Blue1Brown - Playlists',
  4732. 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
  4733. 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
  4734. 'uploader': '3Blue1Brown',
  4735. 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
  4736. 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
  4737. 'channel': '3Blue1Brown',
  4738. 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
  4739. 'tags': ['Mathematics'],
  4740. 'channel_follower_count': int
  4741. },
  4742. }, {
  4743. 'note': 'playlists, singlepage',
  4744. 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
  4745. 'playlist_mincount': 4,
  4746. 'info_dict': {
  4747. 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
  4748. 'title': 'ThirstForScience - Playlists',
  4749. 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
  4750. 'uploader': 'ThirstForScience',
  4751. 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
  4752. 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
  4753. 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
  4754. 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
  4755. 'tags': 'count:13',
  4756. 'channel': 'ThirstForScience',
  4757. 'channel_follower_count': int
  4758. }
  4759. }, {
  4760. 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
  4761. 'only_matching': True,
  4762. }, {
  4763. 'note': 'basic, single video playlist',
  4764. 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
  4765. 'info_dict': {
  4766. 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
  4767. 'uploader': 'Sergey M.',
  4768. 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
  4769. 'title': 'youtube-dl public playlist',
  4770. 'description': '',
  4771. 'tags': [],
  4772. 'view_count': int,
  4773. 'modified_date': '20201130',
  4774. 'channel': 'Sergey M.',
  4775. 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
  4776. 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4777. 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4778. 'availability': 'public',
  4779. },
  4780. 'playlist_count': 1,
  4781. }, {
  4782. 'note': 'empty playlist',
  4783. 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
  4784. 'info_dict': {
  4785. 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
  4786. 'uploader': 'Sergey M.',
  4787. 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
  4788. 'title': 'youtube-dl empty playlist',
  4789. 'tags': [],
  4790. 'channel': 'Sergey M.',
  4791. 'description': '',
  4792. 'modified_date': '20160902',
  4793. 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
  4794. 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4795. 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4796. 'availability': 'public',
  4797. },
  4798. 'playlist_count': 0,
  4799. }, {
  4800. 'note': 'Home tab',
  4801. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
  4802. 'info_dict': {
  4803. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4804. 'title': 'lex will - Home',
  4805. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  4806. 'uploader': 'lex will',
  4807. 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4808. 'channel': 'lex will',
  4809. 'tags': ['bible', 'history', 'prophesy'],
  4810. 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4811. 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4812. 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4813. 'channel_follower_count': int
  4814. },
  4815. 'playlist_mincount': 2,
  4816. }, {
  4817. 'note': 'Videos tab',
  4818. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
  4819. 'info_dict': {
  4820. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4821. 'title': 'lex will - Videos',
  4822. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  4823. 'uploader': 'lex will',
  4824. 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4825. 'tags': ['bible', 'history', 'prophesy'],
  4826. 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4827. 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4828. 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4829. 'channel': 'lex will',
  4830. 'channel_follower_count': int
  4831. },
  4832. 'playlist_mincount': 975,
  4833. }, {
  4834. 'note': 'Videos tab, sorted by popular',
  4835. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
  4836. 'info_dict': {
  4837. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4838. 'title': 'lex will - Videos',
  4839. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  4840. 'uploader': 'lex will',
  4841. 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4842. 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4843. 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4844. 'channel': 'lex will',
  4845. 'tags': ['bible', 'history', 'prophesy'],
  4846. 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4847. 'channel_follower_count': int
  4848. },
  4849. 'playlist_mincount': 199,
  4850. }, {
  4851. 'note': 'Playlists tab',
  4852. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
  4853. 'info_dict': {
  4854. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4855. 'title': 'lex will - Playlists',
  4856. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  4857. 'uploader': 'lex will',
  4858. 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4859. 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4860. 'channel': 'lex will',
  4861. 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4862. 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4863. 'tags': ['bible', 'history', 'prophesy'],
  4864. 'channel_follower_count': int
  4865. },
  4866. 'playlist_mincount': 17,
  4867. }, {
  4868. 'note': 'Community tab',
  4869. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
  4870. 'info_dict': {
  4871. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4872. 'title': 'lex will - Community',
  4873. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  4874. 'uploader': 'lex will',
  4875. 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4876. 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4877. 'channel': 'lex will',
  4878. 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4879. 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4880. 'tags': ['bible', 'history', 'prophesy'],
  4881. 'channel_follower_count': int
  4882. },
  4883. 'playlist_mincount': 18,
  4884. }, {
  4885. 'note': 'Channels tab',
  4886. 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
  4887. 'info_dict': {
  4888. 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4889. 'title': 'lex will - Channels',
  4890. 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
  4891. 'uploader': 'lex will',
  4892. 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4893. 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4894. 'channel': 'lex will',
  4895. 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
  4896. 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
  4897. 'tags': ['bible', 'history', 'prophesy'],
  4898. 'channel_follower_count': int
  4899. },
  4900. 'playlist_mincount': 12,
  4901. }, {
  4902. 'note': 'Search tab',
  4903. 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
  4904. 'playlist_mincount': 40,
  4905. 'info_dict': {
  4906. 'id': 'UCYO_jab_esuFRV4b17AJtAw',
  4907. 'title': '3Blue1Brown - Search - linear algebra',
  4908. 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
  4909. 'uploader': '3Blue1Brown',
  4910. 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
  4911. 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
  4912. 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
  4913. 'tags': ['Mathematics'],
  4914. 'channel': '3Blue1Brown',
  4915. 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
  4916. 'channel_follower_count': int
  4917. },
  4918. }, {
  4919. 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4920. 'only_matching': True,
  4921. }, {
  4922. 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4923. 'only_matching': True,
  4924. }, {
  4925. 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
  4926. 'only_matching': True,
  4927. }, {
  4928. 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
  4929. 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
  4930. 'info_dict': {
  4931. 'title': '29C3: Not my department',
  4932. 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
  4933. 'uploader': 'Christiaan008',
  4934. 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
  4935. 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
  4936. 'tags': [],
  4937. 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
  4938. 'view_count': int,
  4939. 'modified_date': '20150605',
  4940. 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
  4941. 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
  4942. 'channel': 'Christiaan008',
  4943. 'availability': 'public',
  4944. },
  4945. 'playlist_count': 96,
  4946. }, {
  4947. 'note': 'Large playlist',
  4948. 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
  4949. 'info_dict': {
  4950. 'title': 'Uploads from Cauchemar',
  4951. 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
  4952. 'uploader': 'Cauchemar',
  4953. 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
  4954. 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
  4955. 'tags': [],
  4956. 'modified_date': r're:\d{8}',
  4957. 'channel': 'Cauchemar',
  4958. 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
  4959. 'view_count': int,
  4960. 'description': '',
  4961. 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
  4962. 'availability': 'public',
  4963. },
  4964. 'playlist_mincount': 1123,
  4965. 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
  4966. }, {
  4967. 'note': 'even larger playlist, 8832 videos',
  4968. 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
  4969. 'only_matching': True,
  4970. }, {
  4971. 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
  4972. 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
  4973. 'info_dict': {
  4974. 'title': 'Uploads from Interstellar Movie',
  4975. 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
  4976. 'uploader': 'Interstellar Movie',
  4977. 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
  4978. 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
  4979. 'tags': [],
  4980. 'view_count': int,
  4981. 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
  4982. 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
  4983. 'channel': 'Interstellar Movie',
  4984. 'description': '',
  4985. 'modified_date': r're:\d{8}',
  4986. 'availability': 'public',
  4987. },
  4988. 'playlist_mincount': 21,
  4989. }, {
  4990. 'note': 'Playlist with "show unavailable videos" button',
  4991. 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
  4992. 'info_dict': {
  4993. 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
  4994. 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
  4995. 'uploader': 'Phim Siêu Nhân Nhật Bản',
  4996. 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
  4997. 'view_count': int,
  4998. 'channel': 'Phim Siêu Nhân Nhật Bản',
  4999. 'tags': [],
  5000. 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
  5001. 'description': '',
  5002. 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
  5003. 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
  5004. 'modified_date': r're:\d{8}',
  5005. 'availability': 'public',
  5006. },
  5007. 'playlist_mincount': 200,
  5008. 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
  5009. }, {
  5010. 'note': 'Playlist with unavailable videos in page 7',
  5011. 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
  5012. 'info_dict': {
  5013. 'title': 'Uploads from BlankTV',
  5014. 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
  5015. 'uploader': 'BlankTV',
  5016. 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
  5017. 'channel': 'BlankTV',
  5018. 'channel_url': 'https://www.youtube.com/c/blanktv',
  5019. 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
  5020. 'view_count': int,
  5021. 'tags': [],
  5022. 'uploader_url': 'https://www.youtube.com/c/blanktv',
  5023. 'modified_date': r're:\d{8}',
  5024. 'description': '',
  5025. 'availability': 'public',
  5026. },
  5027. 'playlist_mincount': 1000,
  5028. 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
  5029. }, {
  5030. 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
  5031. 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
  5032. 'info_dict': {
  5033. 'title': 'Data Analysis with Dr Mike Pound',
  5034. 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
  5035. 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
  5036. 'uploader': 'Computerphile',
  5037. 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
  5038. 'uploader_url': 'https://www.youtube.com/user/Computerphile',
  5039. 'tags': [],
  5040. 'view_count': int,
  5041. 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
  5042. 'channel_url': 'https://www.youtube.com/user/Computerphile',
  5043. 'channel': 'Computerphile',
  5044. 'availability': 'public',
  5045. 'modified_date': '20190712',
  5046. },
  5047. 'playlist_mincount': 11,
  5048. }, {
  5049. 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
  5050. 'only_matching': True,
  5051. }, {
  5052. 'note': 'Playlist URL that does not actually serve a playlist',
  5053. 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
  5054. 'info_dict': {
  5055. 'id': 'FqZTN594JQw',
  5056. 'ext': 'webm',
  5057. 'title': "Smiley's People 01 detective, Adventure Series, Action",
  5058. 'uploader': 'STREEM',
  5059. 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
  5060. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
  5061. 'upload_date': '20150526',
  5062. 'license': 'Standard YouTube License',
  5063. 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
  5064. 'categories': ['People & Blogs'],
  5065. 'tags': list,
  5066. 'view_count': int,
  5067. 'like_count': int,
  5068. },
  5069. 'params': {
  5070. 'skip_download': True,
  5071. },
  5072. 'skip': 'This video is not available.',
  5073. 'add_ie': [YoutubeIE.ie_key()],
  5074. }, {
  5075. 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
  5076. 'only_matching': True,
  5077. }, {
  5078. 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
  5079. 'only_matching': True,
  5080. }, {
  5081. 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
  5082. 'info_dict': {
  5083. 'id': 'Wq15eF5vCbI', # This will keep changing
  5084. 'ext': 'mp4',
  5085. 'title': str,
  5086. 'uploader': 'Sky News',
  5087. 'uploader_id': 'skynews',
  5088. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
  5089. 'upload_date': r're:\d{8}',
  5090. 'description': str,
  5091. 'categories': ['News & Politics'],
  5092. 'tags': list,
  5093. 'like_count': int,
  5094. 'release_timestamp': int,
  5095. 'channel': 'Sky News',
  5096. 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
  5097. 'age_limit': 0,
  5098. 'view_count': int,
  5099. 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
  5100. 'playable_in_embed': True,
  5101. 'release_date': r're:\d+',
  5102. 'availability': 'public',
  5103. 'live_status': 'is_live',
  5104. 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
  5105. 'channel_follower_count': int,
  5106. 'concurrent_view_count': int,
  5107. },
  5108. 'params': {
  5109. 'skip_download': True,
  5110. },
  5111. 'expected_warnings': ['Ignoring subtitle tracks found in '],
  5112. }, {
  5113. 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
  5114. 'info_dict': {
  5115. 'id': 'a48o2S1cPoo',
  5116. 'ext': 'mp4',
  5117. 'title': 'The Young Turks - Live Main Show',
  5118. 'uploader': 'The Young Turks',
  5119. 'uploader_id': 'TheYoungTurks',
  5120. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
  5121. 'upload_date': '20150715',
  5122. 'license': 'Standard YouTube License',
  5123. 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
  5124. 'categories': ['News & Politics'],
  5125. 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
  5126. 'like_count': int,
  5127. },
  5128. 'params': {
  5129. 'skip_download': True,
  5130. },
  5131. 'only_matching': True,
  5132. }, {
  5133. 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
  5134. 'only_matching': True,
  5135. }, {
  5136. 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
  5137. 'only_matching': True,
  5138. }, {
  5139. 'note': 'A channel that is not live. Should raise error',
  5140. 'url': 'https://www.youtube.com/user/numberphile/live',
  5141. 'only_matching': True,
  5142. }, {
  5143. 'url': 'https://www.youtube.com/feed/trending',
  5144. 'only_matching': True,
  5145. }, {
  5146. 'url': 'https://www.youtube.com/feed/library',
  5147. 'only_matching': True,
  5148. }, {
  5149. 'url': 'https://www.youtube.com/feed/history',
  5150. 'only_matching': True,
  5151. }, {
  5152. 'url': 'https://www.youtube.com/feed/subscriptions',
  5153. 'only_matching': True,
  5154. }, {
  5155. 'url': 'https://www.youtube.com/feed/watch_later',
  5156. 'only_matching': True,
  5157. }, {
  5158. 'note': 'Recommended - redirects to home page.',
  5159. 'url': 'https://www.youtube.com/feed/recommended',
  5160. 'only_matching': True,
  5161. }, {
  5162. 'note': 'inline playlist with not always working continuations',
  5163. 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
  5164. 'only_matching': True,
  5165. }, {
  5166. 'url': 'https://www.youtube.com/course',
  5167. 'only_matching': True,
  5168. }, {
  5169. 'url': 'https://www.youtube.com/zsecurity',
  5170. 'only_matching': True,
  5171. }, {
  5172. 'url': 'http://www.youtube.com/NASAgovVideo/videos',
  5173. 'only_matching': True,
  5174. }, {
  5175. 'url': 'https://www.youtube.com/TheYoungTurks/live',
  5176. 'only_matching': True,
  5177. }, {
  5178. 'url': 'https://www.youtube.com/hashtag/cctv9',
  5179. 'info_dict': {
  5180. 'id': 'cctv9',
  5181. 'title': '#cctv9',
  5182. 'tags': [],
  5183. },
  5184. 'playlist_mincount': 300, # not consistent but should be over 300
  5185. }, {
  5186. 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
  5187. 'only_matching': True,
  5188. }, {
  5189. 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
  5190. 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
  5191. 'only_matching': True
  5192. }, {
  5193. 'note': '/browse/ should redirect to /channel/',
  5194. 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
  5195. 'only_matching': True
  5196. }, {
  5197. 'note': 'VLPL, should redirect to playlist?list=PL...',
  5198. 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
  5199. 'info_dict': {
  5200. 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
  5201. 'uploader': 'NoCopyrightSounds',
  5202. 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
  5203. 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
  5204. 'title': 'NCS : All Releases 💿',
  5205. 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
  5206. 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
  5207. 'modified_date': r're:\d{8}',
  5208. 'view_count': int,
  5209. 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
  5210. 'tags': [],
  5211. 'channel': 'NoCopyrightSounds',
  5212. 'availability': 'public',
  5213. },
  5214. 'playlist_mincount': 166,
  5215. 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
  5216. }, {
  5217. 'note': 'Topic, should redirect to playlist?list=UU...',
  5218. 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
  5219. 'info_dict': {
  5220. 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
  5221. 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
  5222. 'title': 'Uploads from Royalty Free Music - Topic',
  5223. 'uploader': 'Royalty Free Music - Topic',
  5224. 'tags': [],
  5225. 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
  5226. 'channel': 'Royalty Free Music - Topic',
  5227. 'view_count': int,
  5228. 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
  5229. 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
  5230. 'modified_date': r're:\d{8}',
  5231. 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
  5232. 'description': '',
  5233. 'availability': 'public',
  5234. },
  5235. 'playlist_mincount': 101,
  5236. }, {
  5237. # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
  5238. # Treat as a general feed
  5239. 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
  5240. 'info_dict': {
  5241. 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
  5242. 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
  5243. 'tags': [],
  5244. },
  5245. 'playlist_mincount': 9,
  5246. }, {
  5247. 'note': 'Youtube music Album',
  5248. 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
  5249. 'info_dict': {
  5250. 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
  5251. 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
  5252. 'tags': [],
  5253. 'view_count': int,
  5254. 'description': '',
  5255. 'availability': 'unlisted',
  5256. 'modified_date': r're:\d{8}',
  5257. },
  5258. 'playlist_count': 50,
  5259. }, {
  5260. 'note': 'unlisted single video playlist',
  5261. 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
  5262. 'info_dict': {
  5263. 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
  5264. 'uploader': 'colethedj',
  5265. 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
  5266. 'title': 'hypervideo unlisted playlist test',
  5267. 'availability': 'unlisted',
  5268. 'tags': [],
  5269. 'modified_date': '20220418',
  5270. 'channel': 'colethedj',
  5271. 'view_count': int,
  5272. 'description': '',
  5273. 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
  5274. 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
  5275. 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
  5276. },
  5277. 'playlist_count': 1,
  5278. }, {
  5279. 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
  5280. 'url': 'https://www.youtube.com/feed/recommended',
  5281. 'info_dict': {
  5282. 'id': 'recommended',
  5283. 'title': 'recommended',
  5284. 'tags': [],
  5285. },
  5286. 'playlist_mincount': 50,
  5287. 'params': {
  5288. 'skip_download': True,
  5289. 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
  5290. },
  5291. }, {
  5292. 'note': 'API Fallback: /videos tab, sorted by oldest first',
  5293. 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
  5294. 'info_dict': {
  5295. 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
  5296. 'title': 'Cody\'sLab - Videos',
  5297. 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
  5298. 'uploader': 'Cody\'sLab',
  5299. 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
  5300. 'channel': 'Cody\'sLab',
  5301. 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
  5302. 'tags': [],
  5303. 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
  5304. 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
  5305. 'channel_follower_count': int
  5306. },
  5307. 'playlist_mincount': 650,
  5308. 'params': {
  5309. 'skip_download': True,
  5310. 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
  5311. },
  5312. 'skip': 'Query for sorting no longer works',
  5313. }, {
  5314. 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
  5315. 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
  5316. 'info_dict': {
  5317. 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
  5318. 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
  5319. 'title': 'Uploads from Royalty Free Music - Topic',
  5320. 'uploader': 'Royalty Free Music - Topic',
  5321. 'modified_date': r're:\d{8}',
  5322. 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
  5323. 'description': '',
  5324. 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
  5325. 'tags': [],
  5326. 'channel': 'Royalty Free Music - Topic',
  5327. 'view_count': int,
  5328. 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
  5329. 'availability': 'public',
  5330. },
  5331. 'playlist_mincount': 101,
  5332. 'params': {
  5333. 'skip_download': True,
  5334. 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
  5335. },
  5336. }, {
  5337. 'note': 'non-standard redirect to regional channel',
  5338. 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
  5339. 'only_matching': True
  5340. }, {
  5341. 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
  5342. 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
  5343. 'info_dict': {
  5344. 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
  5345. 'modified_date': '20220407',
  5346. 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
  5347. 'tags': [],
  5348. 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
  5349. 'uploader': 'pukkandan',
  5350. 'availability': 'unlisted',
  5351. 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
  5352. 'channel': 'pukkandan',
  5353. 'description': 'Test for collaborative playlist',
  5354. 'title': 'hypervideo test - collaborative playlist',
  5355. 'view_count': int,
  5356. 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
  5357. },
  5358. 'playlist_mincount': 2
  5359. }, {
  5360. 'note': 'translated tab name',
  5361. 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
  5362. 'info_dict': {
  5363. 'id': 'UCiu-3thuViMebBjw_5nWYrA',
  5364. 'tags': [],
  5365. 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5366. 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5367. 'description': 'test description',
  5368. 'title': 'cole-dlp-test-acc - 再生リスト',
  5369. 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5370. 'uploader': 'cole-dlp-test-acc',
  5371. 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5372. 'channel': 'cole-dlp-test-acc',
  5373. 'channel_follower_count': int,
  5374. },
  5375. 'playlist_mincount': 1,
  5376. 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
  5377. 'expected_warnings': ['Preferring "ja"'],
  5378. }, {
  5379. # XXX: this should really check flat playlist entries, but the test suite doesn't support that
  5380. 'note': 'preferred lang set with playlist with translated video titles',
  5381. 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
  5382. 'info_dict': {
  5383. 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
  5384. 'tags': [],
  5385. 'view_count': int,
  5386. 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5387. 'uploader': 'cole-dlp-test-acc',
  5388. 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5389. 'channel': 'cole-dlp-test-acc',
  5390. 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5391. 'description': 'test',
  5392. 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5393. 'title': 'dlp test playlist',
  5394. 'availability': 'public',
  5395. },
  5396. 'playlist_mincount': 1,
  5397. 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
  5398. 'expected_warnings': ['Preferring "ja"'],
  5399. }, {
  5400. # shorts audio pivot for 2GtVksBMYFM.
  5401. 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
  5402. 'info_dict': {
  5403. 'id': 'sfv_audio_pivot',
  5404. 'title': 'sfv_audio_pivot',
  5405. 'tags': [],
  5406. },
  5407. 'playlist_mincount': 50,
  5408. }, {
  5409. # Channel with a real live tab (not to be mistaken with streams tab)
  5410. # Do not treat like it should redirect to live stream
  5411. 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
  5412. 'info_dict': {
  5413. 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
  5414. 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
  5415. 'tags': [],
  5416. },
  5417. 'playlist_mincount': 20,
  5418. }, {
  5419. # Tab name is not the same as tab id
  5420. 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
  5421. 'info_dict': {
  5422. 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
  5423. 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
  5424. 'tags': [],
  5425. },
  5426. 'playlist_mincount': 8,
  5427. }, {
  5428. # Home tab id is literally home. Not to get mistaken with featured
  5429. 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
  5430. 'info_dict': {
  5431. 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
  5432. 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
  5433. 'tags': [],
  5434. },
  5435. 'playlist_mincount': 8,
  5436. }, {
  5437. # Should get three playlists for videos, shorts and streams tabs
  5438. 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
  5439. 'info_dict': {
  5440. 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
  5441. 'title': 'Polka Ch. 尾丸ポルカ',
  5442. 'channel_follower_count': int,
  5443. 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
  5444. 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
  5445. 'uploader': 'Polka Ch. 尾丸ポルカ',
  5446. 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
  5447. 'channel': 'Polka Ch. 尾丸ポルカ',
  5448. 'tags': 'count:35',
  5449. 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
  5450. 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
  5451. },
  5452. 'playlist_count': 3,
  5453. }, {
  5454. # Shorts tab with channel with handle
  5455. 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
  5456. 'info_dict': {
  5457. 'id': 'UC0intLFzLaudFG-xAvUEO-A',
  5458. 'title': 'Not Just Bikes - Shorts',
  5459. 'tags': 'count:12',
  5460. 'uploader': 'Not Just Bikes',
  5461. 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
  5462. 'description': 'md5:7513148b1f02b924783157d84c4ea555',
  5463. 'channel_follower_count': int,
  5464. 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
  5465. 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
  5466. 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
  5467. 'channel': 'Not Just Bikes',
  5468. },
  5469. 'playlist_mincount': 10,
  5470. }, {
  5471. # Streams tab
  5472. 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
  5473. 'info_dict': {
  5474. 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
  5475. 'title': '中村悠一 - Live',
  5476. 'tags': 'count:7',
  5477. 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
  5478. 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
  5479. 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
  5480. 'channel': '中村悠一',
  5481. 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
  5482. 'channel_follower_count': int,
  5483. 'uploader': '中村悠一',
  5484. 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
  5485. },
  5486. 'playlist_mincount': 60,
  5487. }, {
  5488. # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
  5489. # See test_youtube_lists
  5490. 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
  5491. 'only_matching': True,
  5492. }, {
  5493. # No uploads and no UCID given. Should fail with no uploads error
  5494. # See test_youtube_lists
  5495. 'url': 'https://www.youtube.com/news',
  5496. 'only_matching': True
  5497. }, {
  5498. # No videos tab but has a shorts tab
  5499. 'url': 'https://www.youtube.com/c/TKFShorts',
  5500. 'info_dict': {
  5501. 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
  5502. 'title': 'Shorts Break - Shorts',
  5503. 'tags': 'count:32',
  5504. 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
  5505. 'channel': 'Shorts Break',
  5506. 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
  5507. 'uploader': 'Shorts Break',
  5508. 'channel_follower_count': int,
  5509. 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
  5510. 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
  5511. 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
  5512. },
  5513. 'playlist_mincount': 30,
  5514. }, {
  5515. # Trending Now Tab. tab id is empty
  5516. 'url': 'https://www.youtube.com/feed/trending',
  5517. 'info_dict': {
  5518. 'id': 'trending',
  5519. 'title': 'trending - Now',
  5520. 'tags': [],
  5521. },
  5522. 'playlist_mincount': 30,
  5523. }, {
  5524. # Trending Gaming Tab. tab id is empty
  5525. 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
  5526. 'info_dict': {
  5527. 'id': 'trending',
  5528. 'title': 'trending - Gaming',
  5529. 'tags': [],
  5530. },
  5531. 'playlist_mincount': 30,
  5532. }, {
  5533. # Shorts url result in shorts tab
  5534. 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
  5535. 'info_dict': {
  5536. 'id': 'UCiu-3thuViMebBjw_5nWYrA',
  5537. 'title': 'cole-dlp-test-acc - Shorts',
  5538. 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5539. 'channel': 'cole-dlp-test-acc',
  5540. 'channel_follower_count': int,
  5541. 'description': 'test description',
  5542. 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5543. 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5544. 'tags': [],
  5545. 'uploader': 'cole-dlp-test-acc',
  5546. 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5547. },
  5548. 'playlist': [{
  5549. 'info_dict': {
  5550. '_type': 'url',
  5551. 'ie_key': 'Youtube',
  5552. 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
  5553. 'id': 'sSM9J5YH_60',
  5554. 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
  5555. 'title': 'SHORT short',
  5556. 'channel': 'cole-dlp-test-acc',
  5557. 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
  5558. 'view_count': int,
  5559. 'thumbnails': list,
  5560. }
  5561. }],
  5562. 'params': {'extract_flat': True},
  5563. }, {
  5564. # Live video status should be extracted
  5565. 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
  5566. 'info_dict': {
  5567. 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
  5568. 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
  5569. 'tags': []
  5570. },
  5571. 'playlist': [{
  5572. 'info_dict': {
  5573. '_type': 'url',
  5574. 'ie_key': 'Youtube',
  5575. 'url': 'startswith:https://www.youtube.com/watch?v=',
  5576. 'id': str,
  5577. 'title': str,
  5578. 'live_status': 'is_live',
  5579. 'channel_id': str,
  5580. 'channel_url': str,
  5581. 'concurrent_view_count': int,
  5582. 'channel': str,
  5583. }
  5584. }],
  5585. 'params': {'extract_flat': True},
  5586. 'playlist_mincount': 1
  5587. }]
  @classmethod
  def suitable(cls, url):
      """Reject any URL that YoutubeIE accepts; otherwise defer to the inherited check."""
      if YoutubeIE.suitable(url):
          return False
      return super().suitable(url)
  # Splits a URL into three named parts around _VALID_URL:
  #   pre  - the portion matched by _VALID_URL
  #   tab  - an optional single path segment (with its leading '/') right after `pre`;
  #          the conditional `(?(not_channel)|...)` only tries it when the `not_channel`
  #          group (not defined in this pattern; must come from _VALID_URL) did not match
  #   post - everything that remains up to the end of the string
  _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
  5592. def _get_url_mobj(self, url):
  5593. mobj = self._URL_RE.match(url).groupdict()
  5594. mobj.update((k, '') for k, v in mobj.items() if v is None)
  5595. return mobj
  def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
      """Return a (tab_id, tab_name) tuple for the given tab.

      The id is looked up, in order, from the tab segment of the tab's endpoint URL
      (resolved against base_url), from the tab's 'tabIdentifier', and finally
      from the lower-cased title. tab_name is always the lower-cased title
      ('' when the tab has no title).
      """
      tab_name = (tab.get('title') or '').lower()

      endpoint_path = traverse_obj(
          tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
      tab_url = urljoin(base_url, endpoint_path)

      tab_id = None
      if tab_url:
          # The matched tab segment starts with '/'; strip it
          tab_id = self._get_url_mobj(tab_url)['tab'][1:]
      if not tab_id:
          # No usable URL, or the URL carried no tab segment
          tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

      if tab_id:
          id_aliases = {
              'TAB_ID_SPONSORSHIPS': 'membership',
          }
          return id_aliases.get(tab_id, tab_id), tab_name

      # No tab id could be determined, so fall back to the tab name.
      # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
      # Note that in the case of translated tab name this may result in an empty string, which we don't want.
      if tab_name:
          self.write_debug(f'Falling back to selected tab name: {tab_name}')
      name_aliases = {
          'home': 'featured',
          'live': 'streams',
      }
      return name_aliases.get(tab_name, tab_name), tab_name
  5615. def _has_tab(self, tabs, tab_id):
  5616. return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
  5617. @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
  5618. def _real_extract(self, url, smuggled_data):
  5619. item_id = self._match_id(url)
  5620. url = urllib.parse.urlunparse(
  5621. urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
  5622. compat_opts = self.get_param('compat_opts', [])
  5623. mobj = self._get_url_mobj(url)
  5624. pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
  5625. if is_channel and smuggled_data.get('is_music_url'):
  5626. if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
  5627. return self.url_result(
  5628. f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
  5629. elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
  5630. mdata = self._extract_tab_endpoint(
  5631. f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
  5632. murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
  5633. get_all=False, expected_type=str)
  5634. if not murl:
  5635. raise ExtractorError('Failed to resolve album to playlist')
  5636. return self.url_result(murl, YoutubeTabIE)
  5637. elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
  5638. return self.url_result(
  5639. f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)
  5640. original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
  5641. if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
  5642. url = f'{pre}/videos{post}'
  5643. # Handle both video/playlist URLs
  5644. qs = parse_qs(url)
  5645. video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
  5646. if not video_id and mobj['not_channel'].startswith('watch'):
  5647. if not playlist_id:
  5648. # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
  5649. raise ExtractorError('A video URL was given without video ID', expected=True)
  5650. # Common mistake: https://www.youtube.com/watch?list=playlist_id
  5651. self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
  5652. return self.url_result(
  5653. f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)
  5654. if not self._yes_playlist(playlist_id, video_id):
  5655. return self.url_result(
  5656. f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
  5657. data, ytcfg = self._extract_data(url, display_id)
  5658. # YouTube may provide a non-standard redirect to the regional channel
  5659. # See: https://github.com/hypervideo/hypervideo/issues/2694
  5660. # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
  5661. redirect_url = traverse_obj(
  5662. data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
  5663. if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
  5664. redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
  5665. self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
  5666. return self.url_result(redirect_url, YoutubeTabIE)
  5667. tabs, extra_tabs = self._extract_tab_renderers(data), []
  5668. if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
  5669. selected_tab = self._extract_selected_tab(tabs)
  5670. selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
  5671. self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
  5672. if not original_tab_id and selected_tab_name:
  5673. self.to_screen('Downloading all uploads of the channel. '
  5674. 'To download only the videos in a specific tab, pass the tab\'s URL')
  5675. if self._has_tab(tabs, 'streams'):
  5676. extra_tabs.append(''.join((pre, '/streams', post)))
  5677. if self._has_tab(tabs, 'shorts'):
  5678. extra_tabs.append(''.join((pre, '/shorts', post)))
  5679. # XXX: Members-only tab should also be extracted
  5680. if not extra_tabs and selected_tab_id != 'videos':
  5681. # Channel does not have streams, shorts or videos tabs
  5682. if item_id[:2] != 'UC':
  5683. raise ExtractorError('This channel has no uploads', expected=True)
  5684. # Topic channels don't have /videos. Use the equivalent playlist instead
  5685. pl_id = f'UU{item_id[2:]}'
  5686. pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
  5687. try:
  5688. data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
  5689. except ExtractorError:
  5690. raise ExtractorError('This channel has no uploads', expected=True)
  5691. else:
  5692. item_id, url = pl_id, pl_url
  5693. self.to_screen(
  5694. f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
  5695. elif extra_tabs and selected_tab_id != 'videos':
  5696. # When there are shorts/live tabs but not videos tab
  5697. url, data = f'{pre}{post}', None
  5698. elif (original_tab_id or 'videos') != selected_tab_id:
  5699. if original_tab_id == 'live':
  5700. # Live tab should have redirected to the video
  5701. # Except in the case the channel has an actual live tab
  5702. # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
  5703. raise UserNotLive(video_id=item_id)
  5704. elif selected_tab_name:
  5705. raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
  5706. # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
  5707. url = f'{pre}{post}'
  5708. # YouTube sometimes provides a button to reload playlist with unavailable videos.
  5709. if 'no-youtube-unavailable-videos' not in compat_opts:
  5710. data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
  5711. self._extract_and_report_alerts(data, only_once=True)
  5712. tabs, entries = self._extract_tab_renderers(data), []
  5713. if tabs:
  5714. entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
  5715. entries[0].update({
  5716. 'extractor_key': YoutubeTabIE.ie_key(),
  5717. 'extractor': YoutubeTabIE.IE_NAME,
  5718. 'webpage_url': url,
  5719. })
  5720. if self.get_param('playlist_items') == '0':
  5721. entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
  5722. else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
  5723. entries.extend(map(self._real_extract, extra_tabs))
  5724. if len(entries) == 1:
  5725. return entries[0]
  5726. elif entries:
  5727. metadata = self._extract_metadata_from_tabs(item_id, data)
  5728. uploads_url = 'the Uploads (UU) playlist URL'
  5729. if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
  5730. uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
  5731. self.to_screen(
  5732. 'Downloading as multiple playlists, separated by tabs. '
  5733. f'To download as a single playlist instead, pass {uploads_url}')
  5734. return self.playlist_result(entries, item_id, **metadata)
  5735. # Inline playlist
  5736. playlist = traverse_obj(
  5737. data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
  5738. if playlist:
  5739. return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
  5740. video_id = traverse_obj(
  5741. data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
  5742. if video_id:
  5743. if tab != '/live': # live tab is expected to redirect to video
  5744. self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
  5745. return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
  5746. raise ExtractorError('Unable to recognize tab page')
  5747. class YoutubePlaylistIE(InfoExtractor):
  5748. IE_DESC = 'YouTube playlists'
  5749. _VALID_URL = r'''(?x)(?:
  5750. (?:https?://)?
  5751. (?:\w+\.)?
  5752. (?:
  5753. (?:
  5754. youtube(?:kids)?\.com|
  5755. %(invidious)s
  5756. )
  5757. /.*?\?.*?\blist=
  5758. )?
  5759. (?P<id>%(playlist_id)s)
  5760. )''' % {
  5761. 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
  5762. 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
  5763. }
  5764. IE_NAME = 'youtube:playlist'
  5765. _TESTS = [{
  5766. 'note': 'issue #673',
  5767. 'url': 'PLBB231211A4F62143',
  5768. 'info_dict': {
  5769. 'title': '[OLD]Team Fortress 2 (Class-based LP)',
  5770. 'id': 'PLBB231211A4F62143',
  5771. 'uploader': 'Wickman',
  5772. 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
  5773. 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
  5774. 'view_count': int,
  5775. 'uploader_url': 'https://www.youtube.com/c/WickmanVT',
  5776. 'modified_date': r're:\d{8}',
  5777. 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
  5778. 'channel': 'Wickman',
  5779. 'tags': [],
  5780. 'channel_url': 'https://www.youtube.com/c/WickmanVT',
  5781. 'availability': 'public',
  5782. },
  5783. 'playlist_mincount': 29,
  5784. }, {
  5785. 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
  5786. 'info_dict': {
  5787. 'title': 'YDL_safe_search',
  5788. 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
  5789. },
  5790. 'playlist_count': 2,
  5791. 'skip': 'This playlist is private',
  5792. }, {
  5793. 'note': 'embedded',
  5794. 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
  5795. 'playlist_count': 4,
  5796. 'info_dict': {
  5797. 'title': 'JODA15',
  5798. 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
  5799. 'uploader': 'milan',
  5800. 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
  5801. 'description': '',
  5802. 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
  5803. 'tags': [],
  5804. 'modified_date': '20140919',
  5805. 'view_count': int,
  5806. 'channel': 'milan',
  5807. 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
  5808. 'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
  5809. 'availability': 'public',
  5810. },
  5811. 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
  5812. }, {
  5813. 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
  5814. 'playlist_mincount': 455,
  5815. 'info_dict': {
  5816. 'title': '2018 Chinese New Singles (11/6 updated)',
  5817. 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
  5818. 'uploader': 'LBK',
  5819. 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
  5820. 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
  5821. 'channel': 'LBK',
  5822. 'view_count': int,
  5823. 'channel_url': 'https://www.youtube.com/c/愛低音的國王',
  5824. 'tags': [],
  5825. 'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
  5826. 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
  5827. 'modified_date': r're:\d{8}',
  5828. 'availability': 'public',
  5829. },
  5830. 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
  5831. }, {
  5832. 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
  5833. 'only_matching': True,
  5834. }, {
  5835. # music album playlist
  5836. 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
  5837. 'only_matching': True,
  5838. }]
  5839. @classmethod
  5840. def suitable(cls, url):
  5841. if YoutubeTabIE.suitable(url):
  5842. return False
  5843. from ..utils import parse_qs
  5844. qs = parse_qs(url)
  5845. if qs.get('v', [None])[0]:
  5846. return False
  5847. return super().suitable(url)
  5848. def _real_extract(self, url):
  5849. playlist_id = self._match_id(url)
  5850. is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
  5851. url = update_url_query(
  5852. 'https://www.youtube.com/playlist',
  5853. parse_qs(url) or {'list': playlist_id})
  5854. if is_music_url:
  5855. url = smuggle_url(url, {'is_music_url': True})
  5856. return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  5857. class YoutubeYtBeIE(InfoExtractor):
  5858. IE_DESC = 'youtu.be'
  5859. _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
  5860. _TESTS = [{
  5861. 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
  5862. 'info_dict': {
  5863. 'id': 'yeWKywCrFtk',
  5864. 'ext': 'mp4',
  5865. 'title': 'Small Scale Baler and Braiding Rugs',
  5866. 'uploader': 'Backus-Page House Museum',
  5867. 'uploader_id': 'backuspagemuseum',
  5868. 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
  5869. 'upload_date': '20161008',
  5870. 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
  5871. 'categories': ['Nonprofits & Activism'],
  5872. 'tags': list,
  5873. 'like_count': int,
  5874. 'age_limit': 0,
  5875. 'playable_in_embed': True,
  5876. 'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
  5877. 'channel': 'Backus-Page House Museum',
  5878. 'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
  5879. 'live_status': 'not_live',
  5880. 'view_count': int,
  5881. 'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
  5882. 'availability': 'public',
  5883. 'duration': 59,
  5884. 'comment_count': int,
  5885. 'channel_follower_count': int
  5886. },
  5887. 'params': {
  5888. 'noplaylist': True,
  5889. 'skip_download': True,
  5890. },
  5891. }, {
  5892. 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
  5893. 'only_matching': True,
  5894. }]
  5895. def _real_extract(self, url):
  5896. mobj = self._match_valid_url(url)
  5897. video_id = mobj.group('id')
  5898. playlist_id = mobj.group('playlist_id')
  5899. return self.url_result(
  5900. update_url_query('https://www.youtube.com/watch', {
  5901. 'v': video_id,
  5902. 'list': playlist_id,
  5903. 'feature': 'youtu.be',
  5904. }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
  5905. class YoutubeLivestreamEmbedIE(InfoExtractor):
  5906. IE_DESC = 'YouTube livestream embeds'
  5907. _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
  5908. _TESTS = [{
  5909. 'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
  5910. 'only_matching': True,
  5911. }]
  5912. def _real_extract(self, url):
  5913. channel_id = self._match_id(url)
  5914. return self.url_result(
  5915. f'https://www.youtube.com/channel/{channel_id}/live',
  5916. ie=YoutubeTabIE.ie_key(), video_id=channel_id)
  5917. class YoutubeYtUserIE(InfoExtractor):
  5918. IE_DESC = 'YouTube user videos; "ytuser:" prefix'
  5919. IE_NAME = 'youtube:user'
  5920. _VALID_URL = r'ytuser:(?P<id>.+)'
  5921. _TESTS = [{
  5922. 'url': 'ytuser:phihag',
  5923. 'only_matching': True,
  5924. }]
  5925. def _real_extract(self, url):
  5926. user_id = self._match_id(url)
  5927. return self.url_result(f'https://www.youtube.com/user/{user_id}', YoutubeTabIE, user_id)
  5928. class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
  5929. IE_NAME = 'youtube:favorites'
  5930. IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
  5931. _VALID_URL = r':ytfav(?:ou?rite)?s?'
  5932. _LOGIN_REQUIRED = True
  5933. _TESTS = [{
  5934. 'url': ':ytfav',
  5935. 'only_matching': True,
  5936. }, {
  5937. 'url': ':ytfavorites',
  5938. 'only_matching': True,
  5939. }]
  5940. def _real_extract(self, url):
  5941. return self.url_result(
  5942. 'https://www.youtube.com/playlist?list=LL',
  5943. ie=YoutubeTabIE.ie_key())
  5944. class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
  5945. IE_NAME = 'youtube:notif'
  5946. IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
  5947. _VALID_URL = r':ytnotif(?:ication)?s?'
  5948. _LOGIN_REQUIRED = True
  5949. _TESTS = [{
  5950. 'url': ':ytnotif',
  5951. 'only_matching': True,
  5952. }, {
  5953. 'url': ':ytnotifications',
  5954. 'only_matching': True,
  5955. }]
  5956. def _extract_notification_menu(self, response, continuation_list):
  5957. notification_list = traverse_obj(
  5958. response,
  5959. ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
  5960. ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
  5961. expected_type=list) or []
  5962. continuation_list[0] = None
  5963. for item in notification_list:
  5964. entry = self._extract_notification_renderer(item.get('notificationRenderer'))
  5965. if entry:
  5966. yield entry
  5967. continuation = item.get('continuationItemRenderer')
  5968. if continuation:
  5969. continuation_list[0] = continuation
  5970. def _extract_notification_renderer(self, notification):
  5971. video_id = traverse_obj(
  5972. notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
  5973. url = f'https://www.youtube.com/watch?v={video_id}'
  5974. channel_id = None
  5975. if not video_id:
  5976. browse_ep = traverse_obj(
  5977. notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
  5978. channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
  5979. post_id = self._search_regex(
  5980. r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
  5981. 'post id', default=None)
  5982. if not channel_id or not post_id:
  5983. return
  5984. # The direct /post url redirects to this in the browser
  5985. url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
  5986. channel = traverse_obj(
  5987. notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
  5988. expected_type=str)
  5989. notification_title = self._get_text(notification, 'shortMessage')
  5990. if notification_title:
  5991. notification_title = notification_title.replace('\xad', '') # remove soft hyphens
  5992. # TODO: handle recommended videos
  5993. title = self._search_regex(
  5994. rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
  5995. 'video title', default=None)
  5996. timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
  5997. if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
  5998. else None)
  5999. return {
  6000. '_type': 'url',
  6001. 'url': url,
  6002. 'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
  6003. 'video_id': video_id,
  6004. 'title': title,
  6005. 'channel_id': channel_id,
  6006. 'channel': channel,
  6007. 'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
  6008. 'timestamp': timestamp,
  6009. }
  6010. def _notification_menu_entries(self, ytcfg):
  6011. continuation_list = [None]
  6012. response = None
  6013. for page in itertools.count(1):
  6014. ctoken = traverse_obj(
  6015. continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
  6016. response = self._extract_response(
  6017. item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
  6018. ep='notification/get_notification_menu', check_get_keys='actions',
  6019. headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
  6020. yield from self._extract_notification_menu(response, continuation_list)
  6021. if not continuation_list[0]:
  6022. break
  6023. def _real_extract(self, url):
  6024. display_id = 'notifications'
  6025. ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
  6026. self._report_playlist_authcheck(ytcfg)
  6027. return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
  6028. class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
  6029. IE_DESC = 'YouTube search'
  6030. IE_NAME = 'youtube:search'
  6031. _SEARCH_KEY = 'ytsearch'
  6032. _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
  6033. _TESTS = [{
  6034. 'url': 'ytsearch5:youtube-dl test video',
  6035. 'playlist_count': 5,
  6036. 'info_dict': {
  6037. 'id': 'youtube-dl test video',
  6038. 'title': 'youtube-dl test video',
  6039. }
  6040. }]
  6041. class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
  6042. IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
  6043. _SEARCH_KEY = 'ytsearchdate'
  6044. IE_DESC = 'YouTube search, newest videos first'
  6045. _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
  6046. _TESTS = [{
  6047. 'url': 'ytsearchdate5:youtube-dl test video',
  6048. 'playlist_count': 5,
  6049. 'info_dict': {
  6050. 'id': 'youtube-dl test video',
  6051. 'title': 'youtube-dl test video',
  6052. }
  6053. }]
  6054. class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
  6055. IE_DESC = 'YouTube search URLs with sorting and filter support'
  6056. IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
  6057. _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
  6058. _TESTS = [{
  6059. 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
  6060. 'playlist_mincount': 5,
  6061. 'info_dict': {
  6062. 'id': 'youtube-dl test video',
  6063. 'title': 'youtube-dl test video',
  6064. }
  6065. }, {
  6066. 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
  6067. 'playlist_mincount': 5,
  6068. 'info_dict': {
  6069. 'id': 'python',
  6070. 'title': 'python',
  6071. }
  6072. }, {
  6073. 'url': 'https://www.youtube.com/results?search_query=%23cats',
  6074. 'playlist_mincount': 1,
  6075. 'info_dict': {
  6076. 'id': '#cats',
  6077. 'title': '#cats',
  6078. # The test suite does not have support for nested playlists
  6079. # 'entries': [{
  6080. # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
  6081. # 'title': '#cats',
  6082. # }],
  6083. },
  6084. }, {
  6085. 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
  6086. 'only_matching': True,
  6087. }]
  6088. def _real_extract(self, url):
  6089. qs = parse_qs(url)
  6090. query = (qs.get('search_query') or qs.get('q'))[0]
  6091. return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
  6092. class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
  6093. IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
  6094. IE_NAME = 'youtube:music:search_url'
  6095. _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
  6096. _TESTS = [{
  6097. 'url': 'https://music.youtube.com/search?q=royalty+free+music',
  6098. 'playlist_count': 16,
  6099. 'info_dict': {
  6100. 'id': 'royalty free music',
  6101. 'title': 'royalty free music',
  6102. }
  6103. }, {
  6104. 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
  6105. 'playlist_mincount': 30,
  6106. 'info_dict': {
  6107. 'id': 'royalty free music - songs',
  6108. 'title': 'royalty free music - songs',
  6109. },
  6110. 'params': {'extract_flat': 'in_playlist'}
  6111. }, {
  6112. 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
  6113. 'playlist_mincount': 30,
  6114. 'info_dict': {
  6115. 'id': 'royalty free music - community playlists',
  6116. 'title': 'royalty free music - community playlists',
  6117. },
  6118. 'params': {'extract_flat': 'in_playlist'}
  6119. }]
  6120. _SECTIONS = {
  6121. 'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
  6122. 'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
  6123. 'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
  6124. 'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
  6125. 'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
  6126. 'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
  6127. }
  6128. def _real_extract(self, url):
  6129. qs = parse_qs(url)
  6130. query = (qs.get('search_query') or qs.get('q'))[0]
  6131. params = qs.get('sp', (None,))[0]
  6132. if params:
  6133. section = next((k for k, v in self._SECTIONS.items() if v == params), params)
  6134. else:
  6135. section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
  6136. params = self._SECTIONS.get(section)
  6137. if not params:
  6138. section = None
  6139. title = join_nonempty(query, section, delim=' - ')
  6140. return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
  6141. class YoutubeFeedsInfoExtractor(InfoExtractor):
  6142. """
  6143. Base class for feed extractors
  6144. Subclasses must re-define the _FEED_NAME property.
  6145. """
  6146. _LOGIN_REQUIRED = True
  6147. _FEED_NAME = 'feeds'
  6148. def _real_initialize(self):
  6149. YoutubeBaseInfoExtractor._check_login_required(self)
  6150. @classproperty
  6151. def IE_NAME(self):
  6152. return f'youtube:{self._FEED_NAME}'
  6153. def _real_extract(self, url):
  6154. return self.url_result(
  6155. f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
  6156. class YoutubeWatchLaterIE(InfoExtractor):
  6157. IE_NAME = 'youtube:watchlater'
  6158. IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
  6159. _VALID_URL = r':ytwatchlater'
  6160. _TESTS = [{
  6161. 'url': ':ytwatchlater',
  6162. 'only_matching': True,
  6163. }]
  6164. def _real_extract(self, url):
  6165. return self.url_result(
  6166. 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
  6167. class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
  6168. IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
  6169. _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
  6170. _FEED_NAME = 'recommended'
  6171. _LOGIN_REQUIRED = False
  6172. _TESTS = [{
  6173. 'url': ':ytrec',
  6174. 'only_matching': True,
  6175. }, {
  6176. 'url': ':ytrecommended',
  6177. 'only_matching': True,
  6178. }, {
  6179. 'url': 'https://youtube.com',
  6180. 'only_matching': True,
  6181. }]
  6182. class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
  6183. IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
  6184. _VALID_URL = r':ytsub(?:scription)?s?'
  6185. _FEED_NAME = 'subscriptions'
  6186. _TESTS = [{
  6187. 'url': ':ytsubs',
  6188. 'only_matching': True,
  6189. }, {
  6190. 'url': ':ytsubscriptions',
  6191. 'only_matching': True,
  6192. }]
  6193. class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
  6194. IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
  6195. _VALID_URL = r':ythis(?:tory)?'
  6196. _FEED_NAME = 'history'
  6197. _TESTS = [{
  6198. 'url': ':ythistory',
  6199. 'only_matching': True,
  6200. }]
  6201. class YoutubeStoriesIE(InfoExtractor):
  6202. IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
  6203. IE_NAME = 'youtube:stories'
  6204. _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
  6205. _TESTS = [{
  6206. 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
  6207. 'only_matching': True,
  6208. }]
  6209. def _real_extract(self, url):
  6210. playlist_id = f'RLTD{self._match_id(url)}'
  6211. return self.url_result(
  6212. smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
  6213. ie=YoutubeTabIE, video_id=playlist_id)
  6214. class YoutubeShortsAudioPivotIE(InfoExtractor):
  6215. IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
  6216. IE_NAME = 'youtube:shorts:pivot:audio'
  6217. _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
  6218. _TESTS = [{
  6219. 'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
  6220. 'only_matching': True,
  6221. }]
  6222. @staticmethod
  6223. def _generate_audio_pivot_params(video_id):
  6224. """
  6225. Generates sfv_audio_pivot browse params for this video id
  6226. """
  6227. pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
  6228. return urllib.parse.quote(base64.b64encode(pb_params).decode())
  6229. def _real_extract(self, url):
  6230. video_id = self._match_id(url)
  6231. return self.url_result(
  6232. f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
  6233. ie=YoutubeTabIE)
  6234. class YoutubeTruncatedURLIE(InfoExtractor):
  6235. IE_NAME = 'youtube:truncated_url'
  6236. IE_DESC = False # Do not list
  6237. _VALID_URL = r'''(?x)
  6238. (?:https?://)?
  6239. (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
  6240. (?:watch\?(?:
  6241. feature=[a-z_]+|
  6242. annotation_id=annotation_[^&]+|
  6243. x-yt-cl=[0-9]+|
  6244. hl=[^&]*|
  6245. t=[0-9]+
  6246. )?
  6247. |
  6248. attribution_link\?a=[^&]+
  6249. )
  6250. $
  6251. '''
  6252. _TESTS = [{
  6253. 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
  6254. 'only_matching': True,
  6255. }, {
  6256. 'url': 'https://www.youtube.com/watch?',
  6257. 'only_matching': True,
  6258. }, {
  6259. 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
  6260. 'only_matching': True,
  6261. }, {
  6262. 'url': 'https://www.youtube.com/watch?feature=foo',
  6263. 'only_matching': True,
  6264. }, {
  6265. 'url': 'https://www.youtube.com/watch?hl=en-GB',
  6266. 'only_matching': True,
  6267. }, {
  6268. 'url': 'https://www.youtube.com/watch?t=2372',
  6269. 'only_matching': True,
  6270. }]
  6271. def _real_extract(self, url):
  6272. raise ExtractorError(
  6273. 'Did you forget to quote the URL? Remember that & is a meta '
  6274. 'character in most shells, so you want to put the URL in quotes, '
  6275. 'like youtube-dl '
  6276. '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
  6277. ' or simply youtube-dl BaW_jenozKc .',
  6278. expected=True)
  6279. class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
  6280. IE_NAME = 'youtube:clip'
  6281. _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
  6282. _TESTS = [{
  6283. # FIXME: Other metadata should be extracted from the clip, not from the base video
  6284. 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
  6285. 'info_dict': {
  6286. 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
  6287. 'ext': 'mp4',
  6288. 'section_start': 29.0,
  6289. 'section_end': 39.7,
  6290. 'duration': 10.7,
  6291. 'age_limit': 0,
  6292. 'availability': 'public',
  6293. 'categories': ['Gaming'],
  6294. 'channel': 'Scott The Woz',
  6295. 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
  6296. 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
  6297. 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
  6298. 'like_count': int,
  6299. 'playable_in_embed': True,
  6300. 'tags': 'count:17',
  6301. 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
  6302. 'title': 'Mobile Games on Console - Scott The Woz',
  6303. 'upload_date': '20210920',
  6304. 'uploader': 'Scott The Woz',
  6305. 'uploader_id': 'scottthewoz',
  6306. 'uploader_url': 'http://www.youtube.com/user/scottthewoz',
  6307. 'view_count': int,
  6308. 'live_status': 'not_live',
  6309. 'channel_follower_count': int
  6310. }
  6311. }]
  6312. def _real_extract(self, url):
  6313. clip_id = self._match_id(url)
  6314. _, data = self._extract_webpage(url, clip_id)
  6315. video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
  6316. if not video_id:
  6317. raise ExtractorError('Unable to find video ID')
  6318. clip_data = traverse_obj(data, (
  6319. 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
  6320. 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
  6321. 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
  6322. 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
  6323. return {
  6324. '_type': 'url_transparent',
  6325. 'url': f'https://www.youtube.com/watch?v={video_id}',
  6326. 'ie_key': YoutubeIE.ie_key(),
  6327. 'id': clip_id,
  6328. 'section_start': int(clip_data['startTimeMs']) / 1000,
  6329. 'section_end': int(clip_data['endTimeMs']) / 1000,
  6330. }
  6331. class YoutubeTruncatedIDIE(InfoExtractor):
  6332. IE_NAME = 'youtube:truncated_id'
  6333. IE_DESC = False # Do not list
  6334. _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
  6335. _TESTS = [{
  6336. 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
  6337. 'only_matching': True,
  6338. }]
  6339. def _real_extract(self, url):
  6340. video_id = self._match_id(url)
  6341. raise ExtractorError(
  6342. f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',
  6343. expected=True)