test_networking.py 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. import pytest
  6. from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
  7. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  8. import gzip
  9. import http.client
  10. import http.cookiejar
  11. import http.server
  12. import io
  13. import logging
  14. import pathlib
  15. import random
  16. import ssl
  17. import tempfile
  18. import threading
  19. import time
  20. import urllib.error
  21. import urllib.request
  22. import warnings
  23. import zlib
  24. from email.message import Message
  25. from http.cookiejar import CookieJar
  26. from test.helper import (
  27. FakeYDL,
  28. http_server_port,
  29. validate_and_send,
  30. verify_address_availability,
  31. )
  32. from yt_dlp.cookies import YoutubeDLCookieJar
  33. from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
  34. from yt_dlp.networking import (
  35. HEADRequest,
  36. PUTRequest,
  37. Request,
  38. RequestDirector,
  39. RequestHandler,
  40. Response,
  41. )
  42. from yt_dlp.networking._urllib import UrllibRH
  43. from yt_dlp.networking.exceptions import (
  44. CertificateVerifyError,
  45. HTTPError,
  46. IncompleteRead,
  47. NoSupportingHandlers,
  48. ProxyError,
  49. RequestError,
  50. SSLError,
  51. TransportError,
  52. UnsupportedRequest,
  53. )
  54. from yt_dlp.networking.impersonate import (
  55. ImpersonateRequestHandler,
  56. ImpersonateTarget,
  57. )
  58. from yt_dlp.utils import YoutubeDLError
  59. from yt_dlp.utils._utils import _YDLLogger as FakeLogger
  60. from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
  61. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  62. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  63. protocol_version = 'HTTP/1.1'
  64. default_request_version = 'HTTP/1.1'
  65. def log_message(self, format, *args):
  66. pass
  67. def _headers(self):
  68. payload = str(self.headers).encode()
  69. self.send_response(200)
  70. self.send_header('Content-Type', 'application/json')
  71. self.send_header('Content-Length', str(len(payload)))
  72. self.end_headers()
  73. self.wfile.write(payload)
  74. def _redirect(self):
  75. self.send_response(int(self.path[len('/redirect_'):]))
  76. self.send_header('Location', '/method')
  77. self.send_header('Content-Length', '0')
  78. self.end_headers()
  79. def _method(self, method, payload=None):
  80. self.send_response(200)
  81. self.send_header('Content-Length', str(len(payload or '')))
  82. self.send_header('Method', method)
  83. self.end_headers()
  84. if payload:
  85. self.wfile.write(payload)
  86. def _status(self, status):
  87. payload = f'<html>{status} NOT FOUND</html>'.encode()
  88. self.send_response(int(status))
  89. self.send_header('Content-Type', 'text/html; charset=utf-8')
  90. self.send_header('Content-Length', str(len(payload)))
  91. self.end_headers()
  92. self.wfile.write(payload)
  93. def _read_data(self):
  94. if 'Content-Length' in self.headers:
  95. return self.rfile.read(int(self.headers['Content-Length']))
  96. else:
  97. return b''
  98. def do_POST(self):
  99. data = self._read_data() + str(self.headers).encode()
  100. if self.path.startswith('/redirect_'):
  101. self._redirect()
  102. elif self.path.startswith('/method'):
  103. self._method('POST', data)
  104. elif self.path.startswith('/headers'):
  105. self._headers()
  106. else:
  107. self._status(404)
  108. def do_HEAD(self):
  109. if self.path.startswith('/redirect_'):
  110. self._redirect()
  111. elif self.path.startswith('/method'):
  112. self._method('HEAD')
  113. else:
  114. self._status(404)
  115. def do_PUT(self):
  116. data = self._read_data() + str(self.headers).encode()
  117. if self.path.startswith('/redirect_'):
  118. self._redirect()
  119. elif self.path.startswith('/method'):
  120. self._method('PUT', data)
  121. else:
  122. self._status(404)
  123. def do_GET(self):
  124. if self.path == '/video.html':
  125. payload = b'<html><video src="/vid.mp4" /></html>'
  126. self.send_response(200)
  127. self.send_header('Content-Type', 'text/html; charset=utf-8')
  128. self.send_header('Content-Length', str(len(payload)))
  129. self.end_headers()
  130. self.wfile.write(payload)
  131. elif self.path == '/vid.mp4':
  132. payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
  133. self.send_response(200)
  134. self.send_header('Content-Type', 'video/mp4')
  135. self.send_header('Content-Length', str(len(payload)))
  136. self.end_headers()
  137. self.wfile.write(payload)
  138. elif self.path == '/%E4%B8%AD%E6%96%87.html':
  139. payload = b'<html><video src="/vid.mp4" /></html>'
  140. self.send_response(200)
  141. self.send_header('Content-Type', 'text/html; charset=utf-8')
  142. self.send_header('Content-Length', str(len(payload)))
  143. self.end_headers()
  144. self.wfile.write(payload)
  145. elif self.path == '/%c7%9f':
  146. payload = b'<html><video src="/vid.mp4" /></html>'
  147. self.send_response(200)
  148. self.send_header('Content-Type', 'text/html; charset=utf-8')
  149. self.send_header('Content-Length', str(len(payload)))
  150. self.end_headers()
  151. self.wfile.write(payload)
  152. elif self.path.startswith('/redirect_loop'):
  153. self.send_response(301)
  154. self.send_header('Location', self.path)
  155. self.send_header('Content-Length', '0')
  156. self.end_headers()
  157. elif self.path == '/redirect_dotsegments':
  158. self.send_response(301)
  159. # redirect to /headers but with dot segments before
  160. self.send_header('Location', '/a/b/./../../headers')
  161. self.send_header('Content-Length', '0')
  162. self.end_headers()
  163. elif self.path == '/redirect_dotsegments_absolute':
  164. self.send_response(301)
  165. # redirect to /headers but with dot segments before - absolute url
  166. self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
  167. self.send_header('Content-Length', '0')
  168. self.end_headers()
  169. elif self.path.startswith('/redirect_'):
  170. self._redirect()
  171. elif self.path.startswith('/method'):
  172. self._method('GET', str(self.headers).encode())
  173. elif self.path.startswith('/headers'):
  174. self._headers()
  175. elif self.path.startswith('/308-to-headers'):
  176. self.send_response(308)
  177. # redirect to "localhost" for testing cookie redirection handling
  178. self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers')
  179. self.send_header('Content-Length', '0')
  180. self.end_headers()
  181. elif self.path == '/trailing_garbage':
  182. payload = b'<html><video src="/vid.mp4" /></html>'
  183. self.send_response(200)
  184. self.send_header('Content-Type', 'text/html; charset=utf-8')
  185. self.send_header('Content-Encoding', 'gzip')
  186. buf = io.BytesIO()
  187. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  188. f.write(payload)
  189. compressed = buf.getvalue() + b'trailing garbage'
  190. self.send_header('Content-Length', str(len(compressed)))
  191. self.end_headers()
  192. self.wfile.write(compressed)
  193. elif self.path == '/302-non-ascii-redirect':
  194. new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
  195. self.send_response(301)
  196. self.send_header('Location', new_url)
  197. self.send_header('Content-Length', '0')
  198. self.end_headers()
  199. elif self.path == '/content-encoding':
  200. encodings = self.headers.get('ytdl-encoding', '')
  201. payload = b'<html><video src="/vid.mp4" /></html>'
  202. for encoding in filter(None, (e.strip() for e in encodings.split(','))):
  203. if encoding == 'br' and brotli:
  204. payload = brotli.compress(payload)
  205. elif encoding == 'gzip':
  206. buf = io.BytesIO()
  207. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  208. f.write(payload)
  209. payload = buf.getvalue()
  210. elif encoding == 'deflate':
  211. payload = zlib.compress(payload)
  212. elif encoding == 'unsupported':
  213. payload = b'raw'
  214. break
  215. else:
  216. self._status(415)
  217. return
  218. self.send_response(200)
  219. self.send_header('Content-Encoding', encodings)
  220. self.send_header('Content-Length', str(len(payload)))
  221. self.end_headers()
  222. self.wfile.write(payload)
  223. elif self.path.startswith('/gen_'):
  224. payload = b'<html></html>'
  225. self.send_response(int(self.path[len('/gen_'):]))
  226. self.send_header('Content-Type', 'text/html; charset=utf-8')
  227. self.send_header('Content-Length', str(len(payload)))
  228. self.end_headers()
  229. self.wfile.write(payload)
  230. elif self.path.startswith('/incompleteread'):
  231. payload = b'<html></html>'
  232. self.send_response(200)
  233. self.send_header('Content-Type', 'text/html; charset=utf-8')
  234. self.send_header('Content-Length', '234234')
  235. self.end_headers()
  236. self.wfile.write(payload)
  237. self.finish()
  238. elif self.path.startswith('/timeout_'):
  239. time.sleep(int(self.path[len('/timeout_'):]))
  240. self._headers()
  241. elif self.path == '/source_address':
  242. payload = str(self.client_address[0]).encode()
  243. self.send_response(200)
  244. self.send_header('Content-Type', 'text/html; charset=utf-8')
  245. self.send_header('Content-Length', str(len(payload)))
  246. self.end_headers()
  247. self.wfile.write(payload)
  248. self.finish()
  249. else:
  250. self._status(404)
  251. def send_header(self, keyword, value):
  252. """
  253. Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
  254. This is against what is defined in RFC 3986, however we need to test we support this
  255. since some sites incorrectly do this.
  256. """
  257. if keyword.lower() == 'connection':
  258. return super().send_header(keyword, value)
  259. if not hasattr(self, '_headers_buffer'):
  260. self._headers_buffer = []
  261. self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
  262. class TestRequestHandlerBase:
  263. @classmethod
  264. def setup_class(cls):
  265. cls.http_httpd = http.server.ThreadingHTTPServer(
  266. ('127.0.0.1', 0), HTTPTestRequestHandler)
  267. cls.http_port = http_server_port(cls.http_httpd)
  268. cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
  269. # FIXME: we should probably stop the http server thread after each test
  270. # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
  271. cls.http_server_thread.daemon = True
  272. cls.http_server_thread.start()
  273. # HTTPS server
  274. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  275. cls.https_httpd = http.server.ThreadingHTTPServer(
  276. ('127.0.0.1', 0), HTTPTestRequestHandler)
  277. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  278. sslctx.load_cert_chain(certfn, None)
  279. cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
  280. cls.https_port = http_server_port(cls.https_httpd)
  281. cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
  282. cls.https_server_thread.daemon = True
  283. cls.https_server_thread.start()
  284. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  285. class TestHTTPRequestHandler(TestRequestHandlerBase):
  286. def test_verify_cert(self, handler):
  287. with handler() as rh:
  288. with pytest.raises(CertificateVerifyError):
  289. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  290. with handler(verify=False) as rh:
  291. r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  292. assert r.status == 200
  293. r.close()
  294. def test_ssl_error(self, handler):
  295. # HTTPS server with too old TLS version
  296. # XXX: is there a better way to test this than to create a new server?
  297. https_httpd = http.server.ThreadingHTTPServer(
  298. ('127.0.0.1', 0), HTTPTestRequestHandler)
  299. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  300. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  301. https_port = http_server_port(https_httpd)
  302. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  303. https_server_thread.daemon = True
  304. https_server_thread.start()
  305. with handler(verify=False) as rh:
  306. with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info:
  307. validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  308. assert not issubclass(exc_info.type, CertificateVerifyError)
  309. def test_percent_encode(self, handler):
  310. with handler() as rh:
  311. # Unicode characters should be encoded with uppercase percent-encoding
  312. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
  313. assert res.status == 200
  314. res.close()
  315. # don't normalize existing percent encodings
  316. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
  317. assert res.status == 200
  318. res.close()
  319. @pytest.mark.parametrize('path', [
  320. '/a/b/./../../headers',
  321. '/redirect_dotsegments',
  322. # https://github.com/yt-dlp/yt-dlp/issues/9020
  323. '/redirect_dotsegments_absolute',
  324. ])
  325. def test_remove_dot_segments(self, handler, path):
  326. with handler(verbose=True) as rh:
  327. # This isn't a comprehensive test,
  328. # but it should be enough to check whether the handler is removing dot segments in required scenarios
  329. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
  330. assert res.status == 200
  331. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  332. res.close()
  333. @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
  334. def test_unicode_path_redirection(self, handler):
  335. with handler() as rh:
  336. r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
  337. assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
  338. r.close()
  339. def test_raise_http_error(self, handler):
  340. with handler() as rh:
  341. for bad_status in (400, 500, 599, 302):
  342. with pytest.raises(HTTPError):
  343. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status)))
  344. # Should not raise an error
  345. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
  346. def test_response_url(self, handler):
  347. with handler() as rh:
  348. # Response url should be that of the last url in redirect chain
  349. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
  350. assert res.url == f'http://127.0.0.1:{self.http_port}/method'
  351. res.close()
  352. res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
  353. assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
  354. res2.close()
  355. # Covers some basic cases we expect some level of consistency between request handlers for
  356. @pytest.mark.parametrize('redirect_status,method,expected', [
  357. # A 303 must either use GET or HEAD for subsequent request
  358. (303, 'POST', ('', 'GET', False)),
  359. (303, 'HEAD', ('', 'HEAD', False)),
  360. # 301 and 302 turn POST only into a GET
  361. (301, 'POST', ('', 'GET', False)),
  362. (301, 'HEAD', ('', 'HEAD', False)),
  363. (302, 'POST', ('', 'GET', False)),
  364. (302, 'HEAD', ('', 'HEAD', False)),
  365. # 307 and 308 should not change method
  366. (307, 'POST', ('testdata', 'POST', True)),
  367. (308, 'POST', ('testdata', 'POST', True)),
  368. (307, 'HEAD', ('', 'HEAD', False)),
  369. (308, 'HEAD', ('', 'HEAD', False)),
  370. ])
  371. def test_redirect(self, handler, redirect_status, method, expected):
  372. with handler() as rh:
  373. data = b'testdata' if method == 'POST' else None
  374. headers = {}
  375. if data is not None:
  376. headers['Content-Type'] = 'application/test'
  377. res = validate_and_send(
  378. rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data,
  379. headers=headers))
  380. headers = b''
  381. data_recv = b''
  382. if data is not None:
  383. data_recv += res.read(len(data))
  384. if data_recv != data:
  385. headers += data_recv
  386. data_recv = b''
  387. headers += res.read()
  388. assert expected[0] == data_recv.decode()
  389. assert expected[1] == res.headers.get('method')
  390. assert expected[2] == ('content-length' in headers.decode().lower())
  391. def test_request_cookie_header(self, handler):
  392. # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
  393. with handler() as rh:
  394. # Specified Cookie header should be used
  395. res = validate_and_send(
  396. rh, Request(
  397. f'http://127.0.0.1:{self.http_port}/headers',
  398. headers={'Cookie': 'test=test'})).read().decode()
  399. assert 'cookie: test=test' in res.lower()
  400. # Specified Cookie header should be removed on any redirect
  401. res = validate_and_send(
  402. rh, Request(
  403. f'http://127.0.0.1:{self.http_port}/308-to-headers',
  404. headers={'Cookie': 'test=test2'})).read().decode()
  405. assert 'cookie: test=test2' not in res.lower()
  406. # Specified Cookie header should override global cookiejar for that request
  407. # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now
  408. cookiejar = YoutubeDLCookieJar()
  409. cookiejar.set_cookie(http.cookiejar.Cookie(
  410. version=0, name='test', value='ytdlp', port=None, port_specified=False,
  411. domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
  412. path_specified=True, secure=False, expires=None, discard=False, comment=None,
  413. comment_url=None, rest={}))
  414. with handler(cookiejar=cookiejar) as rh:
  415. data = validate_and_send(
  416. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read()
  417. assert b'cookie: test=ytdlp' not in data.lower()
  418. assert b'cookie: test=test3' in data.lower()
  419. def test_redirect_loop(self, handler):
  420. with handler() as rh:
  421. with pytest.raises(HTTPError, match='redirect loop'):
  422. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
  423. def test_incompleteread(self, handler):
  424. with handler(timeout=2) as rh:
  425. with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
  426. validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
  427. def test_cookies(self, handler):
  428. cookiejar = YoutubeDLCookieJar()
  429. cookiejar.set_cookie(http.cookiejar.Cookie(
  430. 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
  431. False, '/headers', True, False, None, False, None, None, {}))
  432. with handler(cookiejar=cookiejar) as rh:
  433. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  434. assert b'cookie: test=ytdlp' in data.lower()
  435. # Per request
  436. with handler() as rh:
  437. data = validate_and_send(
  438. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
  439. assert b'cookie: test=ytdlp' in data.lower()
  440. def test_headers(self, handler):
  441. with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
  442. # Global Headers
  443. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower()
  444. assert b'test1: test' in data
  445. # Per request headers, merged with global
  446. data = validate_and_send(rh, Request(
  447. f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower()
  448. assert b'test1: test' in data
  449. assert b'test2: changed' in data
  450. assert b'test2: test2' not in data
  451. assert b'test3: test3' in data
  452. def test_read_timeout(self, handler):
  453. with handler() as rh:
  454. # Default timeout is 20 seconds, so this should go through
  455. validate_and_send(
  456. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
  457. with handler(timeout=0.1) as rh:
  458. with pytest.raises(TransportError):
  459. validate_and_send(
  460. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5'))
  461. # Per request timeout, should override handler timeout
  462. validate_and_send(
  463. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
  464. def test_connect_timeout(self, handler):
  465. # nothing should be listening on this port
  466. connect_timeout_url = 'http://10.255.255.255'
  467. with handler(timeout=0.01) as rh, pytest.raises(TransportError):
  468. now = time.time()
  469. validate_and_send(rh, Request(connect_timeout_url))
  470. assert time.time() - now < DEFAULT_TIMEOUT
  471. # Per request timeout, should override handler timeout
  472. request = Request(connect_timeout_url, extensions={'timeout': 0.01})
  473. with handler() as rh, pytest.raises(TransportError):
  474. now = time.time()
  475. validate_and_send(rh, request)
  476. assert time.time() - now < DEFAULT_TIMEOUT
  477. def test_source_address(self, handler):
  478. source_address = f'127.0.0.{random.randint(5, 255)}'
  479. # on some systems these loopback addresses we need for testing may not be available
  480. # see: https://github.com/yt-dlp/yt-dlp/issues/8890
  481. verify_address_availability(source_address)
  482. with handler(source_address=source_address) as rh:
  483. data = validate_and_send(
  484. rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
  485. assert source_address == data
  486. # Not supported by CurlCFFI
  487. @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
  488. def test_gzip_trailing_garbage(self, handler):
  489. with handler() as rh:
  490. data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
  491. assert data == '<html><video src="/vid.mp4" /></html>'
  492. @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
  493. @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
  494. def test_brotli(self, handler):
  495. with handler() as rh:
  496. res = validate_and_send(
  497. rh, Request(
  498. f'http://127.0.0.1:{self.http_port}/content-encoding',
  499. headers={'ytdl-encoding': 'br'}))
  500. assert res.headers.get('Content-Encoding') == 'br'
  501. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  502. def test_deflate(self, handler):
  503. with handler() as rh:
  504. res = validate_and_send(
  505. rh, Request(
  506. f'http://127.0.0.1:{self.http_port}/content-encoding',
  507. headers={'ytdl-encoding': 'deflate'}))
  508. assert res.headers.get('Content-Encoding') == 'deflate'
  509. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  510. def test_gzip(self, handler):
  511. with handler() as rh:
  512. res = validate_and_send(
  513. rh, Request(
  514. f'http://127.0.0.1:{self.http_port}/content-encoding',
  515. headers={'ytdl-encoding': 'gzip'}))
  516. assert res.headers.get('Content-Encoding') == 'gzip'
  517. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  518. def test_multiple_encodings(self, handler):
  519. with handler() as rh:
  520. for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
  521. res = validate_and_send(
  522. rh, Request(
  523. f'http://127.0.0.1:{self.http_port}/content-encoding',
  524. headers={'ytdl-encoding': pair}))
  525. assert res.headers.get('Content-Encoding') == pair
  526. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  527. @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
  528. def test_unsupported_encoding(self, handler):
  529. with handler() as rh:
  530. res = validate_and_send(
  531. rh, Request(
  532. f'http://127.0.0.1:{self.http_port}/content-encoding',
  533. headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
  534. assert res.headers.get('Content-Encoding') == 'unsupported'
  535. assert res.read() == b'raw'
  536. def test_read(self, handler):
  537. with handler() as rh:
  538. res = validate_and_send(
  539. rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
  540. assert res.readable()
  541. assert res.read(1) == b'H'
  542. assert res.read(3) == b'ost'
  543. assert res.read().decode().endswith('\n\n')
  544. assert res.read() == b''
  def test_request_disable_proxy(self, handler):
      """A request-level ``{'http': None}`` proxy must override the handler's proxy."""
      proxy_schemes = handler._SUPPORTED_PROXY_SCHEMES or ['http']
      for proxy_scheme in proxy_schemes:
          # Given the handler is configured with a proxy
          handler_proxies = {'http': f'{proxy_scheme}://10.255.255.255'}
          with handler(proxies=handler_proxies, timeout=5) as rh:
              # When a proxy is explicitly set to None for the request
              request = Request(
                  f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None})
              response = validate_and_send(rh, request)
              # Then no proxy should be used
              response.close()
              assert response.status == 200
  @pytest.mark.skip_handlers_if(
      lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
  def test_noproxy(self, handler):
      """A request-level ``no`` proxy entry covering the host must bypass the handler's proxy."""
      target_url = f'http://127.0.0.1:{self.http_port}/headers'
      for proxy_scheme in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
          # Given the handler is configured with a proxy
          with handler(proxies={'http': f'{proxy_scheme}://10.255.255.255'}, timeout=5) as rh:
              exclusions = (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost')
              for exclusion in exclusions:
                  # When request no proxy includes the request url host
                  response = validate_and_send(
                      rh, Request(target_url, proxies={'no': exclusion}))
                  # Then the proxy should not be used
                  assert response.status == 200
                  response.close()
  @pytest.mark.skip_handlers_if(
      lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
  def test_allproxy(self, handler):
      """An ``all`` proxy set on the handler or on the request should cause a TransportError."""
      # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
      # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
      proxy_url = 'http://10.255.255.255'
      target_url = f'http://127.0.0.1:{self.http_port}/headers'

      # Proxy configured at handler level
      with handler(proxies={'all': proxy_url}, timeout=0.1) as rh, pytest.raises(TransportError):
          validate_and_send(rh, Request(target_url)).close()

      # Proxy configured at request level
      with handler(timeout=0.1) as rh, pytest.raises(TransportError):
          validate_and_send(rh, Request(target_url, proxies={'all': proxy_url})).close()
  @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  class TestClientCertificate:
      """Client certificate tests against a local TLS server that requires a client cert."""

      @classmethod
      def setup_class(cls):
          """Start an HTTPS server on an ephemeral port with ``CERT_REQUIRED`` verification."""
          server_cert = os.path.join(TEST_DIR, 'testcert.pem')
          cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
          ca_cert = os.path.join(cls.certdir, 'ca.crt')

          cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

          # Server-side context: clients must present a certificate verifiable against ca.crt
          context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
          context.verify_mode = ssl.CERT_REQUIRED
          context.load_verify_locations(cafile=ca_cert)
          context.load_cert_chain(server_cert, None)
          cls.httpd.socket = context.wrap_socket(cls.httpd.socket, server_side=True)

          cls.port = http_server_port(cls.httpd)
          cls.server_thread = threading.Thread(target=cls.httpd.serve_forever, daemon=True)
          cls.server_thread.start()

      def _cert_path(self, filename):
          """Return the path of *filename* inside the certificate test-data directory."""
          return os.path.join(self.certdir, filename)

      def _run_test(self, handler, **handler_kwargs):
          """Fetch /video.html over HTTPS with a handler built from *handler_kwargs*."""
          # Disable client-side validation of unacceptable self-signed testcert.pem
          # The test is of a check on the server side, so unaffected
          with handler(verify=False, **handler_kwargs) as rh:
              response = validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))
              response.read().decode()

      def test_certificate_combined_nopass(self, handler):
          """Certificate and unencrypted key in a single file."""
          client_cert = {
              'client_certificate': self._cert_path('clientwithkey.crt'),
          }
          self._run_test(handler, client_cert=client_cert)

      def test_certificate_nocombined_nopass(self, handler):
          """Certificate and unencrypted key in separate files."""
          client_cert = {
              'client_certificate': self._cert_path('client.crt'),
              'client_certificate_key': self._cert_path('client.key'),
          }
          self._run_test(handler, client_cert=client_cert)

      def test_certificate_combined_pass(self, handler):
          """Certificate and encrypted key in a single file, with a password."""
          client_cert = {
              'client_certificate': self._cert_path('clientwithencryptedkey.crt'),
              'client_certificate_password': 'foobar',
          }
          self._run_test(handler, client_cert=client_cert)

      def test_certificate_nocombined_pass(self, handler):
          """Certificate and encrypted key in separate files, with a password."""
          client_cert = {
              'client_certificate': self._cert_path('client.crt'),
              'client_certificate_key': self._cert_path('clientencrypted.key'),
              'client_certificate_password': 'foobar',
          }
          self._run_test(handler, client_cert=client_cert)
  @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
  class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
      """Tests for handlers that accept the ``impersonate`` request extension."""

      def test_supported_impersonate_targets(self, handler):
          """Every supported target must give a 200 response that omits our own User-Agent."""
          with handler(headers=std_headers) as rh:
              # note: this assumes the impersonate request handler supports the impersonate extension
              for target in rh.supported_targets:
                  res = validate_and_send(rh, Request(
                      f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target}))
                  assert res.status == 200
                  assert std_headers['user-agent'].lower() not in res.read().decode().lower()

      def test_response_extensions(self, handler):
          """A successful response must carry the resolved impersonate target in its extensions."""
          with handler() as rh:
              for target in rh.supported_targets:
                  request = Request(
                      f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
                  res = validate_and_send(rh, request)
                  assert res.extensions['impersonate'] == rh._get_request_target(request)

      def test_http_error_response_extensions(self, handler):
          """The response attached to an HTTPError must also carry the resolved target."""
          with handler() as rh:
              for target in rh.supported_targets:
                  request = Request(
                      f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
                  # pytest.raises fails the test when no HTTPError is raised, so the
                  # assertion below can never run against a missing or stale response
                  with pytest.raises(HTTPError) as exc_info:
                      validate_and_send(rh, request)
                  res = exc_info.value.response
                  assert res.extensions['impersonate'] == rh._get_request_target(request)
  class TestRequestHandlerMisc:
      """Misc generic tests for request handlers, not related to request or validation testing"""

      @pytest.mark.parametrize('handler,logger_name', [
          ('Requests', 'urllib3'),
          ('Websockets', 'websockets.client'),
          ('Websockets', 'websockets.server'),
      ], indirect=['handler'])
      def test_remove_logging_handler(self, handler, logger_name):
          """Creating a handler adds one logging handler; closing it removes that handler."""
          # Ensure any logging handlers, which may contain a YoutubeDL instance,
          # are removed when we close the request handler
          # See: https://github.com/yt-dlp/yt-dlp/issues/8922
          attached = logging.getLogger(logger_name).handlers
          initial_count = len(attached)

          rh = handler()
          assert len(attached) == initial_count + 1

          rh.close()
          assert len(attached) == initial_count
  @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  class TestUrllibRequestHandler(TestRequestHandlerBase):
      """Tests specific to the Urllib request handler."""

      def test_file_urls(self, handler):
          """file:// URLs are rejected unless the handler is created with ``enable_file_urls=True``."""
          # See https://github.com/ytdl-org/youtube-dl/issues/8227
          with tempfile.NamedTemporaryFile(delete=False) as tf:
              tf.write(b'foobar')

          try:
              req = Request(pathlib.Path(tf.name).as_uri())
              with handler() as rh:
                  with pytest.raises(UnsupportedRequest):
                      rh.validate(req)

                  # Test that urllib never loaded FileHandler
                  with pytest.raises(TransportError):
                      rh.send(req)

              with handler(enable_file_urls=True) as rh:
                  res = validate_and_send(rh, req)
                  try:
                      assert res.read() == b'foobar'
                  finally:
                      # Close even on failure so the file below can always be removed
                      res.close()
          finally:
              # Always remove the temporary file, including when an assertion fails
              os.unlink(tf.name)

      def test_http_error_returns_content(self, handler):
          """The response of an HTTPError must stay readable after the error object is gone."""
          # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
          def get_response():
              with handler() as rh:
                  # gen_404 url: expected to raise an HTTPError
                  try:
                      validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
                  except HTTPError as e:
                      return e.response

          assert get_response().read() == b'<html></html>'

      def test_verify_cert_error_text(self, handler):
          """Check the message of the certificate verification error."""
          # Check the output of the error message
          with handler() as rh:
              with pytest.raises(
                  CertificateVerifyError,
                  match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
              ):
                  validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))

      @pytest.mark.parametrize('req,match,version_check', [
          # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
          # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
          (
              Request('http://127.0.0.1', method='GET\n'),
              'method can\'t contain control characters',
              lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
          ),
          # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
          # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
          (
              Request('http://127.0.0. 1', method='GET'),
              'URL can\'t contain control characters',
              lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
          ),
          # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
          (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
      ])
      def test_httplib_validation_errors(self, handler, req, match, version_check):
          """http.client validation failures must surface as RequestError, not TransportError."""
          # version_check returns True on interpreters that lack the validation under test
          if version_check and version_check(sys.version_info):
              pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')

          with handler() as rh:
              with pytest.raises(RequestError, match=match) as exc_info:
                  validate_and_send(rh, req)
              assert not isinstance(exc_info.value, TransportError)
  @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
  class TestRequestsRequestHandler(TestRequestHandlerBase):
      """Tests specific to the Requests request handler."""

      @pytest.mark.parametrize('raised,expected', [
          (lambda: requests.exceptions.ConnectTimeout(), TransportError),
          (lambda: requests.exceptions.ReadTimeout(), TransportError),
          (lambda: requests.exceptions.Timeout(), TransportError),
          (lambda: requests.exceptions.ConnectionError(), TransportError),
          (lambda: requests.exceptions.ProxyError(), ProxyError),
          (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
          (lambda: requests.exceptions.SSLError(), SSLError),
          (lambda: requests.exceptions.InvalidURL(), RequestError),
          (lambda: requests.exceptions.InvalidHeader(), RequestError),
          # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
          (lambda: urllib3.exceptions.HTTPError(), TransportError),
          (lambda: requests.exceptions.RequestException(), RequestError),
          # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
      ])
      def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
          """Errors raised by the session's ``request`` must map to exactly *expected*."""
          class MockSession:
              def request(self, *args, **kwargs):
                  raise raised()

          def mock_get_instance(*args, **kwargs):
              return MockSession()

          with handler() as rh:
              monkeypatch.setattr(rh, '_get_instance', mock_get_instance)

              with pytest.raises(expected) as exc_info:
                  rh.send(Request('http://fake'))

              # The exact type is required, not merely a subclass of it
              assert exc_info.type is expected

      @pytest.mark.parametrize('raised,expected,match', [
          (lambda: urllib3.exceptions.SSLError(), SSLError, None),
          (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
          (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
          (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
          (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
          (lambda: urllib3.exceptions.HTTPError(), TransportError, None),  # catch-all
          (
              lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
              IncompleteRead,
              '3 bytes read, 4 more expected'
          ),
          (
              lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
              IncompleteRead,
              '3 bytes read, 5 more expected'
          ),
      ])
      def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
          """Errors raised while reading the wrapped response must map to exactly *expected*."""
          from requests.models import Response as RequestsResponse
          from urllib3.response import HTTPResponse as Urllib3Response

          from yt_dlp.networking._requests import RequestsResponseAdapter

          wrapped = RequestsResponse()
          wrapped.raw = Urllib3Response(body=b'', status=200)
          adapter = RequestsResponseAdapter(wrapped)

          def failing_read(*args, **kwargs):
              raise raised()

          monkeypatch.setattr(adapter.fp, 'read', failing_read)

          with pytest.raises(expected, match=match) as exc_info:
              adapter.read()

          assert exc_info.type is expected

      def test_close(self, handler, monkeypatch):
          """Closing the handler must call ``close`` on its session."""
          rh = handler()
          session = rh._get_instance(cookiejar=rh.cookiejar)
          real_close = session.close
          close_calls = []

          def recording_close(*args, **kwargs):
              # Record the call, then delegate to the real close()
              close_calls.append(True)
              return real_close(*args, **kwargs)

          monkeypatch.setattr(session, 'close', recording_close)
          rh.close()
          assert close_calls
  @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
  class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
      """Tests specific to the CurlCFFI request handler."""

      @pytest.mark.parametrize('params,extensions', [
          ({}, {'impersonate': ImpersonateTarget('chrome')}),
          ({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
          ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
      ])
      def test_impersonate(self, handler, params, extensions):
          """Each handler/request target combination must result in Chrome 110 headers."""
          with handler(headers=std_headers, **params) as rh:
              res = validate_and_send(
                  rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode()
              assert 'sec-ch-ua: "Chromium";v="110"' in res
              # Check that user agent is added over ours
              assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res

      def test_headers(self, handler):
          """std_headers are dropped when impersonating and kept when not."""
          with handler(headers=std_headers) as rh:
              # Ensure curl-impersonate overrides our standard headers when impersonating
              res = validate_and_send(
                  rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={
                      'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower()

              assert std_headers['user-agent'].lower() not in res
              assert std_headers['accept-language'].lower() not in res
              assert std_headers['sec-fetch-mode'].lower() not in res
              # other than UA, custom headers that differ from std_headers should be kept
              assert 'sec-fetch-mode: custom' in res
              assert 'x-custom: test' in res

              # but when not impersonating don't remove std_headers
              res = validate_and_send(
                  rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower()
              # std_headers should be present
              for k, v in std_headers.items():
                  assert f'{k}: {v}'.lower() in res

      @pytest.mark.parametrize('raised,expected,match', [
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None),
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None),
      ])
      def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
          """curl errors raised while reading the response must map to exactly *expected*."""
          import curl_cffi.requests

          from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter

          curl_res = curl_cffi.requests.Response()
          res = CurlCFFIResponseAdapter(curl_res)

          def mock_read(*args, **kwargs):
              # Attach the response to the error before it propagates
              try:
                  raise raised()
              except Exception as e:
                  e.response = curl_res
                  raise

          monkeypatch.setattr(res.fp, 'read', mock_read)

          with pytest.raises(expected, match=match) as exc_info:
              res.read()

          assert exc_info.type is expected

      @pytest.mark.parametrize('raised,expected,match', [
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None),
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None),
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None),
          (lambda: curl_cffi.requests.errors.RequestsError(
              '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None),
      ])
      def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match):
          """curl errors raised while sending the request must map to exactly *expected*."""
          import curl_cffi.requests

          curl_res = curl_cffi.requests.Response()
          curl_res.status_code = 301

          with handler() as rh:
              original_get_instance = rh._get_instance

              def mock_get_instance(*args, **kwargs):
                  # Use the real session, but make its request() always fail
                  instance = original_get_instance(*args, **kwargs)

                  def request(*_, **__):
                      try:
                          raise raised()
                      except Exception as e:
                          e.response = curl_res
                          raise

                  monkeypatch.setattr(instance, 'request', request)
                  return instance

              monkeypatch.setattr(rh, '_get_instance', mock_get_instance)

              # Honour the parametrized ``match``, as test_response_error_mapping does
              with pytest.raises(expected, match=match) as exc_info:
                  rh.send(Request('http://fake'))

              assert exc_info.type is expected

      def test_response_reader(self, handler):
          """Exercise buffering, byte counting and closing of CurlCFFIResponseReader."""
          class FakeCurlResponse:
              """Minimal stand-in yielding three chunks, optionally followed by an error."""

              def __init__(self, raise_error=False):
                  self.raise_error = raise_error
                  self.closed = False

              def iter_content(self):
                  yield b'foo'
                  yield b'bar'
                  yield b'z'
                  if self.raise_error:
                      raise Exception('test')

              def close(self):
                  self.closed = True

          from yt_dlp.networking._curlcffi import CurlCFFIResponseReader

          res = CurlCFFIResponseReader(FakeCurlResponse())
          # Call readable(): asserting the bound method itself is always truthy
          assert res.readable()
          assert res.bytes_read == 0
          assert res.read(1) == b'f'
          assert res.bytes_read == 3
          assert res._buffer == b'oo'

          assert res.read(2) == b'oo'
          assert res.bytes_read == 3
          assert res._buffer == b''

          assert res.read(2) == b'ba'
          assert res.bytes_read == 6
          assert res._buffer == b'r'

          assert res.read(3) == b'rz'
          assert res.bytes_read == 7
          assert res._buffer == b''
          assert res.closed
          assert res._response.closed

          # should handle no size param
          res2 = CurlCFFIResponseReader(FakeCurlResponse())
          assert res2.read() == b'foobarz'
          assert res2.bytes_read == 7
          assert res2._buffer == b''
          assert res2.closed

          # should close on an exception
          res3 = CurlCFFIResponseReader(FakeCurlResponse(raise_error=True))
          with pytest.raises(Exception, match='test'):
              res3.read()
          assert res3._buffer == b''
          assert res3.bytes_read == 7
          assert res3.closed

          # buffer should be cleared on close
          res4 = CurlCFFIResponseReader(FakeCurlResponse())
          res4.read(2)
          assert res4._buffer == b'o'
          res4.close()
          assert res4.closed
          assert res4._buffer == b''
  def run_validation(handler, error, req, **handler_kwargs):
      """Validate *req* with a handler built from *handler_kwargs*.

      When *error* is truthy, validation must raise it; otherwise validation
      is simply run and any exception propagates to the caller.
      """
      with handler(**handler_kwargs) as rh:
          if not error:
              rh.validate(req)
              return
          with pytest.raises(error):
              rh.validate(req)
  class TestRequestHandlerValidation:
      """Table-driven tests of ``RequestHandler.validate`` for URL schemes, proxies and extensions."""

      class ValidationRH(RequestHandler):
          """Handler whose ``_send`` always raises a RequestError."""

          def _send(self, request):
              raise RequestError('test')

      class NoCheckRH(ValidationRH):
          """Handler with every supported-* table set to None and extension checks cleared."""
          _SUPPORTED_FEATURES = None
          _SUPPORTED_PROXY_SCHEMES = None
          _SUPPORTED_URL_SCHEMES = None

          def _check_extensions(self, extensions):
              extensions.clear()

      class HTTPSupportedRH(ValidationRH):
          """Handler that declares support for the ``http`` URL scheme only."""
          _SUPPORTED_URL_SCHEMES = ('http',)

      URL_SCHEME_TESTS = [
          # scheme, expected to fail, handler kwargs
          ('Urllib', [
              ('http', False, {}),
              ('https', False, {}),
              ('data', False, {}),
              ('ftp', False, {}),
              ('file', UnsupportedRequest, {}),
              ('file', False, {'enable_file_urls': True}),
          ]),
          ('Requests', [
              ('http', False, {}),
              ('https', False, {}),
          ]),
          ('Websockets', [
              ('ws', False, {}),
              ('wss', False, {}),
          ]),
          ('CurlCFFI', [
              ('http', False, {}),
              ('https', False, {}),
          ]),
          (NoCheckRH, [('http', False, {})]),
          (ValidationRH, [('http', UnsupportedRequest, {})])
      ]

      PROXY_SCHEME_TESTS = [
          # proxy scheme, expected to fail
          ('Urllib', 'http', [
              ('http', False),
              ('https', UnsupportedRequest),
              ('socks4', False),
              ('socks4a', False),
              ('socks5', False),
              ('socks5h', False),
              ('socks', UnsupportedRequest),
          ]),
          ('Requests', 'http', [
              ('http', False),
              ('https', False),
              ('socks4', False),
              ('socks4a', False),
              ('socks5', False),
              ('socks5h', False),
          ]),
          ('CurlCFFI', 'http', [
              ('http', False),
              ('https', False),
              ('socks4', False),
              ('socks4a', False),
              ('socks5', False),
              ('socks5h', False),
          ]),
          ('Websockets', 'ws', [
              ('http', UnsupportedRequest),
              ('https', UnsupportedRequest),
              ('socks4', False),
              ('socks4a', False),
              ('socks5', False),
              ('socks5h', False),
          ]),
          # Each listed once: repeating an entry only generates duplicate test cases
          (NoCheckRH, 'http', [('http', False)]),
          (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
      ]

      PROXY_KEY_TESTS = [
          # proxy key, proxy scheme, expected to fail
          ('Urllib', 'http', [
              ('all', 'http', False),
              ('unrelated', 'http', False),
          ]),
          ('Requests', 'http', [
              ('all', 'http', False),
              ('unrelated', 'http', False),
          ]),
          ('CurlCFFI', 'http', [
              ('all', 'http', False),
              ('unrelated', 'http', False),
          ]),
          ('Websockets', 'ws', [
              ('all', 'socks5', False),
              ('unrelated', 'socks5', False),
          ]),
          (NoCheckRH, 'http', [('all', 'http', False)]),
          (HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
          (HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
      ]

      EXTENSION_TESTS = [
          # extensions, expected to fail
          ('Urllib', 'http', [
              ({'cookiejar': 'notacookiejar'}, AssertionError),
              ({'cookiejar': YoutubeDLCookieJar()}, False),
              ({'cookiejar': CookieJar()}, AssertionError),
              ({'timeout': 1}, False),
              ({'timeout': 'notatimeout'}, AssertionError),
              ({'unsupported': 'value'}, UnsupportedRequest),
          ]),
          ('Requests', 'http', [
              ({'cookiejar': 'notacookiejar'}, AssertionError),
              ({'cookiejar': YoutubeDLCookieJar()}, False),
              ({'timeout': 1}, False),
              ({'timeout': 'notatimeout'}, AssertionError),
              ({'unsupported': 'value'}, UnsupportedRequest),
          ]),
          ('CurlCFFI', 'http', [
              ({'cookiejar': 'notacookiejar'}, AssertionError),
              ({'cookiejar': YoutubeDLCookieJar()}, False),
              ({'timeout': 1}, False),
              ({'timeout': 'notatimeout'}, AssertionError),
              ({'unsupported': 'value'}, UnsupportedRequest),
              ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest),
              ({'impersonate': 123}, AssertionError),
              ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False),
              ({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
              ({'impersonate': ImpersonateTarget()}, False),
              ({'impersonate': 'chrome'}, AssertionError)
          ]),
          (NoCheckRH, 'http', [
              ({'cookiejar': 'notacookiejar'}, False),
              ({'somerandom': 'test'}, False),  # but any extension is allowed through
          ]),
          ('Websockets', 'ws', [
              ({'cookiejar': YoutubeDLCookieJar()}, False),
              ({'timeout': 2}, False),
          ]),
      ]

      @pytest.mark.parametrize('handler,fail,scheme', [
          ('Urllib', False, 'http'),
          ('Requests', False, 'http'),
          ('CurlCFFI', False, 'http'),
          ('Websockets', False, 'ws')
      ], indirect=['handler'])
      def test_no_proxy(self, handler, fail, scheme):
          """Validate a ``no`` proxy list given on the request and on the handler."""
          run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
          run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})

      @pytest.mark.parametrize('handler,scheme', [
          ('Urllib', 'http'),
          (HTTPSupportedRH, 'http'),
          ('Requests', 'http'),
          ('CurlCFFI', 'http'),
          ('Websockets', 'ws')
      ], indirect=['handler'])
      def test_empty_proxy(self, handler, scheme):
          """A proxy value of None must validate, on the request and on the handler."""
          run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
          run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})

      @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
      @pytest.mark.parametrize('handler,scheme', [
          ('Urllib', 'http'),
          (HTTPSupportedRH, 'http'),
          ('Requests', 'http'),
          ('CurlCFFI', 'http'),
          ('Websockets', 'ws')
      ], indirect=['handler'])
      def test_invalid_proxy_url(self, handler, scheme, proxy_url):
          """Proxy URLs without a scheme must be rejected as UnsupportedRequest."""
          run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))

      @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
          (handler_tests[0], scheme, fail, handler_kwargs)
          for handler_tests in URL_SCHEME_TESTS
          for scheme, fail, handler_kwargs in handler_tests[1]
      ], indirect=['handler'])
      def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
          """Validate a bare ``<scheme>://`` URL per the URL_SCHEME_TESTS table."""
          run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))

      @pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
          (handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
          for handler_tests in PROXY_KEY_TESTS
          for proxy_key, proxy_scheme, fail in handler_tests[2]
      ], indirect=['handler'])
      def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
          """Validate proxy dictionary keys per PROXY_KEY_TESTS, on the request and on the handler."""
          run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
          run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})

      @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
          (handler_tests[0], handler_tests[1], scheme, fail)
          for handler_tests in PROXY_SCHEME_TESTS
          for scheme, fail in handler_tests[2]
      ], indirect=['handler'])
      def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
          """Validate proxy URL schemes per PROXY_SCHEME_TESTS, on the request and on the handler."""
          run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
          run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})

      @pytest.mark.parametrize('handler,scheme,extensions,fail', [
          (handler_tests[0], handler_tests[1], extensions, fail)
          for handler_tests in EXTENSION_TESTS
          for extensions, fail in handler_tests[2]
      ], indirect=['handler'])
      def test_extension(self, handler, scheme, extensions, fail):
          """Validate request extensions per the EXTENSION_TESTS table."""
          run_validation(
              handler, fail, Request(f'{scheme}://', extensions=extensions))

      def test_invalid_request_type(self):
          """Both ``validate`` and ``send`` must reject a non-Request argument with TypeError."""
          rh = self.ValidationRH(logger=FakeLogger())
          for method in (rh.validate, rh.send):
              with pytest.raises(TypeError, match='Expected an instance of Request'):
                  method('not a request')
  class FakeResponse(Response):
      """Response with an empty body that remembers the request it was created for."""

      def __init__(self, request):
          # XXX: we could make request part of standard response interface
          self.request = request
          empty_body = io.BytesIO(b'')
          super().__init__(fp=empty_body, headers={}, url=request.url)
  class FakeRH(RequestHandler):
      """Handler that skips validation and answers every request with a FakeResponse."""

      def __init__(self, *args, **params):
          # Keep the keyword arguments the handler was constructed with
          self.params = params
          super().__init__(*args, **params)

      def _validate(self, request):
          """Accept every request without any checks."""
          return

      def _send(self, request: Request):
          """Raise SSLError for ``ssl://`` URLs; otherwise return a FakeResponse."""
          ssl_prefix = 'ssl://'
          if not request.url.startswith(ssl_prefix):
              return FakeResponse(request)
          # The error message is the URL with the scheme prefix removed
          raise SSLError(request.url[len(ssl_prefix):])
  class FakeRHYDL(FakeYDL):
      """FakeYDL whose request director is built from FakeRH alone."""

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # Replace the director with one that knows only the fake handler
          fake_handlers = [FakeRH]
          self._request_director = self.build_request_director(fake_handlers)
  class AllUnsupportedRHYDL(FakeYDL):
      """FakeYDL whose only handler declares no supported features, proxy or URL schemes."""

      def __init__(self, *args, **kwargs):

          class UnsupportedRH(RequestHandler):
              """Handler with empty supported-* tables and a no-op ``_send``."""
              _SUPPORTED_URL_SCHEMES = ()
              _SUPPORTED_PROXY_SCHEMES = ()
              _SUPPORTED_FEATURES = ()

              def _send(self, request: Request):
                  pass

          super().__init__(*args, **kwargs)
          unsupported_handlers = [UnsupportedRH]
          self._request_director = self.build_request_director(unsupported_handlers)
  1179. class TestRequestDirector:
  1180. def test_handler_operations(self):
  1181. director = RequestDirector(logger=FakeLogger())
  1182. handler = FakeRH(logger=FakeLogger())
  1183. director.add_handler(handler)
  1184. assert director.handlers.get(FakeRH.RH_KEY) is handler
  1185. # Handler should overwrite
  1186. handler2 = FakeRH(logger=FakeLogger())
  1187. director.add_handler(handler2)
  1188. assert director.handlers.get(FakeRH.RH_KEY) is not handler
  1189. assert director.handlers.get(FakeRH.RH_KEY) is handler2
  1190. assert len(director.handlers) == 1
  1191. class AnotherFakeRH(FakeRH):
  1192. pass
  1193. director.add_handler(AnotherFakeRH(logger=FakeLogger()))
  1194. assert len(director.handlers) == 2
  1195. assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY
  1196. director.handlers.pop(FakeRH.RH_KEY, None)
  1197. assert director.handlers.get(FakeRH.RH_KEY) is None
  1198. assert len(director.handlers) == 1
  1199. # RequestErrors should passthrough
  1200. with pytest.raises(SSLError):
  1201. director.send(Request('ssl://something'))
  1202. def test_send(self):
  1203. director = RequestDirector(logger=FakeLogger())
  1204. with pytest.raises(RequestError):
  1205. director.send(Request('any://'))
  1206. director.add_handler(FakeRH(logger=FakeLogger()))
  1207. assert isinstance(director.send(Request('http://')), FakeResponse)
  1208. def test_unsupported_handlers(self):
  1209. class SupportedRH(RequestHandler):
  1210. _SUPPORTED_URL_SCHEMES = ['http']
  1211. def _send(self, request: Request):
  1212. return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
  1213. director = RequestDirector(logger=FakeLogger())
  1214. director.add_handler(SupportedRH(logger=FakeLogger()))
  1215. director.add_handler(FakeRH(logger=FakeLogger()))
  1216. # First should take preference
  1217. assert director.send(Request('http://')).read() == b'supported'
  1218. assert director.send(Request('any://')).read() == b''
  1219. director.handlers.pop(FakeRH.RH_KEY)
  1220. with pytest.raises(NoSupportingHandlers):
  1221. director.send(Request('any://'))
  def test_unexpected_error(self):
      """A handler raising TypeError is reported as an unexpected error and is not fatal to the director.

      The handlers receive FakeLogger *instances* (not the FakeLogger class
      itself), in line with how loggers are passed in the other tests.
      """
      director = RequestDirector(logger=FakeLogger())

      class UnexpectedRH(FakeRH):
          def _send(self, request: Request):
              raise TypeError('something')

      # Only the failing handler is registered: the error must be counted in the message
      director.add_handler(UnexpectedRH(logger=FakeLogger()))
      with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
          director.send(Request('any://'))

      director.handlers.clear()
      assert len(director.handlers) == 0

      # Should not be fatal
      director.add_handler(FakeRH(logger=FakeLogger()))
      director.add_handler(UnexpectedRH(logger=FakeLogger()))
      assert director.send(Request('any://'))
  def test_preference(self):
      """Check that the response body changes with the 'prefer' header once some_preference is registered."""
      request_director = RequestDirector(logger=FakeLogger())
      request_director.add_handler(FakeRH(logger=FakeLogger()))

      class SomeRH(RequestHandler):
          _SUPPORTED_URL_SCHEMES = ['http']

          def _send(self, request: Request):
              body = io.BytesIO(b'supported')
              return Response(fp=body, headers={}, url=request.url)

      def some_preference(rh, request):
          # Handlers other than SomeRH always score 0
          if not isinstance(rh, SomeRH):
              return 0
          # SomeRH scores 100 only when the request carries a 'prefer' header
          if 'prefer' in request.headers:
              return 100
          return -1

      request_director.add_handler(SomeRH(logger=FakeLogger()))
      request_director.preferences.add(some_preference)

      plain_response = request_director.send(Request('http://'))
      assert plain_response.read() == b''

      preferred_response = request_director.send(Request('http://', headers={'prefer': '1'}))
      assert preferred_response.read() == b'supported'
  def test_close(self, monkeypatch):
      """director.close() must call close() on the registered handler."""
      request_director = RequestDirector(logger=FakeLogger())
      request_director.add_handler(FakeRH(logger=FakeLogger()))

      # Every invocation of the patched close() is recorded here
      close_calls = []

      def mock_close(*args, **kwargs):
          close_calls.append((args, kwargs))

      fake_handler = request_director.handlers[FakeRH.RH_KEY]
      monkeypatch.setattr(fake_handler, 'close', mock_close)

      request_director.close()
      assert close_calls
  1261. # XXX: do we want to move this to test_YoutubeDL.py?
  class TestYoutubeDLNetworking:
      """Networking tests driven through FakeYDL / FakeRHYDL instances."""

      @staticmethod
      def build_handler(ydl, handler: RequestHandler = FakeRH):
          """Build a request director from *ydl* containing only *handler* and return the entry stored under its RH_KEY."""
          request_director = ydl.build_request_director([handler])
          return request_director.handlers.get(handler.RH_KEY)

      def test_compat_opener(self):
          """ydl._opener is a urllib OpenerDirector (DeprecationWarning silenced)."""
          with FakeYDL() as ydl, warnings.catch_warnings():
              warnings.simplefilter('ignore', category=DeprecationWarning)
              assert isinstance(ydl._opener, urllib.request.OpenerDirector)

      @pytest.mark.parametrize('proxy,expected', [
          ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
          ('', {'all': '__noproxy__'}),
          # env, set https
          (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}),
      ])
      def test_proxy(self, proxy, expected, monkeypatch):
          """ydl.proxies reflects the 'proxy' param, with HTTP_PROXY set in the environment."""
          monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081')
          params = {'proxy': proxy}
          with FakeYDL(params) as ydl:
              assert ydl.proxies == expected

      def test_compat_request(self):
          """urlopen() accepts a urllib.request.Request and carries its fields over to the sent request."""
          with FakeRHYDL() as ydl:
              assert ydl.urlopen('test://')

              legacy_request = urllib.request.Request(
                  'http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
              legacy_request.add_unredirected_header('Cookie', 'bob=bob')
              legacy_request.timeout = 2

              with warnings.catch_warnings():
                  warnings.simplefilter('ignore', category=DeprecationWarning)
                  sent = ydl.urlopen(legacy_request).request
                  assert sent.url == legacy_request.get_full_url()
                  assert sent.data == legacy_request.data
                  assert sent.method == legacy_request.get_method()
                  assert 'X-Test' in sent.headers
                  assert 'Cookie' in sent.headers
                  assert sent.extensions.get('timeout') == 2

              with pytest.raises(AssertionError):
                  ydl.urlopen(None)

      def test_extract_basic_auth(self):
          """Credentials embedded in the URL end up in a Basic Authorization header."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('http://user:pass@foo.bar'))
              authorization = response.request.headers['Authorization']
              assert authorization == 'Basic dXNlcjpwYXNz'

      def test_sanitize_url(self):
          """The 'httpss' scheme is rewritten to 'https' by urlopen()."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('httpss://foo.bar'))
              assert response.request.url == 'https://foo.bar'

      def test_file_urls_error(self):
          """file:// URLs are rejected with an explanatory RequestError."""
          # use urllib handler
          with FakeYDL() as ydl, pytest.raises(
                  RequestError, match=r'file:// URLs are disabled by default'):
              ydl.urlopen('file://')

      @pytest.mark.parametrize('scheme', ['ws', 'wss'])
      def test_websocket_unavailable_error(self, scheme):
          """ws/wss requests raise a RequestError mentioning WebSocket support when using AllUnsupportedRHYDL."""
          with AllUnsupportedRHYDL() as ydl, pytest.raises(
                  RequestError, match=r'This request requires WebSocket support'):
              ydl.urlopen(f'{scheme}://')

      def test_legacy_server_connect_error(self):
          """Specific SSL errors suggest --legacy-server-connect; other SSL errors pass through as SSLError."""
          hinted_errors = ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE')
          with FakeRHYDL() as ydl:
              for error in hinted_errors:
                  with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
                      ydl.urlopen(f'ssl://{error}')

              with pytest.raises(SSLError, match='testerror'):
                  ydl.urlopen('ssl://testerror')

      def test_unsupported_impersonate_target(self):
          """An impersonate extension on the request raises RequestError when the director only holds a plain RequestHandler."""

          class FakeImpersonationRHYDL(FakeYDL):
              def __init__(self, *args, **kwargs):
                  class HTTPRH(RequestHandler):
                      _SUPPORTED_URL_SCHEMES = ('http',)
                      _SUPPORTED_PROXY_SCHEMES = None

                      def _send(self, request: Request):
                          pass

                  super().__init__(*args, **kwargs)
                  # Replace the director with one that only knows HTTPRH
                  self._request_director = self.build_request_director([HTTPRH])

          target = ImpersonateTarget('test', None, None, None)
          with FakeImpersonationRHYDL() as ydl, pytest.raises(
                  RequestError, match=r'Impersonate target "test" is not available'):
              ydl.urlopen(Request('http://', extensions={'impersonate': target}))

      def test_unsupported_impersonate_extension(self):
          """A target missing from the handler's target map raises RequestError."""

          class FakeHTTPRHYDL(FakeYDL):
              def __init__(self, *args, **kwargs):
                  class IRH(ImpersonateRequestHandler):
                      _SUPPORTED_URL_SCHEMES = ('http',)
                      _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}
                      _SUPPORTED_PROXY_SCHEMES = None

                      def _send(self, request: Request):
                          pass

                  super().__init__(*args, **kwargs)
                  # Replace the director with one that only knows IRH
                  self._request_director = self.build_request_director([IRH])

          target = ImpersonateTarget('test', None, None, None)
          with FakeHTTPRHYDL() as ydl, pytest.raises(
                  RequestError, match=r'Impersonate target "test" is not available'):
              ydl.urlopen(Request('http://', extensions={'impersonate': target}))

      def test_raise_impersonate_error(self):
          """Constructing FakeYDL with an unavailable 'impersonate' target raises YoutubeDLError."""
          params = {'impersonate': ImpersonateTarget('test', None, None, None)}
          with pytest.raises(YoutubeDLError, match=r'Impersonate target "test" is not available'):
              FakeYDL(params)

      def test_pass_impersonate_param(self, monkeypatch):
          """The 'impersonate' param is handed to the built handler as rh.impersonate."""

          class IRH(ImpersonateRequestHandler):
              _SUPPORTED_URL_SCHEMES = ('http',)
              _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}

              def _send(self, request: Request):
                  pass

          # Bypass the check on initialize
          original_build = FakeYDL.build_request_director

          def build_with_irh(cls, handlers, preferences=None):
              return original_build(cls, handlers=[IRH])

          monkeypatch.setattr(FakeYDL, 'build_request_director', build_with_irh)

          requested_target = ImpersonateTarget('abc', None, None, None)
          with FakeYDL({'impersonate': requested_target}) as ydl:
              rh = self.build_handler(ydl, IRH)
              assert rh.impersonate == ImpersonateTarget('abc', None, None, None)

      def test_get_impersonate_targets(self):
          """Available targets are collected across handlers and can be queried individually."""

          def make_handler(target_client):
              # One handler class per client; RH_KEY and RH_NAME are set to the client name
              class TestRH(ImpersonateRequestHandler):
                  _SUPPORTED_URL_SCHEMES = ('http',)
                  _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client): 'test'}
                  RH_KEY = target_client
                  RH_NAME = target_client

                  def _send(self, request: Request):
                      pass

              return TestRH

          handlers = [make_handler(client) for client in ('abc', 'xyz', 'asd')]

          with FakeYDL() as ydl:
              ydl._request_director = ydl.build_request_director(handlers)

              available = set(ydl._get_available_impersonate_targets())
              assert available == {
                  (ImpersonateTarget('xyz'), 'xyz'),
                  (ImpersonateTarget('abc'), 'abc'),
                  (ImpersonateTarget('asd'), 'asd'),
              }

              assert ydl._impersonate_target_available(ImpersonateTarget('abc'))
              assert ydl._impersonate_target_available(ImpersonateTarget())
              assert not ydl._impersonate_target_available(ImpersonateTarget('zxy'))

      @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
          ('http', '__noproxy__', None),
          ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
          ('https', 'example.com', 'http://example.com'),
          ('https', '//example.com', 'http://example.com'),
          ('https', 'socks5://example.com', 'socks5h://example.com'),
          ('http', 'socks://example.com', 'socks4://example.com'),
          ('http', 'socks4://example.com', 'socks4://example.com'),
          # clean_proxies should ignore bad proxies
          ('unrelated', '/bad/proxy', '/bad/proxy'),
      ])
      def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch):
          """Proxy URLs are normalised both per request and when building a handler."""
          # proxies should be cleaned in urlopen()
          with FakeRHYDL() as ydl:
              request = Request('test://', proxies={proxy_key: proxy_url})
              sent = ydl.urlopen(request).request
              assert sent.proxies[proxy_key] == expected

          # and should also be cleaned when building the handler
          env_name = f'{proxy_key.upper()}_PROXY'
          monkeypatch.setenv(env_name, proxy_url)
          with FakeYDL() as ydl:
              rh = self.build_handler(ydl)
              assert rh.proxies[proxy_key] == expected

      def test_clean_proxy_header(self):
          """The 'ytdl-request-proxy' header is removed and turned into an 'all' proxy entry."""
          proxy_header = {'ytdl-request-proxy': '//foo.bar'}

          with FakeRHYDL() as ydl:
              sent = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
              assert 'ytdl-request-proxy' not in sent.headers
              assert sent.proxies == {'all': 'http://foo.bar'}

          with FakeYDL({'http_headers': proxy_header}) as ydl:
              rh = self.build_handler(ydl)
              assert 'ytdl-request-proxy' not in rh.headers
              assert rh.proxies == {'all': 'http://foo.bar'}

      def test_clean_header(self):
          """Internal control headers are stripped from requests and from handler headers."""
          with FakeRHYDL() as ydl:
              response = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
              sent_headers = response.request.headers
              assert 'Youtubedl-no-compression' not in sent_headers
              assert sent_headers.get('Accept-Encoding') == 'identity'

          with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
              rh = self.build_handler(ydl)
              assert 'Youtubedl-no-compression' not in rh.headers
              assert rh.headers.get('Accept-Encoding') == 'identity'

          with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
              rh = self.build_handler(ydl)
              assert 'Ytdl-socks-proxy' not in rh.headers

      def test_build_handler_params(self):
          """YoutubeDL params are reflected in the attributes of the built handler."""
          params = {
              'http_headers': {'test': 'testtest'},
              'socket_timeout': 2,
              'proxy': 'http://127.0.0.1:8080',
              'source_address': '127.0.0.45',
              'debug_printtraffic': True,
              'compat_opts': ['no-certifi'],
              'nocheckcertificate': True,
              'legacyserverconnect': True,
          }
          with FakeYDL(params) as ydl:
              rh = self.build_handler(ydl)

              assert rh.headers.get('test') == 'testtest'
              assert 'Accept' in rh.headers  # ensure std_headers are still there
              assert rh.timeout == 2
              assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
              assert rh.source_address == '127.0.0.45'
              assert rh.verbose is True
              assert rh.prefer_system_certs is True
              assert rh.verify is False
              assert rh.legacy_ssl_support is True

      @pytest.mark.parametrize('ydl_params', [
          {'client_certificate': 'fakecert.crt'},
          {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
          {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
          {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
      ])
      def test_client_certificate(self, ydl_params):
          """Client certificate params are stored unchanged on the handler."""
          with FakeYDL(ydl_params) as ydl:
              rh = self.build_handler(ydl)
              assert rh._client_cert == ydl_params  # XXX: Too bound to implementation

      def test_urllib_file_urls(self):
          """The 'enable_file_urls' param is forwarded to UrllibRH as a real bool."""
          for enabled in (False, True):
              with FakeYDL({'enable_file_urls': enabled}) as ydl:
                  rh = self.build_handler(ydl, UrllibRH)
                  assert rh.enable_file_urls is enabled

      def test_compat_opt_prefer_urllib(self):
          """The 'prefer-legacy-http-handler' compat opt registers one preference that is truthy for UrllibRH."""
          # This assumes urllib only has a preference when this compat opt is given
          with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
              director = ydl.build_request_director([UrllibRH])
              assert len(director.preferences) == 1
              preference = director.preferences.pop()
              assert preference(UrllibRH, None)
  class TestRequest:
      """Tests for the attribute handling of Request objects."""

      def test_query(self):
          """Query params are merged into the URL, both at construction and via update()."""
          request = Request('http://example.com?q=something', query={'v': 'xyz'})
          assert request.url == 'http://example.com?q=something&v=xyz'

          request.update(query={'v': '123'})
          assert request.url == 'http://example.com?q=something&v=123'

          request.update(url='http://example.com', query={'v': 'xyz'})
          assert request.url == 'http://example.com?v=xyz'

      def test_method(self):
          """The method follows the presence of data until it is set explicitly."""
          request = Request('http://example.com')
          assert request.method == 'GET'

          # Without an explicit method, it tracks whether data is set
          request.data = b'test'
          assert request.method == 'POST'
          request.data = None
          assert request.method == 'GET'

          # An explicit method stays in place when data is cleared
          request.data = b'test2'
          request.method = 'PUT'
          assert request.method == 'PUT'
          request.data = None
          assert request.method == 'PUT'

          with pytest.raises(TypeError):
              request.method = 1

      def test_request_helpers(self):
          """HEADRequest and PUTRequest preset their HTTP method."""
          head_request = HEADRequest('http://example.com')
          assert head_request.method == 'HEAD'
          put_request = PUTRequest('http://example.com')
          assert put_request.method == 'PUT'

      def test_headers(self):
          """Headers are held in an HTTPHeaderDict and compare case-insensitively."""
          request = Request('http://example.com', headers={'tesT': 'test'})
          assert request.headers == HTTPHeaderDict({'test': 'test'})

          request.update(headers={'teSt2': 'test2'})
          assert request.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})

          # An HTTPHeaderDict is stored as the very same object
          new_headers = HTTPHeaderDict({'test': 'test'})
          request.headers = new_headers
          assert request.headers == HTTPHeaderDict({'test': 'test'})
          assert request.headers is new_headers

          # test converts dict to case insensitive dict
          new_headers = {'test2': 'test2'}
          request.headers = new_headers
          assert isinstance(request.headers, HTTPHeaderDict)
          assert request.headers is not new_headers

          with pytest.raises(TypeError):
              request.headers = None

      def test_data_type(self):
          """data accepts bytes, iterables of bytes and file-like objects; str and dict raise TypeError."""
          request = Request('http://example.com')
          assert request.data is None

          # test bytes is allowed
          request.data = b'test'
          assert request.data == b'test'

          # test iterable of bytes is allowed
          chunks = [b'test', b'test2']
          request.data = chunks
          assert request.data == chunks

          # test file-like object is allowed
          stream = io.BytesIO(b'test')
          request.data = stream
          assert request.data == stream

          # common mistake: test str not allowed
          with pytest.raises(TypeError):
              request.data = 'test'
          assert request.data != 'test'

          # common mistake: test dict is not allowed
          with pytest.raises(TypeError):
              request.data = {'test': 'test'}
          assert request.data != {'test': 'test'}

      def test_content_length_header(self):
          """Content-Length is dropped when data changes or when the request has no data."""
          request = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
          assert request.headers.get('Content-Length') == '0'

          request.data = b'test'
          assert 'Content-Length' not in request.headers

          request = Request('http://example.com', headers={'Content-Length': '10'})
          assert 'Content-Length' not in request.headers

      def test_content_type_header(self):
          """Content-Type survives data changes, is dropped with the data, and defaults to form-urlencoded."""
          request = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
          assert request.headers.get('Content-Type') == 'test'

          request.data = b'test2'
          assert request.headers.get('Content-Type') == 'test'

          request.data = None
          assert 'Content-Type' not in request.headers

          request.data = b'test3'
          assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

      def test_update_req(self):
          """update(data=b'') makes the request a POST with the default Content-Type."""
          request = Request('http://example.com')
          assert request.data is None
          assert request.method == 'GET'
          assert 'Content-Type' not in request.headers

          # Test that zero-byte payloads will be sent
          request.update(data=b'')
          assert request.data == b''
          assert request.method == 'POST'
          assert request.headers.get('Content-Type') == 'application/x-www-form-urlencoded'

      def test_proxies(self):
          """Proxies given to the constructor are exposed as-is."""
          proxies = {'http': 'http://127.0.0.1:8080'}
          request = Request(url='http://example.com', proxies=proxies)
          assert request.proxies == {'http': 'http://127.0.0.1:8080'}

      def test_extensions(self):
          """Extensions given to the constructor are exposed as-is."""
          extensions = {'timeout': 2}
          request = Request(url='http://example.com', extensions=extensions)
          assert request.extensions == {'timeout': 2}

      def test_copy(self):
          """copy() duplicates headers, proxies and the extensions dict, but shares data."""
          original = Request(
              url='http://example.com',
              extensions={'cookiejar': CookieJar()},
              headers={'Accept-Encoding': 'br'},
              proxies={'http': 'http://127.0.0.1'},
              data=[b'123'],
          )
          duplicate = original.copy()

          assert duplicate is not original
          assert duplicate.url == original.url
          assert duplicate.headers == original.headers
          assert duplicate.headers is not original.headers
          assert duplicate.proxies == original.proxies
          assert duplicate.proxies is not original.proxies

          # Data is not able to be copied
          assert duplicate.data == original.data
          assert duplicate.data is original.data

          # Shallow copy extensions
          assert duplicate.extensions is not original.extensions
          assert duplicate.extensions['cookiejar'] == original.extensions['cookiejar']

          # Subclasses are copied by default
          class AnotherRequest(Request):
              pass

          subclass_request = AnotherRequest(url='http://127.0.0.1')
          assert isinstance(subclass_request.copy(), AnotherRequest)

      def test_url(self):
          """URLs are escaped/IDNA-encoded, scheme-less URLs get http, and None is rejected."""
          request = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
          assert request.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'

          scheme_less = Request(url='//example.com')
          assert scheme_less.url == 'http://example.com'

          with pytest.raises(TypeError):
              Request(url='https://').url = None
  class TestResponse:
      """Tests for Response attributes and its compatibility accessors."""

      @pytest.mark.parametrize('reason,status,expected', [
          ('custom', 200, 'custom'),
          # fallback status
          (None, 404, 'Not Found'),
          ('', 403, 'Forbidden'),
          (None, 999, None),
      ])
      def test_reason(self, reason, status, expected):
          """reason is the given text, else the phrase for the status, else None."""
          response = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
          assert response.reason == expected

      def test_headers(self):
          """Repeated headers are all kept and header names are matched case-insensitively."""
          message = Message()
          for name, value in (('Test', 'test'), ('Test', 'test2'), ('content-encoding', 'br')):
              message.add_header(name, value)

          response = Response(io.BytesIO(b''), headers=message, url='test://')
          assert response.headers.get_all('test') == ['test', 'test2']
          assert 'Content-Encoding' in response.headers

      def test_get_header(self):
          """get_header() joins repeated values, returns only the first Set-Cookie, and honours defaults."""
          message = Message()
          header_pairs = (
              ('Set-Cookie', 'cookie1'),
              ('Set-cookie', 'cookie2'),
              ('Test', 'test'),
              ('Test', 'test2'),
          )
          for name, value in header_pairs:
              message.add_header(name, value)

          response = Response(io.BytesIO(b''), headers=message, url='test://')
          assert response.get_header('test') == 'test, test2'
          assert response.get_header('set-Cookie') == 'cookie1'
          assert response.get_header('notexist', 'default') == 'default'

      def test_compat(self):
          """The urllib-style accessors agree with the native attributes (DeprecationWarning silenced)."""
          response = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
          with warnings.catch_warnings():
              warnings.simplefilter('ignore', category=DeprecationWarning)
              assert response.code == response.getcode() == response.status
              assert response.geturl() == response.url
              assert response.info() is response.headers
              assert response.getheader('test') == response.get_header('test')
  1642. class TestImpersonateTarget:
  @pytest.mark.parametrize('target_str,expected', [
      ('abc', ImpersonateTarget('abc', None, None, None)),
      ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)),
      ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)),
      ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')),
      ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)),
      ('abc:', ImpersonateTarget('abc', None, None, None)),
      ('abc-120:', ImpersonateTarget('abc', '120', None, None)),
      (':xyz', ImpersonateTarget(None, None, 'xyz', None)),
      (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')),
      (':', ImpersonateTarget(None, None, None, None)),
      ('', ImpersonateTarget(None, None, None, None)),
  ])
  def test_target_from_str(self, target_str, expected):
      """ImpersonateTarget.from_str() parses each string into the expected target."""
      parsed = ImpersonateTarget.from_str(target_str)
      assert parsed == expected
  @pytest.mark.parametrize('target_str', [
      '-120',
      ':-12.0',
      '-12:-12',
      '-:-',
      '::',
      'a-c-d:',
      'a-c-d:e-f-g',
      'a:b:',
  ])
  def test_target_from_invalid_str(self, target_str):
      """ImpersonateTarget.from_str() raises ValueError for each malformed string."""
      with pytest.raises(ValueError):
          ImpersonateTarget.from_str(target_str)
  @pytest.mark.parametrize('target,expected', [
      (ImpersonateTarget('abc', None, None, None), 'abc'),
      (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
      (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
      (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'),
      (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'),
      # Same target as the row above, built with three positional arguments only
      (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'),
      (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'),
      (ImpersonateTarget('abc', ), 'abc'),
      (ImpersonateTarget(None, None, None, None), ''),
  ])
  def test_str(self, target, expected):
      """str(target) produces the expected string form.

      Each parametrized case is listed once; the exact repeats of the
      'abc-120' and 'abc-120:xyz' rows were dropped because they only
      re-ran identical checks.
      """
      assert str(target) == expected
  @pytest.mark.parametrize('args', [
      ('abc', None, None, '5'),
      ('abc', '120', None, '5'),
      (None, '120', None, None),
      (None, '120', None, '5'),
      (None, None, None, '5'),
      (None, '120', 'xyz', '5'),
  ])
  def test_invalid_impersonate_target(self, args):
      """Constructing ImpersonateTarget from each of these argument tuples raises ValueError."""
      # Every tuple sets a version field (2nd/4th) while the matching name field (1st/3rd) is None
      with pytest.raises(ValueError):
          ImpersonateTarget(*args)
  @pytest.mark.parametrize('target1,target2,is_in,is_eq', [
      (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True),
      (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False),
      (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False),
      (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False),
      (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False),
      (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False),
      (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False),
      (ImpersonateTarget(), ImpersonateTarget(), True, True),
  ])
  def test_impersonate_target_in(self, target1, target2, is_in, is_eq):
      """Check the `in` and `==` results for each pair of targets against the expected booleans."""
      contained = target1 in target2
      assert contained is is_in

      equal = target1 == target2
      assert equal is is_eq