test_networking.py 85 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. import pytest
  6. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  7. import gzip
  8. import http.client
  9. import http.cookiejar
  10. import http.server
  11. import io
  12. import logging
  13. import pathlib
  14. import random
  15. import ssl
  16. import tempfile
  17. import threading
  18. import time
  19. import urllib.error
  20. import urllib.request
  21. import warnings
  22. import zlib
  23. from email.message import Message
  24. from http.cookiejar import CookieJar
  25. from test.conftest import validate_and_send
  26. from test.helper import FakeYDL, http_server_port, verify_address_availability
  27. from yt_dlp.cookies import YoutubeDLCookieJar
  28. from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
  29. from yt_dlp.networking import (
  30. HEADRequest,
  31. PUTRequest,
  32. Request,
  33. RequestDirector,
  34. RequestHandler,
  35. Response,
  36. )
  37. from yt_dlp.networking._urllib import UrllibRH
  38. from yt_dlp.networking.exceptions import (
  39. CertificateVerifyError,
  40. HTTPError,
  41. IncompleteRead,
  42. NoSupportingHandlers,
  43. ProxyError,
  44. RequestError,
  45. SSLError,
  46. TransportError,
  47. UnsupportedRequest,
  48. )
  49. from yt_dlp.networking.impersonate import (
  50. ImpersonateRequestHandler,
  51. ImpersonateTarget,
  52. )
  53. from yt_dlp.utils import YoutubeDLError
  54. from yt_dlp.utils._utils import _YDLLogger as FakeLogger
  55. from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
  56. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  57. def _build_proxy_handler(name):
  58. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  59. proxy_name = name
  60. def log_message(self, format, *args):
  61. pass
  62. def do_GET(self):
  63. self.send_response(200)
  64. self.send_header('Content-Type', 'text/plain; charset=utf-8')
  65. self.end_headers()
  66. self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
  67. return HTTPTestRequestHandler
  68. class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
  69. protocol_version = 'HTTP/1.1'
  70. default_request_version = 'HTTP/1.1'
  71. def log_message(self, format, *args):
  72. pass
  73. def _headers(self):
  74. payload = str(self.headers).encode()
  75. self.send_response(200)
  76. self.send_header('Content-Type', 'application/json')
  77. self.send_header('Content-Length', str(len(payload)))
  78. self.end_headers()
  79. self.wfile.write(payload)
  80. def _redirect(self):
  81. self.send_response(int(self.path[len('/redirect_'):]))
  82. self.send_header('Location', '/method')
  83. self.send_header('Content-Length', '0')
  84. self.end_headers()
  85. def _method(self, method, payload=None):
  86. self.send_response(200)
  87. self.send_header('Content-Length', str(len(payload or '')))
  88. self.send_header('Method', method)
  89. self.end_headers()
  90. if payload:
  91. self.wfile.write(payload)
  92. def _status(self, status):
  93. payload = f'<html>{status} NOT FOUND</html>'.encode()
  94. self.send_response(int(status))
  95. self.send_header('Content-Type', 'text/html; charset=utf-8')
  96. self.send_header('Content-Length', str(len(payload)))
  97. self.end_headers()
  98. self.wfile.write(payload)
  99. def _read_data(self):
  100. if 'Content-Length' in self.headers:
  101. return self.rfile.read(int(self.headers['Content-Length']))
  102. else:
  103. return b''
  104. def do_POST(self):
  105. data = self._read_data() + str(self.headers).encode()
  106. if self.path.startswith('/redirect_'):
  107. self._redirect()
  108. elif self.path.startswith('/method'):
  109. self._method('POST', data)
  110. elif self.path.startswith('/headers'):
  111. self._headers()
  112. else:
  113. self._status(404)
  114. def do_HEAD(self):
  115. if self.path.startswith('/redirect_'):
  116. self._redirect()
  117. elif self.path.startswith('/method'):
  118. self._method('HEAD')
  119. else:
  120. self._status(404)
  121. def do_PUT(self):
  122. data = self._read_data() + str(self.headers).encode()
  123. if self.path.startswith('/redirect_'):
  124. self._redirect()
  125. elif self.path.startswith('/method'):
  126. self._method('PUT', data)
  127. else:
  128. self._status(404)
  129. def do_GET(self):
  130. if self.path == '/video.html':
  131. payload = b'<html><video src="/vid.mp4" /></html>'
  132. self.send_response(200)
  133. self.send_header('Content-Type', 'text/html; charset=utf-8')
  134. self.send_header('Content-Length', str(len(payload)))
  135. self.end_headers()
  136. self.wfile.write(payload)
  137. elif self.path == '/vid.mp4':
  138. payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
  139. self.send_response(200)
  140. self.send_header('Content-Type', 'video/mp4')
  141. self.send_header('Content-Length', str(len(payload)))
  142. self.end_headers()
  143. self.wfile.write(payload)
  144. elif self.path == '/%E4%B8%AD%E6%96%87.html':
  145. payload = b'<html><video src="/vid.mp4" /></html>'
  146. self.send_response(200)
  147. self.send_header('Content-Type', 'text/html; charset=utf-8')
  148. self.send_header('Content-Length', str(len(payload)))
  149. self.end_headers()
  150. self.wfile.write(payload)
  151. elif self.path == '/%c7%9f':
  152. payload = b'<html><video src="/vid.mp4" /></html>'
  153. self.send_response(200)
  154. self.send_header('Content-Type', 'text/html; charset=utf-8')
  155. self.send_header('Content-Length', str(len(payload)))
  156. self.end_headers()
  157. self.wfile.write(payload)
  158. elif self.path.startswith('/redirect_loop'):
  159. self.send_response(301)
  160. self.send_header('Location', self.path)
  161. self.send_header('Content-Length', '0')
  162. self.end_headers()
  163. elif self.path == '/redirect_dotsegments':
  164. self.send_response(301)
  165. # redirect to /headers but with dot segments before
  166. self.send_header('Location', '/a/b/./../../headers')
  167. self.send_header('Content-Length', '0')
  168. self.end_headers()
  169. elif self.path == '/redirect_dotsegments_absolute':
  170. self.send_response(301)
  171. # redirect to /headers but with dot segments before - absolute url
  172. self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
  173. self.send_header('Content-Length', '0')
  174. self.end_headers()
  175. elif self.path.startswith('/redirect_'):
  176. self._redirect()
  177. elif self.path.startswith('/method'):
  178. self._method('GET', str(self.headers).encode())
  179. elif self.path.startswith('/headers'):
  180. self._headers()
  181. elif self.path.startswith('/308-to-headers'):
  182. self.send_response(308)
  183. # redirect to "localhost" for testing cookie redirection handling
  184. self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers')
  185. self.send_header('Content-Length', '0')
  186. self.end_headers()
  187. elif self.path == '/trailing_garbage':
  188. payload = b'<html><video src="/vid.mp4" /></html>'
  189. self.send_response(200)
  190. self.send_header('Content-Type', 'text/html; charset=utf-8')
  191. self.send_header('Content-Encoding', 'gzip')
  192. buf = io.BytesIO()
  193. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  194. f.write(payload)
  195. compressed = buf.getvalue() + b'trailing garbage'
  196. self.send_header('Content-Length', str(len(compressed)))
  197. self.end_headers()
  198. self.wfile.write(compressed)
  199. elif self.path == '/302-non-ascii-redirect':
  200. new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
  201. self.send_response(301)
  202. self.send_header('Location', new_url)
  203. self.send_header('Content-Length', '0')
  204. self.end_headers()
  205. elif self.path == '/content-encoding':
  206. encodings = self.headers.get('ytdl-encoding', '')
  207. payload = b'<html><video src="/vid.mp4" /></html>'
  208. for encoding in filter(None, (e.strip() for e in encodings.split(','))):
  209. if encoding == 'br' and brotli:
  210. payload = brotli.compress(payload)
  211. elif encoding == 'gzip':
  212. buf = io.BytesIO()
  213. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  214. f.write(payload)
  215. payload = buf.getvalue()
  216. elif encoding == 'deflate':
  217. payload = zlib.compress(payload)
  218. elif encoding == 'unsupported':
  219. payload = b'raw'
  220. break
  221. else:
  222. self._status(415)
  223. return
  224. self.send_response(200)
  225. self.send_header('Content-Encoding', encodings)
  226. self.send_header('Content-Length', str(len(payload)))
  227. self.end_headers()
  228. self.wfile.write(payload)
  229. elif self.path.startswith('/gen_'):
  230. payload = b'<html></html>'
  231. self.send_response(int(self.path[len('/gen_'):]))
  232. self.send_header('Content-Type', 'text/html; charset=utf-8')
  233. self.send_header('Content-Length', str(len(payload)))
  234. self.end_headers()
  235. self.wfile.write(payload)
  236. elif self.path.startswith('/incompleteread'):
  237. payload = b'<html></html>'
  238. self.send_response(200)
  239. self.send_header('Content-Type', 'text/html; charset=utf-8')
  240. self.send_header('Content-Length', '234234')
  241. self.end_headers()
  242. self.wfile.write(payload)
  243. self.finish()
  244. elif self.path.startswith('/timeout_'):
  245. time.sleep(int(self.path[len('/timeout_'):]))
  246. self._headers()
  247. elif self.path == '/source_address':
  248. payload = str(self.client_address[0]).encode()
  249. self.send_response(200)
  250. self.send_header('Content-Type', 'text/html; charset=utf-8')
  251. self.send_header('Content-Length', str(len(payload)))
  252. self.end_headers()
  253. self.wfile.write(payload)
  254. self.finish()
  255. else:
  256. self._status(404)
  257. def send_header(self, keyword, value):
  258. """
  259. Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
  260. This is against what is defined in RFC 3986, however we need to test we support this
  261. since some sites incorrectly do this.
  262. """
  263. if keyword.lower() == 'connection':
  264. return super().send_header(keyword, value)
  265. if not hasattr(self, '_headers_buffer'):
  266. self._headers_buffer = []
  267. self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
  268. class TestRequestHandlerBase:
  269. @classmethod
  270. def setup_class(cls):
  271. cls.http_httpd = http.server.ThreadingHTTPServer(
  272. ('127.0.0.1', 0), HTTPTestRequestHandler)
  273. cls.http_port = http_server_port(cls.http_httpd)
  274. cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever)
  275. # FIXME: we should probably stop the http server thread after each test
  276. # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
  277. cls.http_server_thread.daemon = True
  278. cls.http_server_thread.start()
  279. # HTTPS server
  280. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  281. cls.https_httpd = http.server.ThreadingHTTPServer(
  282. ('127.0.0.1', 0), HTTPTestRequestHandler)
  283. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  284. sslctx.load_cert_chain(certfn, None)
  285. cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True)
  286. cls.https_port = http_server_port(cls.https_httpd)
  287. cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever)
  288. cls.https_server_thread.daemon = True
  289. cls.https_server_thread.start()
  290. class TestHTTPRequestHandler(TestRequestHandlerBase):
  291. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  292. def test_verify_cert(self, handler):
  293. with handler() as rh:
  294. with pytest.raises(CertificateVerifyError):
  295. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  296. with handler(verify=False) as rh:
  297. r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  298. assert r.status == 200
  299. r.close()
  300. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  301. def test_ssl_error(self, handler):
  302. # HTTPS server with too old TLS version
  303. # XXX: is there a better way to test this than to create a new server?
  304. https_httpd = http.server.ThreadingHTTPServer(
  305. ('127.0.0.1', 0), HTTPTestRequestHandler)
  306. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  307. https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
  308. https_port = http_server_port(https_httpd)
  309. https_server_thread = threading.Thread(target=https_httpd.serve_forever)
  310. https_server_thread.daemon = True
  311. https_server_thread.start()
  312. with handler(verify=False) as rh:
  313. with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info:
  314. validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
  315. assert not issubclass(exc_info.type, CertificateVerifyError)
  316. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  317. def test_percent_encode(self, handler):
  318. with handler() as rh:
  319. # Unicode characters should be encoded with uppercase percent-encoding
  320. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
  321. assert res.status == 200
  322. res.close()
  323. # don't normalize existing percent encodings
  324. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
  325. assert res.status == 200
  326. res.close()
  327. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  328. @pytest.mark.parametrize('path', [
  329. '/a/b/./../../headers',
  330. '/redirect_dotsegments',
  331. # https://github.com/yt-dlp/yt-dlp/issues/9020
  332. '/redirect_dotsegments_absolute',
  333. ])
  334. def test_remove_dot_segments(self, handler, path):
  335. with handler(verbose=True) as rh:
  336. # This isn't a comprehensive test,
  337. # but it should be enough to check whether the handler is removing dot segments in required scenarios
  338. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
  339. assert res.status == 200
  340. assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
  341. res.close()
  342. # Not supported by CurlCFFI (non-standard)
  343. @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
  344. def test_unicode_path_redirection(self, handler):
  345. with handler() as rh:
  346. r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
  347. assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
  348. r.close()
  349. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  350. def test_raise_http_error(self, handler):
  351. with handler() as rh:
  352. for bad_status in (400, 500, 599, 302):
  353. with pytest.raises(HTTPError):
  354. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status)))
  355. # Should not raise an error
  356. validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
  357. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  358. def test_response_url(self, handler):
  359. with handler() as rh:
  360. # Response url should be that of the last url in redirect chain
  361. res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301'))
  362. assert res.url == f'http://127.0.0.1:{self.http_port}/method'
  363. res.close()
  364. res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200'))
  365. assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200'
  366. res2.close()
  367. # Covers some basic cases we expect some level of consistency between request handlers for
  368. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  369. @pytest.mark.parametrize('redirect_status,method,expected', [
  370. # A 303 must either use GET or HEAD for subsequent request
  371. (303, 'POST', ('', 'GET', False)),
  372. (303, 'HEAD', ('', 'HEAD', False)),
  373. # 301 and 302 turn POST only into a GET
  374. (301, 'POST', ('', 'GET', False)),
  375. (301, 'HEAD', ('', 'HEAD', False)),
  376. (302, 'POST', ('', 'GET', False)),
  377. (302, 'HEAD', ('', 'HEAD', False)),
  378. # 307 and 308 should not change method
  379. (307, 'POST', ('testdata', 'POST', True)),
  380. (308, 'POST', ('testdata', 'POST', True)),
  381. (307, 'HEAD', ('', 'HEAD', False)),
  382. (308, 'HEAD', ('', 'HEAD', False)),
  383. ])
  384. def test_redirect(self, handler, redirect_status, method, expected):
  385. with handler() as rh:
  386. data = b'testdata' if method == 'POST' else None
  387. headers = {}
  388. if data is not None:
  389. headers['Content-Type'] = 'application/test'
  390. res = validate_and_send(
  391. rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data,
  392. headers=headers))
  393. headers = b''
  394. data_recv = b''
  395. if data is not None:
  396. data_recv += res.read(len(data))
  397. if data_recv != data:
  398. headers += data_recv
  399. data_recv = b''
  400. headers += res.read()
  401. assert expected[0] == data_recv.decode()
  402. assert expected[1] == res.headers.get('method')
  403. assert expected[2] == ('content-length' in headers.decode().lower())
  404. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  405. def test_request_cookie_header(self, handler):
  406. # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
  407. with handler() as rh:
  408. # Specified Cookie header should be used
  409. res = validate_and_send(
  410. rh, Request(
  411. f'http://127.0.0.1:{self.http_port}/headers',
  412. headers={'Cookie': 'test=test'})).read().decode()
  413. assert 'cookie: test=test' in res.lower()
  414. # Specified Cookie header should be removed on any redirect
  415. res = validate_and_send(
  416. rh, Request(
  417. f'http://127.0.0.1:{self.http_port}/308-to-headers',
  418. headers={'Cookie': 'test=test2'})).read().decode()
  419. assert 'cookie: test=test2' not in res.lower()
  420. # Specified Cookie header should override global cookiejar for that request
  421. # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now
  422. cookiejar = YoutubeDLCookieJar()
  423. cookiejar.set_cookie(http.cookiejar.Cookie(
  424. version=0, name='test', value='ytdlp', port=None, port_specified=False,
  425. domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
  426. path_specified=True, secure=False, expires=None, discard=False, comment=None,
  427. comment_url=None, rest={}))
  428. with handler(cookiejar=cookiejar) as rh:
  429. data = validate_and_send(
  430. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read()
  431. assert b'cookie: test=ytdlp' not in data.lower()
  432. assert b'cookie: test=test3' in data.lower()
  433. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  434. def test_redirect_loop(self, handler):
  435. with handler() as rh:
  436. with pytest.raises(HTTPError, match='redirect loop'):
  437. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
  438. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  439. def test_incompleteread(self, handler):
  440. with handler(timeout=2) as rh:
  441. with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
  442. validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
  443. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  444. def test_cookies(self, handler):
  445. cookiejar = YoutubeDLCookieJar()
  446. cookiejar.set_cookie(http.cookiejar.Cookie(
  447. 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
  448. False, '/headers', True, False, None, False, None, None, {}))
  449. with handler(cookiejar=cookiejar) as rh:
  450. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
  451. assert b'cookie: test=ytdlp' in data.lower()
  452. # Per request
  453. with handler() as rh:
  454. data = validate_and_send(
  455. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
  456. assert b'cookie: test=ytdlp' in data.lower()
  457. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  458. def test_headers(self, handler):
  459. with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
  460. # Global Headers
  461. data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower()
  462. assert b'test1: test' in data
  463. # Per request headers, merged with global
  464. data = validate_and_send(rh, Request(
  465. f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower()
  466. assert b'test1: test' in data
  467. assert b'test2: changed' in data
  468. assert b'test2: test2' not in data
  469. assert b'test3: test3' in data
  470. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  471. def test_read_timeout(self, handler):
  472. with handler() as rh:
  473. # Default timeout is 20 seconds, so this should go through
  474. validate_and_send(
  475. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1'))
  476. with handler(timeout=0.1) as rh:
  477. with pytest.raises(TransportError):
  478. validate_and_send(
  479. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5'))
  480. # Per request timeout, should override handler timeout
  481. validate_and_send(
  482. rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
  483. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  484. def test_connect_timeout(self, handler):
  485. # nothing should be listening on this port
  486. connect_timeout_url = 'http://10.255.255.255'
  487. with handler(timeout=0.01) as rh:
  488. now = time.time()
  489. with pytest.raises(TransportError):
  490. validate_and_send(
  491. rh, Request(connect_timeout_url))
  492. assert 0.01 <= time.time() - now < 20
  493. with handler() as rh:
  494. with pytest.raises(TransportError):
  495. # Per request timeout, should override handler timeout
  496. now = time.time()
  497. validate_and_send(
  498. rh, Request(connect_timeout_url, extensions={'timeout': 0.01}))
  499. assert 0.01 <= time.time() - now < 20
  500. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  501. def test_source_address(self, handler):
  502. source_address = f'127.0.0.{random.randint(5, 255)}'
  503. # on some systems these loopback addresses we need for testing may not be available
  504. # see: https://github.com/yt-dlp/yt-dlp/issues/8890
  505. verify_address_availability(source_address)
  506. with handler(source_address=source_address) as rh:
  507. data = validate_and_send(
  508. rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
  509. assert source_address == data
  510. # Not supported by CurlCFFI
  511. @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
  512. def test_gzip_trailing_garbage(self, handler):
  513. with handler() as rh:
  514. data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
  515. assert data == '<html><video src="/vid.mp4" /></html>'
  516. @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
  517. @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
  518. def test_brotli(self, handler):
  519. with handler() as rh:
  520. res = validate_and_send(
  521. rh, Request(
  522. f'http://127.0.0.1:{self.http_port}/content-encoding',
  523. headers={'ytdl-encoding': 'br'}))
  524. assert res.headers.get('Content-Encoding') == 'br'
  525. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  526. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  527. def test_deflate(self, handler):
  528. with handler() as rh:
  529. res = validate_and_send(
  530. rh, Request(
  531. f'http://127.0.0.1:{self.http_port}/content-encoding',
  532. headers={'ytdl-encoding': 'deflate'}))
  533. assert res.headers.get('Content-Encoding') == 'deflate'
  534. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  535. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  536. def test_gzip(self, handler):
  537. with handler() as rh:
  538. res = validate_and_send(
  539. rh, Request(
  540. f'http://127.0.0.1:{self.http_port}/content-encoding',
  541. headers={'ytdl-encoding': 'gzip'}))
  542. assert res.headers.get('Content-Encoding') == 'gzip'
  543. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  544. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  545. def test_multiple_encodings(self, handler):
  546. with handler() as rh:
  547. for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
  548. res = validate_and_send(
  549. rh, Request(
  550. f'http://127.0.0.1:{self.http_port}/content-encoding',
  551. headers={'ytdl-encoding': pair}))
  552. assert res.headers.get('Content-Encoding') == pair
  553. assert res.read() == b'<html><video src="/vid.mp4" /></html>'
  554. # Not supported by curl_cffi
  555. @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
  556. def test_unsupported_encoding(self, handler):
  557. with handler() as rh:
  558. res = validate_and_send(
  559. rh, Request(
  560. f'http://127.0.0.1:{self.http_port}/content-encoding',
  561. headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'}))
  562. assert res.headers.get('Content-Encoding') == 'unsupported'
  563. assert res.read() == b'raw'
  564. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  565. def test_read(self, handler):
  566. with handler() as rh:
  567. res = validate_and_send(
  568. rh, Request(f'http://127.0.0.1:{self.http_port}/headers'))
  569. assert res.readable()
  570. assert res.read(1) == b'H'
  571. assert res.read(3) == b'ost'
  572. assert res.read().decode().endswith('\n\n')
  573. assert res.read() == b''
  574. class TestHTTPProxy(TestRequestHandlerBase):
  575. # Note: this only tests http urls over non-CONNECT proxy
  576. @classmethod
  577. def setup_class(cls):
  578. super().setup_class()
  579. # HTTP Proxy server
  580. cls.proxy = http.server.ThreadingHTTPServer(
  581. ('127.0.0.1', 0), _build_proxy_handler('normal'))
  582. cls.proxy_port = http_server_port(cls.proxy)
  583. cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
  584. cls.proxy_thread.daemon = True
  585. cls.proxy_thread.start()
  586. # Geo proxy server
  587. cls.geo_proxy = http.server.ThreadingHTTPServer(
  588. ('127.0.0.1', 0), _build_proxy_handler('geo'))
  589. cls.geo_port = http_server_port(cls.geo_proxy)
  590. cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
  591. cls.geo_proxy_thread.daemon = True
  592. cls.geo_proxy_thread.start()
  593. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  594. def test_http_proxy(self, handler):
  595. http_proxy = f'http://127.0.0.1:{self.proxy_port}'
  596. geo_proxy = f'http://127.0.0.1:{self.geo_port}'
  597. # Test global http proxy
  598. # Test per request http proxy
  599. # Test per request http proxy disables proxy
  600. url = 'http://foo.com/bar'
  601. # Global HTTP proxy
  602. with handler(proxies={'http': http_proxy}) as rh:
  603. res = validate_and_send(rh, Request(url)).read().decode()
  604. assert res == f'normal: {url}'
  605. # Per request proxy overrides global
  606. res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
  607. assert res == f'geo: {url}'
  608. # and setting to None disables all proxies for that request
  609. real_url = f'http://127.0.0.1:{self.http_port}/headers'
  610. res = validate_and_send(
  611. rh, Request(real_url, proxies={'http': None})).read().decode()
  612. assert res != f'normal: {real_url}'
  613. assert 'Accept' in res
  614. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  615. def test_noproxy(self, handler):
  616. with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
  617. # NO_PROXY
  618. for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
  619. nop_response = validate_and_send(
  620. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
  621. 'utf-8')
  622. assert 'Accept' in nop_response
  623. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  624. def test_allproxy(self, handler):
  625. url = 'http://foo.com/bar'
  626. with handler() as rh:
  627. response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
  628. 'utf-8')
  629. assert response == f'normal: {url}'
  630. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  631. def test_http_proxy_with_idn(self, handler):
  632. with handler(proxies={
  633. 'http': f'http://127.0.0.1:{self.proxy_port}',
  634. }) as rh:
  635. url = 'http://中文.tw/'
  636. response = rh.send(Request(url)).read().decode()
  637. # b'xn--fiq228c' is '中文'.encode('idna')
  638. assert response == 'normal: http://xn--fiq228c.tw/'
  639. class TestClientCertificate:
  640. @classmethod
  641. def setup_class(cls):
  642. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  643. cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
  644. cacertfn = os.path.join(cls.certdir, 'ca.crt')
  645. cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
  646. sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
  647. sslctx.verify_mode = ssl.CERT_REQUIRED
  648. sslctx.load_verify_locations(cafile=cacertfn)
  649. sslctx.load_cert_chain(certfn, None)
  650. cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True)
  651. cls.port = http_server_port(cls.httpd)
  652. cls.server_thread = threading.Thread(target=cls.httpd.serve_forever)
  653. cls.server_thread.daemon = True
  654. cls.server_thread.start()
  655. def _run_test(self, handler, **handler_kwargs):
  656. with handler(
  657. # Disable client-side validation of unacceptable self-signed testcert.pem
  658. # The test is of a check on the server side, so unaffected
  659. verify=False,
  660. **handler_kwargs,
  661. ) as rh:
  662. validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
  663. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  664. def test_certificate_combined_nopass(self, handler):
  665. self._run_test(handler, client_cert={
  666. 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
  667. })
  668. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  669. def test_certificate_nocombined_nopass(self, handler):
  670. self._run_test(handler, client_cert={
  671. 'client_certificate': os.path.join(self.certdir, 'client.crt'),
  672. 'client_certificate_key': os.path.join(self.certdir, 'client.key'),
  673. })
  674. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  675. def test_certificate_combined_pass(self, handler):
  676. self._run_test(handler, client_cert={
  677. 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
  678. 'client_certificate_password': 'foobar',
  679. })
  680. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  681. def test_certificate_nocombined_pass(self, handler):
  682. self._run_test(handler, client_cert={
  683. 'client_certificate': os.path.join(self.certdir, 'client.crt'),
  684. 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'),
  685. 'client_certificate_password': 'foobar',
  686. })
  687. @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
  688. class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
  689. def test_supported_impersonate_targets(self, handler):
  690. with handler(headers=std_headers) as rh:
  691. # note: this assumes the impersonate request handler supports the impersonate extension
  692. for target in rh.supported_targets:
  693. res = validate_and_send(rh, Request(
  694. f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target}))
  695. assert res.status == 200
  696. assert std_headers['user-agent'].lower() not in res.read().decode().lower()
  697. class TestRequestHandlerMisc:
  698. """Misc generic tests for request handlers, not related to request or validation testing"""
  699. @pytest.mark.parametrize('handler,logger_name', [
  700. ('Requests', 'urllib3'),
  701. ('Websockets', 'websockets.client'),
  702. ('Websockets', 'websockets.server')
  703. ], indirect=['handler'])
  704. def test_remove_logging_handler(self, handler, logger_name):
  705. # Ensure any logging handlers, which may contain a YoutubeDL instance,
  706. # are removed when we close the request handler
  707. # See: https://github.com/yt-dlp/yt-dlp/issues/8922
  708. logging_handlers = logging.getLogger(logger_name).handlers
  709. before_count = len(logging_handlers)
  710. rh = handler()
  711. assert len(logging_handlers) == before_count + 1
  712. rh.close()
  713. assert len(logging_handlers) == before_count
  714. class TestUrllibRequestHandler(TestRequestHandlerBase):
  715. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  716. def test_file_urls(self, handler):
  717. # See https://github.com/ytdl-org/youtube-dl/issues/8227
  718. tf = tempfile.NamedTemporaryFile(delete=False)
  719. tf.write(b'foobar')
  720. tf.close()
  721. req = Request(pathlib.Path(tf.name).as_uri())
  722. with handler() as rh:
  723. with pytest.raises(UnsupportedRequest):
  724. rh.validate(req)
  725. # Test that urllib never loaded FileHandler
  726. with pytest.raises(TransportError):
  727. rh.send(req)
  728. with handler(enable_file_urls=True) as rh:
  729. res = validate_and_send(rh, req)
  730. assert res.read() == b'foobar'
  731. res.close()
  732. os.unlink(tf.name)
  733. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  734. def test_http_error_returns_content(self, handler):
  735. # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
  736. def get_response():
  737. with handler() as rh:
  738. # headers url
  739. try:
  740. validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404'))
  741. except HTTPError as e:
  742. return e.response
  743. assert get_response().read() == b'<html></html>'
  744. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  745. def test_verify_cert_error_text(self, handler):
  746. # Check the output of the error message
  747. with handler() as rh:
  748. with pytest.raises(
  749. CertificateVerifyError,
  750. match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate'
  751. ):
  752. validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
  753. @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
  754. @pytest.mark.parametrize('req,match,version_check', [
  755. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
  756. # bpo-39603: Check implemented in 3.7.9+, 3.8.5+
  757. (
  758. Request('http://127.0.0.1', method='GET\n'),
  759. 'method can\'t contain control characters',
  760. lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5)
  761. ),
  762. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265
  763. # bpo-38576: Check implemented in 3.7.8+, 3.8.3+
  764. (
  765. Request('http://127.0.0. 1', method='GET'),
  766. 'URL can\'t contain control characters',
  767. lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3)
  768. ),
  769. # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50
  770. (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None),
  771. ])
  772. def test_httplib_validation_errors(self, handler, req, match, version_check):
  773. if version_check and version_check(sys.version_info):
  774. pytest.skip(f'Python {sys.version} version does not have the required validation for this test.')
  775. with handler() as rh:
  776. with pytest.raises(RequestError, match=match) as exc_info:
  777. validate_and_send(rh, req)
  778. assert not isinstance(exc_info.value, TransportError)
  779. @pytest.mark.parametrize('handler', ['Requests'], indirect=True)
  780. class TestRequestsRequestHandler(TestRequestHandlerBase):
  781. @pytest.mark.parametrize('raised,expected', [
  782. (lambda: requests.exceptions.ConnectTimeout(), TransportError),
  783. (lambda: requests.exceptions.ReadTimeout(), TransportError),
  784. (lambda: requests.exceptions.Timeout(), TransportError),
  785. (lambda: requests.exceptions.ConnectionError(), TransportError),
  786. (lambda: requests.exceptions.ProxyError(), ProxyError),
  787. (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError),
  788. (lambda: requests.exceptions.SSLError(), SSLError),
  789. (lambda: requests.exceptions.InvalidURL(), RequestError),
  790. (lambda: requests.exceptions.InvalidHeader(), RequestError),
  791. # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535
  792. (lambda: urllib3.exceptions.HTTPError(), TransportError),
  793. (lambda: requests.exceptions.RequestException(), RequestError)
  794. # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
  795. ])
  796. def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
  797. with handler() as rh:
  798. def mock_get_instance(*args, **kwargs):
  799. class MockSession:
  800. def request(self, *args, **kwargs):
  801. raise raised()
  802. return MockSession()
  803. monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
  804. with pytest.raises(expected) as exc_info:
  805. rh.send(Request('http://fake'))
  806. assert exc_info.type is expected
  807. @pytest.mark.parametrize('raised,expected,match', [
  808. (lambda: urllib3.exceptions.SSLError(), SSLError, None),
  809. (lambda: urllib3.exceptions.TimeoutError(), TransportError, None),
  810. (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None),
  811. (lambda: urllib3.exceptions.ProtocolError(), TransportError, None),
  812. (lambda: urllib3.exceptions.DecodeError(), TransportError, None),
  813. (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all
  814. (
  815. lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)),
  816. IncompleteRead,
  817. '3 bytes read, 4 more expected'
  818. ),
  819. (
  820. lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)),
  821. IncompleteRead,
  822. '3 bytes read, 5 more expected'
  823. ),
  824. ])
  825. def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
  826. from requests.models import Response as RequestsResponse
  827. from urllib3.response import HTTPResponse as Urllib3Response
  828. from yt_dlp.networking._requests import RequestsResponseAdapter
  829. requests_res = RequestsResponse()
  830. requests_res.raw = Urllib3Response(body=b'', status=200)
  831. res = RequestsResponseAdapter(requests_res)
  832. def mock_read(*args, **kwargs):
  833. raise raised()
  834. monkeypatch.setattr(res.fp, 'read', mock_read)
  835. with pytest.raises(expected, match=match) as exc_info:
  836. res.read()
  837. assert exc_info.type is expected
  838. def test_close(self, handler, monkeypatch):
  839. rh = handler()
  840. session = rh._get_instance(cookiejar=rh.cookiejar)
  841. called = False
  842. original_close = session.close
  843. def mock_close(*args, **kwargs):
  844. nonlocal called
  845. called = True
  846. return original_close(*args, **kwargs)
  847. monkeypatch.setattr(session, 'close', mock_close)
  848. rh.close()
  849. assert called
  850. @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True)
  851. class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
  852. @pytest.mark.parametrize('params,extensions', [
  853. ({}, {'impersonate': ImpersonateTarget('chrome')}),
  854. ({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
  855. ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
  856. ])
  857. def test_impersonate(self, handler, params, extensions):
  858. with handler(headers=std_headers, **params) as rh:
  859. res = validate_and_send(
  860. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode()
  861. assert 'sec-ch-ua: "Chromium";v="110"' in res
  862. # Check that user agent is added over ours
  863. assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res
  864. def test_headers(self, handler):
  865. with handler(headers=std_headers) as rh:
  866. # Ensure curl-impersonate overrides our standard headers (usually added
  867. res = validate_and_send(
  868. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={
  869. 'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower()
  870. assert std_headers['user-agent'].lower() not in res
  871. assert std_headers['accept-language'].lower() not in res
  872. assert std_headers['sec-fetch-mode'].lower() not in res
  873. # other than UA, custom headers that differ from std_headers should be kept
  874. assert 'sec-fetch-mode: custom' in res
  875. assert 'x-custom: test' in res
  876. # but when not impersonating don't remove std_headers
  877. res = validate_and_send(
  878. rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower()
  879. # std_headers should be present
  880. for k, v in std_headers.items():
  881. assert f'{k}: {v}'.lower() in res
  882. @pytest.mark.parametrize('raised,expected,match', [
  883. (lambda: curl_cffi.requests.errors.RequestsError(
  884. '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None),
  885. (lambda: curl_cffi.requests.errors.RequestsError(
  886. '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
  887. (lambda: curl_cffi.requests.errors.RequestsError(
  888. '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None),
  889. ])
  890. def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
  891. import curl_cffi.requests
  892. from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter
  893. curl_res = curl_cffi.requests.Response()
  894. res = CurlCFFIResponseAdapter(curl_res)
  895. def mock_read(*args, **kwargs):
  896. try:
  897. raise raised()
  898. except Exception as e:
  899. e.response = curl_res
  900. raise
  901. monkeypatch.setattr(res.fp, 'read', mock_read)
  902. with pytest.raises(expected, match=match) as exc_info:
  903. res.read()
  904. assert exc_info.type is expected
  905. @pytest.mark.parametrize('raised,expected,match', [
  906. (lambda: curl_cffi.requests.errors.RequestsError(
  907. '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None),
  908. (lambda: curl_cffi.requests.errors.RequestsError(
  909. '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None),
  910. (lambda: curl_cffi.requests.errors.RequestsError(
  911. '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None),
  912. (lambda: curl_cffi.requests.errors.RequestsError(
  913. '', code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None),
  914. (lambda: curl_cffi.requests.errors.RequestsError(
  915. '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None),
  916. ])
  917. def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match):
  918. import curl_cffi.requests
  919. curl_res = curl_cffi.requests.Response()
  920. curl_res.status_code = 301
  921. with handler() as rh:
  922. original_get_instance = rh._get_instance
  923. def mock_get_instance(*args, **kwargs):
  924. instance = original_get_instance(*args, **kwargs)
  925. def request(*_, **__):
  926. try:
  927. raise raised()
  928. except Exception as e:
  929. e.response = curl_res
  930. raise
  931. monkeypatch.setattr(instance, 'request', request)
  932. return instance
  933. monkeypatch.setattr(rh, '_get_instance', mock_get_instance)
  934. with pytest.raises(expected) as exc_info:
  935. rh.send(Request('http://fake'))
  936. assert exc_info.type is expected
  937. def test_response_reader(self, handler):
  938. class FakeResponse:
  939. def __init__(self, raise_error=False):
  940. self.raise_error = raise_error
  941. self.closed = False
  942. def iter_content(self):
  943. yield b'foo'
  944. yield b'bar'
  945. yield b'z'
  946. if self.raise_error:
  947. raise Exception('test')
  948. def close(self):
  949. self.closed = True
  950. from yt_dlp.networking._curlcffi import CurlCFFIResponseReader
  951. res = CurlCFFIResponseReader(FakeResponse())
  952. assert res.readable
  953. assert res.bytes_read == 0
  954. assert res.read(1) == b'f'
  955. assert res.bytes_read == 3
  956. assert res._buffer == b'oo'
  957. assert res.read(2) == b'oo'
  958. assert res.bytes_read == 3
  959. assert res._buffer == b''
  960. assert res.read(2) == b'ba'
  961. assert res.bytes_read == 6
  962. assert res._buffer == b'r'
  963. assert res.read(3) == b'rz'
  964. assert res.bytes_read == 7
  965. assert res._buffer == b''
  966. assert res.closed
  967. assert res._response.closed
  968. # should handle no size param
  969. res2 = CurlCFFIResponseReader(FakeResponse())
  970. assert res2.read() == b'foobarz'
  971. assert res2.bytes_read == 7
  972. assert res2._buffer == b''
  973. assert res2.closed
  974. # should close on an exception
  975. res3 = CurlCFFIResponseReader(FakeResponse(raise_error=True))
  976. with pytest.raises(Exception, match='test'):
  977. res3.read()
  978. assert res3._buffer == b''
  979. assert res3.bytes_read == 7
  980. assert res3.closed
  981. # buffer should be cleared on close
  982. res4 = CurlCFFIResponseReader(FakeResponse())
  983. res4.read(2)
  984. assert res4._buffer == b'o'
  985. res4.close()
  986. assert res4.closed
  987. assert res4._buffer == b''
  988. def run_validation(handler, error, req, **handler_kwargs):
  989. with handler(**handler_kwargs) as rh:
  990. if error:
  991. with pytest.raises(error):
  992. rh.validate(req)
  993. else:
  994. rh.validate(req)
  995. class TestRequestHandlerValidation:
  996. class ValidationRH(RequestHandler):
  997. def _send(self, request):
  998. raise RequestError('test')
  999. class NoCheckRH(ValidationRH):
  1000. _SUPPORTED_FEATURES = None
  1001. _SUPPORTED_PROXY_SCHEMES = None
  1002. _SUPPORTED_URL_SCHEMES = None
  1003. def _check_extensions(self, extensions):
  1004. extensions.clear()
  1005. class HTTPSupportedRH(ValidationRH):
  1006. _SUPPORTED_URL_SCHEMES = ('http',)
  1007. URL_SCHEME_TESTS = [
  1008. # scheme, expected to fail, handler kwargs
  1009. ('Urllib', [
  1010. ('http', False, {}),
  1011. ('https', False, {}),
  1012. ('data', False, {}),
  1013. ('ftp', False, {}),
  1014. ('file', UnsupportedRequest, {}),
  1015. ('file', False, {'enable_file_urls': True}),
  1016. ]),
  1017. ('Requests', [
  1018. ('http', False, {}),
  1019. ('https', False, {}),
  1020. ]),
  1021. ('Websockets', [
  1022. ('ws', False, {}),
  1023. ('wss', False, {}),
  1024. ]),
  1025. ('CurlCFFI', [
  1026. ('http', False, {}),
  1027. ('https', False, {}),
  1028. ]),
  1029. (NoCheckRH, [('http', False, {})]),
  1030. (ValidationRH, [('http', UnsupportedRequest, {})])
  1031. ]
  1032. PROXY_SCHEME_TESTS = [
  1033. # scheme, expected to fail
  1034. ('Urllib', 'http', [
  1035. ('http', False),
  1036. ('https', UnsupportedRequest),
  1037. ('socks4', False),
  1038. ('socks4a', False),
  1039. ('socks5', False),
  1040. ('socks5h', False),
  1041. ('socks', UnsupportedRequest),
  1042. ]),
  1043. ('Requests', 'http', [
  1044. ('http', False),
  1045. ('https', False),
  1046. ('socks4', False),
  1047. ('socks4a', False),
  1048. ('socks5', False),
  1049. ('socks5h', False),
  1050. ]),
  1051. ('CurlCFFI', 'http', [
  1052. ('http', False),
  1053. ('https', False),
  1054. ('socks4', False),
  1055. ('socks4a', False),
  1056. ('socks5', False),
  1057. ('socks5h', False),
  1058. ]),
  1059. (NoCheckRH, 'http', [('http', False)]),
  1060. (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
  1061. ('Websockets', 'ws', [('http', UnsupportedRequest)]),
  1062. (NoCheckRH, 'http', [('http', False)]),
  1063. (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
  1064. ]
  1065. PROXY_KEY_TESTS = [
  1066. # key, expected to fail
  1067. ('Urllib', [
  1068. ('all', False),
  1069. ('unrelated', False),
  1070. ]),
  1071. ('Requests', [
  1072. ('all', False),
  1073. ('unrelated', False),
  1074. ]),
  1075. ('CurlCFFI', [
  1076. ('all', False),
  1077. ('unrelated', False),
  1078. ]),
  1079. (NoCheckRH, [('all', False)]),
  1080. (HTTPSupportedRH, [('all', UnsupportedRequest)]),
  1081. (HTTPSupportedRH, [('no', UnsupportedRequest)]),
  1082. ]
  1083. EXTENSION_TESTS = [
  1084. ('Urllib', 'http', [
  1085. ({'cookiejar': 'notacookiejar'}, AssertionError),
  1086. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1087. ({'cookiejar': CookieJar()}, AssertionError),
  1088. ({'timeout': 1}, False),
  1089. ({'timeout': 'notatimeout'}, AssertionError),
  1090. ({'unsupported': 'value'}, UnsupportedRequest),
  1091. ]),
  1092. ('Requests', 'http', [
  1093. ({'cookiejar': 'notacookiejar'}, AssertionError),
  1094. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1095. ({'timeout': 1}, False),
  1096. ({'timeout': 'notatimeout'}, AssertionError),
  1097. ({'unsupported': 'value'}, UnsupportedRequest),
  1098. ]),
  1099. ('CurlCFFI', 'http', [
  1100. ({'cookiejar': 'notacookiejar'}, AssertionError),
  1101. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1102. ({'timeout': 1}, False),
  1103. ({'timeout': 'notatimeout'}, AssertionError),
  1104. ({'unsupported': 'value'}, UnsupportedRequest),
  1105. ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest),
  1106. ({'impersonate': 123}, AssertionError),
  1107. ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False),
  1108. ({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
  1109. ({'impersonate': ImpersonateTarget()}, False),
  1110. ({'impersonate': 'chrome'}, AssertionError)
  1111. ]),
  1112. (NoCheckRH, 'http', [
  1113. ({'cookiejar': 'notacookiejar'}, False),
  1114. ({'somerandom': 'test'}, False), # but any extension is allowed through
  1115. ]),
  1116. ('Websockets', 'ws', [
  1117. ({'cookiejar': YoutubeDLCookieJar()}, False),
  1118. ({'timeout': 2}, False),
  1119. ]),
  1120. ]
  1121. @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
  1122. (handler_tests[0], scheme, fail, handler_kwargs)
  1123. for handler_tests in URL_SCHEME_TESTS
  1124. for scheme, fail, handler_kwargs in handler_tests[1]
  1125. ], indirect=['handler'])
  1126. def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
  1127. run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
  1128. @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler'])
  1129. def test_no_proxy(self, handler, fail):
  1130. run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
  1131. run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
  1132. @pytest.mark.parametrize('handler,proxy_key,fail', [
  1133. (handler_tests[0], proxy_key, fail)
  1134. for handler_tests in PROXY_KEY_TESTS
  1135. for proxy_key, fail in handler_tests[1]
  1136. ], indirect=['handler'])
  1137. def test_proxy_key(self, handler, proxy_key, fail):
  1138. run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
  1139. run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
  1140. @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
  1141. (handler_tests[0], handler_tests[1], scheme, fail)
  1142. for handler_tests in PROXY_SCHEME_TESTS
  1143. for scheme, fail in handler_tests[2]
  1144. ], indirect=['handler'])
  1145. def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
  1146. run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
  1147. run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
  1148. @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True)
  1149. def test_empty_proxy(self, handler):
  1150. run_validation(handler, False, Request('http://', proxies={'http': None}))
  1151. run_validation(handler, False, Request('http://'), proxies={'http': None})
  1152. @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
  1153. @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
  1154. def test_invalid_proxy_url(self, handler, proxy_url):
  1155. run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
  1156. @pytest.mark.parametrize('handler,scheme,extensions,fail', [
  1157. (handler_tests[0], handler_tests[1], extensions, fail)
  1158. for handler_tests in EXTENSION_TESTS
  1159. for extensions, fail in handler_tests[2]
  1160. ], indirect=['handler'])
  1161. def test_extension(self, handler, scheme, extensions, fail):
  1162. run_validation(
  1163. handler, fail, Request(f'{scheme}://', extensions=extensions))
  1164. def test_invalid_request_type(self):
  1165. rh = self.ValidationRH(logger=FakeLogger())
  1166. for method in (rh.validate, rh.send):
  1167. with pytest.raises(TypeError, match='Expected an instance of Request'):
  1168. method('not a request')
  1169. class FakeResponse(Response):
  1170. def __init__(self, request):
  1171. # XXX: we could make request part of standard response interface
  1172. self.request = request
  1173. super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url)
  1174. class FakeRH(RequestHandler):
  1175. def __init__(self, *args, **params):
  1176. self.params = params
  1177. super().__init__(*args, **params)
  1178. def _validate(self, request):
  1179. return
  1180. def _send(self, request: Request):
  1181. if request.url.startswith('ssl://'):
  1182. raise SSLError(request.url[len('ssl://'):])
  1183. return FakeResponse(request)
  1184. class FakeRHYDL(FakeYDL):
  1185. def __init__(self, *args, **kwargs):
  1186. super().__init__(*args, **kwargs)
  1187. self._request_director = self.build_request_director([FakeRH])
  1188. class AllUnsupportedRHYDL(FakeYDL):
  1189. def __init__(self, *args, **kwargs):
  1190. class UnsupportedRH(RequestHandler):
  1191. def _send(self, request: Request):
  1192. pass
  1193. _SUPPORTED_FEATURES = ()
  1194. _SUPPORTED_PROXY_SCHEMES = ()
  1195. _SUPPORTED_URL_SCHEMES = ()
  1196. super().__init__(*args, **kwargs)
  1197. self._request_director = self.build_request_director([UnsupportedRH])
  1198. class TestRequestDirector:
  1199. def test_handler_operations(self):
  1200. director = RequestDirector(logger=FakeLogger())
  1201. handler = FakeRH(logger=FakeLogger())
  1202. director.add_handler(handler)
  1203. assert director.handlers.get(FakeRH.RH_KEY) is handler
  1204. # Handler should overwrite
  1205. handler2 = FakeRH(logger=FakeLogger())
  1206. director.add_handler(handler2)
  1207. assert director.handlers.get(FakeRH.RH_KEY) is not handler
  1208. assert director.handlers.get(FakeRH.RH_KEY) is handler2
  1209. assert len(director.handlers) == 1
  1210. class AnotherFakeRH(FakeRH):
  1211. pass
  1212. director.add_handler(AnotherFakeRH(logger=FakeLogger()))
  1213. assert len(director.handlers) == 2
  1214. assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY
  1215. director.handlers.pop(FakeRH.RH_KEY, None)
  1216. assert director.handlers.get(FakeRH.RH_KEY) is None
  1217. assert len(director.handlers) == 1
  1218. # RequestErrors should passthrough
  1219. with pytest.raises(SSLError):
  1220. director.send(Request('ssl://something'))
  1221. def test_send(self):
  1222. director = RequestDirector(logger=FakeLogger())
  1223. with pytest.raises(RequestError):
  1224. director.send(Request('any://'))
  1225. director.add_handler(FakeRH(logger=FakeLogger()))
  1226. assert isinstance(director.send(Request('http://')), FakeResponse)
  1227. def test_unsupported_handlers(self):
  1228. class SupportedRH(RequestHandler):
  1229. _SUPPORTED_URL_SCHEMES = ['http']
  1230. def _send(self, request: Request):
  1231. return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
  1232. director = RequestDirector(logger=FakeLogger())
  1233. director.add_handler(SupportedRH(logger=FakeLogger()))
  1234. director.add_handler(FakeRH(logger=FakeLogger()))
  1235. # First should take preference
  1236. assert director.send(Request('http://')).read() == b'supported'
  1237. assert director.send(Request('any://')).read() == b''
  1238. director.handlers.pop(FakeRH.RH_KEY)
  1239. with pytest.raises(NoSupportingHandlers):
  1240. director.send(Request('any://'))
  1241. def test_unexpected_error(self):
  1242. director = RequestDirector(logger=FakeLogger())
  1243. class UnexpectedRH(FakeRH):
  1244. def _send(self, request: Request):
  1245. raise TypeError('something')
  1246. director.add_handler(UnexpectedRH(logger=FakeLogger))
  1247. with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'):
  1248. director.send(Request('any://'))
  1249. director.handlers.clear()
  1250. assert len(director.handlers) == 0
  1251. # Should not be fatal
  1252. director.add_handler(FakeRH(logger=FakeLogger()))
  1253. director.add_handler(UnexpectedRH(logger=FakeLogger))
  1254. assert director.send(Request('any://'))
  1255. def test_preference(self):
  1256. director = RequestDirector(logger=FakeLogger())
  1257. director.add_handler(FakeRH(logger=FakeLogger()))
  1258. class SomeRH(RequestHandler):
  1259. _SUPPORTED_URL_SCHEMES = ['http']
  1260. def _send(self, request: Request):
  1261. return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url)
  1262. def some_preference(rh, request):
  1263. return (0 if not isinstance(rh, SomeRH)
  1264. else 100 if 'prefer' in request.headers
  1265. else -1)
  1266. director.add_handler(SomeRH(logger=FakeLogger()))
  1267. director.preferences.add(some_preference)
  1268. assert director.send(Request('http://')).read() == b''
  1269. assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
  1270. def test_close(self, monkeypatch):
  1271. director = RequestDirector(logger=FakeLogger())
  1272. director.add_handler(FakeRH(logger=FakeLogger()))
  1273. called = False
  1274. def mock_close(*args, **kwargs):
  1275. nonlocal called
  1276. called = True
  1277. monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close)
  1278. director.close()
  1279. assert called
  1280. # XXX: do we want to move this to test_YoutubeDL.py?
  1281. class TestYoutubeDLNetworking:
  1282. @staticmethod
  1283. def build_handler(ydl, handler: RequestHandler = FakeRH):
  1284. return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY)
  1285. def test_compat_opener(self):
  1286. with FakeYDL() as ydl:
  1287. with warnings.catch_warnings():
  1288. warnings.simplefilter('ignore', category=DeprecationWarning)
  1289. assert isinstance(ydl._opener, urllib.request.OpenerDirector)
  1290. @pytest.mark.parametrize('proxy,expected', [
  1291. ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}),
  1292. ('', {'all': '__noproxy__'}),
  1293. (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # env, set https
  1294. ])
  1295. def test_proxy(self, proxy, expected, monkeypatch):
  1296. monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081')
  1297. with FakeYDL({'proxy': proxy}) as ydl:
  1298. assert ydl.proxies == expected
  1299. def test_compat_request(self):
  1300. with FakeRHYDL() as ydl:
  1301. assert ydl.urlopen('test://')
  1302. urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'})
  1303. urllib_req.add_unredirected_header('Cookie', 'bob=bob')
  1304. urllib_req.timeout = 2
  1305. with warnings.catch_warnings():
  1306. warnings.simplefilter('ignore', category=DeprecationWarning)
  1307. req = ydl.urlopen(urllib_req).request
  1308. assert req.url == urllib_req.get_full_url()
  1309. assert req.data == urllib_req.data
  1310. assert req.method == urllib_req.get_method()
  1311. assert 'X-Test' in req.headers
  1312. assert 'Cookie' in req.headers
  1313. assert req.extensions.get('timeout') == 2
  1314. with pytest.raises(AssertionError):
  1315. ydl.urlopen(None)
  1316. def test_extract_basic_auth(self):
  1317. with FakeRHYDL() as ydl:
  1318. res = ydl.urlopen(Request('http://user:pass@foo.bar'))
  1319. assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz'
  1320. def test_sanitize_url(self):
  1321. with FakeRHYDL() as ydl:
  1322. res = ydl.urlopen(Request('httpss://foo.bar'))
  1323. assert res.request.url == 'https://foo.bar'
  1324. def test_file_urls_error(self):
  1325. # use urllib handler
  1326. with FakeYDL() as ydl:
  1327. with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
  1328. ydl.urlopen('file://')
  1329. @pytest.mark.parametrize('scheme', (['ws', 'wss']))
  1330. def test_websocket_unavailable_error(self, scheme):
  1331. with AllUnsupportedRHYDL() as ydl:
  1332. with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
  1333. ydl.urlopen(f'{scheme}://')
  1334. def test_legacy_server_connect_error(self):
  1335. with FakeRHYDL() as ydl:
  1336. for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
  1337. with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'):
  1338. ydl.urlopen(f'ssl://{error}')
  1339. with pytest.raises(SSLError, match='testerror'):
  1340. ydl.urlopen('ssl://testerror')
  1341. def test_unsupported_impersonate_target(self):
  1342. class FakeImpersonationRHYDL(FakeYDL):
  1343. def __init__(self, *args, **kwargs):
  1344. class HTTPRH(RequestHandler):
  1345. def _send(self, request: Request):
  1346. pass
  1347. _SUPPORTED_URL_SCHEMES = ('http',)
  1348. _SUPPORTED_PROXY_SCHEMES = None
  1349. super().__init__(*args, **kwargs)
  1350. self._request_director = self.build_request_director([HTTPRH])
  1351. with FakeImpersonationRHYDL() as ydl:
  1352. with pytest.raises(
  1353. RequestError,
  1354. match=r'Impersonate target "test" is not available'
  1355. ):
  1356. ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)}))
  1357. def test_unsupported_impersonate_extension(self):
  1358. class FakeHTTPRHYDL(FakeYDL):
  1359. def __init__(self, *args, **kwargs):
  1360. class IRH(ImpersonateRequestHandler):
  1361. def _send(self, request: Request):
  1362. pass
  1363. _SUPPORTED_URL_SCHEMES = ('http',)
  1364. _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc',): 'test'}
  1365. _SUPPORTED_PROXY_SCHEMES = None
  1366. super().__init__(*args, **kwargs)
  1367. self._request_director = self.build_request_director([IRH])
  1368. with FakeHTTPRHYDL() as ydl:
  1369. with pytest.raises(
  1370. RequestError,
  1371. match=r'Impersonate target "test" is not available'
  1372. ):
  1373. ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)}))
  1374. def test_raise_impersonate_error(self):
  1375. with pytest.raises(
  1376. YoutubeDLError,
  1377. match=r'Impersonate target "test" is not available'
  1378. ):
  1379. FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)})
  1380. def test_pass_impersonate_param(self, monkeypatch):
  1381. class IRH(ImpersonateRequestHandler):
  1382. def _send(self, request: Request):
  1383. pass
  1384. _SUPPORTED_URL_SCHEMES = ('http',)
  1385. _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'}
  1386. # Bypass the check on initialize
  1387. brh = FakeYDL.build_request_director
  1388. monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH]))
  1389. with FakeYDL({
  1390. 'impersonate': ImpersonateTarget('abc', None, None, None)
  1391. }) as ydl:
  1392. rh = self.build_handler(ydl, IRH)
  1393. assert rh.impersonate == ImpersonateTarget('abc', None, None, None)
  1394. def test_get_impersonate_targets(self):
  1395. handlers = []
  1396. for target_client in ('abc', 'xyz', 'asd'):
  1397. class TestRH(ImpersonateRequestHandler):
  1398. def _send(self, request: Request):
  1399. pass
  1400. _SUPPORTED_URL_SCHEMES = ('http',)
  1401. _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client,): 'test'}
  1402. RH_KEY = target_client
  1403. RH_NAME = target_client
  1404. handlers.append(TestRH)
  1405. with FakeYDL() as ydl:
  1406. ydl._request_director = ydl.build_request_director(handlers)
  1407. assert set(ydl._get_available_impersonate_targets()) == {
  1408. (ImpersonateTarget('xyz'), 'xyz'),
  1409. (ImpersonateTarget('abc'), 'abc'),
  1410. (ImpersonateTarget('asd'), 'asd')
  1411. }
  1412. assert ydl._impersonate_target_available(ImpersonateTarget('abc'))
  1413. assert ydl._impersonate_target_available(ImpersonateTarget())
  1414. assert not ydl._impersonate_target_available(ImpersonateTarget('zxy'))
  1415. @pytest.mark.parametrize('proxy_key,proxy_url,expected', [
  1416. ('http', '__noproxy__', None),
  1417. ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
  1418. ('https', 'example.com', 'http://example.com'),
  1419. ('https', '//example.com', 'http://example.com'),
  1420. ('https', 'socks5://example.com', 'socks5h://example.com'),
  1421. ('http', 'socks://example.com', 'socks4://example.com'),
  1422. ('http', 'socks4://example.com', 'socks4://example.com'),
  1423. ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies
  1424. ])
  1425. def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch):
  1426. # proxies should be cleaned in urlopen()
  1427. with FakeRHYDL() as ydl:
  1428. req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request
  1429. assert req.proxies[proxy_key] == expected
  1430. # and should also be cleaned when building the handler
  1431. monkeypatch.setenv(f'{proxy_key.upper()}_PROXY', proxy_url)
  1432. with FakeYDL() as ydl:
  1433. rh = self.build_handler(ydl)
  1434. assert rh.proxies[proxy_key] == expected
  1435. def test_clean_proxy_header(self):
  1436. with FakeRHYDL() as ydl:
  1437. req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request
  1438. assert 'ytdl-request-proxy' not in req.headers
  1439. assert req.proxies == {'all': 'http://foo.bar'}
  1440. with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl:
  1441. rh = self.build_handler(ydl)
  1442. assert 'ytdl-request-proxy' not in rh.headers
  1443. assert rh.proxies == {'all': 'http://foo.bar'}
  1444. def test_clean_header(self):
  1445. with FakeRHYDL() as ydl:
  1446. res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True}))
  1447. assert 'Youtubedl-no-compression' not in res.request.headers
  1448. assert res.request.headers.get('Accept-Encoding') == 'identity'
  1449. with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl:
  1450. rh = self.build_handler(ydl)
  1451. assert 'Youtubedl-no-compression' not in rh.headers
  1452. assert rh.headers.get('Accept-Encoding') == 'identity'
  1453. with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
  1454. rh = self.build_handler(ydl)
  1455. assert 'Ytdl-socks-proxy' not in rh.headers
  1456. def test_build_handler_params(self):
  1457. with FakeYDL({
  1458. 'http_headers': {'test': 'testtest'},
  1459. 'socket_timeout': 2,
  1460. 'proxy': 'http://127.0.0.1:8080',
  1461. 'source_address': '127.0.0.45',
  1462. 'debug_printtraffic': True,
  1463. 'compat_opts': ['no-certifi'],
  1464. 'nocheckcertificate': True,
  1465. 'legacyserverconnect': True,
  1466. }) as ydl:
  1467. rh = self.build_handler(ydl)
  1468. assert rh.headers.get('test') == 'testtest'
  1469. assert 'Accept' in rh.headers # ensure std_headers are still there
  1470. assert rh.timeout == 2
  1471. assert rh.proxies.get('all') == 'http://127.0.0.1:8080'
  1472. assert rh.source_address == '127.0.0.45'
  1473. assert rh.verbose is True
  1474. assert rh.prefer_system_certs is True
  1475. assert rh.verify is False
  1476. assert rh.legacy_ssl_support is True
  1477. @pytest.mark.parametrize('ydl_params', [
  1478. {'client_certificate': 'fakecert.crt'},
  1479. {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'},
  1480. {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
  1481. {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'},
  1482. ])
  1483. def test_client_certificate(self, ydl_params):
  1484. with FakeYDL(ydl_params) as ydl:
  1485. rh = self.build_handler(ydl)
  1486. assert rh._client_cert == ydl_params # XXX: Too bound to implementation
  1487. def test_urllib_file_urls(self):
  1488. with FakeYDL({'enable_file_urls': False}) as ydl:
  1489. rh = self.build_handler(ydl, UrllibRH)
  1490. assert rh.enable_file_urls is False
  1491. with FakeYDL({'enable_file_urls': True}) as ydl:
  1492. rh = self.build_handler(ydl, UrllibRH)
  1493. assert rh.enable_file_urls is True
  1494. def test_compat_opt_prefer_urllib(self):
  1495. # This assumes urllib only has a preference when this compat opt is given
  1496. with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl:
  1497. director = ydl.build_request_director([UrllibRH])
  1498. assert len(director.preferences) == 1
  1499. assert director.preferences.pop()(UrllibRH, None)
  1500. class TestRequest:
  1501. def test_query(self):
  1502. req = Request('http://example.com?q=something', query={'v': 'xyz'})
  1503. assert req.url == 'http://example.com?q=something&v=xyz'
  1504. req.update(query={'v': '123'})
  1505. assert req.url == 'http://example.com?q=something&v=123'
  1506. req.update(url='http://example.com', query={'v': 'xyz'})
  1507. assert req.url == 'http://example.com?v=xyz'
  1508. def test_method(self):
  1509. req = Request('http://example.com')
  1510. assert req.method == 'GET'
  1511. req.data = b'test'
  1512. assert req.method == 'POST'
  1513. req.data = None
  1514. assert req.method == 'GET'
  1515. req.data = b'test2'
  1516. req.method = 'PUT'
  1517. assert req.method == 'PUT'
  1518. req.data = None
  1519. assert req.method == 'PUT'
  1520. with pytest.raises(TypeError):
  1521. req.method = 1
  1522. def test_request_helpers(self):
  1523. assert HEADRequest('http://example.com').method == 'HEAD'
  1524. assert PUTRequest('http://example.com').method == 'PUT'
  1525. def test_headers(self):
  1526. req = Request('http://example.com', headers={'tesT': 'test'})
  1527. assert req.headers == HTTPHeaderDict({'test': 'test'})
  1528. req.update(headers={'teSt2': 'test2'})
  1529. assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'})
  1530. req.headers = new_headers = HTTPHeaderDict({'test': 'test'})
  1531. assert req.headers == HTTPHeaderDict({'test': 'test'})
  1532. assert req.headers is new_headers
  1533. # test converts dict to case insensitive dict
  1534. req.headers = new_headers = {'test2': 'test2'}
  1535. assert isinstance(req.headers, HTTPHeaderDict)
  1536. assert req.headers is not new_headers
  1537. with pytest.raises(TypeError):
  1538. req.headers = None
  1539. def test_data_type(self):
  1540. req = Request('http://example.com')
  1541. assert req.data is None
  1542. # test bytes is allowed
  1543. req.data = b'test'
  1544. assert req.data == b'test'
  1545. # test iterable of bytes is allowed
  1546. i = [b'test', b'test2']
  1547. req.data = i
  1548. assert req.data == i
  1549. # test file-like object is allowed
  1550. f = io.BytesIO(b'test')
  1551. req.data = f
  1552. assert req.data == f
  1553. # common mistake: test str not allowed
  1554. with pytest.raises(TypeError):
  1555. req.data = 'test'
  1556. assert req.data != 'test'
  1557. # common mistake: test dict is not allowed
  1558. with pytest.raises(TypeError):
  1559. req.data = {'test': 'test'}
  1560. assert req.data != {'test': 'test'}
  1561. def test_content_length_header(self):
  1562. req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'')
  1563. assert req.headers.get('Content-Length') == '0'
  1564. req.data = b'test'
  1565. assert 'Content-Length' not in req.headers
  1566. req = Request('http://example.com', headers={'Content-Length': '10'})
  1567. assert 'Content-Length' not in req.headers
  1568. def test_content_type_header(self):
  1569. req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test')
  1570. assert req.headers.get('Content-Type') == 'test'
  1571. req.data = b'test2'
  1572. assert req.headers.get('Content-Type') == 'test'
  1573. req.data = None
  1574. assert 'Content-Type' not in req.headers
  1575. req.data = b'test3'
  1576. assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
  1577. def test_update_req(self):
  1578. req = Request('http://example.com')
  1579. assert req.data is None
  1580. assert req.method == 'GET'
  1581. assert 'Content-Type' not in req.headers
  1582. # Test that zero-byte payloads will be sent
  1583. req.update(data=b'')
  1584. assert req.data == b''
  1585. assert req.method == 'POST'
  1586. assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded'
  1587. def test_proxies(self):
  1588. req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'})
  1589. assert req.proxies == {'http': 'http://127.0.0.1:8080'}
  1590. def test_extensions(self):
  1591. req = Request(url='http://example.com', extensions={'timeout': 2})
  1592. assert req.extensions == {'timeout': 2}
  1593. def test_copy(self):
  1594. req = Request(
  1595. url='http://example.com',
  1596. extensions={'cookiejar': CookieJar()},
  1597. headers={'Accept-Encoding': 'br'},
  1598. proxies={'http': 'http://127.0.0.1'},
  1599. data=[b'123']
  1600. )
  1601. req_copy = req.copy()
  1602. assert req_copy is not req
  1603. assert req_copy.url == req.url
  1604. assert req_copy.headers == req.headers
  1605. assert req_copy.headers is not req.headers
  1606. assert req_copy.proxies == req.proxies
  1607. assert req_copy.proxies is not req.proxies
  1608. # Data is not able to be copied
  1609. assert req_copy.data == req.data
  1610. assert req_copy.data is req.data
  1611. # Shallow copy extensions
  1612. assert req_copy.extensions is not req.extensions
  1613. assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar']
  1614. # Subclasses are copied by default
  1615. class AnotherRequest(Request):
  1616. pass
  1617. req = AnotherRequest(url='http://127.0.0.1')
  1618. assert isinstance(req.copy(), AnotherRequest)
  1619. def test_url(self):
  1620. req = Request(url='https://фtest.example.com/ some spaceв?ä=c',)
  1621. assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c'
  1622. assert Request(url='//example.com').url == 'http://example.com'
  1623. with pytest.raises(TypeError):
  1624. Request(url='https://').url = None
  1625. class TestResponse:
  1626. @pytest.mark.parametrize('reason,status,expected', [
  1627. ('custom', 200, 'custom'),
  1628. (None, 404, 'Not Found'), # fallback status
  1629. ('', 403, 'Forbidden'),
  1630. (None, 999, None)
  1631. ])
  1632. def test_reason(self, reason, status, expected):
  1633. res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason)
  1634. assert res.reason == expected
  1635. def test_headers(self):
  1636. headers = Message()
  1637. headers.add_header('Test', 'test')
  1638. headers.add_header('Test', 'test2')
  1639. headers.add_header('content-encoding', 'br')
  1640. res = Response(io.BytesIO(b''), headers=headers, url='test://')
  1641. assert res.headers.get_all('test') == ['test', 'test2']
  1642. assert 'Content-Encoding' in res.headers
  1643. def test_get_header(self):
  1644. headers = Message()
  1645. headers.add_header('Set-Cookie', 'cookie1')
  1646. headers.add_header('Set-cookie', 'cookie2')
  1647. headers.add_header('Test', 'test')
  1648. headers.add_header('Test', 'test2')
  1649. res = Response(io.BytesIO(b''), headers=headers, url='test://')
  1650. assert res.get_header('test') == 'test, test2'
  1651. assert res.get_header('set-Cookie') == 'cookie1'
  1652. assert res.get_header('notexist', 'default') == 'default'
  1653. def test_compat(self):
  1654. res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'})
  1655. with warnings.catch_warnings():
  1656. warnings.simplefilter('ignore', category=DeprecationWarning)
  1657. assert res.code == res.getcode() == res.status
  1658. assert res.geturl() == res.url
  1659. assert res.info() is res.headers
  1660. assert res.getheader('test') == res.get_header('test')
  1661. class TestImpersonateTarget:
  1662. @pytest.mark.parametrize('target_str,expected', [
  1663. ('abc', ImpersonateTarget('abc', None, None, None)),
  1664. ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)),
  1665. ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)),
  1666. ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')),
  1667. ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)),
  1668. ('abc:', ImpersonateTarget('abc', None, None, None)),
  1669. ('abc-120:', ImpersonateTarget('abc', '120', None, None)),
  1670. (':xyz', ImpersonateTarget(None, None, 'xyz', None)),
  1671. (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')),
  1672. (':', ImpersonateTarget(None, None, None, None)),
  1673. ('', ImpersonateTarget(None, None, None, None)),
  1674. ])
  1675. def test_target_from_str(self, target_str, expected):
  1676. assert ImpersonateTarget.from_str(target_str) == expected
  1677. @pytest.mark.parametrize('target_str', [
  1678. '-120', ':-12.0', '-12:-12', '-:-',
  1679. '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:'
  1680. ])
  1681. def test_target_from_invalid_str(self, target_str):
  1682. with pytest.raises(ValueError):
  1683. ImpersonateTarget.from_str(target_str)
  1684. @pytest.mark.parametrize('target,expected', [
  1685. (ImpersonateTarget('abc', None, None, None), 'abc'),
  1686. (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
  1687. (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
  1688. (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'),
  1689. (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'),
  1690. (ImpersonateTarget('abc', '120', None, None), 'abc-120'),
  1691. (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'),
  1692. (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'),
  1693. (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'),
  1694. (ImpersonateTarget('abc', ), 'abc'),
  1695. (ImpersonateTarget(None, None, None, None), ''),
  1696. ])
  1697. def test_str(self, target, expected):
  1698. assert str(target) == expected
  1699. @pytest.mark.parametrize('args', [
  1700. ('abc', None, None, '5'),
  1701. ('abc', '120', None, '5'),
  1702. (None, '120', None, None),
  1703. (None, '120', None, '5'),
  1704. (None, None, None, '5'),
  1705. (None, '120', 'xyz', '5'),
  1706. ])
  1707. def test_invalid_impersonate_target(self, args):
  1708. with pytest.raises(ValueError):
  1709. ImpersonateTarget(*args)
  1710. @pytest.mark.parametrize('target1,target2,is_in,is_eq', [
  1711. (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True),
  1712. (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False),
  1713. (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False),
  1714. (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False),
  1715. (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False),
  1716. (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False),
  1717. (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False),
  1718. (ImpersonateTarget(), ImpersonateTarget(), True, True),
  1719. ])
  1720. def test_impersonate_target_in(self, target1, target2, is_in, is_eq):
  1721. assert (target1 in target2) is is_in
  1722. assert (target1 == target2) is is_eq