CookieJar.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. <?php
  2. /**
  3. * Stores cookies and passes them between HTTP requests
  4. *
  5. * PHP version 5
  6. *
  7. * LICENSE
  8. *
  9. * This source file is subject to BSD 3-Clause License that is bundled
  10. * with this package in the file LICENSE and available at the URL
  11. * https://raw.github.com/pear/HTTP_Request2/trunk/docs/LICENSE
  12. *
  13. * @category HTTP
  14. * @package HTTP_Request2
  15. * @author Alexey Borzov <avb@php.net>
  16. * @copyright 2008-2016 Alexey Borzov <avb@php.net>
  17. * @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause License
  18. * @link http://pear.php.net/package/HTTP_Request2
  19. */
  20. /** Class representing a HTTP request message */
  21. require_once 'HTTP/Request2.php';
  22. /**
  23. * Stores cookies and passes them between HTTP requests
  24. *
  25. * @category HTTP
  26. * @package HTTP_Request2
  27. * @author Alexey Borzov <avb@php.net>
  28. * @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause License
  29. * @version Release: @package_version@
  30. * @link http://pear.php.net/package/HTTP_Request2
  31. */
  class HTTP_Request2_CookieJar implements Serializable
  {
      /**
       * Array of stored cookies
       *
       * The array is indexed by domain, path and cookie name
       *   .example.com
       *     /
       *       some_cookie => cookie data
       *     /subdir
       *       other_cookie => cookie data
       *   .example.org
       *     ...
       *
       * @var array
       */
      protected $cookies = array();

      /**
       * Whether session cookies should be serialized when serializing the jar
       * @var bool
       */
      protected $serializeSession = false;

      /**
       * Whether Public Suffix List should be used for domain matching
       * @var bool
       */
      protected $useList = true;

      /**
       * Whether an attempt to store an invalid cookie should be ignored, rather than cause an Exception
       * @var bool
       */
      protected $ignoreInvalid = false;

      /**
       * Array with Public Suffix List data
       * @var array
       * @link http://publicsuffix.org/
       */
      protected static $psl = array();

      /**
       * Class constructor, sets various options
       *
       * @param bool $serializeSessionCookies Controls serializing session cookies,
       *                                      see {@link serializeSessionCookies()}
       * @param bool $usePublicSuffixList     Controls using Public Suffix List,
       *                                      see {@link usePublicSuffixList()}
       * @param bool $ignoreInvalidCookies    Whether invalid cookies should be ignored,
       *                                      see {@link ignoreInvalidCookies()}
       */
      public function __construct(
          $serializeSessionCookies = false, $usePublicSuffixList = true,
          $ignoreInvalidCookies = false
      ) {
          $this->serializeSessionCookies($serializeSessionCookies);
          $this->usePublicSuffixList($usePublicSuffixList);
          $this->ignoreInvalidCookies($ignoreInvalidCookies);
      }

      /**
       * Returns current time formatted in ISO-8601 at UTC timezone
       *
       * The result has the same shape as the 'expires' values produced by
       * {@link checkAndUpdateFields()}, so the two can be compared as strings.
       *
       * @return string
       */
      protected function now()
      {
          $dt = new DateTime();
          $dt->setTimezone(new DateTimeZone('UTC'));
          return $dt->format(DateTime::ISO8601);
      }

      /**
       * Checks cookie array for correctness, possibly updating its 'domain', 'path' and 'expires' fields
       *
       * The checks are as follows:
       *   - cookie array should contain 'name' and 'value' fields;
       *   - name and value should not contain disallowed symbols;
       *   - 'expires' should be either empty or parseable by DateTime;
       *   - 'domain' and 'path' should be either not empty or an URL where
       *     cookie was set should be provided.
       *   - if $setter is provided, then document at that URL should be allowed
       *     to set a cookie for that 'domain'. If $setter is not provided,
       *     then no domain checks will be made.
       *
       * 'expires' field will be converted to ISO8601 format from COOKIE format,
       * 'domain' and 'path' will be set from setter URL if empty.
       *
       * @param array    $cookie cookie data, as returned by
       *                         {@link HTTP_Request2_Response::getCookies()}
       * @param Net_URL2 $setter URL of the document that sent Set-Cookie header
       *
       * @return array Updated cookie array
       * @throws HTTP_Request2_LogicException
       * @throws HTTP_Request2_MessageException
       */
      protected function checkAndUpdateFields(array $cookie, Net_URL2 $setter = null)
      {
          if (array_diff(array('name', 'value'), array_keys($cookie))) {
              throw new HTTP_Request2_LogicException(
                  "Cookie array should contain 'name' and 'value' fields",
                  HTTP_Request2_Exception::MISSING_VALUE
              );
          }
          if (preg_match(HTTP_Request2::REGEXP_INVALID_COOKIE, $cookie['name'])) {
              throw new HTTP_Request2_LogicException(
                  "Invalid cookie name: '{$cookie['name']}'",
                  HTTP_Request2_Exception::INVALID_ARGUMENT
              );
          }
          if (preg_match(HTTP_Request2::REGEXP_INVALID_COOKIE, $cookie['value'])) {
              throw new HTTP_Request2_LogicException(
                  "Invalid cookie value: '{$cookie['value']}'",
                  HTTP_Request2_Exception::INVALID_ARGUMENT
              );
          }

          // Fill in defaults for the optional fields without touching provided ones
          $cookie += array('domain' => '', 'path' => '', 'expires' => null, 'secure' => false);

          // Need ISO-8601 date @ UTC timezone
          if (!empty($cookie['expires'])
              && !preg_match('/^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+0000$/', $cookie['expires'])
          ) {
              try {
                  $dt = new DateTime($cookie['expires']);
                  $dt->setTimezone(new DateTimeZone('UTC'));
                  $cookie['expires'] = $dt->format(DateTime::ISO8601);
              } catch (Exception $e) {
                  throw new HTTP_Request2_LogicException($e->getMessage());
              }
          }

          if (empty($cookie['domain']) || empty($cookie['path'])) {
              if (!$setter) {
                  throw new HTTP_Request2_LogicException(
                      'Cookie misses domain and/or path component, cookie setter URL needed',
                      HTTP_Request2_Exception::MISSING_VALUE
                  );
              }
              if (empty($cookie['domain'])) {
                  if ($host = $setter->getHost()) {
                      $cookie['domain'] = $host;
                  } else {
                      throw new HTTP_Request2_LogicException(
                          'Setter URL does not contain host part, can\'t set cookie domain',
                          HTTP_Request2_Exception::MISSING_VALUE
                      );
                  }
              }
              if (empty($cookie['path'])) {
                  $path = $setter->getPath();
                  // Default path is the setter path up to and including its last '/'
                  $cookie['path'] = empty($path)? '/': substr($path, 0, strrpos($path, '/') + 1);
              }
          }

          if ($setter && !$this->domainMatch($setter->getHost(), $cookie['domain'])) {
              throw new HTTP_Request2_MessageException(
                  "Domain " . $setter->getHost() . " cannot set cookies for "
                  . $cookie['domain']
              );
          }

          return $cookie;
      }

      /**
       * Stores a cookie in the jar
       *
       * A cookie with a non-empty value that is not yet expired is stored
       * (replacing a cookie with the same domain, path and name). A cookie with
       * an empty value or with 'expires' in the past removes the stored cookie
       * with the same domain, path and name, if there is one.
       *
       * @param array    $cookie cookie data, as returned by
       *                         {@link HTTP_Request2_Response::getCookies()}
       * @param Net_URL2 $setter URL of the document that sent Set-Cookie header
       *
       * @return bool whether the cookie was successfully stored
       * @throws HTTP_Request2_Exception
       */
      public function store(array $cookie, Net_URL2 $setter = null)
      {
          try {
              $cookie = $this->checkAndUpdateFields($cookie, $setter);
          } catch (HTTP_Request2_Exception $e) {
              if ($this->ignoreInvalid) {
                  return false;
              } else {
                  throw $e;
              }
          }

          if (strlen($cookie['value'])
              && (is_null($cookie['expires']) || $cookie['expires'] > $this->now())
          ) {
              if (!isset($this->cookies[$cookie['domain']])) {
                  $this->cookies[$cookie['domain']] = array();
              }
              if (!isset($this->cookies[$cookie['domain']][$cookie['path']])) {
                  $this->cookies[$cookie['domain']][$cookie['path']] = array();
              }
              $this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']] = $cookie;

          } elseif (isset($this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']])) {
              unset($this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']]);
          }

          return true;
      }

      /**
       * Adds cookies set in HTTP response to the jar
       *
       * @param HTTP_Request2_Response $response HTTP response message
       * @param Net_URL2               $setter   original request URL, needed for
       *                               setting default domain/path. If not given,
       *                               effective URL from response will be used.
       *
       * @return bool whether all cookies were successfully stored
       * @throws HTTP_Request2_LogicException
       */
      public function addCookiesFromResponse(HTTP_Request2_Response $response, Net_URL2 $setter = null)
      {
          if (null === $setter) {
              if (!($effectiveUrl = $response->getEffectiveUrl())) {
                  throw new HTTP_Request2_LogicException(
                      'Response URL required for adding cookies from response',
                      HTTP_Request2_Exception::MISSING_VALUE
                  );
              }
              $setter = new Net_URL2($effectiveUrl);
          }

          $success = true;
          foreach ($response->getCookies() as $cookie) {
              // store() goes first so every cookie is attempted even after a failure
              $success = $this->store($cookie, $setter) && $success;
          }
          return $success;
      }

      /**
       * Returns all cookies matching a given request URL
       *
       * The following checks are made:
       *   - cookie domain should match request host
       *   - cookie path should be a prefix for request path
       *   - 'secure' cookies will only be sent for HTTPS requests
       *
       * An empty request path is treated as '/', mirroring the default path
       * that {@link checkAndUpdateFields()} assigns to cookies.
       *
       * @param Net_URL2 $url      Request url
       * @param bool     $asString Whether to return cookies as string for "Cookie: " header
       *
       * @return array|string Matching cookies
       */
      public function getMatching(Net_URL2 $url, $asString = false)
      {
          $host = $url->getHost();
          $path = (string)$url->getPath();
          if ('' === $path) {
              // Without this a URL like "http://example.com" would not even
              // receive cookies stored for path '/'
              $path = '/';
          }
          $secure = 0 === strcasecmp((string)$url->getScheme(), 'https');

          $matched = $ret = array();
          foreach (array_keys($this->cookies) as $domain) {
              if (!$this->domainMatch($host, $domain)) {
                  continue;
              }
              foreach (array_keys($this->cookies[$domain]) as $cPath) {
                  // NOTE(review): plain string-prefix test, so cookie path '/foo'
                  // also matches request path '/foobar'
                  if (0 !== strpos($path, (string)$cPath)) {
                      continue;
                  }
                  foreach ($this->cookies[$domain][$cPath] as $name => $cookie) {
                      if (!$cookie['secure'] || $secure) {
                          // Keyed by path length: same-named cookies with equally
                          // long paths overwrite one another
                          $matched[$name][strlen($cookie['path'])] = $cookie;
                      }
                  }
              }
          }

          foreach ($matched as $cookies) {
              // Longest (most specific) path first for each cookie name
              krsort($cookies);
              $ret = array_merge($ret, $cookies);
          }

          if (!$asString) {
              return $ret;
          }

          $pairs = array();
          foreach ($ret as $c) {
              $pairs[] = $c['name'] . '=' . $c['value'];
          }
          return implode('; ', $pairs);
      }

      /**
       * Returns all cookies stored in a jar
       *
       * @return array flat list of cookie arrays
       */
      public function getAll()
      {
          $cookies = array();
          foreach ($this->cookies as $paths) {
              foreach ($paths as $byName) {
                  foreach ($byName as $cookie) {
                      $cookies[] = $cookie;
                  }
              }
          }
          return $cookies;
      }

      /**
       * Sets whether session cookies should be serialized when serializing the jar
       *
       * @param boolean $serialize serialize?
       */
      public function serializeSessionCookies($serialize)
      {
          $this->serializeSession = (bool)$serialize;
      }

      /**
       * Sets whether invalid cookies should be silently ignored or cause an Exception
       *
       * @param boolean $ignore ignore?
       * @link http://pear.php.net/bugs/bug.php?id=19937
       * @link http://pear.php.net/bugs/bug.php?id=20401
       */
      public function ignoreInvalidCookies($ignore)
      {
          $this->ignoreInvalid = (bool)$ignore;
      }

      /**
       * Sets whether Public Suffix List should be used for restricting cookie-setting
       *
       * Without PSL {@link domainMatch()} will only prevent setting cookies for
       * top-level domains like '.com' or '.org'. However, it will not prevent
       * setting a cookie for '.co.uk' even though only third-level registrations
       * are possible in .uk domain.
       *
       * With the List it is possible to find the highest level at which a domain
       * may be registered for a particular top-level domain and consequently
       * prevent cookies set for '.co.uk' or '.msk.ru'. The same list is used by
       * Firefox, Chrome and Opera browsers to restrict cookie setting.
       *
       * Note that PSL is licensed differently to HTTP_Request2 package (refer to
       * the license information in public-suffix-list.php), so you can disable
       * its use if this is an issue for you.
       *
       * @param boolean $useList use the list?
       *
       * @link http://publicsuffix.org/learn/
       */
      public function usePublicSuffixList($useList)
      {
          $this->useList = (bool)$useList;
      }

      /**
       * Returns string representation of object
       *
       * @return string
       *
       * @see Serializable::serialize()
       */
      public function serialize()
      {
          return serialize($this->__serialize());
      }

      /**
       * Returns the data that represents the jar when it is serialized
       *
       * Session cookies (those with empty 'expires') are left out unless
       * {@link serializeSessionCookies()} was switched on.
       *
       * Providing this method next to {@link serialize()} keeps the class usable
       * on PHP versions where implementing only Serializable is deprecated.
       *
       * @return array
       */
      public function __serialize()
      {
          $cookies = $this->getAll();
          if (!$this->serializeSession) {
              for ($i = count($cookies) - 1; $i >= 0; $i--) {
                  if (empty($cookies[$i]['expires'])) {
                      unset($cookies[$i]);
                  }
              }
          }
          return array(
              'cookies'          => $cookies,
              'serializeSession' => $this->serializeSession,
              'useList'          => $this->useList,
              'ignoreInvalid'    => $this->ignoreInvalid
          );
      }

      /**
       * Constructs the object from serialized string
       *
       * A payload that does not decode to an array is ignored and leaves the
       * jar unchanged.
       *
       * @param string $serialized string representation
       *
       * @see Serializable::unserialize()
       */
      public function unserialize($serialized)
      {
          // The payload only ever holds arrays and scalars, so forbid object
          // instantiation where PHP supports the option (7.0+).
          if (version_compare(PHP_VERSION, '7.0.0', '>=')) {
              $data = unserialize($serialized, array('allowed_classes' => false));
          } else {
              $data = unserialize($serialized);
          }
          if (is_array($data)) {
              $this->__unserialize($data);
          }
      }

      /**
       * Restores the jar from the data produced by {@link __serialize()}
       *
       * Options missing from $data keep their current values. Cookies that are
       * already expired, or that lack 'domain', 'path' or 'name', are skipped.
       *
       * @param array $data unserialized representation of the jar
       */
      public function __unserialize(array $data)
      {
          if (array_key_exists('serializeSession', $data)) {
              $this->serializeSessionCookies($data['serializeSession']);
          }
          if (array_key_exists('useList', $data)) {
              $this->usePublicSuffixList($data['useList']);
          }
          if (array_key_exists('ignoreInvalid', $data)) {
              $this->ignoreInvalidCookies($data['ignoreInvalid']);
          }
          if (empty($data['cookies']) || !is_array($data['cookies'])) {
              return;
          }

          $now = $this->now();
          foreach ($data['cookies'] as $cookie) {
              if (!is_array($cookie)
                  || !isset($cookie['domain'], $cookie['path'], $cookie['name'])
              ) {
                  continue;
              }
              if (!empty($cookie['expires']) && $cookie['expires'] <= $now) {
                  continue;
              }
              if (!isset($this->cookies[$cookie['domain']])) {
                  $this->cookies[$cookie['domain']] = array();
              }
              if (!isset($this->cookies[$cookie['domain']][$cookie['path']])) {
                  $this->cookies[$cookie['domain']][$cookie['path']] = array();
              }
              $this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']] = $cookie;
          }
      }

      /**
       * Checks whether a cookie domain matches a request host.
       *
       * The method is used by {@link store()} to check for whether a document
       * at given URL can set a cookie with a given domain attribute and by
       * {@link getMatching()} to find cookies matching the request URL.
       *
       * Both arguments are compared as strings; an empty cookie domain only
       * matches an empty request host.
       *
       * @param string $requestHost  request host
       * @param string $cookieDomain cookie domain
       *
       * @return bool match success
       */
      public function domainMatch($requestHost, $cookieDomain)
      {
          $requestHost  = (string)$requestHost;
          $cookieDomain = (string)$cookieDomain;

          // Strict comparison: loose '==' treats numeric-looking strings
          // such as '1e1' and '10' as equal
          if ($requestHost === $cookieDomain) {
              return true;
          }
          // Nothing to match against, also guards the $cookieDomain[0] access below
          if ('' === $cookieDomain) {
              return false;
          }
          // IP address, we require exact match
          if (preg_match('/^(?:\d{1,3}\.){3}\d{1,3}$/', $requestHost)) {
              return false;
          }
          if ('.' !== $cookieDomain[0]) {
              $cookieDomain = '.' . $cookieDomain;
          }
          // prevents setting cookies for '.com' and similar domains
          if ((!$this->useList && substr_count($cookieDomain, '.') < 2)
              || ($this->useList && !self::getRegisteredDomain($cookieDomain))
          ) {
              return false;
          }
          // Request host (with a leading dot) has to end with the cookie domain
          return substr('.' . $requestHost, -strlen($cookieDomain)) === $cookieDomain;
      }

      /**
       * Removes subdomains to get the registered domain (the first after top-level)
       *
       * The method will check Public Suffix List to find out where top-level
       * domain ends and registered domain starts. It will remove domain parts
       * to the left of registered one.
       *
       * @param string $domain domain name
       *
       * @return string|bool registered domain, will return false if $domain is
       *                     either invalid or a TLD itself
       */
      public static function getRegisteredDomain($domain)
      {
          $domainParts = explode('.', ltrim($domain, '.'));

          // load the list if needed
          if (empty(self::$psl)) {
              $path = '@data_dir@' . DIRECTORY_SEPARATOR . 'HTTP_Request2';
              // Placeholder still in place (presumably substituted on package
              // install, TODO confirm): use the 'data' directory two levels up
              if (0 === strpos($path, '@' . 'data_dir@')) {
                  $path = realpath(
                      dirname(__FILE__) . DIRECTORY_SEPARATOR . '..'
                      . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'data'
                  );
              }
              // NOTE(review): include_once yields true instead of the file's
              // return value if the file was already included elsewhere
              self::$psl = include_once $path . DIRECTORY_SEPARATOR . 'public-suffix-list.php';
          }

          if (!($result = self::checkDomainsList($domainParts, self::$psl))) {
              // known TLD, invalid domain name
              return false;
          }

          // unknown TLD
          if (!strpos($result, '.')) {
              // fallback to checking that domain "has at least two dots"
              if (2 > ($count = count($domainParts))) {
                  return false;
              }
              return $domainParts[$count - 2] . '.' . $domainParts[$count - 1];
          }
          return $result;
      }

      /**
       * Recursive helper method for {@link getRegisteredDomain()}
       *
       * Consumes domain parts right-to-left while they are found in the list
       * (literally or through a '*' entry); an entry prefixed with '!' stops
       * the descent at that part.
       *
       * @param array $domainParts remaining domain parts
       * @param mixed $listNode    node in {@link HTTP_Request2_CookieJar::$psl} to check
       *
       * @return string|null concatenated domain parts, null in case of error
       */
      protected static function checkDomainsList(array $domainParts, $listNode)
      {
          $sub    = array_pop($domainParts);
          $result = null;

          if (!is_array($listNode) || is_null($sub)
              || array_key_exists('!' . $sub, $listNode)
          ) {
              // $sub is null here when the domain parts ran out
              return $sub;

          } elseif (array_key_exists($sub, $listNode)) {
              $result = self::checkDomainsList($domainParts, $listNode[$sub]);

          } elseif (array_key_exists('*', $listNode)) {
              $result = self::checkDomainsList($domainParts, $listNode['*']);

          } else {
              return $sub;
          }

          // $result is null when the whole domain is a public suffix; the cast
          // avoids passing null to strlen()
          return (strlen((string)$result) > 0) ? ($result . '.' . $sub) : null;
      }
  }
  509. ?>