CookieJar.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. <?php
  2. /**
  3. * Stores cookies and passes them between HTTP requests
  4. *
  5. * PHP version 5
  6. *
  7. * LICENSE
  8. *
  9. * This source file is subject to BSD 3-Clause License that is bundled
  10. * with this package in the file LICENSE and available at the URL
  11. * https://raw.github.com/pear/HTTP_Request2/trunk/docs/LICENSE
  12. *
  13. * @category HTTP
  14. * @package HTTP_Request2
  15. * @author Alexey Borzov <avb@php.net>
  16. * @copyright 2008-2014 Alexey Borzov <avb@php.net>
  17. * @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause License
  18. * @link http://pear.php.net/package/HTTP_Request2
  19. */
  20. /** Class representing a HTTP request message */
  21. require_once 'HTTP/Request2.php';
  22. /**
  23. * Stores cookies and passes them between HTTP requests
  24. *
  25. * @category HTTP
  26. * @package HTTP_Request2
  27. * @author Alexey Borzov <avb@php.net>
  28. * @license http://opensource.org/licenses/BSD-3-Clause BSD 3-Clause License
  29. * @version Release: @package_version@
  30. * @link http://pear.php.net/package/HTTP_Request2
  31. */
  32. class HTTP_Request2_CookieJar implements Serializable
  33. {
    /**
     * Array of stored cookies
     *
     * The array is a three-level map indexed by domain, then path, then
     * cookie name; the leaves are the cookie arrays themselves:
     * <pre>
     *   .example.com
     *     /
     *       some_cookie => cookie data
     *     /subdir
     *       other_cookie => cookie data
     *   .example.org
     *     ...
     * </pre>
     *
     * @var array
     */
    protected $cookies = array();

    /**
     * Whether session cookies should be serialized when serializing the jar
     *
     * Session cookies are the ones with an empty 'expires' field.
     *
     * @var bool
     */
    protected $serializeSession = false;

    /**
     * Whether Public Suffix List should be used for domain matching
     *
     * @var bool
     */
    protected $useList = true;

    /**
     * Array with Public Suffix List data
     *
     * Shared by all jar instances and loaded from 'public-suffix-list.php'
     * the first time {@link getRegisteredDomain()} needs it.
     *
     * @var array
     * @link http://publicsuffix.org/
     */
    protected static $psl = array();
  65. /**
  66. * Class constructor, sets various options
  67. *
  68. * @param bool $serializeSessionCookies Controls serializing session cookies,
  69. * see {@link serializeSessionCookies()}
  70. * @param bool $usePublicSuffixList Controls using Public Suffix List,
  71. * see {@link usePublicSuffixList()}
  72. */
  73. public function __construct(
  74. $serializeSessionCookies = false, $usePublicSuffixList = true
  75. ) {
  76. $this->serializeSessionCookies($serializeSessionCookies);
  77. $this->usePublicSuffixList($usePublicSuffixList);
  78. }
  79. /**
  80. * Returns current time formatted in ISO-8601 at UTC timezone
  81. *
  82. * @return string
  83. */
  84. protected function now()
  85. {
  86. $dt = new DateTime();
  87. $dt->setTimezone(new DateTimeZone('UTC'));
  88. return $dt->format(DateTime::ISO8601);
  89. }
  90. /**
  91. * Checks cookie array for correctness, possibly updating its 'domain', 'path' and 'expires' fields
  92. *
  93. * The checks are as follows:
  94. * - cookie array should contain 'name' and 'value' fields;
  95. * - name and value should not contain disallowed symbols;
  96. * - 'expires' should be either empty parseable by DateTime;
  97. * - 'domain' and 'path' should be either not empty or an URL where
  98. * cookie was set should be provided.
  99. * - if $setter is provided, then document at that URL should be allowed
  100. * to set a cookie for that 'domain'. If $setter is not provided,
  101. * then no domain checks will be made.
  102. *
  103. * 'expires' field will be converted to ISO8601 format from COOKIE format,
  104. * 'domain' and 'path' will be set from setter URL if empty.
  105. *
  106. * @param array $cookie cookie data, as returned by
  107. * {@link HTTP_Request2_Response::getCookies()}
  108. * @param Net_URL2 $setter URL of the document that sent Set-Cookie header
  109. *
  110. * @return array Updated cookie array
  111. * @throws HTTP_Request2_LogicException
  112. * @throws HTTP_Request2_MessageException
  113. */
  114. protected function checkAndUpdateFields(array $cookie, Net_URL2 $setter = null)
  115. {
  116. if ($missing = array_diff(array('name', 'value'), array_keys($cookie))) {
  117. throw new HTTP_Request2_LogicException(
  118. "Cookie array should contain 'name' and 'value' fields",
  119. HTTP_Request2_Exception::MISSING_VALUE
  120. );
  121. }
  122. if (preg_match(HTTP_Request2::REGEXP_INVALID_COOKIE, $cookie['name'])) {
  123. throw new HTTP_Request2_LogicException(
  124. "Invalid cookie name: '{$cookie['name']}'",
  125. HTTP_Request2_Exception::INVALID_ARGUMENT
  126. );
  127. }
  128. if (preg_match(HTTP_Request2::REGEXP_INVALID_COOKIE, $cookie['value'])) {
  129. throw new HTTP_Request2_LogicException(
  130. "Invalid cookie value: '{$cookie['value']}'",
  131. HTTP_Request2_Exception::INVALID_ARGUMENT
  132. );
  133. }
  134. $cookie += array('domain' => '', 'path' => '', 'expires' => null, 'secure' => false);
  135. // Need ISO-8601 date @ UTC timezone
  136. if (!empty($cookie['expires'])
  137. && !preg_match('/^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\+0000$/', $cookie['expires'])
  138. ) {
  139. try {
  140. $dt = new DateTime($cookie['expires']);
  141. $dt->setTimezone(new DateTimeZone('UTC'));
  142. $cookie['expires'] = $dt->format(DateTime::ISO8601);
  143. } catch (Exception $e) {
  144. throw new HTTP_Request2_LogicException($e->getMessage());
  145. }
  146. }
  147. if (empty($cookie['domain']) || empty($cookie['path'])) {
  148. if (!$setter) {
  149. throw new HTTP_Request2_LogicException(
  150. 'Cookie misses domain and/or path component, cookie setter URL needed',
  151. HTTP_Request2_Exception::MISSING_VALUE
  152. );
  153. }
  154. if (empty($cookie['domain'])) {
  155. if ($host = $setter->getHost()) {
  156. $cookie['domain'] = $host;
  157. } else {
  158. throw new HTTP_Request2_LogicException(
  159. 'Setter URL does not contain host part, can\'t set cookie domain',
  160. HTTP_Request2_Exception::MISSING_VALUE
  161. );
  162. }
  163. }
  164. if (empty($cookie['path'])) {
  165. $path = $setter->getPath();
  166. $cookie['path'] = empty($path)? '/': substr($path, 0, strrpos($path, '/') + 1);
  167. }
  168. }
  169. if ($setter && !$this->domainMatch($setter->getHost(), $cookie['domain'])) {
  170. throw new HTTP_Request2_MessageException(
  171. "Domain " . $setter->getHost() . " cannot set cookies for "
  172. . $cookie['domain']
  173. );
  174. }
  175. return $cookie;
  176. }
  177. /**
  178. * Stores a cookie in the jar
  179. *
  180. * @param array $cookie cookie data, as returned by
  181. * {@link HTTP_Request2_Response::getCookies()}
  182. * @param Net_URL2 $setter URL of the document that sent Set-Cookie header
  183. *
  184. * @throws HTTP_Request2_Exception
  185. */
  186. public function store(array $cookie, Net_URL2 $setter = null)
  187. {
  188. $cookie = $this->checkAndUpdateFields($cookie, $setter);
  189. if (strlen($cookie['value'])
  190. && (is_null($cookie['expires']) || $cookie['expires'] > $this->now())
  191. ) {
  192. if (!isset($this->cookies[$cookie['domain']])) {
  193. $this->cookies[$cookie['domain']] = array();
  194. }
  195. if (!isset($this->cookies[$cookie['domain']][$cookie['path']])) {
  196. $this->cookies[$cookie['domain']][$cookie['path']] = array();
  197. }
  198. $this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']] = $cookie;
  199. } elseif (isset($this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']])) {
  200. unset($this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']]);
  201. }
  202. }
  203. /**
  204. * Adds cookies set in HTTP response to the jar
  205. *
  206. * @param HTTP_Request2_Response $response HTTP response message
  207. * @param Net_URL2 $setter original request URL, needed for
  208. * setting default domain/path
  209. */
  210. public function addCookiesFromResponse(HTTP_Request2_Response $response, Net_URL2 $setter)
  211. {
  212. foreach ($response->getCookies() as $cookie) {
  213. $this->store($cookie, $setter);
  214. }
  215. }
  216. /**
  217. * Returns all cookies matching a given request URL
  218. *
  219. * The following checks are made:
  220. * - cookie domain should match request host
  221. * - cookie path should be a prefix for request path
  222. * - 'secure' cookies will only be sent for HTTPS requests
  223. *
  224. * @param Net_URL2 $url Request url
  225. * @param bool $asString Whether to return cookies as string for "Cookie: " header
  226. *
  227. * @return array|string Matching cookies
  228. */
  229. public function getMatching(Net_URL2 $url, $asString = false)
  230. {
  231. $host = $url->getHost();
  232. $path = $url->getPath();
  233. $secure = 0 == strcasecmp($url->getScheme(), 'https');
  234. $matched = $ret = array();
  235. foreach (array_keys($this->cookies) as $domain) {
  236. if ($this->domainMatch($host, $domain)) {
  237. foreach (array_keys($this->cookies[$domain]) as $cPath) {
  238. if (0 === strpos($path, $cPath)) {
  239. foreach ($this->cookies[$domain][$cPath] as $name => $cookie) {
  240. if (!$cookie['secure'] || $secure) {
  241. $matched[$name][strlen($cookie['path'])] = $cookie;
  242. }
  243. }
  244. }
  245. }
  246. }
  247. }
  248. foreach ($matched as $cookies) {
  249. krsort($cookies);
  250. $ret = array_merge($ret, $cookies);
  251. }
  252. if (!$asString) {
  253. return $ret;
  254. } else {
  255. $str = '';
  256. foreach ($ret as $c) {
  257. $str .= (empty($str)? '': '; ') . $c['name'] . '=' . $c['value'];
  258. }
  259. return $str;
  260. }
  261. }
  262. /**
  263. * Returns all cookies stored in a jar
  264. *
  265. * @return array
  266. */
  267. public function getAll()
  268. {
  269. $cookies = array();
  270. foreach (array_keys($this->cookies) as $domain) {
  271. foreach (array_keys($this->cookies[$domain]) as $path) {
  272. foreach ($this->cookies[$domain][$path] as $name => $cookie) {
  273. $cookies[] = $cookie;
  274. }
  275. }
  276. }
  277. return $cookies;
  278. }
  279. /**
  280. * Sets whether session cookies should be serialized when serializing the jar
  281. *
  282. * @param boolean $serialize serialize?
  283. */
  284. public function serializeSessionCookies($serialize)
  285. {
  286. $this->serializeSession = (bool)$serialize;
  287. }
  288. /**
  289. * Sets whether Public Suffix List should be used for restricting cookie-setting
  290. *
  291. * Without PSL {@link domainMatch()} will only prevent setting cookies for
  292. * top-level domains like '.com' or '.org'. However, it will not prevent
  293. * setting a cookie for '.co.uk' even though only third-level registrations
  294. * are possible in .uk domain.
  295. *
  296. * With the List it is possible to find the highest level at which a domain
  297. * may be registered for a particular top-level domain and consequently
  298. * prevent cookies set for '.co.uk' or '.msk.ru'. The same list is used by
  299. * Firefox, Chrome and Opera browsers to restrict cookie setting.
  300. *
  301. * Note that PSL is licensed differently to HTTP_Request2 package (refer to
  302. * the license information in public-suffix-list.php), so you can disable
  303. * its use if this is an issue for you.
  304. *
  305. * @param boolean $useList use the list?
  306. *
  307. * @link http://publicsuffix.org/learn/
  308. */
  309. public function usePublicSuffixList($useList)
  310. {
  311. $this->useList = (bool)$useList;
  312. }
  313. /**
  314. * Returns string representation of object
  315. *
  316. * @return string
  317. *
  318. * @see Serializable::serialize()
  319. */
  320. public function serialize()
  321. {
  322. $cookies = $this->getAll();
  323. if (!$this->serializeSession) {
  324. for ($i = count($cookies) - 1; $i >= 0; $i--) {
  325. if (empty($cookies[$i]['expires'])) {
  326. unset($cookies[$i]);
  327. }
  328. }
  329. }
  330. return serialize(array(
  331. 'cookies' => $cookies,
  332. 'serializeSession' => $this->serializeSession,
  333. 'useList' => $this->useList
  334. ));
  335. }
  336. /**
  337. * Constructs the object from serialized string
  338. *
  339. * @param string $serialized string representation
  340. *
  341. * @see Serializable::unserialize()
  342. */
  343. public function unserialize($serialized)
  344. {
  345. $data = unserialize($serialized);
  346. $now = $this->now();
  347. $this->serializeSessionCookies($data['serializeSession']);
  348. $this->usePublicSuffixList($data['useList']);
  349. foreach ($data['cookies'] as $cookie) {
  350. if (!empty($cookie['expires']) && $cookie['expires'] <= $now) {
  351. continue;
  352. }
  353. if (!isset($this->cookies[$cookie['domain']])) {
  354. $this->cookies[$cookie['domain']] = array();
  355. }
  356. if (!isset($this->cookies[$cookie['domain']][$cookie['path']])) {
  357. $this->cookies[$cookie['domain']][$cookie['path']] = array();
  358. }
  359. $this->cookies[$cookie['domain']][$cookie['path']][$cookie['name']] = $cookie;
  360. }
  361. }
  362. /**
  363. * Checks whether a cookie domain matches a request host.
  364. *
  365. * The method is used by {@link store()} to check for whether a document
  366. * at given URL can set a cookie with a given domain attribute and by
  367. * {@link getMatching()} to find cookies matching the request URL.
  368. *
  369. * @param string $requestHost request host
  370. * @param string $cookieDomain cookie domain
  371. *
  372. * @return bool match success
  373. */
  374. public function domainMatch($requestHost, $cookieDomain)
  375. {
  376. if ($requestHost == $cookieDomain) {
  377. return true;
  378. }
  379. // IP address, we require exact match
  380. if (preg_match('/^(?:\d{1,3}\.){3}\d{1,3}$/', $requestHost)) {
  381. return false;
  382. }
  383. if ('.' != $cookieDomain[0]) {
  384. $cookieDomain = '.' . $cookieDomain;
  385. }
  386. // prevents setting cookies for '.com' and similar domains
  387. if (!$this->useList && substr_count($cookieDomain, '.') < 2
  388. || $this->useList && !self::getRegisteredDomain($cookieDomain)
  389. ) {
  390. return false;
  391. }
  392. return substr('.' . $requestHost, -strlen($cookieDomain)) == $cookieDomain;
  393. }
  394. /**
  395. * Removes subdomains to get the registered domain (the first after top-level)
  396. *
  397. * The method will check Public Suffix List to find out where top-level
  398. * domain ends and registered domain starts. It will remove domain parts
  399. * to the left of registered one.
  400. *
  401. * @param string $domain domain name
  402. *
  403. * @return string|bool registered domain, will return false if $domain is
  404. * either invalid or a TLD itself
  405. */
  406. public static function getRegisteredDomain($domain)
  407. {
  408. $domainParts = explode('.', ltrim($domain, '.'));
  409. // load the list if needed
  410. if (empty(self::$psl)) {
  411. $path = '@data_dir@' . DIRECTORY_SEPARATOR . 'HTTP_Request2';
  412. if (0 === strpos($path, '@' . 'data_dir@')) {
  413. $path = realpath(
  414. dirname(__FILE__) . DIRECTORY_SEPARATOR . '..'
  415. . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'data'
  416. );
  417. }
  418. self::$psl = include_once $path . DIRECTORY_SEPARATOR . 'public-suffix-list.php';
  419. }
  420. if (!($result = self::checkDomainsList($domainParts, self::$psl))) {
  421. // known TLD, invalid domain name
  422. return false;
  423. }
  424. // unknown TLD
  425. if (!strpos($result, '.')) {
  426. // fallback to checking that domain "has at least two dots"
  427. if (2 > ($count = count($domainParts))) {
  428. return false;
  429. }
  430. return $domainParts[$count - 2] . '.' . $domainParts[$count - 1];
  431. }
  432. return $result;
  433. }
  434. /**
  435. * Recursive helper method for {@link getRegisteredDomain()}
  436. *
  437. * @param array $domainParts remaining domain parts
  438. * @param mixed $listNode node in {@link HTTP_Request2_CookieJar::$psl} to check
  439. *
  440. * @return string|null concatenated domain parts, null in case of error
  441. */
  442. protected static function checkDomainsList(array $domainParts, $listNode)
  443. {
  444. $sub = array_pop($domainParts);
  445. $result = null;
  446. if (!is_array($listNode) || is_null($sub)
  447. || array_key_exists('!' . $sub, $listNode)
  448. ) {
  449. return $sub;
  450. } elseif (array_key_exists($sub, $listNode)) {
  451. $result = self::checkDomainsList($domainParts, $listNode[$sub]);
  452. } elseif (array_key_exists('*', $listNode)) {
  453. $result = self::checkDomainsList($domainParts, $listNode['*']);
  454. } else {
  455. return $sub;
  456. }
  457. return (strlen($result) > 0) ? ($result . '.' . $sub) : null;
  458. }
  459. }
  460. ?>