File_redirection.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. <?php
  2. // This file is part of GNU social - https://www.gnu.org/software/social
  3. //
  4. // GNU social is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // GNU social is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  16. defined('GNUSOCIAL') || die();
  17. /**
  18. * Table Definition for file_redirection
  19. */
  20. class File_redirection extends Managed_DataObject
  21. {
  22. ###START_AUTOCODE
  23. /* the code below is auto generated do not remove the above tag */
  24. public $__table = 'file_redirection'; // table name
  25. public $urlhash; // varchar(64) primary_key not_null
  26. public $url; // text
  27. public $file_id; // int(4)
  28. public $redirections; // int(4)
  29. public $httpcode; // int(4)
  30. public $modified; // datetime() not_null default_CURRENT_TIMESTAMP
  31. /* the code above is auto generated do not remove the tag below */
  32. ###END_AUTOCODE
  33. protected $file; /* Cache the associated file sometimes */
  /**
   * Describe the file_redirection table for the schema manager.
   *
   * @return array table definition with 'fields', 'primary key' and
   *               'foreign keys' entries
   */
  public static function schemaDef()
  {
      // Column definitions, in table order.
      $fields = [
          'urlhash' => [
              'type' => 'varchar',
              'length' => 64,
              'not null' => true,
              'description' => 'sha256 hash of the URL',
          ],
          'url' => [
              'type' => 'text',
              'description' => 'short URL (or any other kind of redirect) for file (id)',
          ],
          'file_id' => [
              'type' => 'int',
              'description' => 'short URL for what URL/file',
          ],
          'redirections' => [
              'type' => 'int',
              'description' => 'redirect count',
          ],
          'httpcode' => [
              'type' => 'int',
              'description' => 'HTTP status code (20x, 30x, etc.)',
          ],
          'modified' => [
              'type' => 'datetime',
              'not null' => true,
              'default' => 'CURRENT_TIMESTAMP',
              'description' => 'date this record was modified',
          ],
      ];

      return [
          'fields' => $fields,
          'primary key' => ['urlhash'],
          'foreign keys' => [
              // file_redirection.file_id references file.id
              'file_redirection_file_id_fkey' => ['file', ['file_id' => 'id']],
          ],
      ];
  }
  /**
   * Fetch the stored redirection row for a URL.
   *
   * Rows are keyed by File::hashurl() of the URL, so the URL is hashed
   * first and the row is fetched by that primary key.
   *
   * @param string $url URL to look up
   * @return File_redirection the matching row
   *
   * NOTE(review): callers in this class catch NoResultException around
   * this call, so getByPK presumably throws it when no row matches.
   */
  public static function getByUrl($url)
  {
      $urlhash = File::hashurl($url);

      return self::getByPK(['urlhash' => $urlhash]);
  }
  /**
   * Build a preconfigured HTTP client for probing a URL.
   *
   * The request is only created and configured here; sending it is left
   * to the caller.
   *
   * @param string $url    URL the request will target
   * @param int    $redirs maximum number of HTTP redirections to follow
   * @return HTTPClient configured, unsent request
   */
  public static function _commonHttp($url, $redirs)
  {
      $options = [
          // # seconds to wait
          'connect_timeout' => 10,
          // # max number of http redirections to follow
          'max_redirs' => $redirs,
          // We follow redirects ourselves in lib/httpclient.php
          'follow_redirects' => false,
          // We won't need body content here.
          'store_body' => false,
      ];

      $client = new HTTPClient($url);
      $client->setConfig($options);

      return $client;
  }
  /**
   * Hit a URL over HTTP and report where it ends up.
   *
   * Most code should call File_redirection::where() instead, which checks
   * for an already known redirection first and avoids extra hits to the
   * web.
   *
   * A HEAD request is tried first. If the server answers 405 or 204 the
   * request is repeated as a GET. A 400 answer, or any exception raised
   * while sending, is logged and the input URL is returned unchanged.
   *
   * If redirects were followed and the final URL is protected (according
   * to File::isProtected()), the URL just before it in the chain is
   * reported instead.
   *
   * @param string $short_url URL to resolve
   * @param int    $redirs    maximum number of redirections to follow
   * @param bool   $protected when true, 'protected' => true is added to
   *                          the returned array
   * @return mixed one of:
   *   false  - if $redirs is negative
   *   string - $short_url itself, if it contains no '://' or the request
   *            failed
   *   array  - associative array with the following elements:
   *       code: HTTP status code
   *       redirects: count of redirects followed
   *       url: URL string of final target
   *       type (optional): value of the Content-Type header
   *       protected (optional): true, only when $protected was passed
   *       size (optional): value of the Content-Length header
   *       time (optional): timestamp parsed from the Last-Modified header
   */
  public static function lookupWhere($short_url, $redirs = 10, $protected = false)
  {
      if ($redirs < 0) {
          return false;
      }

      // Without a scheme separator there is nothing we could request.
      if (strpos($short_url, '://') === false) {
          return $short_url;
      }

      try {
          $head = self::_commonHttp($short_url, $redirs);
          // Don't include body in output
          $head->setMethod(HTTP_Request2::METHOD_HEAD);
          $response = $head->send();
          $status = $response->getStatus();

          if (400 == $status) {
              throw new Exception('Got error 400 on HEAD request, will not go further.');
          }

          if (405 == $status || 204 == $status) {
              // HTTP 405 Unsupported Method
              // The server doesn't support HEAD, so try again as a GET
              // and ignore the response data.
              //
              // HTTP 204 No Content
              // YFrog sends 204 responses back for our HEAD checks, which
              // looks like a logic error on their side. Re-run as a GET;
              // if there really is no content it will be cheap.
              $response = self::_commonHttp($short_url, $redirs)->send();
          }
      } catch (Exception $e) {
          // Invalid URL or failure to reach server
          common_log(LOG_ERR, "Error while following redirects for $short_url: " . $e->getMessage());
          return $short_url;
      }

      $hops = $response->getRedirectCount();
      $target = $response->getEffectiveUrl();
      if ($hops && File::isProtected($target)) {
          // The last URL after all redirections is protected, so report
          // the one before it in the redirection chain.
          $target = $response->redirUrls[$hops - 1];
      }

      $info = [
          'code' => $response->getStatus(),
          'redirects' => $hops,
          'url' => $target,
      ];

      $content_type = $response->getHeader('Content-Type');
      if ($content_type) {
          $info['type'] = $content_type;
      }

      if ($protected) {
          $info['protected'] = true;
      }

      // @fixme bytes?
      $content_length = $response->getHeader('Content-Length');
      if ($content_length) {
          $info['size'] = $content_length;
      }

      $last_modified = $response->getHeader('Last-Modified');
      if ($last_modified) {
          $info['time'] = strtotime($last_modified);
      }

      return $info;
  }
  /**
   * Find out where a URL leads, consulting stored records before the web.
   *
   * Lookup order:
   *  1. A File_redirection row exists for $in_url: it is returned with its
   *     File attached. If the row references a File that no longer exists,
   *     the row is deleted and the lookup is run again.
   *  2. A File row exists for $in_url: the URL is a direct link, so a new,
   *     unsaved File_redirection carrying that File is returned.
   *  3. Otherwise, if $discover is true, the URL is resolved with
   *     lookupWhere(). The final URL is matched against File_redirection,
   *     or saved as a new File. When a File is available and the final URL
   *     differs from $in_url, a File_redirection row for $in_url is
   *     fetched or inserted and returned.
   *
   * If none of the above yields a stored row, a new unsaved
   * File_redirection for $in_url is returned; it may have no File.
   *
   * @param string  $in_url   URL to resolve
   * @param boolean $discover true to attempt dereferencing the redirect if
   *                          we don't know it already
   * @return File_redirection
   */
  public static function where($in_url, $discover = true)
  {
      $redir = new File_redirection();
      $redir->url = $in_url;
      $redir->urlhash = File::hashurl($redir->url);
      $redir->redirections = 0;

      try {
          $r = File_redirection::getByUrl($in_url);
          try {
              $f = File::getByID($r->file_id);
              $r->file = $f;
              $r->redir_url = $f->url;
          } catch (NoResultException $e) {
              // Invalid entry, delete and run again
              common_log(
                  LOG_ERR,
                  'Could not find File with id=' . $r->file_id . ' referenced in File_redirection, deleting File redirection entry and and trying again...'
              );
              $r->delete();
              // Pass $discover through: a caller that asked for no network
              // discovery must not get it just because a stale row existed.
              return self::where($in_url, $discover);
          }
          // File_redirection and File record found, return both
          return $r;
      } catch (NoResultException $e) {
          // File_redirection record not found, but this might be a direct link to a file
          try {
              $f = File::getByUrl($in_url);
              $redir->file_id = $f->id;
              $redir->file = $f;
              return $redir;
          } catch (NoResultException $e) {
              // nope, this was not a direct link to a file either, let's keep going
          }
      }

      if ($discover) {
          // try to follow redirects and get the final url
          $redir_info = File_redirection::lookupWhere($in_url);
          if (is_string($redir_info)) {
              // lookupWhere() hands back a bare URL when it could not
              // follow anything; normalise to the array form.
              $redir_info = ['url' => $redir_info];
          }

          // the last url in the redirection chain can actually be a redirect!
          // this is the case with local /attachment/{file_id} links
          // in that case we have the file id already
          try {
              $r = File_redirection::getByUrl($redir_info['url']);

              $f = File::getKV('id', $r->file_id);
              if ($f instanceof File) {
                  $redir->file = $f;
                  $redir->redir_url = $f->url;
              } else {
                  // Invalid entry in File_redirection, delete and run again
                  common_log(
                      LOG_ERR,
                      'Could not find File with id=' . $r->file_id . ' referenced in File_redirection, deleting File_redirection entry and trying again...'
                  );
                  $r->delete();
                  return self::where($in_url, $discover);
              }
          } catch (NoResultException $e) {
              // save the file now when we know that we don't have it in File_redirection
              try {
                  $redir->file = File::saveNew($redir_info, $redir_info['url']);
              } catch (ServerException $e) {
                  // Best effort: without a File we fall through and return
                  // the unsaved $redir below.
                  common_log(LOG_ERR, $e);
              }
          }

          // If this is a redirection and we have a file to redirect to, save it
          // (if it doesn't exist in File_redirection already)
          if ($redir->file instanceof File && $redir_info['url'] != $in_url) {
              try {
                  $file_redir = File_redirection::getByUrl($in_url);
              } catch (NoResultException $e) {
                  $file_redir = new File_redirection();
                  $file_redir->urlhash = File::hashurl($in_url);
                  $file_redir->url = $in_url;
                  $file_redir->file_id = $redir->file->getID();
                  $file_redir->insert();
                  $file_redir->redir_url = $redir->file->url;
              }

              $file_redir->file = $redir->file;
              return $file_redir;
          }
      }

      return $redir;
  }
  /**
   * Shorten a URL with a user's configured shortening options, if
   * applicable.
   *
   * The URL is canonicalized with _canonUrl() and handed to
   * _userMakeShort(). If that yields nothing, the original $long_url is
   * returned untouched.
   *
   * If the referenced item has not been seen before, data about it may be
   * saved as a side effect of _userMakeShort().
   *
   * @param string $long_url URL to shorten
   * @param User   $user     whose shortening options to use; null is passed
   *                         through to _userMakeShort() as-is
   * @return string the short URL, or $long_url when none was produced
   */
  public static function makeShort($long_url, $user = null)
  {
      $canonical = self::_canonUrl($long_url);
      $shortened = self::_userMakeShort($canonical, $user);

      // No usable short form: fall back to what the caller gave us.
      if (empty($shortened)) {
          return $long_url;
      }

      return $shortened;
  }
  /**
   * Shorten a URL for a user with the force flag set.
   *
   * Works like makeShort(), except that _userMakeShort() is called with
   * its $force argument set to true. If that yields nothing, the original
   * $long_url is returned untouched.
   *
   * If the referenced item has not been seen before, data about it may be
   * saved as a side effect of _userMakeShort().
   *
   * @param string $long_url URL to shorten
   * @param User   $user     whose shortening options to use
   * @return string the short URL, or $long_url when none was produced
   */
  public static function forceShort($long_url, $user)
  {
      $canonical = self::_canonUrl($long_url);
      $shortened = self::_userMakeShort($canonical, $user, true);

      // No usable short form: fall back to what the caller gave us.
      if (empty($shortened)) {
          return $long_url;
      }

      return $shortened;
  }
  /**
   * Shorten a URL via common_shorten_url() and record the short form as a
   * redirection to the long URL's File.
   *
   * @param string $long_url URL to shorten
   * @param User   $user     whose shortening options to use, passed to
   *                         common_shorten_url()
   * @param bool   $force    passed to common_shorten_url()
   * @return string|null the short URL, or null when shortening produced
   *                     nothing or returned the input unchanged
   */
  public static function _userMakeShort($long_url, User $user = null, $force = false)
  {
      $short_url = common_shorten_url($long_url, $user, $force);

      if (empty($short_url) || $short_url == $long_url) {
          return null;
      }

      $short_url = (string)$short_url;

      // Find the File the short URL should point at.
      try {
          $file = File::getByUrl($long_url);
      } catch (NoResultException $e) {
          // Check if the target URL is itself a redirect...
          // This should already have happened in processNew in common_shorten_url()
          $redir = File_redirection::where($long_url);
          $file = $redir->file;
      }

      // where() can come back without a File (e.g. when the target could
      // not be saved). The short URL is still usable, so return it, but
      // there is nothing to attach a redirection row to.
      if (!$file instanceof File) {
          common_log(
              LOG_ERR,
              "No File available for $long_url, not storing redirection for $short_url"
          );
          return $short_url;
      }

      // Store the redirection unless it is already known.
      try {
          File_redirection::getByUrl($short_url);
      } catch (NoResultException $e) {
          $file_redir = new File_redirection();
          $file_redir->urlhash = File::hashurl($short_url);
          $file_redir->url = $short_url;
          $file_redir->file_id = $file->getID();
          $file_redir->insert();
      }

      return $short_url;
  }
  /**
   * Basic attempt to canonicalize a URL, cleaning up some standard variants
   * such as funny syntax or a missing path. Used internally when cleaning
   * up URLs for storage and following redirect chains.
   *
   * Note that despite being on File_redirection, this function DOES NOT
   * perform any dereferencing of redirects.
   *
   * What is done:
   *  - fax: and tel: URLs have the visual separators '.', '-', '(' and ')'
   *    removed from the number part (everything before the first ';').
   *  - bitcoin:, mailto:, aim:, jabber:, xmpp:, magnet: and geo: URLs are
   *    returned unchanged.
   *  - Anything else lacking a scheme or host gets $default_scheme
   *    prepended.
   *  - ftp, ftps, http and https URLs must have a host, and get a trailing
   *    '/' when they have no path.
   *
   * @param string $in_url input URL
   * @param string $default_scheme if given a bare link; defaults to 'http://'
   * @return string|false canonicalized URL, or false if $in_url is empty or
   *                      no usable scheme/host could be determined
   */
  public static function _canonUrl($in_url, $default_scheme = 'http://')
  {
      if (empty($in_url)) {
          return false;
      }

      $out_url = $in_url;
      $p = parse_url($out_url);

      if (empty($p['host']) || empty($p['scheme'])) {
          list($scheme) = explode(':', $in_url, 2);
          switch (strtolower($scheme)) {
              case 'fax':
              case 'tel':
                  // Strip each separator character individually; passing
                  // '.-()' as one string would only match that exact
                  // sequence. Parameters after ';' are left alone so that
                  // values such as domain names keep their dots and dashes.
                  $pieces = explode(';', $out_url, 2);
                  $pieces[0] = str_replace(['.', '-', '(', ')'], '', $pieces[0]);
                  $out_url = implode(';', $pieces);
                  break;

              // non-HTTP schemes, so no redirects
              case 'bitcoin':
              case 'mailto':
              case 'aim':
              case 'jabber':
              case 'xmpp':
                  // don't touch anything
                  break;

              // URLs without domain name, so no redirects
              case 'magnet':
                  // don't touch anything
                  break;

              // URLs with coordinates, not browsable domain names
              case 'geo':
                  // don't touch anything
                  break;

              default:
                  $out_url = $default_scheme . ltrim($out_url, '/');
                  $p = parse_url($out_url);
                  if (empty($p['scheme'])) {
                      return false;
                  }
                  break;
          }
      }

      // parse_url() may have returned false, or an array without a scheme
      // (e.g. for the bare word "tel"); treat both as "no scheme" instead
      // of reading a missing offset.
      $parsed_scheme = (is_array($p) && isset($p['scheme'])) ? $p['scheme'] : '';

      if (in_array($parsed_scheme, ['ftp', 'ftps', 'http', 'https'], true)) {
          if (empty($p['host'])) {
              return false;
          }
          if (empty($p['path'])) {
              $out_url .= '/';
          }
      }

      return $out_url;
  }
  /**
   * Insert a new File_redirection row for a URL.
   *
   * Nothing is returned; the row is built and inserted.
   *
   * @param array  $data    redirect info; 'redirects' and 'code' are read
   *                        and stored as integers
   * @param int    $file_id id of the File the URL leads to
   * @param string $url     the redirecting URL
   * @return void
   */
  public static function saveNew($data, $file_id, $url)
  {
      $row = new File_redirection();

      // The row is keyed by the hash of the URL.
      $row->urlhash = File::hashurl($url);
      $row->url = $url;
      $row->file_id = $file_id;
      $row->redirections = (int) $data['redirects'];
      $row->httpcode = (int) $data['code'];

      $row->insert();
  }
  /**
   * Migrate an old table layout so that every row has a urlhash.
   *
   * Does nothing when the current table definition already has a urlhash
   * field that is part of the primary key. Otherwise the urlhash column is
   * added without being the primary key, the table is updated, and
   * urlhash is filled in for every row with the SHA-256 of url, computed
   * by the database itself.
   *
   * Progress messages are written with echo.
   *
   * @return void
   * @throws ServerException if the configured database type is neither
   *                         'pgsql' nor 'mysql'
   */
  public static function beforeSchemaUpdate()
  {
      $table = strtolower(get_called_class());
      $schema = Schema::get();
      $def = $schema->getTableDef($table);

      // 2015-02-19 We have to upgrade our table definitions to have the urlhash field populated
      $already_migrated = isset($def['fields']['urlhash'])
          && in_array('urlhash', $def['primary key']);
      if ($already_migrated) {
          // We already have the urlhash field, so no need to migrate it.
          return;
      }

      echo "\nFound old $table table, upgrading it to contain 'urlhash' field...";

      // We have to create a urlhash that is _not_ the primary key,
      // transfer data and THEN run checkSchema
      $def['fields']['urlhash'] = [
          'type' => 'varchar',
          'length' => 64,
          'not null' => true,
          'description' => 'sha256 hash of the URL',
      ];
      $def['fields']['url'] = [
          'type' => 'text',
          'description' => 'short URL (or any other kind of redirect) for file (id)',
      ];
      unset($def['primary key']);
      $schema->ensureTable($table, $def);
      echo "DONE.\n";

      // Instantiate the data object class by name to run the update query.
      $classname = ucfirst($table);
      $tablefix = new $classname;

      // urlhash is hash('sha256', $url) in the File table
      echo "Updating urlhash fields in $table table...";

      // Pick the SQL expression that hashes the url column on this backend.
      $db_type = common_config('db', 'type');
      if ($db_type == 'pgsql') {
          $url_sha256 = 'encode(sha256(CAST("url" AS bytea)), \'hex\')';
      } elseif ($db_type == 'mysql') {
          $url_sha256 = 'sha2(`url`, 256)';
      } else {
          throw new ServerException('Unknown DB type selected.');
      }

      $update_sql = sprintf(
          'UPDATE %1$s SET urlhash = %2$s;',
          $tablefix->escapedTableName(),
          $url_sha256
      );
      $tablefix->query($update_sql);
      echo "DONE.\n";

      echo "Resuming core schema upgrade...";
  }
  /**
   * Get the File this redirection points to.
   *
   * The File is loaded by file_id on first use and cached on the object
   * for subsequent calls.
   *
   * @return File
   */
  public function getFile()
  {
      if ($this->file instanceof File) {
          return $this->file;
      }

      $this->file = File::getByID($this->file_id);

      return $this->file;
  }
  440. }