LinkbackPlugin.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * Plugin to do linkbacks for notices containing links
  6. *
  7. * PHP version 5
  8. *
  9. * LICENCE: This program is free software: you can redistribute it and/or modify
  10. * it under the terms of the GNU Affero General Public License as published by
  11. * the Free Software Foundation, either version 3 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU Affero General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Affero General Public License
  20. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. *
  22. * @category Plugin
  23. * @package StatusNet
  24. * @author Evan Prodromou <evan@status.net>
  25. * @copyright 2009 StatusNet, Inc.
  26. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  27. * @link http://status.net/
  28. */
  // Bail out unless the STATUSNET constant is defined. It is presumably set
  // by the application's bootstrap, so this keeps the file from being run
  // on its own.
  if (defined('STATUSNET') === false) {
      exit(1);
  }

  // Load lib/util.php from this plugin's own directory.
  require_once __DIR__ . '/lib/util.php';

  // Version string returned by version() and listed by onPluginVersion().
  define('LINKBACKPLUGIN_VERSION', '0.2');
  34. /**
  35. * Plugin to do linkbacks for notices containing URLs
  36. *
 * After new notices are saved, we check their text for URLs. If there
 * are URLs, we fetch each one to see whether it advertises a Webmention,
 * Pingback or Trackback endpoint, and send a linkback to the first one found.
  39. *
  40. * @category Plugin
  41. * @package StatusNet
  42. * @author Evan Prodromou <evan@status.net>
  43. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  44. * @link http://status.net/
  45. *
  46. * @see Event
  47. */
  48. class LinkbackPlugin extends Plugin
  49. {
  /**
   * Notice currently being processed. Assigned by onHandleQueuedNotice()
   * and read by webmention(), pingback() and trackback().
   */
  public $notice = null;

  /**
   * Constructor; does nothing beyond calling the parent constructor.
   */
  function __construct()
  {
      parent::__construct();
  }
  /**
   * Send linkbacks for a notice coming off the queue.
   *
   * Notices that are not both local and public are skipped. For the rest,
   * a linkback is attempted for every URL in the notice content (unless
   * the author disabled that), for the repeated or replied-to notice, and
   * for the profile URL of every reply/attention recipient.
   *
   * NOTE(review): the 'disable_linkbacks' preference only gates the URLs
   * found in the content; the repeat/reply and recipient linkbacks below
   * are attempted regardless. Confirm that this is intended.
   *
   * @param Notice $notice the queued notice
   *
   * @return boolean always true
   */
  function onHandleQueuedNotice(Notice $notice)
  {
      $eligible = $notice->isLocal() && $notice->isPublic();
      if (!$eligible) {
          return true;
      }

      // Work on a copy of the text so the notice content is never modified
      $text = $notice->content;

      // The sender methods pick the notice up from this property
      $this->notice = $notice;

      $optedOut = $notice->getProfile()->getPref('linkbackplugin', 'disable_linkbacks');
      if (!$optedOut) {
          // The callback's replacement values are deliberately discarded
          common_replace_urls_callback($text, array($this, 'linkbackUrl'));
      }

      try {
          if ($notice->isRepeat()) {
              $original = Notice::getByID($notice->repeat_of);
              $this->linkbackUrl($original->getUrl());
          } elseif (!empty($notice->reply_to)) {
              $this->linkbackUrl($notice->getParent()->getUrl());
          }
      } catch (InvalidUrlException $e) {
          // No remote HTTP(S) URL for that notice, so no linkback to it;
          // the attention receivers below are still pinged
      } catch (NoParentNoticeException $e) {
          // The parent does not exist, so there is no URL to link back to
          return true;
      }

      // getReplies() and getAttentionProfileIDs() are both consulted
      // because the migration between the two is not complete yet
      $recipientIds = array_unique(
          array_merge($notice->getReplies(), $notice->getAttentionProfileIDs())
      );
      $recipients = Profile::multiGet('id', $recipientIds);

      foreach ($recipients->fetchAll('profileurl') as $recipientUrl) {
          if (!common_valid_http_url($recipientUrl)) {
              continue;
          }
          $this->linkbackUrl($recipientUrl);
      }

      return true;
  }
  /**
   * Rebuild a URL string from a component array as returned by parse_url().
   *
   * Every component is optional; missing ones contribute nothing to the
   * result. The recognised keys are 'scheme', 'user', 'pass', 'host',
   * 'port', 'path', 'query' and 'fragment'.
   *
   * @param array $parsed_url URL components keyed like parse_url() output
   *
   * @return string the reassembled URL
   */
  function unparse_url($parsed_url)
  {
      $scheme   = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
      $host     = isset($parsed_url['host']) ? $parsed_url['host'] : '';
      $port     = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
      $user     = isset($parsed_url['user']) ? (string) $parsed_url['user'] : '';
      $pass     = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
      $path     = isset($parsed_url['path']) ? $parsed_url['path'] : '';
      $query    = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
      $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';

      // Compare against '' rather than relying on truthiness: a user name
      // of "0" is falsy in PHP and would otherwise lose its '@' separator.
      $credentials = ($user !== '' || $pass !== '') ? "$user$pass@" : '';

      return "$scheme$credentials$host$port$path$query$fragment";
  }
  /**
   * Try to send a linkback to a single URL.
   *
   * The URL is fetched with a GET request and the response is inspected
   * for a Webmention endpoint, then a Pingback endpoint, then a Trackback
   * endpoint; only the first protocol found is used. Non-HTTP(S) URLs,
   * unparseable URLs and failed fetches are skipped silently.
   *
   * This method is also passed to common_replace_urls_callback(), which
   * is why it returns the URL it was given.
   *
   * @param string $url target URL; HTML entities in it are decoded first
   *
   * @return string the URL exactly as it was passed in
   */
  function linkbackUrl($url)
  {
      common_log(LOG_DEBUG, "Attempting linkback for " . $url);

      $orig = $url;
      $url  = htmlspecialchars_decode($orig);
      $base = parse_url($url);

      // parse_url() returns false for seriously malformed input and leaves
      // out 'scheme' for relative references, so check before indexing
      if (!is_array($base)
          || !isset($base['scheme'])
          || !in_array($base['scheme'], array('http', 'https'), true)) {
          return $orig;
      }

      // XXX: Do a HEAD first to save some time/bandwidth
      try {
          $httpclient = new HTTPClient();
          $response = $httpclient->get($url, ["User-Agent: {$this->userAgent()}",
                                              "Accept: application/xhtml+xml,text/html"]);

          if (!in_array($response->getStatus(), array(200, 206))) {
              throw new Exception('Invalid response code for GET request');
          }
      } catch (Exception $e) {
          // something didn't work out in our GET request
          return $orig;
      }

      $wm = $this->getWebmention($response);
      if (!is_null($wm)) {
          // It is the webmention receiver's job to resolve source
          // Ref: https://github.com/converspace/webmention/issues/43
          $this->webmention($url, $this->resolveLinkbackEndpoint($wm, $base));
          return $orig;
      }

      $pb = $this->getPingback($response);
      if (!empty($pb)) {
          // Pingback still looks for exact URL in our source, so we
          // must send what we have
          $this->pingback($url, $pb);
          return $orig;
      }

      $tb = $this->getTrackback($response);
      if (!empty($tb)) {
          $this->trackback($response->getEffectiveUrl(), $tb);
      }

      return $orig;
  }

  /**
   * Turn a discovered endpoint reference into an absolute URL.
   *
   * Components missing from the reference are filled in from the target
   * URL: the scheme always, and for references without a host also the
   * host, port and (for relative or empty paths) the path. Dot segments
   * ("./", "../") in a relative path are not collapsed.
   *
   * @param string $endpoint endpoint reference as found in the response
   * @param array  $base     parse_url() result for the target URL; the
   *                         caller guarantees that 'scheme' is set
   *
   * @return string absolute endpoint URL
   */
  private function resolveLinkbackEndpoint($endpoint, array $base)
  {
      $parts = parse_url($endpoint);
      if (!is_array($parts)) {
          // Unparseable reference: everything is taken from the target URL
          $parts = array();
      }

      if (empty($parts['scheme'])) {
          $parts['scheme'] = $base['scheme'];
      }

      if (empty($parts['host'])) {
          $basePath = isset($base['path']) ? $base['path'] : '';
          $path     = isset($parts['path']) ? $parts['path'] : '';

          if ($path === '') {
              // Reference to the target document itself
              $parts['path'] = $basePath;
              if (!isset($parts['query']) && isset($base['query'])) {
                  $parts['query'] = $base['query'];
              }
          } elseif ($path[0] !== '/') {
              // Relative path: resolve against the target's directory
              $slash = strrpos($basePath, '/');
              $directory = ($slash === false) ? '/' : substr($basePath, 0, $slash + 1);
              $parts['path'] = $directory . $path;
          }

          $parts['host'] = isset($base['host']) ? $base['host'] : '';
          if (isset($base['port'])) {
              // Same host implies same port
              $parts['port'] = $base['port'];
          }
      }

      return $this->unparse_url($parts);
  }
  151. // Based on https://github.com/indieweb/mention-client-php
  152. // which is licensed Apache 2.0
  /**
   * Look for a Webmention endpoint advertised by a response.
   *
   * The Link header is checked first; if nothing is found there, the body
   * is searched for a <link> or <a> element with a webmention rel value.
   *
   * @param HTTP_Request2_Response $response response for the target URL
   *
   * @return string|null the endpoint reference as found, or null if none
   */
  function getWebmention(HTTP_Request2_Response $response) {
      $linkHeader = $response->getHeader('Link');
      if ($linkHeader !== null) {
          // XXX: all Link headers arrive as one comma-separated string;
          // the pattern is hoped to cope with that reliably
          $headerPattern = '~<([^>]+)>; rel="?(?:[^" ]* )*(?:http://webmention.org/|webmention)(?: [^" ]*)*"?~';
          if (preg_match($headerPattern, $linkHeader, $found)) {
              return $found[1];
          }
      }

      // FIXME: Do proper DOM traversal
      // Currently fails https://webmention.rocks/test/13, https://webmention.rocks/test/17
      $bodyPatterns = array(
          // href attribute before rel
          '~<(?:link|a)[ ]+href="([^"]*)"[ ]+rel="(?:[^" ]* )*(?:http://webmention.org/|webmention)(?: [^" ]*)*"[ ]*/?>~i',
          // rel attribute before href
          '~<(?:link|a)[ ]+rel="(?:[^" ]* )*(?:http://webmention.org/|webmention)(?: [^" ]*)*"[ ]+href="([^"]*)"[ ]*/?>~i',
      );

      foreach ($bodyPatterns as $bodyPattern) {
          if (preg_match($bodyPattern, $response->getBody(), $found)) {
              return $found[1];
          }
      }

      return NULL;
  }
  /**
   * Send a Webmention for the current notice.
   *
   * Posts the notice URL as 'source' and the given URL as 'target' to the
   * endpoint. Any status other than 200, 201 or 202, and any exception,
   * is logged as a warning; nothing is returned or rethrown.
   *
   * @param string $url      target URL being mentioned
   * @param string $endpoint Webmention endpoint to post to
   *
   * @return void
   */
  function webmention($url, $endpoint) {
      $source = $this->notice->getUrl();

      common_log(LOG_DEBUG,"Attempting webmention to $endpoint for $url from $source");

      $fields = array('source' => $source,
                      'target' => $url);
      $headers = array('Content-type: application/x-www-form-urlencoded',
                       'Accept: application/json');

      $client = HTTPClient::start();

      try {
          $reply = $client->post($endpoint, $headers, $fields);

          $accepted = in_array($reply->getStatus(), array(200,201,202));
          if (!$accepted) {
              common_log(LOG_WARNING,
                         "Webmention request failed for '$url' ($endpoint)");
          }
      } catch (Exception $e) {
          common_log(LOG_WARNING, "Webmention request failed for '{$url}' ({$endpoint}): {$e->getMessage()}");
      }
  }
  /**
   * Look for a Pingback endpoint advertised by a response.
   *
   * A non-empty X-Pingback header wins; otherwise the body is searched
   * for a <link> or <a> element whose rel attribute contains "pingback".
   *
   * @param HTTP_Request2_Response $response response for the target URL
   *
   * @return string|null the endpoint as found, or null if none
   */
  function getPingback(HTTP_Request2_Response $response) {
      $header = $response->getHeader('X-Pingback');
      if ($header) {
          return $header;
      }

      $bodyPatterns = array(
          // href attribute before rel
          '/<(?:link|a)[ ]+href="([^"]+)"[ ]+rel="[^" ]* ?pingback ?[^" ]*"[ ]*\/?>/i',
          // rel attribute before href
          '/<(?:link|a)[ ]+rel="[^" ]* ?pingback ?[^" ]*"[ ]+href="([^"]+)"[ ]*\/?>/i',
      );

      foreach ($bodyPatterns as $bodyPattern) {
          if (preg_match($bodyPattern, $response->getBody(), $found)) {
              return $found[1];
          }
      }

      return null;
  }
  /**
   * Send a Pingback for the current notice.
   *
   * Issues an XML-RPC 'pingback.ping' call with the notice URL as source
   * and the given URL as target. Requires the xmlrpc extension; if it is
   * neither loaded nor loadable, an error is logged and nothing is sent.
   * Faults and exceptions are logged as warnings, never rethrown.
   *
   * @param string $url      target URL being linked to
   * @param string $endpoint Pingback XML-RPC endpoint
   *
   * @return void
   */
  function pingback($url, $endpoint)
  {
      $args = array($this->notice->getUrl(), $url);

      if (!extension_loaded('xmlrpc')) {
          // dl() is not available in every SAPI; calling it where it does
          // not exist would be a fatal error, so probe for it first
          if (!function_exists('dl') || !dl('xmlrpc.so')) {
              common_log(LOG_ERR, "Can't pingback; xmlrpc extension not available.");
              return;
          }
      }

      $request = HTTPClient::start();

      try {
          $request->setBody(xmlrpc_encode_request('pingback.ping', $args));
          $response = $request->post($endpoint,
                                     array('Content-Type: text/xml'),
                                     false);

          $result = xmlrpc_decode($response->getBody());

          // xmlrpc_is_fault() only accepts arrays, and the decoded value
          // is whatever the remote side chose to send
          if (is_array($result) && xmlrpc_is_fault($result)) {
              common_log(LOG_WARNING,
                         "Pingback error for '$url' ($endpoint): ".
                         "$result[faultString] ($result[faultCode])");
          } else {
              // _ve() is the helper trackback() uses to render values for
              // the log; it avoids an array-to-string conversion here
              common_log(LOG_INFO,
                         "Pingback success for '$url' ($endpoint): ".
                         _ve($result));
          }
      } catch (Exception $e) {
          common_log(LOG_WARNING, "Pingback request failed for '{$url}' ({$endpoint}): {$e->getMessage()}");
      }
  }
  230. // Largely cadged from trackback_cls.php by
  231. // Ran Aroussi <ran@blogish.org>, GPL2 or any later version
  232. // http://phptrackback.sourceforge.net/
  /**
   * Look for a Trackback ping URL advertised in a response body.
   *
   * If the body contains RDF blocks, only those whose dc:identifier equals
   * the effective URL of the response are considered, and the first
   * trackback:ping attribute among them is returned. Only when the body
   * has no RDF block at all is it searched for an <a rel="trackback">
   * element, whose href is then returned.
   *
   * NOTE(review): a page with RDF blocks but no usable trackback:ping
   * never reaches the <a rel="trackback"> fallback. Confirm that this is
   * intended.
   *
   * @param HTTP_Request2_Response $response response for the target URL
   *
   * @return string|null the ping URL, or null if none was found
   */
  function getTrackback(HTTP_Request2_Response $response)
  {
      $body    = $response->getBody();
      $pageUrl = $response->getEffectiveUrl();

      if (preg_match_all('/(<rdf:RDF.*?<\/rdf:RDF>)/sm', $body, $rdfBlocks, PREG_SET_ORDER)) {
          $identifierPattern = '|dc:identifier="' . preg_quote($pageUrl) . '"|ms';

          foreach ($rdfBlocks as $rdfBlock) {
              // Skip RDF describing some other resource
              if (!preg_match($identifierPattern, $rdfBlock[1])) {
                  continue;
              }
              if (preg_match('/trackback:ping="([^"]+)"/', trim($rdfBlock[1]), $ping)) {
                  return trim($ping[1]);
              }
          }

          // RDF was present but held no ping URL for this page
          return null;
      }

      if (preg_match_all('/(<a[^>]*?rel=[\'"]trackback[\'"][^>]*?>)/', $body, $anchors)) {
          foreach ($anchors[1] as $anchor) {
              if (preg_match('/href=[\'"]([^\'"]*?)[\'"]/', $anchor, $href)) {
                  return $href[1];
              }
          }
      }

      return null;
  }
  /**
   * Send a Trackback ping for the current notice.
   *
   * Posts title, excerpt, URL and blog name of the notice to the endpoint.
   * A 200 response is logged as success; any other status, and any
   * exception, is logged as a warning. Nothing is returned or rethrown.
   *
   * @param string $url      target URL, used for logging only
   * @param string $endpoint Trackback ping URL
   *
   * @return void
   */
  function trackback($url, $endpoint)
  {
      $profile = $this->notice->getProfile();

      // TRANS: Trackback title.
      // TRANS: %1$s is a profile nickname, %2$s is a timestamp.
      $args = array('title' => sprintf(_m('%1$s\'s status on %2$s'),
                                       $profile->getNickname(),
                                       common_exact_date($this->notice->getCreated())),
                    'excerpt' => $this->notice->getContent(),
                    'url' => $this->notice->getUrl(),
                    'blog_name' => $profile->getNickname());

      try {
          $httpclient = new HTTPClient(null, HTTPClient::METHOD_POST);
          $response = $httpclient->post($endpoint, ["User-Agent: {$this->userAgent()}"], $args);

          if ($response->getStatus() === 200) {
              common_log(LOG_INFO, "Trackback success for '$url' ($endpoint): "._ve($response->getBody()));
          } else {
              common_log(LOG_WARNING, "Trackback error for '$url' ($endpoint): "._ve($response->getBody()));
          }
      } catch (Exception $e) {
          // A failed request is logged at the same level as a non-200
          // answer above and as failures in webmention() and pingback()
          common_log(LOG_WARNING, "Trackback error for '$url' ($endpoint): "._ve($e->getMessage()));
      }
  }
  /**
   * Register the routes of the two receiving endpoints.
   *
   * Maps main/linkback/webmention to the 'webmention' action and
   * main/linkback/pingback to the 'pingback' action.
   *
   * @param URLMapper $m mapper to add the routes to
   *
   * @return void
   */
  public function onRouterInitialized(URLMapper $m)
  {
      $actionsByPath = [
          'main/linkback/webmention' => 'webmention',
          'main/linkback/pingback'   => 'pingback',
      ];

      foreach ($actionsByPath as $path => $actionName) {
          $m->connect($path, ['action' => $actionName]);
      }
  }
  /**
   * Advertise the receiving endpoints in the HTTP response headers.
   *
   * The Link header is added without replacing Link headers that were
   * already set; X-Pingback is set with header()'s default behaviour.
   *
   * @param mixed $action action being shown; not used here
   *
   * @return void
   */
  public function onStartShowHTML($action)
  {
      $webmentionEndpoint = common_local_url('webmention');
      header('Link: <' . $webmentionEndpoint . '>; rel="webmention"', false);

      $pingbackEndpoint = common_local_url('pingback');
      header('X-Pingback: ' . $pingbackEndpoint);
  }
  /**
   * Report the plugin version.
   *
   * @return string value of the LINKBACKPLUGIN_VERSION constant
   */
  public function version()
  {
      return LINKBACKPLUGIN_VERSION;
  }
  /**
   * Append this plugin's entry to the list of plugin versions.
   *
   * @param array &$versions list of plugin descriptions, extended in place
   *
   * @return boolean always true
   */
  function onPluginVersion(array &$versions)
  {
      // TRANS: Plugin description.
      $description = _m('Notify blog authors when their posts have been linked in '.
                        'microblog notices using '.
                        '<a href="http://www.hixie.ch/specs/pingback/pingback">Pingback</a> '.
                        'or <a href="http://www.movabletype.org/docs/mttrackback.html">Trackback</a> protocols.');

      $entry = array();
      $entry['name']           = 'Linkback';
      $entry['version']        = LINKBACKPLUGIN_VERSION;
      $entry['author']         = 'Evan Prodromou';
      $entry['homepage']       = 'https://git.gnu.io/gnu/gnu-social/tree/master/plugins/Linkback';
      $entry['rawdescription'] = $description;

      $versions[] = $entry;

      return true;
  }
  /**
   * Register the route of the linkback settings page.
   *
   * Maps settings/linkback to the 'linkbacksettings' action.
   *
   * @param URLMapper $m mapper to add the route to
   *
   * @return boolean always true
   */
  public function onStartInitializeRouter(URLMapper $m)
  {
      $target = array('action' => 'linkbacksettings');
      $m->connect('settings/linkback', $target);

      return true;
  }
  /**
   * Add the linkback settings entry to the account settings navigation.
   *
   * The entry is flagged as current when the 'action' argument of the
   * given action equals 'linkbacksettings'.
   *
   * @param mixed $action action whose menu is being built; must provide
   *                      trimmed() and menuItem()
   *
   * @return boolean always true
   */
  function onEndAccountSettingsNav($action)
  {
      $action_name = $action->trimmed('action');

      $link = common_local_url('linkbacksettings');
      // TRANS: Linkback plugin menu item on user settings page.
      $label = _m('MENU', 'Send Linkbacks');
      // TRANS: Linkback plugin tooltip for user settings menu item.
      $tooltip = _m('Opt-out of sending linkbacks.');
      $isCurrent = ($action_name === 'linkbacksettings');

      $action->menuItem($link, $label, $tooltip, $isCurrent);

      return true;
  }
  /**
   * Provide name, URL and title of the source link for linkback notices.
   *
   * Only notices whose source is 'linkback' are handled. The link points
   * to the notice URL and is named after its host without a leading
   * "www."; the title keeps the full host. The output parameters are
   * only written once all of them can be filled in.
   *
   * @param Notice $notice notice whose source link is being rendered
   * @param string &$name  receives the display name of the source
   * @param string &$url   receives the URL of the source
   * @param string &$title receives the title of the source link
   *
   * @return boolean false when the outputs were filled in, which stops
   *                 further handlers; true to let other handlers run
   */
  function onStartNoticeSourceLink($notice, &$name, &$url, &$title)
  {
      // If we don't handle this, keep the event handler going
      if (!in_array($notice->source, array('linkback'), true)) {
          return true;
      }

      try {
          $noticeUrl = $notice->getUrl();
      } catch (InvalidUrlException $e) {
          // This just means we don't have the notice source data
          return true;
      }

      // parse_url() yields null for a missing host and false for a
      // malformed URL; neither can be turned into a source name
      $domain = parse_url($noticeUrl, PHP_URL_HOST);
      if (empty($domain)) {
          return true;
      }

      $url = $noticeUrl;

      if (substr($domain, 0, 4) == 'www.') {
          $name = substr($domain, 4);
      } else {
          $name = $domain;
      }

      // TRANS: Title. %s is a domain name.
      $title = sprintf(_m('Sent from %s via Linkback'), $domain);

      // Abort event handler, we have a name and URL!
      return false;
  }
  363. }