util.php 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. <?php
  2. function linkback_lenient_target_match($body, $target) {
  3. return strpos(''.$body, str_replace(array('http://www.', 'http://', 'https://www.', 'https://'), '', preg_replace('/\/+$/', '', preg_replace( '/#.*/', '', $target))));
  4. }
  5. function linkback_get_source($source, $target) {
  6. // Check if we are pinging ourselves and ignore
  7. $localprefix = common_config('site', 'server') . '/' . common_config('site', 'path');
  8. if(linkback_lenient_target_match($source, $localprefix)) {
  9. common_debug('Ignoring self ping from ' . $source . ' to ' . $target);
  10. return NULL;
  11. }
  12. $request = HTTPClient::start();
  13. try {
  14. $response = $request->get($source);
  15. } catch(Exception $ex) {
  16. return NULL;
  17. }
  18. $body = htmlspecialchars_decode($response->getBody());
  19. // We're slightly more lenient in our link detection than the spec requires
  20. if(!linkback_lenient_target_match($body, $target)) {
  21. return NULL;
  22. }
  23. return $response;
  24. }
  25. function linkback_get_target($target) {
  26. // Resolve target (https://github.com/converspace/webmention/issues/43)
  27. $request = HTTPClient::start();
  28. try {
  29. $response = $request->head($target);
  30. } catch(Exception $ex) {
  31. return NULL;
  32. }
  33. try {
  34. $notice = Notice::fromUri($response->getEffectiveUrl());
  35. } catch(UnknownUriException $ex) {
  36. preg_match('/\/notice\/(\d+)(?:#.*)?$/', $response->getEffectiveUrl(), $match);
  37. $notice = Notice::getKV('id', $match[1]);
  38. }
  39. if($notice instanceof Notice && $notice->isLocal()) {
  40. return $notice;
  41. } else {
  42. $user = User::getKV('uri', $response->getEffectiveUrl());
  43. if(!$user) {
  44. preg_match('/\/user\/(\d+)(?:#.*)?$/', $response->getEffectiveUrl(), $match);
  45. $user = User::getKV('id', $match[1]);
  46. }
  47. if(!$user) {
  48. preg_match('/\/([^\/\?#]+)(?:#.*)?$/', $response->getEffectiveUrl(), $match);
  49. if(linkback_lenient_target_match(common_profile_url($match[1]), $response->getEffectiveUrl())) {
  50. $user = User::getKV('nickname', $match[1]);
  51. }
  52. }
  53. if($user instanceof User) {
  54. return $user;
  55. }
  56. }
  57. return NULL;
  58. }
  59. function linkback_is_contained_in($entry, $target) {
  60. foreach ((array)$entry['properties'] as $key => $values) {
  61. if(count(array_filter($values, function($x) use ($target) { return linkback_lenient_target_match($x, $target); })) > 0) {
  62. return $entry['properties'];
  63. }
  64. // check included h-* formats and their links
  65. foreach ($values as $obj) {
  66. if(isset($obj['type']) && array_intersect(array('h-cite', 'h-entry'), $obj['type']) &&
  67. isset($obj['properties']) && isset($obj['properties']['url']) &&
  68. count(array_filter($obj['properties']['url'],
  69. function($x) use ($target) { return linkback_lenient_target_match($x, $target); })) > 0
  70. ) {
  71. return $entry['properties'];
  72. }
  73. }
  74. // check content for the link
  75. if ($key == "content" && preg_match_all("/<a[^>]+?".preg_quote($target, "/")."[^>]*>([^>]+?)<\/a>/i", htmlspecialchars_decode($values[0]['html']), $context)) {
  76. return $entry['properties'];
  77. // check summary for the link
  78. } elseif ($key == "summary" && preg_match_all("/<a[^>]+?".preg_quote($target, "/")."[^>]*>([^>]+?)<\/a>/i", htmlspecialchars_decode($values[0]), $context)) {
  79. return $entry['properties'];
  80. }
  81. }
  82. foreach((array)$entry['children'] as $mf2) {
  83. if(linkback_is_contained_in($mf2, $target)) {
  84. return $entry['properties'];
  85. }
  86. }
  87. return null;
  88. }
  89. // Based on https://github.com/acegiak/Semantic-Linkbacks/blob/master/semantic-linkbacks-microformats-handler.php, GPL-2.0+
  90. function linkback_find_entry($mf2, $target) {
  91. if(isset($mf2['items'][0]['type']) && in_array("h-feed", $mf2['items'][0]["type"]) && isset($mf2['items'][0]['children'])) {
  92. $mf2['items'] = $mf2['items'][0]['children'];
  93. }
  94. $entries = array_filter($mf2['items'], function($x) { return isset($x['type']) && in_array('h-entry', $x['type']); });
  95. foreach ($entries as $entry) {
  96. if($prop = linkback_is_contained_in($entry, $target)) {
  97. return $prop;
  98. }
  99. }
  100. // Default to first one
  101. if(count($entries) > 0) {
  102. return $entries[0]['properties'];
  103. }
  104. return NULL;
  105. }
  106. function linkback_entry_type($entry, $mf2, $target) {
  107. if(!$entry) { return 'mention'; }
  108. if($mf2['rels'] && $mf2['rels']['in-reply-to']) {
  109. foreach($mf2['rels']['in-reply-to'] as $url) {
  110. if(linkback_lenient_target_match($url, $target)) {
  111. return 'reply';
  112. }
  113. }
  114. }
  115. $classes = array(
  116. 'in-reply-to' => 'reply',
  117. 'repost-of' => 'repost',
  118. 'like-of' => 'like',
  119. 'tag-of' => 'tag'
  120. );
  121. foreach((array)$entry as $key => $values) {
  122. if(count(array_filter($values, function($x) use ($target) { return linkback_lenient_target_match($x, $target); })) > 0) {
  123. if($classes[$key]) { return $classes[$key]; }
  124. }
  125. foreach ($values as $obj) {
  126. if(isset($obj['type']) && array_intersect(array('h-cite', 'h-entry'), $obj['type']) &&
  127. isset($obj['properties']) && isset($obj['properties']['url']) &&
  128. count(array_filter($obj['properties']['url'],
  129. function($x) use ($target) { return linkback_lenient_target_match($x, $target); })) > 0
  130. ) {
  131. if($classes[$key]) { return $classes[$key]; }
  132. }
  133. }
  134. }
  135. return 'mention';
  136. }
  137. function linkback_is_dupe($key, $url) {
  138. $dupe = Notice::getKV($key, $url);
  139. if ($dupe instanceof Notice) {
  140. return $dupe;
  141. }
  142. return false;
  143. }
  144. function linkback_hcard($mf2, $url) {
  145. if(empty($mf2['items'])) {
  146. return null;
  147. }
  148. $hcards = array();
  149. foreach($mf2['items'] as $item) {
  150. if(!in_array('h-card', $item['type'])) {
  151. continue;
  152. }
  153. // We found a match, return it immediately
  154. if(isset($item['properties']['url']) && in_array($url, $item['properties']['url'])) {
  155. return $item['properties'];
  156. }
  157. // Let's keep all the hcards for later, to return one of them at least
  158. $hcards[] = $item['properties'];
  159. }
  160. // No match immediately for the url we expected, but there were h-cards found
  161. if (count($hcards) > 0) {
  162. return $hcards[0];
  163. }
  164. return null;
  165. }
  166. function linkback_notice($source, $notice_or_user, $entry, $author, $mf2) {
  167. $content = $entry['content'] ? $entry['content'][0]['html'] :
  168. ($entry['summary'] ? $entry['sumary'][0] : $entry['name'][0]);
  169. $rendered = common_purify($content);
  170. if($notice_or_user instanceof Notice && $entry['type'] == 'mention') {
  171. $name = $entry['name'] ? $entry['name'][0] : substr(common_strip_html($content), 0, 20).'…';
  172. $rendered = _m('linked to this from <a href="'.htmlspecialchars($source).'">'.htmlspecialchars($name).'</a>');
  173. }
  174. $content = common_strip_html($rendered);
  175. $shortened = common_shorten_links($content);
  176. if(Notice::contentTooLong($shortened)) {
  177. $content = substr($content,
  178. 0,
  179. Notice::maxContent() - (mb_strlen($source) + 2));
  180. $rendered = $content . '<a href="'.htmlspecialchars($source).'">…</a>';
  181. $content .= ' ' . $source;
  182. }
  183. $options = array('is_local' => Notice::REMOTE,
  184. 'url' => $entry['url'][0],
  185. 'uri' => $entry['url'][0],
  186. 'rendered' => $rendered,
  187. 'replies' => array(),
  188. 'groups' => array(),
  189. 'peopletags' => array(),
  190. 'tags' => array(),
  191. 'urls' => array());
  192. if($notice_or_user instanceof User) {
  193. $options['replies'][] = $notice_or_user->getUri();
  194. } else {
  195. if($entry['type'] == 'repost') {
  196. $options['repeat_of'] = $notice_or_user->id;
  197. } else {
  198. $options['reply_to'] = $notice_or_user->id;
  199. }
  200. }
  201. if($entry['published'] || $entry['updated']) {
  202. $options['created'] = $entry['published'] ? common_sql_date($entry['published'][0]) : common_sql_date($entry['updated'][0]);
  203. }
  204. if($entry['photo']) {
  205. $options['urls'][] = $entry['photo'][0];
  206. }
  207. foreach((array)$entry['category'] as $tag) {
  208. $tag = common_canonical_tag($tag);
  209. if($tag) { $options['tags'][] = $tag; }
  210. }
  211. if($mf2['rels'] && $mf2['rels']['enclosure']) {
  212. foreach($mf2['rels']['enclosure'] as $url) {
  213. $options['urls'][] = $url;
  214. }
  215. }
  216. if($mf2['rels'] && $mf2['rels']['tag']) {
  217. foreach($mf2['rels']['tag'] as $url) {
  218. preg_match('/\/([^\/]+)\/*$/', $url, $match);
  219. $tag = common_canonical_tag($match[1]);
  220. if($tag) { $options['tags'][] = $tag; }
  221. }
  222. }
  223. if($entry['type'] != 'reply' && $entry['type'] != 'repost') {
  224. $options['urls'] = array();
  225. }
  226. return array($content, $options);
  227. }
  228. function linkback_profile($entry, $mf2, $response, $target) {
  229. if(isset($entry['properties']['author']) && isset($entry['properties']['author'][0]['properties'])) {
  230. $author = $entry['properties']['author'][0]['properties'];
  231. } else {
  232. $author = linkback_hcard($mf2, $response->getEffectiveUrl());
  233. }
  234. if(!$author) {
  235. $author = array('name' => array($entry['name']));
  236. }
  237. if(!$author['url']) {
  238. $author['url'] = array($response->getEffectiveUrl());
  239. }
  240. $user = User::getKV('uri', $author['url'][0]);
  241. if ($user instanceof User) {
  242. common_log(LOG_INFO, "Linkback: ignoring linkback from local user: $url");
  243. return true;
  244. }
  245. try {
  246. $profile = Profile::fromUri($author['url'][0]);
  247. } catch(UnknownUriException $ex) {}
  248. if(!($profile instanceof Profile)) {
  249. $profile = Profile::getKV('profileurl', $author['url'][0]);
  250. }
  251. if(!($profile instanceof Profile)) {
  252. $profile = new Profile();
  253. $profile->profileurl = $author['url'][0];
  254. $profile->fullname = $author['name'][0];
  255. $profile->nickname = $author['nickname'] ? $author['nickname'][0] : str_replace(' ', '', $author['name'][0]);
  256. $profile->created = common_sql_now();
  257. $profile->insert();
  258. }
  259. return array($profile, $author);
  260. }
  261. function linkback_save($source, $target, $response, $notice_or_user) {
  262. $dupe = linkback_is_dupe('uri', $response->getEffectiveUrl());
  263. if(!$dupe) { $dupe = linkback_is_dupe('url', $response->getEffectiveUrl()); }
  264. if(!$dupe) { $dupe = linkback_is_dupe('uri', $source); }
  265. if(!$dupe) { $dupe = linkback_is_dupe('url', $source); }
  266. $mf2 = new Mf2\Parser($response->getBody(), $response->getEffectiveUrl());
  267. $mf2 = $mf2->parse();
  268. $entry = linkback_find_entry($mf2, $target);
  269. if(!$entry) {
  270. preg_match('/<title>([^<]+)', $response->getBody(), $match);
  271. $entry = array(
  272. 'content' => array('html' => $response->getBody()),
  273. 'name' => $match[1] ? htmlspecialchars_decode($match[1]) : $source
  274. );
  275. }
  276. if(!$entry['url']) {
  277. $entry['url'] = array($response->getEffectiveUrl());
  278. }
  279. if(!$dupe) { $dupe = linkback_is_dupe('uri', $entry['url'][0]); }
  280. if(!$dupe) { $dupe = linkback_is_dupe('url', $entry['url'][0]); }
  281. $entry['type'] = linkback_entry_type($entry, $mf2, $target);
  282. list($profile, $author) = linkback_profile($entry, $mf2, $response, $target);
  283. list($content, $options) = linkback_notice($source, $notice_or_user, $entry, $author, $mf2);
  284. if($dupe) {
  285. $orig = clone($dupe);
  286. try {
  287. // Ignore duplicate save error
  288. try { $dupe->saveKnownReplies($options['replies']); } catch (ServerException $ex) {}
  289. try { $dupe->saveKnownTags($options['tags']); } catch (ServerException $ex) {}
  290. try { $dupe->saveKnownUrls($options['urls']); } catch (ServerException $ex) {}
  291. if($options['reply_to']) { $dupe->reply_to = $options['reply_to']; }
  292. if($options['repeat_of']) { $dupe->repeat_of = $options['repeat_of']; }
  293. if($dupe->reply_to != $orig->reply_to || $dupe->repeat_of != $orig->repeat_of) {
  294. $parent = Notice::getKV('id', $dupe->repost_of ? $dupe->repost_of : $dupe->reply_to);
  295. if($parent instanceof Notice) {
  296. // If we changed the reply_to or repeat_of we might live in a new conversation now
  297. $dupe->conversation = $parent->conversation;
  298. }
  299. }
  300. if($dupe->update($orig)) { $saved = $dupe; }
  301. if($dupe->conversation != $orig->conversation && Conversation::noticeCount($orig->conversation) < 1) {
  302. // Delete empty conversation
  303. $emptyConversation = Conversation::getKV('id', $orig->conversation);
  304. $emptyConversation->delete();
  305. }
  306. } catch (Exception $e) {
  307. common_log(LOG_ERR, "Linkback update of remote message $source failed: " . $e->getMessage());
  308. return false;
  309. }
  310. common_log(LOG_INFO, "Linkback updated remote message $source as notice id $saved->id");
  311. } else if($entry['type'] == 'like' || ($entry['type'] == 'reply' && $entry['rsvp'])) {
  312. $act = new Activity();
  313. $act->type = ActivityObject::ACTIVITY;
  314. $act->time = $options['created'] ? strtotime($options['created']) : time();
  315. $act->title = $entry["name"] ? $entry["name"][0] : _m("Favor");
  316. $act->actor = $profile->asActivityObject();
  317. $act->target = $notice_or_user->asActivityObject();
  318. $act->objects = array(clone($act->target));
  319. // TRANS: Message that is the "content" of a favorite (%1$s is the actor's nickname, %2$ is the favorited
  320. // notice's nickname and %3$s is the content of the favorited notice.)
  321. $act->content = sprintf(_('%1$s favorited something by %2$s: %3$s'),
  322. $profile->getNickname(), $notice_or_user->getProfile()->getNickname(),
  323. $notice_or_user->getRendered());
  324. if($entry['rsvp']) {
  325. $act->content = $options['rendered'];
  326. }
  327. $act->verb = ActivityVerb::FAVORITE;
  328. if(strtolower($entry['rsvp'][0]) == 'yes') {
  329. $act->verb = 'http://activitystrea.ms/schema/1.0/rsvp-yes';
  330. } else if(strtolower($entry['rsvp'][0]) == 'no') {
  331. $act->verb = 'http://activitystrea.ms/schema/1.0/rsvp-no';
  332. } else if(strtolower($entry['rsvp'][0]) == 'maybe') {
  333. $act->verb = 'http://activitystrea.ms/schema/1.0/rsvp-maybe';
  334. }
  335. $act->id = $source;
  336. $act->link = $entry['url'][0];
  337. $options['source'] = 'linkback';
  338. $options['mentions'] = $options['replies'];
  339. unset($options['reply_to']);
  340. unset($options['repeat_of']);
  341. try {
  342. $saved = Notice::saveActivity($act, $profile, $options);
  343. } catch (Exception $e) {
  344. common_log(LOG_ERR, "Linkback save of remote message $source failed: " . $e->getMessage());
  345. return false;
  346. }
  347. common_log(LOG_INFO, "Linkback saved remote message $source as notice id $saved->id");
  348. } else {
  349. // Fallback is to make a notice manually
  350. try {
  351. $saved = Notice::saveNew($profile->id,
  352. $content,
  353. 'linkback',
  354. $options);
  355. } catch (Exception $e) {
  356. common_log(LOG_ERR, "Linkback save of remote message $source failed: " . $e->getMessage());
  357. return false;
  358. }
  359. common_log(LOG_INFO, "Linkback saved remote message $source as notice id $saved->id");
  360. }
  361. return $saved->getLocalUrl();
  362. }