activityutils.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * An activity
  6. *
  7. * PHP version 5
  8. *
  9. * LICENCE: This program is free software: you can redistribute it and/or modify
  10. * it under the terms of the GNU Affero General Public License as published by
  11. * the Free Software Foundation, either version 3 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU Affero General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Affero General Public License
  20. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  21. *
  22. * @category Feed
  23. * @package StatusNet
  24. * @author Evan Prodromou <evan@status.net>
  25. * @author Zach Copley <zach@status.net>
  26. * @copyright 2010 StatusNet, Inc.
  27. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPLv3
  28. * @link http://status.net/
  29. */
  // Abort immediately (exit status 1) when the STATUSNET constant has not
  // been defined by whatever loaded this file.
  defined('STATUSNET') || exit(1);
  /**
   * Utilities for turning DOMish things into Activityish things
   *
   * A collection of static helpers for picking Atom / Activity Streams
   * data (links, child elements, text constructs, authors) out of DOM
   * nodes, plus a few helpers for comparing and resolving object-type
   * and verb URIs.
   *
   * These were not factored into a proper superclass; it might be useful
   * to have an ActivityObject parent class or something similar.
   *
   * @category OStatus
   * @package  StatusNet
   * @author   Evan Prodromou <evan@status.net>
   * @copyright 2010 StatusNet, Inc.
   * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPLv3
   * @link     http://status.net/
   */
  class ActivityUtils
  {
      // Atom namespace URI
      const ATOM = 'http://www.w3.org/2005/Atom';

      // Element and attribute names used when walking Atom markup
      const LINK    = 'link';
      const REL     = 'rel';
      const TYPE    = 'type';
      const HREF    = 'href';
      const CONTENT = 'content';
      const SRC     = 'src';

      /**
       * Get the permalink for an Activity object
       *
       * The permalink is the href of the first atom:link child that has
       * rel="alternate" and type="text/html".
       *
       * @param DOMNode $element A DOM node whose children are examined
       *
       * @return string|null href of the permalink, or null if none is found
       */
      public static function getPermalink($element)
      {
          return self::getLink($element, 'alternate', 'text/html');
      }

      /**
       * Get the href of the first matching atom:link child of a node
       *
       * @param DOMNode $element A DOM node whose children are examined
       * @param string  $rel     Required value of the link's rel attribute
       * @param string  $type    Required value of the link's type attribute,
       *                         or null to accept any type
       *
       * @return string|null href of the first matching link, or null if none
       */
      public static function getLink(DOMNode $element, $rel, $type=null)
      {
          foreach ($element->childNodes as $link) {
              // Text, comment etc. nodes have no attributes to inspect.
              if (!($link instanceof DOMElement)) {
                  continue;
              }
              if ($link->localName != self::LINK || $link->namespaceURI != self::ATOM) {
                  continue;
              }

              $linkRel  = $link->getAttribute(self::REL);
              $linkType = $link->getAttribute(self::TYPE);

              if ($linkRel == $rel && (is_null($type) || $linkType == $type)) {
                  return $link->getAttribute(self::HREF);
              }
          }

          return null;
      }

      /**
       * Get all matching atom:link child elements of a node
       *
       * Unlike getLink(), this returns the link elements themselves rather
       * than their href attribute.
       *
       * @param DOMNode $element A DOM node whose children are examined
       * @param string  $rel     Required value of the link's rel attribute
       * @param string  $type    Required value of the link's type attribute,
       *                         or null to accept any type
       *
       * @return array matching DOMElement objects, possibly empty
       */
      public static function getLinks(DOMNode $element, $rel, $type=null)
      {
          $out = array();

          foreach ($element->childNodes as $link) {
              // Same guard as getLink(): only elements carry attributes.
              if (!($link instanceof DOMElement)) {
                  continue;
              }
              if ($link->localName != self::LINK || $link->namespaceURI != self::ATOM) {
                  continue;
              }

              $linkRel  = $link->getAttribute(self::REL);
              $linkType = $link->getAttribute(self::TYPE);

              if ($linkRel == $rel && (is_null($type) || $linkType == $type)) {
                  $out[] = $link;
              }
          }

          return $out;
      }

      /**
       * Gets the first child element with the given tag
       *
       * @param DOMNode $element   element to pick at
       * @param string  $tag       tag (local name) to look for
       * @param string  $namespace Namespace to look under, defaults to Atom
       *
       * @return DOMNode|null found element, or null if there is no match
       */
      public static function child(DOMNode $element, $tag, $namespace=self::ATOM)
      {
          $els = $element->childNodes;

          if (empty($els) || $els->length == 0) {
              return null;
          }

          foreach ($els as $el) {
              if ($el->localName == $tag && $el->namespaceURI == $namespace) {
                  return $el;
              }
          }

          // Nothing matched; say so explicitly instead of falling off the end.
          return null;
      }

      /**
       * Gets all immediate child elements with the given tag
       *
       * @param DOMNode $element   element to pick at
       * @param string  $tag       tag (local name) to look for
       * @param string  $namespace Namespace to look under, defaults to Atom
       *
       * @return array found elements; empty if there are none
       */
      public static function children(DOMNode $element, $tag, $namespace=self::ATOM)
      {
          $results = array();
          $els     = $element->childNodes;

          if (empty($els) || $els->length == 0) {
              return $results;
          }

          foreach ($els as $el) {
              if ($el->localName == $tag && $el->namespaceURI == $namespace) {
                  $results[] = $el;
              }
          }

          return $results;
      }

      /**
       * Grab the text content of a DOM element child of the current element
       *
       * @param DOMNode $element   Element whose children we examine
       * @param string  $tag       Tag to look up
       * @param string  $namespace Namespace to use, defaults to Atom
       *
       * @return string|null text content of the child, or null if no such child
       */
      public static function childContent(DOMNode $element, $tag, $namespace=self::ATOM)
      {
          $el = self::child($element, $tag, $namespace);

          return empty($el) ? null : $el->textContent;
      }

      /**
       * Grab the content of a child element, processed as an Atom text construct
       *
       * @param DOMNode $element   Element whose children we examine
       * @param string  $tag       Tag to look up
       * @param string  $namespace Namespace to use, defaults to Atom
       *
       * @return string|null result of textConstruct() on the child, or null
       *                     if no such child
       *
       * @see ActivityUtils::textConstruct()
       */
      public static function childHtmlContent(DOMNode $element, $tag, $namespace=self::ATOM)
      {
          $el = self::child($element, $tag, $namespace);

          return empty($el) ? null : self::textConstruct($el);
      }

      /**
       * Get the content of an atom:entry-like object
       *
       * @param DOMNode $element The element to examine.
       *
       * @return string|null unencoded HTML content of the element, like "This -&lt; is <b>HTML</b>."
       *
       * @todo handle remote content
       * @todo handle embedded XML mime types
       * @todo handle base64-encoded non-XML and non-text mime types
       */
      public static function getContent($element)
      {
          return self::childHtmlContent($element, self::CONTENT, self::ATOM);
      }

      /**
       * Extract the content of an Atom text/content construct
       *
       * Dispatches on the element's type attribute:
       *  - none or "text": the text content, HTML-escaped
       *  - "html":         the text content, as is
       *  - "xhtml":        the serialized children of the XHTML <div> child
       *  - "+xml"/"/xml":  not supported, throws
       *  - "text/...":     the text content, as is
       *  - anything else:  treated as Base64, not supported, throws
       *
       * @param DOMElement $el The text construct element (e.g. atom:content)
       *
       * @return string|null extracted content; null for an xhtml construct
       *                     without an XHTML <div> child
       *
       * @throws ClientException for remote (src="...") content, embedded XML
       *                         content and Base64 content
       */
      public static function textConstruct($el)
      {
          $src = $el->getAttribute(self::SRC);

          if (!empty($src)) {
              // TRANS: Client exception thrown when content is referenced through a src attribute instead of being embedded.
              throw new ClientException(_("Can't handle remote content yet."));
          }

          $type = $el->getAttribute(self::TYPE);

          // slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3

          if (empty($type) || $type == 'text') {
              // We have plaintext saved as the XML text content.
              // Since we want HTML, we need to escape any special chars.
              return htmlspecialchars($el->textContent);
          }

          if ($type == 'html') {
              // We have HTML saved as the XML text content.
              // No additional processing required once we've got it.
              return $el->textContent;
          }

          if ($type == 'xhtml') {
              // Per spec, the <content type="xhtml"> contains a single
              // HTML <div> with XHTML namespace on it as a child node.
              // We need to pull all of that <div>'s child nodes and
              // serialize them back to an (X)HTML source fragment.
              $divEl = self::child($el, 'div', 'http://www.w3.org/1999/xhtml');
              if (empty($divEl)) {
                  return null;
              }

              $doc  = $divEl->ownerDocument;
              $text = '';
              foreach ($divEl->childNodes as $child) {
                  $text .= $doc->saveXML($child);
              }
              return trim($text);
          }

          // XML media types end in "+xml" or "/xml" (case-insensitively).
          // The "+" must be escaped: unescaped it is a quantifier with
          // nothing to repeat and the pattern fails to compile.
          if (preg_match('#(\+|/)xml$#i', $type)) {
              // TRANS: Client exception thrown when there embedded XML content is found that cannot be processed yet.
              throw new ClientException(_("Can't handle embedded XML content yet."));
          }

          // strncasecmp() returns 0 on a match, so compare explicitly.
          if (strncasecmp($type, 'text/', 5) === 0) {
              return $el->textContent;
          }

          // TRANS: Client exception thrown when base64 encoded content is found that cannot be processed yet.
          throw new ClientException(_("Can't handle embedded Base64 content yet."));
      }

      /**
       * Is this a valid URI for remote profile/notice identification?
       * Does not have to be a resolvable URL.
       *
       * mailto: URIs are checked as e-mail addresses; everything else is
       * checked with Validate::uri(), with a second attempt that explicitly
       * allows the tag: scheme.
       *
       * @param string $uri
       *
       * @return boolean
       */
      public static function validateUri($uri)
      {
          $validate = new Validate();

          // Check mailto: URIs first
          if (preg_match('/^mailto:(.*)$/', $uri, $match)) {
              return $validate->email($match[1], common_config('email', 'check_domain'));
          }

          if ($validate->uri($uri)) {
              return true;
          }

          // Possibly an upstream bug; tag: URIs aren't validated properly
          // unless you explicitly ask for them. All other schemes are accepted
          // for basic URI validation without asking.
          if ($validate->uri($uri, array('allowed_scheme' => array('tag')))) {
              return true;
          }

          return false;
      }

      /**
       * Find the author of a feed
       *
       * Tries, in order: the feed's (deprecated) activity:subject, the
       * feed's atom:author, and then the (deprecated) activity:actor and
       * the atom:author of the first atom:entry found in the feed.
       *
       * @param DOMElement $feedEl The feed element
       *
       * @return ActivityObject|null object built from the first element
       *                             found, or null if none is present
       */
      public static function getFeedAuthor($feedEl)
      {
          // Try old and deprecated activity:subject
          $subject = self::child($feedEl, Activity::SUBJECT, Activity::SPEC);
          if (!empty($subject)) {
              return new ActivityObject($subject);
          }

          // Try the feed author
          $author = self::child($feedEl, Activity::AUTHOR, Activity::ATOM);
          if (!empty($author)) {
              return new ActivityObject($author);
          }

          // Sheesh. Not a very nice feed! Let's try fingerpoken in the
          // entries.
          $entries = $feedEl->getElementsByTagNameNS(Activity::ATOM, 'entry');
          if (empty($entries) || $entries->length == 0) {
              return null;
          }

          $entry = $entries->item(0);

          // Try the (deprecated) activity:actor
          $actor = self::child($entry, Activity::ACTOR, Activity::SPEC);
          if (!empty($actor)) {
              return new ActivityObject($actor);
          }

          // Try the author
          $author = self::child($entry, Activity::AUTHOR, Activity::ATOM);
          if (!empty($author)) {
              return new ActivityObject($author);
          }

          return null;
      }

      /**
       * Check whether a type matches any of the given types
       *
       * All values are passed through resolveUri() before being compared,
       * so relative and absolute forms of the same type match.
       *
       * @param string       $type    Type to look for
       * @param string|array $objects One type, or a list of types, to match
       *
       * @return boolean true if $type equals any of $objects
       */
      public static function compareTypes($type, $objects)
      {
          $type = self::resolveUri($type, false);

          foreach ((array)$objects as $object) {
              if ($type === self::resolveUri($object)) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Check whether a verb matches any of the given verbs
       *
       * Alias of compareTypes().
       *
       * @param string       $type    Verb to look for
       * @param string|array $objects One verb, or a list of verbs, to match
       *
       * @return boolean true if $type equals any of $objects
       */
      public static function compareVerbs($type, $objects)
      {
          return self::compareTypes($type, $objects);
      }

      /**
       * Convert a type/verb URI between its relative and absolute form
       *
       * @param string  $uri           URI to convert
       * @param boolean $make_relative true to reduce the URI to its last path
       *                               component; false to prefix a
       *                               scheme-less URI with Activity::SCHEMA
       *
       * @return string converted URI; a URI that already has a scheme is
       *                returned unchanged when $make_relative is false
       *
       * @throws ServerException if $uri is empty
       */
      public static function resolveUri($uri, $make_relative=false)
      {
          if (empty($uri)) {
              throw new ServerException('No URI to resolve in ActivityUtils::resolveUri');
          }

          if ($make_relative) {
              // absolute -> relative
              return basename($uri);
          }

          if (parse_url($uri, PHP_URL_SCHEME) == '') {
              // relative -> absolute
              return Activity::SCHEMA . $uri;
          }

          // absolute schemas pass through unharmed
          return $uri;
      }

      /**
       * Find a locally stored object matching any of the given URIs
       *
       * The class to look in is 'Profile' for ActivityObject::PERSON and
       * 'Notice' for everything else, unless a StartFindLocalActivityObject
       * event handler stops default processing (presumably after setting
       * the class itself). The first URI for which the class' fromUri()
       * does not throw UnknownUriException wins.
       *
       * @param array  $uris URIs to try, in order
       * @param string $type Object type of the wanted object
       *
       * @return Managed_DataObject the object found
       *
       * @throws ServerException if no object could be found
       */
      public static function findLocalObject(array $uris, $type=ActivityObject::NOTE) {
          $obj_class = null;

          // TODO: Extend this in plugins etc. and describe in EVENTS.txt
          if (Event::handle('StartFindLocalActivityObject', array($uris, $type, &$obj_class))) {
              switch (self::resolveUri($type)) {
              case ActivityObject::PERSON:
                  // GROUP will also be here in due time...
                  $obj_class = 'Profile';
                  break;
              default:
                  $obj_class = 'Notice';
              }
          }

          $object = null;
          $uris   = array_unique($uris);

          // If an event handler stopped processing without naming a class
          // there is nothing to call; fall through to the exception below
          // instead of invoking an invalid callable.
          if (!empty($obj_class)) {
              foreach ($uris as $uri) {
                  try {
                      // the exception thrown will cancel before reaching $object
                      $object = call_user_func(array($obj_class, 'fromUri'), $uri);
                      break;
                  } catch (UnknownUriException $e) {
                      common_debug('Could not find local activity object from uri: '.$e->object_uri);
                  }
              }
          }

          if (!$object instanceof Managed_DataObject) {
              throw new ServerException('Could not find any activityobject stored locally with given URIs: '.var_export($uris,true));
          }

          Event::handle('EndFindLocalActivityObject', array($object->getUri(), $object->getObjectType(), $object));

          return $object;
      }

      /**
       * Determine the author Profile of an activity
       *
       * Check authorship by supplying a Profile as a default and letting
       * plugins set it to something else (through the
       * CheckActivityAuthorship event) if the activity's author is actually
       * someone else (like with a group or peopletag feed as handled in
       * OStatus).
       *
       * NOTE: Returned is not necessarily the supplied profile! For example,
       * the "feed author" may be a group, but the "activity author" is a
       * person!
       *
       * @param Activity $activity Activity whose author is wanted
       * @param Profile  $profile  Default author
       *
       * @return Profile the author of the activity
       *
       * @throws ServerException if no Profile is available after the event
       */
      public static function checkAuthorship(Activity $activity, Profile $profile)
      {
          if (Event::handle('CheckActivityAuthorship', array($activity, &$profile))) {
              // If the activity has no actor we generated this Activity
              // ourselves and can trust $profile; only compare when there
              // is an actor to compare against.
              if (!empty($activity->actor)) {
                  $actor_uri = $profile->getUri();

                  if (!in_array($actor_uri, array($activity->actor->id, $activity->actor->link))) {
                      // A mismatch between our locally stored URI and the supplied author?
                      // Probably not more than a blog feed or something (with multiple authors or so)
                      // but log it for future inspection.
                      common_log(LOG_WARNING, "Got an actor '{$activity->actor->title}' ({$activity->actor->id}) on single-user feed for " . $actor_uri);
                  }
                  // Otherwise, an actor without an id is a plain <author>
                  // without ActivityStreams actor info. We just ignore this
                  // info for now and save the update under the feed's identity.
              }
          }

          if (!$profile instanceof Profile) {
              throw new ServerException('Could not get an author Profile for activity');
          }

          return $profile;
      }

      /**
       * Turn an object type into a title
       *
       * @param string $type Object type URI, relative or absolute
       *
       * @return string last path component of the type, first letter uppercased
       */
      public static function typeToTitle($type)
      {
          return ucfirst(self::resolveUri($type, true));
      }

      /**
       * Turn a verb into a title
       *
       * @param string $verb Verb URI, relative or absolute
       *
       * @return string last path component of the verb, first letter uppercased
       */
      public static function verbToTitle($verb)
      {
          return ucfirst(self::resolveUri($verb, true));
      }
  }