<?php
// Abort immediately unless the STATUSNET constant is defined, so this file
// cannot be executed as a standalone script.
if (!defined('STATUSNET')) {
    exit(1);
}
/**
 * Raised by FeedDiscovery::discoverFromURL() when the HTTP request for the
 * given URL throws; the message is the underlying exception's message.
 */
class FeedSubBadURLException extends FeedSubException
{
}
/**
 * Raised by FeedDiscovery::initFromResponse() when the HTTP response is not
 * OK; the message is the response status.
 */
class FeedSubBadResponseException extends FeedSubException
{
}
/**
 * Raised by FeedDiscovery::initFromResponse() when the response body is
 * empty; the message is the effective URL that was fetched.
 */
class FeedSubEmptyException extends FeedSubException
{
}
/**
 * Raised by FeedDiscovery::discoverFromHTML() when the HTML page cannot be
 * parsed into a DOM document.
 */
class FeedSubBadHTMLException extends FeedSubException
{
}
/**
 * Raised by FeedDiscovery::initFromResponse() when the response Content-Type
 * is not one of the accepted XML/RSS/Atom types; the message is that type.
 */
class FeedSubUnrecognizedTypeException extends FeedSubException
{
}
/**
 * Raised by FeedDiscovery::discoverFromURL() when an HTML page advertises no
 * Atom or RSS feed link; the message is the URL that was requested.
 */
class FeedSubNoFeedException extends FeedSubException
{
}
/**
 * Feed subscription error type; not thrown anywhere in this file.
 * NOTE(review): presumably signals a feed without a Salmon endpoint link --
 * confirm against the callers that throw it.
 */
class FeedSubNoSalmonException extends FeedSubException
{
}
/**
 * Raised by FeedDiscovery::init() when the body is not well-formed XML, or
 * its root is neither an Atom <feed> nor an <rss> element containing a
 * <channel>; the message is the source URL.
 */
class FeedSubBadXmlException extends FeedSubException
{
}
/**
 * Feed subscription error type; not thrown anywhere in this file.
 * NOTE(review): presumably signals a feed that advertises no hub link (see
 * FeedDiscovery::getHubLink()) -- confirm against the callers that throw it.
 */
class FeedSubNoHubException extends FeedSubException
{
}
/**
 * Locates and loads an Atom or RSS feed, either directly from a feed URL or
 * by following the <link rel="alternate"> advertised on an HTML page.
 *
 * After a successful discover*() / init() call the public properties
 * describe the loaded feed.
 */
class FeedDiscovery
{
    /** @var string|null Effective URL the feed was loaded from; set by init(). */
    public $uri;
    /** @var string|null Content-Type the feed was served with; set by init(). */
    public $type;
    /** @var DOMDocument|null Parsed feed document; set by init(). */
    public $feed;
    /** @var DOMElement|null Atom <feed> element, or the first RSS <channel>; set by init(). */
    public $root;

    /**
     * Look up a link on the loaded feed's root element.
     *
     * Currently a plain alias for getAtomLink().
     *
     * @param string      $rel  link relation to look for
     * @param string|null $type optional link type to match
     *
     * @return mixed whatever ActivityUtils::getLink() returns
     */
    public function getLink($rel, $type=null)
    {
        // $this-> rather than self:: so subclass overrides of getAtomLink() are honoured.
        return $this->getAtomLink($rel, $type);
    }

    /**
     * Look up a link on the loaded feed's root element via ActivityUtils.
     *
     * @param string      $rel  link relation to look for
     * @param string|null $type optional link type to match
     *
     * @return mixed whatever ActivityUtils::getLink() returns
     */
    public function getAtomLink($rel, $type=null)
    {
        return ActivityUtils::getLink($this->root, $rel, $type);
    }

    /**
     * Look up the feed's rel="hub" link.
     *
     * @return mixed whatever ActivityUtils::getLink() returns for rel "hub"
     */
    public function getHubLink()
    {
        return $this->getAtomLink('hub');
    }

    /**
     * Fetch a URL and load the feed found there.
     *
     * When $htmlOk is true and the response is HTML/XHTML, the page is
     * scanned for an advertised feed and that feed URL is fetched instead
     * (with HTML disallowed, so discovery goes at most one hop).
     *
     * @param string  $url    URL of a feed, or of an HTML page linking to one
     * @param boolean $htmlOk whether an HTML response may be used for discovery
     *
     * @return string effective URL of the loaded feed
     *
     * @throws FeedSubBadURLException when the HTTP request itself fails
     * @throws FeedSubNoFeedException when an HTML page advertises no feed
     * @throws FeedSubException       anything raised by initFromResponse()
     *                                or discoverFromHTML()
     */
    public function discoverFromURL($url, $htmlOk=true)
    {
        try {
            $client = new HTTPClient();
            $response = $client->get($url);
        } catch (Exception $e) {
            common_log(LOG_ERR, __METHOD__ . " Failure for $url - " . $e->getMessage());
            throw new FeedSubBadURLException($e->getMessage());
        }

        if ($htmlOk) {
            // Cast: a missing header must not reach preg_match() as null.
            $type = (string) $response->getHeader('Content-Type');
            if (preg_match('!^(text/html|application/xhtml\+xml)!i', $type)) {
                $target = $this->discoverFromHTML($response->getEffectiveUrl(), $response->getBody());
                if (!$target) {
                    throw new FeedSubNoFeedException($url);
                }
                return $this->discoverFromURL($target, false);
            }
        }

        return $this->initFromResponse($response);
    }

    /**
     * Fetch a URL that must itself be a feed (no HTML discovery).
     *
     * @param string $url feed URL
     *
     * @return string effective URL of the loaded feed
     *
     * @throws FeedSubException see discoverFromURL()
     */
    public function discoverFromFeedURL($url)
    {
        return $this->discoverFromURL($url, false);
    }

    /**
     * Validate an HTTP response and load its body as a feed.
     *
     * @param object $response HTTP response offering isOk(), getStatus(),
     *                         getEffectiveUrl(), getBody() and getHeader()
     *
     * @return string effective URL of the loaded feed
     *
     * @throws FeedSubBadResponseException      when the response is not OK
     * @throws FeedSubEmptyException            when the body is empty
     * @throws FeedSubUnrecognizedTypeException when the Content-Type is not
     *                                          an accepted XML/RSS/Atom type
     * @throws FeedSubBadXmlException           when the body is not a usable feed
     */
    public function initFromResponse($response)
    {
        if (!$response->isOk()) {
            throw new FeedSubBadResponseException($response->getStatus());
        }

        $sourceurl = $response->getEffectiveUrl();
        $body = $response->getBody();
        if (!$body) {
            throw new FeedSubEmptyException($sourceurl);
        }

        // Cast: a missing header must not reach preg_match() as null.
        $type = (string) $response->getHeader('Content-Type');
        if (!preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) {
            common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl");
            throw new FeedSubUnrecognizedTypeException($type);
        }

        return $this->init($sourceurl, $type, $body);
    }

    /**
     * Parse a feed body and populate $uri, $type, $feed and $root.
     *
     * The object's properties are only touched once the document has been
     * fully validated, so a failed call leaves any previous state intact.
     *
     * @param string $sourceurl URL the body was fetched from
     * @param string $type      Content-Type the body was served with
     * @param string $body      raw XML
     *
     * @return string $sourceurl
     *
     * @throws FeedSubBadXmlException when the body is empty, is not
     *         well-formed XML, or its root is neither <feed> nor an <rss>
     *         element containing a <channel>
     */
    public function init($sourceurl, $type, $body)
    {
        // DOMDocument::loadXML() raises ValueError on an empty string in
        // PHP 8; report it as the same bad-XML condition instead.
        if ((string) $body === '') {
            throw new FeedSubBadXmlException($sourceurl);
        }

        $feed = new DOMDocument();
        // LIBXML_NONET: the document is untrusted remote input, so forbid
        // network access while parsing.
        if (!$feed->loadXML($body, LIBXML_NONET)) {
            throw new FeedSubBadXmlException($sourceurl);
        }

        $el = $feed->documentElement;
        if ($el->tagName === 'rss') {
            $channels = $el->getElementsByTagName('channel');
            if ($channels->length === 0) {
                throw new FeedSubBadXmlException($sourceurl);
            }
            $root = $channels->item(0);
        } else if ($el->tagName === 'feed') {
            $root = $el;
        } else {
            throw new FeedSubBadXmlException($sourceurl);
        }

        $this->uri = $sourceurl;
        $this->type = $type;
        $this->feed = $feed;
        $this->root = $root;

        return $this->uri;
    }

    /**
     * Find the feed advertised by an HTML page.
     *
     * Scans <link> elements carrying rel, type and href attributes for
     * rel="alternate" with an Atom or RSS type. The first link of each type
     * wins, and Atom is preferred over RSS. Relative hrefs are resolved
     * against the page's <base href> (the last one seen) or, failing that,
     * against $url.
     *
     * @param string $url  URL the page was fetched from
     * @param string $body raw HTML
     *
     * @return string|false absolute feed URL, or false when none is advertised
     *
     * @throws FeedSubBadHTMLException when the page cannot be parsed
     */
    public function discoverFromHTML($url, $body)
    {
        // DOMDocument::loadHTML() raises ValueError on an empty string in
        // PHP 8; report it as the same bad-HTML condition instead.
        if ((string) $body === '') {
            throw new FeedSubBadHTMLException();
        }

        // Real-world HTML is rarely valid; collect parser complaints
        // internally rather than emitting PHP warnings, then restore the
        // caller's libxml setting.
        $previous = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $ok = $dom->loadHTML($body);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$ok) {
            throw new FeedSubBadHTMLException();
        }

        // Work out the base URL for relative links.
        $base = false;
        foreach ($dom->getElementsByTagName('base') as $node) {
            if ($node->hasAttribute('href')) {
                $base = trim($node->getAttribute('href'));
            }
        }
        $base = $base ? $this->resolveURI($base, $url) : $url;

        // Array order doubles as preference order: Atom before RSS.
        $feeds = array(
            'application/atom+xml' => false,
            'application/rss+xml' => false,
        );
        foreach ($dom->getElementsByTagName('link') as $node) {
            if (!$node->hasAttribute('rel') || !$node->hasAttribute('type') || !$node->hasAttribute('href')) {
                continue;
            }
            // rel is a whitespace-separated, case-insensitive token list and
            // MIME types are case-insensitive too.
            $rels = preg_split('/\s+/', strtolower($node->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
            $linkType = strtolower(trim($node->getAttribute('type')));
            $href = trim($node->getAttribute('href'));

            if (in_array('alternate', $rels, true)
                && array_key_exists($linkType, $feeds)
                && !$feeds[$linkType]) {
                // Keep only the first link seen for each type.
                $feeds[$linkType] = $this->resolveURI($href, $base);
            }
        }

        foreach ($feeds as $feedUrl) {
            if ($feedUrl) {
                return $feedUrl;
            }
        }
        return false;
    }

    /**
     * Resolve a possibly-relative URI against a base URL using Net_URL2.
     *
     * Best effort: when resolution throws, the problem is logged and $rel is
     * returned unchanged.
     *
     * @param string $rel  URI to resolve; returned as-is when already absolute
     * @param string $base absolute URL to resolve against
     *
     * @return string resolved URL, or $rel when it is absolute or unresolvable
     */
    public function resolveURI($rel, $base)
    {
        require_once "Net/URL2.php";
        try {
            $relUrl = new Net_URL2($rel);
            if ($relUrl->isAbsolute()) {
                return $rel;
            }
            $baseUrl = new Net_URL2($base);
            $absUrl = $baseUrl->resolve($relUrl);
            return $absUrl->getURL();
        } catch (Exception $e) {
            common_log(LOG_WARNING, 'Unable to resolve relative link "' .
                $rel . '" against base "' . $base . '": ' . $e->getMessage());
            return $rel;
        }
    }
}