123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415 |
- <?php
- /*******************************************************************************
- * File name: bl-feed-reader.php
- * Copyright 2012 Iurie Nistor
- * This file is part of bloggerland.
- *
- * Bloggers Land is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation; either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *******************************************************************************
- */
/**
 * Get feed function.
 *
 * Loads the XML document found at a URL, detects whether it is an RSS or
 * an Atom feed and returns its parsed entries.
 *
 * @param $feed_url
 *   URL (or path) of the feed; passed to DOMDocument::load().
 *
 * @return
 *   Whatever rss_feeds() or atom_feeds() returns for the loaded document,
 *   or FALSE when the URL is empty, the document cannot be loaded or no
 *   feed type is detected.
 */
function get_feed($feed_url)
{
    // Nothing to do without a URL.
    if (empty($feed_url)) return FALSE;

    // Load the XML document behind the URL.
    $document = new DOMDocument();
    if ( !$document->load($feed_url) ) return FALSE;

    // Detect the feed flavour and dispatch to the matching parser.
    $type = feed_type($document);
    switch ($type)
    {
        case 'none':
            // No known feed type has been detected.
            return FALSE;

        case 'rss':
            echo "\tIs RSS feed: \n";
            return rss_feeds($document);

        default:
            echo "\tIs Atom feed: \n";
            return atom_feeds($document);
    }
}
/**
 * Feed type function.
 *
 * Detects whether a document is an RSS or an Atom feed by counting the
 * elements named 'rss' and 'feed' in it.
 *
 * @param $feed
 *   DOM object (XML file).
 *
 * @return
 *   'rss' when the document has exactly one 'rss' element, otherwise
 *   'atom' when it has exactly one 'feed' element, otherwise 'none'.
 */
function feed_type( $feed )
{
    // Element name => reported type, probed in this order. Only documents
    // with exactly one such element are recognised; Atom has no container
    // element, so documents with several 'feed' tags are not supported.
    $probes = array(
        'rss'  => 'rss',
        'feed' => 'atom',
    );

    foreach ( $probes as $tag_name => $type )
    {
        if ( $feed->getElementsByTagName($tag_name)->length == 1 ) return $type;
    }

    return 'none';
}
/**
 * RSS feeds function.
 *
 * Collects the parsed items of the single channel of an RSS document.
 *
 * @param $feed
 *   DOM object (XML file).
 *
 * @return
 *   An array with one rss_tags() result per usable item (possibly empty),
 *   or FALSE when $feed is not an object or the document does not contain
 *   exactly one 'channel' element.
 */
function rss_feeds( $feed )
{
    // Only DOM objects can be queried.
    if ( !is_object($feed) ) return FALSE;

    // Only documents with exactly one channel are supported.
    $channel_list = $feed->getElementsByTagName("channel");
    if ( $channel_list->length != 1 ) return FALSE;

    $collected = array();

    // Walk the items of that channel, keeping those that parse.
    foreach ( $channel_list->item(0)->getElementsByTagName("item") as $node )
    {
        $parsed = rss_tags($node);
        if ( empty($parsed) ) continue;
        $collected[] = $parsed;
    }

    return $collected;
}
/**
 * Atom feeds function.
 *
 * Collects the parsed entries of the single 'feed' element of an Atom
 * document.
 *
 * @param $feed
 *   DOM object (XML file).
 *
 * @return
 *   An array with one atom_tags() result per usable entry (possibly
 *   empty), or FALSE when $feed is not an object or the document does not
 *   contain exactly one 'feed' element.
 */
function atom_feeds( $feed )
{
    // Only DOM objects can be queried.
    if ( !is_object($feed) ) return FALSE;

    // Only documents with exactly one 'feed' tag are supported.
    $feed_list = $feed->getElementsByTagName("feed");
    if ( $feed_list->length != 1 ) return FALSE;

    $collected = array();

    // Walk the entries of that feed, keeping those that parse.
    foreach ( $feed_list->item(0)->getElementsByTagName("entry") as $node )
    {
        $parsed = atom_tags($node);
        if ( empty($parsed) ) continue;
        $collected[] = $parsed;
    }

    return $collected;
}
/**
 * RSS tags function.
 *
 * Extracts the title, link, pubDate and description of an RSS item.
 *
 * @param $item
 *   Item of a RSS channel (DOM element).
 *
 * @return
 *   An array with the keys 'title', 'link', 'date' and 'description', or
 *   FALSE when the title, link or pubDate tag is missing or has empty
 *   text. A missing description yields an empty string.
 */
function rss_tags( $item )
{
    $tags = array();

    // Mandatory tags: result key => tag name, checked in this order.
    $mandatory = array(
        "title" => "title",
        "link"  => "link",
        "date"  => "pubDate",
    );

    foreach ( $mandatory as $key => $tag_name )
    {
        // The tag must exist ...
        $nodes = $item->getElementsByTagName($tag_name);
        if ( $nodes->length == 0 ) return FALSE;

        // ... its first occurrence must be a node ...
        $node = $nodes->item(0);
        if ( !is_object($node) ) return FALSE;

        // ... and it must carry non-empty text.
        $text = $node->textContent;
        if ( empty($text) ) return FALSE;

        $tags[$key] = $text;
    }

    // The description is optional and defaults to an empty string.
    $nodes = $item->getElementsByTagName("description");
    $node = ( $nodes->length == 0 ) ? NULL : $nodes->item(0);
    $tags["description"] = is_object($node) ? $node->textContent : '';

    return $tags;
}
/**
 * Atom tags function.
 *
 * Extracts the title, HTML link, date and content of an Atom entry.
 *
 * Link selection: the first link that explicitly declares
 * rel="alternate" and type="text/html" wins. Otherwise the first link
 * whose 'rel' is absent or "alternate" and whose 'type' is absent or
 * "text/html" is used, because RFC 4287 treats a missing 'rel' as
 * "alternate" and makes 'type' optional.
 *
 * Date selection: the 'published' tag is preferred; when it is missing
 * or empty the 'updated' tag is used instead ('published' is optional in
 * Atom, 'updated' is mandatory).
 *
 * @param $entry
 *   Entry of an Atom feed (DOM element).
 *
 * @return
 *   An array with the keys 'title', 'link', 'date' and 'description', or
 *   FALSE when the title, a usable link with a non-empty 'href', or a
 *   date cannot be found. A missing content tag yields an empty
 *   description.
 */
function atom_tags( $entry )
{
    // Get the title tag.
    $titles = $entry->getElementsByTagName("title");
    if ( $titles->length == 0 ) return FALSE;
    // Get the title text content.
    $title = $titles->item(0);
    if ( !is_object($title) ) return FALSE;
    $title = $title->textContent;
    // Verify title text.
    if ( empty($title) ) return FALSE;

    // Get the links.
    $links = $entry->getElementsByTagName("link");
    if ( $links->length == 0 ) return FALSE;

    $link_tag = NULL;  // Link that declares rel and type explicitly.
    $fallback = NULL;  // First link that matches through the defaults.
    foreach ( $links as $link )
    {
        $rel_attr  = $link->getAttributeNode('rel');
        $type_attr = $link->getAttributeNode('type');

        // A missing attribute is treated as if it had the wanted value.
        $rel  = is_object($rel_attr)  ? $rel_attr->value  : 'alternate';
        $type = is_object($type_attr) ? $type_attr->value : 'text/html';
        if ( $rel !== 'alternate' || $type !== 'text/html' ) continue;

        if ( is_object($rel_attr) && is_object($type_attr) )
        {
            // Fully explicit match: always preferred, stop searching.
            $link_tag = $link;
            break;
        }

        // Remember only the first default-based match.
        if ( $fallback === NULL ) $fallback = $link;
    }
    if ( $link_tag === NULL ) $link_tag = $fallback;
    // Verify link tag.
    if ( !is_object($link_tag) ) return FALSE;

    // Get the 'href' attribute of the selected link.
    $href = $link_tag->getAttributeNode('href');
    if ( !is_object($href) ) return FALSE;
    $url = $href->value;
    // Verify url text.
    if ( empty($url) ) return FALSE;

    // Get the date: 'published' first, then 'updated'.
    $date = '';
    foreach ( array('published', 'updated') as $date_tag )
    {
        $nodes = $entry->getElementsByTagName($date_tag);
        if ( $nodes->length == 0 ) continue;
        $node = $nodes->item(0);
        if ( !is_object($node) ) continue;
        $text = $node->textContent;
        if ( empty($text) ) continue;
        $date = $text;
        break;
    }
    // Verify date text.
    if ( empty($date) ) return FALSE;

    // Get the content tag; it is optional and defaults to an empty string.
    $contents = $entry->getElementsByTagName("content");
    $content = ( $contents->length == 0 ) ? NULL : $contents->item(0);
    $content = is_object($content) ? $content->textContent : '';

    // Prepare data.
    $tags = array();
    $tags["title"] = $title;
    $tags["link"] = $url;
    $tags["date"] = $date;
    $tags["description"] = $content;
    return $tags;
}
/**
 * Feeds after date function.
 *
 * Selects all feeds whose date is later than a given date. Both dates are
 * converted with strtotime() before being compared.
 *
 * @param $feeds
 *   An array of feeds, each one an array with a 'date' element.
 * @param $date
 *   The date the feeds are compared with, in a format that strtotime()
 *   understands.
 *
 * @return
 *   An array with the feeds that are newer than $date, in their original
 *   order, or an empty array.
 */
function feeds_after_date( $feeds, $date )
{
    // Feeds that are newer than $date.
    $selected = array();

    if ( !empty($feeds) )
    {
        foreach ( $feeds as $entry )
        {
            // Keep only entries strictly newer than the reference date.
            $is_newer = strtotime($entry['date']) > strtotime($date);
            if ( !$is_newer ) continue;

            $selected[] = $entry;
        }
    }

    return $selected;
}
/**
 * Last feeds date.
 *
 * Gets the most recent date of a feeds list. Feeds without a 'date'
 * element, or with a date that strtotime() cannot parse, are ignored.
 *
 * @param $feeds
 *   An array of feeds, each one an array with a 'date' element.
 *
 * @return
 *   Date in format "Y-m-d H:i:s" (as produced by date()), or FALSE when
 *   the list is empty or none of the feeds has a parsable date.
 */
function last_feeds_date( $feeds )
{
    // Verify feeds data.
    if ( empty($feeds) ) return FALSE;

    // FALSE means "no valid date seen yet". Starting from 0 instead would
    // report the Unix epoch for a list without any parsable date and
    // would ignore dates before 1970.
    $last_date = FALSE;

    foreach ( $feeds as $feed )
    {
        // Skip feeds without a date.
        if ( !isset($feed['date']) ) continue;

        // Convert once per feed; skip dates that cannot be parsed.
        $timestamp = strtotime($feed['date']);
        if ( $timestamp === FALSE ) continue;

        // Keep the latest timestamp.
        if ( $last_date === FALSE || $timestamp > $last_date )
        {
            $last_date = $timestamp;
        }
    }

    // No usable date in the whole list.
    if ( $last_date === FALSE ) return FALSE;

    return date("Y-m-d H:i:s", $last_date);
}
|