read-feed.php 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. <?php
  2. /*******************************************************************************
  3. * File name: bl-feed-reader.php
  4. * Copyright 2012 Iurie Nistor
  5. * This file is part of bloggerland.
  6. *
  7. * Bloggers Land is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation; either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. *
  21. *******************************************************************************
  22. */
  23. /**
  24. * Get feeds function.
  25. *
  26. * Gets the rss or Atom feeds.
  27. *
  28. * @param $freed_url
  29. * Url of a feed.
  30. *
  31. * @return
  32. * Returns feed content an empty array, or FLASE on error.
  33. */
  34. function get_feed($feed_url)
  35. {
  36. $feeds = array();
  37. // Check blog data.
  38. if (empty($feed_url)) return FALSE;
  39. // Create feed object.
  40. $feed = new DOMDocument();
  41. // Load feed.
  42. $result = $feed->load($feed_url);
  43. // Verify load result.
  44. if( !$result ) return FALSE;
  45. // Check the feed type.
  46. if ( feed_type($feed) == 'none' )
  47. {
  48. // Return FALSE because no feed type have been detected.
  49. return FALSE;
  50. }
  51. else if( feed_type($feed) == 'rss' )
  52. {
  53. echo "\tIs RSS feed: \n";
  54. // Get RSS feeds.
  55. $feeds = rss_feeds($feed);
  56. }
  57. else
  58. {
  59. echo "\tIs Atom feed: \n";
  60. // Get Atom feeds.
  61. $feeds = atom_feeds($feed);
  62. }
  63. return $feeds;
  64. }
  65. /**
  66. * Detects the rss 2.0 feed or atom.
  67. */
  68. function feed_type( $feed )
  69. {
  70. // Get the RSS 2.0 container 'rss'.
  71. $rss = $feed->getElementsByTagName('rss');
  72. // Verify the number of tags. For RSS 2.0 there should be only one.
  73. if( $rss->length == 1 ) return 'rss';
  74. // Get the tags with name 'feed'.
  75. $atom = $feed->getElementsByTagName('feed');
  76. // Verify Atom 'feed' tag. Atom dosen't have a continer.
  77. // So there may be more than one tag named 'feed'. For now we just
  78. // read blogs that provide Atom feeds only with one 'feed' tag.
  79. if( $atom->length == 1 ) return 'atom';
  80. return 'none';
  81. }
  82. /**
  83. * RSS feeds function.
  84. *
  85. * Gets the RSS feeds of a blog.
  86. *
  87. * @param $feed
  88. * DOM object (XML file).
  89. *
  90. * @return.
  91. * Returns an array with blog feeds.
  92. */
  93. function rss_feeds( $feed )
  94. {
  95. $feeds = array();
  96. // Verify object.
  97. if ( !is_object($feed) ) return FALSE;
  98. // Get the RSS channels.
  99. $channels = $feed->getElementsByTagName("channel");
  100. // Verify channels. Blogs only with one channel.
  101. if ( $channels->length != 1 ) return FALSE;
  102. // Get the channel.
  103. $channel = $channels->item(0);
  104. // Get the channel items.
  105. $items = $channel->getElementsByTagName("item");
  106. // Process chnnel items.
  107. foreach ( $items as $item )
  108. {
  109. // Get items tags.
  110. $tags = rss_tags($item);
  111. // Verify tags data.
  112. if ( !empty($tags) ) $feeds[] = $tags;
  113. }
  114. return $feeds;
  115. }
  116. /**
  117. * Atom feeds function.
  118. *
  119. * Gets the Atom feeds of a blog.
  120. *
  121. * @param $feed
  122. * DOM object (XML file).
  123. *
  124. * @return.
  125. * Returns an array with blog feeds.
  126. */
  127. function atom_feeds( $feed )
  128. {
  129. $feeds = array();
  130. // Verify object.
  131. if ( !is_object($feed) ) return FALSE;
  132. // Get the Atom feed tags.
  133. $feed_tags = $feed->getElementsByTagName("feed");
  134. // Verify feed. Blogs only with one feed tag.
  135. if ( $feed_tags->length != 1 ) return FALSE;
  136. // Get the feed tag.
  137. $feed_tag = $feed_tags->item(0);
  138. // Get the feed entries tags.
  139. $entries = $feed_tag->getElementsByTagName("entry");
  140. // Process chnnel items.
  141. foreach ($entries as $entry)
  142. {
  143. // Get the entry tags.
  144. $tags = atom_tags($entry);
  145. // Verify tags data.
  146. if ( !empty($tags) ) $feeds[] = $tags;
  147. }
  148. return $feeds;
  149. }
  150. /**
  151. * RSS tags function.
  152. *
  153. * Gets the rss tags (title, pubDate, description, link etc.) of an item.
  154. *
  155. * @param $item
  156. * Item of a RSS channel.
  157. *
  158. * @return
  159. * Returns the RSS item tags or FALSE on error.
  160. */
  161. function rss_tags( $item )
  162. {
  163. // Get the title tag.
  164. $title = $item->getElementsByTagName("title");
  165. if ( $title->length == 0 ) return FALSE;
  166. // Get the title text content.
  167. $title = $title->item(0);
  168. if( !is_object($title) ) return FALSE;
  169. $title = $title->textContent;
  170. // Verify title text.
  171. if( empty($title) ) return FALSE;
  172. // Get the link tag.
  173. $link = $item->getElementsByTagName("link");
  174. if ( $link->length == 0 ) return FALSE;
  175. // Get the link text content.
  176. $link = $link->item(0);
  177. if( !is_object($link) ) return FALSE;
  178. $link = $link->textContent;
  179. // Verify $link context
  180. if( empty($link) ) return FALSE;
  181. // Get pubDate tag.
  182. $date = $item->getElementsByTagName("pubDate");
  183. if ( $date->length == 0 ) return FALSE;
  184. // Get the date text content.
  185. $date = $date->item(0);
  186. if( !is_object($date) ) return FALSE;
  187. $date = $date->textContent;
  188. // Verify date context.
  189. if( empty($date) ) return FALSE;
  190. // Get description tag.
  191. $description = $item->getElementsByTagName("description");
  192. if ( $description->length == 0 )
  193. {
  194. // Set default value.
  195. $description = '';
  196. }
  197. else
  198. {
  199. // Get the description text content.
  200. $description = $description->item(0);
  201. if ( !is_object($description) ) $description = '';
  202. else $description = $description->textContent;
  203. }
  204. // Prepare data.
  205. $tags["title"] = $title;
  206. $tags["link"] = $link;
  207. $tags["date"] = $date;
  208. $tags["description"] = $description;
  209. return $tags;
  210. }
  211. /**
  212. * Atom tags function.
  213. *
  214. * Gets the Atom entry tags.
  215. *
  216. * @param $entry
  217. * Entry of an Atom feed.
  218. *
  219. * @return
  220. * Returns the Atom entry tags or FALSE on error.
  221. */
  222. function atom_tags( $entry )
  223. {
  224. // Get the title tag.
  225. $title = $entry->getElementsByTagName("title");
  226. if ( $title->length == 0 ) return FALSE;
  227. // Get the title text content.
  228. $title = $title->item(0);
  229. if (!is_object($title)) return FALSE;
  230. $title = $title->textContent;
  231. // Verify title text.
  232. if(empty($title)) return FALSE;
  233. // Get the links.
  234. $links = $entry->getElementsByTagName("link");
  235. if ( $links->length == 0 ) return FALSE;
  236. // Get the link.
  237. foreach ($links as $link)
  238. {
  239. // Get the link type.
  240. $type = $link->getAttributeNode('type');
  241. // Get the link rel.
  242. $rel = $link->getAttributeNode('rel');
  243. if (is_object($type) && is_object($rel))
  244. {
  245. // Verify link type and rel.
  246. if ( $rel->value == 'alternate' && $type->value == 'text/html')
  247. {
  248. $link_tag = $link;
  249. break;
  250. }
  251. }
  252. }
  253. // Verify link tag.
  254. if (!isset($link_tag) || !is_object($link_tag)) return FALSE;
  255. // Get the link attribute 'href'.
  256. $link = $link->getAttributeNode('href');
  257. if( !is_object($link) ) return FALSE;
  258. $url = $link->value;
  259. // Verify url text.
  260. if (empty($url)) return FALSE;
  261. // Get published tag.
  262. $date = $entry->getElementsByTagName('published');
  263. if ($date->length == 0) return FALSE;
  264. // Get the date.
  265. $date = $date->item(0);
  266. if( !is_object($date) ) return FALSE;
  267. $date = $date->textContent;
  268. // Verify date context.
  269. if( empty($date) ) return FALSE;
  270. // Get the content tag.
  271. $contents = $entry->getElementsByTagName("content");
  272. if ( $contents->length == 0 )
  273. {
  274. // Set default value.
  275. $content = '';
  276. }
  277. else
  278. {
  279. // Get the content text.
  280. $content = $contents->item(0);
  281. if ( !is_object($content) ) $content = '';
  282. else $content = $content->textContent;
  283. }
  284. // Prepare data.
  285. $tags["title"] = $title;
  286. $tags["link"] = $url;
  287. $tags["date"] = $date;
  288. $tags["description"] = $content;
  289. return $tags;
  290. }
  291. /**
  292. * Feds after date functions.
  293. *
  294. * Slects all feeds after a date.
  295. *
  296. * @param $feeds
  297. * An array of feeds.
  298. *
  299. * @return
  300. * An array of feeds or an empty array.
  301. */
  302. function feeds_after_date( $feeds, $date )
  303. {
  304. // Feeds list afer the date $date.
  305. $feeds_list = array();
  306. // Verify feeds data.
  307. if ( empty($feeds) ) return array();
  308. foreach ( $feeds as $feed )
  309. {
  310. // Compare dates.
  311. if ( strtotime($feed['date']) > strtotime($date) ) $feeds_list[] = $feed;
  312. }
  313. return $feeds_list;
  314. }
  315. /**
  316. * Last feeds date.
  317. *
  318. * Gets the most recent date of a feeds list.
  319. *
  320. * @param $feeds
  321. * An array of feeds.
  322. *
  323. * @return
  324. * Date in format "Y-m-d H:i:s" or FALSE on error.
  325. */
  326. function last_feeds_date( $feeds )
  327. {
  328. // Verify feeds data.
  329. if ( empty($feeds) ) return FALSE;
  330. // Init last date.
  331. $last_date = 0;
  332. foreach ( $feeds as $feed )
  333. {
  334. // Compare dates.
  335. if ($last_date < strtotime($feed['date']))
  336. {
  337. $last_date = strtotime($feed['date']);
  338. }
  339. }
  340. return date("Y-m-d H:i:s", $last_date);
  341. }