Feed.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. <?php
  2. /**
  3. * Basic support for outputting syndication feeds in RSS, other formats.
  4. *
  5. * Contains the feed item class as well as the classes that build RSS / Atom feeds.
  6. * Available feeds are defined in Defines.php
  7. *
  8. * Copyright © 2004 Brion Vibber <brion@pobox.com>
  9. * https://www.mediawiki.org/
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License along
  22. * with this program; if not, write to the Free Software Foundation, Inc.,
  23. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  24. * http://www.gnu.org/copyleft/gpl.html
  25. *
  26. * @file
  27. */
  28. /**
  29. * @defgroup Feed Feed
  30. */
  31. /**
  32. * A base class for basic support for outputting syndication feeds in RSS and other formats.
  33. *
  34. * @ingroup Feed
  35. */
  class FeedItem {
      /** @var string|Title Title of the item, as handed to the constructor */
      public $title;

      /** @var string Body text of the item (unescaped) */
      public $description;

      /** @var string URL of the item (unescaped) */
      public $url;

      /** @var string Date of the item, as handed to the constructor */
      public $date;

      /** @var string Author's user name (unescaped) */
      public $author;

      /** @var string Unique id; initialised to the item URL by the constructor */
      public $uniqueId;

      /** @var string Comments value of the item (unescaped) */
      public $comments;

      /** @var bool Whether the unique id is a permalink (RSS feeds only) */
      public $rssIsPermalink = false;

      /**
       * @param string|Title $title Item's title
       * @param string $description
       * @param string $url URL uniquely designating the item.
       * @param string $date Item's date
       * @param string $author Author's user name
       * @param string $comments
       */
      public function __construct(
          $title, $description, $url, $date = '', $author = '', $comments = ''
      ) {
          $this->title = $title;
          $this->description = $description;
          $this->url = $url;
          // The URL doubles as the unique id until setUniqueId() overrides it.
          $this->uniqueId = $url;
          $this->date = $date;
          $this->author = $author;
          $this->comments = $comments;
      }

      /**
       * Encode $string so that it can be safely embedded in a XML document.
       *
       * Windows line endings are normalised to "\n", control characters other
       * than tab, line feed and carriage return are dropped, and the result is
       * passed through htmlspecialchars().
       *
       * @param string|null $string String to encode; null is treated as ''
       * @return string
       */
      public function xmlEncode( $string ) {
          // Cast up front so that null and stringable objects never reach the
          // string functions below as non-strings (passing null to them is
          // deprecated as of PHP 8.1). The encoded result is unchanged.
          $string = (string)$string;
          $string = str_replace( "\r\n", "\n", $string );
          $string = preg_replace( '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', '', $string );
          return htmlspecialchars( $string );
      }

      /**
       * Get the unique id of this item; already xml-encoded
       *
       * @return string|null Null when the item has no unique id
       */
      public function getUniqueID() {
          $id = $this->getUniqueIdUnescaped();
          return $id ? $this->xmlEncode( $id ) : null;
      }

      /**
       * Get the unique id of this item, without any escaping
       *
       * The stored id is expanded with wfExpandUrl() using PROTO_CURRENT.
       *
       * @return string|null Null when the item has no unique id
       */
      public function getUniqueIdUnescaped() {
          return $this->uniqueId ? wfExpandUrl( $this->uniqueId, PROTO_CURRENT ) : null;
      }

      /**
       * Set the unique id of an item
       *
       * @param string $uniqueId Unique id for the item
       * @param bool $rssIsPermalink Set to true if the guid (unique id) is a permalink (RSS feeds only)
       */
      public function setUniqueId( $uniqueId, $rssIsPermalink = false ) {
          $this->uniqueId = $uniqueId;
          $this->rssIsPermalink = $rssIsPermalink;
      }

      /**
       * Get the title of this item; already xml-encoded
       *
       * @return string
       */
      public function getTitle() {
          return $this->xmlEncode( $this->title );
      }

      /**
       * Get the URL of this item; already xml-encoded
       *
       * @return string
       */
      public function getUrl() {
          return $this->xmlEncode( $this->url );
      }

      /**
       * Get the URL of this item without any escaping
       *
       * @return string
       */
      public function getUrlUnescaped() {
          return $this->url;
      }

      /**
       * Get the description of this item; already xml-encoded
       *
       * @return string
       */
      public function getDescription() {
          return $this->xmlEncode( $this->description );
      }

      /**
       * Get the description of this item without any escaping
       *
       * @return string
       */
      public function getDescriptionUnescaped() {
          return $this->description;
      }

      /**
       * Get the language of this item
       *
       * This is the site-wide $wgLanguageCode passed through
       * LanguageCode::bcp47(); it is not stored per item.
       *
       * @return string
       */
      public function getLanguage() {
          global $wgLanguageCode;
          return LanguageCode::bcp47( $wgLanguageCode );
      }

      /**
       * Get the date of this item, exactly as it was given to the constructor
       *
       * @return string
       */
      public function getDate() {
          return $this->date;
      }

      /**
       * Get the author of this item; already xml-encoded
       *
       * @return string
       */
      public function getAuthor() {
          return $this->xmlEncode( $this->author );
      }

      /**
       * Get the author of this item without any escaping
       *
       * @return string
       */
      public function getAuthorUnescaped() {
          return $this->author;
      }

      /**
       * Get the comment of this item; already xml-encoded
       *
       * @return string
       */
      public function getComments() {
          return $this->xmlEncode( $this->comments );
      }

      /**
       * Get the comment of this item without any escaping
       *
       * @return string
       */
      public function getCommentsUnescaped() {
          return $this->comments;
      }

      /**
       * Quickie hack... strip out wikilinks to more legible form from the comment.
       *
       * "[[Target|label]]" becomes "label" and "[[Target]]" becomes "Target".
       *
       * @param string $text Wikitext
       * @return string
       */
      public static function stripComment( $text ) {
          return preg_replace( '/\[\[([^]]*\|)?([^]]+)\]\]/', '\2', $text );
      }
  }
  202. /**
  203. * Class to support the outputting of syndication feeds in Atom and RSS format.
  204. *
  205. * @ingroup Feed
  206. */
  abstract class ChannelFeed extends FeedItem {
      /** @var TemplateParser Renders the header / item templates used by subclasses */
      protected $templateParser;

      /**
       * @param string|Title $title Feed's title
       * @param string $description
       * @param string $url URL uniquely designating the feed.
       * @param string $date Feed's date
       * @param string $author Author's user name
       * @param string $comments
       */
      public function __construct(
          $title, $description, $url, $date = '', $author = '', $comments = ''
      ) {
          parent::__construct( $title, $description, $url, $date, $author, $comments );
          $this->templateParser = new TemplateParser();
      }

      /**
       * Generate Header of the feed
       * @par Example:
       * @code
       * print "<feed>";
       * @endcode
       */
      abstract public function outHeader();

      /**
       * Generate an item
       * @par Example:
       * @code
       * print "<item>...</item>";
       * @endcode
       * @param FeedItem $item
       */
      abstract public function outItem( $item );

      /**
       * Generate Footer of the feed
       * @par Example:
       * @code
       * print "</feed>";
       * @endcode
       */
      abstract public function outFooter();

      /**
       * Setup and send HTTP headers. Don't send any content;
       * content might end up being cached and re-sent with
       * these same headers later.
       *
       * This should be called from the outHeader() method,
       * but can also be called separately.
       */
      public function httpHeaders() {
          global $wgOut, $wgVaryOnXFP;

          # We take over from $wgOut, excepting its cache header info
          $wgOut->disable();

          $mimetype = $this->contentType();
          header( "Content-type: $mimetype; charset=UTF-8" );

          // Set a sane filename: use the first extension registered for the
          // MIME type, falling back to "xml" when none is known.
          $exts = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer()
              ->getExtensionsForType( $mimetype );
          $ext = $exts ? strtok( $exts, ' ' ) : 'xml';
          header( "Content-Disposition: inline; filename=\"feed.{$ext}\"" );

          if ( $wgVaryOnXFP ) {
              $wgOut->addVaryHeader( 'X-Forwarded-Proto' );
          }
          $wgOut->sendCacheControl();
      }

      /**
       * Return an internet media type to be sent in the headers.
       *
       * The type is taken from the "ctype" request parameter, but only when it
       * is one of a fixed set of XML media types; anything else yields
       * "application/xml".
       *
       * @return string
       */
      private function contentType() {
          global $wgRequest;

          $ctype = $wgRequest->getVal( 'ctype', 'application/xml' );
          $allowedctypes = [
              'application/xml',
              'text/xml',
              'application/rss+xml',
              'application/atom+xml'
          ];

          // Strict comparison: the value is request-supplied, so no type
          // juggling should take part in the whitelist check.
          return in_array( $ctype, $allowedctypes, true ) ? $ctype : 'application/xml';
      }

      /**
       * Output the initial XML headers.
       *
       * Sends the HTTP headers first, then prints the XML declaration.
       */
      protected function outXmlHeader() {
          $this->httpHeaders();
          echo '<?xml version="1.0"?>' . "\n";
      }
  }
  295. /**
  296. * Generate a RSS feed
  297. *
  298. * @ingroup Feed
  299. */
  class RSSFeed extends ChannelFeed {
      /**
       * Format a date given a timestamp. If a timestamp is not given, nothing is returned
       *
       * @param string|int|null $ts Timestamp, in any form wfTimestamp() accepts
       * @return string|null Date string such as "Thu, 02 Jan 2020 03:04:05 GMT",
       *  or null when $ts is empty
       */
      public function formatTime( $ts ) {
          if ( $ts ) {
              return gmdate( 'D, d M Y H:i:s \G\M\T', wfTimestamp( TS_UNIX, $ts ) );
          }
          return null;
      }

      /**
       * Output an RSS 2.0 header
       *
       * Sends the HTTP and XML headers, then prints the 'RSSHeader' template.
       */
      public function outHeader() {
          global $wgVersion;

          $this->outXmlHeader();

          // Manually escaping rather than letting Mustache do it because Mustache
          // uses htmlentities, which does not work with XML
          $templateParams = [
              'title' => $this->getTitle(),
              'url' => $this->xmlEncode( wfExpandUrl( $this->getUrlUnescaped(), PROTO_CURRENT ) ),
              'description' => $this->getDescription(),
              'language' => $this->xmlEncode( $this->getLanguage() ),
              'version' => $this->xmlEncode( $wgVersion ),
              'timestamp' => $this->xmlEncode( $this->formatTime( wfTimestampNow() ) )
          ];
          print $this->templateParser->processTemplate( 'RSSHeader', $templateParams );
      }

      /**
       * Output an RSS 2.0 item
       *
       * @param FeedItem $item Item to be output
       */
      public function outItem( $item ) {
          // Manually escaping rather than letting Mustache do it because Mustache
          // uses htmlentities, which does not work with XML
          $templateParams = [
              "title" => $item->getTitle(),
              "url" => $this->xmlEncode( wfExpandUrl( $item->getUrlUnescaped(), PROTO_CURRENT ) ),
              "permalink" => $item->rssIsPermalink,
              "uniqueID" => $item->getUniqueID(),
              "description" => $item->getDescription(),
              // formatTime() yields null for an item without a date; cast so
              // the encoder always receives a string (same '' output).
              "date" => $this->xmlEncode( (string)$this->formatTime( $item->getDate() ) ),
              "author" => $item->getAuthor()
          ];

          // The comments entry is only added when the item has a comments value;
          // that value is expanded as a URL before being escaped.
          $comments = $item->getCommentsUnescaped();
          if ( $comments ) {
              $commentsEscaped = $this->xmlEncode( wfExpandUrl( $comments, PROTO_CURRENT ) );
              $templateParams["comments"] = $commentsEscaped;
          }

          print $this->templateParser->processTemplate( 'RSSItem', $templateParams );
      }

      /**
       * Output an RSS 2.0 footer
       */
      public function outFooter() {
          print "</channel></rss>";
      }
  }
  360. /**
  361. * Generate an Atom feed
  362. *
  363. * @ingroup Feed
  364. */
  class AtomFeed extends ChannelFeed {
      /**
       * Format a date given timestamp, if one is given.
       *
       * @param string|int|null $timestamp Timestamp, in any form wfTimestamp() accepts
       * @return string|null Date string such as "2020-01-02T03:04:05" (GMT),
       *  or null when $timestamp is empty
       */
      public function formatTime( $timestamp ) {
          if ( $timestamp ) {
              // The value is GMT but carries no timezone designator.
              // NOTE(review): presumably the Atom templates append one
              // (e.g. "Z") - confirm against AtomHeader / AtomItem.
              return gmdate( 'Y-m-d\TH:i:s', wfTimestamp( TS_UNIX, $timestamp ) );
          }
          return null;
      }

      /**
       * Outputs a basic header for Atom 1.0 feeds.
       *
       * Sends the HTTP and XML headers, then prints the 'AtomHeader' template.
       */
      public function outHeader() {
          global $wgVersion;

          $this->outXmlHeader();

          // Manually escaping rather than letting Mustache do it because Mustache
          // uses htmlentities, which does not work with XML
          $templateParams = [
              'language' => $this->xmlEncode( $this->getLanguage() ),
              'feedID' => $this->getFeedId(),
              'title' => $this->getTitle(),
              'url' => $this->xmlEncode( wfExpandUrl( $this->getUrlUnescaped(), PROTO_CURRENT ) ),
              'selfUrl' => $this->getSelfUrl(),
              'timestamp' => $this->xmlEncode( $this->formatTime( wfTimestampNow() ) ),
              'description' => $this->getDescription(),
              'version' => $this->xmlEncode( $wgVersion ),
          ];
          print $this->templateParser->processTemplate( 'AtomHeader', $templateParams );
      }

      /**
       * Atom 1.0 requires a unique, opaque IRI as a unique identifier
       * for every feed we create. For now just use the URL, but who
       * can tell if that's right? If we put options on the feed, do we
       * have to change the id? Maybe? Maybe not.
       *
       * @return string Already xml-encoded
       */
      private function getFeedId() {
          return $this->getSelfUrl();
      }

      /**
       * Atom 1.0 requests a self-reference to the feed.
       *
       * @return string Full URL of the current request; already xml-encoded
       */
      private function getSelfUrl() {
          global $wgRequest;

          // Use the same encoder as every other template parameter so that
          // characters not allowed in XML are stripped here as well.
          return $this->xmlEncode( $wgRequest->getFullRequestURL() );
      }

      /**
       * Output a given item.
       *
       * @param FeedItem $item
       */
      public function outItem( $item ) {
          global $wgMimeType;

          // Manually escaping rather than letting Mustache do it because Mustache
          // uses htmlentities, which does not work with XML
          $templateParams = [
              "uniqueID" => $item->getUniqueID(),
              "title" => $item->getTitle(),
              "mimeType" => $this->xmlEncode( $wgMimeType ),
              "url" => $this->xmlEncode( wfExpandUrl( $item->getUrlUnescaped(), PROTO_CURRENT ) ),
              // formatTime() yields null for an item without a date; cast so
              // the encoder always receives a string (same '' output).
              "date" => $this->xmlEncode( (string)$this->formatTime( $item->getDate() ) ),
              "description" => $item->getDescription(),
              "author" => $item->getAuthor()
          ];
          print $this->templateParser->processTemplate( 'AtomItem', $templateParams );
      }

      /**
       * Outputs the footer for Atom 1.0 feed (basically '\</feed\>').
       */
      public function outFooter() {
          print "</feed>";
      }
  }