12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760 |
- <?php
- /**#@+
- * A parser extension that adds two tags, <ref> and <references> for adding
- * citations to pages
- *
- * @addtogroup Extensions
- *
- * @link http://meta.wikimedia.org/wiki/Cite/Cite.php Documentation
- * @link http://www.w3.org/TR/html4/struct/text.html#edef-CITE <cite> definition in HTML
- * @link http://www.w3.org/TR/2005/WD-xhtml2-20050527/mod-text.html#edef_text_cite <cite> definition in XHTML 2.0
- *
- * @bug 4579
- *
- * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
- * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
- * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
- */
class Cite {
    /**#@+
     * @access private
     */

    /**
     * Datastructure representing <ref> input, keyed first by group name
     * and then by reference, in the format of:
     * <code>
     * array(
     *     'group name' => array(
     *         'user supplied' => array(
     *             'text'   => 'user supplied reference & key',
     *             'count'  => 1, // occurs twice
     *             'key'    => 0, // value of $mOutCnt when first stored
     *             'number' => 1, // The first reference of the group; all
     *                            // occurrences of it use the same number
     *         ),
     *         0 => array(        // anonymous reference, see stack()
     *             'text'  => 'Anonymous reference',
     *             'count' => -1,
     *             'key'   => 1,
     *         ),
     *         'some key' => array(
     *             'text'   => 'this one occurs once',
     *             'count'  => 0,
     *             'key'    => 2,
     *             'number' => 2,
     *         ),
     *     ),
     * );
     * </code>
     *
     * This works because:
     * * PHP's arrays are guaranteed to be returned in the order that
     *   things are inserted into them (unless you mess with that)
     * * User supplied keys can't be integers (guardedRef() rejects
     *   them), therefore avoiding conflict with anonymous keys
     *
     * @var array
     **/
    var $mRefs = array();

    /**
     * Counter handed out as the 'key' field of every newly stored
     * reference (see stack()). clearState() resets it to -1, so the
     * first reference stored after a reset gets key 0.
     *
     * @var int
     */
    var $mOutCnt = 0;

    /**
     * Per-group counters, indexed by group name, that supply the number
     * shown for a reference (see stack() and linkRef()).
     *
     * @var array
     */
    var $mGroupCnt = array();

    /**
     * Internal counter, incremented by stack() for every anonymous
     * reference and for the first occurrence of every named reference.
     * Its value is the link key of anonymous references.
     *
     * @var int
     */
    var $mInCnt = 0;

    /**
     * The backlink labels, in order, to pass as $3 to
     * 'cite_references_link_many_format', defined in
     * 'cite_references_link_many_format_backlink_labels'
     *
     * @var array
     */
    var $mBacklinkLabels;

    /**
     * The parser that invoked the tag currently being processed
     *
     * @var object
     */
    var $mParser;

    /**
     * True when a <ref> or <references> tag is being processed.
     * Used to avoid infinite recursion
     *
     * @var boolean
     */
    var $mInCite = false;

    /**#@-*/

    /**
     * Constructor; registers the parser hooks.
     *
     * NOTE(review): this is a PHP 4-style constructor (method named after
     * the class). PHP 8 no longer treats such a method as a constructor,
     * so setHooks() would have to be called explicitly there.
     */
    function Cite() {
        $this->setHooks();
    }

    /**#@+ @access private */

    /**
     * Callback function for <ref>
     *
     * A <ref> met while another <ref>/<references> is being processed is
     * not evaluated; it is returned as escaped text instead.
     *
     * @param string $str Input
     * @param array $argv Arguments
     * @param object $parser The calling parser
     * @return string
     */
    function ref( $str, $argv, $parser ) {
        wfLoadExtensionMessages( 'Cite' );

        if ( $this->mInCite ) {
            return htmlspecialchars( "<ref>$str</ref>" );
        }

        $this->mInCite = true;
        try {
            $ret = $this->guardedRef( $str, $argv, $parser );
        } catch ( Exception $e ) {
            // Never leave the recursion guard stuck: that would disable
            // every later <ref> and make clearState() a no-op.
            $this->mInCite = false;
            throw $e;
        }
        $this->mInCite = false;

        return $ret;
    }

    /**
     * Validate a <ref> tag and, when it is acceptable, record it
     *
     * @param string|null $str Content of the tag, null when there is none
     * @param array $argv Arguments
     * @param object $parser The calling parser
     * @param string $default_group Group to use when none is given
     * @return string The reference link, or an error message
     */
    function guardedRef( $str, $argv, $parser, $default_group=CITE_DEFAULT_GROUP ) {
        $this->mParser = $parser;

        # The key here is the "name" attribute.
        list( $key, $group ) = $this->refArg( $argv );

        if ( $str === '' ) {
            # <ref ...></ref>. This construct is invalid if
            # it's a contentful ref, but OK if it's a named duplicate and should
            # be equivalent <ref ... />, for compatibility with #tag.
            if ( $key == false ) {
                return $this->error( 'cite_error_ref_no_input' );
            }
            $str = null;
        }

        if ( $key === false ) {
            # refArg() rejected the attributes: more than two of them, an
            # attribute other than name/group, or a group while groups
            # are disabled.
            return $this->error( 'cite_error_ref_too_many_keys' );
        }

        if ( $str === null && $key === null ) {
            # Something like <ref />; this makes no sense.
            return $this->error( 'cite_error_ref_no_key' );
        }

        if ( is_string( $key ) && preg_match( '/^[0-9]+$/', $key ) ) {
            # Numeric names mess up the resulting id's, potentially produ-
            # cing duplicate id's in the XHTML. The Right Thing To Do
            # would be to mangle them, but it's not really high-priority
            # (and would produce weird id's anyway).
            return $this->error( 'cite_error_ref_numeric_key' );
        }

        # First chop out anything that looks like a comment or a complete
        # SGML element, which is a crude way to avoid false alarms for
        # <nowiki>, <pre>, etc.; then look for a leftover opening <ref>.
        $stripped = preg_replace(
            '#<([^ ]+?).*?>.*?</\\1 *>|<!--.*?-->#',
            '',
            (string)$str
        );
        if ( preg_match( '/<ref\b[^<]*?>/', $stripped ) ) {
            # (bug 6199) This most likely implies that someone left off the
            # closing </ref> tag, which will cause the entire article to be
            # eaten up until the next <ref>. So we bail out early instead.
            #
            # Possible improvement: print the warning, followed by the contents
            # of the <ref> tag. This way no part of the article will be eaten
            # even temporarily.
            return $this->error( 'cite_error_included_ref' );
        }

        # Split these into groups.
        if ( $group === null ) {
            $group = $default_group;
        }

        if ( is_string( $key ) || is_string( $str ) ) {
            # We don't care about the content: if the key exists, the ref
            # is presumptively valid. Either it stores a new ref, or re-
            # fers to an existing one. If it refers to a nonexistent ref,
            # we'll figure that out later. Likewise it's definitely valid
            # if there's any content, regardless of key.
            return $this->stack( $str, $key, $group );
        }

        # Not clear how we could get here, but something is probably
        # wrong with the types. Let's fail fast.
        $this->croak( 'cite_error_key_str_invalid', serialize( "$str; $key" ) );
    }

    /**
     * Parse the arguments to the <ref> tag
     *
     * The result always has exactly two elements so that callers can
     * safely destructure it with list().
     *
     * @static
     *
     * @param array $argv The argument vector
     * @return array array( $key, $group ): $key is false on invalid input,
     *               the escaped name when one is given and null otherwise;
     *               $group is the group name, null when none is given and
     *               false on invalid input
     */
    function refArg( $argv ) {
        global $wgAllowCiteGroups;

        $invalid = array( false, false );
        $cnt = count( $argv );

        if ( $cnt > 2 ) {
            // There should only be one key and one group
            return $invalid;
        }
        if ( $cnt == 0 ) {
            // No key, no group
            return array( null, null );
        }

        $key = null;
        $group = null;

        if ( isset( $argv['name'] ) ) {
            // Key given.
            $key = Sanitizer::escapeId( $argv['name'], 'noninitial' );
            --$cnt;
        }
        if ( isset( $argv['group'] ) ) {
            if ( !$wgAllowCiteGroups ) {
                // remove when groups are fully tested.
                return $invalid;
            }
            // Group given.
            $group = $argv['group'];
            --$cnt;
        }

        // Anything left over is an attribute we do not know about
        return ( $cnt == 0 ) ? array( $key, $group ) : $invalid;
    }

    /**
     * Populate $this->mRefs based on input and arguments to <ref>
     *
     * @param string|null $str Input from the <ref> tag
     * @param mixed $key Argument to the <ref> tag as returned by $this->refArg()
     * @param string $group Group the reference belongs to
     * @return string
     */
    function stack( $str, $key = null, $group = CITE_DEFAULT_GROUP ) {
        if ( !isset( $this->mRefs[$group] ) ) {
            $this->mRefs[$group] = array();
        }
        if ( !isset( $this->mGroupCnt[$group] ) ) {
            $this->mGroupCnt[$group] = 0;
        }

        if ( $key === null ) {
            // No key: anonymous reference, marked by a count of -1
            $this->mRefs[$group][] = array(
                'count' => -1,
                'text' => $str,
                'key' => ++$this->mOutCnt
            );
            return $this->linkRef( $group, $this->mInCnt++ );
        }

        if ( !is_string( $key ) ) {
            $this->croak( 'cite_error_stack_invalid_input', serialize( array( $key, $str ) ) );
            return;
        }

        // Valid key
        if ( !isset( $this->mRefs[$group][$key] ) || !is_array( $this->mRefs[$group][$key] ) ) {
            // First occurrence
            $outKey = ++$this->mOutCnt;
            $number = ++$this->mGroupCnt[$group];
            $this->mRefs[$group][$key] = array(
                'text' => $str,
                'count' => 0,
                'key' => $outKey,
                'number' => $number
            );
            $this->mInCnt++;

            return $this->linkRef( $group, $key, $outKey . "-" . 0, $number, "-" . $outKey );
        }

        // We've been here before
        if ( $this->mRefs[$group][$key]['text'] === null && $str !== '' ) {
            // If no text found before, use this text
            $this->mRefs[$group][$key]['text'] = $str;
        }
        $outKey = $this->mRefs[$group][$key]['key'];
        $occurrence = ++$this->mRefs[$group][$key]['count'];

        return $this->linkRef(
            $group,
            $key,
            $outKey . "-" . $occurrence,
            $this->mRefs[$group][$key]['number'],
            "-" . $outKey
        );
    }

    /**
     * Callback function for <references>
     *
     * A <references> met while another <ref>/<references> is being
     * processed is not evaluated; it is returned as escaped text instead.
     *
     * @param string $str Input
     * @param array $argv Arguments
     * @param object $parser The calling parser
     * @return string
     */
    function references( $str, $argv, $parser ) {
        wfLoadExtensionMessages( 'Cite' );

        if ( $this->mInCite ) {
            $tag = is_null( $str ) ? "<references/>" : "<references>$str</references>";
            return htmlspecialchars( $tag );
        }

        $this->mInCite = true;
        try {
            $ret = $this->guardedReferences( $str, $argv, $parser );
        } catch ( Exception $e ) {
            // Same reasoning as in ref(): release the recursion guard
            $this->mInCite = false;
            throw $e;
        }
        $this->mInCite = false;

        return $ret;
    }

    /**
     * Validate a <references> tag and, when it is acceptable, render the
     * list of references of the requested group
     *
     * @param string|null $str Content of the tag; must be empty
     * @param array $argv Arguments; only 'group' is accepted, and only
     *                    when $wgAllowCiteGroups is set
     * @param object $parser The calling parser
     * @param string $group Group to list when none is given
     * @return string The reference list, or an error message
     */
    function guardedReferences( $str, $argv, $parser, $group = CITE_DEFAULT_GROUP ) {
        global $wgAllowCiteGroups;

        $this->mParser = $parser;

        if ( strval( $str ) !== '' ) {
            return $this->error( 'cite_error_references_invalid_input' );
        }

        if ( isset( $argv['group'] ) && $wgAllowCiteGroups ) {
            $group = $argv['group'];
            unset( $argv['group'] );
        }

        if ( count( $argv ) ) {
            // Leftover arguments; the wording depends on whether a group
            // would have been a legal argument.
            return $this->error(
                $wgAllowCiteGroups
                    ? 'cite_error_references_invalid_parameters_group'
                    : 'cite_error_references_invalid_parameters'
            );
        }

        return $this->referencesFormat( $group );
    }

    /**
     * Make output to be returned from the references() function
     *
     * Once rendered, the group is removed from $mRefs and $mGroupCnt so
     * that it can be used again further down the page.
     *
     * @param string $group The group to render
     * @return string XHTML ready for output
     */
    function referencesFormat( $group ) {
        global $wgMemc, $wgCiteCacheReferences;

        // Also covers an entirely empty $mRefs
        if ( empty( $this->mRefs[$group] ) ) {
            return '';
        }

        wfProfileIn( __METHOD__ );
        wfProfileIn( __METHOD__ . '-entries' );
        $ent = array();
        foreach ( $this->mRefs[$group] as $k => $v ) {
            $ent[] = $this->referencesFormatEntry( $k, $v );
        }
        $prefix = wfMsgForContentNoTrans( 'cite_references_prefix' );
        $suffix = wfMsgForContentNoTrans( 'cite_references_suffix' );
        $parserInput = $prefix . implode( "\n", $ent ) . $suffix;
        wfProfileOut( __METHOD__ . '-entries' );

        // Let's try to cache it. The key is only needed (and only
        // computed) when caching is switched on.
        $cacheKey = null;
        $data = null;
        if ( $wgCiteCacheReferences ) {
            $cacheKey = wfMemcKey(
                'citeref',
                md5( $parserInput ),
                $this->mParser->Title()->getArticleID()
            );
            wfProfileIn( __METHOD__ . '-cache-get' );
            $data = $wgMemc->get( $cacheKey );
            wfProfileOut( __METHOD__ . '-cache-get' );
        }

        if ( empty( $data ) ) {
            wfProfileIn( __METHOD__ . '-parse' );
            // Live hack: parse() adds two newlines on WM, can't reproduce it locally -ævar
            $ret = rtrim( $this->parse( $parserInput ), "\n" );
            if ( $wgCiteCacheReferences ) {
                $serData = $this->mParser->serialiseHalfParsedText( $ret );
                $wgMemc->set( $cacheKey, $serData, 86400 );
            }
            wfProfileOut( __METHOD__ . '-parse' );
        } else {
            $ret = $this->mParser->unserialiseHalfParsedText( $data );
        }

        wfProfileOut( __METHOD__ );

        // done, clean up so we can reuse the group
        unset( $this->mRefs[$group] );
        unset( $this->mGroupCnt[$group] );

        return $ret;
    }

    /**
     * Format a single entry for the referencesFormat() function
     *
     * @param string $key The key of the reference
     * @param mixed $val The value of the reference: an array as stored by
     *                   stack(); a plain string is also accepted and is
     *                   formatted as an anonymous reference
     * @return string Wikitext
     */
    function referencesFormatEntry( $key, $val ) {
        // Anonymous reference given as a bare string
        if ( !is_array( $val ) ) {
            return wfMsgForContentNoTrans(
                'cite_references_link_one',
                $this->referencesKey( $key ),
                $this->refKey( $key ),
                $val
            );
        }

        // A reference that never received any text: show an error in its
        // place. Every branch below can therefore rely on the text being
        // non-empty.
        if ( $val['text'] == '' ) {
            return wfMsgForContentNoTrans(
                'cite_references_link_one',
                $this->referencesKey( $key ),
                $this->refKey( $key, $val['count'] ),
                $this->error( 'cite_error_references_no_text', $key )
            );
        }

        // Anonymous reference stored by stack() (count of -1)
        if ( $val['count'] < 0 ) {
            return wfMsgForContentNoTrans(
                'cite_references_link_one',
                $this->referencesKey( $val['key'] ),
                $this->refKey( $val['key'] ),
                $val['text']
            );
        }

        // Standalone named reference, I want to format this like an
        // anonymous reference because displaying "1. 1.1 Ref text" is
        // overkill and users frequently use named references when they
        // don't need them for convenience
        if ( $val['count'] === 0 ) {
            return wfMsgForContentNoTrans(
                'cite_references_link_one',
                $this->referencesKey( $key . "-" . $val['key'] ),
                $this->refKey( $key, $val['key'] . "-" . $val['count'] ),
                $val['text']
            );
        }

        // Named references with >1 occurrences.
        // For group handling, we have an extra key here.
        $links = array();
        for ( $i = 0; $i <= $val['count']; ++$i ) {
            $links[] = wfMsgForContentNoTrans(
                'cite_references_link_many_format',
                $this->refKey( $key, $val['key'] . "-$i" ),
                $this->referencesFormatEntryNumericBacklinkLabel( $val['number'], $i, $val['count'] ),
                $this->referencesFormatEntryAlternateBacklinkLabel( $i )
            );
        }

        return wfMsgForContentNoTrans(
            'cite_references_link_many',
            $this->referencesKey( $key . "-" . $val['key'] ),
            $this->listToText( $links ),
            $val['text']
        );
    }

    /**
     * Generate a numeric backlink given a base number and an
     * offset, e.g. $base = 1, $offset = 2; = 1.2
     * Since bug #5525, it correctly does 1.9 -> 1.10 as well as 1.099 -> 1.100
     *
     * @static
     *
     * @param int $base The base
     * @param int $offset The offset
     * @param int $max Maximum value expected.
     * @return string
     */
    function referencesFormatEntryNumericBacklinkLabel( $base, $offset, $max ) {
        global $wgContLang;

        // Zero-pad the offset to as many digits as the largest offset has
        $width = strlen( $max );

        return $wgContLang->formatNum(
            sprintf( "%s.%0{$width}s", $base, $offset )
        );
    }

    /**
     * Generate a custom format backlink given an offset, e.g.
     * $offset = 2; = c if $this->mBacklinkLabels = array( 'a',
     * 'b', 'c', ...). Return an error if there is no label at that
     * offset
     *
     * @param int $offset The offset
     *
     * @return string
     */
    function referencesFormatEntryAlternateBacklinkLabel( $offset ) {
        if ( !isset( $this->mBacklinkLabels ) ) {
            $this->genBacklinkLabels();
        }

        if ( !isset( $this->mBacklinkLabels[$offset] ) ) {
            // Feed me!
            return $this->error( 'cite_error_references_no_backlink_label' );
        }

        return $this->mBacklinkLabels[$offset];
    }

    /**
     * Build a key for use in wikitext output from the
     * 'cite_reference_link_prefix' / 'cite_reference_link_suffix'
     * messages, a key and optionally a number. linkRef() passes the
     * result as $1 of 'cite_reference_link'; referencesFormatEntry()
     * uses it for the link back from the list entry.
     *
     * @static
     *
     * @param string $key The key
     * @param int $num The number of the key
     * @return string A key for use in wikitext
     */
    function refKey( $key, $num = null ) {
        $prefix = wfMsgForContent( 'cite_reference_link_prefix' );
        $suffix = wfMsgForContent( 'cite_reference_link_suffix' );

        if ( isset( $num ) ) {
            $key = wfMsgForContentNoTrans( 'cite_reference_link_key_with_num', $key, $num );
        }

        return $prefix . $key . $suffix;
    }

    /**
     * Build a key for use in wikitext output from the
     * 'cite_references_link_prefix' / 'cite_references_link_suffix'
     * messages, a key and optionally a number. linkRef() passes the
     * result as $2 of 'cite_reference_link'; referencesFormatEntry()
     * passes it as $1 of the list entry messages.
     *
     * @static
     *
     * @param string $key The key
     * @param int $num The number of the key
     * @return string A key for use in wikitext
     */
    function referencesKey( $key, $num = null ) {
        $prefix = wfMsgForContent( 'cite_references_link_prefix' );
        $suffix = wfMsgForContent( 'cite_references_link_suffix' );

        if ( isset( $num ) ) {
            $key = wfMsgForContentNoTrans( 'cite_reference_link_key_with_num', $key, $num );
        }

        return $prefix . $key . $suffix;
    }

    /**
     * Generate a link (<sup ...) for the <ref> element from a key
     * and return XHTML ready for output
     *
     * @param string $group The group of the reference; shown in front of
     *                      the label unless it is the default group
     * @param string $key The key for the link
     * @param int $count The index of the key, used for distinguishing
     *                   multiple occurrences of the same key
     * @param int $label The label to use for the link, I want to
     *                   use the same label for all occurrences of
     *                   the same named reference. When null, the next
     *                   number of the group is used.
     * @param string $subkey Suffix appended to $key for the references key
     * @return string
     */
    function linkRef( $group, $key, $count = null, $label = null, $subkey = '' ) {
        global $wgContLang;

        $number = is_null( $label ) ? ++$this->mGroupCnt[$group] : $label;
        $groupPrefix = ( $group == CITE_DEFAULT_GROUP ) ? '' : "$group ";

        return $this->parse(
            wfMsgForContentNoTrans(
                'cite_reference_link',
                $this->refKey( $key, $count ),
                $this->referencesKey( $key . $subkey ),
                $groupPrefix . $wgContLang->formatNum( $number )
            )
        );
    }

    /**
     * This does approximately the same thing as
     * Language::listToText() but due to this being used for a
     * slightly different purpose (people might not want , as the
     * first separator and not 'and' as the second, and this has to
     * use messages from the content language) I'm rolling my own.
     *
     * @static
     *
     * @param array $arr The array to format
     * @return string The formatted list, '' for an empty array
     */
    function listToText( $arr ) {
        $cnt = count( $arr );

        if ( $cnt == 0 ) {
            // Nothing to join
            return '';
        }
        if ( $cnt == 1 ) {
            // Enforce always returning a string
            return (string)$arr[0];
        }

        // The separators are only needed from two items upwards
        $sep = wfMsgForContentNoTrans( 'cite_references_link_many_sep' );
        $and = wfMsgForContentNoTrans( 'cite_references_link_many_and' );
        $leading = array_slice( $arr, 0, $cnt - 1 );

        return implode( $sep, $leading ) . $and . $arr[$cnt - 1];
    }

    /**
     * Parse a given fragment and fix up what Tidy did to it
     *
     * @param string $in The text to parse
     * @return string The parsed text
     */
    function parse( $in ) {
        if ( method_exists( $this->mParser, 'recursiveTagParse' ) ) {
            // New fast method
            return $this->mParser->recursiveTagParse( $in );
        }

        // Old method
        $ret = $this->mParser->parse(
            $in,
            $this->mParser->mTitle,
            $this->mParser->mOptions,
            // Avoid whitespace buildup
            false,
            // Important, otherwise $this->clearState()
            // would get run every time <ref> or
            // <references> is called, wrecking the
            // whole thing.
            false
        );

        return $this->fixTidy( $ret->getText() );
    }

    /**
     * Tidy treats all input as a block, it will e.g. wrap most
     * input in <p> if it isn't already, fix that and return the fixed text
     *
     * @static
     *
     * @param string $text The text to fix
     * @return string The fixed text; unchanged when $wgUseTidy is off
     */
    function fixTidy( $text ) {
        global $wgUseTidy;

        if ( !$wgUseTidy ) {
            return $text;
        }

        // Leading <p>, every </p> with its surrounding whitespace, and
        // one trailing newline
        $text = preg_replace( '~^<p>\s*~', '', $text );
        $text = preg_replace( '~\s*</p>\s*~', '', $text );
        $text = preg_replace( '~\n$~', '', $text );

        return $text;
    }

    /**
     * Generate the labels to pass to the
     * 'cite_references_link_many_format' message, the format is an
     * arbitrary number of tokens separated by [\t\n ]
     */
    function genBacklinkLabels() {
        wfProfileIn( __METHOD__ );
        $text = wfMsgForContentNoTrans( 'cite_references_link_many_format_backlink_labels' );
        // Split on runs of separators and drop empty tokens, so that a
        // double space or a trailing newline in the message does not
        // yield an empty label.
        $this->mBacklinkLabels = preg_split( '#[\n\t ]+#', $text, -1, PREG_SPLIT_NO_EMPTY );
        wfProfileOut( __METHOD__ );
    }

    /**
     * Gets run when Parser::clearState() gets run, since we don't
     * want the counts to transcend pages and other instances
     *
     * @return bool Always true
     */
    function clearState() {
        # Don't clear state when we're in the middle of parsing
        # a <ref> tag
        if ( $this->mInCite ) {
            return true;
        }

        $this->mGroupCnt = array();
        $this->mOutCnt = -1;
        $this->mInCnt = 0;
        $this->mRefs = array();

        return true;
    }

    /**
     * Called at the end of page processing to append an error if refs were
     * used without a references tag.
     *
     * @param object $parser The parser; nothing is appended when its
     *                       options report a section preview
     * @param string $text The page text, appended to in place
     * @return bool Always true
     */
    function checkRefsNoReferences( &$parser, &$text ) {
        if ( $parser->getOptions()->getIsSectionPreview() ) {
            return true;
        }

        // referencesFormat() removes every group it has rendered, so
        // whatever is still in $mRefs was never listed.
        foreach ( $this->mRefs as $group => $refs ) {
            if ( count( $refs ) == 0 ) {
                continue;
            }
            $text .= "\n<br />";
            if ( $group == CITE_DEFAULT_GROUP ) {
                $text .= $this->error( 'cite_error_refs_without_references' );
            } else {
                $text .= $this->error( 'cite_error_group_refs_without_references', htmlspecialchars( $group ) );
            }
        }

        return true;
    }

    /**
     * Initialize the parser hooks
     */
    function setHooks() {
        global $wgParser, $wgHooks;

        $wgParser->setHook( 'ref' , array( &$this, 'ref' ) );
        $wgParser->setHook( 'references' , array( &$this, 'references' ) );

        $wgHooks['ParserClearState'][] = array( &$this, 'clearState' );
        $wgHooks['ParserBeforeTidy'][] = array( &$this, 'checkRefsNoReferences' );
    }

    /**
     * Return an error message based on an error ID
     *
     * @param string $key Message name for the error
     * @param string $param Parameter to pass to the message
     * @return string XHTML ready for output
     */
    function error( $key, $param=null ) {
        # We rely on the fact that PHP is okay with passing unused argu-
        # ments to functions. If $1 is not used in the message, wfMsg will
        # just ignore the extra parameter.
        $message = wfMsgNoTrans( 'cite_error', wfMsgNoTrans( $key, $param ) );

        return $this->parse( '<strong class="error">' . $message . '</strong>' );
    }

    /**
     * Die with a backtrace if something happens in the code which
     * shouldn't have
     *
     * @param string $error Message name for the error
     * @param string $data Serialized error data
     */
    function croak( $error, $data ) {
        wfDebugDieBacktrace( wfMsgForContent( 'cite_croak', $this->error( $error ), $data ) );
    }

    /**#@-*/
}
- ?>
|