MediaWikiTitleCodecTest.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. * @author Daniel Kinzler
  20. */
  21. /**
  22. * @covers MediaWikiTitleCodec
  23. *
  24. * @group Title
  25. * @group Database
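  26. * ^--- needed because of global state in the code under test (presumably the
  26. *      interwiki lookup and the cached Language objects; see setUp() and makeCodec()).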
  27. */
  class MediaWikiTitleCodecTest extends MediaWikiTestCase {

      /**
       * Pins the globals these tests depend on and installs an
       * InterwikiLoadPrefix hook that knows exactly two interwiki prefixes:
       * "localtestiw" (also listed in wgLocalInterwikis) and "remotetestiw".
       * User and content language are both forced to English.
       */
      public function setUp() {
          parent::setUp();

          $this->setMwGlobals( [
              'wgAllowUserJs' => false,
              'wgDefaultLanguageVariant' => false,
              'wgMetaNamespace' => 'Project',
              'wgLocalInterwikis' => [ 'localtestiw' ],
              'wgCapitalLinks' => true,

              // NOTE: this is why global state is evil.
              // TODO: refactor access to the interwiki codes so it can be injected.
              'wgHooks' => [
                  'InterwikiLoadPrefix' => [
                      // Does not use $this, hence static.
                      static function ( $prefix, &$data ) {
                          if ( $prefix === 'localtestiw' ) {
                              $data = [ 'iw_url' => 'localtestiw' ];
                          } elseif ( $prefix === 'remotetestiw' ) {
                              $data = [ 'iw_url' => 'remotetestiw' ];
                          }

                          // Always false, also for unknown prefixes; presumably this
                          // keeps the lookup away from the real interwiki table
                          // (confirm against the InterwikiLoadPrefix hook docs).
                          return false;
                      }
                  ]
              ]
          ] );

          $this->setUserLang( 'en' );
          $this->setContentLang( 'en' );
      }

      /**
       * Returns a mock GenderCache that will consider a user "female" if the
       * first part of the user name ends with "a". The first part is the leading
       * run of characters that are not "-", " " or "_"; it must be followed by a
       * space, an underscore or the end of the name. Everybody else is "male".
       *
       * @return GenderCache
       */
      private function getGenderCache() {
          $genderCache = $this->getMockBuilder( GenderCache::class )
              ->disableOriginalConstructor()
              ->getMock();

          $genderCache->expects( $this->any() )
              ->method( 'getGenderOf' )
              ->will( $this->returnCallback( static function ( $userName ) {
                  return preg_match( '/^[^- _]+a( |_|$)/u', $userName ) ? 'female' : 'male';
              } ) );

          return $genderCache;
      }

      /**
       * Builds the codec under test for the given language, wired to the mock
       * gender cache from getGenderCache().
       *
       * @param string $lang Language code, e.g. 'en' or 'de'
       * @return MediaWikiTitleCodec
       */
      protected function makeCodec( $lang ) {
          $gender = $this->getGenderCache();
          $lang = Language::factory( $lang );

          // The language object can come from a cache, which does not respect
          // the test settings, so its namespace names are reset explicitly.
          $lang->resetNamespaces();

          return new MediaWikiTitleCodec( $lang, $gender );
      }

      /**
       * Rows: namespace (or false for "no namespace"), text, fragment, interwiki,
       * language code, expected formatted title and, optionally, the title
       * expected after a parse/format round trip (defaults to the expected
       * formatted title).
       *
       * @return array[]
       */
      public static function provideFormat() {
          return [
              [ NS_MAIN, 'Foo_Bar', '', '', 'en', 'Foo Bar' ],
              [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', '', 'en', 'User:Hansi Maier#stuff and so on' ],
              [ false, 'Hansi_Maier', '', '', 'en', 'Hansi Maier' ],
              [
                  NS_USER_TALK,
                  'hansi__maier',
                  '',
                  '',
                  'en',
                  'User talk:hansi maier',
                  'User talk:Hansi maier'
              ],

              // getGenderCache() provides a mock that considers first
              // names ending in "a" to be female.
              [ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa Müller' ],
              [ NS_MAIN, 'FooBar', '', 'remotetestiw', 'en', 'remotetestiw:FooBar' ],
          ];
      }

      /**
       * Checks formatTitle() directly, then parses its output again and checks
       * that re-formatting the parsed title yields the normalized form.
       *
       * @dataProvider provideFormat
       *
       * @param int|bool $namespace
       * @param string $text
       * @param string $fragment
       * @param string $interwiki
       * @param string $lang
       * @param string $expected
       * @param string|null $normalized Expected result of the round trip;
       *  null means "same as $expected"
       */
      public function testFormat( $namespace, $text, $fragment, $interwiki, $lang, $expected,
          $normalized = null
      ) {
          if ( $normalized === null ) {
              $normalized = $expected;
          }

          $codec = $this->makeCodec( $lang );
          $actual = $codec->formatTitle( $namespace, $text, $fragment, $interwiki );

          // assertSame: a null/false result must not pass for an expected string.
          $this->assertSame( $expected, $actual, 'formatted' );

          // test round trip
          $parsed = $codec->parseTitle( $actual, NS_MAIN );
          $actual2 = $codec->formatTitle(
              $parsed->getNamespace(),
              $parsed->getText(),
              $parsed->getFragment(),
              $parsed->getInterwiki()
          );

          $this->assertSame( $normalized, $actual2, 'normalized after round trip' );
      }

      /**
       * Rows: namespace, DB key, fragment, language code, expected text.
       *
       * @return array[]
       */
      public static function provideGetText() {
          return [
              [ NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ],
              [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'Hansi Maier' ],
          ];
      }

      /**
       * @dataProvider provideGetText
       *
       * @param int $namespace
       * @param string $dbkey
       * @param string $fragment
       * @param string $lang
       * @param string $expected
       */
      public function testGetText( $namespace, $dbkey, $fragment, $lang, $expected ) {
          $codec = $this->makeCodec( $lang );
          $title = new TitleValue( $namespace, $dbkey, $fragment );

          $this->assertSame( $expected, $codec->getText( $title ) );
      }

      /**
       * Rows: namespace, DB key, fragment, language code, expected prefixed text.
       *
       * @return array[]
       */
      public static function provideGetPrefixedText() {
          return [
              [ NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ],
              [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier' ],

              // No capitalization or normalization is applied while formatting!
              [ NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ],

              // getGenderCache() provides a mock that considers first
              // names ending in "a" to be female.
              [ NS_USER, 'Lisa_Müller', '', 'de', 'Benutzerin:Lisa Müller' ],

              // non-existent namespace
              [ 1000000, 'Invalid_namespace', '', 'en', 'Special:Badtitle/NS1000000:Invalid namespace' ],
          ];
      }

      /**
       * @dataProvider provideGetPrefixedText
       *
       * @param int $namespace
       * @param string $dbkey
       * @param string $fragment
       * @param string $lang
       * @param string $expected
       */
      public function testGetPrefixedText( $namespace, $dbkey, $fragment, $lang, $expected ) {
          $codec = $this->makeCodec( $lang );
          $title = new TitleValue( $namespace, $dbkey, $fragment );

          $this->assertSame( $expected, $codec->getPrefixedText( $title ) );
      }

      /**
       * Rows: namespace, DB key, fragment, interwiki, language code, expected
       * prefixed DB key.
       *
       * @return array[]
       */
      public static function provideGetPrefixedDBkey() {
          return [
              [ NS_MAIN, 'Foo_Bar', '', '', 'en', 'Foo_Bar' ],
              [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', '', 'en', 'User:Hansi_Maier' ],

              // No capitalization or normalization is applied while formatting!
              [ NS_USER_TALK, 'hansi__maier', '', '', 'en', 'User_talk:hansi__maier' ],

              // getGenderCache() provides a mock that considers first
              // names ending in "a" to be female.
              [ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa_Müller' ],

              [ NS_MAIN, 'Remote_page', '', 'remotetestiw', 'en', 'remotetestiw:Remote_page' ],

              // non-existent namespace
              [ 10000000, 'Foobar', '', '', 'en', 'Special:Badtitle/NS10000000:Foobar' ],
          ];
      }

      /**
       * @dataProvider provideGetPrefixedDBkey
       *
       * @param int $namespace
       * @param string $dbkey
       * @param string $fragment
       * @param string $interwiki
       * @param string $lang
       * @param string $expected
       */
      public function testGetPrefixedDBkey( $namespace, $dbkey, $fragment,
          $interwiki, $lang, $expected
      ) {
          $codec = $this->makeCodec( $lang );
          $title = new TitleValue( $namespace, $dbkey, $fragment, $interwiki );

          $this->assertSame( $expected, $codec->getPrefixedDBkey( $title ) );
      }

      /**
       * Rows: namespace, DB key, fragment, language code, expected full text
       * (prefixed text plus "#fragment" when a fragment is given).
       *
       * @return array[]
       */
      public static function provideGetFullText() {
          return [
              [ NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ],
              [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier#stuff and so on' ],

              // No capitalization or normalization is applied while formatting!
              [ NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ],
          ];
      }

      /**
       * @dataProvider provideGetFullText
       *
       * @param int $namespace
       * @param string $dbkey
       * @param string $fragment
       * @param string $lang
       * @param string $expected
       */
      public function testGetFullText( $namespace, $dbkey, $fragment, $lang, $expected ) {
          $codec = $this->makeCodec( $lang );
          $title = new TitleValue( $namespace, $dbkey, $fragment );

          $this->assertSame( $expected, $codec->getFullText( $title ) );
      }

      /**
       * Rows: input text, default namespace, language code and, optionally, the
       * expected result. The expected result is either a TitleValue or a DB key
       * string (taken to be in NS_MAIN); when it is omitted, testParseTitle()
       * derives the DB key from the input text.
       *
       * @return array[]
       */
      public static function provideParseTitle() {
          // TODO: test capitalization and trimming
          // TODO: test unicode normalization

          return [
              [ ' : Hansi_Maier _ ', NS_MAIN, 'en',
                  new TitleValue( NS_MAIN, 'Hansi_Maier', '' ) ],
              [ 'User:::1', NS_MAIN, 'de',
                  new TitleValue( NS_USER, '0:0:0:0:0:0:0:1', '' ) ],
              [ ' lisa Müller', NS_USER, 'de',
                  new TitleValue( NS_USER, 'Lisa_Müller', '' ) ],
              [ 'benutzerin:lisa Müller#stuff', NS_MAIN, 'de',
                  new TitleValue( NS_USER, 'Lisa_Müller', 'stuff' ) ],

              [ ':Category:Quux', NS_MAIN, 'en',
                  new TitleValue( NS_CATEGORY, 'Quux', '' ) ],
              [ 'Category:Quux', NS_MAIN, 'en',
                  new TitleValue( NS_CATEGORY, 'Quux', '' ) ],
              [ 'Category:Quux', NS_CATEGORY, 'en',
                  new TitleValue( NS_CATEGORY, 'Quux', '' ) ],
              [ 'Quux', NS_CATEGORY, 'en',
                  new TitleValue( NS_CATEGORY, 'Quux', '' ) ],
              [ ':Quux', NS_CATEGORY, 'en',
                  new TitleValue( NS_MAIN, 'Quux', '' ) ],

              // Surrounding whitespace/underscores are dropped, inner runs are
              // collapsed to a single underscore, the first letter is capitalized.
              [ 'a b c', NS_MAIN, 'en',
                  new TitleValue( NS_MAIN, 'A_b_c' ) ],
              [ ' a b c ', NS_MAIN, 'en',
                  new TitleValue( NS_MAIN, 'A_b_c' ) ],
              [ ' _ Foo __ Bar_ _', NS_MAIN, 'en',
                  new TitleValue( NS_MAIN, 'Foo_Bar' ) ],

              // NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync.
              [ 'Sandbox', NS_MAIN, 'en' ],
              [ 'A "B"', NS_MAIN, 'en' ],
              [ 'A \'B\'', NS_MAIN, 'en' ],
              [ '.com', NS_MAIN, 'en' ],
              [ '~', NS_MAIN, 'en' ],
              [ '"', NS_MAIN, 'en' ],
              [ '\'', NS_MAIN, 'en' ],

              [ 'Talk:Sandbox', NS_MAIN, 'en',
                  new TitleValue( NS_TALK, 'Sandbox' ) ],
              [ 'Talk:Foo:Sandbox', NS_MAIN, 'en',
                  new TitleValue( NS_TALK, 'Foo:Sandbox' ) ],
              [ 'File:Example.svg', NS_MAIN, 'en',
                  new TitleValue( NS_FILE, 'Example.svg' ) ],
              [ 'File_talk:Example.svg', NS_MAIN, 'en',
                  new TitleValue( NS_FILE_TALK, 'Example.svg' ) ],
              [ 'Foo/.../Sandbox', NS_MAIN, 'en',
                  'Foo/.../Sandbox' ],
              [ 'Sandbox/...', NS_MAIN, 'en',
                  'Sandbox/...' ],
              [ 'A~~', NS_MAIN, 'en',
                  'A~~' ],

              // The input is 257 characters in total ("Category:" plus 248),
              // but only the title part counts against the length limit.
              [ 'Category:' . str_repeat( 'x', 248 ), NS_MAIN, 'en',
                  new TitleValue( NS_CATEGORY,
                      'X' . str_repeat( 'x', 247 ) ) ],
              [ str_repeat( 'x', 252 ), NS_MAIN, 'en',
                  'X' . str_repeat( 'x', 251 ) ]
          ];
      }

      /**
       * @dataProvider provideParseTitle
       *
       * @param string $text Text to parse
       * @param int $ns Default namespace passed to parseTitle()
       * @param string $lang
       * @param TitleValue|string|null $title Expected result; a string is taken
       *  as a DB key in NS_MAIN, null means "the trimmed input with spaces
       *  turned into underscores, in NS_MAIN"
       */
      public function testParseTitle( $text, $ns, $lang, $title = null ) {
          if ( $title === null ) {
              $title = str_replace( ' ', '_', trim( $text ) );
          }

          if ( is_string( $title ) ) {
              $title = new TitleValue( NS_MAIN, $title, '' );
          }

          $codec = $this->makeCodec( $lang );
          $actual = $codec->parseTitle( $text, $ns );

          // Two distinct TitleValue instances are compared by value here,
          // so this has to stay assertEquals rather than assertSame.
          $this->assertEquals( $title, $actual );
      }

      /**
       * Rows: a single text that parseTitle() must reject when parsed with the
       * English codec and NS_MAIN as the default namespace.
       *
       * @return array[]
       */
      public static function provideParseTitle_invalid() {
          // TODO: test unicode errors

          return [
              [ '#' ],
              [ '::' ],
              [ '::xx' ],
              [ '::##' ],
              [ ' :: x' ],

              [ 'Talk:File:Foo.jpg' ],
              [ 'Talk:localtestiw:Foo' ],
              [ '::1' ], // only valid in user namespace
              [ 'User::x' ], // leading ":" in a user name is only valid for IPv6 addresses

              // NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync.
              [ '' ],
              [ ':' ],
              [ '__ __' ],
              [ ' __ ' ],
              // Bad characters forbidden regardless of wgLegalTitleChars
              [ 'A [ B' ],
              [ 'A ] B' ],
              [ 'A { B' ],
              [ 'A } B' ],
              [ 'A < B' ],
              [ 'A > B' ],
              [ 'A | B' ],
              // URL encoding
              [ 'A%20B' ],
              [ 'A%23B' ],
              [ 'A%2523B' ],
              // XML/HTML character entity references
              // Note: Commented out because they are not marked invalid by the PHP test as
              // Title::newFromText runs Sanitizer::decodeCharReferencesAndNormalize first.
              // [ 'A &eacute; B' ],
              // [ 'A &#233; B' ],
              // [ 'A &#x00E9; B' ],
              // Subject of NS_TALK does not roundtrip to NS_MAIN
              [ 'Talk:File:Example.svg' ],
              // Directory navigation
              [ '.' ],
              [ '..' ],
              [ './Sandbox' ],
              [ '../Sandbox' ],
              [ 'Foo/./Sandbox' ],
              [ 'Foo/../Sandbox' ],
              [ 'Sandbox/.' ],
              [ 'Sandbox/..' ],
              // Tilde
              [ 'A ~~~ Name' ],
              [ 'A ~~~~ Signature' ],
              [ 'A ~~~~~ Timestamp' ],
              [ str_repeat( 'x', 256 ) ],
              // Namespace prefix without actual title
              [ 'Talk:' ],
              [ 'Category: ' ],
              [ 'Category: #bar' ]
          ];
      }

      /**
       * @dataProvider provideParseTitle_invalid
       *
       * @param string $text Text that must make parseTitle() throw a
       *  MalformedTitleException
       */
      public function testParseTitle_invalid( $text ) {
          $this->setExpectedException( MalformedTitleException::class );

          $codec = $this->makeCodec( 'en' );
          $codec->parseTitle( $text, NS_MAIN );
      }

      /**
       * Rows: namespace, page text (a user name for NS_USER), language code,
       * expected namespace name.
       *
       * @return array[]
       */
      public static function provideGetNamespaceName() {
          return [
              [ NS_MAIN, 'Foo', 'en', '' ],
              [ NS_USER, 'Foo', 'en', 'User' ],
              [ NS_USER, 'Hansi Maier', 'de', 'Benutzer' ],

              // getGenderCache() provides a mock that considers first
              // names ending in "a" to be female.
              [ NS_USER, 'Lisa Müller', 'de', 'Benutzerin' ],
          ];
      }

      /**
       * @dataProvider provideGetNamespaceName
       *
       * @param int $namespace
       * @param string $text
       * @param string $lang
       * @param string $expected
       */
      public function testGetNamespaceName( $namespace, $text, $lang, $expected ) {
          $codec = $this->makeCodec( $lang );
          $name = $codec->getNamespaceName( $namespace, $text );

          // assertSame: with assertEquals a null/false result would be accepted
          // for the expected '' of the main namespace.
          $this->assertSame( $expected, $name );
      }
  }