LanguageCodeTest.php 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. <?php
  2. /**
  3. * @covers LanguageCode
  4. * @group Language
  5. *
  6. * @author Thiemo Kreuz
  7. */
  class LanguageCodeTest extends PHPUnit\Framework\TestCase {

      use MediaWikiCoversValidator;

      /**
       * LanguageCode can be instantiated without constructor arguments.
       */
      public function testConstructor() {
          $instance = new LanguageCode();

          $this->assertInstanceOf( LanguageCode::class, $instance );
      }

      /**
       * Sanity-checks the shape and content of the deprecated code mapping:
       * it must be an array of non-empty string keys and non-empty string
       * values, and a handful of well-known codes must not show up in it.
       */
      public function testGetDeprecatedCodeMapping() {
          $map = LanguageCode::getDeprecatedCodeMapping();

          // is_array() keeps this check independent of the PHPUnit version
          // (assertInternalType() was deprecated and later removed upstream).
          $this->assertTrue(
              is_array( $map ),
              'getDeprecatedCodeMapping() must return an array'
          );

          // Keys: strings only, never empty
          $this->assertContainsOnly( 'string', array_keys( $map ) );
          $this->assertArrayNotHasKey( '', $map );

          // Values: strings only, never empty
          $this->assertContainsOnly( 'string', $map );
          $this->assertNotContains( '', $map );

          // Codes special to MediaWiki should never appear in a map of "deprecated" codes
          $this->assertArrayNotHasKey( 'qqq', $map, 'documentation' );
          $this->assertNotContains( 'qqq', $map, 'documentation' );
          $this->assertArrayNotHasKey( 'qqx', $map, 'debug code' );
          $this->assertNotContains( 'qqx', $map, 'debug code' );

          // Valid language codes that are currently not "deprecated"
          $this->assertArrayNotHasKey( 'bh', $map, 'family of Bihari languages' );
          $this->assertArrayNotHasKey( 'no', $map, 'family of Norwegian languages' );
          $this->assertArrayNotHasKey( 'simple', $map );
      }

      /**
       * A deprecated code is replaced, a current code is returned unchanged,
       * and null is passed through as null.
       */
      public function testReplaceDeprecatedCodes() {
          // assertSame() rather than assertEquals(): the comparison must be
          // strict so that a wrong type cannot slip through.
          $this->assertSame( 'gsw', LanguageCode::replaceDeprecatedCodes( 'als' ) );
          $this->assertSame( 'gsw', LanguageCode::replaceDeprecatedCodes( 'gsw' ) );

          // assertEquals( null, ... ) would also accept '', 0 or false.
          $this->assertNull( LanguageCode::replaceDeprecatedCodes( null ) );
      }

      /**
       * Tests LanguageCode::bcp47().
       *
       * BCP 47 explicitly states that language codes are case insensitive,
       * but it recommends a specific casing for some kinds of subtags.
       * Every code from the provider is therefore fed to bcp47() twice, once
       * entirely in lower case and once entirely in upper case, and both
       * calls must produce the same expected formatting.
       *
       * @see LanguageCode::bcp47()
       * @see https://tools.ietf.org/html/bcp47
       * @dataProvider provideLanguageCodes
       *
       * @param string $code Language code; its original casing is discarded
       * @param string $expected Expected result of bcp47() for that code
       */
      public function testBcp47( $code, $expected ) {
          $lower = strtolower( $code );
          $this->assertSame(
              $expected,
              LanguageCode::bcp47( $lower ),
              "Applying BCP47 standard to lower case '$lower'"
          );

          $upper = strtoupper( $lower );
          $this->assertSame(
              $expected,
              LanguageCode::bcp47( $upper ),
              "Applying BCP47 standard to upper case '$upper'"
          );
      }

      /**
       * Data provider for testBcp47().
       *
       * @return array[] List of [ $code, $expected ] pairs
       */
      public static function provideLanguageCodes() {
          return [
              // Extracted from BCP 47 (list not exhaustive)
              // Section 2.1.1
              [ 'en-ca-x-ca', 'en-CA-x-ca' ],
              [ 'sgn-be-fr', 'sgn-BE-FR' ],
              [ 'az-latn-x-latn', 'az-Latn-x-latn' ],
              // Section 2.2
              [ 'sr-Latn-RS', 'sr-Latn-RS' ],
              [ 'az-arab-ir', 'az-Arab-IR' ],
              // Section 2.2.5
              [ 'sl-nedis', 'sl-nedis' ],
              [ 'de-ch-1996', 'de-CH-1996' ],
              // Section 2.2.6
              [
                  'en-latn-gb-boont-r-extended-sequence-x-private',
                  'en-Latn-GB-boont-r-extended-sequence-x-private'
              ],

              // Examples from BCP 47 Appendix A
              // Simple language subtag:
              [ 'DE', 'de' ],
              [ 'fR', 'fr' ],
              [ 'ja', 'ja' ],
              // Language subtag plus script subtag:
              [ 'zh-hans', 'zh-Hans' ],
              [ 'sr-cyrl', 'sr-Cyrl' ],
              [ 'sr-latn', 'sr-Latn' ],
              // Extended language subtags and their primary language subtag
              // counterparts:
              [ 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ],
              [ 'cmn-hans-cn', 'cmn-Hans-CN' ],
              [ 'zh-yue-hk', 'zh-yue-HK' ],
              [ 'yue-hk', 'yue-HK' ],
              // Language-Script-Region:
              [ 'zh-hans-cn', 'zh-Hans-CN' ],
              [ 'sr-latn-RS', 'sr-Latn-RS' ],
              // Language-Variant:
              [ 'sl-rozaj', 'sl-rozaj' ],
              [ 'sl-rozaj-biske', 'sl-rozaj-biske' ],
              [ 'sl-nedis', 'sl-nedis' ],
              // Language-Region-Variant:
              [ 'de-ch-1901', 'de-CH-1901' ],
              [ 'sl-it-nedis', 'sl-IT-nedis' ],
              // Language-Script-Region-Variant:
              [ 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ],
              // Language-Region:
              [ 'de-de', 'de-DE' ],
              [ 'en-us', 'en-US' ],
              [ 'es-419', 'es-419' ],
              // Private use subtags:
              [ 'de-ch-x-phonebk', 'de-CH-x-phonebk' ],
              [ 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ],
              // NOTE: the expectation in the entry directly above does not
              // match the spelling used by BCP 47 itself, which writes this
              // example as "az-Arab-x-AZE-derbend" (upper-case private-use
              // subtag "AZE"), while this test expects lower-case "aze".
              // If the BCP 47 spelling is the desired output, the entry
              // should probably be:
              //   [ 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ],
              // Private use registry values:
              [ 'x-whatever', 'x-whatever' ],
              [ 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ],
              [ 'de-qaaa', 'de-Qaaa' ],
              [ 'sr-latn-qm', 'sr-Latn-QM' ],
              [ 'sr-qaaa-rs', 'sr-Qaaa-RS' ],
              // Tags that use extensions
              [ 'en-us-u-islamcal', 'en-US-u-islamcal' ],
              [ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ],
              [ 'en-a-myext-b-another', 'en-a-myext-b-another' ],

              // Invalid tags listed by BCP 47 (not covered by this provider):
              // de-419-DE
              // a-DE
              // ar-a-aaa-b-bbb-a-ccc
          ];
      }
  }