Collation.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. <?php
  2. /**
  3. * Database row sorting.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. /**
  23. * @since 1.16.3
  24. * @author Tim Starling
  25. */
  26. abstract class Collation {
  27. private static $instance;
  28. /**
  29. * @since 1.16.3
  30. * @return Collation
  31. */
  32. public static function singleton() {
  33. if ( !self::$instance ) {
  34. global $wgCategoryCollation;
  35. self::$instance = self::factory( $wgCategoryCollation );
  36. }
  37. return self::$instance;
  38. }
  39. /**
  40. * @since 1.16.3
  41. * @throws MWException
  42. * @param string $collationName
  43. * @return Collation
  44. */
  45. public static function factory( $collationName ) {
  46. global $wgContLang;
  47. switch ( $collationName ) {
  48. case 'uppercase':
  49. return new UppercaseCollation;
  50. case 'numeric':
  51. return new NumericUppercaseCollation( $wgContLang );
  52. case 'identity':
  53. return new IdentityCollation;
  54. case 'uca-default':
  55. return new IcuCollation( 'root' );
  56. case 'uca-default-u-kn':
  57. return new IcuCollation( 'root-u-kn' );
  58. case 'xx-uca-ckb':
  59. return new CollationCkb;
  60. case 'xx-uca-et':
  61. return new CollationEt;
  62. case 'xx-uca-fa':
  63. return new CollationFa;
  64. case 'uppercase-ba':
  65. return new BashkirUppercaseCollation;
  66. default:
  67. $match = [];
  68. if ( preg_match( '/^uca-([A-Za-z@=-]+)$/', $collationName, $match ) ) {
  69. return new IcuCollation( $match[1] );
  70. }
  71. # Provide a mechanism for extensions to hook in.
  72. $collationObject = null;
  73. Hooks::run( 'Collation::factory', [ $collationName, &$collationObject ] );
  74. if ( $collationObject instanceof Collation ) {
  75. return $collationObject;
  76. }
  77. // If all else fails...
  78. throw new MWException( __METHOD__ . ": unknown collation type \"$collationName\"" );
  79. }
  80. }
  81. /**
  82. * Given a string, convert it to a (hopefully short) key that can be used
  83. * for efficient sorting. A binary sort according to the sortkeys
  84. * corresponds to a logical sort of the corresponding strings. Current
  85. * code expects that a line feed character should sort before all others, but
  86. * has no other particular expectations (and that one can be changed if
  87. * necessary).
  88. *
  89. * @since 1.16.3
  90. *
  91. * @param string $string UTF-8 string
  92. * @return string Binary sortkey
  93. */
  94. abstract function getSortKey( $string );
  95. /**
  96. * Given a string, return the logical "first letter" to be used for
  97. * grouping on category pages and so on. This has to be coordinated
  98. * carefully with convertToSortkey(), or else the sorted list might jump
  99. * back and forth between the same "initial letters" or other pathological
  100. * behavior. For instance, if you just return the first character, but "a"
  101. * sorts the same as "A" based on getSortKey(), then you might get a
  102. * list like
  103. *
  104. * == A ==
  105. * * [[Aardvark]]
  106. *
  107. * == a ==
  108. * * [[antelope]]
  109. *
  110. * == A ==
  111. * * [[Ape]]
  112. *
  113. * etc., assuming for the sake of argument that $wgCapitalLinks is false.
  114. *
  115. * @since 1.16.3
  116. *
  117. * @param string $string UTF-8 string
  118. * @return string UTF-8 string corresponding to the first letter of input
  119. */
  120. abstract function getFirstLetter( $string );
  121. }