UtfNormal.php 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. <?php
  2. /**
  3. * Unicode normalization routines
  4. *
  5. * Copyright © 2004 Brion Vibber <brion@pobox.com>
  6. * https://www.mediawiki.org/
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21. * http://www.gnu.org/copyleft/gpl.html
  22. *
  23. * @file
  24. * @ingroup UtfNormal
  25. */
  26. /**
  27. * @defgroup UtfNormal UtfNormal
  28. */
  29. use UtfNormal\Validator;
  30. /**
  31. * Unicode normalization routines for working with UTF-8 strings.
  32. * Currently assumes that input strings are valid UTF-8!
  33. *
  34. * Not as fast as I'd like, but should be usable for most purposes.
  35. * UtfNormal::toNFC() will bail early if given ASCII text or text
  36. * it can quickly determine is already normalized.
  37. *
  38. * All functions can be called statically.
  39. *
  40. * See description of forms at https://www.unicode.org/reports/tr15/
  41. *
  42. * @deprecated since 1.25, use UtfNormal\Validator directly
  43. * @ingroup UtfNormal
  44. */
  class UtfNormal {
      /**
       * Clean up invalid UTF-8 sequences and convert to normal form C
       * (canonical composition).
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25', then forwards $string unchanged to
       * Validator::cleanUp() and returns its result.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::cleanUp() directly
       * @param string $string a UTF-8 string
       * @return string a clean, normalized UTF-8 string
       */
      public static function cleanUp( $string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::cleanUp( $string );
      }

      /**
       * Convert a UTF-8 string to normal form C (canonical composition).
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25', then forwards $string unchanged to
       * Validator::toNFC() and returns its result.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::toNFC() directly
       * @param string $string a valid UTF-8 string. Input is not validated.
       * @return string a UTF-8 string in normal form C
       */
      public static function toNFC( $string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::toNFC( $string );
      }

      /**
       * Convert a UTF-8 string to normal form D (canonical decomposition).
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25', then forwards $string unchanged to
       * Validator::toNFD() and returns its result.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::toNFD() directly
       * @param string $string a valid UTF-8 string. Input is not validated.
       * @return string a UTF-8 string in normal form D
       */
      public static function toNFD( $string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::toNFD( $string );
      }

      /**
       * Convert a UTF-8 string to normal form KC (compatibility composition).
       * This may cause irreversible information loss, use judiciously.
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25', then forwards $string unchanged to
       * Validator::toNFKC() and returns its result.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::toNFKC() directly
       * @param string $string a valid UTF-8 string. Input is not validated.
       * @return string a UTF-8 string in normal form KC
       */
      public static function toNFKC( $string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::toNFKC( $string );
      }

      /**
       * Convert a UTF-8 string to normal form KD (compatibility decomposition).
       * This may cause irreversible information loss, use judiciously.
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25', then forwards $string unchanged to
       * Validator::toNFKD() and returns its result.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::toNFKD() directly
       * @param string $string a valid UTF-8 string. Input is not validated.
       * @return string a UTF-8 string in normal form KD
       */
      public static function toNFKD( $string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::toNFKD( $string );
      }

      /**
       * Returns true if the string is _definitely_ in NFC.
       * Returns false if not or uncertain.
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25', then forwards $string unchanged to
       * Validator::quickIsNFC() and returns its result.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::quickIsNFC() directly
       * @param string $string a valid UTF-8 string. Input is not validated.
       * @return bool
       */
      public static function quickIsNFC( $string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::quickIsNFC( $string );
      }

      /**
       * Returns true if the string is _definitely_ in NFC.
       * Returns false if not or uncertain.
       *
       * Unlike quickIsNFC(), the argument is taken by reference and handed to
       * Validator::quickIsNFCVerify(), so the caller's variable may be
       * modified by that call.
       *
       * Backward-compatibility shim: calls wfDeprecated() with this method's
       * name and version '1.25' before delegating.
       *
       * @deprecated since 1.25, use UtfNormal\Validator::quickIsNFCVerify() directly
       * @param string &$string a UTF-8 string, altered on output to be valid UTF-8 safe for XML.
       * @return bool
       */
      public static function quickIsNFCVerify( &$string ) {
          wfDeprecated( __METHOD__, '1.25' );

          return Validator::quickIsNFCVerify( $string );
      }
  }