<?php

/**
 * Hoa
 *
 *
 * @license
 *
 * New BSD License
 *
 * Copyright © 2007-2017, Hoa community. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the Hoa nor the names of its contributors may be
 *       used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

namespace Hoa\Ustring;

use Hoa\Consistency;

/**
 * Class \Hoa\Ustring.
 *
 * This class represents a UTF-8 string.
 * Please, see:
 *     • http://www.ietf.org/rfc/rfc3454.txt;
 *     • http://unicode.org/reports/tr9/;
 *     • http://www.unicode.org/Public/6.0.0/ucd/UnicodeData.txt.
 *
 * The object is mutable: most methods modify the wrapped string in place and
 * return the current instance so that calls can be chained.
 *
 * @copyright  Copyright © 2007-2017 Hoa community
 * @license    New BSD License
 */
class Ustring implements \ArrayAccess, \Countable, \IteratorAggregate
{
    /**
     * Left-To-Right.
     *
     * @const int
     */
    const LTR              = 0;

    /**
     * Right-To-Left.
     *
     * @const int
     */
    const RTL              = 1;

    /**
     * ZERO WIDTH NO-BREAK SPACE (ZWNBSP, aka byte-order mark, BOM).
     *
     * @const int
     */
    const BOM              = 0xfeff;

    /**
     * LEFT-TO-RIGHT MARK.
     *
     * @const int
     */
    const LRM              = 0x200e;

    /**
     * RIGHT-TO-LEFT MARK.
     *
     * @const int
     */
    const RLM              = 0x200f;

    /**
     * LEFT-TO-RIGHT EMBEDDING.
     *
     * @const int
     */
    const LRE              = 0x202a;

    /**
     * RIGHT-TO-LEFT EMBEDDING.
     *
     * @const int
     */
    const RLE              = 0x202b;

    /**
     * POP DIRECTIONAL FORMATTING.
     *
     * @const int
     */
    const PDF              = 0x202c;

    /**
     * LEFT-TO-RIGHT OVERRIDE.
     *
     * @const int
     */
    const LRO              = 0x202d;

    /**
     * RIGHT-TO-LEFT OVERRIDE.
     *
     * @const int
     */
    const RLO              = 0x202e;

    /**
     * Represent the beginning of the string.
     *
     * @const int
     */
    const BEGINNING        = 1;

    /**
     * Represent the end of the string.
     *
     * @const int
     */
    const END              = 2;

    /**
     * Split: only non-empty pieces are returned.
     *
     * @const int
     */
    const WITHOUT_EMPTY    = PREG_SPLIT_NO_EMPTY;

    /**
     * Split: parenthesized expression in the delimiter pattern will be captured
     * and returned.
     *
     * @const int
     */
    const WITH_DELIMITERS  = PREG_SPLIT_DELIM_CAPTURE;

    /**
     * Split: offsets of captures will be returned.
     *
     * @const int
     */
    const WITH_OFFSET      = 260; //   PREG_OFFSET_CAPTURE
                                  // | PREG_SPLIT_OFFSET_CAPTURE

    /**
     * Group results by patterns.
     *
     * @const int
     */
    const GROUP_BY_PATTERN = PREG_PATTERN_ORDER;

    /**
     * Group results by tuple (set of patterns).
     *
     * @const int
     */
    const GROUP_BY_TUPLE   = PREG_SET_ORDER;

    /**
     * Current string.
     *
     * @var string
     */
    protected $_string          = null;

    /**
     * Direction. Please see self::LTR and self::RTL constants.
     * This is a cache filled by self::getDirection; every method that can
     * change the first character must reset it to null.
     *
     * @var int
     */
    protected $_direction       = null;

    /**
     * Collator.
     *
     * @var \Collator
     */
    protected static $_collator = null;



    /**
     * Construct a UTF-8 string.
     *
     * @param   string  $string    String.
     */
    public function __construct($string = null)
    {
        if (null !== $string) {
            $this->append($string);
        }
    }

    /**
     * Check if ext/mbstring is available.
     *
     * @return  bool
     */
    public static function checkMbString()
    {
        return function_exists('mb_substr');
    }

    /**
     * Check if ext/iconv is available.
     *
     * @return  bool
     */
    public static function checkIconv()
    {
        return function_exists('iconv');
    }

    /**
     * Append a substring to the current string, i.e. add to the end.
     *
     * @param   string  $substring    Substring to append.
     * @return  \Hoa\Ustring
     */
    public function append($substring)
    {
        $this->_string .= $substring;

        // The first character changes when the string was empty so far.
        $this->_direction = null;

        return $this;
    }

    /**
     * Prepend a substring to the current string, i.e. add to the start.
     *
     * @param   string  $substring    Substring to prepend.
     * @return  \Hoa\Ustring
     */
    public function prepend($substring)
    {
        $this->_string = $substring . $this->_string;

        // The first character is now the one of $substring.
        $this->_direction = null;

        return $this;
    }

    /**
     * Pad the current string to a certain length with another string, aka
     * piece. The piece is repeated as many times as needed and truncated so
     * that the final length is exactly $length characters.
     * Nothing is done if the string is already long enough or if the piece is
     * empty.
     *
     * @param   int     $length    Length (in characters).
     * @param   string  $piece     Piece.
     * @param   int     $side      Whether we append at the end or the beginning
     *                             of the current string.
     * @return  \Hoa\Ustring
     */
    public function pad($length, $piece, $side = self::END)
    {
        $difference = $length - $this->count();

        if (0 >= $difference) {
            return $this;
        }

        $piece       = (string) $piece;
        $pieceLength = mb_strlen($piece);

        // An empty piece cannot fill anything (and would divide by zero).
        if (0 === $pieceLength) {
            return $this;
        }

        // Whole repetitions first, then the leading part of one more piece.
        $handle  = str_repeat($piece, (int) floor($difference / $pieceLength));
        $handle .= mb_substr($piece, 0, $difference - mb_strlen($handle));

        return
            static::END === $side
                ? $this->append($handle)
                : $this->prepend($handle);
    }

    /**
     * Make a comparison with a string.
     * Return < 0 if current string is less than $string, > 0 if greater and 0
     * if equal.
     * A \Collator is used when available, else a byte-wise strcmp().
     *
     * @param   mixed  $string    String.
     * @return  int
     */
    public function compare($string)
    {
        if (null === $collator = static::getCollator()) {
            return strcmp((string) $this->_string, (string) $string);
        }

        return $collator->compare((string) $this->_string, (string) $string);
    }

    /**
     * Get collator.
     * The collator is created once, from the value returned by
     * setlocale(LC_COLLATE, null), and shared by all instances.
     *
     * @return  \Collator    Null if the class \Collator is not available.
     */
    public static function getCollator()
    {
        if (false === class_exists('Collator')) {
            return null;
        }

        if (null === static::$_collator) {
            static::$_collator = new \Collator(setlocale(LC_COLLATE, null));
        }

        return static::$_collator;
    }

    /**
     * Ensure that the pattern is safe for Unicode: add the “u” option.
     * The modifiers are what follows the last ending delimiter; for
     * bracket-style delimiters, the ending delimiter is the matching closing
     * bracket, not the opening one.
     *
     * @param   string  $pattern    Pattern.
     * @return  string
     */
    public static function safePattern($pattern)
    {
        $pattern   = (string) $pattern;
        $delimiter = mb_substr($pattern, 0, 1);
        $options   = '';

        if ('' !== $delimiter) {
            $closings = array('(' => ')', '[' => ']', '{' => '}', '<' => '>');
            $closing  = isset($closings[$delimiter])
                            ? $closings[$delimiter]
                            : $delimiter;
            $tail     = mb_strrchr($pattern, $closing, false);

            if (false !== $tail) {
                $options = mb_substr($tail, mb_strlen($closing));
            }
        }

        if (false === strpos($options, 'u')) {
            $pattern .= 'u';
        }

        return $pattern;
    }

    /**
     * Perform a regular expression (PCRE) match.
     *
     * @param   string  $pattern    Pattern.
     * @param   array   $matches    Matches.
     * @param   int     $flags      Please, see constants self::WITH_OFFSET,
     *                              self::GROUP_BY_PATTERN and
     *                              self::GROUP_BY_TUPLE.
     * @param   int     $offset     Alternate place from which to start the
     *                              search (in characters).
     * @param   bool    $global     Whether the match is global or not.
     * @return  int
     */
    public function match(
        $pattern,
        &$matches = null,
        $flags    = 0,
        $offset   = 0,
        $global   = false
    ) {
        $pattern = static::safePattern($pattern);

        if (0 === $flags) {
            if (true === $global) {
                $flags = static::GROUP_BY_PATTERN;
            }
        } else {
            // self::WITH_OFFSET also carries the split-only bit: drop it.
            $flags &= ~PREG_SPLIT_OFFSET_CAPTURE;
        }

        // preg_* functions expect an offset in bytes, not in characters.
        $offset = strlen(mb_substr($this->_string, 0, $offset));

        if (true === $global) {
            return preg_match_all(
                $pattern,
                $this->_string,
                $matches,
                $flags,
                $offset
            );
        }

        return preg_match($pattern, $this->_string, $matches, $flags, $offset);
    }

    /**
     * Perform a regular expression (PCRE) search and replace.
     * Note: the replacement is handled as a callback as soon as
     * is_callable() accepts it, which includes a string naming an existing
     * function.
     *
     * @param   mixed   $pattern        Pattern(s): a string or an array of
     *                                  strings.
     * @param   mixed   $replacement    Replacement(s) (please, see
     *                                  preg_replace() documentation).
     * @param   int     $limit          Maximum of replacements. -1 for unbound.
     * @return  \Hoa\Ustring
     */
    public function replace($pattern, $replacement, $limit = -1)
    {
        if (true === is_array($pattern)) {
            foreach ($pattern as $key => $onePattern) {
                $pattern[$key] = static::safePattern($onePattern);
            }
        } else {
            $pattern = static::safePattern($pattern);
        }

        if (false === is_callable($replacement)) {
            $this->_string = preg_replace(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        } else {
            $this->_string = preg_replace_callback(
                $pattern,
                $replacement,
                $this->_string,
                $limit
            );
        }

        $this->_direction = null;

        return $this;
    }

    /**
     * Split the current string according to a given pattern (PCRE).
     *
     * @param   string  $pattern    Pattern (as a regular expression).
     * @param   int     $limit      Maximum of split. -1 for unbound.
     * @param   int     $flags      Please, see constants self::WITHOUT_EMPTY,
     *                              self::WITH_DELIMITERS, self::WITH_OFFSET.
     * @return  array
     */
    public function split(
        $pattern,
        $limit = -1,
        $flags = self::WITHOUT_EMPTY
    ) {
        return preg_split(
            static::safePattern($pattern),
            $this->_string,
            $limit,
            $flags
        );
    }

    /**
     * Iterator over chars.
     *
     * @return  \ArrayIterator
     */
    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        // Split between every pair of characters. \A and \z are used instead
        // of ^ and $ because $ also matches before a trailing newline, which
        // would leave the last character glued to that newline.
        return new \ArrayIterator(
            preg_split('#(?<!\A)(?!\z)#u', (string) $this->_string)
        );
    }

    /**
     * Perform a lowercase folding on the current string.
     *
     * @return  \Hoa\Ustring
     */
    public function toLowerCase()
    {
        $this->_string = mb_strtolower($this->_string);

        return $this;
    }

    /**
     * Perform an uppercase folding on the current string.
     *
     * @return  \Hoa\Ustring
     */
    public function toUpperCase()
    {
        $this->_string = mb_strtoupper($this->_string);

        return $this;
    }

    /**
     * Transform a UTF-8 string into an ASCII one.
     * First, try with a transliterator. If not available, will fallback to a
     * normalizer. If not available, will try something homemade.
     *
     * @param   bool  $try    Try something if \Normalizer is not present.
     * @return  \Hoa\Ustring
     * @throws  \Hoa\Ustring\Exception
     */
    public function toAscii($try = false)
    {
        // Nothing to do if there is no byte outside the ASCII range.
        if (0 === preg_match('#[\x80-\xff]#', $this->_string)) {
            return $this;
        }

        $string  = $this->_string;
        $transId =
            'Any-Latin; ' .
            '[\p{S}] Name; ' .
            'Latin-ASCII';

        if (null !== $transliterator = static::getTransliterator($transId)) {
            // Symbols come out as “\N{NAME}”: rewrite them as “(name)”.
            $this->_string = preg_replace_callback(
                '#\\\N\{([A-Z ]+)\}#u',
                function (array $matches) {
                    return '(' . strtolower($matches[1]) . ')';
                },
                $transliterator->transliterate($string)
            );
            $this->_direction = null;

            return $this;
        }

        if (false === class_exists('Normalizer')) {
            if (false === $try) {
                throw new Exception(
                    '%s needs the class Normalizer to work properly, ' .
                    'or you can force a try by using %1$s(true).',
                    0,
                    __METHOD__
                );
            }

            $string           = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');
            $this->_string    = preg_replace('#(?:[\'"`^](\w))#u', '\1', $string);
            $this->_direction = null;

            return $this;
        }

        // Decompose, drop the non-spacing marks, then transcode what remains.
        $string           = \Normalizer::normalize($string, \Normalizer::NFKD);
        $string           = preg_replace('#\p{Mn}+#u', '', $string);
        $this->_string    = static::transcode($string, 'UTF-8', 'ASCII//IGNORE//TRANSLIT');
        $this->_direction = null;

        return $this;
    }

    /**
     * Transliterate the string into another.
     * See self::getTransliterator for more information.
     *
     * @param   string  $identifier    Identifier.
     * @param   int     $start         Start.
     * @param   int     $end           End. Null means up to the end of the
     *                                 string.
     * @return  \Hoa\Ustring
     * @throws  \Hoa\Ustring\Exception
     */
    public function transliterate($identifier, $start = 0, $end = null)
    {
        if (null === $transliterator = static::getTransliterator($identifier)) {
            throw new Exception(
                '%s needs the class Transliterator to work properly.',
                1,
                __METHOD__
            );
        }

        // \Transliterator::transliterate expects -1 (not null, which would
        // be coerced to 0) to mean “up to the end of the string”.
        $this->_string = $transliterator->transliterate(
            $this->_string,
            $start,
            null === $end ? -1 : $end
        );
        $this->_direction = null;

        return $this;
    }

    /**
     * Get transliterator.
     * See http://userguide.icu-project.org/transforms/general for $identifier.
     *
     * @param   string  $identifier    Identifier.
     * @return  \Transliterator        Null if the class \Transliterator is not
     *                                 available.
     */
    public static function getTransliterator($identifier)
    {
        if (false === class_exists('Transliterator')) {
            return null;
        }

        return \Transliterator::create($identifier);
    }

    /**
     * Strip characters (default \s) of the current string.
     * $regex is embedded as is in a pattern delimited by “#”.
     *
     * @param   string  $regex    Characters to remove.
     * @param   int     $side     Whether we trim the beginning, the end or both
     *                            sides, of the current string.
     * @return  \Hoa\Ustring
     */
    public function trim($regex = '\s', $side = 3 /* static::BEGINNING | static::END */)
    {
        $regex  = '(?:' . $regex . ')+';
        $handle = '';

        if (0 !== ($side & static::BEGINNING)) {
            $handle .= '(^' . $regex . ')';
        }

        if (0 !== ($side & static::END)) {
            if ('' !== $handle) {
                $handle .= '|';
            }

            $handle .= '(' . $regex . '$)';
        }

        $this->_string    = preg_replace('#' . $handle . '#u', '', $this->_string);
        $this->_direction = null;

        return $this;
    }

    /**
     * Compute offset (negative, unbound etc.): the offset is wrapped around
     * the length of the string. On an empty string, the offset is always 0.
     *
     * @param   int  $offset    Offset.
     * @return  int
     */
    protected function computeOffset($offset)
    {
        $length = mb_strlen((string) $this->_string);

        // No character at all: avoid a modulo by zero.
        if (0 === $length) {
            return 0;
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $offset;
    }

    /**
     * Get a specific character of the current string.
     *
     * @param   int  $offset    Offset (can be negative and unbound).
     * @return  string
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        return mb_substr(
            (string) $this->_string,
            $this->computeOffset($offset),
            1
        );
    }

    /**
     * Set a specific character of the current string.
     *
     * @param   int     $offset    Offset (can be negative and unbound).
     * @param   string  $value     Value.
     * @return  \Hoa\Ustring
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        $string = (string) $this->_string;
        $head   = null;
        $offset = $this->computeOffset($offset);

        if (0 < $offset) {
            $head = mb_substr($string, 0, $offset);
        }

        $tail             = mb_substr($string, $offset + 1);
        $this->_string    = $head . $value . $tail;
        $this->_direction = null;

        return $this;
    }

    /**
     * Delete a specific character of the current string.
     *
     * @param   int  $offset    Offset (can be negative and unbound).
     * @return  \Hoa\Ustring
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        return $this->offsetSet($offset, null);
    }

    /**
     * Check if a specific offset exists.
     * Always true: offsets are wrapped around the length of the string (see
     * self::computeOffset).
     *
     * @param   int  $offset    Offset.
     * @return  bool
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return true;
    }

    /**
     * Reduce the strings.
     *
     * @param   int  $start     Position of first character.
     * @param   int  $length    Maximum number of characters.
     * @return  \Hoa\Ustring
     */
    public function reduce($start, $length = null)
    {
        $this->_string    = mb_substr($this->_string, $start, $length);
        $this->_direction = null;

        return $this;
    }

    /**
     * Count number of characters of the current string.
     *
     * @return  int
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        return mb_strlen((string) $this->_string);
    }

    /**
     * Get byte (not character) at a specific offset.
     *
     * @param   int  $offset    Offset (can be negative and unbound).
     * @return  string          Empty if the current string is empty.
     */
    public function getByteAt($offset)
    {
        $string = (string) $this->_string;
        $length = strlen($string);

        // No byte at all: avoid a modulo by zero and an undefined offset.
        if (0 === $length) {
            return '';
        }

        if (0 > $offset) {
            $offset = -$offset % $length;

            if (0 !== $offset) {
                $offset = $length - $offset;
            }
        } elseif ($offset >= $length) {
            $offset %= $length;
        }

        return $string[$offset];
    }

    /**
     * Count number of bytes (not characters) of the current string.
     *
     * @return  int
     */
    public function getBytesLength()
    {
        return strlen((string) $this->_string);
    }

    /**
     * Get the width of the current string.
     * Useful when printing the string in monotype (some character need more
     * than one column to be printed).
     *
     * @return  int
     */
    public function getWidth()
    {
        return mb_strwidth($this->_string);
    }

    /**
     * Get direction of the current string.
     * Please, see the self::LTR and self::RTL constants.
     * It does not yet support embedding directions: only the first character
     * is considered. The result is cached until the string is modified.
     *
     * @return  int
     */
    public function getDirection()
    {
        if (null === $this->_direction) {
            if (null === $this->_string) {
                $this->_direction = static::LTR;
            } else {
                $this->_direction = static::getCharDirection(
                    mb_substr($this->_string, 0, 1)
                );
            }
        }

        return $this->_direction;
    }

    /**
     * Get direction of a specific character.
     * Please, see the self::LTR and self::RTL constants.
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function getCharDirection($char)
    {
        $c = static::toCode($char);

        // Every right-to-left code listed below lies in this range.
        if (!(0x5be <= $c && 0x10b7f >= $c)) {
            return static::LTR;
        }

        if (0x85e >= $c) {
            if (0x5be === $c ||
                0x5c0 === $c ||
                0x5c3 === $c ||
                0x5c6 === $c ||
                (0x5d0 <= $c && 0x5ea >= $c) ||
                (0x5f0 <= $c && 0x5f4 >= $c) ||
                0x608 === $c ||
                0x60b === $c ||
                0x60d === $c ||
                0x61b === $c ||
                (0x61e <= $c && 0x64a >= $c) ||
                (0x66d <= $c && 0x66f >= $c) ||
                (0x671 <= $c && 0x6d5 >= $c) ||
                (0x6e5 <= $c && 0x6e6 >= $c) ||
                (0x6ee <= $c && 0x6ef >= $c) ||
                (0x6fa <= $c && 0x70d >= $c) ||
                0x710 === $c ||
                (0x712 <= $c && 0x72f >= $c) ||
                (0x74d <= $c && 0x7a5 >= $c) ||
                0x7b1 === $c ||
                (0x7c0 <= $c && 0x7ea >= $c) ||
                (0x7f4 <= $c && 0x7f5 >= $c) ||
                0x7fa === $c ||
                (0x800 <= $c && 0x815 >= $c) ||
                0x81a === $c ||
                0x824 === $c ||
                0x828 === $c ||
                (0x830 <= $c && 0x83e >= $c) ||
                (0x840 <= $c && 0x858 >= $c) ||
                0x85e === $c) {
                return static::RTL;
            }
        } elseif (0x200f === $c) {
            return static::RTL;
        } elseif (0xfb1d <= $c) {
            if (0xfb1d === $c ||
                (0xfb1f <= $c && 0xfb28 >= $c) ||
                (0xfb2a <= $c && 0xfb36 >= $c) ||
                (0xfb38 <= $c && 0xfb3c >= $c) ||
                0xfb3e === $c ||
                (0xfb40 <= $c && 0xfb41 >= $c) ||
                (0xfb43 <= $c && 0xfb44 >= $c) ||
                (0xfb46 <= $c && 0xfbc1 >= $c) ||
                (0xfbd3 <= $c && 0xfd3d >= $c) ||
                (0xfd50 <= $c && 0xfd8f >= $c) ||
                (0xfd92 <= $c && 0xfdc7 >= $c) ||
                (0xfdf0 <= $c && 0xfdfc >= $c) ||
                (0xfe70 <= $c && 0xfe74 >= $c) ||
                (0xfe76 <= $c && 0xfefc >= $c) ||
                (0x10800 <= $c && 0x10805 >= $c) ||
                0x10808 === $c ||
                (0x1080a <= $c && 0x10835 >= $c) ||
                (0x10837 <= $c && 0x10838 >= $c) ||
                0x1083c === $c ||
                (0x1083f <= $c && 0x10855 >= $c) ||
                (0x10857 <= $c && 0x1085f >= $c) ||
                (0x10900 <= $c && 0x1091b >= $c) ||
                (0x10920 <= $c && 0x10939 >= $c) ||
                0x1093f === $c ||
                0x10a00 === $c ||
                (0x10a10 <= $c && 0x10a13 >= $c) ||
                (0x10a15 <= $c && 0x10a17 >= $c) ||
                (0x10a19 <= $c && 0x10a33 >= $c) ||
                (0x10a40 <= $c && 0x10a47 >= $c) ||
                (0x10a50 <= $c && 0x10a58 >= $c) ||
                (0x10a60 <= $c && 0x10a7f >= $c) ||
                (0x10b00 <= $c && 0x10b35 >= $c) ||
                (0x10b40 <= $c && 0x10b55 >= $c) ||
                (0x10b58 <= $c && 0x10b72 >= $c) ||
                (0x10b78 <= $c && 0x10b7f >= $c)) {
                return static::RTL;
            }
        }

        return static::LTR;
    }

    /**
     * Get the number of column positions of a wide-character.
     *
     * This is a PHP implementation of wcwidth() and wcswidth() (defined in IEEE
     * Std 1002.1-2001) for Unicode, by Markus Kuhn. Please, see
     * http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
     *
     * The wcwidth(wc) function shall either return 0 (if wc is a null
     * wide-character code), or return the number of column positions to be
     * occupied by the wide-character code wc, or return -1 (if wc does not
     * correspond to a printable wide-character code).
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function getCharWidth($char)
    {
        $char = (string) $char;
        $c    = static::toCode($char);

        // Test for 8-bit control characters.
        if (0x0 === $c) {
            return 0;
        }

        if (0x20 > $c || (0x7f <= $c && $c < 0xa0)) {
            return -1;
        }

        // Non-spacing characters.
        if (0xad !== $c &&
            0    !== preg_match('#^[\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11ff}\x{200b}]#u', $char)) {
            return 0;
        }

        // If we arrive here, $c is not a combining C0/C1 control character.
        // The boolean below adds one column for wide characters.
        return 1 +
            (0x1100 <= $c &&
                (0x115f >= $c ||                        // Hangul Jamo init. consonants
                 0x2329 === $c || 0x232a === $c ||
                     (0x2e80 <= $c && 0xa4cf >= $c &&
                      0x303f !== $c) ||                 // CJK…Yi
                     (0xac00  <= $c && 0xd7a3 >= $c) || // Hangul Syllables
                     (0xf900  <= $c && 0xfaff >= $c) || // CJK Compatibility Ideographs
                     (0xfe10  <= $c && 0xfe19 >= $c) || // Vertical forms
                     (0xfe30  <= $c && 0xfe6f >= $c) || // CJK Compatibility Forms
                     (0xff00  <= $c && 0xff60 >= $c) || // Fullwidth Forms
                     (0xffe0  <= $c && 0xffe6 >= $c) ||
                     (0x20000 <= $c && 0x2fffd >= $c) ||
                     (0x30000 <= $c && 0x3fffd >= $c)));
    }

    /**
     * Check whether the character is printable or not.
     *
     * @param   string  $char    Character.
     * @return  bool
     */
    public static function isCharPrintable($char)
    {
        return 1 <= static::getCharWidth($char);
    }

    /**
     * Get a UTF-8 character from its decimal code representation.
     *
     * @param   int  $code    Code.
     * @return  string
     */
    public static function fromCode($code)
    {
        // mb_chr() is preferred when present: the “HTML-ENTITIES” encoding
        // used below is deprecated in recent PHP versions.
        if (true === function_exists('mb_chr')) {
            $char = mb_chr($code, 'UTF-8');

            if (false !== $char) {
                return $char;
            }
        }

        return mb_convert_encoding(
            '&#x' . dechex($code) . ';',
            'UTF-8',
            'HTML-ENTITIES'
        );
    }

    /**
     * Get a decimal code representation of a specific character.
     * Only the first character of $char is decoded. An empty string gives 0.
     *
     * @param   string  $char    Character.
     * @return  int
     */
    public static function toCode($char)
    {
        $char = (string) $char;

        if ('' === $char) {
            return 0;
        }

        $code  = ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) { // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) { // 110xxxxx
            $bytes = 2;
            $code  = $code & ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) { // 1110xxxx
            $bytes = 3;
            $code  = $code & ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) { // 11110xxx
            $bytes = 4;
            $code  = $code & ~0xf0;
        }

        for ($i = 2; $i <= $bytes; $i++) { // 10xxxxxx
            // A missing continuation byte (truncated sequence) counts as 0.
            $byte = isset($char[$i - 1]) ? ord($char[$i - 1]) : 0;
            $code = ($code << 6) + ($byte & ~0x80);
        }

        return $code;
    }

    /**
     * Get a binary representation of a specific character: every byte is
     * written as 8 binary digits.
     *
     * @param   string  $char    Character.
     * @return  string
     */
    public static function toBinaryCode($char)
    {
        $char = (string) $char;
        $out  = '';

        for ($i = 0, $max = strlen($char); $i < $max; ++$i) {
            $out .= sprintf('%08b', ord($char[$i]));
        }

        return $out;
    }

    /**
     * Transcode.
     *
     * @param   string  $string    String.
     * @param   string  $from      Original encoding.
     * @param   string  $to        Final encoding.
     * @return  string
     * @throws  \Hoa\Ustring\Exception
     */
    public static function transcode($string, $from, $to = 'UTF-8')
    {
        if (false === static::checkIconv()) {
            throw new Exception(
                '%s needs the iconv extension.',
                2,
                __CLASS__
            );
        }

        return iconv($from, $to, $string);
    }

    /**
     * Check if a string is encoded in UTF-8.
     *
     * @param   string  $string    String.
     * @return  bool
     */
    public static function isUtf8($string)
    {
        // With the “u” option, PCRE refuses subjects that are not valid UTF-8.
        return (bool) preg_match('##u', $string);
    }

    /**
     * Copy current object string.
     *
     * @return  \Hoa\Ustring
     */
    public function copy()
    {
        return clone $this;
    }

    /**
     * Transform the object as a string.
     *
     * @return  string
     */
    public function __toString()
    {
        // The string is null until something is appended, while __toString
        // must return a string.
        return (string) $this->_string;
    }
}
/**
 * Flex entity.
 */
Consistency::flexEntity('Hoa\Ustring\Ustring');

// The class is built on top of the mb_* functions: refuse to go further as
// soon as the file is loaded if they are missing.
if (!Ustring::checkMbString()) {
    throw new Exception('%s needs the mbstring extension.', 0, __NAMESPACE__ . '\Ustring');
}