FormatJson.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. <?php
  2. /**
  3. * Wrapper for json_encode and json_decode.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. /**
  23. * JSON formatter wrapper class
  24. */
  class FormatJson {
      /**
       * Skip escaping most characters above U+007F for readability and compactness.
       * This encoding option saves 3 to 8 bytes (uncompressed) for each such character;
       * however, it could break compatibility with systems that incorrectly handle UTF-8.
       *
       * @since 1.22
       */
      const UTF8_OK = 1;

      /**
       * Skip escaping the characters '<', '>', and '&', which have special meanings in
       * HTML and XML.
       *
       * @warning Do not use this option for JSON that could end up in inline scripts.
       * - HTML 5.2, §4.12.1.3 Restrictions for contents of script elements
       * - XML 1.0 (5th Ed.), §2.4 Character Data and Markup
       *
       * @since 1.22
       */
      const XMLMETA_OK = 2;

      /**
       * Skip escaping as many characters as reasonably possible.
       *
       * @warning When generating inline script blocks, use FormatJson::UTF8_OK instead.
       *
       * @since 1.22
       */
      const ALL_OK = self::UTF8_OK | self::XMLMETA_OK;

      /**
       * If set, treat JSON objects '{...}' as associative arrays. Without this option,
       * JSON objects will be converted to stdClass.
       *
       * @since 1.24
       */
      const FORCE_ASSOC = 0x100;

      /**
       * If set, attempt to fix invalid JSON.
       *
       * @since 1.24
       */
      const TRY_FIXING = 0x200;

      /**
       * If set, strip comments from input before parsing as JSON.
       *
       * @since 1.25
       */
      const STRIP_COMMENTS = 0x400;

      /**
       * Characters problematic in JavaScript.
       *
       * @note These are listed in ECMA-262 (5.1 Ed.), §7.3 Line Terminators along with U+000A (LF)
       * and U+000D (CR). However, PHP already escapes LF and CR according to RFC 4627.
       *
       * @var string[]
       */
      private static $badChars = [
          "\u{2028}", // U+2028 LINE SEPARATOR
          "\u{2029}", // U+2029 PARAGRAPH SEPARATOR
      ];

      /**
       * Escape sequences for characters listed in FormatJson::$badChars.
       * Entries are positionally paired with FormatJson::$badChars.
       *
       * @var string[]
       */
      private static $badCharsEscaped = [
          '\u2028', // U+2028 LINE SEPARATOR
          '\u2029', // U+2029 PARAGRAPH SEPARATOR
      ];

      /**
       * Returns the JSON representation of a value.
       *
       * @note Empty arrays are encoded as numeric arrays, not as objects, so cast any associative
       * array that might be empty to an object before encoding it.
       *
       * @note In pre-1.22 versions of MediaWiki, using this function for generating inline script
       * blocks may result in an XSS vulnerability, and quite likely will in XML documents
       * (cf. FormatJson::XMLMETA_OK). Use Xml::encodeJsVar() instead in such cases.
       *
       * @param mixed $value The value to encode. Can be any type except a resource.
       * @param string|bool $pretty If a string, add non-significant whitespace to improve
       * readability, using that string for indentation. If true, use the default indent
       * string (four spaces).
       * @param int $escaping Bitfield consisting of _OK class constants
       * @return string|false String if successful; false upon failure
       */
      public static function encode( $value, $pretty = false, $escaping = 0 ) {
          // The indent unit produced by JSON_PRETTY_PRINT, and the default when $pretty is true.
          $defaultIndent = '    ';

          if ( !is_string( $pretty ) ) {
              $pretty = $pretty ? $defaultIndent : false;
          }

          // PHP escapes '/' to prevent breaking out of inline script blocks using '</script>',
          // which is hardly useful when '<' and '>' are escaped (and inadequate), and such
          // escaping negatively impacts the human readability of URLs and similar strings.
          $options = JSON_UNESCAPED_SLASHES;
          $options |= $pretty !== false ? JSON_PRETTY_PRINT : 0;
          $options |= ( $escaping & self::UTF8_OK ) ? JSON_UNESCAPED_UNICODE : 0;
          $options |= ( $escaping & self::XMLMETA_OK ) ? 0 : ( JSON_HEX_TAG | JSON_HEX_AMP );

          $json = json_encode( $value, $options );
          if ( $json === false ) {
              return false;
          }

          if ( $pretty !== false && $pretty !== $defaultIndent ) {
              // Change the four-space indent to a tab indent. Literal newlines and tabs only
              // occur in the formatting whitespace, because json_encode() escapes them
              // inside string values.
              $json = str_replace( "\n" . $defaultIndent, "\n\t", $json );
              while ( strpos( $json, "\t" . $defaultIndent ) !== false ) {
                  $json = str_replace( "\t" . $defaultIndent, "\t\t", $json );
              }

              if ( $pretty !== "\t" ) {
                  // Change the tab indent to the provided indent
                  $json = str_replace( "\t", $pretty, $json );
              }
          }

          if ( $escaping & self::UTF8_OK ) {
              // With unescaped Unicode, make sure the JavaScript line terminators are still escaped.
              $json = str_replace( self::$badChars, self::$badCharsEscaped, $json );
          }

          return $json;
      }

      /**
       * Decodes a JSON string. It is recommended to use FormatJson::parse(),
       * which returns a more comprehensive result in case of an error, and has
       * more parsing options.
       *
       * @param string $value The JSON string being decoded
       * @param bool $assoc When true, returned objects will be converted into associative arrays.
       *
       * @return mixed The value encoded in JSON in appropriate PHP type.
       * `null` is returned if $value represented `null`, if $value could not be decoded,
       * or if the encoded data was deeper than the recursion limit.
       * Use FormatJson::parse() to distinguish between types of `null` and to get proper error code.
       */
      public static function decode( $value, $assoc = false ) {
          return json_decode( $value, $assoc );
      }

      /**
       * Decodes a JSON string.
       * Unlike FormatJson::decode(), if $value represents null value, it will be
       * properly decoded as valid.
       *
       * @param string $value The JSON string being decoded
       * @param int $options A bit field that allows FORCE_ASSOC, TRY_FIXING,
       * STRIP_COMMENTS
       * @return Status If valid JSON, the value is available in $result->getValue()
       */
      public static function parse( $value, $options = 0 ) {
          if ( $options & self::STRIP_COMMENTS ) {
              $value = self::stripComments( $value );
          }

          $assoc = ( $options & self::FORCE_ASSOC ) !== 0;
          $result = json_decode( $value, $assoc );
          $code = json_last_error();

          if ( $code === JSON_ERROR_SYNTAX && ( $options & self::TRY_FIXING ) !== 0 ) {
              // The most common error is the trailing comma in a list or an object.
              // We cannot simply replace /,\s*[}\]]/ because it could be inside a string value.
              // But we could use the fact that JSON does not allow multi-line string values,
              // and remove trailing commas if they are at the end of a line.
              // JSON only allows 4 control characters: [ \t\r\n]. So we must not use '\s' for matching.
              // Regex match ,]<any non-quote chars>\n or ,\n] with optional spaces/tabs.
              $count = 0;
              $value =
                  preg_replace( '/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/', '$1',
                      $value, -1, $count );
              if ( $count > 0 ) {
                  $result = json_decode( $value, $assoc );
                  if ( JSON_ERROR_NONE === json_last_error() ) {
                      // Report warning
                      $st = Status::newGood( $result );
                      $st->warning( wfMessage( 'json-warn-trailing-comma' )->numParams( $count ) );
                      return $st;
                  }
              }
              // The fix did not help: fall through and report the original syntax error.
          }

          if ( $code === JSON_ERROR_NONE ) {
              return Status::newGood( $result );
          }

          // Message keys for the json_last_error() codes this class knows about.
          $messages = [
              JSON_ERROR_DEPTH => 'json-error-depth',
              JSON_ERROR_STATE_MISMATCH => 'json-error-state-mismatch',
              JSON_ERROR_CTRL_CHAR => 'json-error-ctrl-char',
              JSON_ERROR_SYNTAX => 'json-error-syntax',
              JSON_ERROR_UTF8 => 'json-error-utf8',
              JSON_ERROR_RECURSION => 'json-error-recursion',
              JSON_ERROR_INF_OR_NAN => 'json-error-inf-or-nan',
              JSON_ERROR_UNSUPPORTED_TYPE => 'json-error-unsupported-type',
          ];

          if ( !isset( $messages[$code] ) ) {
              // Any other code is reported generically, with the numeric code as a parameter.
              return Status::newFatal( wfMessage( 'json-error-unknown' )->numParams( $code ) );
          }

          return Status::newFatal( $messages[$code] );
      }

      /**
       * Remove multiline and single line comments from an otherwise valid JSON
       * input string. This can be used as a preprocessor, to allow JSON
       * formatted configuration files to contain comments.
       *
       * Comment markers inside string values are left alone. A single line
       * comment is removed together with the newline that terminates it.
       *
       * @param string $json
       * @return string JSON with comments removed
       */
      public static function stripComments( $json ) {
          // Ensure we have a string
          $str = (string)$json;
          $buffer = '';
          $maxLen = strlen( $str );
          // Start of the pending chunk of non-comment text not yet copied to $buffer
          $mark = 0;
          // Index of the '*' that opened the current multiline comment
          $commentStart = 0;

          $inString = false;
          $inComment = false;
          $multiline = false;

          for ( $idx = 0; $idx < $maxLen; $idx++ ) {
              switch ( $str[$idx] ) {
                  case '"':
                      if ( $inComment ) {
                          break;
                      }
                      // A quote is escaped only when preceded by an odd number of
                      // backslashes; an even run (e.g. "\\") is escaped backslashes
                      // followed by a real quote.
                      $backslashes = 0;
                      for ( $back = $idx - 1; $back >= 0 && $str[$back] === '\\'; $back-- ) {
                          $backslashes++;
                      }
                      if ( $backslashes % 2 === 0 ) {
                          // Either started or ended a string
                          $inString = !$inString;
                      }
                      break;

                  case '/':
                      if ( $inString ) {
                          break;
                      }
                      $lookAhead = ( $idx + 1 < $maxLen ) ? $str[$idx + 1] : '';
                      $lookBehind = ( $idx - 1 >= 0 ) ? $str[$idx - 1] : '';
                      if ( !$inComment && ( $lookAhead === '/' || $lookAhead === '*' ) ) {
                          // Transition into a comment
                          // Add characters seen to buffer
                          $buffer .= substr( $str, $mark, $idx - $mark );
                          // Consume the look ahead character
                          $idx++;
                          // Track state
                          $inComment = true;
                          $multiline = $lookAhead === '*';
                          $commentStart = $idx;
                      } elseif ( $multiline && $lookBehind === '*' && $idx - 1 > $commentStart ) {
                          // Found the end of the current comment. The closing '*' must come
                          // after the opening one, so that '/*/' does not close itself.
                          $mark = $idx + 1;
                          $inComment = false;
                          $multiline = false;
                      }
                      break;

                  case "\n":
                      if ( $inComment && !$multiline ) {
                          // Found the end of the current comment
                          $mark = $idx + 1;
                          $inComment = false;
                      }
                      break;
              }
          }

          if ( $inComment ) {
              // Comment ends with input
              // Technically we should check to ensure that we aren't in
              // a multiline comment that hasn't been properly ended, but this
              // is a strip filter, not a validating parser.
              $mark = $maxLen;
          }

          // Add final chunk to buffer before returning
          return $buffer . substr( $str, $mark, $maxLen - $mark );
      }
  }