PathRouter.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. <?php
  2. /**
  3. * Parser to extract query parameters out of REQUEST_URI paths.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. /**
  23. * PathRouter class.
  24. * This class can take patterns such as /wiki/$1 and use them to
  25. * parse query parameters out of REQUEST_URI paths.
  26. *
  27. * $router->add( "/wiki/$1" );
  28. * - Matches /wiki/Foo style urls and extracts the title
  29. * $router->add( [ 'edit' => "/edit/$key" ], [ 'action' => '$key' ] );
  30. * - Matches /edit/Foo style urls and sets action=edit
  31. * $router->add( '/$2/$1',
  32. * [ 'variant' => '$2' ],
  33. * [ '$2' => [ 'zh-hant', 'zh-hans' ] ]
  34. * );
  35. * - Matches /zh-hant/Foo or /zh-hans/Foo
  36. * $router->addStrict( "/foo/Bar", [ 'title' => 'Baz' ] );
  37. * - Matches /foo/Bar explicitly and uses "Baz" as the title
  38. * $router->add( '/help/$1', [ 'title' => 'Help:$1' ] );
  39. * - Matches /help/Foo with "Help:Foo" as the title
  40. * $router->add( '/$1', [ 'foo' => [ 'value' => 'bar$2' ] ] );
  41. * - Matches /Foo and sets 'foo' to 'bar$2' without $2 being replaced
  42. * $router->add( '/$1', [ 'data:foo' => 'bar' ], [ 'callback' => 'functionname' ] );
  43. * - Matches /Foo, adds the key 'foo' with the value 'bar' to the data array
  44. * and calls functionname( &$matches, $data );
  45. *
  46. * Path patterns:
  47. * - Paths may contain $# patterns such as $1, $2, etc...
  48. * - $1 will match 0 or more while the rest will match 1 or more
  49. * - Unless you use addStrict "/wiki" and "/wiki/" will be expanded to "/wiki/$1"
  50. *
  51. * Params:
  52. * - In a pattern $1, $2, etc... will be replaced with the relevant contents
  53. * - If you used a keyed array as a path pattern, $key will be replaced with
  54. * the relevant contents
  55. * - The default behavior is equivalent to `array( 'title' => '$1' )`,
  56. * if you don't want the title parameter you can explicitly use `array( 'title' => false )`
  57. * - You can specify a value that won't have replacements in it
  58. * using `'foo' => [ 'value' => 'bar' ];`
  59. *
  60. * Options:
  61. * - The option keys $1, $2, etc... can be specified to restrict the possible values
  62. * of that variable. A string can be used for a single value, or an array for multiple.
  63. * - When the option key 'strict' is set (Using addStrict is simpler than doing this directly)
  64. * the path won't have $1 implicitly added to it.
  65. * - The option key 'callback' can specify a callback that will be run when a path is matched.
  66. * The callback will have the arguments ( &$matches, $data ) and the matches array can
  67. * be modified.
  68. *
  69. * @since 1.19
  70. * @author Daniel Friesen
  71. */
  class PathRouter {
      /**
       * Pattern objects registered through add()/addStrict(). Each object
       * carries the properties path, params, options, key and weight.
       *
       * @var array
       */
      private $patterns = [];

      /**
       * Protected helper to do the actual bulk work of adding a single pattern.
       * This is in a separate method so that add() can handle the difference between
       * a single string $path and an array $path that contains multiple path
       * patterns each with an associated $key to pass on.
       *
       * @param string $path Path pattern; a leading "/" is added when missing
       * @param array $params Parameter rules (plain strings are normalised to
       *  [ 'pattern' => ... ] or [ 'value' => ... ] form)
       * @param array $options Options; single-value $# restrictions are wrapped in an array
       * @param null|string $key Key of the path inside an array passed to add(), if any
       */
      protected function doAdd( $path, $params, $options, $key = null ) {
          // Make sure all paths start with a /
          // substr() is used rather than $path[0] so that an empty path does
          // not trigger an "uninitialized string offset" notice/warning.
          if ( substr( $path, 0, 1 ) !== '/' ) {
              $path = '/' . $path;
          }

          if ( !isset( $options['strict'] ) || !$options['strict'] ) {
              // Unless this is a strict path make sure that the path has a $1
              if ( !self::hasTitlePlaceholder( $path ) ) {
                  if ( substr( $path, -1 ) !== '/' ) {
                      $path .= '/';
                  }
                  $path .= '$1';
              }
          }

          // If 'title' is not specified and our path pattern contains a $1
          // add a default 'title' => '$1' rule to the parameters.
          if ( !isset( $params['title'] ) && self::hasTitlePlaceholder( $path ) ) {
              $params['title'] = '$1';
          }

          // If the user explicitly marked 'title' as false then omit it from the matches
          if ( isset( $params['title'] ) && $params['title'] === false ) {
              unset( $params['title'] );
          }

          // Loop over our parameters and convert basic key => string
          // patterns into fully descriptive array form
          foreach ( $params as $paramName => $paramData ) {
              if ( is_string( $paramData ) ) {
                  if ( preg_match( '/\$(\d+|key)/u', $paramData ) ) {
                      $paramArrKey = 'pattern';
                  } else {
                      // If there's no replacement use a value instead
                      // of a pattern for a little more efficiency
                      $paramArrKey = 'value';
                  }
                  $params[$paramName] = [
                      $paramArrKey => $paramData
                  ];
              }
          }

          // Loop over our options and convert any single value $# restrictions
          // into an array so we only have to do in_array tests.
          foreach ( $options as $optionName => $optionData ) {
              if ( preg_match( '/^\$\d+$/u', $optionName ) ) {
                  if ( !is_array( $optionData ) ) {
                      $options[$optionName] = [ $optionData ];
                  }
              }
          }

          $pattern = (object)[
              'path' => $path,
              'params' => $params,
              'options' => $options,
              'key' => $key,
          ];
          $pattern->weight = self::makeWeight( $pattern );
          $this->patterns[] = $pattern;
      }

      /**
       * Tell whether a path pattern contains the $1 placeholder itself.
       * A plain substring search would also hit the prefix of $10, $11, etc.
       *
       * @param string $path
       * @return bool
       */
      private static function hasTitlePlaceholder( $path ) {
          return (bool)preg_match( '/\$1(?!\d)/', $path );
      }

      /**
       * Add a new path pattern to the path router
       *
       * @param string|array $path The path pattern to add; for an array every
       *  entry is added and its array key is passed on as the pattern's $key
       * @param array $params The params for this path pattern
       * @param array $options The options for this path pattern
       */
      public function add( $path, $params = [], $options = [] ) {
          if ( is_array( $path ) ) {
              foreach ( $path as $key => $onePath ) {
                  $this->doAdd( $onePath, $params, $options, $key );
              }
          } else {
              $this->doAdd( $path, $params, $options );
          }
      }

      /**
       * Add a new path pattern to the path router with the strict option on
       * @see self::add
       * @param string|array $path
       * @param array $params
       * @param array $options
       */
      public function addStrict( $path, $params = [], $options = [] ) {
          $options['strict'] = true;
          $this->add( $path, $params, $options );
      }

      /**
       * Protected helper to re-sort our patterns so that the most specific
       * (most heavily weighted) patterns are at the start of the array.
       */
      protected function sortByWeight() {
          $weights = [];
          foreach ( $this->patterns as $key => $pattern ) {
              $weights[$key] = $pattern->weight;
          }
          array_multisort( $weights, SORT_DESC, SORT_NUMERIC, $this->patterns );
      }

      /**
       * Compute how specific a pattern is; higher weights are tried first.
       *
       * @param object $pattern Pattern object with path and options properties
       * @return float|int
       */
      protected static function makeWeight( $pattern ) {
          // Start with a weight of 0
          $weight = 0;

          // Explode the path to work with
          $path = explode( '/', $pattern->path );

          // For each level of the path
          foreach ( $path as $piece ) {
              if ( preg_match( '/^\$(\d+|key)$/u', $piece ) ) {
                  // For a piece that is only a $1 variable add 1 point of weight
                  $weight += 1;
              } elseif ( preg_match( '/\$(\d+|key)/u', $piece ) ) {
                  // For a piece that simply contains a $1 variable add 2 points of weight
                  $weight += 2;
              } else {
                  // For a solid piece add a full 3 points of weight
                  $weight += 3;
              }
          }

          foreach ( $pattern->options as $key => $option ) {
              if ( preg_match( '/^\$\d+$/u', $key ) ) {
                  // Add 0.5 for restrictions to values
                  // This way given two separate "/$2/$1" patterns the
                  // one with a limited set of $2 values will dominate
                  // the one that'll match more loosely
                  $weight += 0.5;
              }
          }

          return $weight;
      }

      /**
       * Parse a path and return the query matches for the path
       *
       * @param string $path The path to parse
       * @return array The array of matches for the path; empty when nothing matched
       */
      public function parse( $path ) {
          // Make sure our patterns are sorted by weight so the most specific
          // matches are tested first
          $this->sortByWeight();

          $matches = null;
          foreach ( $this->patterns as $pattern ) {
              $matches = self::extractTitle( $path, $pattern );
              if ( !is_null( $matches ) ) {
                  break;
              }
          }

          // We know the difference between null (no matches) and
          // an empty array (a match with no data), but the caller expects
          // an array even when nothing matched, so null becomes [].
          return is_null( $matches ) ? [] : $matches;
      }

      /**
       * Try to match a path against one pattern and build the resulting parameters.
       *
       * @param string $path The path being parsed
       * @param object $pattern Pattern object created by doAdd()
       * @return array|null The matches, or null when the pattern does not apply
       */
      protected static function extractTitle( $path, $pattern ) {
          // Convert the path pattern into a regexp we can match with
          $regexp = preg_quote( $pattern->path, '#' );
          // .* for the $1; the lookahead keeps $10, $11, ... from being
          // mistaken for $1 followed by literal digits
          $regexp = preg_replace( '#\\\\\$1(?!\d)#u', '(?P<par1>.*)', $regexp );
          // .+ for the rest of the parameter numbers
          $regexp = preg_replace( '#\\\\\$(\d+)#u', '(?P<par$1>.+?)', $regexp );
          $regexp = "#^{$regexp}$#";

          // Try to match the path we were asked to parse with our regexp
          if ( !preg_match( $regexp, $path, $m ) ) {
              // Our regexp didn't match, return null to signify no match.
              return null;
          }

          $matches = [];
          $data = [];

          // Ensure that any $# restriction we have set in our {$option}s
          // matches properly here.
          foreach ( $pattern->options as $key => $option ) {
              if ( preg_match( '/^\$\d+$/u', $key ) ) {
                  $n = intval( substr( $key, 1 ) );
                  // A restriction may name a placeholder that the path does not
                  // contain; treat it as an empty capture instead of reading an
                  // undefined index.
                  $captured = isset( $m["par{$n}"] ) ? $m["par{$n}"] : '';
                  $value = rawurldecode( $captured );
                  if ( !in_array( $value, $option ) ) {
                      // If any restriction does not match return null
                      // to signify that this rule did not match.
                      return null;
                  }
              }
          }

          // Give our $data array a copy of every $# that was matched
          foreach ( $m as $matchKey => $matchValue ) {
              if ( preg_match( '/^par\d+$/u', $matchKey ) ) {
                  $n = intval( substr( $matchKey, 3 ) );
                  $data['$' . $n] = rawurldecode( $matchValue );
              }
          }

          // If present give our $data array a $key as well
          if ( isset( $pattern->key ) ) {
              $data['$key'] = $pattern->key;
          }

          // Go through our parameters for this match and add data to our matches and data arrays
          foreach ( $pattern->params as $paramName => $paramData ) {
              $value = null;

              // Differentiate data: from normal parameters and keep the correct
              // array key around (ie: foo for data:foo)
              if ( preg_match( '/^data:/u', $paramName ) ) {
                  $isData = true;
                  $key = substr( $paramName, 5 );
              } else {
                  $isData = false;
                  $key = $paramName;
              }

              if ( isset( $paramData['value'] ) ) {
                  // For basic values just set the raw data as the value
                  $value = $paramData['value'];
              } elseif ( isset( $paramData['pattern'] ) ) {
                  // For patterns we have to make value replacements on the string
                  $value = $paramData['pattern'];
                  $replacer = new PathRouterPatternReplacer;
                  $replacer->params = $m;
                  if ( isset( $pattern->key ) ) {
                      $replacer->key = $pattern->key;
                  }
                  $value = $replacer->replace( $value );
                  if ( $value === false ) {
                      // Pattern required data that wasn't available, abort
                      return null;
                  }
              }

              // Send things that start with data: to $data, the rest to $matches
              if ( $isData ) {
                  $data[$key] = $value;
              } else {
                  $matches[$key] = $value;
              }
          }

          // If this match includes a callback, execute it; $matches is passed
          // by reference so the callback can modify it
          if ( isset( $pattern->options['callback'] ) ) {
              call_user_func_array( $pattern->options['callback'], [ &$matches, $data ] );
          }

          // Everything went ok, return our matches array
          return $matches;
      }
  }
  class PathRouterPatternReplacer {
      /** @var mixed Replacement for $key; null means no key is available */
      public $key;

      /** @var array Captured values, looked up under "par<N>" for each $N */
      public $params;

      /** @var bool Set when the last replace() hit a placeholder it could not fill */
      public $error;

      /**
       * Substitute every $N and $key placeholder in a pattern string.
       *
       * The substitution runs through a replacement callback because, once
       * text has been substituted, a $1 that was left unreplaced can no longer
       * be told apart from a $1 that was part of the substituted content.
       *
       * @param string $value Pattern string containing placeholders
       * @return string|false The expanded string, or false when a placeholder had no data
       */
      public function replace( $value ) {
          $this->error = false;
          $expanded = preg_replace_callback( '/\$(\d+|key)/u', [ $this, 'callback' ], $value );

          return $this->error ? false : $expanded;
      }

      /**
       * Replacement callback: resolve a single placeholder match.
       *
       * @param array $m Regexp match; $m[1] is either "key" or the placeholder number
       * @return string Replacement text; '' (with the error flag set) when unavailable
       */
      protected function callback( $m ) {
          $name = $m[1];

          if ( $name == "key" ) {
              $available = !is_null( $this->key );
              $text = $this->key;
          } else {
              $index = "par$name";
              $available = isset( $this->params[$index] );
              $text = $available ? rawurldecode( $this->params[$index] ) : '';
          }

          if ( !$available ) {
              // Remember the failure so replace() can report it to the caller
              $this->error = true;
              return '';
          }

          return $text;
      }
  }