ApiOpenSearch.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. <?php
  2. /**
  3. * Copyright © 2006 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
  4. * Copyright © 2008 Brion Vibber <brion@wikimedia.org>
  5. * Copyright © 2014 Wikimedia Foundation and contributors
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20. * http://www.gnu.org/copyleft/gpl.html
  21. *
  22. * @file
  23. */
  24. use MediaWiki\MediaWikiServices;
  25. /**
  26. * @ingroup API
  27. */
  28. class ApiOpenSearch extends ApiBase {
  29. use SearchApi;
  30. private $format = null;
  31. private $fm = null;
  32. /** @var array list of api allowed params */
  33. private $allowedParams = null;
  /**
   * Resolve the output format for this request, memoizing the answer.
   *
   * A requested format that is not in the module's list of allowed formats
   * is replaced by the declared default. A trailing "fm" is split off and
   * stored in $this->fm, so the returned value never carries that suffix.
   *
   * @return string
   */
  protected function getFormat() {
      if ( $this->format !== null ) {
          return $this->format;
      }

      $requested = $this->extractRequestParams()['format'];
      $formatSpec = $this->getAllowedParams()['format'];
      if ( !in_array( $requested, $formatSpec[ApiBase::PARAM_TYPE] ) ) {
          $requested = $formatSpec[ApiBase::PARAM_DFLT];
      }

      $hasFmSuffix = substr( $requested, -2 ) === 'fm';
      $this->fm = $hasFmSuffix ? 'fm' : '';
      $this->format = $hasFmSuffix ? substr( $requested, 0, -2 ) : $requested;

      return $this->format;
  }
  /**
   * Create the printer that matches the resolved output format.
   *
   * getFormat() is called first because it also fills in $this->fm, which
   * both printers need.
   *
   * @return ApiOpenSearchFormatJson|ApiFormatXML
   */
  public function getCustomPrinter() {
      $format = $this->getFormat();

      if ( $format === 'json' ) {
          return new ApiOpenSearchFormatJson(
              $this->getMain(), $this->fm, $this->getParameter( 'warningsaserror' )
          );
      }

      if ( $format === 'xml' ) {
          $xmlPrinter = $this->getMain()->createPrinterByName( 'xml' . $this->fm );
          '@phan-var ApiFormatXML $xmlPrinter';
          $xmlPrinter->setRootElement( 'SearchSuggestion' );
          return $xmlPrinter;
      }

      ApiBase::dieDebug( __METHOD__, "Unsupported format '{$this->getFormat()}'" );
  }
  /**
   * Run the search and pass the result list to populateResult().
   *
   * The search is only performed when the 'suggest' parameter is falsy or
   * the EnableOpenSearchSuggest setting is on; otherwise an empty result
   * list is output.
   */
  public function execute() {
      $params = $this->extractRequestParams();
      $search = $params['search'];
      $suggest = $params['suggest'];
      $results = [];
      if ( !$suggest || $this->getConfig()->get( 'EnableOpenSearchSuggest' ) ) {
          // Open search results may be stored for a very long time
          $this->getMain()->setCacheMaxAge( $this->getConfig()->get( 'SearchSuggestCacheExpiry' ) );
          $this->getMain()->setCacheMode( 'public' );
          $results = $this->search( $search, $params );

          // Allow hooks to populate extracts and images
          Hooks::run( 'ApiOpenSearchSuggest', [ &$results ] );

          // Trim extracts, if necessary
          $length = $this->getConfig()->get( 'OpenSearchDescriptionLength' );
          foreach ( $results as &$r ) {
              // @phan-suppress-next-line PhanTypeInvalidDimOffset
              if ( is_string( $r['extract'] ) && !$r['extract trimmed'] ) {
                  $r['extract'] = self::trimExtract( $r['extract'], $length );
              }
          }
          // Drop the by-reference loop variable: $results is passed on by
          // reference below, and a lingering alias to its last element could
          // be overwritten by any later assignment to $r.
          unset( $r );
      }

      // Populate result object
      $this->populateResult( $search, $results );
  }
  /**
   * Perform the search
   *
   * Titles come from the search engine's completion search. When redirects
   * are resolved, each redirecting title is replaced by its target and
   * targets are de-duplicated; otherwise the titles are returned as found.
   *
   * @param string $search the search query
   * @param array $params api request params
   * @return array search results. Keys are integers.
   * @phan-return array<array{title:Title,redirect_from:?Title,extract:false,extract_trimmed:false,image:false,url:string}>
   * Note that phan annotations don't support keys containing a space.
   */
  private function search( $search, array $params ) {
      $searchEngine = $this->buildSearchEngine( $params );
      $titles = $searchEngine->extractTitles( $searchEngine->completionSearchWithVariants( $search ) );
      $results = [];

      if ( !$titles ) {
          return $results;
      }

      if ( $params['redirects'] === null ) {
          // Backwards compatibility, don't resolve for JSON.
          $resolveRedir = $this->getFormat() !== 'json';
      } else {
          $resolveRedir = $params['redirects'] === 'resolve';
      }

      $redirects = $resolveRedir ? $this->fetchRedirectTargets( $titles ) : [];

      // Special pages need unique integer ids in the return list, so we just
      // assign them negative numbers because those won't clash with the
      // always positive articleIds that non-special pages get.
      $nextSpecialPageId = -1;
      $seen = [];

      foreach ( $titles as $title ) {
          $from = null;

          if ( $resolveRedir ) {
              $ns = $title->getNamespace();
              $dbkey = $title->getDBkey();
              // Bypass any redirects
              if ( isset( $redirects[$ns][$dbkey] ) ) {
                  list( $ns, $dbkey ) = $redirects[$ns][$dbkey];
                  $from = $title;
                  $title = Title::makeTitle( $ns, $dbkey );
              }
              // Several titles may resolve to the same target; keep the first
              if ( isset( $seen[$ns][$dbkey] ) ) {
                  continue;
              }
              $seen[$ns][$dbkey] = true;
          }

          $resultId = $title->getArticleID();
          if ( $resultId === 0 ) {
              $resultId = $nextSpecialPageId;
              $nextSpecialPageId -= 1;
          }

          $results[$resultId] = [
              'title' => $title,
              'redirect from' => $from,
              'extract' => false,
              'extract trimmed' => false,
              'image' => false,
              'url' => wfExpandUrl( $title->getFullURL(), PROTO_CURRENT ),
          ];
      }

      return $results;
  }

  /**
   * Look up redirect targets for the given titles, ignoring redirects that
   * carry an interwiki prefix.
   *
   * @param Title[] $titles
   * @return array Map of source namespace => source DB key =>
   *  [ target namespace, target DB key ]
   */
  private function fetchRedirectTargets( $titles ) {
      $redirects = [];
      $lb = new LinkBatch( $titles );
      if ( $lb->isEmpty() ) {
          return $redirects;
      }

      $db = $this->getDB();
      $res = $db->select(
          [ 'page', 'redirect' ],
          [ 'page_namespace', 'page_title', 'rd_namespace', 'rd_title' ],
          [
              'rd_from = page_id',
              'rd_interwiki IS NULL OR rd_interwiki = ' . $db->addQuotes( '' ),
              $lb->constructSet( 'page', $db ),
          ],
          __METHOD__
      );
      foreach ( $res as $row ) {
          $redirects[$row->page_namespace][$row->page_title] =
              [ $row->rd_namespace, $row->rd_title ];
      }

      return $redirects;
  }
  /**
   * Write the search results into the API result, in the layout that the
   * current output format expects.
   *
   * @param string $search
   * @param array &$results
   */
  protected function populateResult( $search, &$results ) {
      $apiResult = $this->getResult();

      switch ( $this->getFormat() ) {
          case 'json':
              // http://www.opensearch.org/Specifications/OpenSearch/Extensions/Suggestions/1.1
              $apiResult->addArrayType( null, 'array' );
              $apiResult->addValue( null, 0, strval( $search ) );

              // Three parallel lists: titles, descriptions and URLs
              $titleList = [];
              $extractList = [];
              $urlList = [];
              foreach ( $results as $entry ) {
                  $titleList[] = $entry['title']->getPrefixedText();
                  $extractList[] = strval( $entry['extract'] );
                  $urlList[] = $entry['url'];
              }

              $apiResult->addValue( null, 1, $titleList );
              $apiResult->addValue( null, 2, $extractList );
              $apiResult->addValue( null, 3, $urlList );
              break;

          case 'xml':
              // https://msdn.microsoft.com/en-us/library/cc891508(v=vs.85).aspx
              // Only these image attributes are copied to the output
              $allowedImageKeys = array_fill_keys(
                  [ 'source', 'alt', 'width', 'height', 'align' ],
                  true
              );

              $sectionItems = [];
              foreach ( $results as $entry ) {
                  $xmlItem = [
                      'Text' => $entry['title']->getPrefixedText(),
                      'Url' => $entry['url'],
                  ];

                  $extract = $entry['extract'];
                  if ( is_string( $extract ) && $extract !== '' ) {
                      $xmlItem['Description'] = $extract;
                  }

                  // @phan-suppress-next-line PhanTypeArraySuspiciousNullable
                  if ( is_array( $entry['image'] ) && isset( $entry['image']['source'] ) ) {
                      $xmlItem['Image'] = array_intersect_key( $entry['image'], $allowedImageKeys );
                  }

                  ApiResult::setSubelementsList( $xmlItem, array_keys( $xmlItem ) );
                  $sectionItems[] = $xmlItem;
              }
              ApiResult::setIndexedTagName( $sectionItems, 'Item' );

              $apiResult->addValue( null, 'version', '2.0' );
              $apiResult->addValue( null, 'xmlns', 'http://opensearch.org/searchsuggest2' );
              $apiResult->addValue( null, 'Query', strval( $search ) );
              $apiResult->addSubelementsList( null, 'Query' );
              $apiResult->addValue( null, 'Section', $sectionItems );
              break;

          default:
              ApiBase::dieDebug( __METHOD__, "Unsupported format '{$this->getFormat()}'" );
      }
  }
  /**
   * Return the parameter definitions for this module, building and caching
   * them on first use.
   *
   * The common search parameters are combined with the module-specific ones
   * below, and the default of 'limit' is taken from OpenSearchDefaultLimit.
   *
   * @return array
   */
  public function getAllowedParams() {
      if ( $this->allowedParams === null ) {
          $moduleParams = [
              'suggest' => false,
              'redirects' => [
                  ApiBase::PARAM_TYPE => [ 'return', 'resolve' ],
              ],
              'format' => [
                  ApiBase::PARAM_DFLT => 'json',
                  ApiBase::PARAM_TYPE => [ 'json', 'jsonfm', 'xml', 'xmlfm' ],
              ],
              'warningsaserror' => false,
          ];
          $this->allowedParams = $this->buildCommonApiParams( false ) + $moduleParams;

          // Use open search specific default limit
          $defaultLimit = $this->getConfig()->get( 'OpenSearchDefaultLimit' );
          $this->allowedParams['limit'][ApiBase::PARAM_DFLT] = $defaultLimit;
      }

      return $this->allowedParams;
  }
  /**
   * Describe the search profile parameter exposed by this module: a single
   * 'profile' parameter of the completion profile type.
   *
   * @return array
   */
  public function getSearchProfileParams() {
      $profileParam = [
          'profile-type' => SearchEngine::COMPLETION_PROFILE_TYPE,
          'help-message' => 'apihelp-query+prefixsearch-param-profile',
      ];

      return [ 'profile' => $profileParam ];
  }
  /**
   * Map example query strings to the message keys that describe them.
   *
   * @return string[]
   */
  protected function getExamplesMessages() {
      $examples = [];
      $examples['action=opensearch&search=Te'] = 'apihelp-opensearch-example-te';

      return $examples;
  }
  /**
   * Return the URL of this module's help page.
   *
   * @return string
   */
  public function getHelpUrls() {
      $helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Opensearch';

      return $helpPage;
  }
  /**
   * Trim an extract to a sensible length.
   *
   * Adapted from Extension:OpenSearchXml, which adapted it from
   * Extension:ActiveAbstract.
   *
   * The text is cut at the first sentence terminator found at or after
   * $length characters. If no such terminator exists on the first line,
   * the whole first line is returned instead.
   *
   * @param string $text
   * @param int $length Target length; actual result will continue to the end of a sentence.
   * @return string
   */
  public static function trimExtract( $text, $length ) {
      // Only the terminator group is cached. The full pattern depends on
      // $length, so caching it would make later calls reuse the length of
      // the first call.
      static $end = null;
      if ( $end === null ) {
          // Non-ASCII terminators are written as code-point escapes so the
          // pattern stays valid even if this file's encoding is normalized.
          $endchars = [
              '[^\d]\.\s', '\!\s', '\?\s', // regular ASCII
              '\x{3002}', // full-width ideographic full-stop
              '\x{FF0E}', '\x{FF01}', '\x{FF1F}', // double-width roman forms
              '\x{FF61}', // half-width ideographic full stop
          ];
          $end = '(?:' . implode( '|', $endchars ) . ')';
      }

      // A negative bound would not be a valid quantifier
      $minLength = max( 0, (int)$length );
      $regex = '/^(.{' . $minLength . ',}?' . $end . '+)/u';

      $matches = [];
      if ( preg_match( $regex, $text, $matches ) ) {
          return trim( $matches[1] );
      }

      // Just return the first line
      return trim( explode( "\n", $text )[0] );
  }
  /**
   * Fetch the template for a type.
   *
   * A configured OpenSearchTemplate is returned only for the JSON
   * suggestions type. Otherwise the URL is assembled from CanonicalServer,
   * the API script path and the default search namespaces.
   *
   * @param string $type MIME type
   * @return string
   * @throws MWException
   */
  public static function getOpenSearchTemplate( $type ) {
      $searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

      $configured = $searchConfig->getConfig()->get( 'OpenSearchTemplate' );
      if ( $type === 'application/x-suggestions+json' && $configured ) {
          return $configured;
      }

      $namespaces = implode( '|', $searchConfig->defaultNamespaces() );
      if ( !$namespaces ) {
          $namespaces = '0';
      }

      switch ( $type ) {
          case 'application/x-suggestions+json':
              $query = '?action=opensearch&search={searchTerms}&namespace=';
              break;

          case 'application/x-suggestions+xml':
              $query = '?action=opensearch&format=xml&search={searchTerms}&namespace=';
              break;

          default:
              throw new MWException( __METHOD__ . ": Unknown type '$type'" );
      }

      return $searchConfig->getConfig()->get( 'CanonicalServer' ) . wfScript( 'api' )
          . $query . $namespaces;
  }
  346. }