PageEditStash.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. <?php
  2. /**
  3. * Predictive edit preparation system for MediaWiki page.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. */
  22. namespace MediaWiki\Storage;
  23. use ActorMigration;
  24. use BagOStuff;
  25. use Content;
  26. use Hooks;
  27. use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
  28. use ParserOutput;
  29. use Psr\Log\LoggerInterface;
  30. use stdClass;
  31. use Title;
  32. use User;
  33. use Wikimedia\Rdbms\ILoadBalancer;
  34. use Wikimedia\ScopedCallback;
  35. use WikiPage;
  36. /**
  37. * Class for managing stashed edits used by the page updater classes
  38. *
  39. * @since 1.34
  40. */
  41. class PageEditStash {
  42. /** @var BagOStuff */
  43. private $cache;
  44. /** @var ILoadBalancer */
  45. private $lb;
  46. /** @var LoggerInterface */
  47. private $logger;
  48. /** @var StatsdDataFactoryInterface */
  49. private $stats;
  50. /** @var int */
  51. private $initiator;
  52. const ERROR_NONE = 'stashed';
  53. const ERROR_PARSE = 'error_parse';
  54. const ERROR_CACHE = 'error_cache';
  55. const ERROR_UNCACHEABLE = 'uncacheable';
  56. const ERROR_BUSY = 'busy';
  57. const PRESUME_FRESH_TTL_SEC = 30;
  58. const MAX_CACHE_TTL = 300; // 5 minutes
  59. const MAX_SIGNATURE_TTL = 60;
  60. const MAX_CACHE_RECENT = 2;
  61. const INITIATOR_USER = 1;
  62. const INITIATOR_JOB_OR_CLI = 2;
  63. /**
  64. * @param BagOStuff $cache
  65. * @param ILoadBalancer $lb
  66. * @param LoggerInterface $logger
  67. * @param StatsdDataFactoryInterface $stats
  68. * @param int $initiator Class INITIATOR__* constant
  69. */
  70. public function __construct(
  71. BagOStuff $cache,
  72. ILoadBalancer $lb,
  73. LoggerInterface $logger,
  74. StatsdDataFactoryInterface $stats,
  75. $initiator
  76. ) {
  77. $this->cache = $cache;
  78. $this->lb = $lb;
  79. $this->logger = $logger;
  80. $this->stats = $stats;
  81. $this->initiator = $initiator;
  82. }
  83. /**
  84. * @param WikiPage $page
  85. * @param Content $content Edit content
  86. * @param User $user
  87. * @param string $summary Edit summary
  88. * @return string Class ERROR_* constant
  89. */
  90. public function parseAndCache( WikiPage $page, Content $content, User $user, $summary ) {
  91. $logger = $this->logger;
  92. $title = $page->getTitle();
  93. $key = $this->getStashKey( $title, $this->getContentHash( $content ), $user );
  94. $fname = __METHOD__;
  95. // Use the master DB to allow for fast blocking locks on the "save path" where this
  96. // value might actually be used to complete a page edit. If the edit submission request
  97. // happens before this edit stash requests finishes, then the submission will block until
  98. // the stash request finishes parsing. For the lock acquisition below, there is not much
  99. // need to duplicate parsing of the same content/user/summary bundle, so try to avoid
  100. // blocking at all here.
  101. $dbw = $this->lb->getConnectionRef( DB_MASTER );
  102. if ( !$dbw->lock( $key, $fname, 0 ) ) {
  103. // De-duplicate requests on the same key
  104. return self::ERROR_BUSY;
  105. }
  106. /** @noinspection PhpUnusedLocalVariableInspection */
  107. $unlocker = new ScopedCallback( function () use ( $dbw, $key, $fname ) {
  108. $dbw->unlock( $key, $fname );
  109. } );
  110. $cutoffTime = time() - self::PRESUME_FRESH_TTL_SEC;
  111. // Reuse any freshly build matching edit stash cache
  112. $editInfo = $this->getStashValue( $key );
  113. if ( $editInfo && wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $cutoffTime ) {
  114. $alreadyCached = true;
  115. } else {
  116. $format = $content->getDefaultFormat();
  117. $editInfo = $page->prepareContentForEdit( $content, null, $user, $format, false );
  118. $editInfo->output->setCacheTime( $editInfo->timestamp );
  119. $alreadyCached = false;
  120. }
  121. $context = [ 'cachekey' => $key, 'title' => $title->getPrefixedText() ];
  122. if ( $editInfo && $editInfo->output ) {
  123. // Let extensions add ParserOutput metadata or warm other caches
  124. Hooks::run( 'ParserOutputStashForEdit',
  125. [ $page, $content, $editInfo->output, $summary, $user ] );
  126. if ( $alreadyCached ) {
  127. $logger->debug( "Parser output for key '{cachekey}' already cached.", $context );
  128. return self::ERROR_NONE;
  129. }
  130. $code = $this->storeStashValue(
  131. $key,
  132. $editInfo->pstContent,
  133. $editInfo->output,
  134. $editInfo->timestamp,
  135. $user
  136. );
  137. if ( $code === true ) {
  138. $logger->debug( "Cached parser output for key '{cachekey}'.", $context );
  139. return self::ERROR_NONE;
  140. } elseif ( $code === 'uncacheable' ) {
  141. $logger->info(
  142. "Uncacheable parser output for key '{cachekey}' [{code}].",
  143. $context + [ 'code' => $code ]
  144. );
  145. return self::ERROR_UNCACHEABLE;
  146. } else {
  147. $logger->error(
  148. "Failed to cache parser output for key '{cachekey}'.",
  149. $context + [ 'code' => $code ]
  150. );
  151. return self::ERROR_CACHE;
  152. }
  153. }
  154. return self::ERROR_PARSE;
  155. }
  156. /**
  157. * Check that a prepared edit is in cache and still up-to-date
  158. *
  159. * This method blocks if the prepared edit is already being rendered,
  160. * waiting until rendering finishes before doing final validity checks.
  161. *
  162. * The cache is rejected if template or file changes are detected.
  163. * Note that foreign template or file transclusions are not checked.
  164. *
  165. * This returns an object with the following fields:
  166. * - pstContent: the Content after pre-save-transform
  167. * - output: the ParserOutput instance
  168. * - timestamp: the timestamp of the parse
  169. * - edits: author edit count if they are logged in or NULL otherwise
  170. *
  171. * @param Title $title
  172. * @param Content $content
  173. * @param User $user User to get parser options from
  174. * @return stdClass|bool Returns edit stash object or false on cache miss
  175. */
  176. public function checkCache( Title $title, Content $content, User $user ) {
  177. if (
  178. // The context is not an HTTP POST request
  179. !$user->getRequest()->wasPosted() ||
  180. // The context is a CLI script or a job runner HTTP POST request
  181. $this->initiator !== self::INITIATOR_USER ||
  182. // The editor account is a known bot
  183. $user->isBot()
  184. ) {
  185. // Avoid wasted queries and statsd pollution
  186. return false;
  187. }
  188. $logger = $this->logger;
  189. $key = $this->getStashKey( $title, $this->getContentHash( $content ), $user );
  190. $context = [
  191. 'key' => $key,
  192. 'title' => $title->getPrefixedText(),
  193. 'user' => $user->getName()
  194. ];
  195. $editInfo = $this->getAndWaitForStashValue( $key );
  196. if ( !is_object( $editInfo ) || !$editInfo->output ) {
  197. $this->incrStatsByContent( 'cache_misses.no_stash', $content );
  198. if ( $this->recentStashEntryCount( $user ) > 0 ) {
  199. $logger->info( "Empty cache for key '{key}' but not for user.", $context );
  200. } else {
  201. $logger->debug( "Empty cache for key '{key}'.", $context );
  202. }
  203. return false;
  204. }
  205. $age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
  206. $context['age'] = $age;
  207. $isCacheUsable = true;
  208. if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
  209. // Assume nothing changed in this time
  210. $this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
  211. $logger->debug( "Timestamp-based cache hit for key '{key}'.", $context );
  212. } elseif ( $user->isAnon() ) {
  213. $lastEdit = $this->lastEditTime( $user );
  214. $cacheTime = $editInfo->output->getCacheTime();
  215. if ( $lastEdit < $cacheTime ) {
  216. // Logged-out user made no local upload/template edits in the meantime
  217. $this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
  218. $logger->debug( "Edit check based cache hit for key '{key}'.", $context );
  219. } else {
  220. $isCacheUsable = false;
  221. $this->incrStatsByContent( 'cache_misses.proven_stale', $content );
  222. $logger->info( "Stale cache for key '{key}' due to outside edits.", $context );
  223. }
  224. } else {
  225. if ( $editInfo->edits === $user->getEditCount() ) {
  226. // Logged-in user made no local upload/template edits in the meantime
  227. $this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
  228. $logger->debug( "Edit count based cache hit for key '{key}'.", $context );
  229. } else {
  230. $isCacheUsable = false;
  231. $this->incrStatsByContent( 'cache_misses.proven_stale', $content );
  232. $logger->info( "Stale cache for key '{key}'due to outside edits.", $context );
  233. }
  234. }
  235. if ( !$isCacheUsable ) {
  236. return false;
  237. }
  238. if ( $editInfo->output->getFlag( 'vary-revision' ) ) {
  239. // This can be used for the initial parse, e.g. for filters or doEditContent(),
  240. // but a second parse will be triggered in doEditUpdates() no matter what
  241. $logger->info(
  242. "Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
  243. $context
  244. );
  245. } else {
  246. static $flagsMaybeReparse = [
  247. // Similar to the above if we didn't guess the ID correctly
  248. 'vary-revision-id',
  249. // Similar to the above if we didn't guess the timestamp correctly
  250. 'vary-revision-timestamp',
  251. // Similar to the above if we didn't guess the content correctly
  252. 'vary-revision-sha1',
  253. // Similar to the above if we didn't guess page ID correctly
  254. 'vary-page-id'
  255. ];
  256. foreach ( $flagsMaybeReparse as $flag ) {
  257. if ( $editInfo->output->getFlag( $flag ) ) {
  258. $logger->debug(
  259. "Cache for key '{key}' has $flag; post-insertion parse possible.",
  260. $context
  261. );
  262. }
  263. }
  264. }
  265. return $editInfo;
  266. }
  267. /**
  268. * @param string $subkey
  269. * @param Content $content
  270. */
  271. private function incrStatsByContent( $subkey, Content $content ) {
  272. $this->stats->increment( 'editstash.' . $subkey ); // overall for b/c
  273. $this->stats->increment( 'editstash_by_model.' . $content->getModel() . '.' . $subkey );
  274. }
  275. /**
  276. * @param string $key
  277. * @return bool|stdClass
  278. */
  279. private function getAndWaitForStashValue( $key ) {
  280. $editInfo = $this->getStashValue( $key );
  281. if ( !$editInfo ) {
  282. $start = microtime( true );
  283. // We ignore user aborts and keep parsing. Block on any prior parsing
  284. // so as to use its results and make use of the time spent parsing.
  285. // Skip this logic if there no master connection in case this method
  286. // is called on an HTTP GET request for some reason.
  287. $dbw = $this->lb->getAnyOpenConnection( $this->lb->getWriterIndex() );
  288. if ( $dbw && $dbw->lock( $key, __METHOD__, 30 ) ) {
  289. $editInfo = $this->getStashValue( $key );
  290. $dbw->unlock( $key, __METHOD__ );
  291. }
  292. $timeMs = 1000 * max( 0, microtime( true ) - $start );
  293. $this->stats->timing( 'editstash.lock_wait_time', $timeMs );
  294. }
  295. return $editInfo;
  296. }
  297. /**
  298. * @param string $textHash
  299. * @return string|bool Text or false if missing
  300. */
  301. public function fetchInputText( $textHash ) {
  302. $textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
  303. return $this->cache->get( $textKey );
  304. }
  305. /**
  306. * @param string $text
  307. * @param string $textHash
  308. * @return bool Success
  309. */
  310. public function stashInputText( $text, $textHash ) {
  311. $textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );
  312. return $this->cache->set(
  313. $textKey,
  314. $text,
  315. self::MAX_CACHE_TTL,
  316. BagOStuff::WRITE_ALLOW_SEGMENTS
  317. );
  318. }
  319. /**
  320. * @param User $user
  321. * @return string|null TS_MW timestamp or null
  322. */
  323. private function lastEditTime( User $user ) {
  324. $db = $this->lb->getConnectionRef( DB_REPLICA );
  325. $actorQuery = ActorMigration::newMigration()->getWhere( $db, 'rc_user', $user, false );
  326. $time = $db->selectField(
  327. [ 'recentchanges' ] + $actorQuery['tables'],
  328. 'MAX(rc_timestamp)',
  329. [ $actorQuery['conds'] ],
  330. __METHOD__,
  331. [],
  332. $actorQuery['joins']
  333. );
  334. return wfTimestampOrNull( TS_MW, $time );
  335. }
  336. /**
  337. * Get hash of the content, factoring in model/format
  338. *
  339. * @param Content $content
  340. * @return string
  341. */
  342. private function getContentHash( Content $content ) {
  343. return sha1( implode( "\n", [
  344. $content->getModel(),
  345. $content->getDefaultFormat(),
  346. $content->serialize( $content->getDefaultFormat() )
  347. ] ) );
  348. }
  349. /**
  350. * Get the temporary prepared edit stash key for a user
  351. *
  352. * This key can be used for caching prepared edits provided:
  353. * - a) The $user was used for PST options
  354. * - b) The parser output was made from the PST using cannonical matching options
  355. *
  356. * @param Title $title
  357. * @param string $contentHash Result of getContentHash()
  358. * @param User $user User to get parser options from
  359. * @return string
  360. */
  361. private function getStashKey( Title $title, $contentHash, User $user ) {
  362. return $this->cache->makeKey(
  363. 'stashedit-info-v1',
  364. md5( $title->getPrefixedDBkey() ),
  365. // Account for the edit model/text
  366. $contentHash,
  367. // Account for user name related variables like signatures
  368. md5( $user->getId() . "\n" . $user->getName() )
  369. );
  370. }
  371. /**
  372. * @param string $key
  373. * @return stdClass|bool Object map (pstContent,output,outputID,timestamp,edits) or false
  374. */
  375. private function getStashValue( $key ) {
  376. $stashInfo = $this->cache->get( $key );
  377. if ( is_object( $stashInfo ) && $stashInfo->output instanceof ParserOutput ) {
  378. return $stashInfo;
  379. }
  380. return false;
  381. }
  382. /**
  383. * Build a value to store in memcached based on the PST content and parser output
  384. *
  385. * This makes a simple version of WikiPage::prepareContentForEdit() as stash info
  386. *
  387. * @param string $key
  388. * @param Content $pstContent Pre-Save transformed content
  389. * @param ParserOutput $parserOutput
  390. * @param string $timestamp TS_MW
  391. * @param User $user
  392. * @return string|bool True or an error code
  393. */
  394. private function storeStashValue(
  395. $key,
  396. Content $pstContent,
  397. ParserOutput $parserOutput,
  398. $timestamp,
  399. User $user
  400. ) {
  401. // If an item is renewed, mind the cache TTL determined by config and parser functions.
  402. // Put an upper limit on the TTL for sanity to avoid extreme template/file staleness.
  403. $age = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
  404. $ttl = min( $parserOutput->getCacheExpiry() - $age, self::MAX_CACHE_TTL );
  405. // Avoid extremely stale user signature timestamps (T84843)
  406. if ( $parserOutput->getFlag( 'user-signature' ) ) {
  407. $ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
  408. }
  409. if ( $ttl <= 0 ) {
  410. return 'uncacheable'; // low TTL due to a tag, magic word, or signature?
  411. }
  412. // Store what is actually needed and split the output into another key (T204742)
  413. $stashInfo = (object)[
  414. 'pstContent' => $pstContent,
  415. 'output' => $parserOutput,
  416. 'timestamp' => $timestamp,
  417. 'edits' => $user->getEditCount()
  418. ];
  419. $ok = $this->cache->set( $key, $stashInfo, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS );
  420. if ( $ok ) {
  421. // These blobs can waste slots in low cardinality memcached slabs
  422. $this->pruneExcessStashedEntries( $user, $key );
  423. }
  424. return $ok ? true : 'store_error';
  425. }
  426. /**
  427. * @param User $user
  428. * @param string $newKey
  429. */
  430. private function pruneExcessStashedEntries( User $user, $newKey ) {
  431. $key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
  432. $keyList = $this->cache->get( $key ) ?: [];
  433. if ( count( $keyList ) >= self::MAX_CACHE_RECENT ) {
  434. $oldestKey = array_shift( $keyList );
  435. $this->cache->delete( $oldestKey, BagOStuff::WRITE_PRUNE_SEGMENTS );
  436. }
  437. $keyList[] = $newKey;
  438. $this->cache->set( $key, $keyList, 2 * self::MAX_CACHE_TTL );
  439. }
  440. /**
  441. * @param User $user
  442. * @return int
  443. */
  444. private function recentStashEntryCount( User $user ) {
  445. $key = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
  446. return count( $this->cache->get( $key ) ?: [] );
  447. }
  448. }