PoolCounterRedis.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. */
  20. use Psr\Log\LoggerInterface;
  21. /**
  22. * Version of PoolCounter that uses Redis
  23. *
  24. * There are four main redis keys used to track each pool counter key:
  25. * - poolcounter:l-slots-* : A list of available slot IDs for a pool.
  26. * - poolcounter:z-renewtime-* : A sorted set of (slot ID, UNIX timestamp as score)
  27. * used for tracking the next time a slot should be
  28. * released. This is -1 when a slot is created, and is
  29. * set when released (expired), locked, and unlocked.
  30. * - poolcounter:z-wait-* : A sorted set of (slot ID, UNIX timestamp as score)
  31. * used for tracking waiting processes (and wait time).
  32. * - poolcounter:l-wakeup-* : A list pushed to for the sake of waking up processes
  33. * when any process in the pool finishes (lasts for 1ms).
  34. * For a given pool key, all the redis keys start off non-existing and are deleted if not
  35. * used for a while to prevent garbage from building up on the server. They are atomically
  36. * re-initialized as needed. The "z-renewtime" key is used for detecting sessions which got
  37. * slots but then disappeared. Stale entries from there have their timestamp updated and the
  38. * corresponding slots freed up. The "z-wait" key is used for detecting processes registered
  39. * as waiting but that disappeared. Stale entries from there are deleted and the corresponding
  40. * slots are freed up. The worker count is included in all the redis key names as it does not
  41. * vary within each $wgPoolCounterConf type and doing so handles configuration changes.
  42. *
  43. * This class requires Redis 2.6 as it makes use of Lua scripts for fast atomic operations.
  44. * Also this should be on a server with plenty of RAM for the working set to avoid evictions.
  45. * Evictions could temporarily allow wait queues to double in size or temporarily cause
  46. * pools to appear as full when they are not. Using volatile-ttl and bumping memory-samples
  47. * in redis.conf can be helpful otherwise.
  48. *
  49. * @ingroup Redis
  50. * @since 1.23
  51. */
  class PoolCounterRedis extends PoolCounter {
      /** @var HashRing */
      protected $ring;
      /** @var RedisConnectionPool */
      protected $pool;
      /** @var LoggerInterface */
      protected $logger;
      /** @var array (server label => host) map */
      protected $serversByLabel;
      /** @var string SHA-1 of the key */
      protected $keySha1;
      /** @var int TTL for locks to expire (work should finish in this time) */
      protected $lockTTL;

      /** @var RedisConnRef */
      protected $conn;
      /** @var string Pool slot value */
      protected $slot;
      /** @var int AWAKE_* constant */
      protected $onRelease;
      /** @var string Unique string to identify this process */
      protected $session;
      /** @var int UNIX timestamp */
      protected $slotTime;

      // Waiters block on the slot list only, so they wake up when a slot is handed back
      const AWAKE_ONE = 1;
      // Waiters also block on the wake-up list, and releasing pushes a wake-up for every waiter
      const AWAKE_ALL = 2;

      /** @var PoolCounterRedis[] List of active PoolCounterRedis objects in this script */
      protected static $active = null;

      /**
       * @param array $conf Pool configuration; this class reads:
       *   - servers     : (server label => host) map used to build the hash ring
       *   - redisConfig : options handed to RedisConnectionPool::singleton()
       * @param string $type Passed through to the parent constructor
       * @param string $key Passed through to the parent constructor
       */
      function __construct( $conf, $type, $key ) {
          parent::__construct( $conf, $type, $key );

          $this->serversByLabel = $conf['servers'];
          $this->ring = new HashRing( array_fill_keys( array_keys( $conf['servers'] ), 100 ) );

          $conf['redisConfig']['serializer'] = 'none'; // for use with Lua
          $this->pool = RedisConnectionPool::singleton( $conf['redisConfig'] );
          $this->logger = \MediaWiki\Logger\LoggerFactory::getInstance( 'redis' );

          $this->keySha1 = sha1( $this->key );
          $met = ini_get( 'max_execution_time' ); // usually 0 in CLI mode
          $this->lockTTL = $met ? 2 * $met : 3600;

          if ( $this->session === null ) {
              // The session ID is the member name in the "z-wait" sorted set and the
              // key into self::$active. It must differ per object, otherwise all waiters
              // collapse into a single wait-set entry and all active counters overwrite
              // each other in the shutdown registry.
              $this->session = sha1( uniqid( getmypid() . '-' . mt_rand(), true ) );
          }

          if ( self::$active === null ) {
              self::$active = [];
              // Registered once per script so that held slots are handed back on shutdown
              register_shutdown_function( [ __CLASS__, 'releaseAll' ] );
          }
      }

      /**
       * Lazily pick a server for this key and connect to it.
       *
       * Up to three candidate servers are taken from the hash ring; the first one
       * that yields a connection is cached on the object for subsequent calls.
       *
       * @return Status Uses RedisConnRef as value on success
       */
      protected function getConnection() {
          if ( !isset( $this->conn ) ) {
              $conn = false;
              $servers = $this->ring->getLocations( $this->key, 3 );
              ArrayUtils::consistentHashSort( $servers, $this->key );
              foreach ( $servers as $server ) {
                  $conn = $this->pool->getConnection( $this->serversByLabel[$server], $this->logger );
                  if ( $conn ) {
                      break;
                  }
              }
              if ( !$conn ) {
                  return Status::newFatal( 'pool-servererror', implode( ', ', $servers ) );
              }
              $this->conn = $conn;
          }

          return Status::newGood( $this->conn );
      }

      /**
       * Acquire a slot, waiting only for an actual slot to become available.
       *
       * @return Status Result of precheckAcquire() if it is not good, otherwise
       *   the result of waitForSlotOrNotif() in AWAKE_ONE mode
       */
      function acquireForMe() {
          $status = $this->precheckAcquire();
          if ( !$status->isGood() ) {
              return $status;
          }

          return $this->waitForSlotOrNotif( self::AWAKE_ONE );
      }

      /**
       * Acquire a slot, or return early once another process in the pool finishes.
       *
       * @return Status Result of precheckAcquire() if it is not good, otherwise
       *   the result of waitForSlotOrNotif() in AWAKE_ALL mode
       */
      function acquireForAnyone() {
          $status = $this->precheckAcquire();
          if ( !$status->isGood() ) {
              return $status;
          }

          return $this->waitForSlotOrNotif( self::AWAKE_ALL );
      }

      /**
       * Hand the held slot back to the pool and, in AWAKE_ALL mode, notify waiters.
       *
       * @return Status Good with PoolCounter::NOT_LOCKED when no slot is held, good with
       *   PoolCounter::RELEASED on success, or fatal on connection/Redis errors (in which
       *   case the local slot state is left untouched)
       */
      function release() {
          if ( $this->slot === null ) {
              return Status::newGood( PoolCounter::NOT_LOCKED ); // not locked
          }

          $status = $this->getConnection();
          if ( !$status->isOK() ) {
              return $status;
          }
          $conn = $status->value;

          // phpcs:disable Generic.Files.LineLength
          static $script =
          /** @lang Lua */
<<<LUA
          local kSlots,kSlotsNextRelease,kWakeup,kWaiting = unpack(KEYS)
          local rMaxWorkers,rExpiry,rSlot,rSlotTime,rAwakeAll,rTime = unpack(ARGV)
          -- Add the slots back to the list (if rSlot is "w" then it is not a slot).
          -- Treat the list as expired if the "next release" time sorted-set is missing.
          if rSlot ~= 'w' and redis.call('exists',kSlotsNextRelease) == 1 then
              if 1*redis.call('zScore',kSlotsNextRelease,rSlot) ~= (rSlotTime + rExpiry) then
                  -- Slot lock expired and was released already
              elseif redis.call('lLen',kSlots) >= 1*rMaxWorkers then
                  -- Slots somehow got out of sync; reset the list for sanity
                  redis.call('del',kSlots,kSlotsNextRelease)
              elseif redis.call('lLen',kSlots) == (1*rMaxWorkers - 1) and redis.call('zCard',kWaiting) == 0 then
                  -- Slot list will be made full; clear it to save space (it re-inits as needed)
                  -- since nothing is waiting on being unblocked by a push to the list
                  redis.call('del',kSlots,kSlotsNextRelease)
              else
                  -- Add slot back to pool and update the "next release" time
                  redis.call('rPush',kSlots,rSlot)
                  redis.call('zAdd',kSlotsNextRelease,rTime + 30,rSlot)
                  -- Always keep renewing the expiry on use
                  redis.call('expireAt',kSlots,math.ceil(rTime + rExpiry))
                  redis.call('expireAt',kSlotsNextRelease,math.ceil(rTime + rExpiry))
              end
          end
          -- Update an ephemeral list to wake up other clients that can
          -- reuse any cached work from this process. Only do this if no
          -- slots are currently free (e.g. clients could be waiting).
          if 1*rAwakeAll == 1 then
              local count = redis.call('zCard',kWaiting)
              for i = 1,count do
                  redis.call('rPush',kWakeup,'w')
              end
              redis.call('pexpire',kWakeup,1)
          end
          return 1
LUA;
          // phpcs:enable
          try {
              $conn->luaEval( $script,
                  [
                      $this->getSlotListKey(),
                      $this->getSlotRTimeSetKey(),
                      $this->getWakeupListKey(),
                      $this->getWaitSetKey(),
                      $this->workers,
                      $this->lockTTL,
                      $this->slot,
                      $this->slotTime, // used for CAS-style sanity check
                      ( $this->onRelease === self::AWAKE_ALL ) ? 1 : 0,
                      microtime( true )
                  ],
                  4 // number of first argument(s) that are keys
              );
          } catch ( RedisException $e ) {
              return Status::newFatal( 'pool-error-unknown', $e->getMessage() );
          }

          // Only forget the slot once the server-side release went through
          $this->slot = null;
          $this->slotTime = null;
          $this->onRelease = null;
          unset( self::$active[$this->session] );

          // Note: this is the inherited onRelease() method, not the $onRelease property
          $this->onRelease();

          return Status::newGood( PoolCounter::RELEASED );
      }

      /**
       * Take a free slot, or register as waiting and block until a slot (or, in
       * AWAKE_ALL mode, a wake-up notification) arrives or the timeout elapses.
       *
       * @param int $doWakeup AWAKE_* constant
       * @return Status Good with one of PoolCounter::LOCK_HELD, QUEUE_FULL, TIMEOUT,
       *   DONE (woken up without a slot) or LOCKED (slot acquired); fatal on
       *   connection/Redis errors or an unrecognized server reply
       */
      protected function waitForSlotOrNotif( $doWakeup ) {
          if ( $this->slot !== null ) {
              return Status::newGood( PoolCounter::LOCK_HELD ); // already acquired
          }

          $status = $this->getConnection();
          if ( !$status->isOK() ) {
              return $status;
          }
          $conn = $status->value;

          $now = microtime( true );
          try {
              $slot = $this->initAndPopPoolSlotList( $conn, $now );
              // The is_string() guard keeps a failed (false) reply out of ctype_digit()
              if ( is_string( $slot ) && ctype_digit( $slot ) ) {
                  // Pool slot acquired by this process
                  $slotTime = $now;
              } elseif ( $slot === 'QUEUE_FULL' ) {
                  // Too many processes are waiting for pooled processes to finish
                  return Status::newGood( PoolCounter::QUEUE_FULL );
              } elseif ( $slot === 'QUEUE_WAIT' ) {
                  // This process is now registered as waiting
                  $keys = ( $doWakeup == self::AWAKE_ALL )
                      // Wait for an open slot or wake-up signal (preferring the latter)
                      ? [ $this->getWakeupListKey(), $this->getSlotListKey() ]
                      // Just wait for an actual pool slot
                      : [ $this->getSlotListKey() ];

                  $res = $conn->blPop( $keys, $this->timeout );
                  // Anything other than a (list name, value) pair means nothing was popped.
                  // Treat that as a timeout rather than reading a missing index and
                  // reporting a lock that was never obtained.
                  if ( !is_array( $res ) || count( $res ) < 2 ) {
                      $conn->zRem( $this->getWaitSetKey(), $this->session ); // no longer waiting
                      return Status::newGood( PoolCounter::TIMEOUT );
                  }

                  $slot = $res[1]; // pool slot or "w" for wake-up notifications
                  $slotTime = microtime( true ); // last microtime() was a few RTTs ago
                  // Unregister this process as waiting and bump slot "next release" time
                  $this->registerAcquisitionTime( $conn, $slot, $slotTime );
              } else {
                  return Status::newFatal( 'pool-error-unknown', "Server gave slot '$slot'." );
              }
          } catch ( RedisException $e ) {
              return Status::newFatal( 'pool-error-unknown', $e->getMessage() );
          }

          if ( $slot !== 'w' ) {
              // A real slot is held: remember it so release()/releaseAll() can return it
              $this->slot = $slot;
              $this->slotTime = $slotTime;
              $this->onRelease = $doWakeup;
              self::$active[$this->session] = $this;
          }

          $this->onAcquire();

          return Status::newGood( $slot === 'w' ? PoolCounter::DONE : PoolCounter::LOCKED );
      }

      /**
       * Atomically (re-)initialize the pool keys if needed, free stale slots and
       * wait entries, and then either pop a slot or register this session as waiting.
       *
       * @param RedisConnRef $conn
       * @param float $now UNIX timestamp
       * @return string|bool Slot ID, 'QUEUE_FULL' or 'QUEUE_WAIT'; false on failure
       */
      protected function initAndPopPoolSlotList( RedisConnRef $conn, $now ) {
          static $script =
          /** @lang Lua */
<<<LUA
          local kSlots,kSlotsNextRelease,kSlotWaits = unpack(KEYS)
          local rMaxWorkers,rMaxQueue,rTimeout,rExpiry,rSess,rTime = unpack(ARGV)
          -- Initialize if the "next release" time sorted-set is empty. The slot key
          -- itself is empty if all slots are busy or when nothing is initialized.
          -- If the list is empty but the set is not, then it is the latter case.
          -- For sanity, if the list exists but not the set, then reset everything.
          if redis.call('exists',kSlotsNextRelease) == 0 then
              redis.call('del',kSlots)
              for i = 1,1*rMaxWorkers do
                  redis.call('rPush',kSlots,i)
                  redis.call('zAdd',kSlotsNextRelease,-1,i)
              end
          -- Otherwise do maintenance to clean up after network partitions
          else
              -- Find stale slot locks and add free them (avoid duplicates for sanity)
              local staleLocks = redis.call('zRangeByScore',kSlotsNextRelease,0,rTime)
              for k,slot in ipairs(staleLocks) do
                  redis.call('lRem',kSlots,0,slot)
                  redis.call('rPush',kSlots,slot)
                  redis.call('zAdd',kSlotsNextRelease,rTime + 30,slot)
              end
              -- Find stale wait slot entries and remove them
              redis.call('zRemRangeByScore',kSlotWaits,0,rTime - 2*rTimeout)
          end
          local slot
          -- Try to acquire a slot if possible now
          if redis.call('lLen',kSlots) > 0 then
              slot = redis.call('lPop',kSlots)
              -- Update the slot "next release" time
              redis.call('zAdd',kSlotsNextRelease,rTime + rExpiry,slot)
          elseif redis.call('zCard',kSlotWaits) >= 1*rMaxQueue then
              slot = 'QUEUE_FULL'
          else
              slot = 'QUEUE_WAIT'
              -- Register this process as waiting
              redis.call('zAdd',kSlotWaits,rTime,rSess)
              redis.call('expireAt',kSlotWaits,math.ceil(rTime + 2*rTimeout))
          end
          -- Always keep renewing the expiry on use
          redis.call('expireAt',kSlots,math.ceil(rTime + rExpiry))
          redis.call('expireAt',kSlotsNextRelease,math.ceil(rTime + rExpiry))
          return slot
LUA;
          return $conn->luaEval( $script,
              [
                  $this->getSlotListKey(),
                  $this->getSlotRTimeSetKey(),
                  $this->getWaitSetKey(),
                  $this->workers,
                  $this->maxqueue,
                  $this->timeout,
                  $this->lockTTL,
                  $this->session,
                  $now
              ],
              3 // number of first argument(s) that are keys
          );
      }

      /**
       * Record that a waiting session obtained a slot (or a wake-up) after blocking:
       * bump the slot's "next release" time and remove the session from the wait set.
       *
       * @param RedisConnRef $conn
       * @param string $slot Slot ID, or 'w' when only a wake-up notification was received
       * @param float $now UNIX timestamp
       * @return int|bool False on failure
       */
      protected function registerAcquisitionTime( RedisConnRef $conn, $slot, $now ) {
          static $script =
          /** @lang Lua */
<<<LUA
          local kSlots,kSlotsNextRelease,kSlotWaits = unpack(KEYS)
          local rSlot,rExpiry,rSess,rTime = unpack(ARGV)
          -- If rSlot is 'w' then the client was told to wake up but got no slot
          if rSlot ~= 'w' then
              -- Update the slot "next release" time
              redis.call('zAdd',kSlotsNextRelease,rTime + rExpiry,rSlot)
              -- Always keep renewing the expiry on use
              redis.call('expireAt',kSlots,math.ceil(rTime + rExpiry))
              redis.call('expireAt',kSlotsNextRelease,math.ceil(rTime + rExpiry))
          end
          -- Unregister this process as waiting
          redis.call('zRem',kSlotWaits,rSess)
          return 1
LUA;
          return $conn->luaEval( $script,
              [
                  $this->getSlotListKey(),
                  $this->getSlotRTimeSetKey(),
                  $this->getWaitSetKey(),
                  $slot,
                  $this->lockTTL,
                  $this->session,
                  $now
              ],
              3 // number of first argument(s) that are keys
          );
      }

      /**
       * @return string Redis key of the list of available slot IDs
       */
      protected function getSlotListKey() {
          return "poolcounter:l-slots-{$this->keySha1}-{$this->workers}";
      }

      /**
       * @return string Redis key of the sorted set of slot "next release" times
       */
      protected function getSlotRTimeSetKey() {
          return "poolcounter:z-renewtime-{$this->keySha1}-{$this->workers}";
      }

      /**
       * @return string Redis key of the sorted set of waiting sessions
       */
      protected function getWaitSetKey() {
          return "poolcounter:z-wait-{$this->keySha1}-{$this->workers}";
      }

      /**
       * @return string Redis key of the short-lived wake-up notification list
       */
      protected function getWakeupListKey() {
          return "poolcounter:l-wakeup-{$this->keySha1}-{$this->workers}";
      }

      /**
       * Try to make sure that locks get released (even with exceptions and fatals)
       *
       * Registered as a shutdown function by the constructor. Safe to call when no
       * PoolCounterRedis object was ever created.
       */
      public static function releaseAll() {
          if ( !self::$active ) {
              // Registry is still null (no instance constructed) or simply empty
              return;
          }

          // foreach iterates over a copy, so release() may unset entries as we go
          foreach ( self::$active as $poolCounter ) {
              try {
                  if ( $poolCounter->slot !== null ) {
                      $poolCounter->release();
                  }
              } catch ( Exception $e ) {
                  // Deliberately ignored: this is best-effort cleanup at shutdown, and
                  // a failure for one counter must not prevent releasing the others.
              }
          }
      }
  }