CryptRand.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. <?php
  2. /**
  3. * A cryptographic random generator class used for generating secret keys
  4. *
  5. * This is based in part on Drupal code as well as what we used in our own code
  6. * prior to introduction of this class.
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * This program is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License along
  19. * with this program; if not, write to the Free Software Foundation, Inc.,
  20. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  21. * http://www.gnu.org/copyleft/gpl.html
  22. *
  23. * @author Daniel Friesen
  24. * @file
  25. */
  26. use Psr\Log\LoggerInterface;
  27. class CryptRand {
  28. /**
  29. * Minimum number of iterations we want to make in our drift calculations.
  30. */
  31. const MIN_ITERATIONS = 1000;
  32. /**
  33. * Number of milliseconds we want to spend generating each separate byte
  34. * of the final generated bytes.
  35. * This is used in combination with the hash length to determine the duration
  36. * we should spend doing drift calculations.
  37. */
  38. const MSEC_PER_BYTE = 0.5;
  39. /**
  40. * A boolean indicating whether the previous random generation was done using
  41. * cryptographically strong random number generator or not.
  42. */
  43. protected $strong = null;
  44. /**
  45. * List of functions to call to generate some random state
  46. *
  47. * @var callable[]
  48. */
  49. protected $randomFuncs = [];
  50. /**
  51. * List of files to generate some random state from
  52. *
  53. * @var string[]
  54. */
  55. protected $randomFiles = [];
  56. /**
  57. * @var LoggerInterface
  58. */
  59. protected $logger;
  60. public function __construct( array $randomFuncs, array $randomFiles, LoggerInterface $logger ) {
  61. $this->randomFuncs = $randomFuncs;
  62. $this->randomFiles = $randomFiles;
  63. $this->logger = $logger;
  64. }
  65. /**
  66. * Initialize an initial random state based off of whatever we can find
  67. * @return string
  68. */
  69. protected function initialRandomState() {
  70. // $_SERVER contains a variety of unstable user and system specific information
  71. // It'll vary a little with each page, and vary even more with separate users
  72. // It'll also vary slightly across different machines
  73. $state = serialize( $_SERVER );
  74. // Try to gather a little entropy from the different php rand sources
  75. $state .= rand() . uniqid( mt_rand(), true );
  76. // Include some information about the filesystem's current state in the random state
  77. $files = $this->randomFiles;
  78. // We know this file is here so grab some info about ourselves
  79. $files[] = __FILE__;
  80. // We must also have a parent folder, and with the usual file structure, a grandparent
  81. $files[] = __DIR__;
  82. $files[] = dirname( __DIR__ );
  83. foreach ( $files as $file ) {
  84. Wikimedia\suppressWarnings();
  85. $stat = stat( $file );
  86. Wikimedia\restoreWarnings();
  87. if ( $stat ) {
  88. // stat() duplicates data into numeric and string keys so kill off all the numeric ones
  89. foreach ( $stat as $k => $v ) {
  90. if ( is_numeric( $k ) ) {
  91. unset( $k );
  92. }
  93. }
  94. // The absolute filename itself will differ from install to install so don't leave it out
  95. $path = realpath( $file );
  96. if ( $path !== false ) {
  97. $state .= $path;
  98. } else {
  99. $state .= $file;
  100. }
  101. $state .= implode( '', $stat );
  102. } else {
  103. // The fact that the file isn't there is worth at least a
  104. // minuscule amount of entropy.
  105. $state .= '0';
  106. }
  107. }
  108. // Try and make this a little more unstable by including the varying process
  109. // id of the php process we are running inside of if we are able to access it
  110. if ( function_exists( 'getmypid' ) ) {
  111. $state .= getmypid();
  112. }
  113. // If available try to increase the instability of the data by throwing in
  114. // the precise amount of memory that we happen to be using at the moment.
  115. if ( function_exists( 'memory_get_usage' ) ) {
  116. $state .= memory_get_usage( true );
  117. }
  118. foreach ( $this->randomFuncs as $randomFunc ) {
  119. $state .= call_user_func( $randomFunc );
  120. }
  121. return $state;
  122. }
  123. /**
  124. * Randomly hash data while mixing in clock drift data for randomness
  125. *
  126. * @param string $data The data to randomly hash.
  127. * @return string The hashed bytes
  128. * @author Tim Starling
  129. */
  130. protected function driftHash( $data ) {
  131. // Minimum number of iterations (to avoid slow operations causing the
  132. // loop to gather little entropy)
  133. $minIterations = self::MIN_ITERATIONS;
  134. // Duration of time to spend doing calculations (in seconds)
  135. $duration = ( self::MSEC_PER_BYTE / 1000 ) * MWCryptHash::hashLength();
  136. // Create a buffer to use to trigger memory operations
  137. $bufLength = 10000000;
  138. $buffer = str_repeat( ' ', $bufLength );
  139. $bufPos = 0;
  140. // Iterate for $duration seconds or at least $minIterations number of iterations
  141. $iterations = 0;
  142. $startTime = microtime( true );
  143. $currentTime = $startTime;
  144. while ( $iterations < $minIterations || $currentTime - $startTime < $duration ) {
  145. // Trigger some memory writing to trigger some bus activity
  146. // This may create variance in the time between iterations
  147. $bufPos = ( $bufPos + 13 ) % $bufLength;
  148. $buffer[$bufPos] = ' ';
  149. // Add the drift between this iteration and the last in as entropy
  150. $nextTime = microtime( true );
  151. $delta = (int)( ( $nextTime - $currentTime ) * 1000000 );
  152. $data .= $delta;
  153. // Every 100 iterations hash the data and entropy
  154. if ( $iterations % 100 === 0 ) {
  155. $data = sha1( $data );
  156. }
  157. $currentTime = $nextTime;
  158. $iterations++;
  159. }
  160. $timeTaken = $currentTime - $startTime;
  161. $data = MWCryptHash::hash( $data );
  162. $this->logger->debug( "Clock drift calculation " .
  163. "(time-taken=" . ( $timeTaken * 1000 ) . "ms, " .
  164. "iterations=$iterations, " .
  165. "time-per-iteration=" . ( $timeTaken / $iterations * 1e6 ) . "us)" );
  166. return $data;
  167. }
  168. /**
  169. * Return a rolling random state initially build using data from unstable sources
  170. * @return string A new weak random state
  171. */
  172. protected function randomState() {
  173. static $state = null;
  174. if ( is_null( $state ) ) {
  175. // Initialize the state with whatever unstable data we can find
  176. // It's important that this data is hashed right afterwards to prevent
  177. // it from being leaked into the output stream
  178. $state = MWCryptHash::hash( $this->initialRandomState() );
  179. }
  180. // Generate a new random state based on the initial random state or previous
  181. // random state by combining it with clock drift
  182. $state = $this->driftHash( $state );
  183. return $state;
  184. }
  185. /**
  186. * Return a boolean indicating whether or not the source used for cryptographic
  187. * random bytes generation in the previously run generate* call
  188. * was cryptographically strong.
  189. *
  190. * @return bool Returns true if the source was strong, false if not.
  191. */
  192. public function wasStrong() {
  193. if ( is_null( $this->strong ) ) {
  194. throw new RuntimeException( __METHOD__ . ' called before generation of random data' );
  195. }
  196. return $this->strong;
  197. }
  198. /**
  199. * Generate a run of (ideally) cryptographically random data and return
  200. * it in raw binary form.
  201. * You can use CryptRand::wasStrong() if you wish to know if the source used
  202. * was cryptographically strong.
  203. *
  204. * @param int $bytes The number of bytes of random data to generate
  205. * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
  206. * strong sources of entropy even if reading from them may steal
  207. * more entropy from the system than optimal.
  208. * @return string Raw binary random data
  209. */
  210. public function generate( $bytes, $forceStrong = false ) {
  211. $bytes = floor( $bytes );
  212. static $buffer = '';
  213. if ( is_null( $this->strong ) ) {
  214. // Set strength to false initially until we know what source data is coming from
  215. $this->strong = true;
  216. }
  217. if ( strlen( $buffer ) < $bytes ) {
  218. // If available make use of PHP 7's random_bytes
  219. // On Linux, getrandom syscall will be used if available.
  220. // On Windows CryptGenRandom will always be used
  221. // On other platforms, /dev/urandom will be used.
  222. // Avoids polyfills from before php 7.0
  223. // All error situations will throw Exceptions and or Errors
  224. if ( PHP_VERSION_ID >= 70000
  225. || ( defined( 'HHVM_VERSION_ID' ) && HHVM_VERSION_ID >= 31101 )
  226. ) {
  227. $rem = $bytes - strlen( $buffer );
  228. $buffer .= random_bytes( $rem );
  229. }
  230. if ( strlen( $buffer ) >= $bytes ) {
  231. $this->strong = true;
  232. }
  233. }
  234. if ( strlen( $buffer ) < $bytes && function_exists( 'mcrypt_create_iv' ) ) {
  235. // If available make use of mcrypt_create_iv URANDOM source to generate randomness
  236. // On unix-like systems this reads from /dev/urandom but does it without any buffering
  237. // and bypasses openbasedir restrictions, so it's preferable to reading directly
  238. // On Windows starting in PHP 5.3.0 Windows' native CryptGenRandom is used to generate
  239. // entropy so this is also preferable to just trying to read urandom because it may work
  240. // on Windows systems as well.
  241. $rem = $bytes - strlen( $buffer );
  242. $iv = mcrypt_create_iv( $rem, MCRYPT_DEV_URANDOM );
  243. if ( $iv === false ) {
  244. $this->logger->debug( "mcrypt_create_iv returned false." );
  245. } else {
  246. $buffer .= $iv;
  247. $this->logger->debug( "mcrypt_create_iv generated " . strlen( $iv ) .
  248. " bytes of randomness." );
  249. }
  250. }
  251. if ( strlen( $buffer ) < $bytes && function_exists( 'openssl_random_pseudo_bytes' ) ) {
  252. $rem = $bytes - strlen( $buffer );
  253. $openssl_strong = false;
  254. $openssl_bytes = openssl_random_pseudo_bytes( $rem, $openssl_strong );
  255. if ( $openssl_bytes === false ) {
  256. $this->logger->debug( "openssl_random_pseudo_bytes returned false." );
  257. } else {
  258. $buffer .= $openssl_bytes;
  259. $this->logger->debug( "openssl_random_pseudo_bytes generated " .
  260. strlen( $openssl_bytes ) . " bytes of " .
  261. ( $openssl_strong ? "strong" : "weak" ) . " randomness." );
  262. }
  263. if ( strlen( $buffer ) >= $bytes ) {
  264. // openssl tells us if the random source was strong, if some of our data was generated
  265. // using it use it's say on whether the randomness is strong
  266. $this->strong = !!$openssl_strong;
  267. }
  268. }
  269. // Only read from urandom if we can control the buffer size or were passed forceStrong
  270. if ( strlen( $buffer ) < $bytes &&
  271. ( function_exists( 'stream_set_read_buffer' ) || $forceStrong )
  272. ) {
  273. $rem = $bytes - strlen( $buffer );
  274. if ( !function_exists( 'stream_set_read_buffer' ) && $forceStrong ) {
  275. $this->logger->debug( "Was forced to read from /dev/urandom " .
  276. "without control over the buffer size." );
  277. }
  278. // /dev/urandom is generally considered the best possible commonly
  279. // available random source, and is available on most *nix systems.
  280. Wikimedia\suppressWarnings();
  281. $urandom = fopen( "/dev/urandom", "rb" );
  282. Wikimedia\restoreWarnings();
  283. // Attempt to read all our random data from urandom
  284. // php's fread always does buffered reads based on the stream's chunk_size
  285. // so in reality it will usually read more than the amount of data we're
  286. // asked for and not storing that risks depleting the system's random pool.
  287. // If stream_set_read_buffer is available set the chunk_size to the amount
  288. // of data we need. Otherwise read 8k, php's default chunk_size.
  289. if ( $urandom ) {
  290. // php's default chunk_size is 8k
  291. $chunk_size = 1024 * 8;
  292. if ( function_exists( 'stream_set_read_buffer' ) ) {
  293. // If possible set the chunk_size to the amount of data we need
  294. stream_set_read_buffer( $urandom, $rem );
  295. $chunk_size = $rem;
  296. }
  297. $random_bytes = fread( $urandom, max( $chunk_size, $rem ) );
  298. $buffer .= $random_bytes;
  299. fclose( $urandom );
  300. $this->logger->debug( "/dev/urandom generated " . strlen( $random_bytes ) .
  301. " bytes of randomness." );
  302. if ( strlen( $buffer ) >= $bytes ) {
  303. // urandom is always strong, set to true if all our data was generated using it
  304. $this->strong = true;
  305. }
  306. } else {
  307. $this->logger->debug( "/dev/urandom could not be opened." );
  308. }
  309. }
  310. // If we cannot use or generate enough data from a secure source
  311. // use this loop to generate a good set of pseudo random data.
  312. // This works by initializing a random state using a pile of unstable data
  313. // and continually shoving it through a hash along with a variable salt.
  314. // We hash the random state with more salt to avoid the state from leaking
  315. // out and being used to predict the /randomness/ that follows.
  316. if ( strlen( $buffer ) < $bytes ) {
  317. $this->logger->debug( __METHOD__ .
  318. ": Falling back to using a pseudo random state to generate randomness." );
  319. }
  320. while ( strlen( $buffer ) < $bytes ) {
  321. $buffer .= MWCryptHash::hmac( $this->randomState(), strval( mt_rand() ) );
  322. // This code is never really cryptographically strong, if we use it
  323. // at all, then set strong to false.
  324. $this->strong = false;
  325. }
  326. // Once the buffer has been filled up with enough random data to fulfill
  327. // the request shift off enough data to handle the request and leave the
  328. // unused portion left inside the buffer for the next request for random data
  329. $generated = substr( $buffer, 0, $bytes );
  330. $buffer = substr( $buffer, $bytes );
  331. $this->logger->debug( strlen( $buffer ) .
  332. " bytes of randomness leftover in the buffer." );
  333. return $generated;
  334. }
  335. /**
  336. * Generate a run of (ideally) cryptographically random data and return
  337. * it in hexadecimal string format.
  338. * You can use CryptRand::wasStrong() if you wish to know if the source used
  339. * was cryptographically strong.
  340. *
  341. * @param int $chars The number of hex chars of random data to generate
  342. * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
  343. * strong sources of entropy even if reading from them may steal
  344. * more entropy from the system than optimal.
  345. * @return string Hexadecimal random data
  346. */
  347. public function generateHex( $chars, $forceStrong = false ) {
  348. // hex strings are 2x the length of raw binary so we divide the length in half
  349. // odd numbers will result in a .5 that leads the generate() being 1 character
  350. // short, so we use ceil() to ensure that we always have enough bytes
  351. $bytes = ceil( $chars / 2 );
  352. // Generate the data and then convert it to a hex string
  353. $hex = bin2hex( $this->generate( $bytes, $forceStrong ) );
  354. // A bit of paranoia here, the caller asked for a specific length of string
  355. // here, and it's possible (eg when given an odd number) that we may actually
  356. // have at least 1 char more than they asked for. Just in case they made this
  357. // call intending to insert it into a database that does truncation we don't
  358. // want to give them too much and end up with their database and their live
  359. // code having two different values because part of what we gave them is truncated
  360. // hence, we strip out any run of characters longer than what we were asked for.
  361. return substr( $hex, 0, $chars );
  362. }
  363. }