kern_tc.c
/*	$OpenBSD: kern_tc.c,v 1.28 2014/12/10 02:44:47 tedu Exp $ */

/*
 * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * If we meet some day, and you think this stuff is worth it, you
 * can buy me a beer in return. Poul-Henning Kamp
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/timeout.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/malloc.h>
#include <dev/rndvar.h>

/*
 * A large step happens on boot. This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

u_int dummy_get_timecount(struct timecounter *);

void ntp_update_second(int64_t *, time_t *);
int sysctl_tc_hardware(void *, size_t *, void *, size_t);
int sysctl_tc_choice(void *, size_t *, void *, size_t);

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air. This allows the console and other early stuff to use
 * time services.
 */

u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	u_int64_t		th_scale;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;
	struct timehands	*th_next;
};

static struct timehands th0;
static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
static struct timehands th0 = {
	&dummy_timecounter,
	0,
	(uint64_t)-1 / 1000000,
	0,
	{1, 0},
	{0, 0},
	{0, 0},
	1,
	&th1
};
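
/*
 * th0..th9 form a ring: th0 -> th1 -> ... -> th9 -> th0.  tc_windup()
 * always rewrites the *next* element and then publishes it by flipping
 * the timehands pointer, so readers never observe a half-updated entry.
 * Only th0 starts out with a non-zero generation, so it is the only
 * element readers will accept until tc_windup() has run.
 */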
static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

volatile time_t time_second = 1;
volatile time_t time_uptime = 0;

struct bintime naptime;
static struct bintime boottimebin;
static int timestepwarnings;

void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time. We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet. See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */
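
/*
 * The pattern below is a lockless read: snapshot the current timehands
 * pointer, remember its generation, copy out what we need, and retry if
 * the generation changed underneath us (or was zero, meaning tc_windup()
 * was rewriting that entry while we read it).
 */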
void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{
	binuptime(bt);
	bintime_add(bt, &boottimebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (tc->tc_quality >= 0) {
		if (u > hz) {
			tc->tc_quality = -2000;
			printf("Timecounter \"%s\" frequency %lu Hz",
			    tc->tc_name, (unsigned long)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality < 0)
		return;
	if (tc->tc_quality < timecounter->tc_quality)
		return;
	if (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency < timecounter->tc_frequency)
		return;
	(void)tc->tc_get_timecount(tc);
	add_timer_randomness(tc->tc_get_timecount(tc));

	timecounter = tc;
}
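
/*
 * Registration sketch (hypothetical driver; all "acme" names are invented
 * for illustration): a clock driver describes its free-running counter
 * and hands it to tc_init() once the hardware is ticking:
 *
 *	u_int
 *	acme_get_timecount(struct timecounter *tc)
 *	{
 *		return (ACME_READ_COUNTER());	(read the counter register)
 *	}
 *
 *	static struct timecounter acme_timecounter = {
 *		acme_get_timecount, NULL, 0xffffffff, 33333333, "acme", 1000
 *	};
 *
 *	tc_init(&acme_timecounter);
 *
 * The initializer order matches dummy_timecounter above: get_timecount,
 * poll_pps, counter_mask, frequency, name, quality.  tc_init() then picks
 * the new counter over the current one if its quality (and, on a tie,
 * frequency) is higher.
 */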

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{
	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of UTC, aka the realtime clock.
 * This is done by modifying our estimate of when we booted.
 * XXX: not locked.
 */
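/*
 * In other words: boottimebin becomes (requested UTC time - current
 * uptime), so that boottime + uptime once again equals the requested
 * wall-clock time.  bt2 below ends up holding the old UTC time, which is
 * only used for the optional "Time stepped" warning.
 */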
void
tc_setrealtimeclock(struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &boottimebin);
	boottimebin = bt;
	bintime2timespec(&bt, &boottime);
	add_timer_randomness(ts->tv_sec);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Step the monotonic and realtime clocks, triggering any timeouts that
 * should have occurred across the interval.
 * XXX: not locked.
 */
void
tc_setclock(struct timespec *ts)
{
	struct bintime bt, bt2;
#ifndef SMALL_KERNEL
	long long adj_ticks;
#endif

	/*
	 * When we're called for the first time, during boot when
	 * the root partition is mounted, boottime is still zero:
	 * we just need to set it.
	 */
	if (boottimebin.sec == 0) {
		tc_setrealtimeclock(ts);
		return;
	}

	add_timer_randomness(ts->tv_sec);

	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &boottimebin);
	bt2 = timehands->th_offset;
	timehands->th_offset = bt;

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();

#ifndef SMALL_KERNEL
	/* convert the bintime to ticks */
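	/*
	 * bt becomes the size of the step (new offset minus old); it is
	 * accumulated into naptime and then converted to hardclock ticks:
	 * the upper 32 bits of bt.frac are the fraction of a second in
	 * 2^-32 units, so multiplying by 1000000 and shifting right by 32
	 * yields microseconds, and dividing by tick (microseconds per
	 * hardclock tick) yields ticks.
	 */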
	bintime_sub(&bt, &bt2);
	bintime_add(&naptime, &bt);
	adj_ticks = (long long)hz * bt.sec +
	    (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick;
	if (adj_ticks > 0) {
		if (adj_ticks > INT_MAX)
			adj_ticks = INT_MAX;
		timeout_adjust_ticks(adj_ticks);
	}
#endif
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands. Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP. Slightly magic.
 */
void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i;
#ifdef leapsecs
	time_t t;
#endif

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer. While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	memcpy(th, tho, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

#ifdef notyet
	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them. There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which
	 * might have a different rate than the current NTP second. It
	 * doesn't matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);
#endif

	/*
	 * Deal with NTP second processing. The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice. We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &boottimebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--)
		ntp_update_second(&th->th_adjustment, &bt.sec);

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
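	/*
	 * Worked example: with th_adjustment == 0 and a 1 MHz counter,
	 * scale = 2^63 / 10^6 and th_scale = 2 * scale ~= 2^64 / 10^6, so
	 * every counter tick adds about one microsecond (expressed in
	 * 2^-64 second units) to th_offset.
	 */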
	scale = (u_int64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	timehands = th;
}

/* Report or change the active timecounter hardware. */
int
sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_string(oldp, oldlenp, newp, newlen, newname,
	    sizeof(newname));
	if (error != 0 || strcmp(newname, tc->tc_name) == 0)
		return (error);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;
		return (0);
	}
	return (EINVAL);
}

/* Report the available timecounter hardware. */
int
sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	char buf[32], *spc, *choices;
	struct timecounter *tc;
	int error, maxlen;

	spc = "";
	maxlen = 0;
	for (tc = timecounters; tc != NULL; tc = tc->tc_next)
		maxlen += sizeof(buf);
	choices = malloc(maxlen, M_TEMP, M_WAITOK);
	*choices = '\0';
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		snprintf(buf, sizeof(buf), "%s%s(%d)",
		    spc, tc->tc_name, tc->tc_quality);
		spc = " ";
		strlcat(choices, buf, maxlen);
	}
	error = sysctl_rdstring(oldp, oldlenp, newp, choices);
	free(choices, M_TEMP, maxlen);
	return (error);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing. Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
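/*
 * Example: even a 32-bit counter running at 3.5 GHz wraps only about
 * every 1.2 seconds, while tc_ticktock() below triggers tc_windup()
 * roughly every millisecond (or every hardclock tick when hz <= 1000),
 * so tc_delta() always sees far less than one full counter wrap.
 */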
static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	tc_windup();
}

void
inittimecounter(void)
{
#ifdef DEBUG
	u_int p;
#endif

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
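	/*
	 * For example, hz = 100 gives tc_tick = 1 (an update every 10 ms),
	 * while hz = 4000 gives tc_tick = (4000 + 500) / 1000 = 4 (an
	 * update every 1 ms).
	 */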
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
#ifdef DEBUG
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}

/*
 * Return timecounter-related information.
 */
int
sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case KERN_TIMECOUNTER_TICK:
		return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
	case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
		return (sysctl_int(oldp, oldlenp, newp, newlen,
		    &timestepwarnings));
	case KERN_TIMECOUNTER_HARDWARE:
		return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
	case KERN_TIMECOUNTER_CHOICE:
		return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}

void
ntp_update_second(int64_t *adjust, time_t *sec)
{
	int64_t adj;

	/* Skew time according to any adjtime(2) adjustments. */
	if (adjtimedelta > 0)
		adj = MIN(5000, adjtimedelta);
	else
		adj = MAX(-5000, adjtimedelta);
	adjtimedelta -= adj;
	*adjust = (adj * 1000) << 32;
	*adjust += timecounter->tc_freq_adj;
}

int
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		timecounter->tc_freq_adj = *new;
	}
	return 0;
}