123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648 |
- /* $OpenBSD: kern_tc.c,v 1.28 2014/12/10 02:44:47 tedu Exp $ */
- /*
- * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- /*
- * If we meet some day, and you think this stuff is worth it, you
- * can buy me a beer in return. Poul-Henning Kamp
- */
- #include <sys/param.h>
- #include <sys/kernel.h>
- #include <sys/timeout.h>
- #include <sys/sysctl.h>
- #include <sys/syslog.h>
- #include <sys/systm.h>
- #include <sys/timetc.h>
- #include <sys/malloc.h>
- #include <dev/rndvar.h>
- /*
- * A large step happens on boot. This constant detects such steps.
- * It is relatively small so that ntp_update_second gets called enough
- * in the typical 'missed a couple of seconds' case, but doesn't loop
- * forever when the time step is large.
- */
- #define LARGE_STEP 200
- u_int dummy_get_timecount(struct timecounter *);
- void ntp_update_second(int64_t *, time_t *);
- int sysctl_tc_hardware(void *, size_t *, void *, size_t);
- int sysctl_tc_choice(void *, size_t *, void *, size_t);
- /*
- * Implement a dummy timecounter which we can use until we get a real one
- * in the air. This allows the console and other early stuff to use
- * time services.
- */
- u_int
- dummy_get_timecount(struct timecounter *tc)
- {
- static u_int now;
- return (++now);
- }
- static struct timecounter dummy_timecounter = {
- dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
- };
- struct timehands {
- /* These fields must be initialized by the driver. */
- struct timecounter *th_counter;
- int64_t th_adjustment;
- u_int64_t th_scale;
- u_int th_offset_count;
- struct bintime th_offset;
- struct timeval th_microtime;
- struct timespec th_nanotime;
- /* Fields not to be copied in tc_windup start with th_generation. */
- volatile u_int th_generation;
- struct timehands *th_next;
- };
- static struct timehands th0;
- static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
- static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
- static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
- static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
- static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
- static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
- static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
- static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
- static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
- static struct timehands th0 = {
- &dummy_timecounter,
- 0,
- (uint64_t)-1 / 1000000,
- 0,
- {1, 0},
- {0, 0},
- {0, 0},
- 1,
- &th1
- };
- static struct timehands *volatile timehands = &th0;
- struct timecounter *timecounter = &dummy_timecounter;
- static struct timecounter *timecounters = &dummy_timecounter;
- volatile time_t time_second = 1;
- volatile time_t time_uptime = 0;
- struct bintime naptime;
- static struct bintime boottimebin;
- static int timestepwarnings;
- void tc_windup(void);
- /*
- * Return the difference between the timehands' counter value now and what
- * was when we copied it to the timehands' offset_count.
- */
- static __inline u_int
- tc_delta(struct timehands *th)
- {
- struct timecounter *tc;
- tc = th->th_counter;
- return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
- tc->tc_counter_mask);
- }
- /*
- * Functions for reading the time. We have to loop until we are sure that
- * the timehands that we operated on was not updated under our feet. See
- * the comment in <sys/time.h> for a description of these 12 functions.
- */
- void
- binuptime(struct bintime *bt)
- {
- struct timehands *th;
- u_int gen;
- do {
- th = timehands;
- gen = th->th_generation;
- *bt = th->th_offset;
- bintime_addx(bt, th->th_scale * tc_delta(th));
- } while (gen == 0 || gen != th->th_generation);
- }
- void
- nanouptime(struct timespec *tsp)
- {
- struct bintime bt;
- binuptime(&bt);
- bintime2timespec(&bt, tsp);
- }
- void
- microuptime(struct timeval *tvp)
- {
- struct bintime bt;
- binuptime(&bt);
- bintime2timeval(&bt, tvp);
- }
- void
- bintime(struct bintime *bt)
- {
- binuptime(bt);
- bintime_add(bt, &boottimebin);
- }
- void
- nanotime(struct timespec *tsp)
- {
- struct bintime bt;
- bintime(&bt);
- bintime2timespec(&bt, tsp);
- }
- void
- microtime(struct timeval *tvp)
- {
- struct bintime bt;
- bintime(&bt);
- bintime2timeval(&bt, tvp);
- }
- void
- getnanouptime(struct timespec *tsp)
- {
- struct timehands *th;
- u_int gen;
- do {
- th = timehands;
- gen = th->th_generation;
- bintime2timespec(&th->th_offset, tsp);
- } while (gen == 0 || gen != th->th_generation);
- }
- void
- getmicrouptime(struct timeval *tvp)
- {
- struct timehands *th;
- u_int gen;
- do {
- th = timehands;
- gen = th->th_generation;
- bintime2timeval(&th->th_offset, tvp);
- } while (gen == 0 || gen != th->th_generation);
- }
- void
- getnanotime(struct timespec *tsp)
- {
- struct timehands *th;
- u_int gen;
- do {
- th = timehands;
- gen = th->th_generation;
- *tsp = th->th_nanotime;
- } while (gen == 0 || gen != th->th_generation);
- }
- void
- getmicrotime(struct timeval *tvp)
- {
- struct timehands *th;
- u_int gen;
- do {
- th = timehands;
- gen = th->th_generation;
- *tvp = th->th_microtime;
- } while (gen == 0 || gen != th->th_generation);
- }
- /*
- * Initialize a new timecounter and possibly use it.
- */
- void
- tc_init(struct timecounter *tc)
- {
- u_int u;
- u = tc->tc_frequency / tc->tc_counter_mask;
- /* XXX: We need some margin here, 10% is a guess */
- u *= 11;
- u /= 10;
- if (tc->tc_quality >= 0) {
- if (u > hz) {
- tc->tc_quality = -2000;
- printf("Timecounter \"%s\" frequency %lu Hz",
- tc->tc_name, (unsigned long)tc->tc_frequency);
- printf(" -- Insufficient hz, needs at least %u\n", u);
- }
- }
- tc->tc_next = timecounters;
- timecounters = tc;
- /*
- * Never automatically use a timecounter with negative quality.
- * Even though we run on the dummy counter, switching here may be
- * worse since this timecounter may not be monotonic.
- */
- if (tc->tc_quality < 0)
- return;
- if (tc->tc_quality < timecounter->tc_quality)
- return;
- if (tc->tc_quality == timecounter->tc_quality &&
- tc->tc_frequency < timecounter->tc_frequency)
- return;
- (void)tc->tc_get_timecount(tc);
- add_timer_randomness(tc->tc_get_timecount(tc));
- timecounter = tc;
- }
- /* Report the frequency of the current timecounter. */
- u_int64_t
- tc_getfrequency(void)
- {
- return (timehands->th_counter->tc_frequency);
- }
- /*
- * Step our concept of UTC, aka the realtime clock.
- * This is done by modifying our estimate of when we booted.
- * XXX: not locked.
- */
- void
- tc_setrealtimeclock(struct timespec *ts)
- {
- struct timespec ts2;
- struct bintime bt, bt2;
- binuptime(&bt2);
- timespec2bintime(ts, &bt);
- bintime_sub(&bt, &bt2);
- bintime_add(&bt2, &boottimebin);
- boottimebin = bt;
- bintime2timespec(&bt, &boottime);
- add_timer_randomness(ts->tv_sec);
- /* XXX fiddle all the little crinkly bits around the fiords... */
- tc_windup();
- if (timestepwarnings) {
- bintime2timespec(&bt2, &ts2);
- log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
- (long long)ts2.tv_sec, ts2.tv_nsec,
- (long long)ts->tv_sec, ts->tv_nsec);
- }
- }
- /*
- * Step the monotonic and realtime clocks, triggering any timeouts that
- * should have occurred across the interval.
- * XXX: not locked.
- */
- void
- tc_setclock(struct timespec *ts)
- {
- struct bintime bt, bt2;
- #ifndef SMALL_KERNEL
- long long adj_ticks;
- #endif
- /*
- * When we're called for the first time, during boot when
- * the root partition is mounted, boottime is still zero:
- * we just need to set it.
- */
- if (boottimebin.sec == 0) {
- tc_setrealtimeclock(ts);
- return;
- }
- add_timer_randomness(ts->tv_sec);
- timespec2bintime(ts, &bt);
- bintime_sub(&bt, &boottimebin);
- bt2 = timehands->th_offset;
- timehands->th_offset = bt;
- /* XXX fiddle all the little crinkly bits around the fiords... */
- tc_windup();
- #ifndef SMALL_KERNEL
- /* convert the bintime to ticks */
- bintime_sub(&bt, &bt2);
- bintime_add(&naptime, &bt);
- adj_ticks = (long long)hz * bt.sec +
- (((uint64_t)1000000 * (uint32_t)(bt.frac >> 32)) >> 32) / tick;
- if (adj_ticks > 0) {
- if (adj_ticks > INT_MAX)
- adj_ticks = INT_MAX;
- timeout_adjust_ticks(adj_ticks);
- }
- #endif
- }
- /*
- * Initialize the next struct timehands in the ring and make
- * it the active timehands. Along the way we might switch to a different
- * timecounter and/or do seconds processing in NTP. Slightly magic.
- */
- void
- tc_windup(void)
- {
- struct bintime bt;
- struct timehands *th, *tho;
- u_int64_t scale;
- u_int delta, ncount, ogen;
- int i;
- #ifdef leapsecs
- time_t t;
- #endif
- /*
- * Make the next timehands a copy of the current one, but do not
- * overwrite the generation or next pointer. While we update
- * the contents, the generation must be zero.
- */
- tho = timehands;
- th = tho->th_next;
- ogen = th->th_generation;
- th->th_generation = 0;
- memcpy(th, tho, offsetof(struct timehands, th_generation));
- /*
- * Capture a timecounter delta on the current timecounter and if
- * changing timecounters, a counter value from the new timecounter.
- * Update the offset fields accordingly.
- */
- delta = tc_delta(th);
- if (th->th_counter != timecounter)
- ncount = timecounter->tc_get_timecount(timecounter);
- else
- ncount = 0;
- th->th_offset_count += delta;
- th->th_offset_count &= th->th_counter->tc_counter_mask;
- bintime_addx(&th->th_offset, th->th_scale * delta);
- #ifdef notyet
- /*
- * Hardware latching timecounters may not generate interrupts on
- * PPS events, so instead we poll them. There is a finite risk that
- * the hardware might capture a count which is later than the one we
- * got above, and therefore possibly in the next NTP second which might
- * have a different rate than the current NTP second. It doesn't
- * matter in practice.
- */
- if (tho->th_counter->tc_poll_pps)
- tho->th_counter->tc_poll_pps(tho->th_counter);
- #endif
- /*
- * Deal with NTP second processing. The for loop normally
- * iterates at most once, but in extreme situations it might
- * keep NTP sane if timeouts are not run for several seconds.
- * At boot, the time step can be large when the TOD hardware
- * has been read, so on really large steps, we call
- * ntp_update_second only twice. We need to call it twice in
- * case we missed a leap second.
- */
- bt = th->th_offset;
- bintime_add(&bt, &boottimebin);
- i = bt.sec - tho->th_microtime.tv_sec;
- if (i > LARGE_STEP)
- i = 2;
- for (; i > 0; i--)
- ntp_update_second(&th->th_adjustment, &bt.sec);
- /* Update the UTC timestamps used by the get*() functions. */
- /* XXX shouldn't do this here. Should force non-`get' versions. */
- bintime2timeval(&bt, &th->th_microtime);
- bintime2timespec(&bt, &th->th_nanotime);
- /* Now is a good time to change timecounters. */
- if (th->th_counter != timecounter) {
- th->th_counter = timecounter;
- th->th_offset_count = ncount;
- }
- /*-
- * Recalculate the scaling factor. We want the number of 1/2^64
- * fractions of a second per period of the hardware counter, taking
- * into account the th_adjustment factor which the NTP PLL/adjtime(2)
- * processing provides us with.
- *
- * The th_adjustment is nanoseconds per second with 32 bit binary
- * fraction and we want 64 bit binary fraction of second:
- *
- * x = a * 2^32 / 10^9 = a * 4.294967296
- *
- * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
- * we can only multiply by about 850 without overflowing, but that
- * leaves suitably precise fractions for multiply before divide.
- *
- * Divide before multiply with a fraction of 2199/512 results in a
- * systematic undercompensation of 10PPM of th_adjustment. On a
- * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
- *
- * We happily sacrifice the lowest of the 64 bits of our result
- * to the goddess of code clarity.
- *
- */
- scale = (u_int64_t)1 << 63;
- scale += (th->th_adjustment / 1024) * 2199;
- scale /= th->th_counter->tc_frequency;
- th->th_scale = scale * 2;
- /*
- * Now that the struct timehands is again consistent, set the new
- * generation number, making sure to not make it zero.
- */
- if (++ogen == 0)
- ogen = 1;
- th->th_generation = ogen;
- /* Go live with the new struct timehands. */
- time_second = th->th_microtime.tv_sec;
- time_uptime = th->th_offset.sec;
- timehands = th;
- }
- /* Report or change the active timecounter hardware. */
- int
- sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
- {
- char newname[32];
- struct timecounter *newtc, *tc;
- int error;
- tc = timecounter;
- strlcpy(newname, tc->tc_name, sizeof(newname));
- error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname));
- if (error != 0 || strcmp(newname, tc->tc_name) == 0)
- return (error);
- for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
- if (strcmp(newname, newtc->tc_name) != 0)
- continue;
- /* Warm up new timecounter. */
- (void)newtc->tc_get_timecount(newtc);
- (void)newtc->tc_get_timecount(newtc);
- timecounter = newtc;
- return (0);
- }
- return (EINVAL);
- }
- /* Report or change the active timecounter hardware. */
- int
- sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
- {
- char buf[32], *spc, *choices;
- struct timecounter *tc;
- int error, maxlen;
- spc = "";
- maxlen = 0;
- for (tc = timecounters; tc != NULL; tc = tc->tc_next)
- maxlen += sizeof(buf);
- choices = malloc(maxlen, M_TEMP, M_WAITOK);
- *choices = '\0';
- for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
- snprintf(buf, sizeof(buf), "%s%s(%d)",
- spc, tc->tc_name, tc->tc_quality);
- spc = " ";
- strlcat(choices, buf, maxlen);
- }
- error = sysctl_rdstring(oldp, oldlenp, newp, choices);
- free(choices, M_TEMP, maxlen);
- return (error);
- }
/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 *
 * tc_tick is the number of tc_ticktock() calls per tc_windup().
 */
static int tc_tick;

/* Count calls and run tc_windup() on every tc_tick-th one. */
void
tc_ticktock(void)
{
	static int count;

	if (++count >= tc_tick) {
		count = 0;
		tc_windup();
	}
}
- void
- inittimecounter(void)
- {
- #ifdef DEBUG
- u_int p;
- #endif
- /*
- * Set the initial timeout to
- * max(1, <approx. number of hardclock ticks in a millisecond>).
- * People should probably not use the sysctl to set the timeout
- * to smaller than its initial value, since that value is the
- * smallest reasonable one. If they want better timestamps they
- * should use the non-"get"* functions.
- */
- if (hz > 1000)
- tc_tick = (hz + 500) / 1000;
- else
- tc_tick = 1;
- #ifdef DEBUG
- p = (tc_tick * 1000000) / hz;
- printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
- #endif
- /* warm up new timecounter (again) and get rolling. */
- (void)timecounter->tc_get_timecount(timecounter);
- (void)timecounter->tc_get_timecount(timecounter);
- }
- /*
- * Return timecounter-related information.
- */
- int
- sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
- void *newp, size_t newlen)
- {
- if (namelen != 1)
- return (ENOTDIR);
- switch (name[0]) {
- case KERN_TIMECOUNTER_TICK:
- return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
- case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
- return (sysctl_int(oldp, oldlenp, newp, newlen,
- ×tepwarnings));
- case KERN_TIMECOUNTER_HARDWARE:
- return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
- case KERN_TIMECOUNTER_CHOICE:
- return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
- default:
- return (EOPNOTSUPP);
- }
- /* NOTREACHED */
- }
- void
- ntp_update_second(int64_t *adjust, time_t *sec)
- {
- int64_t adj;
- /* Skew time according to any adjtime(2) adjustments. */
- if (adjtimedelta > 0)
- adj = MIN(5000, adjtimedelta);
- else
- adj = MAX(-5000, adjtimedelta);
- adjtimedelta -= adj;
- *adjust = (adj * 1000) << 32;
- *adjust += timecounter->tc_freq_adj;
- }
- int
- tc_adjfreq(int64_t *old, int64_t *new)
- {
- if (old != NULL) {
- *old = timecounter->tc_freq_adj;
- }
- if (new != NULL) {
- timecounter->tc_freq_adj = *new;
- }
- return 0;
- }
|