timekeeping.c 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358
  1. /*
  2. * linux/kernel/time/timekeeping.c
  3. *
  4. * Kernel timekeeping code and accessor functions
  5. *
  6. * This code was moved from linux/kernel/timer.c.
  7. * Please see that file for copyright and history logs.
  8. *
  9. */
  10. #include <linux/timekeeper_internal.h>
  11. #include <linux/module.h>
  12. #include <linux/interrupt.h>
  13. #include <linux/percpu.h>
  14. #include <linux/init.h>
  15. #include <linux/mm.h>
  16. #include <linux/sched.h>
  17. #include <linux/syscore_ops.h>
  18. #include <linux/clocksource.h>
  19. #include <linux/jiffies.h>
  20. #include <linux/time.h>
  21. #include <linux/tick.h>
  22. #include <linux/stop_machine.h>
  23. #include <linux/pvclock_gtod.h>
  24. #include <linux/compiler.h>
  25. #include "tick-internal.h"
  26. #include "ntp_internal.h"
  27. #include "timekeeping_internal.h"
  28. #define TK_CLEAR_NTP (1 << 0)
  29. #define TK_MIRROR (1 << 1)
  30. #define TK_CLOCK_WAS_SET (1 << 2)
  31. /*
  32. * The most important data for readout fits into a single 64 byte
  33. * cache line.
  34. */
  35. static struct {
  36. seqcount_t seq;
  37. struct timekeeper timekeeper;
  38. } tk_core ____cacheline_aligned = {
  39. .seq = SEQCNT_ZERO(tk_core.seq),
  40. };
  41. static DEFINE_RAW_SPINLOCK(timekeeper_lock);
  42. static struct timekeeper shadow_timekeeper;
  43. /**
  44. * struct tk_fast - NMI safe timekeeper
  45. * @seq: Sequence counter for protecting updates. The lowest bit
  46. * is the index for the tk_read_base array
  47. * @base: tk_read_base array. Access is indexed by the lowest bit of
  48. * @seq.
  49. *
  50. * See @update_fast_timekeeper() below.
  51. */
  52. struct tk_fast {
  53. seqcount_t seq;
  54. struct tk_read_base base[2];
  55. };
  56. static struct tk_fast tk_fast_mono ____cacheline_aligned;
  57. static struct tk_fast tk_fast_raw ____cacheline_aligned;
  58. /* flag for if timekeeping is suspended */
  59. int __read_mostly timekeeping_suspended;
  60. static inline void tk_normalize_xtime(struct timekeeper *tk)
  61. {
  62. while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
  63. tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
  64. tk->xtime_sec++;
  65. }
  66. }
  67. static inline struct timespec64 tk_xtime(struct timekeeper *tk)
  68. {
  69. struct timespec64 ts;
  70. ts.tv_sec = tk->xtime_sec;
  71. ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
  72. return ts;
  73. }
  74. static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
  75. {
  76. tk->xtime_sec = ts->tv_sec;
  77. tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
  78. }
  79. static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
  80. {
  81. tk->xtime_sec += ts->tv_sec;
  82. tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
  83. tk_normalize_xtime(tk);
  84. }
  85. static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
  86. {
  87. struct timespec64 tmp;
  88. /*
  89. * Verify consistency of: offset_real = -wall_to_monotonic
  90. * before modifying anything
  91. */
  92. set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
  93. -tk->wall_to_monotonic.tv_nsec);
  94. WARN_ON_ONCE(tk->offs_real.tv64 != timespec64_to_ktime(tmp).tv64);
  95. tk->wall_to_monotonic = wtm;
  96. set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
  97. tk->offs_real = timespec64_to_ktime(tmp);
  98. tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
  99. }
  100. static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  101. {
  102. tk->offs_boot = ktime_add(tk->offs_boot, delta);
  103. }
  104. /*
  105. * tk_clock_read - atomic clocksource read() helper
  106. *
  107. * This helper is necessary to use in the read paths because, while the
  108. * seqlock ensures we don't return a bad value while structures are updated,
  109. * it doesn't protect from potential crashes. There is the possibility that
  110. * the tkr's clocksource may change between the read reference, and the
  111. * clock reference passed to the read function. This can cause crashes if
  112. * the wrong clocksource is passed to the wrong read function.
  113. * This isn't necessary to use when holding the timekeeper_lock or doing
  114. * a read of the fast-timekeeper tkrs (which is protected by its own locking
  115. * and update logic).
  116. */
  117. static inline u64 tk_clock_read(struct tk_read_base *tkr)
  118. {
  119. struct clocksource *clock = READ_ONCE(tkr->clock);
  120. return clock->read(clock);
  121. }
  122. #ifdef CONFIG_DEBUG_TIMEKEEPING
  123. #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
  124. static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
  125. {
  126. cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
  127. const char *name = tk->tkr_mono.clock->name;
  128. if (offset > max_cycles) {
  129. printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
  130. offset, name, max_cycles);
  131. printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
  132. } else {
  133. if (offset > (max_cycles >> 1)) {
  134. printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
  135. offset, name, max_cycles >> 1);
  136. printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
  137. }
  138. }
  139. if (tk->underflow_seen) {
  140. if (jiffies - tk->last_warning > WARNING_FREQ) {
  141. printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
  142. printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
  143. printk_deferred(" Your kernel is probably still fine.\n");
  144. tk->last_warning = jiffies;
  145. }
  146. tk->underflow_seen = 0;
  147. }
  148. if (tk->overflow_seen) {
  149. if (jiffies - tk->last_warning > WARNING_FREQ) {
  150. printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
  151. printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
  152. printk_deferred(" Your kernel is probably still fine.\n");
  153. tk->last_warning = jiffies;
  154. }
  155. tk->overflow_seen = 0;
  156. }
  157. }
  158. static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
  159. {
  160. struct timekeeper *tk = &tk_core.timekeeper;
  161. cycle_t now, last, mask, max, delta;
  162. unsigned int seq;
  163. /*
  164. * Since we're called holding a seqlock, the data may shift
  165. * under us while we're doing the calculation. This can cause
  166. * false positives, since we'd note a problem but throw the
  167. * results away. So nest another seqlock here to atomically
  168. * grab the points we are checking with.
  169. */
  170. do {
  171. seq = read_seqcount_begin(&tk_core.seq);
  172. now = tk_clock_read(tkr);
  173. last = tkr->cycle_last;
  174. mask = tkr->mask;
  175. max = tkr->clock->max_cycles;
  176. } while (read_seqcount_retry(&tk_core.seq, seq));
  177. delta = clocksource_delta(now, last, mask);
  178. /*
  179. * Try to catch underflows by checking if we are seeing small
  180. * mask-relative negative values.
  181. */
  182. if (unlikely((~delta & mask) < (mask >> 3))) {
  183. tk->underflow_seen = 1;
  184. delta = 0;
  185. }
  186. /* Cap delta value to the max_cycles values to avoid mult overflows */
  187. if (unlikely(delta > max)) {
  188. tk->overflow_seen = 1;
  189. delta = tkr->clock->max_cycles;
  190. }
  191. return delta;
  192. }
  193. #else
  194. static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
  195. {
  196. }
  197. static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
  198. {
  199. cycle_t cycle_now, delta;
  200. /* read clocksource */
  201. cycle_now = tk_clock_read(tkr);
  202. /* calculate the delta since the last update_wall_time */
  203. delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
  204. return delta;
  205. }
  206. #endif
  207. /**
  208. * tk_setup_internals - Set up internals to use clocksource clock.
  209. *
  210. * @tk: The target timekeeper to setup.
  211. * @clock: Pointer to clocksource.
  212. *
  213. * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
  214. * pair and interval request.
  215. *
  216. * Unless you're the timekeeping code, you should not be using this!
  217. */
  218. static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
  219. {
  220. cycle_t interval;
  221. u64 tmp, ntpinterval;
  222. struct clocksource *old_clock;
  223. ++tk->cs_was_changed_seq;
  224. old_clock = tk->tkr_mono.clock;
  225. tk->tkr_mono.clock = clock;
  226. tk->tkr_mono.mask = clock->mask;
  227. tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
  228. tk->tkr_raw.clock = clock;
  229. tk->tkr_raw.mask = clock->mask;
  230. tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
  231. /* Do the ns -> cycle conversion first, using original mult */
  232. tmp = NTP_INTERVAL_LENGTH;
  233. tmp <<= clock->shift;
  234. ntpinterval = tmp;
  235. tmp += clock->mult/2;
  236. do_div(tmp, clock->mult);
  237. if (tmp == 0)
  238. tmp = 1;
  239. interval = (cycle_t) tmp;
  240. tk->cycle_interval = interval;
  241. /* Go back from cycles -> shifted ns */
  242. tk->xtime_interval = (u64) interval * clock->mult;
  243. tk->xtime_remainder = ntpinterval - tk->xtime_interval;
  244. tk->raw_interval = interval * clock->mult;
  245. /* if changing clocks, convert xtime_nsec shift units */
  246. if (old_clock) {
  247. int shift_change = clock->shift - old_clock->shift;
  248. if (shift_change < 0)
  249. tk->tkr_mono.xtime_nsec >>= -shift_change;
  250. else
  251. tk->tkr_mono.xtime_nsec <<= shift_change;
  252. }
  253. tk->tkr_raw.xtime_nsec = 0;
  254. tk->tkr_mono.shift = clock->shift;
  255. tk->tkr_raw.shift = clock->shift;
  256. tk->ntp_error = 0;
  257. tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
  258. tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
  259. /*
  260. * The timekeeper keeps its own mult values for the currently
  261. * active clocksource. These value will be adjusted via NTP
  262. * to counteract clock drifting.
  263. */
  264. tk->tkr_mono.mult = clock->mult;
  265. tk->tkr_raw.mult = clock->mult;
  266. tk->ntp_err_mult = 0;
  267. }
  268. /* Timekeeper helper functions. */
  269. #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
  270. static u32 default_arch_gettimeoffset(void) { return 0; }
  271. u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
  272. #else
  273. static inline u32 arch_gettimeoffset(void) { return 0; }
  274. #endif
  275. static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
  276. cycle_t delta)
  277. {
  278. u64 nsec;
  279. nsec = delta * tkr->mult + tkr->xtime_nsec;
  280. nsec >>= tkr->shift;
  281. /* If arch requires, add in get_arch_timeoffset() */
  282. return nsec + arch_gettimeoffset();
  283. }
  284. static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
  285. {
  286. cycle_t delta;
  287. delta = timekeeping_get_delta(tkr);
  288. return timekeeping_delta_to_ns(tkr, delta);
  289. }
  290. static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
  291. cycle_t cycles)
  292. {
  293. cycle_t delta;
  294. /* calculate the delta since the last update_wall_time */
  295. delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
  296. return timekeeping_delta_to_ns(tkr, delta);
  297. }
  298. /**
  299. * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
  300. * @tkr: Timekeeping readout base from which we take the update
  301. *
  302. * We want to use this from any context including NMI and tracing /
  303. * instrumenting the timekeeping code itself.
  304. *
  305. * Employ the latch technique; see @raw_write_seqcount_latch.
  306. *
  307. * So if a NMI hits the update of base[0] then it will use base[1]
  308. * which is still consistent. In the worst case this can result is a
  309. * slightly wrong timestamp (a few nanoseconds). See
  310. * @ktime_get_mono_fast_ns.
  311. */
  312. static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
  313. {
  314. struct tk_read_base *base = tkf->base;
  315. /* Force readers off to base[1] */
  316. raw_write_seqcount_latch(&tkf->seq);
  317. /* Update base[0] */
  318. memcpy(base, tkr, sizeof(*base));
  319. /* Force readers back to base[0] */
  320. raw_write_seqcount_latch(&tkf->seq);
  321. /* Update base[1] */
  322. memcpy(base + 1, base, sizeof(*base));
  323. }
  324. /**
  325. * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
  326. *
  327. * This timestamp is not guaranteed to be monotonic across an update.
  328. * The timestamp is calculated by:
  329. *
  330. * now = base_mono + clock_delta * slope
  331. *
  332. * So if the update lowers the slope, readers who are forced to the
  333. * not yet updated second array are still using the old steeper slope.
  334. *
  335. * tmono
  336. * ^
  337. * | o n
  338. * | o n
  339. * | u
  340. * | o
  341. * |o
  342. * |12345678---> reader order
  343. *
  344. * o = old slope
  345. * u = update
  346. * n = new slope
  347. *
  348. * So reader 6 will observe time going backwards versus reader 5.
  349. *
  350. * While other CPUs are likely to be able observe that, the only way
  351. * for a CPU local observation is when an NMI hits in the middle of
  352. * the update. Timestamps taken from that NMI context might be ahead
  353. * of the following timestamps. Callers need to be aware of that and
  354. * deal with it.
  355. */
  356. static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
  357. {
  358. struct tk_read_base *tkr;
  359. unsigned int seq;
  360. u64 now;
  361. do {
  362. seq = raw_read_seqcount_latch(&tkf->seq);
  363. tkr = tkf->base + (seq & 0x01);
  364. now = ktime_to_ns(tkr->base);
  365. now += timekeeping_delta_to_ns(tkr,
  366. clocksource_delta(
  367. tk_clock_read(tkr),
  368. tkr->cycle_last,
  369. tkr->mask));
  370. } while (read_seqcount_retry(&tkf->seq, seq));
  371. return now;
  372. }
  373. u64 ktime_get_mono_fast_ns(void)
  374. {
  375. return __ktime_get_fast_ns(&tk_fast_mono);
  376. }
  377. EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
  378. u64 ktime_get_raw_fast_ns(void)
  379. {
  380. return __ktime_get_fast_ns(&tk_fast_raw);
  381. }
  382. EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
  383. /* Suspend-time cycles value for halted fast timekeeper. */
  384. static cycle_t cycles_at_suspend;
  385. static cycle_t dummy_clock_read(struct clocksource *cs)
  386. {
  387. return cycles_at_suspend;
  388. }
  389. static struct clocksource dummy_clock = {
  390. .read = dummy_clock_read,
  391. };
  392. /**
  393. * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  394. * @tk: Timekeeper to snapshot.
  395. *
  396. * It generally is unsafe to access the clocksource after timekeeping has been
  397. * suspended, so take a snapshot of the readout base of @tk and use it as the
  398. * fast timekeeper's readout base while suspended. It will return the same
  399. * number of cycles every time until timekeeping is resumed at which time the
  400. * proper readout base for the fast timekeeper will be restored automatically.
  401. */
  402. static void halt_fast_timekeeper(struct timekeeper *tk)
  403. {
  404. static struct tk_read_base tkr_dummy;
  405. struct tk_read_base *tkr = &tk->tkr_mono;
  406. memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
  407. cycles_at_suspend = tk_clock_read(tkr);
  408. tkr_dummy.clock = &dummy_clock;
  409. update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
  410. tkr = &tk->tkr_raw;
  411. memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
  412. tkr_dummy.clock = &dummy_clock;
  413. update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
  414. }
  415. #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
  416. static inline void update_vsyscall(struct timekeeper *tk)
  417. {
  418. struct timespec xt, wm;
  419. xt = timespec64_to_timespec(tk_xtime(tk));
  420. wm = timespec64_to_timespec(tk->wall_to_monotonic);
  421. update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
  422. tk->tkr_mono.cycle_last);
  423. }
  424. static inline void old_vsyscall_fixup(struct timekeeper *tk)
  425. {
  426. s64 remainder;
  427. /*
  428. * Store only full nanoseconds into xtime_nsec after rounding
  429. * it up and add the remainder to the error difference.
  430. * XXX - This is necessary to avoid small 1ns inconsistnecies caused
  431. * by truncating the remainder in vsyscalls. However, it causes
  432. * additional work to be done in timekeeping_adjust(). Once
  433. * the vsyscall implementations are converted to use xtime_nsec
  434. * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
  435. * users are removed, this can be killed.
  436. */
  437. remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
  438. if (remainder != 0) {
  439. tk->tkr_mono.xtime_nsec -= remainder;
  440. tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
  441. tk->ntp_error += remainder << tk->ntp_error_shift;
  442. tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
  443. }
  444. }
  445. #else
  446. #define old_vsyscall_fixup(tk)
  447. #endif
  448. static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
  449. static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
  450. {
  451. raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
  452. }
  453. /**
  454. * pvclock_gtod_register_notifier - register a pvclock timedata update listener
  455. */
  456. int pvclock_gtod_register_notifier(struct notifier_block *nb)
  457. {
  458. struct timekeeper *tk = &tk_core.timekeeper;
  459. unsigned long flags;
  460. int ret;
  461. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  462. ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
  463. update_pvclock_gtod(tk, true);
  464. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  465. return ret;
  466. }
  467. EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
  468. /**
  469. * pvclock_gtod_unregister_notifier - unregister a pvclock
  470. * timedata update listener
  471. */
  472. int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
  473. {
  474. unsigned long flags;
  475. int ret;
  476. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  477. ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
  478. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  479. return ret;
  480. }
  481. EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
  482. /*
  483. * tk_update_leap_state - helper to update the next_leap_ktime
  484. */
  485. static inline void tk_update_leap_state(struct timekeeper *tk)
  486. {
  487. tk->next_leap_ktime = ntp_get_next_leap();
  488. if (tk->next_leap_ktime.tv64 != KTIME_MAX)
  489. /* Convert to monotonic time */
  490. tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
  491. }
  492. /*
  493. * Update the ktime_t based scalar nsec members of the timekeeper
  494. */
  495. static inline void tk_update_ktime_data(struct timekeeper *tk)
  496. {
  497. u64 seconds;
  498. u32 nsec;
  499. /*
  500. * The xtime based monotonic readout is:
  501. * nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
  502. * The ktime based monotonic readout is:
  503. * nsec = base_mono + now();
  504. * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
  505. */
  506. seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
  507. nsec = (u32) tk->wall_to_monotonic.tv_nsec;
  508. tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
  509. /* Update the monotonic raw base */
  510. tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
  511. /*
  512. * The sum of the nanoseconds portions of xtime and
  513. * wall_to_monotonic can be greater/equal one second. Take
  514. * this into account before updating tk->ktime_sec.
  515. */
  516. nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
  517. if (nsec >= NSEC_PER_SEC)
  518. seconds++;
  519. tk->ktime_sec = seconds;
  520. }
  521. /* must hold timekeeper_lock */
  522. static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  523. {
  524. if (action & TK_CLEAR_NTP) {
  525. tk->ntp_error = 0;
  526. ntp_clear();
  527. }
  528. tk_update_leap_state(tk);
  529. tk_update_ktime_data(tk);
  530. update_vsyscall(tk);
  531. update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
  532. update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
  533. update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
  534. if (action & TK_CLOCK_WAS_SET)
  535. tk->clock_was_set_seq++;
  536. /*
  537. * The mirroring of the data to the shadow-timekeeper needs
  538. * to happen last here to ensure we don't over-write the
  539. * timekeeper structure on the next update with stale data
  540. */
  541. if (action & TK_MIRROR)
  542. memcpy(&shadow_timekeeper, &tk_core.timekeeper,
  543. sizeof(tk_core.timekeeper));
  544. }
  545. /**
  546. * timekeeping_forward_now - update clock to the current time
  547. *
  548. * Forward the current clock to update its state since the last call to
  549. * update_wall_time(). This is useful before significant clock changes,
  550. * as it avoids having to deal with this time offset explicitly.
  551. */
  552. static void timekeeping_forward_now(struct timekeeper *tk)
  553. {
  554. cycle_t cycle_now, delta;
  555. s64 nsec;
  556. cycle_now = tk_clock_read(&tk->tkr_mono);
  557. delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
  558. tk->tkr_mono.cycle_last = cycle_now;
  559. tk->tkr_raw.cycle_last = cycle_now;
  560. tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
  561. /* If arch requires, add in get_arch_timeoffset() */
  562. tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
  563. tk_normalize_xtime(tk);
  564. nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
  565. timespec64_add_ns(&tk->raw_time, nsec);
  566. }
  567. /**
  568. * __getnstimeofday64 - Returns the time of day in a timespec64.
  569. * @ts: pointer to the timespec to be set
  570. *
  571. * Updates the time of day in the timespec.
  572. * Returns 0 on success, or -ve when suspended (timespec will be undefined).
  573. */
  574. int __getnstimeofday64(struct timespec64 *ts)
  575. {
  576. struct timekeeper *tk = &tk_core.timekeeper;
  577. unsigned long seq;
  578. s64 nsecs = 0;
  579. do {
  580. seq = read_seqcount_begin(&tk_core.seq);
  581. ts->tv_sec = tk->xtime_sec;
  582. nsecs = timekeeping_get_ns(&tk->tkr_mono);
  583. } while (read_seqcount_retry(&tk_core.seq, seq));
  584. ts->tv_nsec = 0;
  585. timespec64_add_ns(ts, nsecs);
  586. /*
  587. * Do not bail out early, in case there were callers still using
  588. * the value, even in the face of the WARN_ON.
  589. */
  590. if (unlikely(timekeeping_suspended))
  591. return -EAGAIN;
  592. return 0;
  593. }
  594. EXPORT_SYMBOL(__getnstimeofday64);
  595. /**
  596. * getnstimeofday64 - Returns the time of day in a timespec64.
  597. * @ts: pointer to the timespec64 to be set
  598. *
  599. * Returns the time of day in a timespec64 (WARN if suspended).
  600. */
  601. void getnstimeofday64(struct timespec64 *ts)
  602. {
  603. WARN_ON(__getnstimeofday64(ts));
  604. }
  605. EXPORT_SYMBOL(getnstimeofday64);
  606. ktime_t ktime_get(void)
  607. {
  608. struct timekeeper *tk = &tk_core.timekeeper;
  609. unsigned int seq;
  610. ktime_t base;
  611. s64 nsecs;
  612. WARN_ON(timekeeping_suspended);
  613. do {
  614. seq = read_seqcount_begin(&tk_core.seq);
  615. base = tk->tkr_mono.base;
  616. nsecs = timekeeping_get_ns(&tk->tkr_mono);
  617. } while (read_seqcount_retry(&tk_core.seq, seq));
  618. return ktime_add_ns(base, nsecs);
  619. }
  620. EXPORT_SYMBOL_GPL(ktime_get);
  621. u32 ktime_get_resolution_ns(void)
  622. {
  623. struct timekeeper *tk = &tk_core.timekeeper;
  624. unsigned int seq;
  625. u32 nsecs;
  626. WARN_ON(timekeeping_suspended);
  627. do {
  628. seq = read_seqcount_begin(&tk_core.seq);
  629. nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
  630. } while (read_seqcount_retry(&tk_core.seq, seq));
  631. return nsecs;
  632. }
  633. EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
  634. static ktime_t *offsets[TK_OFFS_MAX] = {
  635. [TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
  636. [TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
  637. [TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
  638. };
  639. ktime_t ktime_get_with_offset(enum tk_offsets offs)
  640. {
  641. struct timekeeper *tk = &tk_core.timekeeper;
  642. unsigned int seq;
  643. ktime_t base, *offset = offsets[offs];
  644. s64 nsecs;
  645. WARN_ON(timekeeping_suspended);
  646. do {
  647. seq = read_seqcount_begin(&tk_core.seq);
  648. base = ktime_add(tk->tkr_mono.base, *offset);
  649. nsecs = timekeeping_get_ns(&tk->tkr_mono);
  650. } while (read_seqcount_retry(&tk_core.seq, seq));
  651. return ktime_add_ns(base, nsecs);
  652. }
  653. EXPORT_SYMBOL_GPL(ktime_get_with_offset);
  654. /**
  655. * ktime_mono_to_any() - convert mononotic time to any other time
  656. * @tmono: time to convert.
  657. * @offs: which offset to use
  658. */
  659. ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
  660. {
  661. ktime_t *offset = offsets[offs];
  662. unsigned long seq;
  663. ktime_t tconv;
  664. do {
  665. seq = read_seqcount_begin(&tk_core.seq);
  666. tconv = ktime_add(tmono, *offset);
  667. } while (read_seqcount_retry(&tk_core.seq, seq));
  668. return tconv;
  669. }
  670. EXPORT_SYMBOL_GPL(ktime_mono_to_any);
  671. /**
  672. * ktime_get_raw - Returns the raw monotonic time in ktime_t format
  673. */
  674. ktime_t ktime_get_raw(void)
  675. {
  676. struct timekeeper *tk = &tk_core.timekeeper;
  677. unsigned int seq;
  678. ktime_t base;
  679. s64 nsecs;
  680. do {
  681. seq = read_seqcount_begin(&tk_core.seq);
  682. base = tk->tkr_raw.base;
  683. nsecs = timekeeping_get_ns(&tk->tkr_raw);
  684. } while (read_seqcount_retry(&tk_core.seq, seq));
  685. return ktime_add_ns(base, nsecs);
  686. }
  687. EXPORT_SYMBOL_GPL(ktime_get_raw);
  688. /**
  689. * ktime_get_ts64 - get the monotonic clock in timespec64 format
  690. * @ts: pointer to timespec variable
  691. *
  692. * The function calculates the monotonic clock from the realtime
  693. * clock and the wall_to_monotonic offset and stores the result
  694. * in normalized timespec64 format in the variable pointed to by @ts.
  695. */
  696. void ktime_get_ts64(struct timespec64 *ts)
  697. {
  698. struct timekeeper *tk = &tk_core.timekeeper;
  699. struct timespec64 tomono;
  700. s64 nsec;
  701. unsigned int seq;
  702. WARN_ON(timekeeping_suspended);
  703. do {
  704. seq = read_seqcount_begin(&tk_core.seq);
  705. ts->tv_sec = tk->xtime_sec;
  706. nsec = timekeeping_get_ns(&tk->tkr_mono);
  707. tomono = tk->wall_to_monotonic;
  708. } while (read_seqcount_retry(&tk_core.seq, seq));
  709. ts->tv_sec += tomono.tv_sec;
  710. ts->tv_nsec = 0;
  711. timespec64_add_ns(ts, nsec + tomono.tv_nsec);
  712. }
  713. EXPORT_SYMBOL_GPL(ktime_get_ts64);
  714. /**
  715. * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
  716. *
  717. * Returns the seconds portion of CLOCK_MONOTONIC with a single non
  718. * serialized read. tk->ktime_sec is of type 'unsigned long' so this
  719. * works on both 32 and 64 bit systems. On 32 bit systems the readout
  720. * covers ~136 years of uptime which should be enough to prevent
  721. * premature wrap arounds.
  722. */
  723. time64_t ktime_get_seconds(void)
  724. {
  725. struct timekeeper *tk = &tk_core.timekeeper;
  726. WARN_ON(timekeeping_suspended);
  727. return tk->ktime_sec;
  728. }
  729. EXPORT_SYMBOL_GPL(ktime_get_seconds);
  730. /**
  731. * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
  732. *
  733. * Returns the wall clock seconds since 1970. This replaces the
  734. * get_seconds() interface which is not y2038 safe on 32bit systems.
  735. *
  736. * For 64bit systems the fast access to tk->xtime_sec is preserved. On
  737. * 32bit systems the access must be protected with the sequence
  738. * counter to provide "atomic" access to the 64bit tk->xtime_sec
  739. * value.
  740. */
  741. time64_t ktime_get_real_seconds(void)
  742. {
  743. struct timekeeper *tk = &tk_core.timekeeper;
  744. time64_t seconds;
  745. unsigned int seq;
  746. if (IS_ENABLED(CONFIG_64BIT))
  747. return tk->xtime_sec;
  748. do {
  749. seq = read_seqcount_begin(&tk_core.seq);
  750. seconds = tk->xtime_sec;
  751. } while (read_seqcount_retry(&tk_core.seq, seq));
  752. return seconds;
  753. }
  754. EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
  755. /**
  756. * __ktime_get_real_seconds - The same as ktime_get_real_seconds
  757. * but without the sequence counter protect. This internal function
  758. * is called just when timekeeping lock is already held.
  759. */
  760. time64_t __ktime_get_real_seconds(void)
  761. {
  762. struct timekeeper *tk = &tk_core.timekeeper;
  763. return tk->xtime_sec;
  764. }
  765. /**
  766. * ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
  767. * @systime_snapshot: pointer to struct receiving the system time snapshot
  768. */
  769. void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
  770. {
  771. struct timekeeper *tk = &tk_core.timekeeper;
  772. unsigned long seq;
  773. ktime_t base_raw;
  774. ktime_t base_real;
  775. s64 nsec_raw;
  776. s64 nsec_real;
  777. cycle_t now;
  778. WARN_ON_ONCE(timekeeping_suspended);
  779. do {
  780. seq = read_seqcount_begin(&tk_core.seq);
  781. now = tk_clock_read(&tk->tkr_mono);
  782. systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
  783. systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
  784. base_real = ktime_add(tk->tkr_mono.base,
  785. tk_core.timekeeper.offs_real);
  786. base_raw = tk->tkr_raw.base;
  787. nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
  788. nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
  789. } while (read_seqcount_retry(&tk_core.seq, seq));
  790. systime_snapshot->cycles = now;
  791. systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
  792. systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
  793. }
  794. EXPORT_SYMBOL_GPL(ktime_get_snapshot);
  795. /* Scale base by mult/div checking for overflow */
  796. static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
  797. {
  798. u64 tmp, rem;
  799. tmp = div64_u64_rem(*base, div, &rem);
  800. if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
  801. ((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
  802. return -EOVERFLOW;
  803. tmp *= mult;
  804. rem *= mult;
  805. do_div(rem, div);
  806. *base = tmp + rem;
  807. return 0;
  808. }
  809. /**
  810. * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
  811. * @history: Snapshot representing start of history
  812. * @partial_history_cycles: Cycle offset into history (fractional part)
  813. * @total_history_cycles: Total history length in cycles
  814. * @discontinuity: True indicates clock was set on history period
  815. * @ts: Cross timestamp that should be adjusted using
  816. * partial/total ratio
  817. *
  818. * Helper function used by get_device_system_crosststamp() to correct the
  819. * crosstimestamp corresponding to the start of the current interval to the
  820. * system counter value (timestamp point) provided by the driver. The
  821. * total_history_* quantities are the total history starting at the provided
  822. * reference point and ending at the start of the current interval. The cycle
  823. * count between the driver timestamp point and the start of the current
  824. * interval is partial_history_cycles.
  825. */
  826. static int adjust_historical_crosststamp(struct system_time_snapshot *history,
  827. cycle_t partial_history_cycles,
  828. cycle_t total_history_cycles,
  829. bool discontinuity,
  830. struct system_device_crosststamp *ts)
  831. {
  832. struct timekeeper *tk = &tk_core.timekeeper;
  833. u64 corr_raw, corr_real;
  834. bool interp_forward;
  835. int ret;
  836. if (total_history_cycles == 0 || partial_history_cycles == 0)
  837. return 0;
  838. /* Interpolate shortest distance from beginning or end of history */
  839. interp_forward = partial_history_cycles > total_history_cycles/2 ?
  840. true : false;
  841. partial_history_cycles = interp_forward ?
  842. total_history_cycles - partial_history_cycles :
  843. partial_history_cycles;
  844. /*
  845. * Scale the monotonic raw time delta by:
  846. * partial_history_cycles / total_history_cycles
  847. */
  848. corr_raw = (u64)ktime_to_ns(
  849. ktime_sub(ts->sys_monoraw, history->raw));
  850. ret = scale64_check_overflow(partial_history_cycles,
  851. total_history_cycles, &corr_raw);
  852. if (ret)
  853. return ret;
  854. /*
  855. * If there is a discontinuity in the history, scale monotonic raw
  856. * correction by:
  857. * mult(real)/mult(raw) yielding the realtime correction
  858. * Otherwise, calculate the realtime correction similar to monotonic
  859. * raw calculation
  860. */
  861. if (discontinuity) {
  862. corr_real = mul_u64_u32_div
  863. (corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
  864. } else {
  865. corr_real = (u64)ktime_to_ns(
  866. ktime_sub(ts->sys_realtime, history->real));
  867. ret = scale64_check_overflow(partial_history_cycles,
  868. total_history_cycles, &corr_real);
  869. if (ret)
  870. return ret;
  871. }
  872. /* Fixup monotonic raw and real time time values */
  873. if (interp_forward) {
  874. ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
  875. ts->sys_realtime = ktime_add_ns(history->real, corr_real);
  876. } else {
  877. ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
  878. ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
  879. }
  880. return 0;
  881. }
  882. /*
  883. * cycle_between - true if test occurs chronologically between before and after
  884. */
  885. static bool cycle_between(cycle_t before, cycle_t test, cycle_t after)
  886. {
  887. if (test > before && test < after)
  888. return true;
  889. if (test < before && before > after)
  890. return true;
  891. return false;
  892. }
  893. /**
  894. * get_device_system_crosststamp - Synchronously capture system/device timestamp
  895. * @get_time_fn: Callback to get simultaneous device time and
  896. * system counter from the device driver
  897. * @ctx: Context passed to get_time_fn()
  898. * @history_begin: Historical reference point used to interpolate system
  899. * time when counter provided by the driver is before the current interval
  900. * @xtstamp: Receives simultaneously captured system and device time
  901. *
  902. * Reads a timestamp from a device and correlates it to system time
  903. */
  904. int get_device_system_crosststamp(int (*get_time_fn)
  905. (ktime_t *device_time,
  906. struct system_counterval_t *sys_counterval,
  907. void *ctx),
  908. void *ctx,
  909. struct system_time_snapshot *history_begin,
  910. struct system_device_crosststamp *xtstamp)
  911. {
  912. struct system_counterval_t system_counterval;
  913. struct timekeeper *tk = &tk_core.timekeeper;
  914. cycle_t cycles, now, interval_start;
  915. unsigned int clock_was_set_seq = 0;
  916. ktime_t base_real, base_raw;
  917. s64 nsec_real, nsec_raw;
  918. u8 cs_was_changed_seq;
  919. unsigned long seq;
  920. bool do_interp;
  921. int ret;
  922. do {
  923. seq = read_seqcount_begin(&tk_core.seq);
  924. /*
  925. * Try to synchronously capture device time and a system
  926. * counter value calling back into the device driver
  927. */
  928. ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
  929. if (ret)
  930. return ret;
  931. /*
  932. * Verify that the clocksource associated with the captured
  933. * system counter value is the same as the currently installed
  934. * timekeeper clocksource
  935. */
  936. if (tk->tkr_mono.clock != system_counterval.cs)
  937. return -ENODEV;
  938. cycles = system_counterval.cycles;
  939. /*
  940. * Check whether the system counter value provided by the
  941. * device driver is on the current timekeeping interval.
  942. */
  943. now = tk_clock_read(&tk->tkr_mono);
  944. interval_start = tk->tkr_mono.cycle_last;
  945. if (!cycle_between(interval_start, cycles, now)) {
  946. clock_was_set_seq = tk->clock_was_set_seq;
  947. cs_was_changed_seq = tk->cs_was_changed_seq;
  948. cycles = interval_start;
  949. do_interp = true;
  950. } else {
  951. do_interp = false;
  952. }
  953. base_real = ktime_add(tk->tkr_mono.base,
  954. tk_core.timekeeper.offs_real);
  955. base_raw = tk->tkr_raw.base;
  956. nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
  957. system_counterval.cycles);
  958. nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
  959. system_counterval.cycles);
  960. } while (read_seqcount_retry(&tk_core.seq, seq));
  961. xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
  962. xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
  963. /*
  964. * Interpolate if necessary, adjusting back from the start of the
  965. * current interval
  966. */
  967. if (do_interp) {
  968. cycle_t partial_history_cycles, total_history_cycles;
  969. bool discontinuity;
  970. /*
  971. * Check that the counter value occurs after the provided
  972. * history reference and that the history doesn't cross a
  973. * clocksource change
  974. */
  975. if (!history_begin ||
  976. !cycle_between(history_begin->cycles,
  977. system_counterval.cycles, cycles) ||
  978. history_begin->cs_was_changed_seq != cs_was_changed_seq)
  979. return -EINVAL;
  980. partial_history_cycles = cycles - system_counterval.cycles;
  981. total_history_cycles = cycles - history_begin->cycles;
  982. discontinuity =
  983. history_begin->clock_was_set_seq != clock_was_set_seq;
  984. ret = adjust_historical_crosststamp(history_begin,
  985. partial_history_cycles,
  986. total_history_cycles,
  987. discontinuity, xtstamp);
  988. if (ret)
  989. return ret;
  990. }
  991. return 0;
  992. }
  993. EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
  994. /**
  995. * do_gettimeofday - Returns the time of day in a timeval
  996. * @tv: pointer to the timeval to be set
  997. *
  998. * NOTE: Users should be converted to using getnstimeofday()
  999. */
  1000. void do_gettimeofday(struct timeval *tv)
  1001. {
  1002. struct timespec64 now;
  1003. getnstimeofday64(&now);
  1004. tv->tv_sec = now.tv_sec;
  1005. tv->tv_usec = now.tv_nsec/1000;
  1006. }
  1007. EXPORT_SYMBOL(do_gettimeofday);
  1008. /**
  1009. * do_settimeofday64 - Sets the time of day.
  1010. * @ts: pointer to the timespec64 variable containing the new time
  1011. *
  1012. * Sets the time of day to the new time and update NTP and notify hrtimers
  1013. */
  1014. int do_settimeofday64(const struct timespec64 *ts)
  1015. {
  1016. struct timekeeper *tk = &tk_core.timekeeper;
  1017. struct timespec64 ts_delta, xt;
  1018. unsigned long flags;
  1019. int ret = 0;
  1020. if (!timespec64_valid_strict(ts))
  1021. return -EINVAL;
  1022. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1023. write_seqcount_begin(&tk_core.seq);
  1024. timekeeping_forward_now(tk);
  1025. xt = tk_xtime(tk);
  1026. ts_delta.tv_sec = ts->tv_sec - xt.tv_sec;
  1027. ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec;
  1028. if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
  1029. ret = -EINVAL;
  1030. goto out;
  1031. }
  1032. tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
  1033. tk_set_xtime(tk, ts);
  1034. out:
  1035. timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
  1036. write_seqcount_end(&tk_core.seq);
  1037. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1038. /* signal hrtimers about time change */
  1039. clock_was_set();
  1040. return ret;
  1041. }
  1042. EXPORT_SYMBOL(do_settimeofday64);
  1043. /**
  1044. * timekeeping_inject_offset - Adds or subtracts from the current time.
  1045. * @tv: pointer to the timespec variable containing the offset
  1046. *
  1047. * Adds or subtracts an offset value from the current time.
  1048. */
  1049. int timekeeping_inject_offset(struct timespec *ts)
  1050. {
  1051. struct timekeeper *tk = &tk_core.timekeeper;
  1052. unsigned long flags;
  1053. struct timespec64 ts64, tmp;
  1054. int ret = 0;
  1055. if (!timespec_inject_offset_valid(ts))
  1056. return -EINVAL;
  1057. ts64 = timespec_to_timespec64(*ts);
  1058. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1059. write_seqcount_begin(&tk_core.seq);
  1060. timekeeping_forward_now(tk);
  1061. /* Make sure the proposed value is valid */
  1062. tmp = timespec64_add(tk_xtime(tk), ts64);
  1063. if (timespec64_compare(&tk->wall_to_monotonic, &ts64) > 0 ||
  1064. !timespec64_valid_strict(&tmp)) {
  1065. ret = -EINVAL;
  1066. goto error;
  1067. }
  1068. tk_xtime_add(tk, &ts64);
  1069. tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64));
  1070. error: /* even if we error out, we forwarded the time, so call update */
  1071. timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
  1072. write_seqcount_end(&tk_core.seq);
  1073. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1074. /* signal hrtimers about time change */
  1075. clock_was_set();
  1076. return ret;
  1077. }
  1078. EXPORT_SYMBOL(timekeeping_inject_offset);
  1079. /**
  1080. * timekeeping_get_tai_offset - Returns current TAI offset from UTC
  1081. *
  1082. */
  1083. s32 timekeeping_get_tai_offset(void)
  1084. {
  1085. struct timekeeper *tk = &tk_core.timekeeper;
  1086. unsigned int seq;
  1087. s32 ret;
  1088. do {
  1089. seq = read_seqcount_begin(&tk_core.seq);
  1090. ret = tk->tai_offset;
  1091. } while (read_seqcount_retry(&tk_core.seq, seq));
  1092. return ret;
  1093. }
  1094. /**
  1095. * __timekeeping_set_tai_offset - Lock free worker function
  1096. *
  1097. */
  1098. static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
  1099. {
  1100. tk->tai_offset = tai_offset;
  1101. tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
  1102. }
  1103. /**
  1104. * timekeeping_set_tai_offset - Sets the current TAI offset from UTC
  1105. *
  1106. */
  1107. void timekeeping_set_tai_offset(s32 tai_offset)
  1108. {
  1109. struct timekeeper *tk = &tk_core.timekeeper;
  1110. unsigned long flags;
  1111. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1112. write_seqcount_begin(&tk_core.seq);
  1113. __timekeeping_set_tai_offset(tk, tai_offset);
  1114. timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
  1115. write_seqcount_end(&tk_core.seq);
  1116. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1117. clock_was_set();
  1118. }
  1119. /**
  1120. * change_clocksource - Swaps clocksources if a new one is available
  1121. *
  1122. * Accumulates current time interval and initializes new clocksource
  1123. */
  1124. static int change_clocksource(void *data)
  1125. {
  1126. struct timekeeper *tk = &tk_core.timekeeper;
  1127. struct clocksource *new, *old;
  1128. unsigned long flags;
  1129. new = (struct clocksource *) data;
  1130. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1131. write_seqcount_begin(&tk_core.seq);
  1132. timekeeping_forward_now(tk);
  1133. /*
  1134. * If the cs is in module, get a module reference. Succeeds
  1135. * for built-in code (owner == NULL) as well.
  1136. */
  1137. if (try_module_get(new->owner)) {
  1138. if (!new->enable || new->enable(new) == 0) {
  1139. old = tk->tkr_mono.clock;
  1140. tk_setup_internals(tk, new);
  1141. if (old->disable)
  1142. old->disable(old);
  1143. module_put(old->owner);
  1144. } else {
  1145. module_put(new->owner);
  1146. }
  1147. }
  1148. timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
  1149. write_seqcount_end(&tk_core.seq);
  1150. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1151. return 0;
  1152. }
  1153. /**
  1154. * timekeeping_notify - Install a new clock source
  1155. * @clock: pointer to the clock source
  1156. *
  1157. * This function is called from clocksource.c after a new, better clock
  1158. * source has been registered. The caller holds the clocksource_mutex.
  1159. */
  1160. int timekeeping_notify(struct clocksource *clock)
  1161. {
  1162. struct timekeeper *tk = &tk_core.timekeeper;
  1163. if (tk->tkr_mono.clock == clock)
  1164. return 0;
  1165. stop_machine(change_clocksource, clock, NULL);
  1166. tick_clock_notify();
  1167. return tk->tkr_mono.clock == clock ? 0 : -1;
  1168. }
  1169. /**
  1170. * getrawmonotonic64 - Returns the raw monotonic time in a timespec
  1171. * @ts: pointer to the timespec64 to be set
  1172. *
  1173. * Returns the raw monotonic time (completely un-modified by ntp)
  1174. */
  1175. void getrawmonotonic64(struct timespec64 *ts)
  1176. {
  1177. struct timekeeper *tk = &tk_core.timekeeper;
  1178. struct timespec64 ts64;
  1179. unsigned long seq;
  1180. s64 nsecs;
  1181. do {
  1182. seq = read_seqcount_begin(&tk_core.seq);
  1183. nsecs = timekeeping_get_ns(&tk->tkr_raw);
  1184. ts64 = tk->raw_time;
  1185. } while (read_seqcount_retry(&tk_core.seq, seq));
  1186. timespec64_add_ns(&ts64, nsecs);
  1187. *ts = ts64;
  1188. }
  1189. EXPORT_SYMBOL(getrawmonotonic64);
  1190. /**
  1191. * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
  1192. */
  1193. int timekeeping_valid_for_hres(void)
  1194. {
  1195. struct timekeeper *tk = &tk_core.timekeeper;
  1196. unsigned long seq;
  1197. int ret;
  1198. do {
  1199. seq = read_seqcount_begin(&tk_core.seq);
  1200. ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
  1201. } while (read_seqcount_retry(&tk_core.seq, seq));
  1202. return ret;
  1203. }
  1204. /**
  1205. * timekeeping_max_deferment - Returns max time the clocksource can be deferred
  1206. */
  1207. u64 timekeeping_max_deferment(void)
  1208. {
  1209. struct timekeeper *tk = &tk_core.timekeeper;
  1210. unsigned long seq;
  1211. u64 ret;
  1212. do {
  1213. seq = read_seqcount_begin(&tk_core.seq);
  1214. ret = tk->tkr_mono.clock->max_idle_ns;
  1215. } while (read_seqcount_retry(&tk_core.seq, seq));
  1216. return ret;
  1217. }
  1218. /**
  1219. * read_persistent_clock - Return time from the persistent clock.
  1220. *
  1221. * Weak dummy function for arches that do not yet support it.
  1222. * Reads the time from the battery backed persistent clock.
  1223. * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
  1224. *
  1225. * XXX - Do be sure to remove it once all arches implement it.
  1226. */
  1227. void __weak read_persistent_clock(struct timespec *ts)
  1228. {
  1229. ts->tv_sec = 0;
  1230. ts->tv_nsec = 0;
  1231. }
  1232. void __weak read_persistent_clock64(struct timespec64 *ts64)
  1233. {
  1234. struct timespec ts;
  1235. read_persistent_clock(&ts);
  1236. *ts64 = timespec_to_timespec64(ts);
  1237. }
  1238. /**
  1239. * read_boot_clock64 - Return time of the system start.
  1240. *
  1241. * Weak dummy function for arches that do not yet support it.
  1242. * Function to read the exact time the system has been started.
  1243. * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported.
  1244. *
  1245. * XXX - Do be sure to remove it once all arches implement it.
  1246. */
  1247. void __weak read_boot_clock64(struct timespec64 *ts)
  1248. {
  1249. ts->tv_sec = 0;
  1250. ts->tv_nsec = 0;
  1251. }
  1252. /* Flag for if timekeeping_resume() has injected sleeptime */
  1253. static bool sleeptime_injected;
  1254. /* Flag for if there is a persistent clock on this platform */
  1255. static bool persistent_clock_exists;
  1256. /*
  1257. * timekeeping_init - Initializes the clocksource and common timekeeping values
  1258. */
  1259. void __init timekeeping_init(void)
  1260. {
  1261. struct timekeeper *tk = &tk_core.timekeeper;
  1262. struct clocksource *clock;
  1263. unsigned long flags;
  1264. struct timespec64 now, boot, tmp;
  1265. read_persistent_clock64(&now);
  1266. if (!timespec64_valid_strict(&now)) {
  1267. pr_warn("WARNING: Persistent clock returned invalid value!\n"
  1268. " Check your CMOS/BIOS settings.\n");
  1269. now.tv_sec = 0;
  1270. now.tv_nsec = 0;
  1271. } else if (now.tv_sec || now.tv_nsec)
  1272. persistent_clock_exists = true;
  1273. read_boot_clock64(&boot);
  1274. if (!timespec64_valid_strict(&boot)) {
  1275. pr_warn("WARNING: Boot clock returned invalid value!\n"
  1276. " Check your CMOS/BIOS settings.\n");
  1277. boot.tv_sec = 0;
  1278. boot.tv_nsec = 0;
  1279. }
  1280. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1281. write_seqcount_begin(&tk_core.seq);
  1282. ntp_init();
  1283. clock = clocksource_default_clock();
  1284. if (clock->enable)
  1285. clock->enable(clock);
  1286. tk_setup_internals(tk, clock);
  1287. tk_set_xtime(tk, &now);
  1288. tk->raw_time.tv_sec = 0;
  1289. tk->raw_time.tv_nsec = 0;
  1290. if (boot.tv_sec == 0 && boot.tv_nsec == 0)
  1291. boot = tk_xtime(tk);
  1292. set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
  1293. tk_set_wall_to_mono(tk, tmp);
  1294. timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
  1295. write_seqcount_end(&tk_core.seq);
  1296. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1297. }
  1298. /* time in seconds when suspend began for persistent clock */
  1299. static struct timespec64 timekeeping_suspend_time;
  1300. /**
  1301. * __timekeeping_inject_sleeptime - Internal function to add sleep interval
  1302. * @delta: pointer to a timespec delta value
  1303. *
  1304. * Takes a timespec offset measuring a suspend interval and properly
  1305. * adds the sleep offset to the timekeeping variables.
  1306. */
  1307. static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
  1308. struct timespec64 *delta)
  1309. {
  1310. if (!timespec64_valid_strict(delta)) {
  1311. printk_deferred(KERN_WARNING
  1312. "__timekeeping_inject_sleeptime: Invalid "
  1313. "sleep delta value!\n");
  1314. return;
  1315. }
  1316. tk_xtime_add(tk, delta);
  1317. tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
  1318. tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
  1319. tk_debug_account_sleep_time(delta);
  1320. }
  1321. #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
  1322. /**
  1323. * We have three kinds of time sources to use for sleep time
  1324. * injection, the preference order is:
  1325. * 1) non-stop clocksource
  1326. * 2) persistent clock (ie: RTC accessible when irqs are off)
  1327. * 3) RTC
  1328. *
  1329. * 1) and 2) are used by timekeeping, 3) by RTC subsystem.
  1330. * If system has neither 1) nor 2), 3) will be used finally.
  1331. *
  1332. *
  1333. * If timekeeping has injected sleeptime via either 1) or 2),
  1334. * 3) becomes needless, so in this case we don't need to call
  1335. * rtc_resume(), and this is what timekeeping_rtc_skipresume()
  1336. * means.
  1337. */
  1338. bool timekeeping_rtc_skipresume(void)
  1339. {
  1340. return sleeptime_injected;
  1341. }
  1342. /**
  1343. * 1) can be determined whether to use or not only when doing
  1344. * timekeeping_resume() which is invoked after rtc_suspend(),
  1345. * so we can't skip rtc_suspend() surely if system has 1).
  1346. *
  1347. * But if system has 2), 2) will definitely be used, so in this
  1348. * case we don't need to call rtc_suspend(), and this is what
  1349. * timekeeping_rtc_skipsuspend() means.
  1350. */
  1351. bool timekeeping_rtc_skipsuspend(void)
  1352. {
  1353. return persistent_clock_exists;
  1354. }
  1355. /**
  1356. * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values
  1357. * @delta: pointer to a timespec64 delta value
  1358. *
  1359. * This hook is for architectures that cannot support read_persistent_clock64
  1360. * because their RTC/persistent clock is only accessible when irqs are enabled.
  1361. * and also don't have an effective nonstop clocksource.
  1362. *
  1363. * This function should only be called by rtc_resume(), and allows
  1364. * a suspend offset to be injected into the timekeeping values.
  1365. */
  1366. void timekeeping_inject_sleeptime64(struct timespec64 *delta)
  1367. {
  1368. struct timekeeper *tk = &tk_core.timekeeper;
  1369. unsigned long flags;
  1370. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1371. write_seqcount_begin(&tk_core.seq);
  1372. timekeeping_forward_now(tk);
  1373. __timekeeping_inject_sleeptime(tk, delta);
  1374. timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
  1375. write_seqcount_end(&tk_core.seq);
  1376. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1377. /* signal hrtimers about time change */
  1378. clock_was_set();
  1379. }
  1380. #endif
  1381. /**
  1382. * timekeeping_resume - Resumes the generic timekeeping subsystem.
  1383. */
  1384. void timekeeping_resume(void)
  1385. {
  1386. struct timekeeper *tk = &tk_core.timekeeper;
  1387. struct clocksource *clock = tk->tkr_mono.clock;
  1388. unsigned long flags;
  1389. struct timespec64 ts_new, ts_delta;
  1390. cycle_t cycle_now, cycle_delta;
  1391. sleeptime_injected = false;
  1392. read_persistent_clock64(&ts_new);
  1393. clockevents_resume();
  1394. clocksource_resume();
  1395. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1396. write_seqcount_begin(&tk_core.seq);
  1397. /*
  1398. * After system resumes, we need to calculate the suspended time and
  1399. * compensate it for the OS time. There are 3 sources that could be
  1400. * used: Nonstop clocksource during suspend, persistent clock and rtc
  1401. * device.
  1402. *
  1403. * One specific platform may have 1 or 2 or all of them, and the
  1404. * preference will be:
  1405. * suspend-nonstop clocksource -> persistent clock -> rtc
  1406. * The less preferred source will only be tried if there is no better
  1407. * usable source. The rtc part is handled separately in rtc core code.
  1408. */
  1409. cycle_now = tk_clock_read(&tk->tkr_mono);
  1410. if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
  1411. cycle_now > tk->tkr_mono.cycle_last) {
  1412. u64 num, max = ULLONG_MAX;
  1413. u32 mult = clock->mult;
  1414. u32 shift = clock->shift;
  1415. s64 nsec = 0;
  1416. cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
  1417. tk->tkr_mono.mask);
  1418. /*
  1419. * "cycle_delta * mutl" may cause 64 bits overflow, if the
  1420. * suspended time is too long. In that case we need do the
  1421. * 64 bits math carefully
  1422. */
  1423. do_div(max, mult);
  1424. if (cycle_delta > max) {
  1425. num = div64_u64(cycle_delta, max);
  1426. nsec = (((u64) max * mult) >> shift) * num;
  1427. cycle_delta -= num * max;
  1428. }
  1429. nsec += ((u64) cycle_delta * mult) >> shift;
  1430. ts_delta = ns_to_timespec64(nsec);
  1431. sleeptime_injected = true;
  1432. } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
  1433. ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
  1434. sleeptime_injected = true;
  1435. }
  1436. if (sleeptime_injected)
  1437. __timekeeping_inject_sleeptime(tk, &ts_delta);
  1438. /* Re-base the last cycle value */
  1439. tk->tkr_mono.cycle_last = cycle_now;
  1440. tk->tkr_raw.cycle_last = cycle_now;
  1441. tk->ntp_error = 0;
  1442. timekeeping_suspended = 0;
  1443. timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
  1444. write_seqcount_end(&tk_core.seq);
  1445. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1446. touch_softlockup_watchdog();
  1447. tick_resume();
  1448. hrtimers_resume();
  1449. }
  1450. int timekeeping_suspend(void)
  1451. {
  1452. struct timekeeper *tk = &tk_core.timekeeper;
  1453. unsigned long flags;
  1454. struct timespec64 delta, delta_delta;
  1455. static struct timespec64 old_delta;
  1456. read_persistent_clock64(&timekeeping_suspend_time);
  1457. /*
  1458. * On some systems the persistent_clock can not be detected at
  1459. * timekeeping_init by its return value, so if we see a valid
  1460. * value returned, update the persistent_clock_exists flag.
  1461. */
  1462. if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
  1463. persistent_clock_exists = true;
  1464. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1465. write_seqcount_begin(&tk_core.seq);
  1466. timekeeping_forward_now(tk);
  1467. timekeeping_suspended = 1;
  1468. if (persistent_clock_exists) {
  1469. /*
  1470. * To avoid drift caused by repeated suspend/resumes,
  1471. * which each can add ~1 second drift error,
  1472. * try to compensate so the difference in system time
  1473. * and persistent_clock time stays close to constant.
  1474. */
  1475. delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
  1476. delta_delta = timespec64_sub(delta, old_delta);
  1477. if (abs(delta_delta.tv_sec) >= 2) {
  1478. /*
  1479. * if delta_delta is too large, assume time correction
  1480. * has occurred and set old_delta to the current delta.
  1481. */
  1482. old_delta = delta;
  1483. } else {
  1484. /* Otherwise try to adjust old_system to compensate */
  1485. timekeeping_suspend_time =
  1486. timespec64_add(timekeeping_suspend_time, delta_delta);
  1487. }
  1488. }
  1489. timekeeping_update(tk, TK_MIRROR);
  1490. halt_fast_timekeeper(tk);
  1491. write_seqcount_end(&tk_core.seq);
  1492. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1493. tick_suspend();
  1494. clocksource_suspend();
  1495. clockevents_suspend();
  1496. return 0;
  1497. }
  1498. /* sysfs resume/suspend bits for timekeeping */
  1499. static struct syscore_ops timekeeping_syscore_ops = {
  1500. .resume = timekeeping_resume,
  1501. .suspend = timekeeping_suspend,
  1502. };
  1503. static int __init timekeeping_init_ops(void)
  1504. {
  1505. register_syscore_ops(&timekeeping_syscore_ops);
  1506. return 0;
  1507. }
  1508. device_initcall(timekeeping_init_ops);
  1509. /*
  1510. * Apply a multiplier adjustment to the timekeeper
  1511. */
  1512. static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
  1513. s64 offset,
  1514. bool negative,
  1515. int adj_scale)
  1516. {
  1517. s64 interval = tk->cycle_interval;
  1518. s32 mult_adj = 1;
  1519. if (negative) {
  1520. mult_adj = -mult_adj;
  1521. interval = -interval;
  1522. offset = -offset;
  1523. }
  1524. mult_adj <<= adj_scale;
  1525. interval <<= adj_scale;
  1526. offset <<= adj_scale;
  1527. /*
  1528. * So the following can be confusing.
  1529. *
  1530. * To keep things simple, lets assume mult_adj == 1 for now.
  1531. *
  1532. * When mult_adj != 1, remember that the interval and offset values
  1533. * have been appropriately scaled so the math is the same.
  1534. *
  1535. * The basic idea here is that we're increasing the multiplier
  1536. * by one, this causes the xtime_interval to be incremented by
  1537. * one cycle_interval. This is because:
  1538. * xtime_interval = cycle_interval * mult
  1539. * So if mult is being incremented by one:
  1540. * xtime_interval = cycle_interval * (mult + 1)
  1541. * Its the same as:
  1542. * xtime_interval = (cycle_interval * mult) + cycle_interval
  1543. * Which can be shortened to:
  1544. * xtime_interval += cycle_interval
  1545. *
  1546. * So offset stores the non-accumulated cycles. Thus the current
  1547. * time (in shifted nanoseconds) is:
  1548. * now = (offset * adj) + xtime_nsec
  1549. * Now, even though we're adjusting the clock frequency, we have
  1550. * to keep time consistent. In other words, we can't jump back
  1551. * in time, and we also want to avoid jumping forward in time.
  1552. *
  1553. * So given the same offset value, we need the time to be the same
  1554. * both before and after the freq adjustment.
  1555. * now = (offset * adj_1) + xtime_nsec_1
  1556. * now = (offset * adj_2) + xtime_nsec_2
  1557. * So:
  1558. * (offset * adj_1) + xtime_nsec_1 =
  1559. * (offset * adj_2) + xtime_nsec_2
  1560. * And we know:
  1561. * adj_2 = adj_1 + 1
  1562. * So:
  1563. * (offset * adj_1) + xtime_nsec_1 =
  1564. * (offset * (adj_1+1)) + xtime_nsec_2
  1565. * (offset * adj_1) + xtime_nsec_1 =
  1566. * (offset * adj_1) + offset + xtime_nsec_2
  1567. * Canceling the sides:
  1568. * xtime_nsec_1 = offset + xtime_nsec_2
  1569. * Which gives us:
  1570. * xtime_nsec_2 = xtime_nsec_1 - offset
  1571. * Which simplfies to:
  1572. * xtime_nsec -= offset
  1573. *
  1574. * XXX - TODO: Doc ntp_error calculation.
  1575. */
  1576. if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
  1577. /* NTP adjustment caused clocksource mult overflow */
  1578. WARN_ON_ONCE(1);
  1579. return;
  1580. }
  1581. tk->tkr_mono.mult += mult_adj;
  1582. tk->xtime_interval += interval;
  1583. tk->tkr_mono.xtime_nsec -= offset;
  1584. tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
  1585. }
  1586. /*
  1587. * Calculate the multiplier adjustment needed to match the frequency
  1588. * specified by NTP
  1589. */
  1590. static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
  1591. s64 offset)
  1592. {
  1593. s64 interval = tk->cycle_interval;
  1594. s64 xinterval = tk->xtime_interval;
  1595. u32 base = tk->tkr_mono.clock->mult;
  1596. u32 max = tk->tkr_mono.clock->maxadj;
  1597. u32 cur_adj = tk->tkr_mono.mult;
  1598. s64 tick_error;
  1599. bool negative;
  1600. u32 adj_scale;
  1601. /* Remove any current error adj from freq calculation */
  1602. if (tk->ntp_err_mult)
  1603. xinterval -= tk->cycle_interval;
  1604. tk->ntp_tick = ntp_tick_length();
  1605. /* Calculate current error per tick */
  1606. tick_error = ntp_tick_length() >> tk->ntp_error_shift;
  1607. tick_error -= (xinterval + tk->xtime_remainder);
  1608. /* Don't worry about correcting it if its small */
  1609. if (likely((tick_error >= 0) && (tick_error <= interval)))
  1610. return;
  1611. /* preserve the direction of correction */
  1612. negative = (tick_error < 0);
  1613. /* If any adjustment would pass the max, just return */
  1614. if (negative && (cur_adj - 1) <= (base - max))
  1615. return;
  1616. if (!negative && (cur_adj + 1) >= (base + max))
  1617. return;
  1618. /*
  1619. * Sort out the magnitude of the correction, but
  1620. * avoid making so large a correction that we go
  1621. * over the max adjustment.
  1622. */
  1623. adj_scale = 0;
  1624. tick_error = abs(tick_error);
  1625. while (tick_error > interval) {
  1626. u32 adj = 1 << (adj_scale + 1);
  1627. /* Check if adjustment gets us within 1 unit from the max */
  1628. if (negative && (cur_adj - adj) <= (base - max))
  1629. break;
  1630. if (!negative && (cur_adj + adj) >= (base + max))
  1631. break;
  1632. adj_scale++;
  1633. tick_error >>= 1;
  1634. }
  1635. /* scale the corrections */
  1636. timekeeping_apply_adjustment(tk, offset, negative, adj_scale);
  1637. }
  1638. /*
  1639. * Adjust the timekeeper's multiplier to the correct frequency
  1640. * and also to reduce the accumulated error value.
  1641. */
  1642. static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
  1643. {
  1644. /* Correct for the current frequency error */
  1645. timekeeping_freqadjust(tk, offset);
  1646. /* Next make a small adjustment to fix any cumulative error */
  1647. if (!tk->ntp_err_mult && (tk->ntp_error > 0)) {
  1648. tk->ntp_err_mult = 1;
  1649. timekeeping_apply_adjustment(tk, offset, 0, 0);
  1650. } else if (tk->ntp_err_mult && (tk->ntp_error <= 0)) {
  1651. /* Undo any existing error adjustment */
  1652. timekeeping_apply_adjustment(tk, offset, 1, 0);
  1653. tk->ntp_err_mult = 0;
  1654. }
  1655. if (unlikely(tk->tkr_mono.clock->maxadj &&
  1656. (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
  1657. > tk->tkr_mono.clock->maxadj))) {
  1658. printk_once(KERN_WARNING
  1659. "Adjusting %s more than 11%% (%ld vs %ld)\n",
  1660. tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
  1661. (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
  1662. }
  1663. /*
  1664. * It may be possible that when we entered this function, xtime_nsec
  1665. * was very small. Further, if we're slightly speeding the clocksource
  1666. * in the code above, its possible the required corrective factor to
  1667. * xtime_nsec could cause it to underflow.
  1668. *
  1669. * Now, since we already accumulated the second, cannot simply roll
  1670. * the accumulated second back, since the NTP subsystem has been
  1671. * notified via second_overflow. So instead we push xtime_nsec forward
  1672. * by the amount we underflowed, and add that amount into the error.
  1673. *
  1674. * We'll correct this error next time through this function, when
  1675. * xtime_nsec is not as small.
  1676. */
  1677. if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
  1678. s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
  1679. tk->tkr_mono.xtime_nsec = 0;
  1680. tk->ntp_error += neg << tk->ntp_error_shift;
  1681. }
  1682. }
  1683. /**
  1684. * accumulate_nsecs_to_secs - Accumulates nsecs into secs
  1685. *
  1686. * Helper function that accumulates the nsecs greater than a second
  1687. * from the xtime_nsec field to the xtime_secs field.
  1688. * It also calls into the NTP code to handle leapsecond processing.
  1689. *
  1690. */
  1691. static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  1692. {
  1693. u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
  1694. unsigned int clock_set = 0;
  1695. while (tk->tkr_mono.xtime_nsec >= nsecps) {
  1696. int leap;
  1697. tk->tkr_mono.xtime_nsec -= nsecps;
  1698. tk->xtime_sec++;
  1699. /* Figure out if its a leap sec and apply if needed */
  1700. leap = second_overflow(tk->xtime_sec);
  1701. if (unlikely(leap)) {
  1702. struct timespec64 ts;
  1703. tk->xtime_sec += leap;
  1704. ts.tv_sec = leap;
  1705. ts.tv_nsec = 0;
  1706. tk_set_wall_to_mono(tk,
  1707. timespec64_sub(tk->wall_to_monotonic, ts));
  1708. __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
  1709. clock_set = TK_CLOCK_WAS_SET;
  1710. }
  1711. }
  1712. return clock_set;
  1713. }
  1714. /**
  1715. * logarithmic_accumulation - shifted accumulation of cycles
  1716. *
  1717. * This functions accumulates a shifted interval of cycles into
  1718. * into a shifted interval nanoseconds. Allows for O(log) accumulation
  1719. * loop.
  1720. *
  1721. * Returns the unconsumed cycles.
  1722. */
  1723. static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
  1724. u32 shift,
  1725. unsigned int *clock_set)
  1726. {
  1727. cycle_t interval = tk->cycle_interval << shift;
  1728. u64 snsec_per_sec;
  1729. /* If the offset is smaller than a shifted interval, do nothing */
  1730. if (offset < interval)
  1731. return offset;
  1732. /* Accumulate one shifted interval */
  1733. offset -= interval;
  1734. tk->tkr_mono.cycle_last += interval;
  1735. tk->tkr_raw.cycle_last += interval;
  1736. tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
  1737. *clock_set |= accumulate_nsecs_to_secs(tk);
  1738. /* Accumulate raw time */
  1739. tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
  1740. tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
  1741. snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
  1742. while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
  1743. tk->tkr_raw.xtime_nsec -= snsec_per_sec;
  1744. tk->raw_time.tv_sec++;
  1745. }
  1746. tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
  1747. tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec << tk->tkr_raw.shift;
  1748. /* Accumulate error between NTP and clock interval */
  1749. tk->ntp_error += tk->ntp_tick << shift;
  1750. tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
  1751. (tk->ntp_error_shift + shift);
  1752. return offset;
  1753. }
  1754. /**
  1755. * update_wall_time - Uses the current clocksource to increment the wall time
  1756. *
  1757. */
  1758. void update_wall_time(void)
  1759. {
  1760. struct timekeeper *real_tk = &tk_core.timekeeper;
  1761. struct timekeeper *tk = &shadow_timekeeper;
  1762. cycle_t offset;
  1763. int shift = 0, maxshift;
  1764. unsigned int clock_set = 0;
  1765. unsigned long flags;
  1766. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1767. /* Make sure we're fully resumed: */
  1768. if (unlikely(timekeeping_suspended))
  1769. goto out;
  1770. #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
  1771. offset = real_tk->cycle_interval;
  1772. #else
  1773. offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
  1774. tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
  1775. #endif
  1776. /* Check if there's really nothing to do */
  1777. if (offset < real_tk->cycle_interval)
  1778. goto out;
  1779. /* Do some additional sanity checking */
  1780. timekeeping_check_update(real_tk, offset);
  1781. /*
  1782. * With NO_HZ we may have to accumulate many cycle_intervals
  1783. * (think "ticks") worth of time at once. To do this efficiently,
  1784. * we calculate the largest doubling multiple of cycle_intervals
  1785. * that is smaller than the offset. We then accumulate that
  1786. * chunk in one go, and then try to consume the next smaller
  1787. * doubled multiple.
  1788. */
  1789. shift = ilog2(offset) - ilog2(tk->cycle_interval);
  1790. shift = max(0, shift);
  1791. /* Bound shift to one less than what overflows tick_length */
  1792. maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
  1793. shift = min(shift, maxshift);
  1794. while (offset >= tk->cycle_interval) {
  1795. offset = logarithmic_accumulation(tk, offset, shift,
  1796. &clock_set);
  1797. if (offset < tk->cycle_interval<<shift)
  1798. shift--;
  1799. }
  1800. /* correct the clock when NTP error is too big */
  1801. timekeeping_adjust(tk, offset);
  1802. /*
  1803. * XXX This can be killed once everyone converts
  1804. * to the new update_vsyscall.
  1805. */
  1806. old_vsyscall_fixup(tk);
  1807. /*
  1808. * Finally, make sure that after the rounding
  1809. * xtime_nsec isn't larger than NSEC_PER_SEC
  1810. */
  1811. clock_set |= accumulate_nsecs_to_secs(tk);
  1812. write_seqcount_begin(&tk_core.seq);
  1813. /*
  1814. * Update the real timekeeper.
  1815. *
  1816. * We could avoid this memcpy by switching pointers, but that
  1817. * requires changes to all other timekeeper usage sites as
  1818. * well, i.e. move the timekeeper pointer getter into the
  1819. * spinlocked/seqcount protected sections. And we trade this
  1820. * memcpy under the tk_core.seq against one before we start
  1821. * updating.
  1822. */
  1823. timekeeping_update(tk, clock_set);
  1824. memcpy(real_tk, tk, sizeof(*tk));
  1825. /* The memcpy must come last. Do not put anything here! */
  1826. write_seqcount_end(&tk_core.seq);
  1827. out:
  1828. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1829. if (clock_set)
  1830. /* Have to call _delayed version, since in irq context*/
  1831. clock_was_set_delayed();
  1832. }
  1833. /**
  1834. * getboottime64 - Return the real time of system boot.
  1835. * @ts: pointer to the timespec64 to be set
  1836. *
  1837. * Returns the wall-time of boot in a timespec64.
  1838. *
  1839. * This is based on the wall_to_monotonic offset and the total suspend
  1840. * time. Calls to settimeofday will affect the value returned (which
  1841. * basically means that however wrong your real time clock is at boot time,
  1842. * you get the right time here).
  1843. */
  1844. void getboottime64(struct timespec64 *ts)
  1845. {
  1846. struct timekeeper *tk = &tk_core.timekeeper;
  1847. ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
  1848. *ts = ktime_to_timespec64(t);
  1849. }
  1850. EXPORT_SYMBOL_GPL(getboottime64);
  1851. unsigned long get_seconds(void)
  1852. {
  1853. struct timekeeper *tk = &tk_core.timekeeper;
  1854. return tk->xtime_sec;
  1855. }
  1856. EXPORT_SYMBOL(get_seconds);
  1857. struct timespec __current_kernel_time(void)
  1858. {
  1859. struct timekeeper *tk = &tk_core.timekeeper;
  1860. return timespec64_to_timespec(tk_xtime(tk));
  1861. }
  1862. struct timespec64 current_kernel_time64(void)
  1863. {
  1864. struct timekeeper *tk = &tk_core.timekeeper;
  1865. struct timespec64 now;
  1866. unsigned long seq;
  1867. do {
  1868. seq = read_seqcount_begin(&tk_core.seq);
  1869. now = tk_xtime(tk);
  1870. } while (read_seqcount_retry(&tk_core.seq, seq));
  1871. return now;
  1872. }
  1873. EXPORT_SYMBOL(current_kernel_time64);
  1874. struct timespec64 get_monotonic_coarse64(void)
  1875. {
  1876. struct timekeeper *tk = &tk_core.timekeeper;
  1877. struct timespec64 now, mono;
  1878. unsigned long seq;
  1879. do {
  1880. seq = read_seqcount_begin(&tk_core.seq);
  1881. now = tk_xtime(tk);
  1882. mono = tk->wall_to_monotonic;
  1883. } while (read_seqcount_retry(&tk_core.seq, seq));
  1884. set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec,
  1885. now.tv_nsec + mono.tv_nsec);
  1886. return now;
  1887. }
  1888. EXPORT_SYMBOL(get_monotonic_coarse64);
  1889. /*
  1890. * Must hold jiffies_lock
  1891. */
  1892. void do_timer(unsigned long ticks)
  1893. {
  1894. jiffies_64 += ticks;
  1895. calc_global_load(ticks);
  1896. }
  1897. /**
  1898. * ktime_get_update_offsets_now - hrtimer helper
  1899. * @cwsseq: pointer to check and store the clock was set sequence number
  1900. * @offs_real: pointer to storage for monotonic -> realtime offset
  1901. * @offs_boot: pointer to storage for monotonic -> boottime offset
  1902. * @offs_tai: pointer to storage for monotonic -> clock tai offset
  1903. *
  1904. * Returns current monotonic time and updates the offsets if the
  1905. * sequence number in @cwsseq and timekeeper.clock_was_set_seq are
  1906. * different.
  1907. *
  1908. * Called from hrtimer_interrupt() or retrigger_next_event()
  1909. */
  1910. ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
  1911. ktime_t *offs_boot, ktime_t *offs_tai)
  1912. {
  1913. struct timekeeper *tk = &tk_core.timekeeper;
  1914. unsigned int seq;
  1915. ktime_t base;
  1916. u64 nsecs;
  1917. do {
  1918. seq = read_seqcount_begin(&tk_core.seq);
  1919. base = tk->tkr_mono.base;
  1920. nsecs = timekeeping_get_ns(&tk->tkr_mono);
  1921. base = ktime_add_ns(base, nsecs);
  1922. if (*cwsseq != tk->clock_was_set_seq) {
  1923. *cwsseq = tk->clock_was_set_seq;
  1924. *offs_real = tk->offs_real;
  1925. *offs_boot = tk->offs_boot;
  1926. *offs_tai = tk->offs_tai;
  1927. }
  1928. /* Handle leapsecond insertion adjustments */
  1929. if (unlikely(base.tv64 >= tk->next_leap_ktime.tv64))
  1930. *offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
  1931. } while (read_seqcount_retry(&tk_core.seq, seq));
  1932. return base;
  1933. }
  1934. /**
  1935. * do_adjtimex() - Accessor function to NTP __do_adjtimex function
  1936. */
  1937. int do_adjtimex(struct timex *txc)
  1938. {
  1939. struct timekeeper *tk = &tk_core.timekeeper;
  1940. unsigned long flags;
  1941. struct timespec64 ts;
  1942. s32 orig_tai, tai;
  1943. int ret;
  1944. /* Validate the data before disabling interrupts */
  1945. ret = ntp_validate_timex(txc);
  1946. if (ret)
  1947. return ret;
  1948. if (txc->modes & ADJ_SETOFFSET) {
  1949. struct timespec delta;
  1950. delta.tv_sec = txc->time.tv_sec;
  1951. delta.tv_nsec = txc->time.tv_usec;
  1952. if (!(txc->modes & ADJ_NANO))
  1953. delta.tv_nsec *= 1000;
  1954. ret = timekeeping_inject_offset(&delta);
  1955. if (ret)
  1956. return ret;
  1957. }
  1958. getnstimeofday64(&ts);
  1959. raw_spin_lock_irqsave(&timekeeper_lock, flags);
  1960. write_seqcount_begin(&tk_core.seq);
  1961. orig_tai = tai = tk->tai_offset;
  1962. ret = __do_adjtimex(txc, &ts, &tai);
  1963. if (tai != orig_tai) {
  1964. __timekeeping_set_tai_offset(tk, tai);
  1965. timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
  1966. }
  1967. tk_update_leap_state(tk);
  1968. write_seqcount_end(&tk_core.seq);
  1969. raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  1970. if (tai != orig_tai)
  1971. clock_was_set();
  1972. ntp_notify_cmos_timer();
  1973. return ret;
  1974. }
  #ifdef CONFIG_NTP_PPS
  /**
   * hardpps() - Accessor function to NTP __hardpps function
   * @phase_ts:	passed through to __hardpps()
   * @raw_ts:	passed through to __hardpps()
   *
   * Calls __hardpps() with timekeeper_lock held, interrupts disabled and
   * inside a tk_core.seq write section.
   */
  void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
  {
  	unsigned long flags;

  	raw_spin_lock_irqsave(&timekeeper_lock, flags);
  	write_seqcount_begin(&tk_core.seq);

  	__hardpps(phase_ts, raw_ts);

  	write_seqcount_end(&tk_core.seq);
  	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  }
  EXPORT_SYMBOL(hardpps);
  #endif
  1990. /**
  1991. * xtime_update() - advances the timekeeping infrastructure
  1992. * @ticks: number of ticks, that have elapsed since the last call.
  1993. *
  1994. * Must be called with interrupts disabled.
  1995. */
  1996. void xtime_update(unsigned long ticks)
  1997. {
  1998. write_seqlock(&jiffies_lock);
  1999. do_timer(ticks);
  2000. write_sequnlock(&jiffies_lock);
  2001. update_wall_time();
  2002. }