atomic.S 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946
  1. /*
  2. * Copyright 2007-2008 Analog Devices Inc.
  3. * Philippe Gerum <rpm@xenomai.org>
  4. *
  5. * Licensed under the GPL-2 or later.
  6. */
  7. #include <linux/linkage.h>
  8. #include <asm/blackfin.h>
  9. #include <asm/cache.h>
  10. #include <asm/asm-offsets.h>
  11. #include <asm/rwlock.h>
  12. #include <asm/cplb.h>
  13. .text
  /*
   * coreslot_loadaddr reg
   *
   * Materialise the 32-bit address of _corelock in \reg by loading its
   * low and high 16-bit halves separately.
   */
  .macro coreslot_loadaddr reg:req
      \reg\().l = _corelock;
      \reg\().h = _corelock;
  .endm
  /*
   * safe_testset addr, scratch
   *
   * Emit TESTSET on the location addressed by \addr.  When
   * ANOMALY_05000477 is enabled the instruction is bracketed by
   * cli/sti, with \scratch holding the interrupt mask in between.
   */
  .macro safe_testset addr:req, scratch:req
  #if ANOMALY_05000477
      cli \scratch;
  #endif
      testset (\addr);
  #if ANOMALY_05000477
      sti \scratch;
  #endif
  .endm
  /*
   * _get_core_lock - take the inter-core lock and refresh one cache line.
   *
   * In:  r0 = address of the 32-bit atomic datum to flush and invalidate.
   * Out: r0 = interrupt mask saved by cli.
   *
   * Interrupts are left disabled on return.  The datum is assumed not
   * to span cache lines.
   *
   * Clobbers: r2:0, p0
   */
  ENTRY(_get_core_lock)
      /* r1 = r0 rounded down to its cache-line base. */
      r1 = -L1_CACHE_BYTES;
      r1 = r0 & r1;
      cli r0;
      coreslot_loadaddr p0;
  .Lcorelock_spin:
      /* Loop until safe_testset leaves CC set. */
      safe_testset p0, r2;
      if cc jump .Lcorelock_taken;
      SSYNC(r2);
      jump .Lcorelock_spin;
  .Lcorelock_taken:
      p0 = r1;
      /* Drain the core's internal write buffer before invalidating dcache. */
      CSYNC(r2);
      flushinv[p0];
      SSYNC(r2);
      rts;
  ENDPROC(_get_core_lock)
  /*
   * _get_core_lock_noflush - take the inter-core lock, no cache maintenance.
   *
   * In:  r0 = address of atomic data in an uncacheable region (not read here).
   * Out: r0 = interrupt mask saved by cli.
   *
   * Interrupts are left disabled on return.
   *
   * Clobbers: r0, p0; r2 is handed to safe_testset/SSYNC/CSYNC as scratch.
   */
  ENTRY(_get_core_lock_noflush)
      cli r0;
      coreslot_loadaddr p0;
  .Lcorelock_nf_spin:
      /* Loop until safe_testset leaves CC set. */
      safe_testset p0, r2;
      if cc jump .Lcorelock_nf_taken;
      SSYNC(r2);
      jump .Lcorelock_nf_spin;
  .Lcorelock_nf_taken:
      /*
       * Keep this NOP: without it SMP kgdb ends up in a dead loop when
       * one core single-steps over get_core_lock_noflush while the
       * other executes get_core_lock as a slave node.
       */
      nop;
      CSYNC(r2);
      rts;
  ENDPROC(_get_core_lock_noflush)
  /*
   * _put_core_lock - drop the inter-core lock and restore interrupts.
   *
   * In:  r0 = interrupt mask to restore (as returned by _get_core_lock).
   *      r1 = address of the atomic datum; by convention only, it is
   *           not read here and gets overwritten.
   *
   * Interrupts are masked on entry (see _get_core_lock).
   *
   * Clobbers: r2:0, p0
   */
  ENTRY(_put_core_lock)
      /* A write-through cache is assumed, so nothing is flushed here. */
      coreslot_loadaddr p0;
      r1 = 0;
      [p0] = r1;              /* store zero to _corelock */
      SSYNC(r2);
      sti r0;
      rts;
  ENDPROC(_put_core_lock)
  94. #ifdef __ARCH_SYNC_CORE_DCACHE
  /*
   * ___raw_smp_mark_barrier_asm
   *
   * Under the core lock, rewrite _barrier_mask so that the bits of all
   * other cores are set while the calling core's bit keeps its value.
   * rets, r7:5, r0, p1 and p0 are saved and restored around the body.
   */
  ENTRY(___raw_smp_mark_barrier_asm)
      [--sp] = rets;
      [--sp] = ( r7:5 );
      [--sp] = r0;
      [--sp] = p1;
      [--sp] = p0;
      call _get_core_lock_noflush;

      /* r6 = 1 << r7, the calling core's bit. */
      GET_CPUID(p1, r7);
      r6 = 1;
      r6 <<= r7;

      /* r5 = our current bit in the mask; r7 = every bit but ours. */
      p1.l = _barrier_mask;
      p1.h = _barrier_mask;
      r7 = [p1];
      r5 = r7 & r6;
      r7 = ~r6;
      cc = r5 == 0;
      if cc jump .Lmark_barrier_store;
      r7 = r7 | r6;           /* our bit was set: keep it set */
  .Lmark_barrier_store:
      [p1] = r7;
      SSYNC(r2);
      call _put_core_lock;

      p0 = [sp++];
      p1 = [sp++];
      r0 = [sp++];
      ( r7:5 ) = [sp++];
      rets = [sp++];
      rts;
  ENDPROC(___raw_smp_mark_barrier_asm)
  /*
   * ___raw_smp_check_barrier_asm
   *
   * Under the core lock, test the calling core's bit in _barrier_mask.
   * If it is set, clear it, release the lock and call
   * _resync_core_dcache; otherwise just release the lock.
   * rets, r7:5, r0, p1 and p0 are saved and restored around the body.
   */
  ENTRY(___raw_smp_check_barrier_asm)
      [--sp] = rets;
      [--sp] = ( r7:5 );
      [--sp] = r0;
      [--sp] = p1;
      [--sp] = p0;
      call _get_core_lock_noflush;

      /* r6 = 1 << r7, the calling core's bit. */
      GET_CPUID(p1, r7);
      r6 = 1;
      r6 <<= r7;

      /* Is our bit pending in the barrier mask? */
      p1.l = _barrier_mask;
      p1.h = _barrier_mask;
      r7 = [p1];
      r5 = r7 & r6;
      cc = r5 == 0;
      if cc jump .Lcheck_barrier_unlock;

      /* Yes: clear it and publish the new mask. */
      r6 = ~r6;
      r7 = r7 & r6;
      [p1] = r7;
      SSYNC(r2);
      call _put_core_lock;

      /*
       * Invalidate the whole D-cache of this core.
       * NOTE(review): the 12 bytes reserved around the call are
       * presumably outgoing-argument space for a C callee -- confirm.
       */
      sp += -12;
      call _resync_core_dcache;
      sp += 12;
      jump .Lcheck_barrier_exit;

  .Lcheck_barrier_unlock:
      call _put_core_lock;
  .Lcheck_barrier_exit:
      p0 = [sp++];
      p1 = [sp++];
      r0 = [sp++];
      ( r7:5 ) = [sp++];
      rets = [sp++];
      rts;
  ENDPROC(___raw_smp_check_barrier_asm)
  /*
   * _start_lock_coherent - finish a lock acquisition with cache resync.
   *
   * In:  r0 = irqflags
   *      r1 = address of atomic data
   *
   * Entered with the core lock held.  Clears the CPU fingerprints in
   * the datum, releases the core lock, resynchronises this core's
   * D-cache if another CPU's fingerprint was present, then restores
   * the interrupt state from the saved irqflags.
   *
   * Clobbers: r2:0, p1:0
   */
  _start_lock_coherent:
      [--sp] = rets;
      [--sp] = ( r7:6 );
      r7 = r0;                /* keep irqflags across the call below */
      p1 = r1;

      /*
       * r6 = fingerprint bits belonging to CPUs other than this one,
       * i.e. non-zero when another CPU owned the datum before us.
       */
      GET_CPUID(p0, r2);
      r1 = 1;
      r1 <<= r2;
      r2 = ~r1;
      r1 = [p1];
      r1 >>= 28;              /* fingerprints live in the high nibble */
      r6 = r1 & r2;

      /* Wipe the fingerprint nibble in the datum. */
      r1 = [p1];
      r1 <<= 4;
      r1 >>= 4;
      [p1] = r1;

      /*
       * Drop the core lock now, but leave IRQs off while the remaining
       * housekeeping for this CPU is carried out.
       */
      coreslot_loadaddr p0;
      r1 = 0;
      [p0] = r1;

      /*
       * If another CPU owned the same atomic section before us, our
       * D-cached copy of the data guarded by this spin/write lock may
       * be stale.
       */
      cc = r6 == 0;
      if cc jump .Lcoherent_done;

      /* Invalidate the whole D-cache of this core. */
      sp += -12;
      call _resync_core_dcache;
      sp += 12;

  .Lcoherent_done:
      SSYNC(r2);
      sti r7;
      ( r7:6 ) = [sp++];
      rets = [sp++];
      rts;
  /*
   * _end_lock_coherent - stamp this CPU's fingerprint and release.
   *
   * In:  r0 = irqflags
   *      r1 = address of atomic data
   *
   * Sets bit (28 + cpuid) in the datum, then tail-jumps to
   * _put_core_lock with r0 untouched and r1 = datum address.
   *
   * Clobbers: r2:0, p1:0
   */
  _end_lock_coherent:
      p1 = r1;
      GET_CPUID(p0, r2);
      r2 += 28;               /* bit index of our fingerprint */
      r1 = 1;
      r1 <<= r2;
      r2 = [p1];
      r2 = r1 | r2;
      [p1] = r2;
      r1 = p1;
      jump _put_core_lock;    /* returns straight to our caller */
  243. #endif /* __ARCH_SYNC_CORE_DCACHE */
  /*
   * ___raw_spin_is_locked_asm
   *
   * In:  r0 = &spinlock->lock
   * Out: r0 = value of bit 0 of the lock word (1 if set, else 0)
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_spin_is_locked_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
      r3 = [p1];
      cc = bittst( r3, 0 );
      r3 = cc;                /* held in r3 across _put_core_lock */
      r1 = p1;
      call _put_core_lock;
      rets = [sp++];
      r0 = r3;
      rts;
  ENDPROC(___raw_spin_is_locked_asm)
  /*
   * ___raw_spin_lock_asm - spin until bit 0 of the lock word is ours.
   *
   * In:  r0 = &spinlock->lock
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_spin_lock_asm)
      p1 = r0;
      [--sp] = rets;
  .Lspin_acquire:
      call _get_core_lock;
      r1 = p1;
      r2 = [p1];
      cc = bittst( r2, 0 );
      if cc jump .Lspin_contended;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      r3 = p1;
      bitset ( r2, 0 );       /* raise the lock bit, keep the other bits */
      [p1] = r2;
      call _start_lock_coherent;
  #else
      r2 = 1;
      [p1] = r2;
      call _put_core_lock;
  #endif
      rets = [sp++];
      rts;

  .Lspin_contended:
      /*
       * The atomic area is left untouched when the lock is busy, so
       * the flush behaves like a nop in _put_core_lock.
       */
      call _put_core_lock;
      SSYNC(r2);
      r0 = p1;                /* reload the argument for _get_core_lock */
      jump .Lspin_acquire;
  ENDPROC(___raw_spin_lock_asm)
  /*
   * ___raw_spin_trylock_asm - single attempt at taking the spinlock.
   *
   * In:  r0 = &spinlock->lock
   * Out: r0 = 1 if the lock bit was clear and has been set, 0 otherwise
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_spin_trylock_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
      r1 = p1;
      r3 = [p1];
      cc = bittst( r3, 0 );
      if cc jump .Lspin_try_miss;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      bitset ( r3, 0 );       /* raise the lock bit, keep the other bits */
      [p1] = r3;
      call _start_lock_coherent;
  #else
      r2 = 1;
      [p1] = r2;
      call _put_core_lock;
  #endif
      r0 = 1;
      rets = [sp++];
      rts;

  .Lspin_try_miss:
      call _put_core_lock;
      r0 = 0;
      rets = [sp++];
      rts;
  ENDPROC(___raw_spin_trylock_asm)
  /*
   * ___raw_spin_unlock_asm - clear bit 0 of the lock word.
   *
   * In:  r0 = &spinlock->lock
   *
   * Clobbers: r2:0, p1:0
   */
  ENTRY(___raw_spin_unlock_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
      r2 = [p1];
      bitclr ( r2, 0 );
      [p1] = r2;
      r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      call _end_lock_coherent;
  #else
      call _put_core_lock;
  #endif
      rets = [sp++];
      rts;
  ENDPROC(___raw_spin_unlock_asm)
  /*
   * ___raw_read_lock_asm - take the rwlock for reading, spinning if needed.
   *
   * In:  r0 = &rwlock->lock
   *
   * Decrements the lock word; if the result is negative the decrement
   * is undone and the routine backs off until the word is at least 2
   * before trying again.
   *
   * Clobbers: r2:0, p1:0
   */
  ENTRY(___raw_read_lock_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
  .Lrd_attempt:
      r1 = [p1];
      r1 += -1;
      [p1] = r1;
      cc = r1 < 0;
      if cc jump .Lrd_undo;
      r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      call _start_lock_coherent;
  #else
      call _put_core_lock;
  #endif
      rets = [sp++];
      rts;

  .Lrd_undo:
      /* Went negative: give the count back. */
      r1 += 1;
      [p1] = r1;
  .Lrd_backoff:
      /* Release the core lock, then re-take it and re-examine the word. */
      r1 = p1;
      call _put_core_lock;
      SSYNC(r2);
      r0 = p1;
      call _get_core_lock;
      r1 = [p1];
      cc = r1 < 2;
      if cc jump .Lrd_backoff;
      jump .Lrd_attempt;
  ENDPROC(___raw_read_lock_asm)
  /*
   * ___raw_read_trylock_asm - single attempt at a read lock.
   *
   * In:  r0 = &rwlock->lock
   * Out: r0 = 1 if the lock word was > 0 and has been decremented,
   *           0 otherwise
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_read_trylock_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
      r1 = [p1];
      cc = r1 <= 0;
      if cc jump .Lrd_try_miss;
      r1 += -1;
      [p1] = r1;
      r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      call _start_lock_coherent;
  #else
      call _put_core_lock;
  #endif
      rets = [sp++];
      r0 = 1;
      rts;

  .Lrd_try_miss:
      r1 = p1;
      call _put_core_lock;
      rets = [sp++];
      r0 = 0;
      rts;
  ENDPROC(___raw_read_trylock_asm)
  /*
   * ___raw_read_unlock_asm - give back one reader count.
   *
   * In:  r0 = &rwlock->lock
   *
   * Note: work done under a reader lock should have no side effect on
   * cache state as seen by the other core, so the core lock is simply
   * released on exit (no _end_lock_coherent).
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_read_unlock_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
      r1 = [p1];
      r1 += 1;
      [p1] = r1;
      r1 = p1;
      call _put_core_lock;
      rets = [sp++];
      rts;
  ENDPROC(___raw_read_unlock_asm)
  /*
   * ___raw_write_lock_asm - take the rwlock for writing, spinning if needed.
   *
   * In:  r0 = &rwlock->lock
   *
   * Subtracts RW_LOCK_BIAS from the lock word and commits the result
   * only if it is zero (ignoring the high nibble when
   * __ARCH_SYNC_CORE_DCACHE is defined).  Otherwise it backs off until
   * the word equals RW_LOCK_BIAS again and retries.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_write_lock_asm)
      p1 = r0;
      r3.l = lo(RW_LOCK_BIAS);
      r3.h = hi(RW_LOCK_BIAS);
      [--sp] = rets;
      call _get_core_lock;
  .Lwr_attempt:
      r1 = [p1];
      r1 = r1 - r3;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      /* Compare only the low 28 bits; the high nibble is not the count. */
      r2 = r1;
      r2 <<= 4;
      r2 >>= 4;
      cc = r2 == 0;
  #else
      cc = r1 == 0;
  #endif
      if !cc jump .Lwr_backoff;
      [p1] = r1;
      r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      call _start_lock_coherent;
  #else
      call _put_core_lock;
  #endif
      rets = [sp++];
      rts;

  .Lwr_backoff:
      /* Release the core lock, then re-take it and re-examine the word. */
      r1 = p1;
      call _put_core_lock;
      SSYNC(r2);
      r0 = p1;
      call _get_core_lock;
      r1 = [p1];
  #ifdef __ARCH_SYNC_CORE_DCACHE
      r1 <<= 4;
      r1 >>= 4;
  #endif
      cc = r1 == r3;
      if !cc jump .Lwr_backoff;
      jump .Lwr_attempt;
  ENDPROC(___raw_write_lock_asm)
  /*
   * ___raw_write_trylock_asm - single attempt at a write lock.
   *
   * In:  r0 = &rwlock->lock
   * Out: r0 = 1 if the lock word equalled RW_LOCK_BIAS and was taken,
   *           0 otherwise
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_write_trylock_asm)
      p1 = r0;
      [--sp] = rets;
      call _get_core_lock;
      r1 = [p1];
      r2.l = lo(RW_LOCK_BIAS);
      r2.h = hi(RW_LOCK_BIAS);
      /*
       * NOTE(review): this compares the whole word, high nibble
       * included, unlike ___raw_write_lock_asm which masks it off
       * under __ARCH_SYNC_CORE_DCACHE -- confirm this is intended.
       */
      cc = r1 == r2;
      if !cc jump .Lwr_try_miss;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      /* Zero the low 28 bits, keep the high nibble. */
      r1 >>= 28;
      r1 <<= 28;
  #else
      r1 = 0;
  #endif
      [p1] = r1;
      r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      call _start_lock_coherent;
  #else
      call _put_core_lock;
  #endif
      rets = [sp++];
      r0 = 1;
      rts;

  .Lwr_try_miss:
      r1 = p1;
      call _put_core_lock;
      rets = [sp++];
      r0 = 0;
      rts;
  ENDPROC(___raw_write_trylock_asm)
  /*
   * ___raw_write_unlock_asm - add RW_LOCK_BIAS back to the lock word.
   *
   * In:  r0 = &rwlock->lock
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_write_unlock_asm)
      p1 = r0;
      r3.l = lo(RW_LOCK_BIAS);
      r3.h = hi(RW_LOCK_BIAS);
      [--sp] = rets;
      call _get_core_lock;
      r1 = [p1];
      r1 = r1 + r3;
      [p1] = r1;
      r1 = p1;
  #ifdef __ARCH_SYNC_CORE_DCACHE
      call _end_lock_coherent;
  #else
      call _put_core_lock;
  #endif
      rets = [sp++];
      rts;
  ENDPROC(___raw_write_unlock_asm)
  /*
   * ___raw_atomic_add_asm - atomic add, returning the new value.
   *
   * In:  r0 = ptr
   *      r1 = signed value to add
   * Out: r0 = *ptr after the addition
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_add_asm)
      p1 = r0;
      r3 = r1;                /* addend must survive _get_core_lock */
      [--sp] = rets;
      call _get_core_lock;
      r2 = [p1];
      r3 = r3 + r2;
      [p1] = r3;
      r1 = p1;
      call _put_core_lock;
      r0 = r3;
      rets = [sp++];
      rts;
  ENDPROC(___raw_atomic_add_asm)
  /*
   * ___raw_atomic_xadd_asm - atomic add, returning the old value.
   *
   * In:  r0 = ptr
   *      r1 = signed value to add
   * Out: r0 = *ptr as it was before the addition
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_xadd_asm)
      p1 = r0;
      r3 = r1;                /* addend must survive _get_core_lock */
      [--sp] = rets;
      call _get_core_lock;
      r2 = [p1];              /* r2 = old value */
      r3 = r2 + r3;           /* r3 = old + addend */
      [p1] = r3;
      r3 = r2;                /* _put_core_lock uses r2 as scratch */
      r1 = p1;
      call _put_core_lock;
      r0 = r3;
      rets = [sp++];
      rts;
  ENDPROC(___raw_atomic_xadd_asm)
  /*
   * ___raw_atomic_and_asm - atomic AND, returning the old value.
   *
   * In:  r0 = ptr
   *      r1 = mask
   * Out: r0 = 32-bit value of *ptr before the AND
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_and_asm)
      p1 = r0;
      r3 = r1;                /* mask must survive _get_core_lock */
      [--sp] = rets;
      call _get_core_lock;
      r2 = [p1];              /* r2 = old value */
      r3 = r2 & r3;           /* r3 = old & mask */
      [p1] = r3;
      r3 = r2;                /* _put_core_lock uses r2 as scratch */
      r1 = p1;
      call _put_core_lock;
      r0 = r3;
      rets = [sp++];
      rts;
  ENDPROC(___raw_atomic_and_asm)
  /*
   * ___raw_atomic_or_asm - atomic OR, returning the old value.
   *
   * In:  r0 = ptr
   *      r1 = mask
   * Out: r0 = 32-bit value of *ptr before the OR
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_or_asm)
      p1 = r0;
      r3 = r1;                /* mask must survive _get_core_lock */
      [--sp] = rets;
      call _get_core_lock;
      r2 = [p1];              /* r2 = old value */
      r3 = r2 | r3;           /* r3 = old | mask */
      [p1] = r3;
      r3 = r2;                /* _put_core_lock uses r2 as scratch */
      r1 = p1;
      call _put_core_lock;
      r0 = r3;
      rets = [sp++];
      rts;
  ENDPROC(___raw_atomic_or_asm)
  /*
   * ___raw_atomic_xor_asm - atomic XOR, returning the old value.
   *
   * In:  r0 = ptr
   *      r1 = mask
   * Out: r0 = 32-bit value of *ptr before the XOR
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_xor_asm)
      p1 = r0;
      r3 = r1;                /* mask must survive _get_core_lock */
      [--sp] = rets;
      call _get_core_lock;
      r2 = [p1];              /* r2 = old value */
      r3 = r2 ^ r3;           /* r3 = old ^ mask */
      [p1] = r3;
      r3 = r2;                /* _put_core_lock uses r2 as scratch */
      r1 = p1;
      call _put_core_lock;
      r0 = r3;
      rets = [sp++];
      rts;
  ENDPROC(___raw_atomic_xor_asm)
  /*
   * ___raw_atomic_test_asm - read a word past the local cache and mask it.
   *
   * In:  r0 = ptr
   *      r1 = mask
   * Out: r0 = *ptr & mask
   *
   * Needed on this architecture so that the local cache line is
   * invalidated before the value is tested.  The core lock is not
   * taken here.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_atomic_test_asm)
      p1 = r0;
      r3 = r1;
      /* p0 = ptr rounded down to its cache-line base. */
      r1 = -L1_CACHE_BYTES;
      r1 = r0 & r1;
      p0 = r1;
      /* Drain the core's internal write buffer before invalidating dcache. */
      CSYNC(r2);
      flushinv[p0];
      SSYNC(r2);
      r0 = [p1];
      r0 = r0 & r3;
      rts;
  ENDPROC(___raw_atomic_test_asm)
  /*
   * ___raw_xchg_{1,2,4}_asm - atomic exchange.
   *
   * In:  r0 = ptr
   *      r1 = value
   * Out: r0 = previous contents of *ptr (the 1- and 2-byte variants
   *           load with zero extension)
   *
   * __do_xchg(src, dst) expands to the whole routine body: src is the
   * load operand and dst the store operand for the access width.
   *
   * Clobbers: r3:0, p1:0
   */
  #define __do_xchg(src, dst) \
      p1 = r0; \
      r3 = r1; \
      [--sp] = rets; \
      call _get_core_lock; \
      r2 = src; \
      dst = r3; \
      r3 = r2; \
      r1 = p1; \
      call _put_core_lock; \
      r0 = r3; \
      rets = [sp++]; \
      rts;

  /* 8-bit exchange. */
  ENTRY(___raw_xchg_1_asm)
      __do_xchg(b[p1] (z), b[p1])
  ENDPROC(___raw_xchg_1_asm)

  /* 16-bit exchange. */
  ENTRY(___raw_xchg_2_asm)
      __do_xchg(w[p1] (z), w[p1])
  ENDPROC(___raw_xchg_2_asm)

  /* 32-bit exchange. */
  ENTRY(___raw_xchg_4_asm)
      __do_xchg([p1], [p1])
  ENDPROC(___raw_xchg_4_asm)
  /*
   * ___raw_cmpxchg_{1,2,4}_asm - atomic compare-and-exchange.
   *
   * In:  r0 = ptr
   *      r1 = new
   *      r2 = old
   * Out: r0 = previous contents of *ptr; new is stored only when that
   *           value equals old (the 1- and 2-byte variants load with
   *           zero extension before comparing)
   *
   * __do_cmpxchg(src, dst) expands to the whole routine body: src is
   * the load operand and dst the store operand for the access width.
   * r4 is saved on the stack and restored.
   *
   * Clobbers: r3:0, p1:0
   */
  #define __do_cmpxchg(src, dst) \
      [--sp] = rets; \
      [--sp] = r4; \
      p1 = r0; \
      r3 = r1; \
      r4 = r2; \
      call _get_core_lock; \
      r2 = src; \
      cc = r2 == r4; \
      if !cc jump 1f; \
      dst = r3; \
  1:  r3 = r2; \
      r1 = p1; \
      call _put_core_lock; \
      r0 = r3; \
      r4 = [sp++]; \
      rets = [sp++]; \
      rts;

  /* 8-bit compare-and-exchange. */
  ENTRY(___raw_cmpxchg_1_asm)
      __do_cmpxchg(b[p1] (z), b[p1])
  ENDPROC(___raw_cmpxchg_1_asm)

  /* 16-bit compare-and-exchange. */
  ENTRY(___raw_cmpxchg_2_asm)
      __do_cmpxchg(w[p1] (z), w[p1])
  ENDPROC(___raw_cmpxchg_2_asm)

  /* 32-bit compare-and-exchange. */
  ENTRY(___raw_cmpxchg_4_asm)
      __do_cmpxchg([p1], [p1])
  ENDPROC(___raw_cmpxchg_4_asm)
  /*
   * ___raw_bit_set_asm - atomically set one bit of a 32-bit word.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   *
   * Builds the mask (1 << bitnr) in r1 and tail-jumps to
   * ___raw_atomic_or_asm, whose result in r0 is returned to the caller.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_set_asm)
      r2 = r1;
      r1 = 1;
      r1 <<= r2;
      jump ___raw_atomic_or_asm;
  ENDPROC(___raw_bit_set_asm)
  /*
   * ___raw_bit_clear_asm - atomically clear one bit of a 32-bit word.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   *
   * Builds the mask ~(1 << bitnr) in r1 and tail-jumps to
   * ___raw_atomic_and_asm, whose result in r0 is returned to the caller.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_clear_asm)
      r2 = 1;
      r2 <<= r1;
      r1 = ~r2;
      jump ___raw_atomic_and_asm;
  ENDPROC(___raw_bit_clear_asm)
  /*
   * ___raw_bit_toggle_asm - atomically flip one bit of a 32-bit word.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   *
   * Builds the mask (1 << bitnr) in r1 and tail-jumps to
   * ___raw_atomic_xor_asm, whose result in r0 is returned to the caller.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_toggle_asm)
      r2 = r1;
      r1 = 1;
      r1 <<= r2;
      jump ___raw_atomic_xor_asm;
  ENDPROC(___raw_bit_toggle_asm)
  /*
   * ___raw_bit_test_set_asm - atomic test-and-set of one bit.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   * Out: r0 = 1 if bit bitnr is set in the word returned by
   *           ___raw_bit_set_asm, else 0
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_set_asm)
      [--sp] = rets;
      [--sp] = r1;            /* bitnr is needed again after the call */
      call ___raw_bit_set_asm;
      r1 = [sp++];
      r2 = 1;
      r2 <<= r1;
      r0 = r0 & r2;
      cc = r0 == 0;
      if cc jump .Lbit_test_set_out;
      r0 = 1;                 /* collapse any non-zero result to 1 */
  .Lbit_test_set_out:
      rets = [sp++];
      rts;
  ENDPROC(___raw_bit_test_set_asm)
  /*
   * ___raw_bit_test_clear_asm - atomic test-and-clear of one bit.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   * Out: r0 = 1 if bit bitnr is set in the word returned by
   *           ___raw_bit_clear_asm, else 0
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_clear_asm)
      [--sp] = rets;
      [--sp] = r1;            /* bitnr is needed again after the call */
      call ___raw_bit_clear_asm;
      r1 = [sp++];
      r2 = 1;
      r2 <<= r1;
      r0 = r0 & r2;
      cc = r0 == 0;
      if cc jump .Lbit_test_clear_out;
      r0 = 1;                 /* collapse any non-zero result to 1 */
  .Lbit_test_clear_out:
      rets = [sp++];
      rts;
  ENDPROC(___raw_bit_test_clear_asm)
  /*
   * ___raw_bit_test_toggle_asm - atomic test-and-toggle of one bit.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   * Out: r0 = 1 if bit bitnr is set in the word returned by
   *           ___raw_bit_toggle_asm, else 0
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_toggle_asm)
      [--sp] = rets;
      [--sp] = r1;            /* bitnr is needed again after the call */
      call ___raw_bit_toggle_asm;
      r1 = [sp++];
      r2 = 1;
      r2 <<= r1;
      r0 = r0 & r2;
      cc = r0 == 0;
      if cc jump .Lbit_test_toggle_out;
      r0 = 1;                 /* collapse any non-zero result to 1 */
  .Lbit_test_toggle_out:
      rets = [sp++];
      rts;
  ENDPROC(___raw_bit_test_toggle_asm)
  /*
   * ___raw_bit_test_asm - test one bit of a 32-bit word.
   *
   * In:  r0 = ptr
   *      r1 = bitnr
   *
   * Builds the mask (1 << bitnr) in r1 and tail-jumps to
   * ___raw_atomic_test_asm, whose result in r0 is returned to the
   * caller.  Needed on this architecture so that the local cache is
   * invalidated before the test.
   *
   * Clobbers: r3:0, p1:0
   */
  ENTRY(___raw_bit_test_asm)
      r2 = r1;
      r1 = 1;
      r1 <<= r2;
      jump ___raw_atomic_test_asm;
  ENDPROC(___raw_bit_test_asm)
  /*
   * ___raw_uncached_fetch_asm - read a 32-bit word past the local cache.
   *
   * In:  r0 = ptr
   * Out: r0 = *ptr, loaded after the containing cache line has been
   *           flushed and invalidated
   *
   * Clobbers: r2:0, p1:0
   */
  ENTRY(___raw_uncached_fetch_asm)
      p1 = r0;
      /* p0 = ptr rounded down to its cache-line base. */
      r1 = -L1_CACHE_BYTES;
      r1 = r0 & r1;
      p0 = r1;
      /* Drain the core's internal write buffer before invalidating dcache. */
      CSYNC(r2);
      flushinv[p0];
      SSYNC(r2);
      r0 = [p1];
      rts;
  ENDPROC(___raw_uncached_fetch_asm)