barrier.h

/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
 */
#ifndef __ASM_BARRIER_H
#define __ASM_BARRIER_H

#include <asm/addrspace.h>

/*
 * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
 * These values are used with the sync instruction to perform memory barriers.
 * Types of ordering guarantees available through the SYNC instruction:
 * - Completion Barriers
 * - Ordering Barriers
 *
 * As compared to the completion barrier, the ordering barrier is a
 * lighter-weight operation as it does not require the specified instructions
 * before the SYNC to be already completed. Instead it only requires that those
 * specified instructions which are subsequent to the SYNC in the instruction
 * stream are never re-ordered for processing ahead of the specified
 * instructions which are before the SYNC in the instruction stream.
 * This potentially reduces how many cycles the barrier instruction must stall
 * before it completes.
 *
 * Implementations that do not use any of the non-zero values of stype to
 * define different barriers, such as ordering barriers, must make those stype
 * values act the same as stype zero.
 */

/*
 * Completion barriers:
 * - Every synchronizable specified memory instruction (loads or stores or
 *   both) that occurs in the instruction stream before the SYNC instruction
 *   must be already globally performed before any synchronizable specified
 *   memory instructions that occur after the SYNC are allowed to be
 *   performed, with respect to any other processor or coherent I/O module.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 *
 * - A stype value of zero will always be defined such that it performs the
 *   most complete set of synchronization operations that are defined. This
 *   means stype zero always does a completion barrier that affects both loads
 *   and stores preceding the SYNC instruction and both loads and stores that
 *   are subsequent to the SYNC instruction. Non-zero values of stype may be
 *   defined by the architecture or specific implementations to perform
 *   synchronization behaviors that are less complete than that of stype zero.
 *   If an implementation does not use one of these non-zero values to define
 *   a different synchronization behavior, then that non-zero value of stype
 *   must act the same as the stype zero completion barrier. This allows
 *   software written for an implementation with a lighter-weight barrier to
 *   work on another implementation which only implements the stype zero
 *   completion barrier.
 *
 * - A completion barrier is required, potentially in conjunction with SSNOP
 *   (in Release 1 of the Architecture) or EHB (in Release 2 of the
 *   Architecture), to guarantee that memory reference results are visible
 *   across operating mode changes. For example, a completion barrier is
 *   required on some implementations on entry to and exit from Debug Mode to
 *   guarantee that memory effects are handled correctly.
 */

/*
 * stype 0 - A completion barrier that affects preceding loads and stores and
 * subsequent loads and stores.
 *
 * Older instructions which must reach the load/store ordering point before
 * the SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC
 * instruction completes: Loads, Stores
 */
#define STYPE_SYNC		0x0

/*
 * Ordering barriers:
 * - Every synchronizable specified memory instruction (loads or stores or
 *   both) that occurs in the instruction stream before the SYNC instruction
 *   must reach a stage in the load/store datapath after which no instruction
 *   re-ordering is possible before any synchronizable specified memory
 *   instruction which occurs after the SYNC instruction in the instruction
 *   stream reaches the same stage in the load/store datapath.
 *
 * - If any memory instruction before the SYNC instruction in program order
 *   generates a memory request to the external memory and any memory
 *   instruction after the SYNC instruction in program order also generates a
 *   memory request to external memory, the memory request belonging to the
 *   older instruction must be globally performed before the memory request
 *   belonging to the younger instruction is globally performed.
 *
 * - The barrier does not guarantee the order in which instruction fetches are
 *   performed.
 */

/*
 * stype 0x10 - An ordering barrier that affects preceding loads and stores
 * and subsequent loads and stores.
 *
 * Older instructions which must reach the load/store ordering point before
 * the SYNC instruction completes: Loads, Stores
 * Younger instructions which must reach the load/store ordering point only
 * after the SYNC instruction completes: Loads, Stores
 * Older instructions which must be globally performed when the SYNC
 * instruction completes: N/A
 */
#define STYPE_SYNC_MB		0x10

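/*
 * Illustrative sketch, not part of the original header: one way the stype
 * values above could be fed to the sync instruction, assuming an assembler
 * that accepts the "sync <stype>" form (MIPS32/MIPS64 R2 and later). The
 * macro name __sync_stype() is hypothetical; note it must be invoked with a
 * literal (e.g. 0x10), since # stringifies its argument without expansion.
 */
#if 0	/* example only, never compiled */
#define __sync_stype(stype)			\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	mips32r2\n\t"		\
		"sync	" #stype "\n\t"		\
		".set	pop"			\
		: /* no output */		\
		: /* no input */		\
		: "memory")

/* __sync_stype(0x10) emits the lighter-weight ordering barrier (SYNC_MB). */
#endif
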
#ifdef CONFIG_CPU_HAS_SYNC
#define __sync()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		".set	mips2\n\t"		\
		"sync\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: /* no input */		\
		: "memory")
#else
#define __sync()	do { } while(0)
#endif

#define __fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"nop\n\t"			\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1)		\
		: "memory")

#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define OCTEON_SYNCW_STR	".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
# define __syncw()	__asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")

# define fast_wmb()	__syncw()
# define fast_rmb()	barrier()
# define fast_mb()	__sync()
# define fast_iob()	do { } while (0)
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
# define fast_wmb()	__sync()
# define fast_rmb()	__sync()
# define fast_mb()	__sync()
# ifdef CONFIG_SGI_IP28
#  define fast_iob()				\
	__asm__ __volatile__(			\
		".set	push\n\t"		\
		".set	noreorder\n\t"		\
		"lw	$0,%0\n\t"		\
		"sync\n\t"			\
		"lw	$0,%0\n\t"		\
		".set	pop"			\
		: /* no output */		\
		: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
		: "memory")
# else
#  define fast_iob()				\
	do {					\
		__sync();			\
		__fast_iob();			\
	} while (0)
# endif
#endif /* CONFIG_CPU_CAVIUM_OCTEON */

#ifdef CONFIG_CPU_HAS_WB

#include <asm/wbflush.h>

#define mb()		wbflush()
#define iob()		wbflush()

#else /* !CONFIG_CPU_HAS_WB */

#define mb()		fast_mb()
#define iob()		fast_iob()

#endif /* !CONFIG_CPU_HAS_WB */

#define wmb()		fast_wmb()
#define rmb()		fast_rmb()

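/*
 * Illustrative sketch, not part of the original header: the classic driver
 * pattern these barriers exist for. The device structure, ring layout and
 * the 0x10 doorbell offset are all made up for illustration; only the
 * barrier placement is the point. wmb() orders the descriptor store before
 * the doorbell write, and iob() waits for the write buffer to drain.
 */
#if 0	/* example only, never compiled */
struct example_dev {
	void __iomem	*mmio;
	u32		*ring;
	unsigned int	head;
};

static void example_ring_doorbell(struct example_dev *dev, u32 desc)
{
	dev->ring[dev->head] = desc;	/* publish the descriptor ...	*/
	wmb();				/* ... strictly before ...	*/
	writel(1, dev->mmio + 0x10);	/* ... ringing the doorbell	*/
	iob();				/* drain the CPU write buffer	*/
}
#endif
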
#if defined(CONFIG_WEAK_ORDERING)
# ifdef CONFIG_CPU_CAVIUM_OCTEON
#  define __smp_mb()	__sync()
#  define __smp_rmb()	barrier()
#  define __smp_wmb()	__syncw()
# else
#  define __smp_mb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_rmb()	__asm__ __volatile__("sync" : : :"memory")
#  define __smp_wmb()	__asm__ __volatile__("sync" : : :"memory")
# endif
#else
#define __smp_mb()	barrier()
#define __smp_rmb()	barrier()
#define __smp_wmb()	barrier()
#endif

#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
#define __WEAK_LLSC_MB		"	sync	\n"
#else
#define __WEAK_LLSC_MB		"		\n"
#endif

#define smp_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")

#ifdef CONFIG_CPU_CAVIUM_OCTEON
#define smp_mb__before_llsc() smp_wmb()
#define __smp_mb__before_llsc() __smp_wmb()
/* Cause previous writes to become visible on all CPUs as soon as possible */
#define nudge_writes() __asm__ __volatile__(".set push\n\t"		\
					    ".set arch=octeon\n\t"	\
					    "syncw\n\t"			\
					    ".set pop" : : : "memory")
#else
#define smp_mb__before_llsc() smp_llsc_mb()
#define __smp_mb__before_llsc() smp_llsc_mb()
#define nudge_writes() mb()
#endif

#define __smp_mb__before_atomic()	__smp_mb__before_llsc()
#define __smp_mb__after_atomic()	smp_llsc_mb()

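/*
 * Illustrative sketch, not part of the original header: where the llsc
 * barriers sit around an ll/sc retry loop, in the style of an
 * atomic_add_return()-like primitive. example_add_return() is hypothetical;
 * the real loops live in asm/atomic.h and asm/cmpxchg.h.
 */
#if 0	/* example only, never compiled */
static inline int example_add_return(int i, int *v)
{
	int result, temp;

	smp_mb__before_llsc();	/* order earlier accesses before the ll */

	__asm__ __volatile__(
	"	.set	push			\n"
	"	.set	noreorder		\n"
	"	.set	mips2			\n"
	"1:	ll	%1, %2			\n"	/* temp = *v (linked)	  */
	"	addu	%0, %1, %3		\n"	/* result = temp + i	  */
	"	sc	%0, %2			\n"	/* try *v = result	  */
	"	beqz	%0, 1b			\n"	/* retry if the sc failed */
	"	 addu	%0, %1, %3		\n"	/* delay slot: recompute  */
	"	.set	pop			\n"
	: "=&r" (result), "=&r" (temp), "+m" (*v)
	: "Ir" (i)
	: "memory");

	smp_llsc_mb();		/* order the update before later accesses */

	return result;
}
#endif
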
/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or pref) in between an ll & sc can cause the sc instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the ll in program order may actually
 *    be executed after the ll - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a sync
 *    instruction) prior to every ll instruction, in between it & any earlier
 *    memory access instructions. Many of these cases are already covered by
 *    smp_mb__before_llsc() but for the remaining cases, typically ones in
 *    which multiple CPUs may operate on a memory location but ordering is not
 *    usually guaranteed, we use loongson_llsc_mb() below.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and later.
 *
 * 2) If a conditional branch exists between an ll & sc with a target outside
 *    of the ll-sc loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the ll-sc loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a sync instruction)
 *    at each affected branch target, for which we also use loongson_llsc_mb()
 *    defined below. See the sketch after these definitions.
 *
 *    This case affects all current Loongson 3 CPUs.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
#else
#define loongson_llsc_mb()	do { } while (0)
#endif

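/*
 * Illustrative sketch, not part of the original header: the two
 * loongson_llsc_mb() placements described in the comment above, shown on a
 * hypothetical cmpxchg-style loop (the real users are the ll/sc loops in
 * asm/atomic.h, asm/bitops.h and asm/cmpxchg.h). Case 1 puts a barrier
 * between earlier memory accesses and the ll; case 2 puts one at the branch
 * target reached on value mismatch, which here is the code immediately
 * following the asm block.
 */
#if 0	/* example only, never compiled */
static inline unsigned int
example_cmpxchg(volatile unsigned int *p, unsigned int old, unsigned int new)
{
	unsigned int retval, tmp;

	loongson_llsc_mb();	/* case 1: barrier between prior accesses & ll */

	__asm__ __volatile__(
	"	.set	push			\n"
	"	.set	noreorder		\n"
	"	.set	mips2			\n"
	"1:	ll	%0, %3			\n"	/* retval = *p (linked)	    */
	"	bne	%0, %4, 2f		\n"	/* mismatch: leave the loop */
	"	 move	%1, %5			\n"	/* delay slot: tmp = new    */
	"	sc	%1, %2			\n"	/* try *p = tmp		    */
	"	beqz	%1, 1b			\n"	/* retry if the sc failed   */
	"	 nop				\n"	/* delay slot		    */
	"2:	.set	pop			\n"
	: "=&r" (retval), "=&r" (tmp), "=m" (*p)
	: "m" (*p), "r" (old), "r" (new)
	: "memory");

	loongson_llsc_mb();	/* case 2: barrier at the mismatch branch target */

	return retval;
}
#endif
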
#include <asm-generic/barrier.h>

#endif /* __ASM_BARRIER_H */