kvm_book3s_64.h 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. /*
  2. * This program is free software; you can redistribute it and/or modify
  3. * it under the terms of the GNU General Public License, version 2, as
  4. * published by the Free Software Foundation.
  5. *
  6. * This program is distributed in the hope that it will be useful,
  7. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. * GNU General Public License for more details.
  10. *
  11. * You should have received a copy of the GNU General Public License
  12. * along with this program; if not, write to the Free Software
  13. * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  14. *
  15. * Copyright SUSE Linux Products GmbH 2010
  16. *
  17. * Authors: Alexander Graf <agraf@suse.de>
  18. */
  19. #ifndef __ASM_KVM_BOOK3S_64_H__
  20. #define __ASM_KVM_BOOK3S_64_H__
  21. #include <asm/book3s/64/mmu-hash.h>
  22. #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
  23. static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
  24. {
  25. preempt_disable();
  26. return &get_paca()->shadow_vcpu;
  27. }
/*
 * Counterpart of svcpu_get(): re-enable preemption.  @svcpu is accepted
 * for symmetry with svcpu_get() but is not used here.
 */
static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	preempt_enable();
}
  32. #endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Default order of the hashed page table: 2^24 bytes. */
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
/* Used by is_vrma_hpte() to recognise HPTEs that belong to the VRMA. */
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
/*
 * We use a lock bit in HPTE dword 0 to synchronize updates and
 * accesses to each HPTE, and another bit to indicate non-present
 * HPTEs.
 */
/* Lock bit: set by try_lock_hpte(), cleared by unlock_hpte()/__unlock_hpte(). */
#define HPTE_V_HVLOCK 0x40UL
/* Marks an HPTE as non-present. */
#define HPTE_V_ABSENT 0x20UL
/*
 * We use this bit in the guest_rpte field of the revmap entry
 * to indicate a modified HPTE.  It is set by note_hpte_modification().
 */
#define HPTE_GR_MODIFIED (1ul << 62)
/* These bits are reserved in the guest view of the HPTE */
#define HPTE_GR_RESERVED HPTE_GR_MODIFIED
  51. static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
  52. {
  53. unsigned long tmp, old;
  54. __be64 be_lockbit, be_bits;
  55. /*
  56. * We load/store in native endian, but the HTAB is in big endian. If
  57. * we byte swap all data we apply on the PTE we're implicitly correct
  58. * again.
  59. */
  60. be_lockbit = cpu_to_be64(HPTE_V_HVLOCK);
  61. be_bits = cpu_to_be64(bits);
  62. asm volatile(" ldarx %0,0,%2\n"
  63. " and. %1,%0,%3\n"
  64. " bne 2f\n"
  65. " or %0,%0,%4\n"
  66. " stdcx. %0,0,%2\n"
  67. " beq+ 2f\n"
  68. " mr %1,%3\n"
  69. "2: isync"
  70. : "=&r" (tmp), "=&r" (old)
  71. : "r" (hpte), "r" (be_bits), "r" (be_lockbit)
  72. : "cc", "memory");
  73. return old == 0;
  74. }
  75. static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
  76. {
  77. hpte_v &= ~HPTE_V_HVLOCK;
  78. asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
  79. hpte[0] = cpu_to_be64(hpte_v);
  80. }
  81. /* Without barrier */
  82. static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
  83. {
  84. hpte_v &= ~HPTE_V_HVLOCK;
  85. hpte[0] = cpu_to_be64(hpte_v);
  86. }
  87. static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
  88. unsigned long pte_index)
  89. {
  90. int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
  91. unsigned int penc;
  92. unsigned long rb = 0, va_low, sllp;
  93. unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
  94. if (v & HPTE_V_LARGE) {
  95. i = hpte_page_sizes[lp];
  96. b_psize = i & 0xf;
  97. a_psize = i >> 4;
  98. }
  99. /*
  100. * Ignore the top 14 bits of va
  101. * v have top two bits covering segment size, hence move
  102. * by 16 bits, Also clear the lower HPTE_V_AVPN_SHIFT (7) bits.
  103. * AVA field in v also have the lower 23 bits ignored.
  104. * For base page size 4K we need 14 .. 65 bits (so need to
  105. * collect extra 11 bits)
  106. * For others we need 14..14+i
  107. */
  108. /* This covers 14..54 bits of va*/
  109. rb = (v & ~0x7fUL) << 16; /* AVA field */
  110. /*
  111. * AVA in v had cleared lower 23 bits. We need to derive
  112. * that from pteg index
  113. */
  114. va_low = pte_index >> 3;
  115. if (v & HPTE_V_SECONDARY)
  116. va_low = ~va_low;
  117. /*
  118. * get the vpn bits from va_low using reverse of hashing.
  119. * In v we have va with 23 bits dropped and then left shifted
  120. * HPTE_V_AVPN_SHIFT (7) bits. Now to find vsid we need
  121. * right shift it with (SID_SHIFT - (23 - 7))
  122. */
  123. if (!(v & HPTE_V_1TB_SEG))
  124. va_low ^= v >> (SID_SHIFT - 16);
  125. else
  126. va_low ^= v >> (SID_SHIFT_1T - 16);
  127. va_low &= 0x7ff;
  128. switch (b_psize) {
  129. case MMU_PAGE_4K:
  130. sllp = get_sllp_encoding(a_psize);
  131. rb |= sllp << 5; /* AP field */
  132. rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
  133. break;
  134. default:
  135. {
  136. int aval_shift;
  137. /*
  138. * remaining bits of AVA/LP fields
  139. * Also contain the rr bits of LP
  140. */
  141. rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
  142. /*
  143. * Now clear not needed LP bits based on actual psize
  144. */
  145. rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
  146. /*
  147. * AVAL field 58..77 - base_page_shift bits of va
  148. * we have space for 58..64 bits, Missing bits should
  149. * be zero filled. +1 is to take care of L bit shift
  150. */
  151. aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
  152. rb |= ((va_low << aval_shift) & 0xfe);
  153. rb |= 1; /* L field */
  154. penc = mmu_psize_defs[b_psize].penc[a_psize];
  155. rb |= penc << 12; /* LP field */
  156. break;
  157. }
  158. }
  159. rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
  160. return rb;
  161. }
  162. static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
  163. {
  164. return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
  165. }
  166. static inline int hpte_is_writable(unsigned long ptel)
  167. {
  168. unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);
  169. return pp != PP_RXRX && pp != PP_RXXX;
  170. }
  171. static inline unsigned long hpte_make_readonly(unsigned long ptel)
  172. {
  173. if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
  174. ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
  175. else
  176. ptel |= PP_RXRX;
  177. return ptel;
  178. }
  179. static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
  180. {
  181. unsigned int wimg = hptel & HPTE_R_WIMG;
  182. /* Handle SAO */
  183. if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
  184. cpu_has_feature(CPU_FTR_ARCH_206))
  185. wimg = HPTE_R_M;
  186. if (!is_ci)
  187. return wimg == HPTE_R_M;
  188. /*
  189. * if host is mapped cache inhibited, make sure hptel also have
  190. * cache inhibited.
  191. */
  192. if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
  193. return false;
  194. return !!(wimg & HPTE_R_I);
  195. }
  196. /*
  197. * If it's present and writable, atomically set dirty and referenced bits and
  198. * return the PTE, otherwise return 0.
  199. */
  200. static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
  201. {
  202. pte_t old_pte, new_pte = __pte(0);
  203. while (1) {
  204. /*
  205. * Make sure we don't reload from ptep
  206. */
  207. old_pte = READ_ONCE(*ptep);
  208. /*
  209. * wait until H_PAGE_BUSY is clear then set it atomically
  210. */
  211. if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) {
  212. cpu_relax();
  213. continue;
  214. }
  215. /* If pte is not present return None */
  216. if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
  217. return __pte(0);
  218. new_pte = pte_mkyoung(old_pte);
  219. if (writing && pte_write(old_pte))
  220. new_pte = pte_mkdirty(new_pte);
  221. if (pte_xchg(ptep, old_pte, new_pte))
  222. break;
  223. }
  224. return new_pte;
  225. }
  226. static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
  227. {
  228. if (key)
  229. return PP_RWRX <= pp && pp <= PP_RXRX;
  230. return true;
  231. }
  232. static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
  233. {
  234. if (key)
  235. return pp == PP_RWRW;
  236. return pp <= PP_RWRW;
  237. }
  238. static inline int hpte_get_skey_perm(unsigned long hpte_r, unsigned long amr)
  239. {
  240. unsigned long skey;
  241. skey = ((hpte_r & HPTE_R_KEY_HI) >> 57) |
  242. ((hpte_r & HPTE_R_KEY_LO) >> 9);
  243. return (amr >> (62 - 2 * skey)) & 3;
  244. }
  245. static inline void lock_rmap(unsigned long *rmap)
  246. {
  247. do {
  248. while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
  249. cpu_relax();
  250. } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
  251. }
/*
 * Release the lock taken by lock_rmap() by clearing
 * KVMPPC_RMAP_LOCK_BIT in *@rmap via __clear_bit_unlock().
 */
static inline void unlock_rmap(unsigned long *rmap)
{
	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
}
  256. static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
  257. unsigned long pagesize)
  258. {
  259. unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
  260. if (pagesize <= PAGE_SIZE)
  261. return true;
  262. return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
  263. }
  264. /*
  265. * This works for 4k, 64k and 16M pages on POWER7,
  266. * and 4k and 16M pages on PPC970.
  267. */
  268. static inline unsigned long slb_pgsize_encoding(unsigned long psize)
  269. {
  270. unsigned long senc = 0;
  271. if (psize > 0x1000) {
  272. senc = SLB_VSID_L;
  273. if (psize == 0x10000)
  274. senc |= SLB_VSID_LP_01;
  275. }
  276. return senc;
  277. }
  278. static inline int is_vrma_hpte(unsigned long hpte_v)
  279. {
  280. return (hpte_v & ~0xffffffUL) ==
  281. (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
  282. }
  283. #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
  284. /*
  285. * Note modification of an HPTE; set the HPTE modified bit
  286. * if anyone is interested.
  287. */
  288. static inline void note_hpte_modification(struct kvm *kvm,
  289. struct revmap_entry *rev)
  290. {
  291. if (atomic_read(&kvm->arch.hpte_mod_interest))
  292. rev->guest_rpte |= HPTE_GR_MODIFIED;
  293. }
  294. /*
  295. * Like kvm_memslots(), but for use in real mode when we can't do
  296. * any RCU stuff (since the secondary threads are offline from the
  297. * kernel's point of view), and we can't print anything.
  298. * Thus we use rcu_dereference_raw() rather than rcu_dereference_check().
  299. */
  300. static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
  301. {
  302. return rcu_dereference_raw_notrace(kvm->memslots[0]);
  303. }
/* Defined outside this header; judging by the name, sets up MMU debugfs entries for @kvm. */
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
/* Defined outside this header; judging by the name, sends an IPI to @cpu from real mode. */
extern void kvmhv_rm_send_ipi(int cpu);
  306. #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  307. #endif /* __ASM_KVM_BOOK3S_64_H__ */