- /*
- * Copyright IBM Corp. 2007,2009
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
- #include <linux/sched.h>
- #include <linux/kernel.h>
- #include <linux/errno.h>
- #include <linux/gfp.h>
- #include <linux/mm.h>
- #include <linux/swap.h>
- #include <linux/smp.h>
- #include <linux/highmem.h>
- #include <linux/pagemap.h>
- #include <linux/spinlock.h>
- #include <linux/module.h>
- #include <linux/quicklist.h>
- #include <linux/rcupdate.h>
- #include <asm/system.h>
- #include <asm/pgtable.h>
- #include <asm/pgalloc.h>
- #include <asm/tlb.h>
- #include <asm/tlbflush.h>
- #include <asm/mmu_context.h>
- #ifndef CONFIG_64BIT
- #define ALLOC_ORDER 1
- #define FRAG_MASK 0x0f
- #else
- #define ALLOC_ORDER 2
- #define FRAG_MASK 0x03
- #endif
- unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
- EXPORT_SYMBOL(VMALLOC_START);
- static int __init parse_vmalloc(char *arg)
- {
- if (!arg)
- return -EINVAL;
- VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
- return 0;
- }
- early_param("vmalloc", parse_vmalloc);
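The early parameter above sizes the vmalloc area from the top down: VMALLOC_START is moved below the fixed VMALLOC_END by the amount given on the command line, rounded down to a page boundary with PAGE_MASK. A minimal userspace sketch of that arithmetic follows; the DEMO_* constants are assumptions for illustration only, the real values come from asm/pgtable.h, and memparse() also accepts K/M/G suffixes.

#include <stdio.h>

#define DEMO_PAGE_MASK   (~0xfffUL)          /* assume 4 KiB pages */
#define DEMO_VMALLOC_END 0x20000000000UL     /* arbitrary demo end address */

int main(void)
{
        unsigned long size  = 256UL << 20;   /* as if "vmalloc=256M" was given */
        unsigned long start = (DEMO_VMALLOC_END - size) & DEMO_PAGE_MASK;

        printf("vmalloc area would span %#lx..%#lx\n", start, DEMO_VMALLOC_END);
        return 0;
}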
- unsigned long *crst_table_alloc(struct mm_struct *mm)
- {
- struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
- if (!page)
- return NULL;
- return (unsigned long *) page_to_phys(page);
- }
- void crst_table_free(struct mm_struct *mm, unsigned long *table)
- {
- free_pages((unsigned long) table, ALLOC_ORDER);
- }
- #ifdef CONFIG_64BIT
- int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
- {
- unsigned long *table, *pgd;
- unsigned long entry;
- BUG_ON(limit > (1UL << 53));
- repeat:
- table = crst_table_alloc(mm);
- if (!table)
- return -ENOMEM;
- spin_lock_bh(&mm->page_table_lock);
- if (mm->context.asce_limit < limit) {
- pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit <= (1UL << 31)) {
- entry = _REGION3_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- } else {
- entry = _REGION2_ENTRY_EMPTY;
- mm->context.asce_limit = 1UL << 53;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION2;
- }
- crst_table_init(table, entry);
- pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->task_size = mm->context.asce_limit;
- table = NULL;
- }
- spin_unlock_bh(&mm->page_table_lock);
- if (table)
- crst_table_free(mm, table);
- if (mm->context.asce_limit < limit)
- goto repeat;
- update_mm(mm, current);
- return 0;
- }
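crst_table_upgrade() follows a race-tolerant pattern: the new top-level table is allocated with no locks held, installed under mm->page_table_lock only if the address-space limit still needs raising, and freed again if another thread upgraded the mm in the meantime; the outer loop repeats because each pass adds only one region-table level (2 GiB to 4 TiB to 8 PiB). A generic sketch of that pattern, with all names (demo_mm, demo_upgrade, ...) made up for illustration:

#include <pthread.h>
#include <stdlib.h>

struct demo_mm {
        pthread_mutex_t lock;
        unsigned long limit;
        void *top_table;
};

static int demo_upgrade(struct demo_mm *mm, unsigned long new_limit)
{
        while (mm->limit < new_limit) {
                void *table = calloc(4, 4096);          /* like crst_table_alloc() */
                if (!table)
                        return -1;

                pthread_mutex_lock(&mm->lock);
                if (mm->limit < new_limit) {
                        mm->top_table = table;          /* new top level over the old one */
                        mm->limit *= 2048;              /* one region level per pass */
                        table = NULL;
                }
                pthread_mutex_unlock(&mm->lock);

                free(table);                            /* lost the race: discard */
        }
        return 0;
}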
- void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
- {
- pgd_t *pgd;
- if (mm->context.asce_limit <= limit)
- return;
- __tlb_flush_mm(mm);
- while (mm->context.asce_limit > limit) {
- pgd = mm->pgd;
- switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
- case _REGION_ENTRY_TYPE_R2:
- mm->context.asce_limit = 1UL << 42;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_REGION3;
- break;
- case _REGION_ENTRY_TYPE_R3:
- mm->context.asce_limit = 1UL << 31;
- mm->context.asce_bits = _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS |
- _ASCE_TYPE_SEGMENT;
- break;
- default:
- BUG();
- }
- mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->task_size = mm->context.asce_limit;
- crst_table_free(mm, (unsigned long *) pgd);
- }
- update_mm(mm, current);
- }
- #endif
- static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
- {
- unsigned int old, new;
- do {
- old = atomic_read(v);
- new = old ^ bits;
- } while (atomic_cmpxchg(v, old, new) != old);
- return new;
- }
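atomic_xor_bits() is a lock-free read-modify-write: read the counter, XOR in the requested bits, and retry the compare-and-swap until no other CPU changed the value in between; the caller gets the new value back. The same loop expressed with C11 atomics, as a standalone sketch (demo_xor_bits is a made-up name):

#include <stdatomic.h>

static unsigned int demo_xor_bits(atomic_uint *v, unsigned int bits)
{
        unsigned int old = atomic_load(v);
        unsigned int new;

        do {
                new = old ^ bits;
                /* on failure, atomic_compare_exchange_weak reloads 'old' */
        } while (!atomic_compare_exchange_weak(v, &old, new));
        return new;
}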
- /*
- * page table entry allocation/free routines.
- */
- #ifdef CONFIG_PGSTE
- static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
- {
- struct page *page;
- unsigned long *table;
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- pgtable_page_ctor(page);
- atomic_set(&page->_mapcount, 3);
- table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- return table;
- }
- static inline void page_table_free_pgste(unsigned long *table)
- {
- struct page *page;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- }
- #endif
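With CONFIG_PGSTE a page table page is never fragmented: the lower 2 KiB of the 4 KiB page holds the 256 page table entries (cleared to _PAGE_TYPE_EMPTY) and the upper 2 KiB holds the matching PGSTEs that KVM uses for guest state (cleared to zero), which is why _mapcount is set to 3, marking both halves busy at once. The index arithmetic that pairs a PTE with its PGSTE, as a standalone sketch with demo constants:

#include <stdint.h>
#include <stdio.h>

#define DEMO_PTRS_PER_PTE 256   /* 256 eight-byte entries = 2 KiB per half */

int main(void)
{
        uint64_t table[2 * DEMO_PTRS_PER_PTE];          /* one 4 KiB table page */
        uint64_t *pte   = &table[42];                   /* a PTE in the lower half */
        uint64_t *pgste = pte + DEMO_PTRS_PER_PTE;      /* its PGSTE, PAGE_SIZE/2 above */

        printf("pte index %td pairs with pgste index %td\n",
               pte - table, pgste - table);
        return 0;
}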
- unsigned long *page_table_alloc(struct mm_struct *mm)
- {
- struct page *page;
- unsigned long *table;
- unsigned int mask, bit;
- #ifdef CONFIG_PGSTE
- if (mm_has_pgste(mm))
- return page_table_alloc_pgste(mm);
- #endif
- /* Allocate fragments of a 4K page as 1K/2K page table */
- spin_lock_bh(&mm->context.list_lock);
- mask = FRAG_MASK;
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- table = (unsigned long *) page_to_phys(page);
- mask = atomic_read(&page->_mapcount);
- mask = mask | (mask >> 4);
- }
- if ((mask & FRAG_MASK) == FRAG_MASK) {
- spin_unlock_bh(&mm->context.list_lock);
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- pgtable_page_ctor(page);
- atomic_set(&page->_mapcount, 1);
- table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
- spin_lock_bh(&mm->context.list_lock);
- list_add(&page->lru, &mm->context.pgtable_list);
- } else {
- for (bit = 1; mask & bit; bit <<= 1)
- table += PTRS_PER_PTE;
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if ((mask & FRAG_MASK) == FRAG_MASK)
- list_del(&page->lru);
- }
- spin_unlock_bh(&mm->context.list_lock);
- return table;
- }
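For ordinary processes, page->_mapcount of a page on context.pgtable_list doubles as a small bitmap: the low bits (FRAG_MASK wide, 0x03 on 64-bit for two 2 KiB fragments, 0x0f on 31-bit for four 1 KiB fragments) mark busy fragments, and the upper nibble mirrors fragments queued for RCU freeing, which is why the mask is OR-ed with its own right-shift before testing. The fragment-selection bit math, as a standalone worked example with plain integers standing in for the atomic counter:

#include <stdio.h>

#define DEMO_FRAG_MASK    0x03  /* 64-bit layout: two 2 KiB fragments per page */
#define DEMO_PTRS_PER_PTE 256

int main(void)
{
        unsigned int mask = 0x01;       /* fragment 0 busy, fragment 1 free */
        unsigned int bit, offset = 0;

        for (bit = 1; mask & bit; bit <<= 1)
                offset += DEMO_PTRS_PER_PTE;    /* skip busy fragments */

        printf("next table starts at entry %u, marked busy with bit %#x\n",
               offset, bit);
        if (((mask ^ bit) & DEMO_FRAG_MASK) == DEMO_FRAG_MASK)
                printf("page is now full and is removed from pgtable_list\n");
        return 0;
}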
- void page_table_free(struct mm_struct *mm, unsigned long *table)
- {
- struct page *page;
- unsigned int bit, mask;
- #ifdef CONFIG_PGSTE
- if (mm_has_pgste(mm))
- return page_table_free_pgste(table);
- #endif
- /* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
- spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if (mask & FRAG_MASK)
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- }
- }
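Freeing goes through the inverse mapping: the byte offset of the table inside its page, divided by the size of one table (PTRS_PER_PTE * sizeof(pte_t), 2 KiB on 64-bit), selects the fragment bit to clear, and only when the whole _mapcount bitmap drops to zero is the backing page really released. A small worked example of that offset-to-bit step (demo values, 64-bit layout assumed):

#include <stdio.h>

int main(void)
{
        unsigned long table_addr = 0x12345800;  /* fragment in the upper half of its page */
        unsigned long offset = table_addr & 0xfffUL;    /* __pa(table) & ~PAGE_MASK */
        unsigned int bit = 1u << (offset / (256 * 8));  /* PTRS_PER_PTE * sizeof(pte_t) */

        printf("table at page offset %#lx clears bit %#x\n", offset, bit);
        return 0;
}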
- static void __page_table_free_rcu(void *table, unsigned bit)
- {
- struct page *page;
- #ifdef CONFIG_PGSTE
- if (bit == FRAG_MASK)
- return page_table_free_pgste(table);
- #endif
- /* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- }
- }
- void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
- {
- struct mm_struct *mm;
- struct page *page;
- unsigned int bit, mask;
- mm = tlb->mm;
- #ifdef CONFIG_PGSTE
- if (mm_has_pgste(mm)) {
- table = (unsigned long *) (__pa(table) | FRAG_MASK);
- tlb_remove_table(tlb, table);
- return;
- }
- #endif
- bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
- if (mask & FRAG_MASK)
- list_add_tail(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (bit << 4));
- tlb_remove_table(tlb, table);
- }
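page_table_free_rcu() marks the fragment as pending in the upper nibble of _mapcount (bit << 4) and encodes the same code into the low bits of the pointer it hands to tlb_remove_table(); that works because a table fragment is at least 1 KiB aligned, so its low bits are known to be zero, and __tlb_remove_table() below masks them off again. A standalone sketch of the tagging round trip (demo values only; 0x33 is (FRAG_MASK << 4) | FRAG_MASK with the 64-bit FRAG_MASK):

#include <stdint.h>
#include <stdio.h>

#define DEMO_TAG_MASK 0x33UL    /* (FRAG_MASK << 4) | FRAG_MASK on 64-bit */

int main(void)
{
        uint64_t table  = 0x12345800;           /* 2 KiB aligned fragment address */
        uint64_t handle = table | (0x1 << 4);   /* fragment 0, pending RCU free */

        uint64_t addr     = handle & ~DEMO_TAG_MASK;    /* what gets freed later */
        unsigned int type = handle & DEMO_TAG_MASK;     /* routed to __page_table_free_rcu */

        printf("handle %#llx carries table %#llx with type %#x\n",
               (unsigned long long)handle, (unsigned long long)addr, type);
        return 0;
}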
- void __tlb_remove_table(void *_table)
- {
- const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
- void *table = (void *)((unsigned long) _table & ~mask);
- unsigned type = (unsigned long) _table & mask;
- if (type)
- __page_table_free_rcu(table, type);
- else
- free_pages((unsigned long) table, ALLOC_ORDER);
- }
- static void tlb_remove_table_smp_sync(void *arg)
- {
- /* Simply deliver the interrupt */
- }
- static void tlb_remove_table_one(void *table)
- {
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
- }
- static void tlb_remove_table_rcu(struct rcu_head *head)
- {
- struct mmu_table_batch *batch;
- int i;
- batch = container_of(head, struct mmu_table_batch, rcu);
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
- free_page((unsigned long)batch);
- }
- void tlb_table_flush(struct mmu_gather *tlb)
- {
- struct mmu_table_batch **batch = &tlb->batch;
- if (*batch) {
- __tlb_flush_mm(tlb->mm);
- call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
- }
- void tlb_remove_table(struct mmu_gather *tlb, void *table)
- {
- struct mmu_table_batch **batch = &tlb->batch;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_table_flush(tlb);
- }
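tlb_remove_table() collects table pointers into one page-sized mmu_table_batch; the batch is flushed, and every entry freed via __tlb_remove_table(), only after an RCU-sched grace period, so lockless walkers that run with interrupts disabled never see a table disappear under them. When no batch page can be allocated, tlb_remove_table_one() falls back to an IPI broadcast plus an immediate free. The batching shape in isolation, with all names invented for the sketch:

#include <stddef.h>
#include <stdio.h>

#define DEMO_MAX_BATCH 8

struct demo_batch {
        size_t nr;
        void *tables[DEMO_MAX_BATCH];
};

static void demo_flush(struct demo_batch *b)
{
        /* stands in for __tlb_flush_mm() + call_rcu_sched() above */
        printf("flushing %zu queued tables\n", b->nr);
        b->nr = 0;
}

static void demo_remove(struct demo_batch *b, void *table)
{
        b->tables[b->nr++] = table;
        if (b->nr == DEMO_MAX_BATCH)
                demo_flush(b);          /* batch full: flush immediately */
}

int main(void)
{
        struct demo_batch batch = { 0 };
        int dummy[10];

        for (int i = 0; i < 10; i++)
                demo_remove(&batch, &dummy[i]);
        demo_flush(&batch);             /* like tlb_table_flush() at the end */
        return 0;
}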
- /*
- * switch on pgstes for its userspace process (for kvm)
- */
- int s390_enable_sie(void)
- {
- struct task_struct *tsk = current;
- struct mm_struct *mm, *old_mm;
- /* Do we have switched amode? If no, we cannot do sie */
- if (user_mode == HOME_SPACE_MODE)
- return -EINVAL;
- /* Do we have pgstes? if yes, we are done */
- if (mm_has_pgste(tsk->mm))
- return 0;
- /* lets check if we are allowed to replace the mm */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
- #ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
- #endif
- tsk->mm != tsk->active_mm) {
- task_unlock(tsk);
- return -EINVAL;
- }
- task_unlock(tsk);
- /* we copy the mm and let dup_mm create the page tables with pgstes */
- tsk->mm->context.alloc_pgste = 1;
- mm = dup_mm(tsk);
- tsk->mm->context.alloc_pgste = 0;
- if (!mm)
- return -ENOMEM;
- /* Now lets check again if something happened */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
- #ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
- #endif
- tsk->mm != tsk->active_mm) {
- mmput(mm);
- task_unlock(tsk);
- return -EINVAL;
- }
- /* ok, we are alone. No ptrace, no threads, etc. */
- old_mm = tsk->mm;
- tsk->mm = tsk->active_mm = mm;
- preempt_disable();
- update_mm(mm, tsk);
- atomic_inc(&mm->context.attach_count);
- atomic_dec(&old_mm->context.attach_count);
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- preempt_enable();
- task_unlock(tsk);
- mmput(old_mm);
- return 0;
- }
- EXPORT_SYMBOL_GPL(s390_enable_sie);
- #if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
- bool kernel_page_present(struct page *page)
- {
- unsigned long addr;
- int cc;
- addr = page_to_phys(page);
- asm volatile(
- " lra %1,0(%1)\n"
- " ipm %0\n"
- " srl %0,28"
- : "=d" (cc), "+a" (addr) : : "cc");
- return cc == 0;
- }
- #endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */
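kernel_page_present() probes the page with LOAD REAL ADDRESS: lra sets condition code 0 when a translation for the address exists and a non-zero code when the page was unmapped by DEBUG_PAGEALLOC. The ipm/srl pair is the usual s390 idiom for reading the condition code into a C variable: ipm deposits the CC in bits 2-3 of the 32-bit register (bits 0-1 are cleared), so a logical shift right by 28 leaves cc as a plain 0..3 value, shown here with a demo register value:

#include <stdio.h>

int main(void)
{
        unsigned int reg_after_ipm = 0x10000000;        /* as if lra had set cc = 1 */
        unsigned int cc = reg_after_ipm >> 28;          /* the "srl %0,28" step */

        printf("cc = %u -> page %s\n", cc, cc == 0 ? "present" : "not mapped");
        return 0;
}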