2 Commits 9e456d6551 ... eb0ed8b773

Autor · SHA1 · Mensagem · Data
  Agustina Arzille eb0ed8b773 Use pmap windows instead of raw PTEs 2 meses atrás
  Agustina Arzille 5fc9e0b083 Small fixes and improvements 2 meses atrás
10 arquivos alterados com 129 adições e 118 exclusões
  1. +4 −7    arch/x86/machine/cpu.h
  2. +56 −19  arch/x86/machine/pmap.c
  3. +29 −35  arch/x86/machine/pmap.h
  4. +4 −2    kern/futex.c
  5. +11 −12  kern/ipc.c
  6. +8 −12   kern/sref.c
  7. +5 −17   kern/thread.c
  8. +2 −10   kern/thread.h
  9. +10 −4   kern/unwind.c
  10. +0 −0   vm/map.c

+ 4 - 7
arch/x86/machine/cpu.h

@@ -925,13 +925,7 @@ cpu_clear_intr (void)
 
 #endif
 
-/*
- * CPU fixups, used to safely perform operations on memory that may fault.
- *
- * They work similarly to setjmp/longjmp, with the exception that they
- * are better coupled with exception and traps, since they can use
- * a CPU frame to start the unwinding process.
-*/
+// Needed stuff for the unwinder.
 
 #ifdef __LP64__
   #define CPU_UNWIND_FRAME_REG   6
@@ -958,6 +952,9 @@ long cpu_lpad_swap (uintptr_t *args, void *lpad, void *pc);
 // Return from the execution context in a landing pad.
 noreturn void cpu_lpad_return (uintptr_t sp, intptr_t ret);
 
+// Number of physical map windows.
+#define CPU_NR_PMAP_WINDOWS   2
+
 /*
  * This init operation provides :
  *  - initialization of the BSP structure.

+ 56 - 19
arch/x86/machine/pmap.c

@@ -115,15 +115,16 @@ union pmap_global
     } full;
 };
 
-struct pmap_ipc_pte_t
+struct pmap_window_data_t
 {
-  pmap_pte_t *ptes[3];
+  pmap_pte_t *ptes[CPU_NR_PMAP_WINDOWS];
 };
 
 static union pmap_global pmap_global_pmap;
 struct pmap *pmap_kernel_pmap;
 struct pmap *pmap_current_ptr __percpu;
-static struct pmap_ipc_pte_t pmap_ipc_ptes __percpu;
+static uintptr_t pmap_ipc_va __read_mostly;
+static struct pmap_window_data_t pmap_window_data __percpu;
 
 #ifdef CONFIG_X86_PAE
 
@@ -971,7 +972,7 @@ pmap_copy_cpu_table (uint32_t cpu)
   pmap_copy_cpu_table_recursive (sptp, level, dptp);
 }
 
-static int pmap_setup_ipc_ptes (uintptr_t);
+static int pmap_setup_ipc_ptes (void);
 
 void __init
 pmap_mp_setup (void)
@@ -1014,7 +1015,7 @@ pmap_mp_setup (void)
 
   pmap_do_remote_updates = 1;
 
-  if (pmap_setup_ipc_ptes (vm_map_ipc_addr ()) != 0)
+  if (pmap_setup_ipc_ptes () != 0)
     panic ("pmap: unable to create IPC PTEs");
 }
 
@@ -1269,12 +1270,20 @@ set:
 }
 
 static int
-pmap_setup_ipc_ptes (uintptr_t va)
-{
+pmap_setup_ipc_ptes (void)
+{
+  uintptr_t va = 0;
+  if (vm_map_enter (vm_map_get_kernel_map (), &va,
+                    PAGE_SIZE * CPU_NR_PMAP_WINDOWS,
+                    VM_MAP_FLAGS (VM_PROT_RDWR, VM_PROT_RDWR,
+                                  VM_INHERIT_NONE, VM_ADV_DEFAULT, 0),
+                    NULL, 0) != 0)
+    return (ENOMEM);
+
   struct pmap *pmap = pmap_get_kernel_pmap ();
   for (uint32_t i = 0; i < cpu_count (); ++i)
     {
-      _Auto base = percpu_ptr (pmap_ipc_ptes, i);
+      _Auto base = percpu_ptr (pmap_window_data, i);
       for (uint32_t j = 0; j < ARRAY_SIZE (base->ptes); ++j)
         {
           _Auto ptr = &base->ptes[j];
@@ -1287,6 +1296,7 @@ pmap_setup_ipc_ptes (uintptr_t va)
         }
     }
 
+  pmap_ipc_va = va;
   return (0);
 }
 
@@ -1720,22 +1730,49 @@ pmap_sync (void *arg)
     }
 }
 
-struct thread_pmap_data*
-pmap_ipc_pte_get_idx (uint32_t idx)
+void
+pmap_window_set (struct pmap_window *window, phys_addr_t pa)
+{
+  cpu_tlb_flush_va (window->va);
+  pmap_pte_set (window->pte, pa, PMAP_PTE_G | PMAP_PTE_RW, &pmap_pt_levels[0]);
+}
+
+struct pmap_window*
+(pmap_window_get) (uint32_t idx, struct pmap_window *window)
+{
+  assert (idx < CPU_NR_PMAP_WINDOWS);
+  window->idx = idx;
+  window->pte = cpu_local_ptr(pmap_window_data)->ptes[idx];
+  window->va = pmap_ipc_va + idx * PAGE_SIZE;
+
+  _Auto pptr = &thread_self()->pmap_windows[idx];
+  if ((window->prev = *pptr) != NULL)
+    window->prev->saved = *window->pte & PMAP_PA_MASK;
+
+  return (*pptr = window);
+}
+
+void
+pmap_window_put (struct pmap_window *window)
 {
-  _Auto ret = &thread_self()->pmap_data[idx];
-  ret->pte = cpu_local_ptr(pmap_ipc_ptes)->ptes[idx];
-  ret->va = 0;
-  return (ret);
+  _Auto prev = window->prev;
+  if ((thread_self()->pmap_windows[window->idx] = prev) != NULL)
+    pmap_window_set (prev, prev->saved);
 }
 
 void
-pmap_ipc_pte_set (struct thread_pmap_data *pd, uintptr_t va, phys_addr_t pa)
+pmap_context_switch (struct thread *prev, struct thread *new)
 {
-  cpu_tlb_flush_va (va);
-  pd->va = va;
-  pmap_pte_set ((pmap_pte_t *)pd->pte, pa,
-                PMAP_PTE_G | PMAP_PTE_RW, &pmap_pt_levels[0]);
+  for (int i = 0; i < (int)ARRAY_SIZE (prev->pmap_windows); ++i)
+    {
+      _Auto window = prev->pmap_windows[i];
+      if (window)
+        window->saved = *window->pte & PMAP_PA_MASK;
+
+      window = new->pmap_windows[i];
+      if (window)
+        pmap_window_set (window, window->saved);
+    }
 }
 
 void

+ 29 - 35
arch/x86/machine/pmap.h

@@ -183,6 +183,21 @@ typedef phys_addr_t pmap_pte_t;
 // Physical address map.
 struct pmap;
 
+/*
+ * Physical map windows.
+ *
+ * These are temporary mappings that link reserved kernel virtual
+ * addresses to arbitrary physical pages. They are used for IPC.
+ */
+struct pmap_window
+{
+  uint32_t idx;
+  uintptr_t va;
+  phys_addr_t saved;
+  struct pmap_window *prev;
+  pmap_pte_t *pte;
+};
+
 struct pmap_clean_data
 {
   phys_addr_t pa;
@@ -354,46 +369,25 @@ pmap_current (void)
   return (cpu_local_read (pmap_current_ptr));
 }
 
-static inline void
-pmap_ipc_pte_init (struct thread_pmap_data *pd)
-{
-  pd->pte = NULL;
-  pd->va = 0;
-  pd->prev = 1;
-}
+// Get the pmap window at a specific index.
+struct pmap_window* (pmap_window_get) (uint32_t idx, struct pmap_window *wp);
 
-// Get the thread-specific data used for IPC.
-struct thread_pmap_data* pmap_ipc_pte_get_idx (uint32_t idx);
+#define pmap_window_get(idx)   \
+  ({   \
+     void *wp_ = alloca (sizeof (struct pmap_window));   \
+     (pmap_window_get) ((idx), wp_);   \
+   })
 
-static inline struct thread_pmap_data*
-pmap_ipc_pte_get (void)
-{
-  return (pmap_ipc_pte_get_idx (0));
-}
+// Map a window to a physical address.
+void pmap_window_set (struct pmap_window *window, phys_addr_t pa);
 
-// Make the special PTE map a physical address.
-void pmap_ipc_pte_set (struct thread_pmap_data *pd,
-                       uintptr_t va, phys_addr_t pa);
+// Return a pmap window.
+void pmap_window_put (struct pmap_window *window);
 
-// Put back the special PTE.
-static inline void
-pmap_ipc_pte_put (struct thread_pmap_data *pd)
-{
-  pd->pte = NULL;
-}
+// Do the necessary bookkeeping for pmaps in a context switch.
+void pmap_context_switch (struct thread *prev, struct thread *next);
 
-static inline void
-pmap_ipc_pte_save (struct thread_pmap_data *pd, uint64_t *prev)
-{
-  *prev = pd->pte && pd->va ? (*(pmap_pte_t *)pd->pte & PMAP_PA_MASK) : 1;
-}
-
-static inline void
-pmap_ipc_pte_load (struct thread_pmap_data *pd, phys_addr_t pa)
-{
-  if (pa != 1)
-    pmap_ipc_pte_set (pd, pd->va, pa);
-}
+#define pmap_window_va(window)   ((void *)(window)->va)
 
 // Cross-call entry point for cleaning a page.
 void pmap_xcall_clean (void *arg);

+ 4 - 2
kern/futex.c

@@ -519,8 +519,10 @@ futex_td_exit (struct futex_td *td)
   if (!td || user_copy_from (&rtd, td, sizeof (rtd)) != 0)
     return;
 
-  if (rtd.pending)
-    futex_robust_list_handle (rtd.pending, (int *)rtd.pending, tid);
+  if (rtd.pending &&
+      (!user_check_range (rtd.pending, sizeof (rtd.pending)) ||
+       futex_robust_list_handle (rtd.pending, (int *)rtd.pending, tid) != 0))
+    return;
 
   uint32_t nmax = 1024;   // Handle this many robust futexes.
   while (rtd.list)

+ 11 - 12
kern/ipc.c

@@ -33,10 +33,9 @@
 struct ipc_data
 {
   cpu_flags_t cpu_flags;
-  uintptr_t va;
   int direction;
   int prot;
-  void *ipc_pte;
+  struct pmap_window *window;
   struct vm_page *page;
 };
 
@@ -45,8 +44,7 @@ ipc_data_init (struct ipc_data *data, int direction)
 {
   data->direction = direction;
   data->prot = direction == IPC_COPY_FROM ? VM_PROT_READ : VM_PROT_RDWR;
-  data->va = vm_map_ipc_addr ();
-  data->ipc_pte = NULL;
+  data->window = NULL;
   data->page = NULL;
 }
 
@@ -84,29 +82,28 @@ static void
 ipc_data_pte_get (struct ipc_data *data)
 {
   thread_pin ();
-  data->ipc_pte = pmap_ipc_pte_get ();
+  data->window = pmap_window_get (0);
 }
 
 static void
 ipc_data_pte_map (struct ipc_data *data, phys_addr_t pa)
 {
-  assert (thread_pinned () || !cpu_intr_enabled ());
-  pmap_ipc_pte_set (data->ipc_pte, data->va, pa);
+  pmap_window_set (data->window, pa);
 }
 
 static void
 ipc_data_pte_put (struct ipc_data *data)
 {
-  pmap_ipc_pte_put (data->ipc_pte);
+  pmap_window_put (data->window);
   thread_unpin ();
-  data->ipc_pte = NULL;
+  data->window = NULL;
 }
 
 static void
 ipc_data_fini (void *arg)
 {
   struct ipc_data *data = arg;
-  if (data->ipc_pte)
+  if (data->window)
     ipc_data_pte_put (data);
   if (data->page)
     vm_page_unref (data->page);
@@ -194,10 +191,12 @@ ipc_bcopyv_impl (struct vm_map *r_map, const struct iovec *r_v,
   ipc_data_pte_map (data, pa);
   ipc_data_intr_restore (data);
 
+  void *va = (char *)pmap_window_va (data->window) + page_off;
+
   if (data->direction == IPC_COPY_TO)
-    memcpy ((void *)(data->va + page_off), l_v->iov_base, ret);
+    memcpy (va, l_v->iov_base, ret);
   else
-    memcpy ((void *)l_v->iov_base, (void *)(data->va + page_off), ret);
+    memcpy ((void *)l_v->iov_base, va, ret);
 
   ipc_data_pte_put (data);
   ipc_data_page_unref (data);

+ 8 - 12
kern/sref.c

@@ -640,8 +640,8 @@ sref_cache_needs_management (struct sref_cache *cache)
   assert (!cpu_intr_enabled ());
   assert (!thread_preempt_enabled ());
 
-  const _Auto queue = sref_cache_get_queue_by_epoch_id (cache,
-                                                        cache->epoch_id - 2);
+  const _Auto queue =
+    sref_cache_get_queue_by_epoch_id (cache, cache->epoch_id - 2);
   return (sref_cache_is_dirty (cache) || !sref_queue_empty (queue));
 }
 
@@ -649,7 +649,6 @@ static void
 sref_cache_end_epoch (struct sref_cache *cache)
 {
   assert (!sref_cache_needs_management (cache));
-
   sref_data_ack_cpu (cache->data);
   ++cache->epoch_id;
 }
@@ -675,8 +674,8 @@ sref_cache_flush (struct sref_cache *cache, struct sref_queue *queue)
   sref_cache_clear_dirty (cache);
   sref_cache_set_flushed (cache);
 
-  _Auto prev_queue = sref_cache_get_queue_by_epoch_id (cache,
-                                                       cache->epoch_id - 2);
+  _Auto prev_queue =
+    sref_cache_get_queue_by_epoch_id (cache, cache->epoch_id - 2);
   sref_queue_move (queue, prev_queue);
   sref_queue_init (prev_queue);
   sref_cache_end_epoch (cache);
@@ -722,15 +721,12 @@ sref_queue_review (struct sref_queue *queue, struct sref_cache *cache)
           ++nr_dirty_zeroes;
           sref_counter_clear_dirty (counter);
         }
+      else if (sref_counter_kill_weakref (counter) == 0)
+        requeue = false;
       else
         {
-          if (sref_counter_kill_weakref (counter) == 0)
-            requeue = false;
-          else
-            {
-              requeue = true;
-              ++nr_revives;
-            }
+          requeue = true;
+          ++nr_revives;
         }
 
       if (requeue)

+ 5 - 17
kern/thread.c

@@ -627,17 +627,6 @@ thread_runq_guard_fini (struct thread_runq_guard_t *guard)
 #define thread_runq_guard   \
   thread_runq_guard_t CLEANUP (thread_runq_guard_fini) __unused
 
-static void
-thread_pmap_context_switch (struct thread_pmap_data *prev,
-                            struct thread_pmap_data *next)
-{
-  for (size_t i = 0; i < THREAD_NR_PMAP_DATA; ++i)
-    {
-      pmap_ipc_pte_save (prev + i, &prev[i].prev);
-      pmap_ipc_pte_load (next + i, next[i].prev);
-    }
-}
-
 static struct thread_runq*
 thread_runq_schedule (struct thread_runq *runq)
 {
@@ -674,7 +663,7 @@ thread_runq_schedule (struct thread_runq *runq)
     {
       thread_runq_schedule_unload (prev);
       rcu_report_context_switch (thread_rcu_reader (prev));
-      thread_pmap_context_switch (prev->pmap_data, next->pmap_data);
+      pmap_context_switch (prev, next);
       spinlock_transfer_owner (&runq->lock, next);
 
       /*
@@ -1699,8 +1688,8 @@ thread_init (struct thread *thread, void *stack,
   thread->cur_lpad = NULL;
   thread->futex_td = NULL;
   bulletin_init (&thread->dead_subs);
-  for (size_t i = 0; i < THREAD_NR_PMAP_DATA; ++i)
-    pmap_ipc_pte_init (&thread->pmap_data[i]);
+  for (int i = 0; i < (int)ARRAY_SIZE (thread->pmap_windows); ++i)
+    thread->pmap_windows[i] = NULL;
 
 #ifdef CONFIG_PERFMON
   perfmon_td_init (thread_get_perfmon_td (thread));
@@ -1810,6 +1799,7 @@ thread_destroy (struct thread *thread)
   // See task_info().
   task_remove_thread (thread->task, thread);
 
+  kuid_remove (&thread->kuid, KUID_THREAD);
   turnstile_destroy (thread->priv_turnstile);
   sleepq_destroy (thread->priv_sleepq);
   thread_free_stack (thread->stack);
@@ -1850,7 +1840,6 @@ thread_terminate (struct thread *thread)
   SPINLOCK_GUARD (&thread->join_lock);
   thread->terminating = true;
   cap_notify_dead (&thread->dead_subs);
-  kuid_remove (&thread->kuid, KUID_THREAD);
   thread_wakeup (thread->join_waiter);
 }
 
@@ -1863,8 +1852,7 @@ thread_balance_idle_tick (struct thread_runq *runq)
    * Interrupts can occur early, at a time the balancer thread hasn't been
    * created yet.
    */
-  if (runq->balancer &&
-      --runq->idle_balance_ticks == 0)
+  if (runq->balancer && --runq->idle_balance_ticks == 0)
     thread_runq_wakeup_balancer (runq);
 }
 

+ 2 - 10
kern/thread.h

@@ -76,6 +76,7 @@ struct thread_sched_data
 
 // Forward declarations.
 struct sleepq;
+struct pmap_window;
 
 struct thread_runq;
 struct thread_fs_runq;
@@ -102,15 +103,6 @@ struct thread_fs_data
   uint16_t work;
 };
 
-struct thread_pmap_data
-{
-  uint64_t prev;
-  void *pte;
-  uintptr_t va;
-};
-
-#define THREAD_NR_PMAP_DATA   3
-
 /*
  * Thread structure.
  *
@@ -227,7 +219,7 @@ struct thread
   struct task *xtask;             // (-)
   struct futex_td *futex_td;      // (-)
   struct bulletin dead_subs;      // ( )
-  struct thread_pmap_data pmap_data[THREAD_NR_PMAP_DATA];   // (-)
+  struct pmap_window *pmap_windows[CPU_NR_PMAP_WINDOWS];   // (-)
 };
 
 // Thread IPC message (TODO: Move to a specific header).

+ 10 - 4
kern/unwind.c

@@ -204,6 +204,13 @@ unw_read_safe (uintptr_t addr, uintptr_t *out)
   return (0);
 }
 
+static void
+unw_fast_cpy (void *dst, const void *src, size_t size)
+{
+  for (size_t i = 0; i < size; ++i)
+    ((char *)dst)[i] = ((const char *)src)[i];
+}
+
 static int
 unw_read_encptr (uint8_t enc, const unsigned char **ptr,
                  uintptr_t pc, uintptr_t *out)
@@ -260,8 +267,7 @@ unw_read_encptr (uint8_t enc, const unsigned char **ptr,
   case DW_EH_PE_##enc_val:   \
     {   \
       type tmp;   \
-      for (size_t i = 0; i < sizeof (tmp); ++i)   \
-        ((unsigned char *)&tmp)[i] = p[i];   \
+      unw_fast_cpy (&tmp, p, sizeof (tmp));   \
       p += sizeof (tmp);   \
       ret = base + tmp;   \
     }   \
@@ -366,7 +372,7 @@ unw_run_dw (struct unw_cursor *cursor, const struct unw_cie *cie,
           case DW_CFA_advance_loc2:
             {
               uint16_t off;
-              memcpy (&off, ops, sizeof (off));
+              unw_fast_cpy (&off, ops, sizeof (off));
               ops += sizeof (off);
               pc += off * cie->code_align;
               break;
@@ -375,7 +381,7 @@ unw_run_dw (struct unw_cursor *cursor, const struct unw_cie *cie,
           case DW_CFA_advance_loc4:
             {
               uint32_t off;
-              memcpy (&off, ops, sizeof (off));
+              unw_fast_cpy (&off, ops, sizeof (off));
               ops += sizeof (off);
               pc += off * cie->code_align;
               break;

+ 0 - 0
vm/map.c


Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff