RFC: Lightweight synchronization mechanism for gnumach

bug-hurd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RFC: Lightweight synchronization mechanism for gnumach

From:	Agustina Arzille
Subject:	RFC: Lightweight synchronization mechanism for gnumach
Date:	Sat, 27 Feb 2016 22:53:12 -0300

Hello, everyone.

Here's a patch for gnumach that implements a lightweight synchronization

mechanism. It can be used as a basis to build many higher-level objectslikemutexes, semaphores, etc. Given its genericity, and because I'm terribleat

naming things, I decided to name this module "gsync", as in
"generic synchronization".

I've tested it in many scenarios, and seems to work fine. I'vebenchmarked it

as well, and it's far more faster than pthread mutexes implemented with

"mach_msg". I'm far from being an expert in kernel programming, though,so

so there's probably a few things that I've overlooked.

Anyway, here's the patch. It's not too long for what it does, and Isoughtto comment it extensively (Although there's always room to improve). I'mopen

to any questions, comments, advice you may have.

diff --git a/Makefrag.am b/Makefrag.am
index 823ece5..94189e6 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -144,6 +144,8 @@ libkernel_a_SOURCES += \
        kern/eventcount.h \
        kern/exception.c \
        kern/exception.h \
+       kern/gsync.c \
+       kern/gsync.h \
        kern/host.c \
        kern/host.h \
        kern/ipc_host.c \
diff --git a/include/mach/gnumach.defs b/include/mach/gnumach.defs
index dd4da87..cac9e35 100644
--- a/include/mach/gnumach.defs
+++ b/include/mach/gnumach.defs
@@ -84,3 +84,37 @@ simpleroutine task_set_name(
 routine register_new_task_notification(
                host_priv       : host_priv_t;
                notification    : mach_port_send_t);
+
+/* Test that the contents of ADDR are equal to the 32-bit integer VAL1.
+ * If they are not, return immediately, otherwise, block until a
+ * matching 'gsync_wake' is done on the same address. FLAGS is used
+ * to control how the thread waits, and may be composed of:
+ * - GSYNC_SHARED: The address may be shared among tasks. If this
+     bit is not set, the address is assumed to be task-local.
+ * - GSYNC_QUAD: Additionally check that the adjacent 32-bit word
+     following ADDR matches the value VAL2.
+ * - GSYNC_TIMED: The call only blocks for MSEC milliseconds. */
+routine gsync_wait(
+  task : task_t;
+  addr : vm_offset_t;
+  val1 : unsigned;
+  val2 : unsigned;
+  msec : natural_t;
+  flags : int);
+
+/* Wake up threads waiting on the address ADDR. Much like with
+ * 'gsync_wait', the parameter FLAGS controls how it is done. In this
+ * case, it may be composed of the following:
+ * - GSYNC_SHARED: Same as with 'gsync_wait'.
+ * - GSYNC_BROADCAST: Wake up every thread waiting on the address. If
+ *   this flag is not set, the call wakes (at most) 1 thread.
+ * - GSYNC_MUTATE: Before waking any potential waiting threads, set the
+ *   contents of ADDR to VAL.
+ *
+ * This RPC is implemented as a simple routine for efficiency reasons,
+ * and because the return value rarely matters. */
+simpleroutine gsync_wake(
+  task : task_t;
+  addr : vm_offset_t;
+  val : unsigned;
+  flags : int);
diff --git a/kern/gsync.c b/kern/gsync.c
new file mode 100644
index 0000000..0a05054
--- /dev/null
+++ b/kern/gsync.c
@@ -0,0 +1,258 @@
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by Agustina Arzille <avarzille@riseup.net>, 2016.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either
+   version 3 of the license, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public
+   License along with this program; if not, see
+   <http://www.gnu.org/licenses/>.
+*/
+
+#include <kern/gsync.h>
+#include <kern/sched_prim.h>
+#include <kern/thread.h>
+#include <kern/lock.h>
+#include <kern/list.h>
+#include <vm/vm_map.h>
+
+/* An entry in the global hash table. */
+struct gsync_hbucket
+{
+  struct list entries;
+  decl_simple_lock_data (, lock)
+};
+
+/* A thread that is waiting for a 'gsync_wake' call. The members
+ * U and V represent the keys used to identify the address. Their
+ * values depend on whether the address is shared or task-local. */
+struct gsync_waiter
+{
+  struct list link;
+  unsigned long u;
+  unsigned long v;
+  thread_t waiter;
+};
+
+#define GSYNC_NBUCKETS   512
+static struct gsync_hbucket gsync_buckets[GSYNC_NBUCKETS];
+
+void gsync_setup (void)
+{
+  for (int i = 0; i < GSYNC_NBUCKETS; ++i)
+    {
+      list_init (&gsync_buckets[i].entries);
+      simple_lock_init (&gsync_buckets[i].lock);
+    }
+}
+
+#define MIX2_LL(x, y)   ((((x) << 5) | ((x) >> 27)) ^ (y))
+
+static inline unsigned int
+gsync_hash2 (unsigned long u, unsigned long v)
+{
+#ifndef __LP64__
+  unsigned int ret = 4;
+  ret = MIX2_LL (ret, u);
+  ret = MIX2_LL (ret, v);
+#else
+  unsigned int ret = 8;
+  ret = MIX2_LL (ret, u & ~0U);
+  ret = MIX2_LL (ret, u >> 32);
+  ret = MIX2_LL (ret, v & ~0U);
+  ret = MIX2_LL (ret, v >> 32);
+#endif
+  return (ret);
+}
+
+/* Test if the passed VM Map can access the address range
+ * [ADDR, ADDR + SIZE] with protection PROT. */
+static int
+valid_access_p (vm_map_t map, vm_offset_t addr,
+  vm_offset_t size, vm_prot_t prot)
+{
+  vm_map_entry_t entry;
+  return (vm_map_lookup_entry (map, addr, &entry) &&
+    entry->vme_end >= addr + size &&
+    (prot & entry->protection) == prot);
+}
+
+/* Given a task and an address, fill the waiter in *OUTP and return
+ * the corresponding bucket. FLAGS is used to specify several things
+ * about the address (Width, protection, if it's task-local or not).
+ * Currently can only fail when given an invalid address. */
+static int
+gsync_fill_waiter (task_t task, vm_offset_t addr,
+  int flags, struct gsync_waiter *outp)
+{
+  vm_prot_t prot = VM_PROT_READ |
+    ((flags & GSYNC_MUTATE) ? VM_PROT_WRITE : 0);
+  vm_offset_t size = sizeof (unsigned int) *
+    ((flags & GSYNC_QUAD) ? 2 : 1);
+
+  if (unlikely (!valid_access_p (task->map, addr, size, prot)))
+    return (-1);
+
+  if (flags & GSYNC_SHARED)
+    {
+      /* For a shared address, we need the VM object
+       * and offset as the keys. */
+      vm_map_t map = task->map;
+      vm_map_version_t ver;
+      vm_prot_t rpr;
+      vm_object_t obj;
+      vm_offset_t off;
+      boolean_t wired_p;
+
+      if (unlikely (vm_map_lookup (&map, addr, prot, &ver,
+          &obj, &off, &rpr, &wired_p) != KERN_SUCCESS))
+        return (-1);
+
+      outp->u = (unsigned long)obj;
+      outp->v = (unsigned long)off;
+    }
+  else
+    {
+      /* Task-local address. The keys are the task's map and
+       * the virtual address itself. */
+      outp->u = (unsigned long)task->map;
+      outp->v = (unsigned long)addr;
+    }
+
+  return ((int)(gsync_hash2 (outp->u, outp->v) % GSYNC_NBUCKETS));
+}
+
+static inline struct gsync_waiter*
+node_to_waiter (struct list *nodep)
+{
+  return (list_entry (nodep, struct gsync_waiter, link));
+}
+
+kern_return_t gsync_wait (task_t task, vm_offset_t addr,
+  unsigned int lo, unsigned int hi, natural_t msec, int flags)
+{
+  struct gsync_waiter w;
+  int bucket = gsync_fill_waiter (task, addr, flags, &w);
+
+  if (bucket < 0)
+    return (KERN_INVALID_ADDRESS);
+
+  struct gsync_hbucket *hbp = gsync_buckets + bucket;
+  simple_lock (&hbp->lock);
+
+  /* Before doing any work, check that the expected value(s)
+   * match the contents of the address. Otherwise, the waiting
+   * thread could potentially miss a wakeup. */
+  if (((unsigned int *)addr)[0] != lo ||
+      ((flags & GSYNC_QUAD) &&
+        ((unsigned int *)addr)[1] != hi))
+    {
+      simple_unlock (&hbp->lock);
+      return (KERN_INVALID_ARGUMENT);
+    }
+
+  /* Look for the first entry in the hash bucket that
+   * compares greater than this waiter. */
+  struct list *runp;
+  list_for_each (&hbp->entries, runp)
+    {
+      struct gsync_waiter *p = node_to_waiter (runp);
+      if (w.u < p->u || (w.u == p->u && w.v < p->v))
+        break;
+    }
+
+  /* Finally, add ourselves to the list and go to sleep. */
+  list_add (runp->prev, runp, &w.link);
+  w.waiter = current_thread ();
+
+  if (flags & GSYNC_TIMED)
+    thread_will_wait_with_timeout (w.waiter, msec);
+  else
+    thread_will_wait (w.waiter);
+
+  thread_sleep (0, (simple_lock_t)&hbp->lock, TRUE);
+
+  /* We're back. */
+  kern_return_t ret = current_thread()->wait_result;
+  if (ret != THREAD_AWAKENED)
+    {
+      /* We were interrupted or timed out. */
+      simple_lock (&hbp->lock);
+      if (w.link.next != NULL)
+        list_remove (&w.link);
+      simple_unlock (&hbp->lock);
+
+      /* XXX: These codes aren't really descriptive, but it's
+       * the best I can think of right now. */
+      ret = ret == THREAD_INTERRUPTED ?
+        KERN_ABORTED : KERN_TERMINATED;
+    }
+  else
+    ret = KERN_SUCCESS;
+
+  return (ret);
+}
+
+kern_return_t gsync_wake (task_t task,
+  vm_offset_t addr, unsigned int val, int flags)
+{
+  struct gsync_waiter w;
+  int bucket = gsync_fill_waiter (task, addr, flags, &w);
+
+  if (bucket < 0)
+    return (KERN_INVALID_ADDRESS);
+
+  struct gsync_hbucket *hbp = gsync_buckets + bucket;
+
+  /* When the broadcast bit is set, we wake every waiter
+   * that matches. Setting the amount to UINT_MAX
+   * should do the trick until we can manage a ridiculously
+   * large amount of waiters. */
+  unsigned int nw = (flags & GSYNC_BROADCAST) ? ~0U : 1;
+  kern_return_t ret = KERN_INVALID_ARGUMENT;
+  simple_lock (&hbp->lock);
+
+  if (flags & GSYNC_MUTATE)
+    __atomic_store_n ((unsigned int *)addr,
+      val, __ATOMIC_RELEASE);
+
+  /* Look for at least a waiter that matches. We take advantage
+   * of the fact that the entries are sorted to break out of
+   * the loop as early as possible. */
+  struct list *runp;
+  list_for_each (&hbp->entries, runp)
+    {
+      struct gsync_waiter *p = node_to_waiter (runp);
+      if (w.u < p->u || (w.u == p->u && w.v < p->v))
+        break;
+      else if (w.u == p->u && w.v == p->v)
+        {
+          do
+            {
+              struct list *nextp = list_next (runp);
+              list_remove (runp);
+              list_node_init (runp);
+              clear_wait (node_to_waiter(runp)->waiter,
+                THREAD_AWAKENED, FALSE);
+              runp = nextp;
+            }
+          while (--nw != 0 && !list_end (&hbp->entries, runp) &&
+            node_to_waiter(runp)->u == w.u &&
+            node_to_waiter(runp)->v == w.v);
+
+          ret = KERN_SUCCESS;
+          break;
+        }
+    }
+
+  simple_unlock (&hbp->lock);
+  return (ret);
+}
+
diff --git a/kern/gsync.h b/kern/gsync.h
new file mode 100644
index 0000000..bedbbcc
--- /dev/null
+++ b/kern/gsync.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by Agustina Arzille <avarzille@riseup.net>, 2016.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either
+   version 3 of the license, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public
+   License along with this program; if not, see
+   <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _KERN_GSYNC_H_
+#define _KERN_GSYNC_H_   1
+
+#define GSYNC_SHARED      0x01
+#define GSYNC_QUAD        0x02
+#define GSYNC_TIMED       0x04
+#define GSYNC_BROADCAST   0x08
+#define GSYNC_MUTATE      0x10
+
+#include <mach/mach_types.h>
+
+void gsync_setup (void);
+
+kern_return_t gsync_wait (task_t task, vm_offset_t addr,
+  unsigned int lo, unsigned int hi, natural_t msec, int flags);
+
+kern_return_t gsync_wake (task_t task,
+  vm_offset_t addr, unsigned int val, int flags);
+
+#endif
diff --git a/kern/startup.c b/kern/startup.c
index 30cff5c..fe9d51e 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -36,6 +36,7 @@
 #include <ipc/ipc_init.h>
 #include <kern/cpu_number.h>
 #include <kern/debug.h>
+#include <kern/gsync.h>
 #include <kern/machine.h>
 #include <kern/mach_factor.h>
 #include <kern/mach_clock.h>
@@ -158,6 +159,8 @@ void setup_main(void)
        recompute_priorities(NULL);
        compute_mach_factor();

+       gsync_setup ();
+
        /*
         *      Create a kernel thread to start the other kernel
         *      threads.  Thread_resume (from kernel_thread) calls

[Prev in Thread]

Current Thread

[Next in Thread]

RFC: Lightweight synchronization mechanism for gnumach, Agustina Arzille <=
- Re: RFC: Lightweight synchronization mechanism for gnumach, Agustina Arzille, 2016/02/28
- Re: RFC: Lightweight synchronization mechanism for gnumach, Justus Winter, 2016/02/28
  - Re: RFC: Lightweight synchronization mechanism for gnumach, Samuel Thibault, 2016/02/28
  - Re: RFC: Lightweight synchronization mechanism for gnumach, Agustina Arzille, 2016/02/28
    - Re: RFC: Lightweight synchronization mechanism for gnumach, Samuel Thibault, 2016/02/28
    - Re: RFC: Lightweight synchronization mechanism for gnumach, Agustina Arzille, 2016/02/28
    - Re: RFC: Lightweight synchronization mechanism for gnumach, Samuel Thibault, 2016/02/28
    - Re: RFC: Lightweight synchronization mechanism for gnumach, Richard Braun, 2016/02/28
  - Re: RFC: Lightweight synchronization mechanism for gnumach, Richard Braun, 2016/02/28
- Re: RFC: Lightweight synchronization mechanism for gnumach, Richard Braun, 2016/02/28

Prev by Date: Re: [PATCH] Fix use-after-free in libps.
Next by Date: [bug #32770] Add /proc/$pid/maps
Previous by thread: [PATCH] Fix use-after-free in libps.
Next by thread: Re: RFC: Lightweight synchronization mechanism for gnumach
Index(es):
- Date
- Thread