bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2] Fix race condition in ext2fs when remounting


From: James Clarke
Subject: [PATCH v2] Fix race condition in ext2fs when remounting
Date: Wed, 22 Jul 2015 23:52:54 +0100

On some systems, ext2fs.static would regularly hang at startup, as a
race condition meant it would process paging requests while remounting.
To fix this, libpager has been altered to allow inhibiting and resuming
its worker threads, and ext2fs uses this to inhibit paging while
remounting.

* console/pager.c (pager_requests): New variable.
(user_pager_init): Updated call to pager_start_workers to use new
pager_requests variable.
* daemons/runsystem.sh: Removed artificial delay working around the race
condition.
* ext2fs/ext2fs.c (diskfs_reload_global_state): Call new
inhibit_ext2_pager and resume_ext2_pager functions, and leave sblock as
non-NULL so it will be munmapped.
* ext2fs/ext2fs.h (inhibit_ext2_pager,resume_ext2_pager): New functions.
* ext2fs/pager.c (file_pager_requests): New variable.
(create_disk_pager): Updated call to pager_start_workers to use new
file_pager_requests variable.
(inhibit_ext2_pager,resume_ext2_pager): New functions.
* fatfs/fatfs.h (inhibit_fat_pager,resume_fat_pager): New functions.
* fatfs/pager.c (file_pager_requests): New variable.
(create_fat_pager): Updated call to pager_start_workers to use new
file_pager_requests variable.
(inhibit_fat_pager,resume_fat_pager): New functions.
* libdiskfs/disk-pager.c (diskfs_disk_pager_requests): New variable.
(diskfs_start_disk_pager): Updated call to pager_start_workers to use
new diskfs_disk_pager_requests variable.
* libdiskfs/diskfs-pager.h (diskfs_disk_pager_requests): New variable.
* libpager/demuxer.c (struct pager_requests): Renamed struct requests to
struct pager_requests. Replaced queue with queue_in and queue_out
pointers. Added inhibit_wakeup field.
(pager_demuxer): Updated to use new queue_in/queue_out pointers. Only
wake up workers if not inhibited.
(worker_func): Updated to use new queue_in/queue_out pointers. Final
worker thread to sleep notifies the inhibit_wakeup condition variable.
(pager_start_workers): Added out parameter for the requests instance.
Allocate heap space shared by both queues. Initialise new inhibit_wakeup
condition.
(pager_inhibit_workers,pager_resume_workers): New functions.
* libpager/pager.h (struct pager_requests): Public forward definition.
(pager_start_workers): Added out parameter for the requests instance.
(pager_inhibit_workers,pager_resume_workers): New functions.
* libpager/queue.h (queue_empty): New function.
* storeio/pager.c (pager_requests): New variable.
(init_dev_paging): Updated call to pager_start_workers to use new
pager_requests variable.
---
 console/pager.c          |   3 +-
 daemons/runsystem.sh     |   3 --
 ext2fs/ext2fs.c          |  12 ++++-
 ext2fs/ext2fs.h          |   6 +++
 ext2fs/pager.c           |  29 +++++++++++-
 fatfs/fatfs.h            |   2 +
 fatfs/pager.c            |  29 +++++++++++-
 libdiskfs/disk-pager.c   |   3 +-
 libdiskfs/diskfs-pager.h |   1 +
 libpager/demuxer.c       | 119 ++++++++++++++++++++++++++++++++++++++++-------
 libpager/pager.h         |  28 ++++++++++-
 libpager/queue.h         |   8 ++++
 storeio/pager.c          |   3 +-
 13 files changed, 219 insertions(+), 27 deletions(-)

diff --git a/console/pager.c b/console/pager.c
index 5e13ba4..818e49d 100644
--- a/console/pager.c
+++ b/console/pager.c
@@ -42,6 +42,7 @@ struct user_pager_info
 
 /* We need a separate bucket for the pager ports.  */
 static struct port_bucket *pager_bucket;
+static struct pager_requests *pager_requests;
 
 
 /* Implement the pager_clear_user_data callback from the pager library. */
@@ -133,7 +134,7 @@ user_pager_init (void)
     error (5, errno, "Cannot create pager bucket");
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (pager_bucket);
+  err = pager_start_workers (pager_bucket, &pager_requests);
   if (err)
     error (5, err, "Cannot start pager worker threads");
 }
diff --git a/daemons/runsystem.sh b/daemons/runsystem.sh
index ae25a7d..5d0ad01 100644
--- a/daemons/runsystem.sh
+++ b/daemons/runsystem.sh
@@ -118,9 +118,6 @@ esac
 /hurd/mach-defpager
 
 # This is necessary to make stat / return the correct device ids.
-# Work around a race condition (probably in the root translator).
-for i in `seq 1 100000` ; do : ; done # XXX
-
 fsysopts / --update --readonly
 
 # Finally, start the actual init.
diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c
index d0fdfe7..03c9eed 100644
--- a/ext2fs/ext2fs.c
+++ b/ext2fs/ext2fs.c
@@ -207,10 +207,20 @@ main (int argc, char **argv)
 error_t
 diskfs_reload_global_state ()
 {
+  error_t err;
+
   pokel_flush (&global_pokel);
   pager_flush (diskfs_disk_pager, 1);
-  sblock = NULL;
+
+  /* libdiskfs is not responsible for inhibiting paging.  */
+  err = inhibit_ext2_pager ();
+  if (err)
+    return err;
+
   get_hypermetadata ();
   map_hypermetadata ();
+
+  resume_ext2_pager ();
+
   return 0;
 }
diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h
index 96d8e9d..a744685 100644
--- a/ext2fs/ext2fs.h
+++ b/ext2fs/ext2fs.h
@@ -201,6 +201,12 @@ struct user_pager_info
 /* Set up the disk pager.  */
 void create_disk_pager (void);
 
+/* Inhibit the disk pager.  */
+error_t inhibit_ext2_pager (void);
+
+/* Resume the disk pager.  */
+void resume_ext2_pager (void);
+
 /* Call this when we should turn off caching so that unused memory object
    ports get freed.  */
 void drop_pager_softrefs (struct node *node);
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index b56c923..c200f30 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -34,6 +34,10 @@ struct port_bucket *disk_pager_bucket;
 /* A ports bucket to hold file pager ports.  */
 struct port_bucket *file_pager_bucket;
 
+/* Stores a reference to the requests instance used by the file pager so its
+   worker threads can be inhibited and resumed.  */
+struct pager_requests *file_pager_requests;
+
 pthread_spinlock_t node_to_page_lock = PTHREAD_SPINLOCK_INITIALIZER;
 
 
@@ -1217,11 +1221,34 @@ create_disk_pager (void)
   file_pager_bucket = ports_create_bucket ();
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (file_pager_bucket);
+  err = pager_start_workers (file_pager_bucket, &file_pager_requests);
   if (err)
     ext2_panic ("can't create libpager worker threads: %s", strerror (err));
 }
 
+error_t
+inhibit_ext2_pager (void)
+{
+  error_t err;
+
+  /* The file pager can rely on the disk pager, so inhibit the file
+     pager first.  */
+
+  err = pager_inhibit_workers (file_pager_requests);
+  if (err)
+    return err;
+
+  err = pager_inhibit_workers (diskfs_disk_pager_requests);
+  return err;
+}
+
+void
+resume_ext2_pager (void)
+{
+  pager_resume_workers (diskfs_disk_pager_requests);
+  pager_resume_workers (file_pager_requests);
+}
+
 /* Call this to create a FILE_DATA pager and return a send right.
    NODE must be locked.  */
 mach_port_t
diff --git a/fatfs/fatfs.h b/fatfs/fatfs.h
index 3c3d836..54c3426 100644
--- a/fatfs/fatfs.h
+++ b/fatfs/fatfs.h
@@ -121,6 +121,8 @@ extern struct dirrect dr_root_node;
 void drop_pager_softrefs (struct node *);
 void allow_pager_softrefs (struct node *);
 void create_fat_pager (void);
+error_t inhibit_fat_pager (void);
+void resume_fat_pager (void);
 
 void flush_node_pager (struct node *node);
 
diff --git a/fatfs/pager.c b/fatfs/pager.c
index 10d1fc9..a4f70d8 100644
--- a/fatfs/pager.c
+++ b/fatfs/pager.c
@@ -29,6 +29,10 @@ struct port_bucket *disk_pager_bucket;
 /* A ports bucket to hold file pager ports.  */
 struct port_bucket *file_pager_bucket;
 
+/* Stores a reference to the requests instance used by the file pager so its
+   worker threads can be inhibited and resumed.  */
+struct pager_requests *file_pager_requests;
+
 /* Mapped image of the FAT.  */
 void *fat_image;
 
@@ -776,11 +780,34 @@ create_fat_pager (void)
   file_pager_bucket = ports_create_bucket ();
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (file_pager_bucket);
+  err = pager_start_workers (file_pager_bucket, &file_pager_requests);
   if (err)
     error (2, err, "can't create libpager worker threads");
 }
 
+error_t
+inhibit_fat_pager (void)
+{
+  error_t err;
+
+  /* The file pager can rely on the disk pager, so inhibit the file
+     pager first.  */
+
+  err = pager_inhibit_workers (file_pager_requests);
+  if (err)
+    return err;
+
+  err = pager_inhibit_workers (diskfs_disk_pager_requests);
+  return err;
+}
+
+void
+resume_fat_pager (void)
+{
+  pager_resume_workers (diskfs_disk_pager_requests);
+  pager_resume_workers (file_pager_requests);
+}
+
 /* Call this to create a FILE_DATA pager and return a send right.
    NODE must be locked.  */
 mach_port_t
diff --git a/libdiskfs/disk-pager.c b/libdiskfs/disk-pager.c
index 008aa2d..33b109c 100644
--- a/libdiskfs/disk-pager.c
+++ b/libdiskfs/disk-pager.c
@@ -24,6 +24,7 @@
 __thread struct disk_image_user *diskfs_exception_diu;
 
 struct pager *diskfs_disk_pager;
+struct requests *diskfs_disk_pager_requests;
 
 static void fault_handler (int sig, long int sigcode, struct sigcontext *scp);
 static struct hurd_signal_preemptor preemptor =
@@ -43,7 +44,7 @@ diskfs_start_disk_pager (struct user_pager_info *upi,
   mach_port_t disk_pager_port;
 
   /* Start libpagers worker threads.  */
-  err = pager_start_workers (pager_bucket);
+  err = pager_start_workers (pager_bucket, &diskfs_disk_pager_requests);
   if (err)
     error (2, err, "creating pager worker threads failed");
 
diff --git a/libdiskfs/diskfs-pager.h b/libdiskfs/diskfs-pager.h
index a253069..db99f9ff 100644
--- a/libdiskfs/diskfs-pager.h
+++ b/libdiskfs/diskfs-pager.h
@@ -40,6 +40,7 @@ extern void diskfs_start_disk_pager (struct user_pager_info 
*info,
                                     size_t size, void **image);
 
 extern struct pager *diskfs_disk_pager;
+extern struct requests *diskfs_disk_pager_requests;
 
 struct disk_image_user
   {
diff --git a/libpager/demuxer.c b/libpager/demuxer.c
index 4dd3cd8..59dd1c5 100644
--- a/libpager/demuxer.c
+++ b/libpager/demuxer.c
@@ -60,7 +60,7 @@ request_inp (const struct request *r)
 /* A worker.  */
 struct worker
 {
-  struct requests *requests;   /* our pagers request queue */
+  struct pager_requests *requests;     /* our pagers request queue */
   struct queue queue;  /* other workers may delegate requests to us */
   unsigned long tag;   /* tag of the object we are working on */
 };
@@ -68,12 +68,18 @@ struct worker
 /* This is the queue for incoming requests.  A single thread receives
    messages from the port set, looks the service routine up, and
    enqueues the request here.  */
-struct requests
+struct pager_requests
 {
   struct port_bucket *bucket;
-  struct queue queue;
+  /* Normally, both queues are the same.  However, when the workers are
+     inhibited, a new queue_in is created, but queue_out is left as the
+     old value, so the workers drain queue_out but do not receive new
+     requests.  */
+  struct queue *queue_in;      /* the queue to add to */
+  struct queue *queue_out;     /* the queue to take from */
   int asleep;
   pthread_cond_t wakeup;
+  pthread_cond_t inhibit_wakeup;
   pthread_mutex_t lock;
   struct worker workers[WORKER_COUNT];
 };
@@ -81,7 +87,7 @@ struct requests
 /* Demultiplex a single message directed at a pager port; INP is the
    message received; fill OUTP with the reply.  */
 static int
-pager_demuxer (struct requests *requests,
+pager_demuxer (struct pager_requests *requests,
               mach_msg_header_t *inp,
               mach_msg_header_t *outp)
 {
@@ -108,10 +114,10 @@ pager_demuxer (struct requests *requests,
 
   pthread_mutex_lock (&requests->lock);
 
-  queue_enqueue (&requests->queue, &r->item);
+  queue_enqueue (requests->queue_in, &r->item);
 
-  /* Awake worker.  */
-  if (requests->asleep > 0)
+  /* Awake worker, but only if not inhibited.  */
+  if (requests->asleep > 0 && requests->queue_in == requests->queue_out)
       pthread_cond_signal (&requests->wakeup);
 
   pthread_mutex_unlock (&requests->lock);
@@ -160,7 +166,7 @@ static void *
 worker_func (void *arg)
 {
   struct worker *self = (struct worker *) arg;
-  struct requests *requests = self->requests;
+  struct pager_requests *requests = self->requests;
   struct request *r = NULL;
   mig_reply_header_t reply_msg;
 
@@ -186,9 +192,11 @@ worker_func (void *arg)
 
     get_request_locked:
       /* ... get a request from the global queue instead.  */
-      while ((r = queue_dequeue (&requests->queue)) == NULL)
+      while ((r = queue_dequeue (requests->queue_out)) == NULL)
        {
          requests->asleep += 1;
+         if (requests->asleep == WORKER_COUNT)
+           pthread_cond_broadcast (&requests->inhibit_wakeup);
          pthread_cond_wait (&requests->wakeup, &requests->lock);
          requests->asleep -= 1;
        }
@@ -281,7 +289,7 @@ worker_func (void *arg)
 static void *
 service_paging_requests (void *arg)
 {
-  struct requests *requests = arg;
+  struct pager_requests *requests = arg;
 
   int demuxer (mach_msg_header_t *inp,
               mach_msg_header_t *outp)
@@ -298,27 +306,44 @@ service_paging_requests (void *arg)
 
 /* Start the worker threads libpager uses to service requests.  */
 error_t
-pager_start_workers (struct port_bucket *pager_bucket)
+pager_start_workers (struct port_bucket *pager_bucket,
+                    struct pager_requests **out_requests)
 {
   error_t err;
   int i;
   pthread_t t;
-  struct requests *requests;
+  struct pager_requests *requests;
+
+  assert (out_requests != NULL);
 
   requests = malloc (sizeof *requests);
   if (requests == NULL)
-    return ENOMEM;
+    {
+      err = ENOMEM;
+      goto done;
+    }
 
   requests->bucket = pager_bucket;
   requests->asleep = 0;
-  queue_init (&requests->queue);
+
+  requests->queue_in = malloc (sizeof *requests->queue_in);
+  if (requests->queue_in == NULL)
+    {
+      err = ENOMEM;
+      goto done;
+    }
+  queue_init (requests->queue_in);
+  /* Until the workers are inhibited, both queues are the same.  */
+  requests->queue_out = requests->queue_in;
+
   pthread_cond_init (&requests->wakeup, NULL);
+  pthread_cond_init (&requests->inhibit_wakeup, NULL);
   pthread_mutex_init (&requests->lock, NULL);
 
   /* Make a thread to service paging requests.  */
   err = pthread_create (&t, NULL, service_paging_requests, requests);
   if (err)
-    return err;
+    goto done;
   pthread_detach (t);
 
   for (i = 0; i < WORKER_COUNT; i++)
@@ -329,9 +354,71 @@ pager_start_workers (struct port_bucket *pager_bucket)
 
       err = pthread_create (&t, NULL, &worker_func, &requests->workers[i]);
       if (err)
-       return err;
+       goto done;
       pthread_detach (t);
     }
 
+done:
+  if (err)
+    *out_requests = NULL;
+  else
+    *out_requests = requests;
+
   return err;
 }
+
+error_t
+pager_inhibit_workers (struct pager_requests *requests)
+{
+  error_t err = 0;
+
+  pthread_mutex_lock (&requests->lock);
+
+  /* Check the workers are not already inhibited.  */
+  assert (requests->queue_out == requests->queue_in);
+
+  /* Any new paging requests will go into a new queue.  */
+  struct queue *new_queue = malloc (sizeof *new_queue);
+  if (new_queue == NULL)
+    {
+      err = ENOMEM;
+      goto done_locked;
+    }
+  queue_init (new_queue);
+  requests->queue_in = new_queue;
+
+  /* Wait until all the workers are asleep and the queue has been
+     drained. All individual worker queues must have been drained, as
+     they are populated while the relevant worker is still running, and
+     it will always drain its personal queue before sleeping.
+     Check that the queue is empty, since it's possible that a request
+     came in, was queued and a worker was signalled but the lock was
+     acquired here before the worker woke up.  */
+  while (requests->asleep < WORKER_COUNT || !queue_empty(requests->queue_out))
+    pthread_cond_wait (&requests->inhibit_wakeup, &requests->lock);
+
+done_locked:
+  pthread_mutex_unlock (&requests->lock);
+  return err;
+}
+
+void
+pager_resume_workers (struct pager_requests *requests)
+{
+  pthread_mutex_lock (&requests->lock);
+
+  /* Check the workers are inhibited.  */
+  assert (requests->queue_out != requests->queue_in);
+  assert (requests->asleep == WORKER_COUNT);
+  assert (queue_empty(requests->queue_out));
+
+  /* The queue has been drained and will no longer be used.  */
+  free (requests->queue_out);
+  requests->queue_out = requests->queue_in;
+
+  /* We need to wake up all workers, as there could be multiple requests
+     in the new queue.  */
+  pthread_cond_broadcast (&requests->wakeup);
+
+  pthread_mutex_unlock (&requests->lock);
+}
diff --git a/libpager/pager.h b/libpager/pager.h
index fe34238..df4db68 100644
--- a/libpager/pager.h
+++ b/libpager/pager.h
@@ -25,8 +25,32 @@
    scope.  */
 struct user_pager_info;
 
-/* Start the worker threads libpager uses to service requests.  */
-error_t pager_start_workers (struct port_bucket *pager_bucket);
+struct pager_requests;
+
+/* Start the worker threads libpager uses to service requests. If no
+   error is returned, *requests will be a valid pointer, else it will be
+   set to NULL.  */
+error_t
+pager_start_workers (struct port_bucket *pager_bucket,
+                    struct pager_requests **requests);
+
+/* Inhibit the worker threads libpager uses to service requests,
+   blocking until all requests sent before this function is called have
+   finished.
+   Note that RPCs will not be inhibited, so new requests will
+   queue up, but will not be handled until the workers are resumed. If
+   RPCs should be inhibited as well, call ports_inhibit_bucket_rpcs with
+   the bucket used to create the workers before calling this. However,
+   inhibiting RPCs and not calling this is generally insufficient, as
+   libports is unaware of our internal worker pool, and will return once
+   all the RPCs have been queued, before they have been handled by a
+   worker thread.  */
+error_t
+pager_inhibit_workers (struct pager_requests *requests);
+
+/* Resume the worker threads libpager uses to service requests.  */
+void
+pager_resume_workers (struct pager_requests *requests);
 
 /* Create a new pager.  The pager will have a port created for it
    (using libports, in BUCKET) and will be immediately ready
diff --git a/libpager/queue.h b/libpager/queue.h
index d3cf738..abcd3b9 100644
--- a/libpager/queue.h
+++ b/libpager/queue.h
@@ -19,6 +19,8 @@
    You should have received a copy of the GNU General Public License
    along with the GNU Hurd.  If not, see <http://www.gnu.org/licenses/>.  */
 
+#include <stdbool.h>
+
 /* A FIFO queue with constant-time enqueue and dequeue operations.  */
 struct item {
   struct item *next;
@@ -59,3 +61,9 @@ queue_dequeue (struct queue *q)
   r->next = NULL;
   return r;
 }
+
+static inline bool
+queue_empty (struct queue *q)
+{
+  return q->head == NULL;
+}
diff --git a/storeio/pager.c b/storeio/pager.c
index c260d73..f8f59cd 100644
--- a/storeio/pager.c
+++ b/storeio/pager.c
@@ -142,6 +142,7 @@ pager_clear_user_data (struct user_pager_info *upi)
 }
 
 static struct port_bucket *pager_port_bucket = 0;
+static struct pager_requests *pager_requests;
 
 /* Initialize paging for this device.  */
 static void
@@ -160,7 +161,7 @@ init_dev_paging ()
          pager_port_bucket = ports_create_bucket ();
 
          /* Start libpagers worker threads.  */
-         err = pager_start_workers (pager_port_bucket);
+         err = pager_start_workers (pager_port_bucket, &pager_requests);
          if (err)
            {
              errno = err;
-- 
2.4.6




reply via email to

[Prev in Thread] Current Thread [Next in Thread]