[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] Fix race condition in ext2fs when remounting
From: |
James Clarke |
Subject: |
[PATCH] Fix race condition in ext2fs when remounting |
Date: |
Wed, 22 Jul 2015 23:42:55 +0100 |
On some systems, ext2fs.static would regularly hang at startup, as a
race condition meant it would process paging requests while reounting.
To fix this, libpager has been altered to allow inhibiting and resuming
its worker threads.
* console/pager.c (pager_requests): New variable.
(user_pager_init): Updated call to pager_start_workers to use new
pager_requests variable.
* daemons/runsystem.sh: Removed artificial delay working around the race
condition.
* ext2fs/ext2fs.c (diskfs_reload_global_state): Call new
inhibit_ext2_pager and resume_ext2_pager functions, and leave sblock as
non-NULL so it will be munmapped.
* ext2fs/ext2fs.h (inhibit_ext2_pager,resume_ext2_pager): New functions.
* ext2fs/pager.c (file_pager_requests): New variable.
(create_disk_pager): Updated call to pager_start_workers to use new
file_pager_requests variable.
(inhibit_ext2_pager,resume_ext2_pager): New functions.
* fatfs/fatfs.h (inhibit_fat_pager,resume_fat_pager): New functions.
* fatfs/pager.c (file_pager_requests): New variable.
(create_fat_pager): Updated call to pager_start_workers to use new
file_pager_requests variable.
(inhibit_fat_pager,resume_fat_pager): New functions.
* libdiskfs/disk-pager.c (diskfs_disk_pager_requests): New variable.
(diskfs_start_disk_pager): Updated call to pager_start_workers to use
new diskfs_disk_pager_requests variable.
* libdiskfs/diskfs-pager.h (diskfs_disk_pager_requests): New variable.
* libpager/demuxer.c (struct pager_requests): Renamed struct requests to
struct pager_requests. Replaced queue with queue_in and queue_out
pointers. Added inhibit_wakeup field.
(pager_demuxer): Updated to use new queue_in/queue_out pointers. Only
wake up workers if not inhibited.
(worker_func): Updated to use new queue_in/queue_out pointers. Final
worker thread to sleep notifies the inhibit_wakeup condition variable.
(pager_start_workers): Added out parameter for the requests instance.
Allocate heap space shared by both queues. Initialise new inhibit_wakeup
condition.
(pager_inhibit_workers,pager_resume_workers): New functions.
* libpager/pager.h (struct pager_requests): Public forward definition.
(pager_start_workers): Added out parameter for the requests instance.
(pager_inhibit_workers,pager_resume_workers): New functions.
* libpager/queue.h (queue_empty): New function.
* storeio/pager.c (pager_requests): New variable.
(init_dev_paging): Updated call to pager_start_workers to use new
pager_requests variable.
---
console/pager.c | 3 +-
daemons/runsystem.sh | 3 --
ext2fs/ext2fs.c | 12 ++++-
ext2fs/ext2fs.h | 6 +++
ext2fs/pager.c | 29 +++++++++++-
fatfs/fatfs.h | 2 +
fatfs/pager.c | 29 +++++++++++-
libdiskfs/disk-pager.c | 3 +-
libdiskfs/diskfs-pager.h | 1 +
libpager/demuxer.c | 119 ++++++++++++++++++++++++++++++++++++++++-------
libpager/pager.h | 28 ++++++++++-
libpager/queue.h | 8 ++++
storeio/pager.c | 3 +-
13 files changed, 219 insertions(+), 27 deletions(-)
diff --git a/console/pager.c b/console/pager.c
index 5e13ba4..818e49d 100644
--- a/console/pager.c
+++ b/console/pager.c
@@ -42,6 +42,7 @@ struct user_pager_info
/* We need a separate bucket for the pager ports. */
static struct port_bucket *pager_bucket;
+static struct pager_requests *pager_requests;
/* Implement the pager_clear_user_data callback from the pager library. */
@@ -133,7 +134,7 @@ user_pager_init (void)
error (5, errno, "Cannot create pager bucket");
/* Start libpagers worker threads. */
- err = pager_start_workers (pager_bucket);
+ err = pager_start_workers (pager_bucket, &pager_requests);
if (err)
error (5, err, "Cannot start pager worker threads");
}
diff --git a/daemons/runsystem.sh b/daemons/runsystem.sh
index ae25a7d..5d0ad01 100644
--- a/daemons/runsystem.sh
+++ b/daemons/runsystem.sh
@@ -118,9 +118,6 @@ esac
/hurd/mach-defpager
# This is necessary to make stat / return the correct device ids.
-# Work around a race condition (probably in the root translator).
-for i in `seq 1 100000` ; do : ; done # XXX
-
fsysopts / --update --readonly
# Finally, start the actual init.
diff --git a/ext2fs/ext2fs.c b/ext2fs/ext2fs.c
index d0fdfe7..03c9eed 100644
--- a/ext2fs/ext2fs.c
+++ b/ext2fs/ext2fs.c
@@ -207,10 +207,20 @@ main (int argc, char **argv)
error_t
diskfs_reload_global_state ()
{
+ error_t err;
+
pokel_flush (&global_pokel);
pager_flush (diskfs_disk_pager, 1);
- sblock = NULL;
+
+ /* libdiskfs is not responsible for inhibiting paging. */
+ err = inhibit_ext2_pager ();
+ if (err)
+ return err;
+
get_hypermetadata ();
map_hypermetadata ();
+
+ resume_ext2_pager ();
+
return 0;
}
diff --git a/ext2fs/ext2fs.h b/ext2fs/ext2fs.h
index 96d8e9d..a744685 100644
--- a/ext2fs/ext2fs.h
+++ b/ext2fs/ext2fs.h
@@ -201,6 +201,12 @@ struct user_pager_info
/* Set up the disk pager. */
void create_disk_pager (void);
+/* Inhibit the disk pager. */
+error_t inhibit_ext2_pager (void);
+
+/* Resume the disk pager. */
+void resume_ext2_pager (void);
+
/* Call this when we should turn off caching so that unused memory object
ports get freed. */
void drop_pager_softrefs (struct node *node);
diff --git a/ext2fs/pager.c b/ext2fs/pager.c
index b56c923..c200f30 100644
--- a/ext2fs/pager.c
+++ b/ext2fs/pager.c
@@ -34,6 +34,10 @@ struct port_bucket *disk_pager_bucket;
/* A ports bucket to hold file pager ports. */
struct port_bucket *file_pager_bucket;
+/* Stores a reference to the requests instance used by the file pager so its
+ worker threads can be inhibited and resumed. */
+struct pager_requests *file_pager_requests;
+
pthread_spinlock_t node_to_page_lock = PTHREAD_SPINLOCK_INITIALIZER;
@@ -1217,11 +1221,34 @@ create_disk_pager (void)
file_pager_bucket = ports_create_bucket ();
/* Start libpagers worker threads. */
- err = pager_start_workers (file_pager_bucket);
+ err = pager_start_workers (file_pager_bucket, &file_pager_requests);
if (err)
ext2_panic ("can't create libpager worker threads: %s", strerror (err));
}
+error_t
+inhibit_ext2_pager (void)
+{
+ error_t err;
+
+ /* The file pager can rely on the disk pager, so inhibit the file
+ pager first. */
+
+ err = pager_inhibit_workers (file_pager_requests);
+ if (err)
+ return err;
+
+ err = pager_inhibit_workers (diskfs_disk_pager_requests);
+ return err;
+}
+
+void
+resume_ext2_pager (void)
+{
+ pager_resume_workers (diskfs_disk_pager_requests);
+ pager_resume_workers (file_pager_requests);
+}
+
/* Call this to create a FILE_DATA pager and return a send right.
NODE must be locked. */
mach_port_t
diff --git a/fatfs/fatfs.h b/fatfs/fatfs.h
index 3c3d836..54c3426 100644
--- a/fatfs/fatfs.h
+++ b/fatfs/fatfs.h
@@ -121,6 +121,8 @@ extern struct dirrect dr_root_node;
void drop_pager_softrefs (struct node *);
void allow_pager_softrefs (struct node *);
void create_fat_pager (void);
+error_t inhibit_fat_pager (void);
+void resume_fat_pager (void);
void flush_node_pager (struct node *node);
diff --git a/fatfs/pager.c b/fatfs/pager.c
index 10d1fc9..a4f70d8 100644
--- a/fatfs/pager.c
+++ b/fatfs/pager.c
@@ -29,6 +29,10 @@ struct port_bucket *disk_pager_bucket;
/* A ports bucket to hold file pager ports. */
struct port_bucket *file_pager_bucket;
+/* Stores a reference to the requests instance used by the file pager so its
+ worker threads can be inhibited and resumed. */
+struct pager_requests *file_pager_requests;
+
/* Mapped image of the FAT. */
void *fat_image;
@@ -776,11 +780,34 @@ create_fat_pager (void)
file_pager_bucket = ports_create_bucket ();
/* Start libpagers worker threads. */
- err = pager_start_workers (file_pager_bucket);
+ err = pager_start_workers (file_pager_bucket, &file_pager_requests);
if (err)
error (2, err, "can't create libpager worker threads");
}
+error_t
+inhibit_fat_pager (void)
+{
+ error_t err;
+
+ /* The file pager can rely on the disk pager, so inhibit the file
+ pager first. */
+
+ err = pager_inhibit_workers (file_pager_requests);
+ if (err)
+ return err;
+
+ err = pager_inhibit_workers (diskfs_disk_pager_requests);
+ return err;
+}
+
+void
+resume_fat_pager (void)
+{
+ pager_resume_workers (diskfs_disk_pager_requests);
+ pager_resume_workers (file_pager_requests);
+}
+
/* Call this to create a FILE_DATA pager and return a send right.
NODE must be locked. */
mach_port_t
diff --git a/libdiskfs/disk-pager.c b/libdiskfs/disk-pager.c
index 008aa2d..33b109c 100644
--- a/libdiskfs/disk-pager.c
+++ b/libdiskfs/disk-pager.c
@@ -24,6 +24,7 @@
__thread struct disk_image_user *diskfs_exception_diu;
struct pager *diskfs_disk_pager;
+struct requests *diskfs_disk_pager_requests;
static void fault_handler (int sig, long int sigcode, struct sigcontext *scp);
static struct hurd_signal_preemptor preemptor =
@@ -43,7 +44,7 @@ diskfs_start_disk_pager (struct user_pager_info *upi,
mach_port_t disk_pager_port;
/* Start libpagers worker threads. */
- err = pager_start_workers (pager_bucket);
+ err = pager_start_workers (pager_bucket, &diskfs_disk_pager_requests);
if (err)
error (2, err, "creating pager worker threads failed");
diff --git a/libdiskfs/diskfs-pager.h b/libdiskfs/diskfs-pager.h
index a253069..db99f9ff 100644
--- a/libdiskfs/diskfs-pager.h
+++ b/libdiskfs/diskfs-pager.h
@@ -40,6 +40,7 @@ extern void diskfs_start_disk_pager (struct user_pager_info
*info,
size_t size, void **image);
extern struct pager *diskfs_disk_pager;
+extern struct requests *diskfs_disk_pager_requests;
struct disk_image_user
{
diff --git a/libpager/demuxer.c b/libpager/demuxer.c
index 4dd3cd8..59dd1c5 100644
--- a/libpager/demuxer.c
+++ b/libpager/demuxer.c
@@ -60,7 +60,7 @@ request_inp (const struct request *r)
/* A worker. */
struct worker
{
- struct requests *requests; /* our pagers request queue */
+ struct pager_requests *requests; /* our pagers request queue */
struct queue queue; /* other workers may delegate requests to us */
unsigned long tag; /* tag of the object we are working on */
};
@@ -68,12 +68,18 @@ struct worker
/* This is the queue for incoming requests. A single thread receives
messages from the port set, looks the service routine up, and
enqueues the request here. */
-struct requests
+struct pager_requests
{
struct port_bucket *bucket;
- struct queue queue;
+ /* Normally, both queues are the same. However, when the workers are
+ inhibited, a new queue_in is created, but queue_out is left as the
+ old value, so the workers drain queue_out but do not receive new
+ requests. */
+ struct queue *queue_in; /* the queue to add to */
+ struct queue *queue_out; /* the queue to take from */
int asleep;
pthread_cond_t wakeup;
+ pthread_cond_t inhibit_wakeup;
pthread_mutex_t lock;
struct worker workers[WORKER_COUNT];
};
@@ -81,7 +87,7 @@ struct requests
/* Demultiplex a single message directed at a pager port; INP is the
message received; fill OUTP with the reply. */
static int
-pager_demuxer (struct requests *requests,
+pager_demuxer (struct pager_requests *requests,
mach_msg_header_t *inp,
mach_msg_header_t *outp)
{
@@ -108,10 +114,10 @@ pager_demuxer (struct requests *requests,
pthread_mutex_lock (&requests->lock);
- queue_enqueue (&requests->queue, &r->item);
+ queue_enqueue (requests->queue_in, &r->item);
- /* Awake worker. */
- if (requests->asleep > 0)
+ /* Awake worker, but only if not inhibited. */
+ if (requests->asleep > 0 && requests->queue_in == requests->queue_out)
pthread_cond_signal (&requests->wakeup);
pthread_mutex_unlock (&requests->lock);
@@ -160,7 +166,7 @@ static void *
worker_func (void *arg)
{
struct worker *self = (struct worker *) arg;
- struct requests *requests = self->requests;
+ struct pager_requests *requests = self->requests;
struct request *r = NULL;
mig_reply_header_t reply_msg;
@@ -186,9 +192,11 @@ worker_func (void *arg)
get_request_locked:
/* ... get a request from the global queue instead. */
- while ((r = queue_dequeue (&requests->queue)) == NULL)
+ while ((r = queue_dequeue (requests->queue_out)) == NULL)
{
requests->asleep += 1;
+ if (requests->asleep == WORKER_COUNT)
+ pthread_cond_broadcast (&requests->inhibit_wakeup);
pthread_cond_wait (&requests->wakeup, &requests->lock);
requests->asleep -= 1;
}
@@ -281,7 +289,7 @@ worker_func (void *arg)
static void *
service_paging_requests (void *arg)
{
- struct requests *requests = arg;
+ struct pager_requests *requests = arg;
int demuxer (mach_msg_header_t *inp,
mach_msg_header_t *outp)
@@ -298,27 +306,44 @@ service_paging_requests (void *arg)
/* Start the worker threads libpager uses to service requests. */
error_t
-pager_start_workers (struct port_bucket *pager_bucket)
+pager_start_workers (struct port_bucket *pager_bucket,
+ struct pager_requests **out_requests)
{
error_t err;
int i;
pthread_t t;
- struct requests *requests;
+ struct pager_requests *requests;
+
+ assert (out_requests != NULL);
requests = malloc (sizeof *requests);
if (requests == NULL)
- return ENOMEM;
+ {
+ err = ENOMEM;
+ goto done;
+ }
requests->bucket = pager_bucket;
requests->asleep = 0;
- queue_init (&requests->queue);
+
+ requests->queue_in = malloc (sizeof *requests->queue_in);
+ if (requests->queue_in == NULL)
+ {
+ err = ENOMEM;
+ goto done;
+ }
+ queue_init (requests->queue_in);
+ /* Until the workers are inhibited, both queues are the same. */
+ requests->queue_out = requests->queue_in;
+
pthread_cond_init (&requests->wakeup, NULL);
+ pthread_cond_init (&requests->inhibit_wakeup, NULL);
pthread_mutex_init (&requests->lock, NULL);
/* Make a thread to service paging requests. */
err = pthread_create (&t, NULL, service_paging_requests, requests);
if (err)
- return err;
+ goto done;
pthread_detach (t);
for (i = 0; i < WORKER_COUNT; i++)
@@ -329,9 +354,71 @@ pager_start_workers (struct port_bucket *pager_bucket)
err = pthread_create (&t, NULL, &worker_func, &requests->workers[i]);
if (err)
- return err;
+ goto done;
pthread_detach (t);
}
+done:
+ if (err)
+ *out_requests = NULL;
+ else
+ *out_requests = requests;
+
return err;
}
+
+error_t
+pager_inhibit_workers (struct pager_requests *requests)
+{
+ error_t err = 0;
+
+ pthread_mutex_lock (&requests->lock);
+
+ /* Check the workers are not already inhibited. */
+ assert (requests->queue_out == requests->queue_in);
+
+ /* Any new paging requests will go into a new queue. */
+ struct queue *new_queue = malloc (sizeof *new_queue);
+ if (new_queue == NULL)
+ {
+ err = ENOMEM;
+ goto done_locked;
+ }
+ queue_init (new_queue);
+ requests->queue_in = new_queue;
+
+ /* Wait until all the workers are asleep and the queue has been
+ drained. All individual worker queues must have been drained, as
+ they are populated while the relevant worker is still running, and
+ it will always drain its personal queue before sleeping.
+ Check that the queue is empty, since it's possible that a request
+ came in, was queued and a worker was signalled but the lock was
+ acquired here before the worker woke up. */
+ while (requests->asleep < WORKER_COUNT || !queue_empty(requests->queue_out))
+ pthread_cond_wait (&requests->inhibit_wakeup, &requests->lock);
+
+done_locked:
+ pthread_mutex_unlock (&requests->lock);
+ return err;
+}
+
+void
+pager_resume_workers (struct pager_requests *requests)
+{
+ pthread_mutex_lock (&requests->lock);
+
+ /* Check the workers are inhibited. */
+ assert (requests->queue_out != requests->queue_in);
+ assert (requests->asleep == WORKER_COUNT);
+ assert (queue_empty(requests->queue_out));
+
+ /* The queue has been drained and will no longer be used. */
+ free (requests->queue_out);
+ requests->queue_out = requests->queue_in;
+
+ /* We need to wake up all workers, as there could be multiple requests
+ in the new queue. */
+ pthread_cond_broadcast (&requests->wakeup);
+
+ pthread_mutex_unlock (&requests->lock);
+}
diff --git a/libpager/pager.h b/libpager/pager.h
index fe34238..df4db68 100644
--- a/libpager/pager.h
+++ b/libpager/pager.h
@@ -25,8 +25,32 @@
scope. */
struct user_pager_info;
-/* Start the worker threads libpager uses to service requests. */
-error_t pager_start_workers (struct port_bucket *pager_bucket);
+struct pager_requests;
+
+/* Start the worker threads libpager uses to service requests. If no
+ error is returned, *requests will be a valid pointer, else it will be
+ set to NULL. */
+error_t
+pager_start_workers (struct port_bucket *pager_bucket,
+ struct pager_requests **requests);
+
+/* Inhibit the worker threads libpager uses to service requests,
+ blocking until all requests sent before this function is called have
+ finished.
+ Note that RPCs will not be inhibited, so new requests will
+ queue up, but will not be handled until the workers are resumed. If
+ RPCs should be inhibited as well, call ports_inhibit_bucket_rpcs with
+ the bucket used to create the workers before calling this. However,
+ inhibiting RPCs and not calling this is generally insufficient, as
+ libports is unaware of our internal worker pool, and will return once
+ all the RPCs have been queued, before they have been handled by a
+ worker thread. */
+error_t
+pager_inhibit_workers (struct pager_requests *requests);
+
+/* Resume the worker threads libpager uses to service requests. */
+void
+pager_resume_workers (struct pager_requests *requests);
/* Create a new pager. The pager will have a port created for it
(using libports, in BUCKET) and will be immediately ready
diff --git a/libpager/queue.h b/libpager/queue.h
index d3cf738..abcd3b9 100644
--- a/libpager/queue.h
+++ b/libpager/queue.h
@@ -19,6 +19,8 @@
You should have received a copy of the GNU General Public License
along with the GNU Hurd. If not, see <http://www.gnu.org/licenses/>. */
+#include <stdbool.h>
+
/* A FIFO queue with constant-time enqueue and dequeue operations. */
struct item {
struct item *next;
@@ -59,3 +61,9 @@ queue_dequeue (struct queue *q)
r->next = NULL;
return r;
}
+
+static inline bool
+queue_empty (struct queue *q)
+{
+ return q->head == NULL;
+}
diff --git a/storeio/pager.c b/storeio/pager.c
index c260d73..f8f59cd 100644
--- a/storeio/pager.c
+++ b/storeio/pager.c
@@ -142,6 +142,7 @@ pager_clear_user_data (struct user_pager_info *upi)
}
static struct port_bucket *pager_port_bucket = 0;
+static struct pager_requests *pager_requests;
/* Initialize paging for this device. */
static void
@@ -160,7 +161,7 @@ init_dev_paging ()
pager_port_bucket = ports_create_bucket ();
/* Start libpagers worker threads. */
- err = pager_start_workers (pager_port_bucket);
+ err = pager_start_workers (pager_port_bucket, &pager_requests);
if (err)
{
errno = err;
--
2.4.6
- [PATCH] Fix race condition in ext2fs when remounting,
James Clarke <=