bug-hurd
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

And another patch...


From: Sergey Bugaev
Subject: And another patch...
Date: Thu, 6 May 2021 20:58:29 +0300

There's yet another bug: libpager just throws away clean precious
pages (those previously offered with pager_offer_page (precious = 1))
upon receiving them from the kernel, since it mistakes them for
evicted pages it's being notified about. This is obviously very
problematic.

I'm attaching a patch that attempts to fix the issue, but I'm less
sure about it. What do you think?

Sergey

-- >8 --
Subject: [PATCH] libpager: Do not throw away precious pages

The kernel invokes memory_object_data_return () with dirty = 0 in two
cases: if notification on eviction was requested, or when returning
precious pages. Properly distinguish between the two cases, and only
throw the clean page away in the former case.
---
 libpager/data-return.c | 69 ++++++++++++++++--------------------------
 libpager/offer-page.c  |  2 ++
 libpager/priv.h        |  1 +
 3 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/libpager/data-return.c b/libpager/data-return.c
index c0f5aaf7..59e3e956 100644
--- a/libpager/data-return.c
+++ b/libpager/data-return.c
@@ -38,12 +38,12 @@ _pager_do_write_request (struct pager *p,
   short *pm_entries;
   int npages, i;
   char *notified;
+  char *omit_data;
   error_t *pagerrs;
   struct lock_request *lr;
   struct lock_list {struct lock_request *lr;
     struct lock_list *next;} *lock_list, *ll;
   int wakeup;
-  int omitdata = 0;

   if (!p
       || p->port.class != _pager_class)
@@ -78,6 +78,9 @@ _pager_do_write_request (struct pager *p,
   npages = length / __vm_page_size;
   pagerrs = alloca (npages * sizeof (error_t));

+  omit_data = alloca (npages * (sizeof *omit_data));
+  memset (omit_data, 0, npages * (sizeof *omit_data));
+
   notified = alloca (npages * (sizeof *notified));
 #ifndef NDEBUG
   memset (notified, -1, npages * (sizeof *notified));
@@ -90,23 +93,6 @@ _pager_do_write_request (struct pager *p,

   pm_entries = &p->pagemap[offset / __vm_page_size];

-  if (! dirty)
-    {
-      munmap ((void *) data, length);
-      if (!kcopy) {
-        /* Prepare notified array.  */
-        for (i = 0; i < npages; i++)
-          notified[i] = (p->notify_on_evict
-                         && ! (pm_entries[i] & PM_PAGEINWAIT));
-
-        goto notify;
-      }
-      else {
-        _pager_allow_termination (p);
-        goto release_out;
-      }
-    }
-
   /* Make sure there are no other in-progress writes for any of these
      pages before we begin.  This imposes a little more serialization
      than we really have to require (because *all* future writes on
@@ -115,27 +101,24 @@ _pager_do_write_request (struct pager *p,
   /* XXX: Is this still needed?  */
  retry:
   for (i = 0; i < npages; i++)
-    if (pm_entries[i] & PM_PAGINGOUT)
-      {
- pm_entries[i] |= PM_WRITEWAIT;
- pthread_cond_wait (&p->wakeup, &p->interlock);
- goto retry;
+    {
+      if ((initializing && (pm_entries[i] & PM_INIT)) ||
+          (!dirty && !(pm_entries[i] & PM_PRECIOUS)))
+        {
+          omit_data[i] = 1;
+          continue;
+        }
+      if (pm_entries[i] & PM_PAGINGOUT)
+        {
+          pm_entries[i] |= PM_WRITEWAIT;
+          pthread_cond_wait (&p->wakeup, &p->interlock);
+          goto retry;
       }
+    }

   /* Mark these pages as being paged out.  */
-  if (initializing)
-    {
-      assert_backtrace (npages <= 32);
-      for (i = 0; i < npages; i++)
- {
-  if (pm_entries[i] & PM_INIT)
-    omitdata |= 1 << i;
-  else
-    pm_entries[i] |= PM_PAGINGOUT | PM_INIT;
- }
-    }
-  else
-    for (i = 0; i < npages; i++)
+  for (i = 0; i < npages; i++)
+    if (!omit_data[i])
       pm_entries[i] |= PM_PAGINGOUT | PM_INIT;

   /* If this write occurs while a lock is pending, record
@@ -162,7 +145,7 @@ _pager_do_write_request (struct pager *p,
      but until the pager library interface is changed, this will have to do. */

   for (i = 0; i < npages; i++)
-    if (!(omitdata & (1 << i)))
+    if (!omit_data[i])
       pagerrs[i] = pager_write_page (p->upi,
      offset + (vm_page_size * i),
      data + (vm_page_size * i));
@@ -175,9 +158,11 @@ _pager_do_write_request (struct pager *p,
   wakeup = 0;
   for (i = 0; i < npages; i++)
     {
-      if (omitdata & (1 << i))
+      if (omit_data[i])
  {
-  notified[i] = 0;
+  notified[i] = (p->notify_on_evict
+                         && !kcopy
+                         && ! (pm_entries[i] & PM_PAGEINWAIT));
   continue;
  }

@@ -205,14 +190,13 @@ _pager_do_write_request (struct pager *p,
  }
       else
  {
-  munmap ((void *) (data + (vm_page_size * i)),
-  vm_page_size);
   notified[i] = (! kcopy && p->notify_on_evict);
   if (! kcopy)
     pm_entries[i] &= ~PM_INCORE;
  }

-      pm_entries[i] &= ~(PM_PAGINGOUT | PM_PAGEINWAIT | PM_WRITEWAIT);
+      pm_entries[i] &= ~(PM_PAGINGOUT | PM_PAGEINWAIT
+                       | PM_WRITEWAIT | PM_PRECIOUS);
     }

   for (ll = lock_list; ll; ll = ll->next)
@@ -222,7 +206,6 @@ _pager_do_write_request (struct pager *p,
   if (wakeup)
     pthread_cond_broadcast (&p->wakeup);

- notify:
   _pager_allow_termination (p);
   pthread_mutex_unlock (&p->interlock);

diff --git a/libpager/offer-page.c b/libpager/offer-page.c
index 26f88ca3..392e83b8 100644
--- a/libpager/offer-page.c
+++ b/libpager/offer-page.c
@@ -38,6 +38,8 @@ pager_offer_page (struct pager *p,

   short *pm_entry = &p->pagemap[offset / vm_page_size];
   *pm_entry |= PM_INCORE;
+  if (precious)
+    *pm_entry |= PM_PRECIOUS;

   err = memory_object_data_supply (p->memobjcntl, offset, buf, vm_page_size, 0,
                                    writelock ? VM_PROT_WRITE : VM_PROT_NONE,
diff --git a/libpager/priv.h b/libpager/priv.h
index d9d76965..a5e22f36 100644
--- a/libpager/priv.h
+++ b/libpager/priv.h
@@ -108,6 +108,7 @@ extern int _pager_page_errors[];

 /* Pagemap format */
 /* These are binary state bits */
+#define PM_PRECIOUS   0x0400 /* return even if not dirty */
 #define PM_WRITEWAIT  0x0200 /* queue wakeup once write is done */
 #define PM_INIT       0x0100    /* data has been written */
 #define PM_INCORE     0x0080 /* kernel might have a copy */
-- 
2.31.1



reply via email to

[Prev in Thread] Current Thread [Next in Thread]