bug-hurd
From: Gianluca Guida
Subject: Re: [patch #4818] Dynamic memory allocation for Linux Device drivers in glue.
Date: Sun, 22 Jan 2006 02:48:20 +0100

For fans of inline patches, here it is:

This is a new version of the patch.

No major improvements; it is just a cleanup of the previous patch:

- Removed rtl8139.c, which I forgot to remove in the previous patch;
- Removed some debugging printfs that I had forgotten, in pure StoMach style.
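
In short, the patch drops the static 64K chunks that the Linux glue used to
reserve at boot and instead keeps two zones of free pages: a DMA zone below
16 MB and a normal zone above it.  linux_kmalloc and __get_free_pages pass a
flag (GFP_DMA, mapped to VM_PAGE_DMA) to pick the zone, and vm_page_grab_flags
falls back to DMA memory only when the normal zone is exhausted.  The
standalone C sketch below only models that dispatch; it is an illustration of
the idea, not part of the patch, and all toy_* names are made up:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define DMA_LIMIT   (16UL * 1024 * 1024)  /* DMA zone ends at 16 MB */
#define PAGE_SIZE   4096UL
#define VM_PAGE_DMA 0x1                   /* caller needs a DMA-capable page */

/* Toy page descriptor and singly linked free lists, one per zone. */
struct page {
    unsigned long phys_addr;
    struct page *next;
};

static struct page *free_dma;     /* pages below 16 MB */
static struct page *free_normal;  /* pages at or above 16 MB */

/* Put a page on the free list of the zone its physical address belongs to,
   mirroring the zone test done by pmap_is_dma()/pmap_is_normal(). */
static void toy_page_release(struct page *p)
{
    struct page **q = (p->phys_addr < DMA_LIMIT) ? &free_dma : &free_normal;
    p->next = *q;
    *q = p;
}

/* Grab one page.  Without VM_PAGE_DMA, prefer the normal zone and fall back
   to the DMA zone only when the normal zone is empty; with VM_PAGE_DMA, only
   the DMA zone is acceptable. */
static struct page *toy_page_grab_flags(unsigned flags)
{
    struct page **q = NULL;

    if (!(flags & VM_PAGE_DMA) && free_normal)
        q = &free_normal;
    else if (free_dma)
        q = &free_dma;

    if (!q)
        return NULL;              /* caller must cope with the shortage */

    struct page *p = *q;
    *q = p->next;
    p->next = NULL;
    return p;
}

int main(void)
{
    static struct page pool[4];

    /* Two pages below 16 MB, two above. */
    pool[0].phys_addr = 1 * PAGE_SIZE;
    pool[1].phys_addr = 2 * PAGE_SIZE;
    pool[2].phys_addr = DMA_LIMIT;
    pool[3].phys_addr = DMA_LIMIT + PAGE_SIZE;
    for (int i = 0; i < 4; i++)
        toy_page_release(&pool[i]);

    struct page *a = toy_page_grab_flags(0);            /* normal zone */
    struct page *b = toy_page_grab_flags(VM_PAGE_DMA);  /* DMA zone */
    assert(a && a->phys_addr >= DMA_LIMIT);
    assert(b && b->phys_addr < DMA_LIMIT);
    printf("normal grab: %#lx, DMA grab: %#lx\n", a->phys_addr, b->phys_addr);
    return 0;
}

Running it prints one address from each zone; the real code has to make the
same choice while holding vm_page_queue_free_lock and keeping the free-page
bitmap in sync.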

Happy Testing,
Gianluca

diff -ru gnumach-vanilla/i386/i386at/model_dep.c gnumach/i386/i386at/model_dep.c
--- gnumach-vanilla/i386/i386at/model_dep.c     2004-11-28 18:29:35.000000000 +0100
+++ gnumach/i386/i386at/model_dep.c     2006-01-22 01:08:09.000000000 +0100
@@ -86,16 +86,7 @@
 /* Configuration parameter:
    if zero, only use physical memory in the low 16MB of addresses.
    Only SCSI still has DMA problems.  */
-#ifdef LINUX_DEV
-#define use_all_mem 1
-#else
-#include "nscsi.h"
-#if    NSCSI > 0
-#define use_all_mem 0
-#else
 #define use_all_mem 1
-#endif
-#endif

 extern char    version[];

@@ -454,7 +445,6 @@
        vm_offset_t addr;
        extern char start[], end[];
        int i;
-       static int wrapped = 0;

        /* Memory regions to skip.  */
        vm_offset_t cmdline_start_pa = boot_info.flags & MULTIBOOT_CMDLINE
@@ -474,25 +464,8 @@
        /* Page-align the start address.  */
        avail_next = round_page(avail_next);

-       /* Start with memory above 16MB, reserving the low memory for later. */
-       if (use_all_mem && !wrapped && phys_last_addr > 16 * 1024*1024)
-         {
-           if (avail_next < 16 * 1024*1024)
-             avail_next = 16 * 1024*1024;
-           else if (avail_next == phys_last_addr)
-             {
-               /* We have used all the memory above 16MB, so now start on
-                  the low memory.  This will wind up at the end of the list
-                  of free pages, so it should not have been allocated to any
-                  other use in early initialization before the Linux driver
-                  glue initialization needs to allocate low memory.  */
-               avail_next = 0x1000;
-               wrapped = 1;
-             }
-         }
-
        /* Check if we have reached the end of memory.  */
-        if (avail_next == (wrapped ? 16 * 1024*1024 : phys_last_addr))
+        if (avail_next == phys_last_addr)
                return FALSE;

        /* Tentatively assign the current location to the caller.  */
@@ -585,107 +558,3 @@
                !(((boot_info.mem_lower * 1024) <= x) && (x < 1024*1024)));
 }

-#ifndef NBBY
-#define NBBY   8
-#endif
-#ifndef NBPW
-#define NBPW   (NBBY * sizeof(int))
-#endif
-#define DMA_MAX        (16*1024*1024)
-
-/*
- * Allocate contiguous pages below 16 MB
- * starting at specified boundary for DMA.
- */
-vm_offset_t
-alloc_dma_mem(size, align)
-       vm_size_t size;
-       vm_offset_t align;
-{
-       int *bits, i, j, k, n;
-       int npages, count, bit, mask;
-       int first_page, last_page;
-       vm_offset_t addr;
-       vm_page_t p, prevp;
-
-       npages = round_page(size) / PAGE_SIZE;
-       mask = align ? (align - 1) / PAGE_SIZE : 0;
-
-       /*
-        * Allocate bit array.
-        */
-       n = ((DMA_MAX / PAGE_SIZE) + NBPW - 1) / NBPW;
-       i = n * NBPW;
-       bits = (unsigned *)kalloc(i);
-       if (bits == 0) {
-               printf("alloc_dma_mem: unable alloc bit array\n");
-               return (0);
-       }
-       bzero((char *)bits, i);
-
-       /*
-        * Walk the page free list and set a bit for
-        * every usable page in bit array.
-        */
-       simple_lock(&vm_page_queue_free_lock);
-       for (p = vm_page_queue_free; p; p = (vm_page_t)p->pageq.next) {
-               if (p->phys_addr < DMA_MAX) {
-                       i = p->phys_addr / PAGE_SIZE;
-                       bits[i / NBPW] |= 1 << (i % NBPW);
-               }
-       }
-
-       /*
-        * Search for contiguous pages by scanning bit array.
-        */
-       for (i = 0, first_page = -1; i < n; i++) {
-               for (bit = 1, j = 0; j < NBPW; j++, bit <<= 1) {
-                       if (bits[i] & bit) {
-                               if (first_page < 0) {
-                                       k = i * NBPW + j;
-                                       if (!mask
-                                           || (((k & mask) + npages)
-                                               <= mask + 1)) {
-                                               first_page = k;
-                                               if (npages == 1)
-                                                       goto found;
-                                               count = 1;
-                                       }
-                               } else if (++count == npages)
-                                       goto found;
-                       } else
-                               first_page = -1;
-               }
-       }
-       addr = 0;
-       goto out;
-
- found:
-       /*
-        * Remove pages from the free list.
-        */
-       addr = first_page * PAGE_SIZE;
-       last_page = first_page + npages;
-       vm_page_free_count -= npages;
-       p = vm_page_queue_free;
-       prevp = 0;
-       while (1) {
-               i = p->phys_addr / PAGE_SIZE;
-               if (i >= first_page && i < last_page) {
-                       if (prevp)
-                               prevp->pageq.next = p->pageq.next;
-                       else
-                               vm_page_queue_free = (vm_page_t)p->pageq.next;
-                       p->free = FALSE;
-                       if (--npages == 0)
-                               break;
-               } else
-                       prevp = p;
-               p = (vm_page_t)p->pageq.next;
-       }
-
- out:
-       simple_unlock(&vm_page_queue_free_lock);
-       kfree((vm_offset_t)bits, n * NBPW);
-       return (addr);
-}
diff -ru gnumach-vanilla/i386/intel/pmap.c gnumach/i386/intel/pmap.c
--- gnumach-vanilla/i386/intel/pmap.c   2001-04-05 08:39:21.000000000 +0200
+++ gnumach/i386/intel/pmap.c   2006-01-22 01:08:09.000000000 +0100
@@ -584,6 +584,11 @@
        return(virt);
 }

+unsigned long vm_page_normal_first = 16*1024*1024;
+unsigned long vm_page_normal_last = 0;
+unsigned long vm_page_dma_first = 0;
+unsigned long vm_page_dma_last = 16*1024*1024 - 1;
+
 /*
  *     Bootstrap the system enough to run with virtual memory.
  *     Allocate the kernel page directory and page tables,
@@ -698,6 +703,25 @@
                                va += INTEL_PGBYTES;
                        }
                }
+
+               if (phys_last_addr <= 16*1024*1024) {
+                       /* Set so to never get TRUE from isnormal(). */
+                       vm_page_normal_first = phys_last_addr + 1;
+                       vm_page_normal_last = 0;
+
+                       /* Only DMA memory.  */
+                       vm_page_dma_first = 0;
+                       vm_page_dma_last = phys_last_addr;
+               } else {
+                       vm_page_normal_first = 16*1024*1024;
+                       vm_page_normal_last = phys_last_addr;
+
+                       vm_page_dma_first = 0;
+                       vm_page_dma_last = 16*1024*1024 - 1;
+               }
+
+
+
        }

 #if    i860
@@ -2341,6 +2365,27 @@
        return (phys_attribute_test(phys, PHYS_REFERENCED));
 }

+/*
+ *     pmap_is_dma
+ *
+ *     Return TRUE if PHYS is in the DMA zone range.
+ */
+boolean_t pmap_is_dma (vm_offset_t phys)
+{
+  return (phys < 16*1024*1024);
+}
+
+/*
+ *     pmap_is_normal:
+ *
+ *     Return TRUE if PHYS is in the normal zone range.
+ */
+boolean_t pmap_is_normal (vm_offset_t phys)
+{
+       return (phys >= 16*1024*1024);
+}
+
+
 #if    NCPUS > 1
 /*
 *          TLB Coherence Code (TLB "shootdown" code)
diff -ru gnumach-vanilla/kern/startup.c gnumach/kern/startup.c
--- gnumach-vanilla/kern/startup.c      2001-04-05 08:39:20.000000000 +0200
+++ gnumach/kern/startup.c      2006-01-22 01:08:09.000000000 +0100
@@ -80,9 +80,6 @@
 extern void    action_thread();
 #endif /* NCPUS > 1 */

-/* XX */
-extern vm_offset_t phys_first_addr, phys_last_addr;
-
 /*
  *     Running in virtual memory, on the interrupt stack.
  *     Does not return.  Dispatches initial thread.
@@ -122,7 +119,7 @@
        machine_init();

        machine_info.max_cpus = NCPUS;
-       machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */
+       machine_info.memory_size = phys_last_addr - phys_first_addr;
        machine_info.avail_cpus = 0;
        machine_info.major_version = KERNEL_MAJOR_VERSION;
        machine_info.minor_version = KERNEL_MINOR_VERSION;
diff -ru gnumach-vanilla/linux/dev/glue/kmem.c gnumach/linux/dev/glue/kmem.c
--- gnumach-vanilla/linux/dev/glue/kmem.c       1999-04-26 07:45:35.000000000 +0200
+++ gnumach/linux/dev/glue/kmem.c       2006-01-22 02:36:14.000000000 +0100
@@ -25,6 +25,7 @@
 #include <sys/types.h>

 #include <mach/mach_types.h>
+#include <mach/error.h>
 #include <mach/vm_param.h>

 #include <kern/assert.h>
@@ -40,22 +41,11 @@

 #include <asm/system.h>

-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *);
 extern int printf (const char *, ...);

-/* Amount of memory to reserve for Linux memory allocator.
-   We reserve 64K chunks to stay within DMA limits.
-   Increase MEM_CHUNKS if the kernel is running out of memory.  */
-#define MEM_CHUNK_SIZE (64 * 1024)
-#define MEM_CHUNKS     7
-
 /* Mininum amount that linux_kmalloc will allocate.  */
 #define MIN_ALLOC      12

-#ifndef NBPW
-#define NBPW           32
-#endif
-
 /* Memory block header.  */
 struct blkhdr
 {
@@ -70,62 +60,17 @@
   struct pagehdr *next;        /* next header in list */
 };

-/* This structure describes a memory chunk.  */
-struct chunkhdr
-{
-  unsigned long start; /* start address */
-  unsigned long end;           /* end address */
-  unsigned long bitmap;        /* busy/free bitmap of pages */
-};
-
-/* Chunks from which pages are allocated.  */
-static struct chunkhdr pages_free[MEM_CHUNKS];
-
 /* Memory list maintained by linux_kmalloc.  */
-static struct pagehdr *memlist;
+static struct pagehdr *memlist_dma = NULL;
+static struct pagehdr *memlist_nml = NULL;

 /* Some statistics.  */
 int num_block_coalesce = 0;
-int num_page_collect = 0;
-int linux_mem_avail;

 /* Initialize the Linux memory allocator.  */
 void
 linux_kmem_init ()
 {
-  int i, j;
-  vm_page_t p, pages;
-
-  for (i = 0; i < MEM_CHUNKS; i++)
-    {
-      /* Allocate memory.  */
-      pages_free[i].start = (unsigned long) alloc_contig_mem (MEM_CHUNK_SIZE,
-                                                             16 * 1024 * 1024,
-                                                             0xffff, &pages);
-
-      assert (pages_free[i].start);
-      assert ((pages_free[i].start & 0xffff) == 0);
-
-      /* Sanity check: ensure pages are contiguous and within DMA limits.  */
-      for (p = pages, j = 0; j < MEM_CHUNK_SIZE - PAGE_SIZE; j += PAGE_SIZE)
-       {
-         assert (p->phys_addr < 16 * 1024 * 1024);
-         assert (p->phys_addr + PAGE_SIZE
-                 == ((vm_page_t) p->pageq.next)->phys_addr);
-
-         p = (vm_page_t) p->pageq.next;
-       }
-
-      pages_free[i].end = pages_free[i].start + MEM_CHUNK_SIZE;
-
-      /* Initialize free page bitmap.  */
-      pages_free[i].bitmap = 0;
-      j = MEM_CHUNK_SIZE >> PAGE_SHIFT;
-      while (--j >= 0)
-       pages_free[i].bitmap |= 1 << j;
-    }
-
-  linux_mem_avail = (MEM_CHUNKS * MEM_CHUNK_SIZE) >> PAGE_SHIFT;
 }

 /* Return the number by which the page size should be
@@ -178,7 +123,40 @@

   num_block_coalesce++;

-  for (ph = memlist; ph; ph = ph->next)
+  /* Coalesce DMA memory.  */
+  for (ph = memlist_dma; ph; ph = ph->next)
+    {
+      bh = (struct blkhdr *) (ph + 1);
+      ebh = (struct blkhdr *) ((void *) ph + ph->size);
+      while (1)
+       {
+         /* Skip busy blocks.  */
+         while (bh < ebh && !bh->free)
+           bh = (struct blkhdr *) ((void *) (bh + 1) + bh->size);
+         if (bh == ebh)
+           break;
+
+         /* Merge adjacent free blocks.  */
+         while (1)
+           {
+             bhp = (struct blkhdr *) ((void *) (bh + 1) + bh->size);
+             if (bhp == ebh)
+               {
+                 bh = bhp;
+                 break;
+               }
+             if (!bhp->free)
+               {
+                 bh = (struct blkhdr *) ((void *) (bhp + 1) + bhp->size);
+                 break;
+               }
+             bh->size += bhp->size + sizeof (struct blkhdr);
+           }
+       }
+    }
+
+  /* Coalesce non-DMA memory.  */
+  for (ph = memlist_nml; ph; ph = ph->next)
     {
       bh = (struct blkhdr *) (ph + 1);
       ebh = (struct blkhdr *) ((void *) ph + ph->size);
@@ -216,20 +194,26 @@
 void *
 linux_kmalloc (unsigned int size, int priority)
 {
-  int order, coalesced = 0;
+  int order, coalesced = 0, dma = 0;
   unsigned flags;
-  struct pagehdr *ph;
+  struct pagehdr *ph, **memlistp;
   struct blkhdr *bh, *new_bh;

+  if (priority & GFP_DMA)
+    {
+      memlistp = &memlist_dma;
+      dma = 1;
+    }
+  else
+    {
+      memlistp = &memlist_nml;
+      dma = 0;
+    }
   if (size < MIN_ALLOC)
     size = MIN_ALLOC;
   else
     size = (size + sizeof (int) - 1) & ~(sizeof (int) - 1);

-  assert (size <= (MEM_CHUNK_SIZE
-                  - sizeof (struct pagehdr)
-                  - sizeof (struct blkhdr)));
-
   save_flags (flags);
   cli ();

@@ -238,7 +222,7 @@

   /* Walk the page list and find the first free block with size
      greater than or equal to the one required.  */
-  for (ph = memlist; ph; ph = ph->next)
+  for (ph = *memlistp; ph; ph = ph->next)
     {
       bh = (struct blkhdr *) (ph + 1);
       while (bh < (struct blkhdr *) ((void *) ph + ph->size))
@@ -278,16 +262,26 @@
   order = get_page_order (size
                          + sizeof (struct pagehdr)
                          + sizeof (struct blkhdr));
-  ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, ~0UL);
+  ph = (struct pagehdr *) __get_free_pages (GFP_KERNEL, order, dma);
   if (!ph)
     {
       restore_flags (flags);
       return NULL;
     }

+  /* __get_free_pages may return DMA memory if non-DMA memory is not
+     free so we check back here for mem type.  */
+  if (pmap_is_dma ((unsigned long)ph))
+    {
+      memlistp = &memlist_dma;
+    }
+  else
+    {
+      memlistp = &memlist_nml;
+    }
   ph->size = PAGE_SIZE << order;
-  ph->next = memlist;
-  memlist = ph;
+  ph->next = *memlistp;
+  *memlistp = ph;
   bh = (struct blkhdr *) (ph + 1);
   bh->free = 0;
   bh->size = ph->size - sizeof (struct pagehdr) - sizeof (struct blkhdr);
@@ -310,17 +304,28 @@
 linux_kfree (void *p)
 {
   unsigned flags;
+  vm_offset_t addr;
   struct blkhdr *bh;
-  struct pagehdr *ph;
+  struct pagehdr *ph, **memlistp;

   assert (((int) p & (sizeof (int) - 1)) == 0);

+  addr = (vm_offset_t) p;
+
+  if (pmap_is_dma (addr))
+    {
+      memlistp = &memlist_dma;
+    }
+  else
+    {
+      memlistp = &memlist_nml;
+    }
   save_flags (flags);
   cli ();

   check_page_list (__LINE__);

-  for (ph = memlist; ph; ph = ph->next)
+  for (ph = *memlistp; ph; ph = ph->next)
     if (p >= (void *) ph && p < (void *) ph + ph->size)
       break;

@@ -339,10 +344,10 @@
   restore_flags (flags);
 }

-/* Free any pages that are not in use.
+/* Free any DMA page that are not in use.
    Called by __get_free_pages when pages are running low.  */
 static void
-collect_kmalloc_pages ()
+collect_kmalloc_pages_dma ()
 {
   struct blkhdr *bh;
   struct pagehdr *ph, **prev_ph;
@@ -353,8 +358,8 @@

   check_page_list (__LINE__);

-  ph = memlist;
-  prev_ph = &memlist;
+  ph = memlist_dma;
+  prev_ph = &memlist_dma;
   while (ph)
     {
       bh = (struct blkhdr *) (ph + 1);
@@ -373,68 +378,91 @@

   check_page_list (__LINE__);
 }
-
-/* Allocate ORDER + 1 number of physically contiguous pages.
-   PRIORITY and DMA are not used in Mach.
-
-   XXX: This needs to be dynamic.  To do that we need to make
-   the Mach page manipulation routines interrupt safe and they
-   must provide machine dependant hooks.  */
-unsigned long
-__get_free_pages (int priority, unsigned long order, int dma)
+/* Free any non-DMA page that are not in use.
+   Called by __get_free_pages when pages are running low.  */
+static void
+collect_kmalloc_pages_nml ()
 {
-  int i, pages_collected = 0;
-  unsigned flags, bits, off, j, len;
+  struct blkhdr *bh;
+  struct pagehdr *ph, **prev_ph;

-  assert ((PAGE_SIZE << order) <= MEM_CHUNK_SIZE);
+  check_page_list (__LINE__);

-  /* Construct bitmap of contiguous pages.  */
-  bits = 0;
-  j = 0;
-  len = 0;
-  while (len < (PAGE_SIZE << order))
-    {
-      bits |= 1 << j++;
-      len += PAGE_SIZE;
-    }
+  coalesce_blocks ();

-  save_flags (flags);
-  cli ();
-again:
+  check_page_list (__LINE__);

-  /* Search each chunk for the required number of contiguous pages.  */
-  for (i = 0; i < MEM_CHUNKS; i++)
+  ph = memlist_nml;
+  prev_ph = &memlist_nml;
+  while (ph)
     {
-      off = 0;
-      j = bits;
-      while (MEM_CHUNK_SIZE - off >= (PAGE_SIZE << order))
+      bh = (struct blkhdr *) (ph + 1);
+      if (bh->free && (void *) (bh + 1) + bh->size == (void *) ph + ph->size)
        {
-         if ((pages_free[i].bitmap & j) == j)
-           {
-             pages_free[i].bitmap &= ~j;
-             linux_mem_avail -= order + 1;
-             restore_flags (flags);
-             return pages_free[i].start + off;
-           }
-         j <<= 1;
-         off += PAGE_SIZE;
+         *prev_ph = ph->next;
+         free_pages ((unsigned long) ph, get_page_order (ph->size));
+         ph = *prev_ph;
+       }
+      else
+       {
+         prev_ph = &ph->next;
+         ph = ph->next;
        }
     }

-  /* Allocation failed; collect kmalloc and buffer pages
-     and try again.  */
-  if (!pages_collected)
-    {
-      num_page_collect++;
-      collect_kmalloc_pages ();
-      pages_collected = 1;
-      goto again;
-    }
+  check_page_list (__LINE__);
+}

-  printf ("%s:%d: __get_free_pages: ran out of pages\n", __FILE__, __LINE__);
+/* Allocate ORDER + 1 number of physically contiguous pages.
+   PRIORITY and DMA are not used in Mach.  */
+unsigned long
+__get_free_pages (int priority, unsigned long order, int dma)
+{
+  unsigned long pagenum;
+  unsigned vm_page_flags = 0;
+  unsigned long p;
+
+  if (dma)
+    vm_page_flags |= VM_PAGE_DMA;
+
+  pagenum = (1 << order);
+
+  p = 0;
+
+  if (pagenum > 1)
+    {
+      /*
+       * Contiguous grabbing is slow and may fail.
+       * We reserve it for special occasions.
+       */
+      mach_error_t err;
+      vm_offset_t vmo;
+
+      err = vm_page_grab_contiguous_pages_flags (pagenum, &vmo, FALSE,
+                                                vm_page_flags, 0);
+      p = (unsigned long) vmo;
+
+      if (err)
+       return 0;
+    }
+  else
+    {
+      vm_page_t m;
+
+      m = vm_page_grab_flags (FALSE, vm_page_flags);
+      if (!m)
+       return 0;
+
+      p = m->phys_addr;
+
+      if (m->tabled)
+       {
+         printf ("Error while getting page of order %ld\n", order);
+         return 0;
+       }
+    };

-  restore_flags (flags);
-  return 0;
+  return p;
 }

 /* Free ORDER + 1 number of physically
@@ -442,36 +470,20 @@
 void
 free_pages (unsigned long addr, unsigned long order)
 {
-  int i;
-  unsigned flags, bits, len, j;
+  unsigned long i, pagenum;

-  assert ((addr & PAGE_MASK) == 0);
-
-  for (i = 0; i < MEM_CHUNKS; i++)
-    if (addr >= pages_free[i].start && addr < pages_free[i].end)
-      break;
+  pagenum = 1 << order;

-  assert (i < MEM_CHUNKS);
-
-  /* Contruct bitmap of contiguous pages.  */
-  len = 0;
-  j = 0;
-  bits = 0;
-  while (len < (PAGE_SIZE << order))
+  for (i = 0; i < pagenum; i++)
     {
-      bits |= 1 << j++;
-      len += PAGE_SIZE;
-    }
-  bits <<= (addr - pages_free[i].start) >> PAGE_SHIFT;
+      vm_page_t m;

-  save_flags (flags);
-  cli ();
+      m = vm_page_physaddr_lookup (addr + (i * PAGE_SIZE));
+      if (m == VM_PAGE_NULL)
+       panic ("couldn't lookup page for address %lx", addr + (i * PAGE_SIZE));

-  assert ((pages_free[i].bitmap & bits) == 0);
-
-  pages_free[i].bitmap |= bits;
-  linux_mem_avail += order + 1;
-  restore_flags (flags);
+      vm_page_free (m);
+    }
 }


diff -ru gnumach-vanilla/linux/dev/init/main.c gnumach/linux/dev/init/main.c
--- gnumach-vanilla/linux/dev/init/main.c       1999-04-26 07:49:06.000000000 +0200
+++ gnumach/linux/dev/init/main.c       2006-01-22 02:35:54.000000000 +0100
@@ -82,9 +82,7 @@
 static void calibrate_delay (void);

 extern int hz;
-extern vm_offset_t phys_last_addr;

-extern void *alloc_contig_mem (unsigned, unsigned, unsigned, vm_page_t *);
 extern void free_contig_mem (vm_page_t);
 extern void init_IRQ (void);
 extern void restore_IRQ (void);
@@ -105,10 +103,8 @@
 extern void linux_sched_init (void);


-/*
- * Amount of contiguous memory to allocate for initialization.
- */
-#define CONTIG_ALLOC (512 * 1024)
+/* Amount of contiguous memory to allocate for initialization.  */
+#define CONTIG_ALLOC_ORDER (7) /* 512kb.  */

 /*
  * Initialize Linux drivers.
@@ -117,7 +113,7 @@
 linux_init (void)
 {
   int addr;
-  unsigned memory_start, memory_end;
+  unsigned long memory_start, memory_end;
   vm_page_t pages;

   /*
@@ -142,40 +138,34 @@
   memcpy ((char *) &drive_info + 16,
          (void *) ((addr & 0xffff) + ((addr >> 12) & 0xffff0)), 16);

-  /*
-   * Initialize Linux memory allocator.
-   */
+  /* Initialize Linux memory allocator.  */
   linux_kmem_init ();

-  /*
-   * Allocate contiguous memory below 16 MB.
-   */
-  memory_start = (unsigned long) alloc_contig_mem (CONTIG_ALLOC,
-                                                  16 * 1024 * 1024,
-                                                  0, &pages);
-  if (memory_start == 0)
-    panic ("linux_init: alloc_contig_mem failed");
-  memory_end = memory_start + CONTIG_ALLOC;
+  /* Allocate contiguous memory below 16 MB.  */
+  memory_start = __get_free_pages (GFP_ATOMIC, CONTIG_ALLOC_ORDER, 1);
+  if (!memory_start)
+    panic ("linux_init: alloc PCI memory failed");
+  memory_end = memory_start + ((1 << CONTIG_ALLOC_ORDER) * PAGE_SIZE);

-  /*
-   * Initialize PCI bus.
-   */
+  /* Initialize PCI bus.  */
   memory_start = pci_init (memory_start, memory_end);

   if (memory_start > memory_end)
     panic ("linux_init: ran out memory");

-  /*
-   * Free unused memory.
-   */
-  while (pages && pages->phys_addr < round_page (memory_start))
-    pages = (vm_page_t) pages->pageq.next;
-  if (pages)
-    free_contig_mem (pages);
+  /* Free unused memory.  */
+  {
+    unsigned long memaddr;
+
+    for (memaddr = round_page (memory_start);
+        memaddr < memory_end;
+        memaddr += PAGE_SIZE)
+      {
+       free_pages (memaddr, 0);
+      }
+  }

-  /*
-   * Initialize devices.
-   */
+  /* Initialize devices.  */
 #ifdef CONFIG_INET
   linux_net_emulation_init ();
 #endif
@@ -186,148 +176,6 @@
   linux_auto_config = 0;
 }

-#ifndef NBPW
-#define NBPW 32
-#endif
-
-/*
- * Allocate contiguous memory with the given constraints.
- * This routine is horribly inefficient but it is presently
- * only used during initialization so it's not that bad.
- */
-void *
-alloc_contig_mem (unsigned size, unsigned limit,
-                 unsigned mask, vm_page_t * pages)
-{
-  int i, j, bits_len;
-  unsigned *bits, len;
-  void *m;
-  vm_page_t p, page_list, tail, prev;
-  vm_offset_t addr, max_addr;
-
-  if (size == 0)
-    return (NULL);
-  size = round_page (size);
-  if ((size >> PAGE_SHIFT) > vm_page_free_count)
-    return (NULL);
-
-  /* Allocate bit array.  */
-  max_addr = phys_last_addr;
-  if (max_addr > limit)
-    max_addr = limit;
-  bits_len = ((((max_addr >> PAGE_SHIFT) + NBPW - 1) / NBPW)
-             * sizeof (unsigned));
-  bits = (unsigned *) kalloc (bits_len);
-  if (!bits)
-    return (NULL);
-  memset (bits, 0, bits_len);
-
-  /*
-   * Walk the page free list and set a bit for every usable page.
-   */
-  simple_lock (&vm_page_queue_free_lock);
-  p = vm_page_queue_free;
-  while (p)
-    {
-      if (p->phys_addr < limit)
-       (bits[(p->phys_addr >> PAGE_SHIFT) / NBPW]
-        |= 1 << ((p->phys_addr >> PAGE_SHIFT) % NBPW));
-      p = (vm_page_t) p->pageq.next;
-    }
-
-  /*
-   * Scan bit array for contiguous pages.
-   */
-  len = 0;
-  m = NULL;
-  for (i = 0; len < size && i < bits_len / sizeof (unsigned); i++)
-    for (j = 0; len < size && j < NBPW; j++)
-      if (!(bits[i] & (1 << j)))
-       {
-         len = 0;
-         m = NULL;
-       }
-      else
-       {
-         if (len == 0)
-           {
-             addr = ((vm_offset_t) (i * NBPW + j)
-                     << PAGE_SHIFT);
-             if ((addr & mask) == 0)
-               {
-                 len += PAGE_SIZE;
-                 m = (void *) addr;
-               }
-           }
-         else
-           len += PAGE_SIZE;
-       }
-
-  if (len != size)
-    {
-      simple_unlock (&vm_page_queue_free_lock);
-      kfree ((vm_offset_t) bits, bits_len);
-      return (NULL);
-    }
-
-  /*
-   * Remove pages from free list
-   * and construct list to return to caller.
-   */
-  page_list = NULL;
-  for (len = 0; len < size; len += PAGE_SIZE, addr += PAGE_SIZE)
-    {
-      prev = NULL;
-      for (p = vm_page_queue_free; p; p = (vm_page_t) p->pageq.next)
-       {
-         if (p->phys_addr == addr)
-           break;
-         prev = p;
-       }
-      if (!p)
-       panic ("alloc_contig_mem: page not on free list");
-      if (prev)
-       prev->pageq.next = p->pageq.next;
-      else
-       vm_page_queue_free = (vm_page_t) p->pageq.next;
-      p->free = FALSE;
-      p->pageq.next = NULL;
-      if (!page_list)
-       page_list = tail = p;
-      else
-       {
-         tail->pageq.next = (queue_entry_t) p;
-         tail = p;
-       }
-      vm_page_free_count--;
-    }
-
-  simple_unlock (&vm_page_queue_free_lock);
-  kfree ((vm_offset_t) bits, bits_len);
-  if (pages)
-    *pages = page_list;
-  return (m);
-}
-
-/*
- * Free memory allocated by alloc_contig_mem.
- */
-void
-free_contig_mem (vm_page_t pages)
-{
-  int i;
-  vm_page_t p;
-
-  for (p = pages, i = 0; p->pageq.next; p = (vm_page_t) p->pageq.next, i++)
-    p->free = TRUE;
-  p->free = TRUE;
-  simple_lock (&vm_page_queue_free_lock);
-  vm_page_free_count += i + 1;
-  p->pageq.next = (queue_entry_t) vm_page_queue_free;
-  vm_page_queue_free = pages;
-  simple_unlock (&vm_page_queue_free_lock);
-}
-
 /* This is the number of bits of precision for the loops_per_second.  Each
  * bit takes on average 1.5/HZ seconds.  This (like the original) is a little
  * better than 1%
diff -ru gnumach-vanilla/vm/pmap.h gnumach/vm/pmap.h
--- gnumach-vanilla/vm/pmap.h   2001-04-05 08:39:21.000000000 +0200
+++ gnumach/vm/pmap.h   2006-01-22 01:08:09.000000000 +0100
@@ -174,6 +174,15 @@
 /* Return modify bit */
 boolean_t pmap_is_modified(vm_offset_t pa);

+/*
+ *     Page Zones routines
+ */
+
+/* Physical address is in DMA capable zone.  */
+boolean_t pmap_is_dma (vm_offset_t pa);
+
+/* Physical address is in non-DMA capable zone.  */
+boolean_t pmap_is_normal (vm_offset_t pa);

 /*
  *     Statistics routines
diff -ru gnumach-vanilla/vm/vm_page.h gnumach/vm/vm_page.h
--- gnumach-vanilla/vm/vm_page.h        1999-06-28 02:41:02.000000000 +0200
+++ gnumach/vm/vm_page.h        2006-01-22 01:08:09.000000000 +0100
@@ -152,22 +152,26 @@
  *             ordered, in LRU-like fashion.
  */

+#define VM_PAGE_DMA 0x1
+
+extern
+queue_head_t   vm_page_queue_free_normal; /* normal memory free queue */
 extern
-vm_page_t      vm_page_queue_free;     /* memory free queue */
+queue_head_t   vm_page_queue_free_dma; /* DMA-capable memory free queue */
 extern
-vm_page_t      vm_page_queue_fictitious;       /* fictitious free queue */
+vm_page_t      vm_page_queue_fictitious; /* fictitious free queue */
 extern
-queue_head_t   vm_page_queue_active;   /* active memory queue */
+queue_head_t   vm_page_queue_active; /* active memory queue */
 extern
 queue_head_t   vm_page_queue_inactive; /* inactive memory queue */

 extern
-vm_offset_t    first_phys_addr;        /* physical address for first_page */
+vm_offset_t    phys_first_addr;/* physical address for first_page */
 extern
-vm_offset_t    last_phys_addr;         /* physical address for last_page */
+vm_offset_t    phys_last_addr; /* physical address for last_page */

 extern
-int    vm_page_free_count;     /* How many pages are free? */
+int    vm_page_free_count;     /* How many pages are free? */
 extern
 int    vm_page_fictitious_count;/* How many fictitious pages are free? */
 extern
@@ -220,11 +224,20 @@
 extern vm_page_t       vm_page_lookup(
        vm_object_t     object,
        vm_offset_t     offset);
+extern vm_page_t       vm_page_physaddr_lookup (vm_offset_t);
 extern vm_page_t       vm_page_grab_fictitious(void);
 extern void            vm_page_release_fictitious(vm_page_t);
 extern boolean_t       vm_page_convert(vm_page_t, boolean_t);
 extern void            vm_page_more_fictitious(void);
 extern vm_page_t       vm_page_grab(boolean_t);
+extern vm_page_t       vm_page_grab_flags(boolean_t, unsigned);
+extern kern_return_t   vm_page_grab_contiguous_pages_flags(
+       int             npages,
+       vm_offset_t     *phys_address,
+       boolean_t       external,
+       unsigned        flags,
+       unsigned long   align);
+
 extern void            vm_page_release(vm_page_t, boolean_t);
 extern void            vm_page_wait(void (*)(void));
 extern vm_page_t       vm_page_alloc(
diff -ru gnumach-vanilla/vm/vm_resident.c gnumach/vm/vm_resident.c
--- gnumach-vanilla/vm/vm_resident.c    1999-09-04 15:03:32.000000000 +0200
+++ gnumach/vm/vm_resident.c    2006-01-22 02:37:16.000000000 +0100
@@ -56,6 +56,9 @@
 #include <vm/vm_user.h>
 #endif

+extern unsigned long vm_page_normal_first, vm_page_normal_last;
+extern unsigned long vm_page_dma_first, vm_page_dma_last;
+
 /* in zalloc.c XXX */
 extern vm_offset_t     zdata;
 extern vm_size_t       zdata_size;
@@ -105,14 +108,19 @@
  *     Resident pages that represent real memory
  *     are allocated from a free list.
  */
-vm_page_t      vm_page_queue_free;
+queue_head_t   vm_page_queue_free_normal;
+queue_head_t   vm_page_queue_free_dma;
 vm_page_t      vm_page_queue_fictitious;
 decl_simple_lock_data(,vm_page_queue_free_lock)
+
 unsigned int   vm_page_free_wanted;
 int            vm_page_free_count;
 int            vm_page_fictitious_count;
 int            vm_page_external_count;

+natural_t      *vm_page_free_bitmap;
+unsigned long  vm_page_free_bitmap_bitsz;
+
 unsigned int   vm_page_free_count_minimum;     /* debugging */

 /*
@@ -174,6 +182,102 @@
 boolean_t vm_page_deactivate_hint = TRUE;

 /*
+ *     vm_page_free_bitmap_set and vm_page_free_bitmap_unset:
+ *     FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1)
+ *     page allocator.
+ *
+ *     Used to mark a page as free.
+ */
+
+#ifndef        NBBY
+#define        NBBY    8       /* size in bits of sizeof()`s unity */
+#endif
+#define        NBPEL   (sizeof(natural_t)*NBBY)
+
+void vm_page_free_bitmap_set(natural_t pageno)
+{
+       register int word_index, bit_index;
+
+       word_index = pageno / NBPEL;
+       bit_index = pageno - (word_index * NBPEL);      
+
+       vm_page_free_bitmap[word_index] |= 1 << bit_index;      
+}
+void vm_page_free_bitmap_unset(natural_t pageno)
+{
+       register int word_index, bit_index;
+       
+       word_index = pageno / NBPEL;
+       bit_index = pageno - (word_index * NBPEL);
+
+       vm_page_free_bitmap[word_index] &= ~(1 << bit_index);
+}
+
+/*
+ *     vm_page_free_bitmap_alloc:
+ *
+ *     Alloc space for bitmap at initializiation time.
+ *     FIXME: Free pages bitmap is SLOW! Make a decent multizone O(1)
+ *     page allocator.
+ */
+
+void vm_page_free_bitmap_alloc(void)
+{
+       register unsigned long size, bitsz;
+        unsigned long vm_page_big_pagenum;
+       int i;
+
+       vm_page_big_pagenum = atop (phys_last_addr);
+
+       bitsz = (vm_page_big_pagenum + NBPEL - 1)
+               & ~(NBPEL - 1);                         /* in bits */
+       
+       size = bitsz / NBBY;                            /* in bytes */
+
+       vm_page_free_bitmap = (natural_t *) pmap_steal_memory (size);
+       bzero(vm_page_free_bitmap, size);
+
+       vm_page_free_bitmap_bitsz = bitsz;
+}
+
+
+/*     Fast phys_addr to vm_page_t lookup.  */
+
+static vm_page_t *vm_page_array;
+static unsigned vm_page_array_size;
+
+static void
+vm_page_array_init ()
+{
+  int i;
+
+  vm_page_array_size = (phys_last_addr - phys_first_addr) >> PAGE_SHIFT;
+  vm_page_array =  (vm_page_t *) pmap_steal_memory (sizeof(vm_page_t)
+                                                   * (vm_page_array_size));
+
+  for (i = 0; i < vm_page_array_size; i++)
+    vm_page_array[i] = VM_PAGE_NULL;
+
+}
+
+static void
+vm_page_array_add (vm_offset_t phys_addr, vm_page_t pg)
+{
+  assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr);
+
+  vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT] = pg;
+}
+
+vm_page_t
+vm_page_physaddr_lookup (vm_offset_t phys_addr)
+{
+  assert (phys_addr < phys_last_addr && phys_addr >= phys_first_addr);
+
+  return vm_page_array [(phys_addr - phys_first_addr) >> PAGE_SHIFT];
+}
+
+
+/*
  *     vm_page_bootstrap:
  *
  *     Initializes the resident memory module.
@@ -229,7 +333,8 @@
        simple_lock_init(&vm_page_queue_free_lock);
        simple_lock_init(&vm_page_queue_lock);

-       vm_page_queue_free = VM_PAGE_NULL;
+       queue_init (&vm_page_queue_free_normal);
+       queue_init (&vm_page_queue_free_dma);
        vm_page_queue_fictitious = VM_PAGE_NULL;
        queue_init(&vm_page_queue_active);
        queue_init(&vm_page_queue_inactive);
@@ -279,6 +384,8 @@
                simple_lock_init(&bucket->lock);
        }

+       vm_page_free_bitmap_alloc();
+
        /*
         *      Machine-dependent code allocates the resident page table.
         *      It uses vm_page_init to initialize the page frames.
@@ -294,7 +401,6 @@
        *startp = virtual_space_start;
        *endp = virtual_space_end;

-       /*      printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);*/
        vm_page_free_count_minimum = vm_page_free_count;
 }

@@ -380,6 +486,8 @@

        pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);

+       vm_page_array_init ();
+
        /*
         *      Initialize the page frames.
         */
@@ -389,21 +497,12 @@
                        break;

                vm_page_init(&pages[i], paddr);
+               vm_page_array_add (paddr, &pages[i]);
+               vm_page_release(&pages[i], FALSE);
                pages_initialized++;
        }

        /*
-        * Release pages in reverse order so that physical pages
-        * initially get allocated in ascending addresses. This keeps
-        * the devices (which must address physical memory) happy if
-        * they require several consecutive pages.
-        */
-
-       for (i = pages_initialized; i > 0; i--) {
-               vm_page_release(&pages[i - 1], FALSE);
-       }
-
-       /*
         *      We have to re-align virtual_space_start,
         *      because pmap_steal_memory has been using it.
         */
@@ -421,7 +520,7 @@
  *             Second initialization pass, to be done after
  *             the basic VM system is ready.
  */
-void           vm_page_module_init(void)
+void vm_page_module_init(void)
 {
        vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
                             VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
@@ -453,6 +552,7 @@
                        panic("vm_page_create");

                vm_page_init(m, paddr);
+               vm_page_array_add (paddr, m);
                vm_page_release(m, FALSE);
        }
 }
@@ -840,16 +940,16 @@
 }

 /*
- *     vm_page_grab:
+ *     vm_page_grab_flags:
  *
- *     Remove a page from the free list.
+ *     Remove a page specifying the memory zone to get the page from.
  *     Returns VM_PAGE_NULL if the free list is too small.
  */
-
-vm_page_t vm_page_grab(
-       boolean_t external)
+vm_page_t vm_page_grab_flags(
+                            boolean_t external,
+                            unsigned flags)
 {
-       register vm_page_t      mem;
+       register vm_page_t mem = VM_PAGE_NULL;

        simple_lock(&vm_page_queue_free_lock);

@@ -867,17 +967,70 @@
                return VM_PAGE_NULL;
        }

-       if (vm_page_queue_free == VM_PAGE_NULL)
+       /*
+        * If we put no flag, we request any page, so we search in
+        * the normal zone before.
+        */
+
+       if (!(flags & VM_PAGE_DMA)
+           && !(queue_empty(&vm_page_queue_free_normal)))
+         {
+
+               if (--vm_page_free_count < vm_page_free_count_minimum)
+                       vm_page_free_count_minimum = vm_page_free_count;
+
+               if (external)
+                       vm_page_external_count++;
+
+               queue_remove_first (&vm_page_queue_free_normal,
+                                   mem, vm_page_t, pageq);
+
+               mem->free = FALSE;
+               mem->extcounted = mem->external = external;
+               mem->pageq.next = 0;
+               mem->pageq.prev = 0;
+
+               vm_page_free_bitmap_unset (atop (mem->phys_addr));
+       }
+
+       if (!mem)
+         {
+       
+           /*
+            * It is not necessarily a bug if we ask for a DMA page and we can't
+            * obtain it, despite of vm_page_free_count, since free pages can be
+            * in the normal zone.
+            */
+           if (queue_empty(&vm_page_queue_free_dma))
+           {
+             if (!(flags & VM_PAGE_DMA))
                panic("vm_page_grab");
+             else
+               {
+                 printf ("vm_page_grab: no dma anymore");
+                 simple_unlock(&vm_page_queue_free_lock);
+                 return VM_PAGE_NULL;
+               }
+           }
+       
+       
+           if (--vm_page_free_count < vm_page_free_count_minimum)
+             vm_page_free_count_minimum = vm_page_free_count;
+               
+           if (external)
+             vm_page_external_count++;
+
+           queue_remove_first (&vm_page_queue_free_dma,
+                               mem, vm_page_t, pageq);
+       
+           mem->free = FALSE;
+           mem->extcounted = mem->external = external;
+           mem->pageq.next = 0;
+           mem->pageq.prev = 0;
+       
+           vm_page_free_bitmap_unset (atop (mem->phys_addr));
+         }

-       if (--vm_page_free_count < vm_page_free_count_minimum)
-               vm_page_free_count_minimum = vm_page_free_count;
-       if (external)
-               vm_page_external_count++;
-       mem = vm_page_queue_free;
-       vm_page_queue_free = (vm_page_t) mem->pageq.next;
-       mem->free = FALSE;
-       mem->extcounted = mem->external = external;
        simple_unlock(&vm_page_queue_free_lock);

        /*
@@ -897,6 +1050,26 @@
                thread_wakeup((event_t) &vm_page_free_wanted);

        return mem;
+
+}
+                       
+
+/*
+ *     vm_page_grab:
+ *
+ *     Remove a page from the free list.
+ *     Returns VM_PAGE_NULL if the free list is too small.
+ */
+
+vm_page_t vm_page_grab(
+       boolean_t external)
+{
+       register vm_page_t      mem;
+
+       /* Get any free page, no matter what zone. */
+       mem = vm_page_grab_flags (external, 0);
+
+       return mem;
 }

 vm_offset_t vm_page_grab_phys_addr()
@@ -909,13 +1082,12 @@
 }

 /*
- *     vm_page_grab_contiguous_pages:
+ *     vm_page_grab_contiguous_pages_queue:
  *
- *     Take N pages off the free list, the pages should
- *     cover a contiguous range of physical addresses.
- *     [Used by device drivers to cope with DMA limitations]
+ *     Take N pages off the free list FREEQUEUE, the pages
+ *     should cover a contiguous range of physical addresses.
  *
- *     Returns the page descriptors in ascending order, or
+ *     Returns the first page descriptor, or
  *     Returns KERN_RESOURCE_SHORTAGE if it could not.
  */

@@ -924,44 +1096,32 @@
 vm_size_t      vm_page_big_pagenum = 0;        /* Set this before call! */

 kern_return_t
-vm_page_grab_contiguous_pages(
-       int             npages,
-       vm_page_t       pages[],
-       natural_t       *bits,
-       boolean_t       external)
+vm_page_grab_contiguous_pages_queue(
+                                   int         npages,
+                                   vm_offset_t *phys_addr,
+                                   boolean_t   external,
+                                   queue_t     freequeue,
+                                   unsigned long minbitidx,
+                                   unsigned long maxbitidx,
+                                   unsigned long align)
+
 {
        register int    first_set;
        int             size, alloc_size;
        kern_return_t   ret;
        vm_page_t       mem, prevmem;

-#ifndef        NBBY
-#define        NBBY    8       /* size in bits of sizeof()`s unity */
-#endif
+       if (!align)
+         align = 1;

-#define        NBPEL   (sizeof(natural_t)*NBBY)
+       if (minbitidx >= vm_page_free_bitmap_bitsz)
+               panic ("minbitidx too high.");

-       size = (vm_page_big_pagenum + NBPEL - 1)
-               & ~(NBPEL - 1);                         /* in bits */
-
-       size = size / NBBY;                             /* in bytes */
-
-       /*
-        * If we are called before the VM system is fully functional
-        * the invoker must provide us with the work space. [one bit
-        * per page starting at phys 0 and up to vm_page_big_pagenum]
-        */
-       if (bits == 0) {
-               alloc_size = round_page(size);
-               if (kmem_alloc_wired(kernel_map,
-                                    (vm_offset_t *)&bits,
-                                    alloc_size)
-                       != KERN_SUCCESS)
-                   return KERN_RESOURCE_SHORTAGE;
-       } else
-               alloc_size = 0;
-
-       bzero(bits, size);
+       if (maxbitidx > vm_page_free_bitmap_bitsz) {
+               printf ("%s: maxbitidx exceeds bitmap size (%x > %x).\n",
+                       __FUNCTION__, maxbitidx, vm_page_free_bitmap_bitsz);
+               maxbitidx = vm_page_free_bitmap_bitsz;
+       }

        /*
         * A very large granularity call, its rare so that is ok
@@ -972,32 +1132,16 @@
         *      Do not dip into the reserved pool.
         */

-       if ((vm_page_free_count < vm_page_free_reserved)
-           || (vm_page_external_count >= vm_page_external_limit)) {
+       if (((vm_page_free_count < vm_page_free_reserved)
+            || (external
+                && (vm_page_external_count > vm_page_external_limit)))
+           && !current_thread()->vm_privilege) {
                simple_unlock(&vm_page_queue_free_lock);
                return KERN_RESOURCE_SHORTAGE;
        }

        /*
-        *      First pass through, build a big bit-array of
-        *      the pages that are free.  It is not going to
-        *      be too large anyways, in 4k we can fit info
-        *      for 32k pages.
-        */
-       mem = vm_page_queue_free;
-       while (mem) {
-               register int word_index, bit_index;
-
-               bit_index = (mem->phys_addr >> PAGE_SHIFT);
-               word_index = bit_index / NBPEL;
-               bit_index = bit_index - (word_index * NBPEL);
-               bits[word_index] |= 1 << bit_index;
-
-               mem = (vm_page_t) mem->pageq.next;
-       }
-
-       /*
-        *      Second loop. Scan the bit array for NPAGES
+        *      First loop. Scan the bit array for NPAGES
         *      contiguous bits.  That gives us, if any,
         *      the range of pages we will be grabbing off
         *      the free list.
@@ -1007,9 +1151,13 @@

                first_set = 0;

-               for (i = 0; i < size; i += sizeof(natural_t)) {
+               for (i = (minbitidx/NBBY);
+                    i < (maxbitidx/NBBY);
+                    i += sizeof(natural_t))
+                 {

-                   register natural_t  v = bits[i / sizeof(natural_t)];
+                   register natural_t  v =
+                     vm_page_free_bitmap[i / sizeof(natural_t)];
                    register int        bitpos;

                    /*
@@ -1042,14 +1190,20 @@
                         */
                        bits_so_far = 0;
 count_zeroes:
-                       while ((bitpos < NBPEL) && ((v & 1) == 0)) {
+                       while ((bitpos < NBPEL) &&
+                              (((v & 1) == 0)
+                               || ((bitpos + i*NBBY) % align)))
+                       {
                            bitpos++;
                            v >>= 1;
                        }
-                       if (v & 1) {
+
+                       if ((v & 1)
+                           && (!((bitpos + i*NBBY) % align)))
+                         {
                            first_set = (i * NBBY) + bitpos;
                            goto count_ones;
-                       }
+                         }
                    }
                    /*
                     * No luck
@@ -1063,7 +1217,6 @@
         */
 not_found_em:
        simple_unlock(&vm_page_queue_free_lock);
-
        ret = KERN_RESOURCE_SHORTAGE;
        goto out;

@@ -1079,43 +1232,33 @@
                vm_page_free_count_minimum = vm_page_free_count;
        if (external)
                vm_page_external_count += npages;
+       
        {
-           register vm_offset_t        first_phys, last_phys;
-
-           /* cache values for compare */
-           first_phys = first_set << PAGE_SHIFT;
-           last_phys = first_phys + (npages << PAGE_SHIFT);/* not included */
-
-           /* running pointers */
-           mem = vm_page_queue_free;
-           prevmem = VM_PAGE_NULL;
-
-           while (mem) {
-
-               register vm_offset_t    addr;
-
-               addr = mem->phys_addr;
-
-               if ((addr >= first_phys) &&
-                   (addr <  last_phys)) {
-                   if (prevmem)
-                       prevmem->pageq.next = mem->pageq.next;
-                   pages[(addr - first_phys) >> PAGE_SHIFT] = mem;
-                   mem->free = FALSE;
-                   mem->extcounted = mem->external = external;
-                   /*
-                    * Got them all ?
-                    */
-                   if (--npages == 0) break;
-               } else
-                   prevmem = mem;
-
-               mem = (vm_page_t) mem->pageq.next;
+         vm_offset_t first_phys;
+         vm_page_t pg;
+         int i;
+       
+         first_phys = first_set << PAGE_SHIFT;
+       
+         if (phys_addr)
+           *phys_addr = first_phys;
+
+         for (i = 0; i < npages; i++)
+           {
+             pg = vm_page_physaddr_lookup (first_phys + (i << PAGE_SHIFT));
+
+             assert (pg != VM_PAGE_NULL);
+
+             queue_remove (freequeue, pg, vm_page_t, pageq);
+       
+             pg->free = FALSE;
+             pg->extcounted = pg->external = external;
+             vm_page_free_bitmap_unset (atop (pg->phys_addr));
            }
        }
-
+       
        simple_unlock(&vm_page_queue_free_lock);
-
+       
        /*
         *      Decide if we should poke the pageout daemon.
         *      We do this if the free count is less than the low
@@ -1134,8 +1277,74 @@

        ret = KERN_SUCCESS;
 out:
-       if (alloc_size)
-               kmem_free(kernel_map, (vm_offset_t) bits, alloc_size);
+
+       return ret;
+}
+
+/*
+ *     vm_page_grab_contiguous_pages_flags:
+ *
+ *     Take N pages from specified zone, the pages should
+ *     cover a contiguous range of physical addresses.
+ *     [Used by device drivers to cope with DMA limitations]
+ *
+ *     Returns the page descriptors in ascending order, or
+ *     Returns KERN_RESOURCE_SHORTAGE if it could not.
+ */
+
+kern_return_t
+vm_page_grab_contiguous_pages_flags(
+       int             npages,
+       vm_offset_t     *phys_addr,
+       boolean_t       external,
+       unsigned        flags,
+       unsigned long   align)
+{
+       kern_return_t    ret;
+
+       if (!(flags & VM_PAGE_DMA))
+         {
+           ret = vm_page_grab_contiguous_pages_queue (
+              npages, phys_addr, external,
+              &vm_page_queue_free_normal,
+              atop(vm_page_normal_first),
+              atop(vm_page_normal_last),
+              align);
+       
+           if (ret == KERN_SUCCESS)
+             return ret;
+         };
+       
+       ret = vm_page_grab_contiguous_pages_queue (
+           npages, phys_addr, external,
+          &vm_page_queue_free_dma,
+          atop(vm_page_dma_first),
+          atop(vm_page_dma_last),
+          align);
+       
+       return ret;
+}
+
+/*
+ *     vm_page_grab_contiguous_pages:
+ *
+ *     Take N pages off the free list, the pages should
+ *     cover a contiguous range of physical addresses.
+ *
+ *     Returns the page descriptors in ascending order, or
+ *     Returns KERN_RESOURCE_SHORTAGE if it could not.
+ *     [Used by device drivers to cope with DMA limitations]
+ */
+kern_return_t
+vm_page_grab_contiguous_pages(
+       int             npages,
+       queue_t         pages,
+       vm_offset_t     *phys_addr,
+       boolean_t       e)
+{
+       kern_return_t   ret;
+
+       ret = vm_page_grab_contiguous_pages_flags (npages, phys_addr, e, 0, 0);

        return ret;
 }
@@ -1150,16 +1359,36 @@
        register vm_page_t      mem,
        boolean_t external)
 {
+       queue_t freequeue;
+
+       if (pmap_is_dma (mem->phys_addr))
+               freequeue = &vm_page_queue_free_dma;
+       else if (pmap_is_normal (mem->phys_addr))
+               freequeue = &vm_page_queue_free_normal;
+       else {
+               /* XXX - Don't put a panic here. it's just for now. */
+               panic ("vm_page_release (unknown page zone)");
+       }
+
+       /* UGLY: We skip the page 0, since it may cause problems
+          when returned to drivers.  */
+       if (mem->phys_addr == 0)
+         return;
+
        simple_lock(&vm_page_queue_free_lock);
+
        if (mem->free)
                panic("vm_page_release");
        mem->free = TRUE;
-       mem->pageq.next = (queue_entry_t) vm_page_queue_free;
-       vm_page_queue_free = mem;
+
+       queue_enter (freequeue, mem, vm_page_t, pageq);
+
        vm_page_free_count++;
        if (external)
                vm_page_external_count--;

+       vm_page_free_bitmap_set (atop (mem->phys_addr));
+
        /*
         *      Check if we should wake up someone waiting for page.
         *      But don't bother waking them unless they can allocate.



--
It was a type of people I did not know, I found them very strange and
they did not inspire confidence at all. Later I learned that I had been
introduced to electronic engineers.
                                                  E. W. Dijkstra



