Re: malloc(M_NOWAIT) issues.
Here's what I have so far. It doesn't represent any major operational
changes yet, but I'm gearing up for some sort of solution for CAM and
for general interrupt-time allocations.
What this patch does is change the VM_ALLOC_* state into a set of flags
and augment the M_* malloc flags. It seems to do a fair job when I
drop hw.physmem to 64m and run buildworld -j 20. I'm trying to make the
flags more flexible so they better cover the situations that come up.
My intention is to find a solution that takes advantage of the fact that
interrupt threads are threads. One advantage DFly already has is that
it should be possible to reuse pages from the 'cache' queue for
allocations made from interrupts simply by calling lwkt_yield() when
curthread->td_preempted is non-NULL. That causes the interrupt's
preemption to return control to the original thread and then reschedules
the interrupt as a non-preemptive thread. Once non-preemptive, the
interrupt should be able to make use of additional memory resources or
even to block (if we are very careful, anyway).
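To make the idea concrete, here is a minimal sketch of that pattern in
isolation (illustrative only, not part of the patch below; the helper
name is made up, but curthread->td_preempted, lwkt_yield() and
vm_page_alloc() are the real primitives):
	/*
	 * If we are a preempting interrupt thread we may not touch the
	 * cache queue, so yield first; we come back scheduled as a
	 * normal (non-preempting) thread, at which point VM_ALLOC_NORMAL
	 * (which allows cache pages) is safe to use.
	 */
	static vm_page_t
	int_alloc_page(vm_object_t obj, vm_pindex_t idx)
	{
		if (curthread->td_preempted)
			lwkt_yield();
		return (vm_page_alloc(obj, idx,
			VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM));
	}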
-Matt
Index: dev/agp/agp.c
===================================================================
RCS file: /cvs/src/sys/dev/agp/agp.c,v
retrieving revision 1.8
diff -u -r1.8 agp.c
--- dev/agp/agp.c 9 Dec 2003 19:40:56 -0000 1.8
+++ dev/agp/agp.c 19 Jan 2004 03:03:15 -0000
@@ -523,7 +523,7 @@
* the pages will be allocated and zeroed.
*/
m = vm_page_grab(mem->am_obj, OFF_TO_IDX(i),
- VM_ALLOC_ZERO | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
if ((m->flags & PG_ZERO) == 0)
vm_page_zero_fill(m);
AGP_DPF("found page pa=%#x\n", VM_PAGE_TO_PHYS(m));
Index: dev/agp/agp_i810.c
===================================================================
RCS file: /cvs/src/sys/dev/agp/agp_i810.c,v
retrieving revision 1.4
diff -u -r1.4 agp_i810.c
--- dev/agp/agp_i810.c 9 Dec 2003 19:40:56 -0000 1.4
+++ dev/agp/agp_i810.c 19 Jan 2004 03:03:27 -0000
@@ -519,7 +519,8 @@
* get its physical address.
*/
vm_page_t m;
- m = vm_page_grab(mem->am_obj, 0, VM_ALLOC_ZERO|VM_ALLOC_RETRY);
+ m = vm_page_grab(mem->am_obj, 0,
+ VM_ALLOC_NORMAL|VM_ALLOC_ZERO|VM_ALLOC_RETRY);
if ((m->flags & PG_ZERO) == 0)
vm_page_zero_fill(m);
vm_page_wire(m);
Index: i386/i386/pmap.c
===================================================================
RCS file: /cvs/src/sys/i386/i386/pmap.c,v
retrieving revision 1.27
diff -u -r1.27 pmap.c
--- i386/i386/pmap.c 18 Jan 2004 12:29:47 -0000 1.27
+++ i386/i386/pmap.c 19 Jan 2004 03:42:40 -0000
@@ -1178,7 +1178,7 @@
* Find or fabricate a new pagetable page
*/
m = vm_page_grab(pmap->pm_pteobj, ptepindex,
- VM_ALLOC_ZERO | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
KASSERT(m->queue == PQ_NONE,
("_pmap_allocpte: %p->queue != PQ_NONE", m));
@@ -1377,8 +1377,9 @@
/*
* This index is bogus, but out of the way
*/
- nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
- if (!nkpg)
+ nkpg = vm_page_alloc(kptobj, nkpt,
+ VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT);
+ if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
nkpt++;
Index: kern/kern_conf.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_conf.c,v
retrieving revision 1.5
diff -u -r1.5 kern_conf.c
--- kern/kern_conf.c 9 Nov 2003 02:22:36 -0000 1.5
+++ kern/kern_conf.c 19 Jan 2004 03:35:00 -0000
@@ -118,7 +118,7 @@
}
if (stashed >= DEVT_STASH) {
MALLOC(si, struct specinfo *, sizeof(*si), M_DEVT,
- M_USE_RESERVE);
+ M_WAITOK|M_USE_RESERVE);
bzero(si, sizeof(*si));
} else if (LIST_FIRST(&dev_free)) {
si = LIST_FIRST(&dev_free);
Index: kern/kern_mpipe.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_mpipe.c,v
retrieving revision 1.1
diff -u -r1.1 kern_mpipe.c
--- kern/kern_mpipe.c 30 Nov 2003 20:13:54 -0000 1.1
+++ kern/kern_mpipe.c 19 Jan 2004 02:38:53 -0000
@@ -69,7 +69,7 @@
++mpipe->total_count;
mpipe_free(mpipe, buf);
while (--nnow > 0) {
- buf = malloc(bytes, mpipe->type, M_NOWAIT);
+ buf = malloc(bytes, mpipe->type, M_SYSNOWAIT);
if (buf == NULL)
break;
++mpipe->total_count;
@@ -98,7 +98,7 @@
}
/*
- * Allocate an entry. flags can be M_NOWAIT which tells us not to block.
+ * Allocate an entry. flags can be M_RNOWAIT which tells us not to block.
* Unlike a normal malloc, if we block in mpipe_alloc() no deadlock will occur
* because it will unblock the moment an existing in-use buffer is freed.
*/
@@ -116,7 +116,7 @@
return(buf);
}
--mpipe->total_count;
- } else if (flags & M_NOWAIT) {
+ } else if (flags & M_RNOWAIT) {
crit_exit();
return(NULL);
} else {
Index: kern/kern_slaballoc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_slaballoc.c,v
retrieving revision 1.14
diff -u -r1.14 kern_slaballoc.c
--- kern/kern_slaballoc.c 25 Oct 2003 00:48:03 -0000 1.14
+++ kern/kern_slaballoc.c 19 Jan 2004 03:30:35 -0000
@@ -190,7 +190,7 @@
ZonePageCount = ZoneSize / PAGE_SIZE;
npg = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / PAGE_SIZE;
- kmemusage = kmem_slab_alloc(npg * sizeof(struct kmemusage), PAGE_SIZE, M_ZERO);
+ kmemusage = kmem_slab_alloc(npg * sizeof(struct kmemusage), PAGE_SIZE, M_WAITOK|M_ZERO);
for (i = 0; i < arysize(weirdary); ++i)
weirdary[i] = WEIRD_ADDR;
@@ -332,9 +332,15 @@
* KMEM subsystem. A SLAB tracking descriptor must be specified, use
* &SlabMisc if you don't care.
*
- * M_NOWAIT - return NULL instead of blocking.
+ * M_RNOWAIT - return NULL instead of blocking.
* M_ZERO - zero the returned memory.
- * M_USE_RESERVE - allocate out of the system reserve if necessary
+ * M_USE_RESERVE - allow greater drawdown of the free list
+ * M_USE_INTERRUPT_RESERVE - allow the freelist to be exhausted
+ *
+ * M_FAILSAFE - Failsafe allocation; when the allocation must
+ * succeed, attempt to get out of any preemption context
+ * and allocate from the cache, else block (even though
+ * we might be blocking from an interrupt), or panic.
*/
void *
malloc(unsigned long size, struct malloc_type *type, int flags)
@@ -372,7 +378,7 @@
ttl += type->ks_memuse[i];
type->ks_loosememuse = ttl;
if (ttl >= type->ks_limit) {
- if (flags & (M_NOWAIT|M_NULLOK))
+ if (flags & (M_RNOWAIT|M_NULLOK))
return(NULL);
panic("%s: malloc limit exceeded", type->ks_shortdesc);
}
@@ -393,7 +399,7 @@
* safely manipulate the kernel_map in free() due to free() possibly
* being called via an IPI message or from sensitive interrupt code.
*/
- while (slgd->NFreeZones > ZONE_RELS_THRESH && (flags & M_NOWAIT) == 0) {
+ while (slgd->NFreeZones > ZONE_RELS_THRESH && (flags & M_RNOWAIT) == 0) {
crit_enter();
if (slgd->NFreeZones > ZONE_RELS_THRESH) { /* crit sect race */
z = slgd->FreeZones;
@@ -406,7 +412,7 @@
/*
* XXX handle oversized frees that were queued from free().
*/
- while (slgd->FreeOvZones && (flags & M_NOWAIT) == 0) {
+ while (slgd->FreeOvZones && (flags & M_RNOWAIT) == 0) {
crit_enter();
if ((z = slgd->FreeOvZones) != NULL) {
KKASSERT(z->z_Magic == ZALLOC_OVSZ_MAGIC);
@@ -835,6 +841,11 @@
* but when we move zalloc() over to use this function as its backend
* we will have to switch to kreserve/krelease and call reserve(0)
* after the new space is made available.
+ *
+ * Interrupt code which has preempted other code is not allowed to
+ * mess with CACHE pages, but if M_FAILSAFE is set we can do a
+ * yield to become non-preempting and try again inclusive of
+ * cache pages.
*/
static void *
kmem_slab_alloc(vm_size_t size, vm_offset_t align, int flags)
@@ -843,6 +854,8 @@
vm_offset_t addr;
vm_offset_t offset;
int count;
+ int wanted_reserve;
+ thread_t td;
vm_map_t map = kernel_map;
size = round_page(size);
@@ -856,10 +869,12 @@
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, align, &addr)) {
vm_map_unlock(map);
- if ((flags & (M_NOWAIT|M_NULLOK)) == 0)
+ if ((flags & (M_RNOWAIT|M_NULLOK)) == 0)
panic("kmem_slab_alloc(): kernel_map ran out of space!");
crit_exit();
vm_map_entry_release(count);
+ if ((flags & (M_FAILSAFE|M_NULLOK)) == M_FAILSAFE)
+ panic("kmem_slab_alloc(): kernel_map ran out of space!");
return(NULL);
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -868,26 +883,65 @@
kernel_object, offset, addr, addr + size,
VM_PROT_ALL, VM_PROT_ALL, 0);
+ td = curthread;
+ wanted_reserve = 0; /* non-zero = tried but unable to use system reserve */
+
/*
* Allocate the pages. Do not mess with the PG_ZERO flag yet.
*/
for (i = 0; i < size; i += PAGE_SIZE) {
vm_page_t m;
vm_pindex_t idx = OFF_TO_IDX(offset + i);
- int zero = (flags & M_ZERO) ? VM_ALLOC_ZERO : 0;
+ int vmflags = 0;
- if ((flags & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
- m = vm_page_alloc(kernel_object, idx, VM_ALLOC_INTERRUPT|zero);
- else
- m = vm_page_alloc(kernel_object, idx, VM_ALLOC_SYSTEM|zero);
+ if (flags & M_ZERO)
+ vmflags |= VM_ALLOC_ZERO;
+ if (flags & M_USE_RESERVE)
+ vmflags |= VM_ALLOC_SYSTEM;
+ if (flags & M_USE_INTERRUPT_RESERVE)
+ vmflags |= VM_ALLOC_INTERRUPT;
+ if ((flags & (M_RNOWAIT|M_WAITOK)) == 0)
+ printf("kmem_slab_alloc: bad flags %08x (%p)\n", flags, ((int **)&size)[-1]);
+ if (flags & (M_FAILSAFE|M_WAITOK)) {
+ if (td->td_preempted) {
+ wanted_reserve = 1;
+ } else {
+ vmflags |= VM_ALLOC_NORMAL;
+ wanted_reserve = 0;
+ }
+ }
+
+ m = vm_page_alloc(kernel_object, idx, vmflags);
+
+ /*
+ * If the allocation failed we either return NULL or we retry.
+ *
+ * If M_WAITOK or M_FAILSAFE is set we retry. Note that M_WAITOK
+ * (and M_FAILSAFE) can be specified from an interrupt. M_FAILSAFE
+ * generates a warning or a panic.
+ */
if (m == NULL) {
- if ((flags & M_NOWAIT) == 0) {
- vm_map_unlock(map);
- vm_wait();
- vm_map_lock(map);
+ if (flags & (M_FAILSAFE|M_WAITOK)) {
+ if (wanted_reserve) {
+ if (flags & M_FAILSAFE)
+ printf("malloc: no memory, try failsafe\n");
+ vm_map_unlock(map);
+ lwkt_yield();
+ vm_map_lock(map);
+ } else {
+ if (flags & M_FAILSAFE)
+ printf("malloc: no memory, block even tho we shouldn't\n");
+ vm_map_unlock(map);
+ vm_wait();
+ vm_map_lock(map);
+ }
i -= PAGE_SIZE; /* retry */
continue;
}
+
+ /*
+ * We were unable to recover, cleanup and return NULL
+ */
while (i != 0) {
i -= PAGE_SIZE;
m = vm_page_lookup(kernel_object, OFF_TO_IDX(offset + i));
@@ -902,6 +956,8 @@
}
/*
+ * Success!
+ *
* Mark the map entry as non-pageable using a routine that allows us to
* populate the underlying pages.
*/
Index: kern/kern_varsym.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_varsym.c,v
retrieving revision 1.3
diff -u -r1.3 kern_varsym.c
--- kern/kern_varsym.c 10 Nov 2003 23:58:57 -0000 1.3
+++ kern/kern_varsym.c 19 Jan 2004 03:29:01 -0000
@@ -128,7 +128,7 @@
if ((error = copyinstr(uap->name, name, sizeof(name), NULL)) != 0)
goto done2;
- buf = malloc(MAXVARSYM_DATA, M_TEMP, 0);
+ buf = malloc(MAXVARSYM_DATA, M_TEMP, M_WAITOK);
if (uap->data &&
(error = copyinstr(uap->data, buf, MAXVARSYM_DATA, NULL)) != 0)
{
@@ -377,8 +377,8 @@
error = E2BIG;
} else if (data) {
datalen = strlen(data);
- ve = malloc(sizeof(struct varsyment), M_VARSYM, M_ZERO);
- sym = malloc(sizeof(struct varsym) + namelen + datalen + 2, M_VARSYM, 0);
+ ve = malloc(sizeof(struct varsyment), M_VARSYM, M_WAITOK|M_ZERO);
+ sym = malloc(sizeof(struct varsym) + namelen + datalen + 2, M_VARSYM, M_WAITOK);
ve->ve_sym = sym;
sym->vs_refs = 1;
sym->vs_namelen = namelen;
@@ -417,7 +417,7 @@
{
struct varsyment *nve;
- nve = malloc(sizeof(struct varsyment), M_VARSYM, M_ZERO);
+ nve = malloc(sizeof(struct varsyment), M_VARSYM, M_WAITOK|M_ZERO);
nve->ve_sym = ve->ve_sym;
++nve->ve_sym->vs_refs;
TAILQ_INSERT_TAIL(&vss->vx_queue, nve, ve_entry);
Index: kern/vfs_bio.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.16
diff -u -r1.16 vfs_bio.c
--- kern/vfs_bio.c 3 Nov 2003 17:11:21 -0000 1.16
+++ kern/vfs_bio.c 19 Jan 2004 01:37:26 -0000
@@ -2503,7 +2503,7 @@
* with paging I/O, no matter which
* process we are.
*/
- m = vm_page_alloc(obj, pi, VM_ALLOC_SYSTEM);
+ m = vm_page_alloc(obj, pi, VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM);
if (m == NULL) {
VM_WAIT;
vm_pageout_deficit += desiredpages - bp->b_npages;
@@ -3157,7 +3157,7 @@
*/
p = vm_page_alloc(kernel_object,
((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
- VM_ALLOC_SYSTEM);
+ VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM);
if (!p) {
vm_pageout_deficit += (to - from) >> PAGE_SHIFT;
VM_WAIT;
Index: sys/malloc.h
===================================================================
RCS file: /cvs/src/sys/sys/malloc.h,v
retrieving revision 1.16
diff -u -r1.16 malloc.h
--- sys/malloc.h 30 Nov 2003 20:13:53 -0000 1.16
+++ sys/malloc.h 19 Jan 2004 02:25:10 -0000
@@ -59,12 +59,50 @@
/*
* flags to malloc.
*/
-#define M_NOWAIT 0x0001 /* do not block */
-#define M_WAITOK 0x0002 /* wait for resources */
+#define M_RNOWAIT 0x0001 /* do not block */
+#define M_WAITOK 0x0002 /* wait for resources / alloc from cache */
#define M_ZERO 0x0100 /* bzero() the allocation */
-#define M_USE_RESERVE 0x0200 /* can alloc out of reserve memory */
+#define M_USE_RESERVE 0x0200 /* can eat into free list reserve */
#define M_NULLOK 0x0400 /* ok to return NULL in M_WAITOK case */
#define M_PASSIVE_ZERO 0x0800 /* (internal to the slab code only) */
+#define M_USE_INTERRUPT_RESERVE \
+ 0x1000 /* can exhaust free list entirely */
+#define M_FAILSAFE 0x2000 /* failsafe allocation attempt */
+
+/*
+ * M_NOWAIT has to be a set of flags for equivalence to prior use.
+ *
+ * M_INTALLOC should be used for any critical infrastructure allocations
+ * made from interrupts.
+ *
+ * M_SYSALLOC should be used for any critical infrastructure allocations
+ * made by the kernel proper.
+ *
+ * NOTE ON DRAGONFLY USE OF M_NOWAIT. M_NOWAIT has traditionally been used
+ * when we did not wish to break spl protections or when we allocate memory
+ * from interrupts. For the spl protection case we intend to move all
+ * such allocations outside of the spl blocks.
+ *
+ * For the interrupt case the issue comes down to whether it is possible
+ * to allocate out of the VM page cache. Since interrupts are threads it
+ * is theoretically possible to allocate out of the VM page cache as long
+ * as we determine that we are not preempting another thread. This is a
+ * simple td->td_preempted check. In DFly we can also theoretically do
+ * an lwkt_yield() to force the interrupt thread to be rescheduled (so it
+ * is no longer preempting a thread) and then allocate out of the cache.
+ * This is what the M_FAILSAFE flag does in M_INTALLOC and this is why
+ * M_INTALLOC should be used in interrupt-related situations where the
+ * allocation must absolutely succeed for the health of the machine.
+ */
+
+#define M_INTNOWAIT (M_RNOWAIT|M_USE_RESERVE|M_USE_INTERRUPT_RESERVE)
+#define M_SYSNOWAIT (M_RNOWAIT|M_USE_RESERVE)
+#define M_INTWAIT (M_WAITOK|M_USE_RESERVE|M_USE_INTERRUPT_RESERVE)
+#define M_SYSWAIT (M_WAITOK|M_USE_RESERVE)
+
+#define M_NOWAIT M_INTNOWAIT
+#define M_INTALLOC (M_INTNOWAIT|M_FAILSAFE)
+#define M_SYSALLOC M_SYSWAIT
#define M_MAGIC 877983977 /* time when first defined :-) */
Index: vfs/nwfs/nwfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/vfs/nwfs/nwfs_vfsops.c,v
retrieving revision 1.7
diff -u -r1.7 nwfs_vfsops.c
--- vfs/nwfs/nwfs_vfsops.c 7 Aug 2003 21:54:36 -0000 1.7
+++ vfs/nwfs/nwfs_vfsops.c 19 Jan 2004 03:35:32 -0000
@@ -184,7 +184,7 @@
ncp_conn_unlock(conn,td); /* we keep the ref */
mp->mnt_stat.f_iosize = conn->buffer_size;
/* We must malloc our own mount info */
- MALLOC(nmp,struct nwmount *,sizeof(struct nwmount),M_NWFSDATA,M_USE_RESERVE | M_ZERO);
+ MALLOC(nmp,struct nwmount *,sizeof(struct nwmount),M_NWFSDATA, M_WAITOK|M_USE_RESERVE|M_ZERO);
if (nmp == NULL) {
nwfs_printf("could not alloc nwmount\n");
error = ENOMEM;
Index: vfs/smbfs/smbfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/vfs/smbfs/smbfs_vfsops.c,v
retrieving revision 1.7
diff -u -r1.7 smbfs_vfsops.c
--- vfs/smbfs/smbfs_vfsops.c 7 Aug 2003 21:54:36 -0000 1.7
+++ vfs/smbfs/smbfs_vfsops.c 19 Jan 2004 03:35:43 -0000
@@ -177,7 +177,7 @@
#ifdef SMBFS_USEZONE
smp = zalloc(smbfsmount_zone);
#else
- MALLOC(smp, struct smbmount*, sizeof(*smp), M_SMBFSDATA, M_USE_RESERVE);
+ MALLOC(smp, struct smbmount*, sizeof(*smp), M_SMBFSDATA, M_WAITOK|M_USE_RESERVE);
#endif
if (smp == NULL) {
printf("could not alloc smbmount\n");
Index: vm/vm_fault.c
===================================================================
RCS file: /cvs/src/sys/vm/vm_fault.c,v
retrieving revision 1.9
diff -u -r1.9 vm_fault.c
--- vm/vm_fault.c 3 Nov 2003 17:11:23 -0000 1.9
+++ vm/vm_fault.c 19 Jan 2004 01:37:26 -0000
@@ -363,7 +363,7 @@
fs.m = NULL;
if (!vm_page_count_severe()) {
fs.m = vm_page_alloc(fs.object, fs.pindex,
- (fs.vp || fs.object->backing_object)? VM_ALLOC_NORMAL: VM_ALLOC_ZERO);
+ (fs.vp || fs.object->backing_object)? VM_ALLOC_NORMAL: VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
Index: vm/vm_kern.c
===================================================================
RCS file: /cvs/src/sys/vm/vm_kern.c,v
retrieving revision 1.13
diff -u -r1.13 vm_kern.c
--- vm/vm_kern.c 14 Jan 2004 23:26:14 -0000 1.13
+++ vm/vm_kern.c 19 Jan 2004 03:14:51 -0000
@@ -204,7 +204,7 @@
vm_page_t mem;
mem = vm_page_grab(kernel_object, OFF_TO_IDX(offset + i),
- VM_ALLOC_ZERO | VM_ALLOC_RETRY);
+ VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if ((mem->flags & PG_ZERO) == 0)
vm_page_zero_fill(mem);
mem->valid = VM_PAGE_BITS_ALL;
@@ -285,14 +285,11 @@
* kmem_alloc() because we may need to allocate memory at interrupt
* level where we cannot block (canwait == FALSE).
*
- * Note that this still only works in a uni-processor environment and
- * when called at splhigh().
- *
* We don't worry about expanding the map (adding entries) since entries
* for wired maps are statically allocated.
*
- * NOTE: This routine is not supposed to block if M_NOWAIT is set, but
- * I have not verified that it actually does not block.
+ * NOTE: Please see kmem_slab_alloc() for a better explanation of the
+ * M_* flags.
*/
vm_offset_t
kmem_malloc(vm_map_t map, vm_size_t size, int flags)
@@ -302,6 +299,8 @@
vm_offset_t addr;
vm_page_t m;
int count;
+ thread_t td;
+ int wanted_reserve;
if (map != kernel_map && map != mb_map)
panic("kmem_malloc: map != {kmem,mb}_map");
@@ -324,9 +323,13 @@
printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
return (0);
}
- if ((flags & M_NOWAIT) == 0)
- panic("kmem_malloc(%ld): kernel_map too small: %ld total allocated",
+ if ((flags & (M_RNOWAIT|M_NULLOK)) == 0 ||
+ (flags & (M_FAILSAFE|M_NULLOK)) == M_FAILSAFE
+ ) {
+ panic("kmem_malloc(%ld): kernel_map too small: "
+ "%ld total allocated",
(long)size, (long)map->size);
+ }
return (0);
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -335,32 +338,52 @@
kmem_object, offset, addr, addr + size,
VM_PROT_ALL, VM_PROT_ALL, 0);
+ td = curthread;
+ wanted_reserve = 0;
+
for (i = 0; i < size; i += PAGE_SIZE) {
- /*
- * Note: if M_NOWAIT specified alone, allocate from
- * interrupt-safe queues only (just the free list). If
- * M_USE_RESERVE is also specified, we can also
- * allocate from the cache. Neither of the latter two
- * flags may be specified from an interrupt since interrupts
- * are not allowed to mess with the cache queue.
- */
-retry:
- m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
- ((flags & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) ?
- VM_ALLOC_INTERRUPT :
- VM_ALLOC_SYSTEM);
+ int vmflags;
+
+ vmflags = VM_ALLOC_SYSTEM; /* XXX M_USE_RESERVE? */
+ if ((flags & (M_WAITOK|M_RNOWAIT)) == 0)
+ printf("kmem_malloc: bad flags %08x (%p)\n", flags, ((int **)&map)[-1]);
+ if (flags & M_USE_INTERRUPT_RESERVE)
+ vmflags |= VM_ALLOC_INTERRUPT;
+ if (flags & (M_FAILSAFE|M_WAITOK)) {
+ if (td->td_preempted) {
+ wanted_reserve = 1;
+ } else {
+ vmflags |= VM_ALLOC_NORMAL;
+ wanted_reserve = 0;
+ }
+ }
+
+ m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i), vmflags);
/*
* Ran out of space, free everything up and return. Don't need
* to lock page queues here as we know that the pages we got
* aren't on any queues.
+ *
+ * If M_WAITOK or M_FAILSAFE is set we can yield or block.
*/
if (m == NULL) {
- if ((flags & M_NOWAIT) == 0) {
- vm_map_unlock(map);
- VM_WAIT;
- vm_map_lock(map);
- goto retry;
+ if (flags & (M_FAILSAFE|M_WAITOK)) {
+ if (wanted_reserve) {
+ if (flags & M_FAILSAFE)
+ printf("kmem_malloc: no memory, try failsafe\n");
+ vm_map_unlock(map);
+ lwkt_yield();
+ vm_map_lock(map);
+ } else {
+ if (flags & M_FAILSAFE)
+ printf("kmem_malloc: no memory, block even though we shouldn't\n");
+ vm_map_unlock(map);
+ VM_WAIT;
+ vm_map_lock(map);
+ }
+ i -= PAGE_SIZE; /* retry */
+ continue;
}
/*
* Free the pages before removing the map entry.
Index: vm/vm_map.c
===================================================================
RCS file: /cvs/src/sys/vm/vm_map.c,v
retrieving revision 1.19
diff -u -r1.19 vm_map.c
--- vm/vm_map.c 18 Jan 2004 12:32:04 -0000 1.19
+++ vm/vm_map.c 19 Jan 2004 03:04:31 -0000
@@ -3513,11 +3513,11 @@
for (idx = 0; idx < robject->size; idx++) {
m_out = vm_page_grab(robject, idx,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m_out->valid == 0) {
m_in = vm_page_grab(object, bo_pindex + idx,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
+ VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m_in->valid == 0) {
rv = vm_pager_get_pages(object, &m_in, 1, 0);
if (rv != VM_PAGER_OK) {
Index: vm/vm_page.c
===================================================================
RCS file: /cvs/src/sys/vm/vm_page.c,v
retrieving revision 1.15
diff -u -r1.15 vm_page.c
--- vm/vm_page.c 3 Nov 2003 17:11:23 -0000 1.15
+++ vm/vm_page.c 19 Jan 2004 03:49:00 -0000
@@ -731,10 +731,10 @@
* with this VM object/offset pair.
*
* page_req classes:
- * VM_ALLOC_NORMAL normal process request
- * VM_ALLOC_SYSTEM system *really* needs a page
- * VM_ALLOC_INTERRUPT interrupt time request
- * VM_ALLOC_ZERO zero page
+ * VM_ALLOC_NORMAL allow use of cache pages, nominal free drain
+ * VM_ALLOC_SYSTEM greater free drain
+ * VM_ALLOC_INTERRUPT allow free list to be completely drained
+ * VM_ALLOC_ZERO advisory request for pre-zero'd page
*
* Object must be locked.
* This routine may not block.
@@ -752,62 +752,72 @@
KASSERT(!vm_page_lookup(object, pindex),
("vm_page_alloc: page already allocated"));
+ KKASSERT(page_req &
+ (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM));
/*
* The pager is allowed to eat deeper into the free page list.
*/
-
- if ((curthread == pagethread) && (page_req != VM_ALLOC_INTERRUPT)) {
- page_req = VM_ALLOC_SYSTEM;
- };
+ if (curthread == pagethread)
+ page_req |= VM_ALLOC_SYSTEM;
s = splvm();
-
loop:
- if (vmstats.v_free_count > vmstats.v_free_reserved) {
+ if (vmstats.v_free_count > vmstats.v_free_reserved ||
+ ((page_req & VM_ALLOC_INTERRUPT) && vmstats.v_free_count > 0) ||
+ ((page_req & VM_ALLOC_SYSTEM) && vmstats.v_cache_count == 0 &&
+ vmstats.v_free_count > vmstats.v_interrupt_free_min)
+ ) {
/*
- * Allocate from the free queue if there are plenty of pages
- * in it.
+ * The free queue has sufficient free pages to take one out.
*/
- if (page_req == VM_ALLOC_ZERO)
+ if (page_req & VM_ALLOC_ZERO)
m = vm_page_select_free(object, pindex, TRUE);
else
m = vm_page_select_free(object, pindex, FALSE);
- } else if (
- (page_req == VM_ALLOC_SYSTEM &&
- vmstats.v_cache_count == 0 &&
- vmstats.v_free_count > vmstats.v_interrupt_free_min) ||
- (page_req == VM_ALLOC_INTERRUPT && vmstats.v_free_count > 0)
- ) {
+ } else if (page_req & VM_ALLOC_NORMAL) {
+ /*
+ * Allocatable from the cache (non-interrupt only). On
+ * success, we must free the page and try again, thus
+ * ensuring that vmstats.v_*_free_min counters are replenished.
+ */
+#ifdef INVARIANTS
+ if (curthread->td_preempted) {
+ printf("vm_page_alloc(): warning, attempt to allocate"
+ " cache page from preempting interrupt\n");
+ m = NULL;
+ } else {
+ m = vm_page_select_cache(object, pindex);
+ }
+#else
+ m = vm_page_select_cache(object, pindex);
+#endif
/*
- * Interrupt or system, dig deeper into the free list.
+ * On success, move the page into the free queue and loop.
*/
- m = vm_page_select_free(object, pindex, FALSE);
- } else if (page_req != VM_ALLOC_INTERRUPT) {
+ if (m != NULL) {
+ KASSERT(m->dirty == 0,
+ ("Found dirty cache page %p", m));
+ vm_page_busy(m);
+ vm_page_protect(m, VM_PROT_NONE);
+ vm_page_free(m);
+ goto loop;
+ }
+
/*
- * Allocatable from cache (non-interrupt only). On success,
- * we must free the page and try again, thus ensuring that
- * vmstats.v_*_free_min counters are replenished.
+ * On failure return NULL
*/
- m = vm_page_select_cache(object, pindex);
- if (m == NULL) {
- splx(s);
+ splx(s);
#if defined(DIAGNOSTIC)
- if (vmstats.v_cache_count > 0)
- printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count);
+ if (vmstats.v_cache_count > 0)
+ printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", vmstats.v_cache_count);
#endif
- vm_pageout_deficit++;
- pagedaemon_wakeup();
- return (NULL);
- }
- KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
- vm_page_busy(m);
- vm_page_protect(m, VM_PROT_NONE);
- vm_page_free(m);
- goto loop;
+ vm_pageout_deficit++;
+ pagedaemon_wakeup();
+ return (NULL);
} else {
/*
- * Not allocatable from cache from interrupt, give up.
+ * No pages available, wakeup the pageout daemon and give up.
*/
splx(s);
vm_pageout_deficit++;
@@ -816,24 +826,18 @@
}
/*
- * At this point we had better have found a good page.
+ * Good page found.
*/
-
- KASSERT(
- m != NULL,
- ("vm_page_alloc(): missing page on free queue\n")
- );
+ KASSERT(m != NULL, ("vm_page_alloc(): missing page on free queue\n"));
/*
* Remove from free queue
*/
-
vm_page_unqueue_nowakeup(m);
/*
* Initialize structure. Only the PG_ZERO flag is inherited.
*/
-
if (m->flags & PG_ZERO) {
vm_page_zero_count--;
m->flags = PG_ZERO | PG_BUSY;
@@ -845,7 +849,8 @@
m->act_count = 0;
m->busy = 0;
m->valid = 0;
- KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m));
+ KASSERT(m->dirty == 0,
+ ("vm_page_alloc: free/cache page %p was dirty", m));
/*
* vm_page_insert() is safe prior to the splx(). Note also that
@@ -853,7 +858,6 @@
* could cause us to block allocating memory). We cannot block
* anywhere.
*/
-
vm_page_insert(m, object, pindex);
/*
@@ -864,7 +868,6 @@
pagedaemon_wakeup();
splx(s);
-
return (m);
}
@@ -1428,15 +1431,18 @@
* changing state. We keep on waiting, if the page continues
* to be in the object. If the page doesn't exist, allocate it.
*
+ * If VM_ALLOC_RETRY is specified VM_ALLOC_NORMAL must also be specified.
+ *
* This routine may block.
*/
vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
-
vm_page_t m;
int s, generation;
+ KKASSERT(allocflags &
+ (VM_ALLOC_NORMAL|VM_ALLOC_INTERRUPT|VM_ALLOC_SYSTEM));
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
if (m->busy || (m->flags & PG_BUSY)) {
Index: vm/vm_page.h
===================================================================
RCS file: /cvs/src/sys/vm/vm_page.h,v
retrieving revision 1.8
diff -u -r1.8 vm_page.h
--- vm/vm_page.h 3 Nov 2003 17:11:23 -0000 1.8
+++ vm/vm_page.h 19 Jan 2004 03:47:09 -0000
@@ -389,11 +389,19 @@
#define VM_PAGE_BITS_ALL 0xffff
#endif
-#define VM_ALLOC_NORMAL 0
-#define VM_ALLOC_INTERRUPT 1
-#define VM_ALLOC_SYSTEM 2
-#define VM_ALLOC_ZERO 3
-#define VM_ALLOC_RETRY 0x80
+/*
+ * Note: the code will always use nominally free pages from the free list
+ * before trying other flag-specified sources.
+ *
+ * At least one of VM_ALLOC_NORMAL|VM_ALLOC_SYSTEM|VM_ALLOC_INTERRUPT
+ * must be specified. VM_ALLOC_RETRY may only be specified if VM_ALLOC_NORMAL
+ * is also specified.
+ */
+#define VM_ALLOC_NORMAL 0x01 /* ok to use cache pages */
+#define VM_ALLOC_SYSTEM 0x02 /* ok to exhaust most of free list */
+#define VM_ALLOC_INTERRUPT 0x04 /* ok to exhaust entire free list */
+#define VM_ALLOC_ZERO 0x08 /* req pre-zero'd memory if avail */
+#define VM_ALLOC_RETRY 0x80 /* indefinite block (vm_page_grab()) */
void vm_page_unhold(vm_page_t mem);
Index: vm/vm_zone.c
===================================================================
RCS file: /cvs/src/sys/vm/vm_zone.c,v
retrieving revision 1.12
diff -u -r1.12 vm_zone.c
--- vm/vm_zone.c 14 Jan 2004 23:26:14 -0000 1.12
+++ vm/vm_zone.c 19 Jan 2004 01:37:26 -0000
@@ -190,10 +190,10 @@
z->zobj = obj;
_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
}
- z->zallocflag = VM_ALLOC_INTERRUPT;
+ z->zallocflag = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT;
z->zmax += nentries;
} else {
- z->zallocflag = VM_ALLOC_SYSTEM;
+ z->zallocflag = VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM;
z->zmax = 0;
}
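(For reference, illustrative only and not part of the diff: this is
roughly how callers would choose among the new composite flags from
sys/malloc.h above. M_DEVBUF and M_TEMP are just example malloc types.)
	/* interrupt path where the allocation must succeed */
	buf = malloc(size, M_DEVBUF, M_INTALLOC);
	/* critical allocation from the kernel proper, may block */
	sc = malloc(sizeof(*sc), M_DEVBUF, M_SYSALLOC | M_ZERO);
	/* ordinary allocation that can tolerate failure */
	p = malloc(size, M_TEMP, M_RNOWAIT);
	if (p == NULL)
		return (ENOMEM);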