From 4f72c0cc7401d0e5fe78d9914943245a070ecc4e Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 29 Jan 2019 23:03:50 +0000 Subject: [PATCH] buddy: Throw out the current stuff in favour of a small tight model Not yet tested --- Kernel/buddy.c | 539 ++++++++++++++----------------------------------- 1 file changed, 146 insertions(+), 393 deletions(-) diff --git a/Kernel/buddy.c b/Kernel/buddy.c index 50f67547..13942765 100644 --- a/Kernel/buddy.c +++ b/Kernel/buddy.c @@ -1,451 +1,204 @@ /* - * Trivial left packing buddy allocator favouring memory efficiency - * Intended for 68K systems using the simple MMU concept. The only - * real oddities here are that we left pack to try and keep big - * chunks free and that we know the owner of any leaf block which - * allows our swapper to evict the right people during a brk() grow or - * in theory (with compiler hooks added) on a stack extend. + * A simple buddy allocator. Set BUDDY_SIZE to the number of blocks you + * may need to handle and BUDDY_NODESHIFT to the shift from block numbers + * to blocks. buddy_ram is a uint8_t pointer to the memory pool base. * - * BUDDY_NUMLEVEL: levels of allocator - * BUDDY_BLOCKBITS: bits in the block size (eg 12 for 4K) - * BUDDY_BASE: base address of the managed block - * BUDDY_START: first usable address in managed range - * BUDDY_TOP: top address of usable memory for buddy - * BUDDY_TABLE: the buddy table, usually set to BUDDY_START to take it - * from the memory pool. - * BUDDY_TREESIZE: size of allocation for the buddy tree + * All buddy data is kept outside of the allocator and the data does not + * therefore need to be power of two aligned (but can be). * - * buddy_level: offsets for each level in tree + * The caller can use buddy_pin() to pin down blocks that are not available + * eg to align the allocator on powers of two boundaries * - * These can all be defined to variables if needed + * The main trick here for simplicity is to number the nodes from one not + * zero and to keep them together. That means you can move up/down by + * shifting the node number (left side on down) and you can find the + * pair by xoring the node number with 1. * - * The smallest size is level 0, that makes things simpler - * for the rest of the logic. - * - * The simple MMU needs BUDDY_BASE to be power of two aligned, but - * not BUDDY_START (although that is preferable). The code itself - * just needs CPU appropriate alignment. - * - * The simple MMU works as follows. All user space addresses are - * translated according to the rule - * - * pa = (va & mask_reg) ^ base_reg; - * fault = (va & ~mask_reg) != 0 && (va & ~mask_reg) != ~mask_reg; - * - * Processes are set up with stack at va FFFFFFFF and base at 00000000 - * on a suitably power of 2 aligned address allocated from the buddy - * for the size needed. Mask is set to size - 1 and base is set to the - * base of the block. When extending a larger block is allocated - * (possibly extended), and the code is copied to the bottom of the new - * block if we extended down, and the stack to the new top if we extended - * up. The base and mask are then updated so the process doesn't see the - * fact it's been physically stretched. - * - * The table consumes an effective 2 bytes per block. So for 8MB of - * RAM in 2K blocks we need one block. In practise for most machines - * we can simply assume we lose one block and set the tables accordingly - * rather than dynamically. Given 2K is actually undersized for blocks - * on most systems this is doubly true. - * - * NOT YET DEBUGGED DO NOT USE!!!! + * We keep an owner because we need to support eviction of blocks for + * swapping. */ #include #include #include -#include - -#ifdef CONFIG_BUDDY_MMU -#define BUDDY_FREE 0xFF +#ifdef CONFIG_BUDDY_MALLOC -extern uint16_t buddy_level[BUDDY_NUMLEVEL]; +static uint8_t buddy[2 * BUDDY_SIZE]; /* Node 0 is not used */ +static uint16_t inuse, total; -/* Ok this is crap FIXME but will do for now */ -static int buddy_get_level(usize_t size) -{ - int l = 0; - size >>= BUDDY_BLOCKBITS; - while(size) { - size >>= 1; - l++; - } - return l; -} - -/* Turn a block number at a given level into a memory address for the - map bit and a bit number */ -static uint8_t *map_buddy(uint32_t n, int level) -{ - n >>= level + BUDDY_BLOCKBITS; /* Index into level */ - /* Add in the precomputed tree level base */ - return BUDDY_TABLE + buddy_level[level] + n; -} +#define NODE_FREE 0xFF +#define NODE_UNAVAIL 0x00 -/* Mark the required bytes as 0xFF (free) or n (busy). Handle the propogation - of busyness into the higher tree levels */ -static void buddy_markv(uint32_t n, int level, uint8_t owner) +/* Pin a bottom level node */ +void buddy_pin(uint16_t node, uint8_t owner) { - uint8_t *p; + /* Optional debug - check first entry is NODE_FREE */ do { - p = map_buddy(n, level); - /* FIXME: check for freeing free, owning owned */ - *p = owner; - - /* If we are freeing then a non free partner means our parent remains busy, - if we are allocating then it means our parent is already busy. Either - way we can stop ! - - This looks so much better in B than C, but the compiler will generate - nice asm for it anyhow */ - p = (uint8_t *) (((size_t) p) ^ 1); - /* FIXME: This logic below is wrong for free/free merging */ - } while (level++ < BUDDY_NUMLEVEL && *p != BUDDY_FREE); - /* All free or all full */ -} - -void buddy_free(uint8_t * p, int level) -{ - uint32_t n = p - BUDDY_BASE; - buddy_markv(n, level, BUDDY_FREE); -} - -/* Turn a map entry into an offset relative to the buddy base */ -static uint32_t map_to_ptr(int i, int level) -{ - return BUDDY_BLOCKSIZE << level; -} - -/* Could be optimised a bit by walking down the tree levels rather than - scanning across ? However remember that in most of our use cases the - largest allocation is going to be a small number of 4K or 8K pages */ -uint8_t *buddy_alloc(int level, uint8_t owner) -{ - unsigned int i; - uint32_t n; - - /* The level we will scan for a free byte */ - uint16_t level_size = 1 << (BUDDY_NUMLEVEL - level); - uint8_t *p = BUDDY_TABLE + buddy_level[level]; - - for (i = 0; i < level_size; i++) { - if (*p++ == BUDDY_FREE) { - n = map_to_ptr(i, level); - level_size = 1 << level; - /* Mark ourself busy at all the levels */ - for (i = 0; i < level_size; i++) { - buddy_markv(n, 0, owner); - n += BUDDY_BLOCKSIZE; - } - return n + BUDDY_BASE; - } - } - /* Out of memory */ - return NULL; -} - -/* - * Call the evictor with the owner id of each occupied block - * that is needed to make the pair of our buddy block free. - */ -int buddy_evict(uint8_t *p, int level, int (*evict) (uint8_t owner)) -{ - /* Find the map for our partner at this level, and free it up */ - uint8_t *m = map_buddy((p - BUDDY_BASE) ^ - (BUDDY_BLOCKSIZE << level), 0); - int i = 1 << level; - int r; - uint8_t last = BUDDY_FREE; - uint8_t d; - - while (i--) { - d = *m++; - if (d != BUDDY_FREE) { - if (d != last && (r = evict(d))) - return r; - last = d; + buddy[node] = owner; + node /= 2; + } while(node && buddy[node] == NODE_FREE); +} + +/* Unpin a bottom level node */ +static void buddy_unpin(uint16_t node) +{ + uint16_t o; + + /* Our node becomes free */ + buddy[node] = NODE_FREE; + /* Walk to the top of the tree freeing the parent if the other + child is also free. As we go remember the pair owner */ + while(node > 1 && (o = buddy[node^1]) == NODE_FREE) + buddy[node /= 2] = NODE_FREE; + /* If we stopped part way then set the owner of the nodes above us + to the pair so that we propogate useful ownership info up. + Optional */ + while(node > 1) + buddy[node /= 2] = o; +} + +/* Free a block and propogate both free and ownership information updates */ +static uint8_t buddy_free(uint16_t node, uint8_t sizeshift) +{ + uint16_t n = 1 << sizeshift; /* How many blocks to free */ + uint16_t i; + + node <<= depth; /* Turn our node into the leftmost base node of + the level we need */ + /* Unpin all our blocks from the bottom up */ + for (i = 0; i < n; i++) + buddy_upin(node++); + inuse -= n; +} + +/* Allocate a block using the buddy allocator if space is available */ +static int16_t buddy_alloc(uint8_t sizeshift, uint8_t owner) +{ + /* There are n slots at offset n: isn't that neat */ + + uint16_t ct = n; + uint16_t node = n; + uint16_t n = BUDDY_SIZE >> sizeshift; + + while(ct--) { + if (buddy[node] == NODE_FREE) { + /* Found a free node at our level. Turn it to base + * */ + node <<= sizeshift; + for (ct = 0; ct < n; ct++) + pin(node + ct, owner); + inuse += n; + return node; } + node++; } - - return 0; -} - -/* Given a pointer return the base address of the block at this level. Our - memory management layer needs this so that it can work out where the - new bigger MMU base is relative to the current allocation when we evict - the pair with buddy_evict */ -uint8_t *buddy_base(uint8_t * p, int level) -{ - size_t mask = (BUDDY_BLOCKSIZE << level) - 1; - /* Do the maths so we don't assume power of two alignment of base. With - luck gcc will figure out the optimisation for the simple case 8) */ - size_t n = p - BUDDY_BASE; - return BUDDY_BASE + (n & ~mask); -} - -/* Given a size return the needed order */ -uint8_t buddy_order(size_t s) -{ - uint8_t level = 0; - /* This all comes out as constant maths on s */ - s = (s + BUDDY_BLOCKSIZE - 1) >> BUDDY_BLOCKBITS; - /* Simple bit count - we could optimise if needed ! */ - while (s >>= 1) - level++; - if (level >= BUDDY_NUMLEVEL) - return 0xFF; - return level; + return -1; } -/* -- Can go in discard along with any platform computation code for - doing dynamic setup -- */ - -void buddy_init(void) +#if 0 // for swap +uint8_t buddy_evict(uint16_t base, uint8_t sizeshift) { - uint8_t *p = BUDDY_START; - - if (BUDDY_START == BUDDY_TABLE) - p += BUDDY_TREESIZE; - - /* Mark every block as currently in use, and minimum size. Must be picked - so that the mapping layers fall on byte boundaries */ - memset(BUDDY_START, 0x00, BUDDY_TREESIZE); - - /* Free all the blocks that are truely free - saves us - having code to set it all up nicely */ - while (p < BUDDY_TOP) { - buddy_free(p, 0); - p += BUDDY_BLOCKSIZE; + uint16_t node = base; + uint8_t n = BUDDY_SIZE >> sizeshift; + for (ct = 0; ct < n; ct++) + if (!can_evict(node++)) + return EBUSY; } -} - - -/******************* MMU Logic *************************/ -int do_pagemap_alloc(ptptr p, usize_t size) -{ - return 0; - /* FIXME */ - uint8_t *m; - uint8_t pn = p - ptab; - uint8_t level = buddy_order(size); - if (level == 0xFF) - return ENOMEM; - while (1) { - m = buddy_alloc(level, pn); - if (m) { - /* Indicate the actual size handed back as we must - put the stack at the top */ - membase[pn] = m; - p->p_top = (BUDDY_BLOCKSIZE << level) - 1; - p->p_page = level; - program_mmu(m, p->p_top); - return 0; - } -/* FIXME - if (swapneeded(p, 1) == NULL) - return ENOMEM; */ + node = base; + for (ct = 0; ct < n; ct++) + evict(node); + buddy_unpin(node++); } } +#endif -int pagemap_alloc(ptptr p) -{ - return do_pagemap_alloc(p, p->p_top); -} - -/* Reallocate for an exec or brk */ -int pagemap_realloc(usize_t size) -{ - uint8_t pn = udata.u_ptab - ptab; - /* We can do this better - on a shrink we can free some of our buddy - pages */ - /* FIXME: check if space before we do these .. */ - buddy_free(membase[pn], buddy_get_level(size)); - return do_pagemap_alloc(udata.u_ptab, size); -} - -void pagemap_free(ptptr p) -{ - /* FIXME TODO */ -} +/* We work entirely in nodes but the conversions are simple */ -unsigned long pagemap_mem_used(void) +static uint8_t *buddy_node2addr(uint16_t node) { - /* TODO */ - return 0; + return buddy_ram + node << BUDDY_NODESHIFT; } -static uint8_t grow_pn; - -/* Kick out everyone unlucky enough to be in the way */ -static int pagemap_evict(uint8_t owner) +static uint8_t buddy_addr2node(uint16_t *p) { - if (owner != grow_pn) - return swapout(ptab + owner); - return 0; + size_t n = (p - buddy_ram) >> BUDDY_NODESHIFT; } -int pagemap_grow(usize_t size) +/* There are faster ways to do this using ffs on some processors or and + * masking to binary search. + * + * Note: gives nonsense result for a 0 size request + */ +static uint8_t buddy_sizetoshift(uint32_t n) { - uint8_t level = buddy_get_level(size); - uint8_t *m, *n, *me, *ne; - ptptr p; - - if (level == 0xFF) - return ENOMEM; - /* We don't give back for the moment */ - if (level <= udata.u_ptab->p_page) - return 0; - - p = udata.u_ptab; - grow_pn = p - ptab; + uint8_t ct = 0; - m = membase[grow_pn]; - n = buddy_alloc(level, grow_pn); - if (n) - buddy_free(m, p->p_page); - else { - /* Have to do an eviction */ - /* Get the base of the bigger block we will expand into */ - n = buddy_base(m, level); - /* Make room */ - if (buddy_evict(n, level, pagemap_evict)) - return ENOMEM; - /* FIXME: do we need to check enough room before we do this ! */ - buddy_free(m, p->p_page); - n = buddy_alloc(level, grow_pn); - if (n == NULL) - panic("insfram"); + n--; + n >>= BUDDY_NODESHIFT; + while(n) { + n >>= 1; + ct++; } - membase[grow_pn] = n; - p->p_top = (1 << level) - 1; - /* Now copy the code/data/bss/break area if it moved */ - if (m != n) - memcpy(n, m, udata.u_break); - /* Point to the byte after the end of memory */ - me = m + (BUDDY_BLOCKSIZE << p->p_page); - ne = n + (BUDDY_BLOCKSIZE << level); - /* The stack pointer is FFF.... so effectively negative, and we can - do our maths from the tail by using this */ - /* SECURITY FIXME: someone somewhere between entry and here needs to - spot out of range user stack pointers and kill the process */ - /* Copy the stack if it moved - need to copy a bit more FIXME */ - if (me != ne) - memcpy(ne + udata.u_syscall_sp, me + udata.u_syscall_sp, -udata.u_syscall_sp); - /* Zero the hole in the middle */ - memzero(n + udata.u_break, - (1 << level) - udata.u_break + udata.u_syscall_sp); - p->p_page = level; - program_mmu(m, p->p_top); - return 0; -} - - - -/* Swappers to do yet - simple swapmap stuff won't work - we need an - actual allocator for swap - should have a general purpose flat physical - swap for bigger boxes */ - -void swapin(ptptr p, uint16_t map) -{ -} - -int swapout(ptptr p) -{ - return ENOMEM; + return ct; } -/* FIXME: THESE NEED WRITING PROPERLY FOR THE MMU */ -usize_t _uget(const uint8_t *user, uint8_t *dest, usize_t count) +void buddy_init(uint16_t start, uint16_t end) { -// uint8_t tmp; -// while(count--) { -// tmp = *user++; -// *dest++ = tmp; -// } - return 0; -} - -int16_t _ugetc(const uint8_t *user) -{ - uint8_t tmp; -// tmp = *user; - return tmp; -} - -uint16_t _ugetw(const uint16_t *user) -{ - uint16_t tmp; -// tmp = *user; - return tmp; -} - -uint32_t ugetl(void *user, int *err) -{ - uint32_t tmp; -// tmp = *(uint32_t *)user; - return tmp; -} - -int _ugets(const uint8_t *user, uint8_t *dest, usize_t count) -{ -// uint8_t tmp; -// while(count--) { -// tmp = *user++; -// *dest++ = tmp; -// if (tmp == '\0') -// return 0; -// } -// /* Ensure terminated */ -// dest[-1] = '\0'; - return -1; + /* It starts with everything owned by 0 */ + while (start < end) { + total++; + buddy_upin(start); + } + inuse = 0; } -int _uput(const uint8_t *source, uint8_t *user, usize_t count) -{ -// uint8_t tmp; -// while(count--) { -// tmp = *source++; -// *user++ = tmp; -// } - return 0; - -} +/* Supply the standard memory allocator layer so that we can plug this into the + banking functions just like malloc.c */ -int _uputc(uint16_t value, uint8_t *user) +unsigned long kmemavail(void) { -// *user = value; - return 0; + return (total - inuse) << BUDDY_NODESHIFT; } -int _uputw(uint16_t value, uint16_t *user) +unsigned long kmemused(void) { -// *user = value; - return 0; + return inuse << BUDDY_NODESHIFT; } -int uputl(uint32_t value, void *user) +void kfree_s(void *p, size_t size) { -// *(uint32_t *)user = value; - return 0; + buddy_free(buddy_addr2node(p), buddy_sizetoshift(size)); } -int _uzero(uint8_t *user, usize_t count) +void *kmalloc(size_t size, uint8_t owner) { -// while(count--) -// *user++=0; - return 0; + /* We will need to pass owners around eventually to both allocators */ + int16_t node = buddy_alloc(buddy_sizetoshift(size), owner); + if (node < 0) + return NULL; + return buddy_node2addr(p); } -arg_t _memalloc(void) +#if 0 +/* For now use default flat one */ +usize_t valaddr(const char *pp, usize_t lw) { - udata.u_error = ENOSYS; - return -1; -} + uint16_t node = buddy_addr2node(pp); + uint16_t base = pp - buddy_node2addr(node); + int32_t l = lw; -arg_t _memfree(void) -{ - udata.u_error = ENOSYS; - return -1; + while(l > 0) { + if (buddy[node] != udata.u_page) { + if (lw == l) + udata.u_error = EFAULT; + return lw - l; + l -= (1 << BUDDY_NODESHIFT) - base; + base = 0; + } + return lw; } #endif +#endif + -- 2.34.1