From c1262367937ad277110fbc9f83fcb004401512c6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 21 Feb 2018 13:32:49 +0000 Subject: [PATCH] devio: First cut at putting into place sleeping I/O support This allows the locking frameworks needed to let block devices sleep, but not to do I/O from interrupts. The latter is much more complicated and it's not clear that is useful except on bigger systems which need a different buffer cache anyway --- Kernel/devio.c | 148 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 117 insertions(+), 31 deletions(-) diff --git a/Kernel/devio.c b/Kernel/devio.c index ad741da6..629607c5 100644 --- a/Kernel/devio.c +++ b/Kernel/devio.c @@ -35,38 +35,72 @@ read() wants to read an unallocated block of a file. Bufsync() write outs all dirty blocks. Note that a pointer to a buffer structure is the same as a pointer to -the data. This is very important. +the data if the buffer is inline. This is very important. + +FIXME: need to add locking to this for the sleeping case, and a hash for +the bigger systems **********************************************************************/ -uint16_t bufclock; /* Time-stamp counter for LRU */ +static uint16_t bufclock; /* Time-stamp counter for LRU */ + +#define bisbusy(x) ((x)->bf_busy == BF_BUSY) + +#ifndef CONFIG_BLOCK_SLEEP +#define block(x) ((x)->bf_busy = BF_BUSY) +#define bunlock(x) ((x)->bf_busy = BF_FREE) +#define bcheck(x) bisbusy(x) +#define block_s(x) +#define bunlock_s(x) +#else + +static void block(bufptr bp) +{ + while (bp->bf_busy == BF_BUSY) + psleep_nosig(bp); + bp->bf_busy = BF_BUSY; +} + +static void bunlock(bufptr bp) +{ + if (bp->bf_busy == BF_FREE) + panic(BFREEFREE); + bp->bf_busy = BF_FREE; + pwake(bp); +} + +#define block_s(x) block(x) +#define bunlock_s(x) bunlock(x) +#define bcheck(x) 0 + +#endif +/* + * Make an entry in the buffer cache and fill it. If rewrite is + * set then we are not keeping any of the old data but overwriting + * it all. 
+ * + * Hands back either a locked buffer, or NULL on an error. + */ bufptr bread(uint16_t dev, blkno_t blk, bool rewrite) { regptr bufptr bp; - if ((bp = bfind(dev, blk)) != NULL) { - if (bp->bf_busy == BF_BUSY) - panic(PANIC_WANTBSYB); - else if (bp->bf_busy == BF_FREE) - bp->bf_busy = BF_BUSY; - } else { + if ((bp = bfind(dev, blk)) == NULL) { bp = freebuf(); bp->bf_dev = dev; bp->bf_blk = blk; - bp->bf_busy = BF_BUSY; /* If rewrite is set, we are about to write over the entire block, so we don't need the previous contents */ if (!rewrite) { if (bdread(bp) != BLKSIZE) { udata.u_error = EIO; - bp->bf_busy = BF_FREE; bp->bf_dev = NO_DEVICE; + bunlock(bp); return (NULL); } } } - bp->bf_time = ++bufclock; /* Time stamp it */ return bp; } @@ -83,29 +117,37 @@ void bawrite(bufptr bp) bfree(bp, 1); } - +/* + * Release an entry in the buffer cache. Passed a locked buffer and + * a dirty status + * + * 0: Caller did not dirty buffer (but may be dirty already) + * 1: Caller did dirty buffer + * 2: Caller dirtied buffer and wants it written back now + * + * If a writeback now is requested and an error occurs then u_error will + * be set and -1 returned. + */ int bfree(regptr bufptr bp, uint8_t dirty) { /* dirty: 0=clean, 1=dirty (write back), 2=dirty+immediate write */ + int ret = 0; if (dirty) bp->bf_dirty = true; - bp->bf_busy = BF_FREE; - if (dirty > 1) { /* immediate writeback */ if (bdwrite(bp) != BLKSIZE) { udata.u_error = EIO; - return -1; + ret = -1; } bp->bf_dirty = false; - return 0; } - return 0; + bunlock(bp); + return ret; } - -/* This returns a busy block not belonging to any device, with - * garbage contents. It is essentially a malloc for the kernel. - * Free it with tmpfree. +/* + * Allocate a buffer for scratch use by the kernel. This buffer can then + * be freed with tmpfree. * * API note: Nothing guarantees a connection between a bufcache entry * and tmpbuf in future. Always free with tmpfree.
@@ -116,13 +158,16 @@ void *tmpbuf(void) bp = freebuf(); bp->bf_dev = NO_DEVICE; - bp->bf_busy = BF_BUSY; bp->bf_time = ++bufclock; /* Time stamp it */ return bp->__bf_data; } -/* Allocate an empty _disk cache_ buffer. This won't be able to use tmpbuf - on platforms where we split disk and temporary buffers */ +/* + * Allocate an empty _disk cache_ buffer. We use this when dealing with file + * holes. It would be nice if this API could go away and readi just use uzero() + * + * This won't be able to use tmpbuf if we split disk and temporary buffers. + */ void *zerobuf(void) { void *b = tmpbuf(); @@ -131,14 +176,31 @@ void *zerobuf(void) return b; } +/* + * Write back a buffer doing the locking ourselves. This is called when + * we do a sync or when we get a media change and need to write back + * data. + * + * FIXME: for the simple case I don't think we can ever get called within + * an active I/O so the block/bunlock should be fine - but not needed. In + * async mode they are + */ static void bdput(regptr bufptr bp) { - bdwrite(bp); - if (bp->bf_busy == BF_FREE) + block_s(bp); + if (bp->bf_dirty) { + bdwrite(bp); bp->bf_dirty = false; - d_flush(bp->bf_dev); + bunlock_s(bp); + d_flush(bp->bf_dev); + } else + bunlock_s(bp); } +/* + * The low level logic for sync(). We write back each dirty buffer that + * belongs to a device. + */ void bufsync(void) { regptr bufptr bp; @@ -146,29 +208,52 @@ void bufsync(void) /* FIXME: this can generate a lot of d_flush calls when you have plenty of buffers */ for (bp = bufpool; bp < bufpool_end; ++bp) { - if ((bp->bf_dev != NO_DEVICE) && bp->bf_dirty) + if (bp->bf_dev != NO_DEVICE) bdput(bp); } } +/* + * Find a matching buffer in the block pool. As we have few buffers + * we do a simple linear search. The buffer we return is locked so + * that it can't vanish under the caller when we do sleeping block + * devices.
+ */ bufptr bfind(uint16_t dev, blkno_t blk) { bufptr bp; for (bp = bufpool; bp < bufpool_end; ++bp) { - if (bp->bf_dev == dev && bp->bf_blk == blk) + if (bp->bf_dev == dev && bp->bf_blk == blk) { + /* FIXME: this check is only relevant for non sync stuff + if it's sleeping then this is fine as we'll block here + and sleep until the buffer is unlocked */ + if (bcheck(bp)) + panic(PANIC_WANTBSYB); + block(bp); return bp; + } } return NULL; } +/* + * Handle umount or media change where we need to discard any old + * read buffers. + * + * FIXME: If we want to support mediachange notifications then + * we'll need a way to call this that reports errors rather than + * trying to write back each block. We'll also need to pass in a mask + * for partitioned devices. Maybe the media change case has to be + * irq safe ? + */ void bdrop(uint16_t dev) { regptr bufptr bp; for (bp = bufpool; bp < bufpool_end; ++bp) { if (bp->bf_dev == dev) { - bdput(bp); + bdput(bp); bp->bf_dev = NO_DEVICE; } } @@ -184,7 +269,7 @@ bufptr freebuf(void) oldest = NULL; oldtime = 0; for (bp = bufpool; bp < bufpool_end; ++bp) { - if (bufclock - bp->bf_time >= oldtime && bp->bf_busy == BF_FREE) { + if (bufclock - bp->bf_time >= oldtime && !bisbusy(bp)) { oldest = bp; oldtime = bufclock - bp->bf_time; } @@ -192,6 +277,7 @@ bufptr freebuf(void) if (!oldest) panic(PANIC_NOFREEB); + block(oldest); if (oldest->bf_dirty) { if (bdwrite(oldest) == -1) udata.u_error = EIO; -- 2.34.1