From bdad47c25aa516c3d372052f43a3c46a6d03a16a Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 2 Oct 2015 20:37:53 +0100 Subject: [PATCH] graphics: Have a rethink Basically replace all the operations with a single tight kernel operation which can be used to construct them all with reasonable efficiency, and is good at certain useful things. Push all the alignment considerations onto the user code so that everyone doesn't have the time and size penalties for it when they don't care. Redo the NC100 to follow this implementation. trs80 needs redoing yet but it will take the trs80 hires graphics from unbearable to slow 8) --- Kernel/README.GRAPHICS | 113 ++++++++++++++------ Kernel/include/graphics.h | 68 +++++------- Kernel/platform-nc100/devgfx.c | 39 ++++--- Kernel/platform-nc100/devgfx.h | 6 +- Kernel/platform-nc100/nc100.s | 184 +++++++-------------------------- 5 files changed, 163 insertions(+), 247 deletions(-) diff --git a/Kernel/README.GRAPHICS b/Kernel/README.GRAPHICS index 77ed30b5..b47138d7 100644 --- a/Kernel/README.GRAPHICS +++ b/Kernel/README.GRAPHICS @@ -52,28 +52,6 @@ GFX_PALETTE_SET The colour table is settable Memory holds the amount of RAM for hardware where this also matters (VDP primarily ?, otherwise 0) -Commands are again organised to try and minimise size for small devices - -GFX_BLTAL_CG Rectangular blit or byte aligned blocks from CPU to - the screen -GFX_BLTAL_GC The reverse -GFX_SETPIXEL Plot a list of pixels -GFX_HLINE Horizontal line (on many devices even for software - this can be done with optimised methods) -GFX_VLINE Similar for verticals -GFX_LINE Arbitrary line from A to B, for accelerator hardware -GFX_BLT_GG Arbitrary screen to screen memory copy. Mainly for - things like VDP2 -GFX_BLT_CG As the aligned blit but any alignment -GFX_BLT_GC Ditto in reverse -GFX_RECT Solid rectangle (or patterned if supported) -GFX_RAW Direct command streams (eg VDP). 
Device specific -GFX_RAWCOPY For non mapped non accelerated screens, each command - describes a block copy - -MODE_XOR XOR drawing mode is supported -MODE_PATTERN 8x8 Pattern fill is allowed - There are things that need covering if they make sense - Sprite discovery, description, setup and movement - Blit with stencil @@ -85,22 +63,91 @@ a few minimal operations and most ioctls can be completely ignored but we still can support smarter devices while having a direct route for accelerators for those cases they are needed (eg games) -HLINE is worth having because in most cases that is faster if done by -masking the ends and just setting/clearing/cpl the midstream bytes +Need to decide on the best approach for text and fonts -VLINE is a repeated operation while adding scanline of bytes +Rather than using graphics operations like lines and rectangles we replace +all the graphics operations proposed with game style sprite rectangles -RECT covers both and also the useful to optimise case of narrow rectangles -where its a 1 or 2 byte wide drawing done akin to vline optimisations +We then need exactly two graphics rendering commands (three if we do blits). +In addition the lines are easily verified and clipped before drawing. -The BLTAL blit operations try and keep the logic simple and the code fast -for the faster paths. It's sufficient for a lot of stuff including 8 pixel -aligned font work, bitmaps, and double buffering work. 
+GFX_DRAW y x (byte aligned) buffer len (w and h is implied) -Need to decide on the best approach for text and fonts + Where buffer holds + + [Repeats.b|0 = line end] [and] [xor] * n + + 0 0 = end of draw (use 1 FF 00 for a skipped line) + + Each drawing operation is performed as + + *screen &= and; + *screen ^= xor; + + Which can be done very fast on most processors + + Logic table + + and xor + 0 0 clear bit + 0 1 set bit + 1 0 keep bit + 1 1 invert bit + +l0: + lda ,x + beq nextline +l1: + ldb 1,x + andb ,u + eorb 2,x + stb u+ + deca + bne l1 + leax 3,x + bra l0 + +l0: xor a + ld b, (hl) + cp b + jr z, nextline + inc hl + ld c, (hl) + inc hl +l1: + ld a, (de) + and c + xor (hl) + ld (de),a + inc de + djnz l1 + inc hl + jr l0 + + +GFX_GET y x (byte aligned) h w buffer + copy up to 512 bytes from video to userspace + +The only icky case then is the matter of the machines with head-up-backside +colour attributes (eg the ZX Spectrum). That could probably be handled by +having a GFX_ADRAW to do attribute draws or somesuch + +If we go this way then + +1. Do we guarantee clipping versus the display or versus end of video ram + or just error bad video + +2. Do we allow a repeat to cross a scan line (probably not - it's a PITA) + +3. Do we allow mask of repeats for planar video + +4. If we are clipping against the display do we just allow configurable clip + rectangles given it's near enough the same logic ? + +5. What is the best way to arrange the operations so we don't end up using + bounce buffers if avoidable ? -Some hand optimised standard asm implementations of the rect/vline/hline and -blits would also be worth having. +6. 
Do we still allow for screen/screen blits as well (eg scrolling) diff --git a/Kernel/include/graphics.h b/Kernel/include/graphics.h index 71515d4c..301fa8de 100644 --- a/Kernel/include/graphics.h +++ b/Kernel/include/graphics.h @@ -29,21 +29,17 @@ struct display { #define GFX_PALETTE_SET 64 /* Has settable colour palette */ uint16_t memory; /* Memory size in KB (may be 0 if not relevant) */ uint16_t commands; -#define GFX_BLTAL_CG 1 /* Aligned blit CPU to graphics */ -#define GFX_BLTAL_GC 2 /* And the reverse */ -#define GFX_SETPIXEL 4 /* Driver supports set/clear pixel */ -#define GFX_HLINE 8 /* Horizontal line */ -#define GFX_VLINE 16 /* Vertical line */ -#define GFX_LINE 32 /* Arbitrary line */ -#define GFX_BLT_GG 64 /* Screen to screen blit */ -#define GFX_BLT_CG 128 /* Unaligned blits */ -#define GFX_BLT_GC 256 -#define GFX_RECT 512 /* Rectangles */ -#define GFX_RAW 1024 /* Raw command streams */ -#define GFX_RAWCOPY 2048 /* Raw command stream is copier format */ - uint16_t drawmodes; -#define MODE_XOR 1 /* XOR as well as set/clr */ -#define MODE_PATTERN 2 /* 8x8 pattern */ +#define GFX_DRAW 1 /* Supports the draw command */ +#define GFX_RAW 2 /* Raw command streams to the GPU */ +#define GFX_ADRAW 4 /* Supports draw attributes */ +#define GFX_CLIP 8 /* Supports clipping */ +#define GFX_BLIT 16 /* Supports screen to screen blits */ +#define GFX_READ 32 /* Supports reading back a buffer */ +#define GFX_AREAD 64 /* Supports reading back an attribute buffer */ +#define GFX_PDRAW 128 /* Supports planar draw (draw buffer with a + leading plane mask) indicating which planes + to run the command on */ + /* We may want to add some hardware ones as we hit machines that have them */ }; /* FIXME: need a way to describe/set modes if multiple supported */ @@ -53,17 +49,6 @@ struct palette { uint8_t r,g,b; }; -/* Do not fiddle with this struct idly - it has asm users */ -struct attribute { - uint8_t ink, paper; - uint8_t mode; -#define GFX_OP_COPY 0 -#define GFX_OP_SET 1 
-#define GFX_OP_CLEAR 2 -#define GFX_OP_XOR 3 - uint8_t flags; -}; - /* Returned from a successful GFXIOC_MAP */ struct videomap { uaddr_t mmio; /* Memory mapped register base */ @@ -83,9 +68,6 @@ struct videomap { mode would imply */ }; - -#define GFX_BUFLEN 64 /* Default buffer length (128 byte) */ - #define GFXIOC_GETINFO 0x0300 /* Query display info for this tty */ #define GFXIOC_ENABLE 0x0301 /* Enter graphics mode */ #define GFXIOC_DISABLE 0x0302 /* Exit graphics mode */ @@ -93,20 +75,16 @@ struct videomap { #define GFXIOC_SETPALETTE 0x0304 /* Set a palette entry */ #define GFXIOC_MAP 0x0305 /* Map into process if supported */ #define GFXIOC_UNMAP 0x0306 /* Unmap from process */ -#define GFXIOC_SETATTR 0x0307 /* Set the drawing attributes */ -#define GFXIOC_SETPIXEL 0x0308 /* Set a pixel */ -#define GFXIOC_HLINE 0x0309 /* Horizontal line */ -#define GFXIOC_VLINE 0x030A /* Vertical line */ -#define GFXIOC_RECT 0x030B /* Draw rectangle */ -#define GFXIOC_BLTAL_CG 0x030C /* Blit functions */ -#define GFXIOC_BLTAL_GC 0x030D -#define GFXIOC_BLTCG 0x030E -#define GFXIOC_BLTGC 0x030F -#define GFXIOC_BLTGG 0x0310 -#define GFXIOC_CMD 0x0311 /* Raw command stream for a VDP */ -#define GFXIOC_PAN 0x0312 /* Panning */ -#define GFXIOC_WAITVB 0x0313 /* Wait for vblank */ -#define GFXIOC_GETPIXEL 0x0314 /* Read a pixel */ -#define GFXIOC_GETMODE 0x0315 /* Get info on a mode */ -#define GFXIOC_SETMODE 0x0315 /* Set video mode */ +#define GFXIOC_DRAW 0x0307 /* Draw a buffer */ +#define GFXIOC_RAW 0x0308 /* GPU direct buffer */ +#define GFXIOC_ADRAW 0x0309 /* Draw an attribute buffer */ +#define GFXIOC_CLIP 0x030A /* Set clip rectangle */ +#define GFXIOC_BLIT 0x030B /* Screen to screen blit */ +#define GFXIOC_READ 0x030C /* Read back screen */ +#define GFXIOC_AREAD 0x030D /* Read back attributes */ +#define GFXIOC_PDRAW 0x030E /* Planar draw */ +#define GFXIOC_PAN 0x030F /* Panning */ +#define GFXIOC_WAITVB 0x0310 /* Wait for vblank */ +#define GFXIOC_GETMODE 0x0311 /* Get 
info on a mode */ +#define GFXIOC_SETMODE 0x0312 /* Set video mode */ #endif diff --git a/Kernel/platform-nc100/devgfx.c b/Kernel/platform-nc100/devgfx.c index 41bd351c..a2bfc186 100644 --- a/Kernel/platform-nc100/devgfx.c +++ b/Kernel/platform-nc100/devgfx.c @@ -17,8 +17,7 @@ static struct display ncdisplay = { HW_UNACCEL, 0, 0, - GFX_SETPIXEL|GFX_RAW|GFX_RAWCOPY, - 0 + GFX_DRAW }; #else static struct display ncdisplay = { @@ -29,34 +28,40 @@ static struct display ncdisplay = { HW_UNACCEL, 0, 0, - GFX_SETPIXEL|GFX_RAW|GFX_RAWCOPY, - 0 + GFX_DRAW }; #endif -extern uint16_t video_op[GFX_BUFLEN]; - -extern struct attribute video_attr; /* Shared with asm code */ - int gfx_ioctl(uint8_t minor, uarg_t arg, char *ptr) { + uint8_t *tmp; + uint16_t l; if (arg >> 8 != 0x03) return vt_ioctl(minor, arg, ptr); if (arg == GFXIOC_GETINFO) return uput(&ncdisplay, ptr, sizeof(ncdisplay)); - if (arg == GFXIOC_SETATTR) - return uget(&video_attr, ptr, sizeof(video_attr)); - if (uget(&video_op, ptr, sizeof(video_op))) - return -1; switch(arg) { - case GFXIOC_SETPIXEL: - video_setpixel(); + case GFXIOC_DRAW: + /* Note: we assume we will not map the screen over the buffers */ + tmp = (uint8_t *)tmpbuf(); + l = ugetw(ptr); + if (l < 2 || l > 512) + goto bad; + if (uget(tmp, ptr + 2, l)) + goto bad2; + /* TODO + if (draw_validate(ptr, l, 480, 64)) + goto bad; */ + video_cmd(tmp); + brelse((bufptr) tmp); return 0; - case GFXIOC_CMD: - video_cmd(); - return 0; default: udata.u_error = EINVAL; return -1; } +bad: + udata.u_error = EINVAL; +bad2: + brelse((bufptr) tmp); + return -1; } diff --git a/Kernel/platform-nc100/devgfx.h b/Kernel/platform-nc100/devgfx.h index f495778c..e6f4e24f 100644 --- a/Kernel/platform-nc100/devgfx.h +++ b/Kernel/platform-nc100/devgfx.h @@ -4,10 +4,6 @@ #include extern int gfx_ioctl(uint8_t minor, uarg_t arg, char *ptr); -extern struct attribute video_attr; -extern uint16_t video_op[GFX_BUFLEN]; - -extern void video_setpixel(void); -extern void video_cmd(void); 
+extern void video_cmd(uint8_t *ptr); #endif diff --git a/Kernel/platform-nc100/nc100.s b/Kernel/platform-nc100/nc100.s index 0c59b8f4..39d085a2 100644 --- a/Kernel/platform-nc100/nc100.s +++ b/Kernel/platform-nc100/nc100.s @@ -31,10 +31,7 @@ .globl _font4x6 .globl _vtinit .globl platform_interrupt_all - .globl _video_setpixel .globl _video_cmd - .globl _video_attr - .globl _video_op ; exported debugging tools .globl _trap_monitor @@ -255,7 +252,7 @@ outcharw: ret ; -; Disk helper +; Disk helper - FIXME should be IRQ safe nowdays ; _rd_memcpy: push ix ld ix, #0 @@ -562,159 +559,52 @@ _cursor_off: ld de, (_cursorpos) jr cursor_do + ; -; Entered with HL pointing to the co-ordinates -; Returns with HL pointing to the byte address of the pixel -; A holding the pixel mask -; C holding the bit number -; DE holding the screen byte address +; For NC100 (NC200 needs doing) ; -coords: - ld a, (hl) ; low bits of X - and #7 ; pixel - ld c, a - ld a, (hl) - ld de, #0 ; clear D - and keep D at zero - rra - rra - rra ; A is the byte offset - inc hl - bit 0, (hl) - jr z, xonleft - set 5,a ; right hand side -xonleft: - inc hl - ld a, (hl) ; y low (no y high needed) 0-63 or 0-127 */ - add a ; x 2 - ld l, a - ld h, d ; zero - add hl, hl ; x 4 - add hl, hl ; x 8 - add hl, hl ; x 16 - add hl, hl ; x 32 - add hl, hl ; x 64 lines into bytes offset - add hl, de ; pixel in this byte - ex de, hl - ld hl, #setpixel_bittab - ld b, d ; zero - add hl, bc - ld a, (hl) ; our pixel mask - ex de, hl - ret - -setpixel_optab: - nop ; - or (hl) ; COPY - nop - or (hl) ; SET - cpl ; complement pixel mask - and (hl) ; CLEAR by anding with mask - nop - xor (hl) ; INVERT -setpixel_bittab: - .db 128,64,32,16,8,4,2,1 - -_video_setpixel: +; video_cmd(uint8_t *buf) +; +_video_cmd: + pop de + pop hl + push hl + push de in a, (0x11) push af - ld a, #0x43 ; Map the display - out (0x11), a - call video_setpixel - pop af - out (0x11), a - ret - -video_setpixel: - ld a, (_video_attr + 2) ; mode - or a ; copy ? 
- jr nz, setpixel_notdraw - ld a, (_video_attr) ; ink - or a ; white ? - jr nz, setpixel_notdraw ; a = 1 = set so good - ld a, #2 ; clear -setpixel_notdraw: - ld e, a - ld d, #0 - ld hl, #setpixel_optab - add hl, de - ld a, (hl) - ld (setpixel_opcode), a ; Self modifying + ld e,(hl) inc hl - ld a, (hl) - ld (setpixel_opcode+1), a ; Self modifying - ld bc, (_video_op) ; B is the count - ld a, b - and #0x1f ; max 31 pixels per op - ret z - push bc - ld hl, #_video_op + 2 ; co-ordinate pairs -setpixel_loop: - push hl - call coords -setpixel_opcode: - nop ; nop or cpl - ld a, (hl) ; screen - ld (hl), a ; store back to display - pop hl + ld d,(hl) + call addr_de ; turn DE into screen address (HL is kept) +nextline: + push de +nextop: + xor a + ld b, (hl) + cp b + jr z, endline inc hl + ld c,(hl) inc hl +oploop: + ld a,(de) + and c + xor (hl) + ld (de), a + inc de + djnz oploop inc hl + jr nextop +endline: pop de + ex de,hl + ld bc, #64 + add hl, bc + ex de, hl inc hl - pop bc - djnz setpixel_loop - ret - -; -; Need different logic for NC200 ? -; -_video_cmd: - in a, (0x11) - push af - ld a, #0x43 ; Map the display - out (0x11), a - call video_cmd + xor a + cp (hl) ; 0 0 = end (for blank lines just do 01 ff 00) + jr nz, nextline pop af out (0x11), a ret -video_cmd: - ld hl, #_video_op - ld e, (hl) ; offset - inc hl - ld a, (hl) - cp #0x10 - ret nc ; over end - ld d, a - inc hl - ld a, (hl) ; count - cp #124 ; 2 bytes offset, 2 bytes length, 124 data - ret nc - ld c, a - inc hl - ld a, (hl) - or a - ret nz ; too big - ld b, a - push hl - ld l, e - ld h, d - add hl, bc ; end offset - ld a, h - cp #0x10 - jr c, cmd_over ; doesn't fit - ld hl, #VIDEO_BASE - add hl, de ; offset - ex de, hl - pop hl ; input buffer - ldir - ret -cmd_over: pop hl - ret - -; -; Needed here so they don't vanish when we map the screen -; -_video_op: - .ds 128 -_video_attr: - .ds 4 -- 2.34.1