From b1f2a92295adab48d2177a3402616d30ed644c08 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Sat, 20 Oct 2018 19:22:11 +0100 Subject: [PATCH] fweep: comment parser ready to think about fixing it up --- Applications/games/fweep.c | 69 +++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/Applications/games/fweep.c b/Applications/games/fweep.c index 3a42ac93..f38d6881 100644 --- a/Applications/games/fweep.c +++ b/Applications/games/fweep.c @@ -940,7 +940,11 @@ input_again: return 13; } -/* FIXME: stop using uint64_t */ +/* + * Fetch 4 or 6 bytes of data from the given dictionary address. + * At the moment we return it as a single big value not an array + * FIXME. + */ dword_t dictionary_get(uint16_t addr) { dword_t v = 0; @@ -950,6 +954,16 @@ dword_t dictionary_get(uint16_t addr) return v; } +/* + * Take each byte of input and check it against the 78 zscii symbols + * for this game. Output a sequence of words holding 15 bit zscii + * with the final word being padded and having the top bit set + * + * Non zscii symbols are encoded in their extended zscii form. In + * all cases we must clip arbitrarily on the right number of symbols + * even if mid symbol + */ + uint64_t dictionary_encode(uint8_t * text, int len) { /* FIXME: stop using uint64_t */ @@ -961,31 +975,47 @@ uint64_t dictionary_encode(uint8_t * text, int len) sync_alphabet(); while (c && len && *text) { - - // Since line breaks cannot be in an input line of text, and VAR:252 is only available in version 5, line breaks need not be considered here. - // However, because of VAR:252, spaces still need to be considered. + /* + * Since line breaks cannot be in an input line of text, and + * VAR:252 is only available in version 5, line breaks need not + * be considered here. However, because of VAR:252, spaces still + * need to be considered. + */ + + /* Symbols are 3 per word with an extra bit */ if (!(c % 3)) v <<= 1; + /* Space packs as 0 */ if (*text == ' ') { v <<= 5; } else { + /* Find the byte in the dictionary */ for (i = 0; i < 78; i++) { + /* If it's a normal member then encode it */ if (*text == al[i] && i != 52 && i != 53) { v <<= 5; + /* Case conversion */ if (i >= 26) { + /* Add the alphabet shift */ v |= i / 26 + (VERSION > 2 ? 3 : 1); c--; + /* Out of space. end mark + and done */ if (!c) return v | 0x8000; + /* Make space for next sym */ + /* Adjust for padding bit */ if (!(c % 3)) v <<= 1; v <<= 5; } + /* Add the symbol code */ v |= (i % 26) + 6; break; } } + /* It wasn't in the dictionary. Encode it long form */ if (i == 78) { v <<= 5; v |= VERSION > 2 ? 5 : 3; @@ -1016,6 +1046,7 @@ uint64_t dictionary_encode(uint8_t * text, int len) text++; len--; } + /* Fill the remaining space with padding */ while (c) { if (!(c % 3)) v <<= 1; @@ -1023,19 +1054,27 @@ uint64_t dictionary_encode(uint8_t * text, int len) v |= 5; c--; } + /* End mark it and return the zscii */ return v | 0x8000; } +/* + * Encode a word into the parse buffer + */ void add_to_parsebuf(uint16_t parsebuf, uint16_t dict, uint8_t * d, int k, int el, int ne, int p, uint16_t flag) { + /* Encode the word into zscii */ dword_t v = dictionary_encode(d, k); dword_t g; int i; uint16_t n = parsebuf + (read8(parsebuf + 1) << 2); + /* Hunt for a match */ for (i = 0; i < ne; i++) { + /* Get the next word and see if it matches */ g = dictionary_get(dict) | 0x8000; if (g == v) { + /* It does - add the needed parse info */ write8(n + 5, p + 1 + (VERSION > 4)); write8(n + 4, k); write16(n + 2, dict); @@ -1043,16 +1082,22 @@ void add_to_parsebuf(uint16_t parsebuf, uint16_t dict, uint8_t * d, } dict += el; } + /* No luck - we may need to write in a failure */ if (i == ne && !flag) { write8(n + 5, p + 1 + (VERSION > 4)); write8(n + 4, k); write16(n + 2, 0); } + /* Finally bump the count */ write8(parsebuf + 1, read8(parsebuf + 1) + 1); } #define Add_to_parsebuf() if(k)add_to_parsebuf(parsebuf,dict,d,k,el,ne,p1,flag),k=0;p1=p+1; + +/* + * Process a command line input + */ void tokenise(uint16_t text, uint16_t dict, uint16_t parsebuf, int len, uint16_t flag) { @@ -1064,6 +1109,7 @@ void tokenise(uint16_t text, uint16_t dict, uint16_t parsebuf, int len, /* A big copy we should avoid */ /* FIXME change algorithms */ + /* Read the table of character codes that count as a word */ if (!dict) { l = read8(dictionary_table); for (i = 1; i <= l; i++) @@ -1073,34 +1119,49 @@ void tokenise(uint16_t text, uint16_t dict, uint16_t parsebuf, int len, l = read8(dict); for (i = 1; i <= l; i++) ws[read8(dict + i)] = 1; + /* Parse buf count */ write8(parsebuf + 1, 0); k = p = p1 = 0; + /* Get the length and number of entries */ el = read8low(dict + read8(dict) + 1); ne = read16low(dict + read8(dict) + 2); + /* Binary search hint - not used */ if (ne < 0) ne *= -1; // Currently, it won't care about the order; it doesn't use binary search. + /* Skip the header */ dict += read8(dict) + 4; + + /* Walk the input */ while (p < len && read8(text + p) && read8(parsebuf + 1) < read8(parsebuf)) { + /* Get a symbol */ i = read8(text + p); + /* Case conversion */ if (i >= 'A' && i <= 'Z') i += 'a' - 'A'; + /* Quiting rules */ if (i == '?' && qtospace) i = ' '; + /* Spaces break words - send the word to the buffer */ if (i == ' ') { Add_to_parsebuf(); } else if (ws[i]) { + /* Symbols go the buffer on their own - queue any + pending stuff first, then the symbol */ Add_to_parsebuf(); *d = i; k = 1; Add_to_parsebuf(); } else if (k < 10) { + /* Queue more symbol */ d[k++] = i; } else { + /* Discard extra bytes */ k++; } p++; } + /* Add the final entry */ Add_to_parsebuf(); } -- 2.34.1