Add applesoft-like (but not identical) tokenizer and detokenizer
author Nick Downing <nick@ndcode.org>
Tue, 17 May 2022 09:45:01 +0000 (19:45 +1000)
committer Nick Downing <nick@ndcode.org>
Tue, 17 May 2022 11:18:53 +0000 (21:18 +1000)
.gitignore
Makefile
detokenizer.l [new file with mode: 0644]
tokenizer.l [new file with mode: 0644]

index 8f4dc51..4b0f3e7 100644 (file)
@@ -1,5 +1,8 @@
+*.tok
 __pycache__
+/detokenizer.py
 /element.py
 /lex_yy.py
+/tokenizer.py
 /t_def.py
 /y_tab.py
index cd0d4b1..77611e4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-all: element.py lex_yy.py t_def.py y_tab.py
+all: element.py lex_yy.py t_def.py y_tab.py tokenizer.py detokenizer.py
 
 element.py:
        pitree --install-element
@@ -12,5 +12,17 @@ t_def.py: applesoft_basic.t
 y_tab.py: applesoft_basic.y
        piyacc --element --python $<
 
+tokenizer.py: tokenizer.l # staged via a per-target temp file so parallel builds (make -j) cannot clobber each other
+       pilex --python -o __temp_$@ $<
+       (echo '#!/usr/bin/env python3'; echo; cat __temp_$@) >$@
+       rm __temp_$@
+       chmod a+x $@
+
+detokenizer.py: detokenizer.l # staged via a per-target temp file so parallel builds (make -j) cannot clobber each other
+       pilex --python -o __temp_$@ $<
+       (echo '#!/usr/bin/env python3'; echo; cat __temp_$@) >$@
+       rm __temp_$@
+       chmod a+x $@
+
 clean:
-       rm -f element.py lex_yy.py t_def.py y_tab.py
+       rm -f element.py lex_yy.py t_def.py y_tab.py __temp_tokenizer.py __temp_detokenizer.py tokenizer.py detokenizer.py
diff --git a/detokenizer.l b/detokenizer.l
new file mode 100644 (file)
index 0000000..e3242cc
--- /dev/null
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2022 Nick Downing <nick@ndcode.org>
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+%start DATA DATA_TEXT REM_TEXT STR_LITERAL
+
+%%
+
+<INITIAL,DATA>{
+  " "
+  \" {
+    ECHO()  # pass the opening quote through (assuming flex-style ECHO of the matched text)
+    yy_push_state(STR_LITERAL)  # remember INITIAL or DATA; the closing-quote rule in STR_LITERAL pops back to it
+  }
+}
+<INITIAL>{
+  \x80 {
+    yyout.write('END')
+  }
+  \x81 {
+    yyout.write('FOR')
+  }
+  \x82 {
+    yyout.write('NEXT')
+  }
+  \x83 {
+    yyout.write('DATA')
+    BEGIN(DATA)  # leave INITIAL so token bytes inside DATA items are not expanded; colon or newline switches back
+  }
+  \x84 {
+    yyout.write('INPUT')
+  }
+  \x85 {
+    yyout.write('DEL')
+  }
+  \x86 {
+    yyout.write('DIM')
+  }
+  \x87 {
+    yyout.write('READ')
+  }
+  \x88 {
+    yyout.write('GR')
+  }
+  \x89 {
+    yyout.write('TEXT')
+  }
+  \x8a {
+    yyout.write('PR#')
+  }
+  \x8b {
+    yyout.write('IN#')
+  }
+  \x8c {
+    yyout.write('CALL')
+  }
+  \x8d {
+    yyout.write('PLOT')
+  }
+  \x8e {
+    yyout.write('HLIN')
+  }
+  \x8f {
+    yyout.write('VLIN')
+  }
+  \x90 {
+    yyout.write('HGR2')
+  }
+  \x91 {
+    yyout.write('HGR')
+  }
+  \x92 {
+    yyout.write('HCOLOR=')
+  }
+  \x93 {
+    yyout.write('HPLOT')
+  }
+  \x94 {
+    yyout.write('DRAW')
+  }
+  \x95 {
+    yyout.write('XDRAW')
+  }
+  \x96 {
+    yyout.write('HTAB')
+  }
+  \x97 {
+    yyout.write('HOME')
+  }
+  \x98 {
+    yyout.write('ROT=')
+  }
+  \x99 {
+    yyout.write('SCALE=')
+  }
+  \x9a {
+    yyout.write('SHLOAD')
+  }
+  \x9b {
+    yyout.write('TRACE')
+  }
+  \x9c {
+    yyout.write('NOTRACE')
+  }
+  \x9d {
+    yyout.write('NORMAL')
+  }
+  \x9e {
+    yyout.write('INVERSE')
+  }
+  \x9f {
+    yyout.write('FLASH')
+  }
+  \xa0 {
+    yyout.write('COLOR=')
+  }
+  \xa1 {
+    yyout.write('POP')
+  }
+  \xa2 {
+    yyout.write('VTAB')
+  }
+  \xa3 {
+    yyout.write('HIMEM:')
+  }
+  \xa4 {
+    yyout.write('LOMEM:')
+  }
+  \xa5 {
+    yyout.write('ONERR')
+  }
+  \xa6 {
+    yyout.write('RESUME')
+  }
+  \xa7 {
+    yyout.write('RECALL')
+  }
+  \xa8 {
+    yyout.write('STORE')
+  }
+  \xa9 {
+    yyout.write('SPEED=')
+  }
+  \xaa {
+    yyout.write('LET')
+  }
+  \xab {
+    yyout.write('GOTO')
+  }
+  \xac {
+    yyout.write('RUN')
+  }
+  \xad {
+    yyout.write('IF')
+  }
+  \xae {
+    yyout.write('RESTORE')
+  }
+  \xaf {
+    yyout.write('&')
+  }
+  \xb0 {
+    yyout.write('GOSUB')
+  }
+  \xb1 {
+    yyout.write('RETURN')
+  }
+  \xb2 {
+    yyout.write('REM')
+    BEGIN(REM_TEXT)  # remark text: the only rule listed for REM_TEXT is the newline that returns to INITIAL
+  }
+  \xb3 {
+    yyout.write('STOP')
+  }
+  \xb4 {
+    yyout.write('ON')
+  }
+  \xb5 {
+    yyout.write('WAIT')
+  }
+  \xb6 {
+    yyout.write('LOAD')
+  }
+  \xb7 {
+    yyout.write('SAVE')
+  }
+  \xb8 {
+    yyout.write('DEF')
+  }
+  \xb9 {
+    yyout.write('POKE')
+  }
+  \xba {
+    yyout.write('PRINT')
+  }
+  \xbb {
+    yyout.write('CONT')
+  }
+  \xbc {
+    yyout.write('LIST')
+  }
+  \xbd {
+    yyout.write('CLEAR')
+  }
+  \xbe {
+    yyout.write('GET')
+  }
+  \xbf {
+    yyout.write('NEW')
+  }
+  \xc0 {
+    yyout.write('TAB(')
+  }
+  \xc1 {
+    yyout.write('TO')
+  }
+  \xc2 {
+    yyout.write('FN')
+  }
+  \xc3 {
+    yyout.write('SPC(')
+  }
+  \xc4 {
+    yyout.write('THEN')
+  }
+  \xc5 {
+    yyout.write('AT')
+  }
+  \xc6 {
+    yyout.write('NOT')
+  }
+  \xc7 {
+    yyout.write('STEP')
+  }
+  \xc8 {
+    yyout.write('+')
+  }
+  \xc9 {
+    yyout.write('-')
+  }
+  \xca {
+    yyout.write('*')
+  }
+  \xcb {
+    yyout.write('/')
+  }
+  \xcc {
+    yyout.write('^')
+  }
+  \xcd {
+    yyout.write('AND')
+  }
+  \xce {
+    yyout.write('OR')
+  }
+  \xcf {
+    yyout.write('>')
+  }
+  \xd0 {
+    yyout.write('=')
+  }
+  \xd1 {
+    yyout.write('<')
+  }
+  \xd2 {
+    yyout.write('SGN')
+  }
+  \xd3 {
+    yyout.write('INT')
+  }
+  \xd4 {
+    yyout.write('ABS')
+  }
+  \xd5 {
+    yyout.write('USR')
+  }
+  \xd6 {
+    yyout.write('FRE')
+  }
+  \xd7 {
+    yyout.write('SCRN(')
+  }
+  \xd8 {
+    yyout.write('PDL')
+  }
+  \xd9 {
+    yyout.write('POS')
+  }
+  \xda {
+    yyout.write('SQR')
+  }
+  \xdb {
+    yyout.write('RND')
+  }
+  \xdc {
+    yyout.write('LOG')
+  }
+  \xdd {
+    yyout.write('EXP')
+  }
+  \xde {
+    yyout.write('COS')
+  }
+  \xdf {
+    yyout.write('SIN')
+  }
+  \xe0 {
+    yyout.write('TAN')
+  }
+  \xe1 {
+    yyout.write('ATN')
+  }
+  \xe2 {
+    yyout.write('PEEK')
+  }
+  \xe3 {
+    yyout.write('LEN')
+  }
+  \xe4 {
+    yyout.write('STR$')
+  }
+  \xe5 {
+    yyout.write('VAL')
+  }
+  \xe6 {
+    yyout.write('ASC')
+  }
+  \xe7 {
+    yyout.write('CHR$')
+  }
+  \xe8 {
+    yyout.write('LEFT$')
+  }
+  \xe9 {
+    yyout.write('RIGHT$')
+  }
+  \xea {
+    yyout.write('MID$')
+  }
+}
+<DATA,DATA_TEXT>{
+  :|\n {
+    ECHO()  # keep the statement separator or line end in the output
+    BEGIN(INITIAL)  # colon or newline ends the DATA statement, so the INITIAL rules apply again
+  }
+}
+<DATA>{
+  [^ ,] {
+    ECHO()  # first character of an item; quote, colon and newline are claimed by rules listed earlier (assuming flex-style first-rule-wins on ties)
+    BEGIN(DATA_TEXT)  # inside an unquoted item: the space and quote rules are not listed for DATA_TEXT
+  }
+}
+<DATA_TEXT>{
+  , {
+    ECHO()  # keep the item separator in the output
+    BEGIN(DATA)  # comma ends the current item; go back to DATA to start the next one
+  }
+}
+
+<REM_TEXT>{
+  \n {
+    ECHO()  # keep the line end in the output
+    BEGIN(INITIAL)  # end of line ends the remark; the next line starts in INITIAL
+  }
+}
+
+<STR_LITERAL>{
+  \" {
+    ECHO()  # pass the closing quote through
+    yy_pop_state()  # return to whichever state (INITIAL or DATA) pushed STR_LITERAL
+  }
+  \n {
+    ECHO()  # unterminated string: the line end is still written out
+    yy_pop_state()  # drop the saved state so the state stack does not grow
+    BEGIN(INITIAL)  # force INITIAL for the next line even if the string began inside DATA
+  }  
+}
+
+%%
+
+if __name__ == '__main__':
+  while yylex():  # script entry point: call the scanner repeatedly until yylex() returns a falsy value
+    pass
diff --git a/tokenizer.l b/tokenizer.l
new file mode 100644 (file)
index 0000000..441fee7
--- /dev/null
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2022 Nick Downing <nick@ndcode.org>
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+%start DATA DATA_TEXT REM_TEXT STR_LITERAL
+
+%%
+
+<INITIAL,DATA>{
+  " "
+  \" {
+    ECHO()  # pass the opening quote through (assuming flex-style ECHO of the matched text)
+    yy_push_state(STR_LITERAL)  # remember INITIAL or DATA; the closing-quote rule in STR_LITERAL pops back to it
+  }
+}
+<INITIAL>{
+  E\ *N\ *D {
+    yyout.write('\x80')
+  }
+  F\ *O\ *R {
+    yyout.write('\x81')
+  }
+  N\ *E\ *X\ *T {
+    yyout.write('\x82')
+  }
+  D\ *A\ *T\ *A {
+    yyout.write('\x83')
+    BEGIN(DATA)  # leave INITIAL so keywords inside DATA items are not tokenized; colon or newline switches back
+  }
+  I\ *N\ *P\ *U\ *T {
+    yyout.write('\x84')
+  }
+  D\ *E\ *L {
+    yyout.write('\x85')
+  }
+  D\ *I\ *M {
+    yyout.write('\x86')
+  }
+  R\ *E\ *A\ *D {
+    yyout.write('\x87')
+  }
+  G\ *R {
+    yyout.write('\x88')
+  }
+  T\ *E\ *X\ *T {
+    yyout.write('\x89')
+  }
+  P\ *R\ *\# {
+    yyout.write('\x8a')
+  }
+  I\ *N\ *\# {
+    yyout.write('\x8b')
+  }
+  C\ *A\ *L\ *L {
+    yyout.write('\x8c')
+  }
+  P\ *L\ *O\ *T {
+    yyout.write('\x8d')
+  }
+  H\ *L\ *I\ *N {
+    yyout.write('\x8e')
+  }
+  V\ *L\ *I\ *N {
+    yyout.write('\x8f')
+  }
+  H\ *G\ *R\ *2 {
+    yyout.write('\x90')
+  }
+  H\ *G\ *R {
+    yyout.write('\x91')
+  }
+  H\ *C\ *O\ *L\ *O\ *R\ *\= {
+    yyout.write('\x92')
+  }
+  H\ *P\ *L\ *O\ *T {
+    yyout.write('\x93')
+  }
+  D\ *R\ *A\ *W {
+    yyout.write('\x94')
+  }
+  X\ *D\ *R\ *A\ *W {
+    yyout.write('\x95')
+  }
+  H\ *T\ *A\ *B {
+    yyout.write('\x96')
+  }
+  H\ *O\ *M\ *E {
+    yyout.write('\x97')
+  }
+  R\ *O\ *T\ *\= {
+    yyout.write('\x98')
+  }
+  S\ *C\ *A\ *L\ *E\ *\= {
+    yyout.write('\x99')
+  }
+  S\ *H\ *L\ *O\ *A\ *D {
+    yyout.write('\x9a')
+  }
+  T\ *R\ *A\ *C\ *E {
+    yyout.write('\x9b')
+  }
+  N\ *O\ *T\ *R\ *A\ *C\ *E {
+    yyout.write('\x9c')
+  }
+  N\ *O\ *R\ *M\ *A\ *L {
+    yyout.write('\x9d')
+  }
+  I\ *N\ *V\ *E\ *R\ *S\ *E {
+    yyout.write('\x9e')
+  }
+  F\ *L\ *A\ *S\ *H {
+    yyout.write('\x9f')
+  }
+  C\ *O\ *L\ *O\ *R\ *\= {
+    yyout.write('\xa0')
+  }
+  P\ *O\ *P {
+    yyout.write('\xa1')
+  }
+  V\ *T\ *A\ *B {
+    yyout.write('\xa2')
+  }
+  H\ *I\ *M\ *E\ *M\ *\: {
+    yyout.write('\xa3')
+  }
+  L\ *O\ *M\ *E\ *M\ *\: {
+    yyout.write('\xa4')
+  }
+  O\ *N\ *E\ *R\ *R {
+    yyout.write('\xa5')
+  }
+  R\ *E\ *S\ *U\ *M\ *E {
+    yyout.write('\xa6')
+  }
+  R\ *E\ *C\ *A\ *L\ *L {
+    yyout.write('\xa7')
+  }
+  S\ *T\ *O\ *R\ *E {
+    yyout.write('\xa8')
+  }
+  S\ *P\ *E\ *E\ *D\ *\= {
+    yyout.write('\xa9')
+  }
+  L\ *E\ *T {
+    yyout.write('\xaa')
+  }
+  G\ *O\ *T\ *O {
+    yyout.write('\xab')
+  }
+  R\ *U\ *N {
+    yyout.write('\xac')
+  }
+  I\ *F {
+    yyout.write('\xad')
+  }
+  R\ *E\ *S\ *T\ *O\ *R\ *E {
+    yyout.write('\xae')
+  }
+  \& {
+    yyout.write('\xaf')
+  }
+  G\ *O\ *S\ *U\ *B {
+    yyout.write('\xb0')
+  }
+  R\ *E\ *T\ *U\ *R\ *N {
+    yyout.write('\xb1')
+  }
+  R\ *E\ *M {
+    yyout.write('\xb2')
+    BEGIN(REM_TEXT)  # remark text: the only rule listed for REM_TEXT is the newline that returns to INITIAL
+  }
+  S\ *T\ *O\ *P {
+    yyout.write('\xb3')
+  }
+  O\ *N {
+    yyout.write('\xb4')
+  }
+  W\ *A\ *I\ *T {
+    yyout.write('\xb5')
+  }
+  L\ *O\ *A\ *D {
+    yyout.write('\xb6')
+  }
+  S\ *A\ *V\ *E {
+    yyout.write('\xb7')
+  }
+  D\ *E\ *F {
+    yyout.write('\xb8')
+  }
+  P\ *O\ *K\ *E {
+    yyout.write('\xb9')
+  }
+  P\ *R\ *I\ *N\ *T {
+    yyout.write('\xba')
+  }
+  C\ *O\ *N\ *T {
+    yyout.write('\xbb')
+  }
+  L\ *I\ *S\ *T {
+    yyout.write('\xbc')
+  }
+  C\ *L\ *E\ *A\ *R {
+    yyout.write('\xbd')
+  }
+  G\ *E\ *T {
+    yyout.write('\xbe')
+  }
+  N\ *E\ *W {
+    yyout.write('\xbf')
+  }
+  T\ *A\ *B\ *\( {
+    yyout.write('\xc0')
+  }
+  T\ *O {
+    yyout.write('\xc1')
+  }
+  F\ *N {
+    yyout.write('\xc2')
+  }
+  S\ *P\ *C\ *\( {
+    yyout.write('\xc3')
+  }
+  T\ *H\ *E\ *N {
+    yyout.write('\xc4')
+  }
+  A\ *T {
+    yyout.write('\xc5')
+  }
+  N\ *O\ *T {
+    yyout.write('\xc6')
+  }
+  S\ *T\ *E\ *P {
+    yyout.write('\xc7')
+  }
+  \+ {
+    yyout.write('\xc8')
+  }
+  \- {
+    yyout.write('\xc9')
+  }
+  \* {
+    yyout.write('\xca')
+  }
+  \/ {
+    yyout.write('\xcb')
+  }
+  \^ {
+    yyout.write('\xcc')
+  }
+  A\ *N\ *D {
+    yyout.write('\xcd')
+  }
+  O\ *R {
+    yyout.write('\xce')
+  }
+  \> {
+    yyout.write('\xcf')
+  }
+  \= {
+    yyout.write('\xd0')
+  }
+  \< {
+    yyout.write('\xd1')
+  }
+  S\ *G\ *N {
+    yyout.write('\xd2')
+  }
+  I\ *N\ *T {
+    yyout.write('\xd3')
+  }
+  A\ *B\ *S {
+    yyout.write('\xd4')
+  }
+  U\ *S\ *R {
+    yyout.write('\xd5')
+  }
+  F\ *R\ *E {
+    yyout.write('\xd6')
+  }
+  S\ *C\ *R\ *N\ *\( {
+    yyout.write('\xd7')
+  }
+  P\ *D\ *L {
+    yyout.write('\xd8')
+  }
+  P\ *O\ *S {
+    yyout.write('\xd9')
+  }
+  S\ *Q\ *R {
+    yyout.write('\xda')
+  }
+  R\ *N\ *D {
+    yyout.write('\xdb')
+  }
+  L\ *O\ *G {
+    yyout.write('\xdc')
+  }
+  E\ *X\ *P {
+    yyout.write('\xdd')
+  }
+  C\ *O\ *S {
+    yyout.write('\xde')
+  }
+  S\ *I\ *N {
+    yyout.write('\xdf')
+  }
+  T\ *A\ *N {
+    yyout.write('\xe0')
+  }
+  A\ *T\ *N {
+    yyout.write('\xe1')
+  }
+  P\ *E\ *E\ *K {
+    yyout.write('\xe2')
+  }
+  L\ *E\ *N {
+    yyout.write('\xe3')
+  }
+  S\ *T\ *R\ *\$ {
+    yyout.write('\xe4')
+  }
+  V\ *A\ *L {
+    yyout.write('\xe5')
+  }
+  A\ *S\ *C {
+    yyout.write('\xe6')
+  }
+  C\ *H\ *R\ *\$ {
+    yyout.write('\xe7')
+  }
+  L\ *E\ *F\ *T\ *\$ {
+    yyout.write('\xe8')
+  }
+  R\ *I\ *G\ *H\ *T\ *\$ {
+    yyout.write('\xe9')
+  }
+  M\ *I\ *D\ *\$ {
+    yyout.write('\xea')
+  }
+}
+<DATA,DATA_TEXT>{
+  :|\n {
+    ECHO()  # keep the statement separator or line end in the output
+    BEGIN(INITIAL)  # colon or newline ends the DATA statement, so keyword tokenizing resumes
+  }
+}
+<DATA>{
+  [^ ,] {
+    ECHO()  # first character of an item; quote, colon and newline are claimed by rules listed earlier (assuming flex-style first-rule-wins on ties)
+    BEGIN(DATA_TEXT)  # inside an unquoted item: the space and quote rules are not listed for DATA_TEXT
+  }
+}
+<DATA_TEXT>{
+  , {
+    ECHO()  # keep the item separator in the output
+    BEGIN(DATA)  # comma ends the current item; go back to DATA to start the next one
+  }
+}
+
+<REM_TEXT>{
+  \n {
+    ECHO()  # keep the line end in the output
+    BEGIN(INITIAL)  # end of line ends the remark; the next line starts in INITIAL
+  }
+}
+
+<STR_LITERAL>{
+  \" {
+    ECHO()  # pass the closing quote through
+    yy_pop_state()  # return to whichever state (INITIAL or DATA) pushed STR_LITERAL
+  }
+  \n {
+    ECHO()  # unterminated string: the line end is still written out
+    yy_pop_state()  # drop the saved state so the state stack does not grow
+    BEGIN(INITIAL)  # force INITIAL for the next line even if the string began inside DATA
+  }  
+}
+
+%%
+
+if __name__ == '__main__':
+  while yylex():  # script entry point: call the scanner repeatedly until yylex() returns a falsy value
+    pass