Pristine Ack-5.5
[Ack-5.5.git] / lang / m2 / m2mm / LLlex.c
1 /*
2  * (c) copyright 1987 by the Vrije Universiteit, Amsterdam, The Netherlands.
3  * See the copyright notice in the ACK home directory, in the file "Copyright".
4  *
5  * Author: Ceriel J.H. Jacobs
6  */
7
8 /* L E X I C A L   A N A L Y S E R   F O R   M O D U L A - 2 */
9
10 /* $Id: LLlex.c,v 1.7 1994/06/24 12:44:30 ceriel Exp $ */
11
12 #include        <alloc.h>
13 #include        "idfsize.h"
14 #include        "idf.h"
15 #include        "LLlex.h"
16 #include        "input.h"
17 #include        "f_info.h"
18 #include        "Lpars.h"
19 #include        "class.h"
20
21 struct token    dot,
22                 aside;
23 int             idfsize = IDFSIZE;
24 int             ForeignFlag;
25
26 extern char     *getwdir();
27
28 static
29 SkipComment()
30 {
31         /*      Skip Modula-2 comments (* ... *).
32                 Note that comments may be nested (par. 3.5).
33         */
34         register int ch;
35         register int CommentLevel = 0;
36
37         LoadChar(ch);
38         if (ch == '$') {
39                 LoadChar(ch);
40                 switch(ch) {
41                 case 'F':
42                         /* Foreign; This definition module has an
43                            implementation in another language.
44                            In this case, check that the object file is present
45                            and don't generate a rule for it.
46                         */
47                         ForeignFlag = 1;
48                         break;
49                 default:
50                         PushBack();
51                         break;
52                 }
53         }
54         for (;;) {
55                 if (class(ch) == STNL) {
56                         LineNumber++;
57                 }
58                 else if (ch == '(') {
59                         LoadChar(ch);
60                         if (ch == '*') CommentLevel++;
61                         else continue;
62                 }
63                 else if (ch == '*') {
64                         LoadChar(ch);
65                         if (ch == ')') {
66                                 CommentLevel--;
67                                 if (CommentLevel < 0) break;
68                         }
69                         else continue;
70                 }
71                 else if (ch == EOI) {
72                         lexerror("unterminated comment");
73                         break;
74                 }
75                 LoadChar(ch);
76         }
77 }
78
79 static
80 GetString(upto)
81 {
82         /*      Read a Modula-2 string, delimited by the character "upto".
83         */
84         register int ch;
85         
86         while (LoadChar(ch), ch != upto)        {
87                 if (class(ch) == STNL)  {
88                         lexerror("newline in string");
89                         LineNumber++;
90                         break;
91                 }
92                 if (ch == EOI)  {
93                         lexerror("end-of-file in string");
94                         break;
95                 }
96         }
97 }
98
99 static char *s_error = "illegal line directive";
100
101 static int
102 getch()
103 {
104         register int ch;
105
106         for (;;) {
107                 LoadChar(ch);
108                 if ((ch & 0200) && ch != EOI) {
109                         error("non-ascii '\\%03o' read", ch & 0377);
110                         continue;
111                 }
112                 break;
113         }
114         return ch;
115 }
116
117 CheckForLineDirective()
118 {
119         register int ch = getch();
120         register int    i = 0;
121         char            buf[IDFSIZE + 2];
122         register char   *c = buf;
123
124
125         if (ch != '#') {
126                 PushBack();
127                 return;
128         }
129         do {    /*
130                  * Skip to next digit
131                  * Do not skip newlines
132                  */
133                 ch = getch();
134                 if (class(ch) == STNL || class(ch) == STEOI) {
135                         LineNumber++;
136                         error(s_error);
137                         return;
138                 }
139         } while (class(ch) != STNUM);
140         while (class(ch) == STNUM)  {
141                 i = i*10 + (ch - '0');
142                 ch = getch();
143         }
144         while (ch != '"' && class(ch) != STNL && class(ch) != STEOI)
145                 ch = getch();
146         if (ch == '"') {
147                 c = buf;
148                 do {
149                         *c++ = ch = getch();
150                         if (class(ch) == STNL || class(ch) == STEOI) {
151                                 LineNumber++;
152                                 error(s_error);
153                                 return;
154                         }
155                 } while (ch != '"');
156                 *--c = '\0';
157                 do {
158                         ch = getch();
159                 } while (class(ch) != STNL && class(ch) != STEOI);
160                 /*
161                  * Remember the file name
162                  */
163                 if (class(ch) == STNL && strcmp(FileName,buf)) {
164                         FileName = Salloc(buf,(unsigned) strlen(buf) + 1);
165                         WorkingDir = getwdir(FileName);
166                 }
167         }
168         if (class(ch) == STEOI) {
169                 error(s_error);
170                 return;
171         }
172         LineNumber = i;
173 }
174
175 char idfbuf[IDFSIZE + 2];
176
177 int
178 LLlex()
179 {
180         /*      LLlex() is the Lexical Analyzer.
181                 The putting aside of tokens is taken into account.
182         */
183         register struct token *tk = &dot;
184         register int ch, nch;
185
186         if (ASIDE)      {       /* a token is put aside         */
187                 *tk = aside;
188                 ASIDE = 0;
189                 return tk->tk_symb;
190         }
191
192 again:
193         ch = getch();
194         tk->tk_lineno = LineNumber;
195
196         switch (class(ch))      {
197
198         case STNL:
199                 LineNumber++;
200                 CheckForLineDirective();
201                 goto again;
202
203         case STSKIP:
204                 goto again;
205
206         case STGARB:
207                 if ((unsigned) ch - 040 < 0137) {
208                         lexerror("garbage char %c", ch);
209                 }
210                 else    lexerror("garbage char \\%03o", ch);
211                 goto again;
212
213         case STSIMP:
214                 if (ch == '(')  {
215                         LoadChar(nch);
216                         if (nch == '*') {
217                                 SkipComment();
218                                 goto again;
219                         }
220                         PushBack();
221                 }
222                 if (ch == '&') return tk->tk_symb = AND;
223                 if (ch == '~') return tk->tk_symb = NOT;
224                 return tk->tk_symb = ch;
225
226         case STCOMP:
227                 LoadChar(nch);
228                 switch (ch)     {
229
230                 case '.':
231                         if (nch == '.') {
232                                 return tk->tk_symb = UPTO;
233                         }
234                         break;
235
236                 case ':':
237                         if (nch == '=') {
238                                 return tk->tk_symb = BECOMES;
239                         }
240                         break;
241
242                 case '<':
243                         if (nch == '=') {
244                                 return tk->tk_symb = LESSEQUAL;
245                         }
246                         if (nch == '>') {
247                                 return tk->tk_symb = '#';
248                         }
249                         break;
250
251                 case '>':
252                         if (nch == '=') {
253                                 return tk->tk_symb = GREATEREQUAL;
254                         }
255                         break;
256
257                 default :
258                         crash("(LLlex, STCOMP)");
259                 }
260                 PushBack();
261                 return tk->tk_symb = ch;
262
263         case STIDF:
264         {
265                 register char *tag = &idfbuf[0];
266                 register struct idf *id;
267
268                 do      {
269                         if (tag - idfbuf < idfsize) *tag++ = ch;
270                         LoadChar(ch);
271                 } while(in_idf(ch));
272
273                 PushBack();
274                 *tag++ = '\0';
275
276                 tk->TOK_IDF = id = findidf(idfbuf);
277                 return tk->tk_symb = id && id->id_reserved ? id->id_reserved : IDENT;
278         }
279
280         case STSTR:
281                 GetString(ch);
282                 return tk->tk_symb = STRING;
283
284         case STNUM:
285         {
286                 /*      The problem arising with the "parsing" of a number
287                         is that we don't know the base in advance so we
288                         have to read the number with the help of a rather
289                         complex finite automaton.
290                 */
291                 enum statetp {Oct,OptHex,Hex,Dec,OctEndOrHex,End,OptReal,Real};
292                 register enum statetp state;
293                 state = is_oct(ch) ? Oct : Dec;
294                 LoadChar(ch);
295                 for (;;) {
296                         switch(state) {
297                         case Oct:
298                                 while (is_oct(ch))      {
299                                         LoadChar(ch);
300                                 }
301                                 if (ch == 'B' || ch == 'C') {
302                                         state = OctEndOrHex;
303                                         break;
304                                 }
305                                 /* Fall Through */
306                         case Dec:
307                                 while (is_dig(ch))      {
308                                         LoadChar(ch);
309                                 }
310                                 if (ch == 'D') state = OptHex;
311                                 else if (is_hex(ch)) state = Hex;
312                                 else if (ch == '.') state = OptReal;
313                                 else {
314                                         state = End;
315                                         if (ch != 'H') PushBack();
316                                 }
317                                 break;
318
319                         case OptHex:
320                                 LoadChar(ch);
321                                 if (is_hex(ch)) {
322                                         state = Hex;
323                                 }
324                                 else {
325                                         ch = 'D';
326                                         state = End;
327                                         PushBack();
328                                 }
329                                 break;
330
331                         case Hex:
332                                 while (is_hex(ch))      {
333                                         LoadChar(ch);
334                                 }
335                                 state = End;
336                                 if (ch != 'H') {
337                                         lexerror("H expected after hex number");
338                                         PushBack();
339                                 }
340                                 break;
341
342                         case OctEndOrHex:
343                                 LoadChar(ch);
344                                 if (ch == 'H') {
345                                         state = End;
346                                         break;
347                                 }
348                                 if (is_hex(ch)) {
349                                         state = Hex;
350                                         break;
351                                 }
352                                 PushBack();
353                                 /* Fall through */
354                                 
355                         case End:
356                                 return tk->tk_symb = INTEGER;
357
358                         case OptReal:
359                                 /*      The '.' could be the first of the '..'
360                                         token. At this point, we need a
361                                         look-ahead of two characters.
362                                 */
363                                 LoadChar(ch);
364                                 if (ch == '.') {
365                                         /*      Indeed the '..' token
366                                         */
367                                         PushBack();
368                                         PushBack();
369                                         state = End;
370                                         break;
371                                 }
372                                 state = Real;
373                                 break;
374                         }
375                         if (state == Real) break;
376                 }
377
378                 while (is_dig(ch)) {
379                         /*      Fractional part
380                         */
381                         LoadChar(ch);
382                 }
383
384                 if (ch == 'E' || ch == 'D') {
385                         /*      Scale factor
386                         */
387                         if (ch == 'D') {
388                                 LoadChar(ch);
389                                 if (!(ch == '+' || ch == '-' || is_dig(ch)))
390                                         goto noscale;
391                         }
392                         LoadChar(ch);
393                         if (ch == '+' || ch == '-') {
394                                 /*      Signed scalefactor
395                                 */
396                                 LoadChar(ch);
397                         }
398                         if (is_dig(ch)) {
399                                 do {
400                                         LoadChar(ch);
401                                 } while (is_dig(ch));
402                         }
403                         else {
404                                 lexerror("bad scale factor");
405                         }
406                 }
407
408 noscale:
409                 PushBack();
410
411                 return tk->tk_symb = REAL;
412
413                 /*NOTREACHED*/
414         }
415
416         case STEOI:
417                 return tk->tk_symb = -1;
418
419         case STCHAR:
420         default:
421                 crash("(LLlex) Impossible character class");
422                 /*NOTREACHED*/
423         }
424         /*NOTREACHED*/
425 }