Pristine Ack-5.5
[Ack-5.5.git] / util / flex / parse.y
1
2 /* parse.y - parser for flex input */
3
4 %token CHAR NUMBER SECTEND SCDECL XSCDECL WHITESPACE NAME PREVCCL EOF_OP
5
6 %{
7 /*-
8  * Copyright (c) 1990 The Regents of the University of California.
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Vern Paxson.
13  * 
14  * The United States Government has rights in this work pursuant
15  * to contract no. DE-AC03-76SF00098 between the United States
16  * Department of Energy and the University of California.
17  *
18  * Redistribution and use in source and binary forms are permitted provided
19  * that: (1) source distributions retain this entire copyright notice and
20  * comment, and (2) distributions including binaries display the following
21  * acknowledgement:  ``This product includes software developed by the
22  * University of California, Berkeley and its contributors'' in the
23  * documentation or other materials provided with the distribution and in
24  * all advertising materials mentioning features or use of this software.
25  * Neither the name of the University nor the names of its contributors may
26  * be used to endorse or promote products derived from this software without
27  * specific prior written permission.
28  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
29  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
30  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
31  */
32
33 #ifndef lint
34 static char rcsid[] =
35     "@(#) $Id: parse.y,v 1.2 1994/06/24 10:57:18 ceriel Exp $ (LBL)";
36 #endif
37
38 #include "flexdef.h"
39
40 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, actvp, rulelen;
41 int trlcontxt, xcluflg, cclsorted, varlength, variable_trail_rule;
42 Char clower();
43
44 static int madeany = false;  /* whether we've made the '.' character class */
45 int previous_continued_action;  /* whether the previous rule's action was '|' */
46
47 %}
48
49 %%
goal            :  initlex sect1 sect1end sect2 initforrule
                        {
                            /* All user rules have been read; append the
                             * default rule, built from a negated empty
                             * character class, to every start condition.
                             */
                            int def_rule;

                            pat = cclinit();
                            cclnegate( pat );
                            def_rule = mkstate( -pat );
                            finish_rule( def_rule, false, 0, 0 );

                            for ( i = 1; i <= lastsc; ++i ) {
                                scset[i] = mkbranch( scset[i], def_rule );
                            }

                            /* action text for the default rule */
                            fputs( spprdflt
                                   ? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
                                   : "ECHO",
                                   temp_action_file );
                            fputs( ";\n\tYY_BREAK\n", temp_action_file );
                        }
                ;
73
initlex         :  /* empty - runs once, before section 1 is parsed */
                        {
                            /* install the default start condition,
                             * "INITIAL", as a non-exclusive one
                             */
                            scinstal( "INITIAL", false );
                        }
                ;
82
/* section 1: zero or more start condition declaration lines, then SECTEND */
sect1           :  sect1 startconddecl WHITESPACE namelist1 '\n'
                |  /* empty */
                |  error '\n'
                        {
                            /* a bad line is consumed up to its newline */
                            synerr( "unknown error processing section 1" );
                        }
                ;

sect1end        :  SECTEND
                ;
91
/* These two productions are kept apart from the rest of the declaration
 * line so that xcluflg is already set when the names that follow are
 * installed.
 */
startconddecl   :  SCDECL
                        {
                            xcluflg = false;
                        }

                |  XSCDECL
                        {
                            xcluflg = true;
                        }
                ;
105
/* whitespace-separated names in a start condition declaration; each name
 * (left in nmstr) is installed with the exclusivity chosen by startconddecl
 */
namelist1       :  namelist1 WHITESPACE NAME
                        {
                            scinstal( nmstr, xcluflg );
                        }

                |  NAME
                        {
                            scinstal( nmstr, xcluflg );
                        }

                |  error
                        {
                            synerr( "bad start condition list" );
                        }
                ;
115
/* section 2: zero or more rules, one per line; initforrule resets the
 * per-rule state before each rule is parsed
 */
sect2           :  sect2 initforrule flexrule '\n'
                |  /* empty */
                ;
119
initforrule     :  /* empty - resets the per-rule state */
                        {
                            /* no trailing context seen yet, fixed length */
                            trlcontxt = false;
                            variable_trail_rule = false;
                            varlength = false;

                            /* nothing counted yet */
                            trailcnt = 0;
                            headcnt = 0;
                            rulelen = 0;

                            current_state_type = STATE_NORMAL;

                            /* remember whether the preceding rule's action
                             * was '|' before new_rule() is called
                             */
                            previous_continued_action = continued_action;
                            new_rule();
                        }
                ;
130
/* One complete rule.  The alternatives differ in which table receives the
 * finished pattern: scbol[] for '^'-anchored rules, scset[] otherwise; and
 * in which start conditions get it: those listed in actvsc[1..actvp] when
 * an scon prefix is present, every non-exclusive one otherwise.
 */
flexrule        :  scon '^' rule
                        {
                            pat = $3;
                            finish_rule( pat, variable_trail_rule,
                                         headcnt, trailcnt );

                            for ( i = 1; i <= actvp; ++i ) {
                                int sc = actvsc[i];

                                scbol[sc] = mkbranch( scbol[sc], pat );
                            }

                            /* first use of '^': note it, and mention the
                             * cost once if a performance report is wanted
                             */
                            if ( ! bol_needed ) {
                                bol_needed = true;

                                if ( performance_report ) {
                                    pinpoint_message(
                            "'^' operator results in sub-optimal performance" );
                                }
                            }
                        }

                |  scon rule
                        {
                            pat = $2;
                            finish_rule( pat, variable_trail_rule,
                                         headcnt, trailcnt );

                            for ( i = 1; i <= actvp; ++i ) {
                                int sc = actvsc[i];

                                scset[sc] = mkbranch( scset[sc], pat );
                            }
                        }

                |  '^' rule
                        {
                            pat = $2;
                            finish_rule( pat, variable_trail_rule,
                                         headcnt, trailcnt );

                            /* goes into every start condition that is not
                             * exclusive, the default one included
                             */
                            for ( i = 1; i <= lastsc; ++i ) {
                                if ( ! scxclu[i] ) {
                                    scbol[i] = mkbranch( scbol[i], pat );
                                }
                            }

                            if ( ! bol_needed ) {
                                bol_needed = true;

                                if ( performance_report ) {
                                    pinpoint_message(
                            "'^' operator results in sub-optimal performance" );
                                }
                            }
                        }

                |  rule
                        {
                            pat = $1;
                            finish_rule( pat, variable_trail_rule,
                                         headcnt, trailcnt );

                            for ( i = 1; i <= lastsc; ++i ) {
                                if ( ! scxclu[i] ) {
                                    scset[i] = mkbranch( scset[i], pat );
                                }
                            }
                        }

                |  scon EOF_OP
                        {
                            build_eof_action();
                        }

                |  EOF_OP
                        {
                            /* an unqualified <<EOF>> covers every start
                             * condition that has no EOF action yet
                             */
                            actvp = 0;

                            for ( i = 1; i <= lastsc; ++i ) {
                                if ( ! sceof[i] ) {
                                    actvsc[++actvp] = i;
                                }
                            }

                            if ( actvp != 0 ) {
                                build_eof_action();
                            } else {
                                pinpoint_message(
                "warning - all start conditions already have <<EOF>> rules" );
                            }
                        }

                |  error
                        {
                            synerr( "unrecognized rule" );
                        }
                ;
222
/* start condition prefix: <name,name,...>; namelist2 records the named
 * start conditions in actvsc[]
 */
scon            :  '<' namelist2 '>'
                ;
225
/* Comma-separated start condition names inside <...>.
 *
 * Each declared name's number is appended to actvsc[1..actvp].  The list
 * is emptied whenever a new list starts (first NAME, or error recovery),
 * so an undeclared first name can no longer leave the previous rule's
 * entries active and have this rule attached to them.
 */
namelist2       :  namelist2 ',' NAME
                        {
                            if ( (scnum = sclookup( nmstr )) == 0 ) {
                                format_pinpoint_message(
                                    "undeclared start condition %s", nmstr );
                            } else {
                                actvsc[++actvp] = scnum;
                            }
                        }

                |  NAME
                        {
                            /* start of a new list: discard stale entries
                             * before looking the name up
                             */
                            actvp = 0;

                            if ( (scnum = sclookup( nmstr )) == 0 ) {
                                format_pinpoint_message(
                                    "undeclared start condition %s", nmstr );
                            } else {
                                actvsc[++actvp] = scnum;
                            }
                        }

                |  error
                        {
                            /* nothing usable was collected */
                            actvp = 0;
                            synerr( "bad start condition list" );
                        }
                ;
248
/* A pattern together with its optional trailing context ("head/trail" or
 * "head$").  Decides whether the rule needs variable trailing context and
 * fills in headcnt / trailcnt for finish_rule().
 */
rule            :  re2 re
                        {
                            if ( transchar[lastst[$2]] != SYM_EPSILON ) {
                                /* add the final transition right away so
                                 * that it gets marked as a trailing
                                 * context state
                                 */
                                $2 = link_machines( $2,
                                                    mkstate( SYM_EPSILON ) );
                            }

                            mark_beginning_as_normal( $2 );
                            current_state_type = STATE_NORMAL;

                            if ( previous_continued_action ) {
                                /* The previous rule's action was '|', so
                                 * it falls into this rule's action.  The
                                 * backup must therefore happen before the
                                 * action switch statement rather than
                                 * inside the action, or the rules falling
                                 * in would erroneously back up as well.
                                 * Forcing variable trailing context gets
                                 * that effect.
                                 */
                                if ( ! (varlength && headcnt == 0) ) {
                                    fprintf( stderr,
    "%s: warning - trailing context rule at line %d made variable because\n",
                                             program_name, linenum );
                                    fprintf( stderr,
                                             "      of preceding '|' action\n" );
                                }

                                /* mark as variable */
                                varlength = true;
                                headcnt = 0;
                            }

                            if ( ! varlength || headcnt != 0 ) {
                                trailcnt = rulelen;
                            } else {
                                /* Variable trailing context: mark the head
                                 * machine as the accepting "head" part.
                                 * This is not done at the start of the
                                 * action because current_state_type was
                                 * still set up for a trail rule then, and
                                 * add_accept() can create a new state.
                                 */
                                add_accept( $1,
                                            num_rules | YY_TRAILING_HEAD_MASK );
                                variable_trail_rule = true;
                            }

                            $$ = link_machines( $1, $2 );
                        }

                |  re2 re '$'
                        {
                            synerr( "trailing context used twice" );
                        }

                |  re '$'
                        {
                            if ( trlcontxt ) {
                                synerr( "trailing context used twice" );
                                $$ = mkstate( SYM_EPSILON );
                            } else if ( previous_continued_action ) {
                                /* same reasoning as in the "re2 re"
                                 * alternative above
                                 */
                                if ( ! (varlength && headcnt == 0) ) {
                                    fprintf( stderr,
    "%s: warning - trailing context rule at line %d made variable because\n",
                                             program_name, linenum );
                                    fprintf( stderr,
                                             "      of preceding '|' action\n" );
                                }

                                /* mark as variable */
                                varlength = true;
                                headcnt = 0;
                            }

                            trlcontxt = true;

                            if ( ! varlength ) {
                                headcnt = rulelen;
                            }

                            /* the '$' stands for one trailing newline */
                            ++rulelen;
                            trailcnt = 1;

                            eps = mkstate( SYM_EPSILON );
                            $$ = link_machines( $1,
                                     link_machines( eps, mkstate( '\n' ) ) );
                        }

                |  re
                        {
                            $$ = $1;

                            if ( trlcontxt && varlength && headcnt == 0 ) {
                                /* head and trail are both variable-length */
                                variable_trail_rule = true;
                            } else if ( trlcontxt ) {
                                trailcnt = rulelen;
                            }
                        }
                ;
363
364
/* alternation: series separated by '|' */
re              :  re '|' series
                        {
                            /* the branches may differ in length */
                            varlength = true;
                            $$ = mkor( $1, $3 );
                        }

                |  series
                        {
                            $$ = $1;
                        }
                ;
374
375
/* The head of a "head/trail" rule.  It is a production of its own so that
 * this action is reduced before the trailing part is parsed.
 */
re2             :  re '/'
                        {
                            if ( ! trlcontxt ) {
                                trlcontxt = true;
                            } else {
                                synerr( "trailing context used twice" );
                            }

                            if ( ! varlength ) {
                                headcnt = rulelen;
                            } else {
                                /* variable-length head; start afresh in
                                 * the hope that the trailing context is
                                 * fixed-length
                                 */
                                varlength = false;
                            }

                            /* the trailing part is counted from zero */
                            rulelen = 0;

                            current_state_type = STATE_TRAILING_CONTEXT;
                            $$ = $1;
                        }
                ;
400
/* concatenation: adjacent singletons are linked one after another */
series          :  series singleton
                        {
                            $$ = link_machines( $1, $2 );
                        }

                |  singleton
                        {
                            $$ = $1;
                        }
                ;
412
/* A single pattern element, optionally followed by a repetition operator.
 * Every repetition operator marks the rule as variable-length; every
 * element that matches exactly one character bumps rulelen.
 */
singleton       :  singleton '*'
                        {
                            varlength = true;
                            $$ = mkclos( $1 );
                        }

                |  singleton '+'
                        {
                            varlength = true;
                            $$ = mkposcl( $1 );
                        }

                |  singleton '?'
                        {
                            varlength = true;
                            $$ = mkopt( $1 );
                        }

                |  singleton '{' NUMBER ',' NUMBER '}'
                        {
                            varlength = true;

                            if ( $3 < 0 || $3 > $5 ) {
                                synerr( "bad iteration values" );
                                $$ = $1;
                            } else if ( $3 == 0 ) {
                                /* a lower bound of zero makes the whole
                                 * repetition optional
                                 */
                                $$ = mkopt( mkrep( $1, $3, $5 ) );
                            } else {
                                $$ = mkrep( $1, $3, $5 );
                            }
                        }

                |  singleton '{' NUMBER ',' '}'
                        {
                            varlength = true;

                            if ( $3 > 0 ) {
                                $$ = mkrep( $1, $3, INFINITY );
                            } else {
                                synerr( "iteration value must be positive" );
                                $$ = $1;
                            }
                        }

                |  singleton '{' NUMBER '}'
                        {
                            /* The singleton might be something like
                             * "(foo)", whose length is unknown here, so
                             * give up and call the rule variable-length.
                             */
                            varlength = true;

                            if ( $3 > 0 ) {
                                $$ = link_machines( $1,
                                                    copysingl( $1, $3 - 1 ) );
                            } else {
                                synerr( "iteration value must be positive" );
                                $$ = $1;
                            }
                        }

                |  '.'
                        {
                            if ( ! madeany ) {
                                /* first '.': build its character class as
                                 * the negation of a class holding only
                                 * newline
                                 */
                                anyccl = cclinit();
                                ccladd( anyccl, '\n' );
                                cclnegate( anyccl );

                                if ( useecs ) {
                                    mkeccl( ccltbl + cclmap[anyccl],
                                            ccllen[anyccl], nextecm,
                                            ecgroup, csize, csize );
                                }

                                madeany = true;
                            }

                            ++rulelen;
                            $$ = mkstate( -anyccl );
                        }

                |  fullccl
                        {
                            if ( ! cclsorted ) {
                                /* sort the characters for fast searching;
                                 * a shell sort is used because the list
                                 * could be large
                                 */
                                cshell( ccltbl + cclmap[$1], ccllen[$1],
                                        true );
                            }

                            if ( useecs ) {
                                mkeccl( ccltbl + cclmap[$1], ccllen[$1],
                                        nextecm, ecgroup, csize, csize );
                            }

                            ++rulelen;
                            $$ = mkstate( -$1 );
                        }

                |  PREVCCL
                        {
                            ++rulelen;
                            $$ = mkstate( -$1 );
                        }

                |  '"' string '"'
                        {
                            $$ = $2;
                        }

                |  '(' re ')'
                        {
                            $$ = $2;
                        }

                |  CHAR
                        {
                            ++rulelen;

                            /* fold upper case when case-insensitive */
                            if ( caseins && $1 >= 'A' && $1 <= 'Z' ) {
                                $1 = clower( $1 );
                            }

                            $$ = mkstate( $1 );
                        }
                ;
546
/* a bracketed character class, plain or negated with a leading '^' */
fullccl         :  '[' ccl ']'
                        {
                            $$ = $2;
                        }

                |  '[' '^' ccl ']'
                        {
                            /* *Sigh* - to be compatible with Unix lex,
                             * negated ccls match newlines; the code that
                             * would exclude newline is compiled only when
                             * NOTDEF is defined.
                             */
#ifdef NOTDEF
                            ccladd( $3, '\n' ); /* negated ccls don't match '\n' */
                            cclsorted = false; /* because we added the newline */
#endif
                            cclnegate( $3 );
                            $$ = $3;
                        }
                ;
563
/* The contents of a character class, built up left to right.  cclsorted
 * stays true only while each new character (or range start) is greater
 * than lastchar, the last character added.
 */
ccl             :  ccl CHAR '-' CHAR
                        {
                            if ( $2 > $4 ) {
                                synerr( "negative range in character class" );
                            } else {
                                if ( caseins ) {
                                    /* fold both ends of the range */
                                    if ( $2 >= 'A' && $2 <= 'Z' ) {
                                        $2 = clower( $2 );
                                    }
                                    if ( $4 >= 'A' && $4 <= 'Z' ) {
                                        $4 = clower( $4 );
                                    }
                                }

                                for ( i = $2; i <= $4; ++i ) {
                                    ccladd( $1, i );
                                }

                                /* still in ascending order? */
                                cclsorted = cclsorted && ($2 > lastchar);
                                lastchar = $4;
                            }

                            $$ = $1;
                        }

                |  ccl CHAR
                        {
                            if ( caseins && $2 >= 'A' && $2 <= 'Z' ) {
                                $2 = clower( $2 );
                            }

                            ccladd( $1, $2 );
                            cclsorted = cclsorted && ($2 > lastchar);
                            lastchar = $2;
                            $$ = $1;
                        }

                |  /* empty - start a fresh class */
                        {
                            cclsorted = true;
                            lastchar = 0;
                            $$ = cclinit();
                        }
                ;
611
/* the characters of a quoted string, linked into one machine */
string          :  string CHAR
                        {
                            /* fold upper case when case-insensitive */
                            if ( caseins && $2 >= 'A' && $2 <= 'Z' ) {
                                $2 = clower( $2 );
                            }

                            ++rulelen;
                            $$ = link_machines( $1, mkstate( $2 ) );
                        }

                |  /* empty - the string starts as a lone epsilon state */
                        {
                            $$ = mkstate( SYM_EPSILON );
                        }
                ;
626
627 %%
628
629
630 /* build_eof_action - build the "<<EOF>>" action for the active start
631  *                    conditions
632  */
633
634 void build_eof_action()
635
636     {
637     register int i;
638
639     for ( i = 1; i <= actvp; ++i )
640         {
641         if ( sceof[actvsc[i]] )
642             format_pinpoint_message(
643                 "multiple <<EOF>> rules for start condition %s",
644                     scname[actvsc[i]] );
645
646         else
647             {
648             sceof[actvsc[i]] = true;
649             fprintf( temp_action_file, "case YY_STATE_EOF(%s):\n",
650                      scname[actvsc[i]] );
651             }
652         }
653
654     line_directive_out( temp_action_file );
655     }
656
657
658 /* synerr - report a syntax error */
659
660 void synerr( str )
661 char str[];
662
663     {
664     syntaxerror = true;
665     pinpoint_message( str );
666     }
667
668
669 /* format_pinpoint_message - write out a message formatted with one string,
670  *                           pinpointing its location
671  */
672
673 void format_pinpoint_message( msg, arg )
674 char msg[], arg[];
675
676     {
677     char errmsg[MAXLINE];
678
679     (void) sprintf( errmsg, msg, arg );
680     pinpoint_message( errmsg );
681     }
682
683
684 /* pinpoint_message - write out a message, pinpointing its location */
685
686 void pinpoint_message( str )
687 char str[];
688
689     {
690     fprintf( stderr, "\"%s\", line %d: %s\n", infilename, linenum, str );
691     }
692
693
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( msg )
char msg[];
{
    /* intentionally empty: msg is discarded */
}