util/flex/dfa.c

   1 /* dfa - DFA construction routines */
   2
   3 /*-
   4  * Copyright (c) 1990 The Regents of the University of California.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * Vern Paxson.
   9  *
  10  * The United States Government has rights in this work pursuant
  11  * to contract no. DE-AC03-76SF00098 between the United States
  12  * Department of Energy and the University of California.
  13  *
  14  * Redistribution and use in source and binary forms are permitted provided
  15  * that: (1) source distributions retain this entire copyright notice and
  16  * comment, and (2) distributions including binaries display the following
  17  * acknowledgement:  ``This product includes software developed by the
  18  * University of California, Berkeley and its contributors'' in the
  19  * documentation or other materials provided with the distribution and in
  20  * all advertising materials mentioning features or use of this software.
  21  * Neither the name of the University nor the names of its contributors may
  22  * be used to endorse or promote products derived from this software without
  23  * specific prior written permission.
  24  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  25  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  26  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  27  */
  28
  29 #ifndef lint
  30 static char rcsid[] =
  31     "@(#) $Id: dfa.c,v 1.2 1994/06/24 10:56:41 ceriel Exp $ (LBL)";
  32 #endif
  33
  34 #include "flexdef.h"
  35
  36
  37 /* declare functions that have forward references */
  38
  39 void dump_associated_rules PROTO((FILE*, int));
  40 void dump_transitions PROTO((FILE*, int[]));
  41 void sympartition PROTO((int[], int, int[], int[]));
  42 int symfollowset PROTO((int[], int, int, int[]));
  43
  44
  45 /* check_for_backtracking - check a DFA state for backtracking
  46  *
  47  * synopsis
  48  *     int ds, state[numecs];
  49  *     check_for_backtracking( ds, state );
  50  *
  51  * ds is the number of the state to check and state[] is its out-transitions,
  52  * indexed by equivalence class, and state_rules[] is the set of rules
  53  * associated with this state
  54  */
  55
  56 void check_for_backtracking( ds, state )
  57 int ds;
  58 int state[];
  59
  60     {
  61     if ( (reject && ! dfaacc[ds].dfaacc_set) || ! dfaacc[ds].dfaacc_state )
  62         { /* state is non-accepting */
  63         ++num_backtracking;
  64
  65         if ( backtrack_report )
  66             {
  67             fprintf( backtrack_file, "State #%d is non-accepting -\n", ds );
  68
  69             /* identify the state */
  70             dump_associated_rules( backtrack_file, ds );
  71
  72             /* now identify it further using the out- and jam-transitions */
  73             dump_transitions( backtrack_file, state );
  74
  75             putc( '\n', backtrack_file );
  76             }
  77         }
  78     }
  79
  80
  81 /* check_trailing_context - check to see if NFA state set constitutes
  82  *                          "dangerous" trailing context
  83  *
  84  * synopsis
  85  *    int nfa_states[num_states+1], num_states;
  86  *    int accset[nacc+1], nacc;
  87  *    check_trailing_context( nfa_states, num_states, accset, nacc );
  88  *
  89  * NOTES
  90  *    Trailing context is "dangerous" if both the head and the trailing
  91  *  part are of variable size \and/ there's a DFA state which contains
  92  *  both an accepting state for the head part of the rule and NFA states
  93  *  which occur after the beginning of the trailing context.
  94  *  When such a rule is matched, it's impossible to tell if having been
  95  *  in the DFA state indicates the beginning of the trailing context
  96  *  or further-along scanning of the pattern.  In these cases, a warning
  97  *  message is issued.
  98  *
  99  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
 100  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
 101  */
 102
 103 void check_trailing_context( nfa_states, num_states, accset, nacc )
 104 int *nfa_states, num_states;
 105 int *accset;
 106 register int nacc;
 107
 108     {
 109     register int i, j;
 110
 111     for ( i = 1; i <= num_states; ++i )
 112         {
 113         int ns = nfa_states[i];
 114         register int type = state_type[ns];
 115         register int ar = assoc_rule[ns];
 116
 117         if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
 118             { /* do nothing */
 119             }
 120
 121         else if ( type == STATE_TRAILING_CONTEXT )
 122             {
 123             /* potential trouble.  Scan set of accepting numbers for
 124              * the one marking the end of the "head".  We assume that
 125              * this looping will be fairly cheap since it's rare that
 126              * an accepting number set is large.
 127              */
 128             for ( j = 1; j <= nacc; ++j )
 129                 if ( accset[j] & YY_TRAILING_HEAD_MASK )
 130                     {
 131                     fprintf( stderr,
 132                      "%s: Dangerous trailing context in rule at line %d\n",
 133                              program_name, rule_linenum[ar] );
 134                     return;
 135                     }
 136             }
 137         }
 138     }
 139
 140
 141 /* dump_associated_rules - list the rules associated with a DFA state
 142  *
 143  * synopisis
 144  *     int ds;
 145  *     FILE *file;
 146  *     dump_associated_rules( file, ds );
 147  *
 148  * goes through the set of NFA states associated with the DFA and
 149  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
 150  * and writes a report to the given file
 151  */
 152
 153 void dump_associated_rules( file, ds )
 154 FILE *file;
 155 int ds;
 156
 157     {
 158     register int i, j;
 159     register int num_associated_rules = 0;
 160     int rule_set[MAX_ASSOC_RULES + 1];
 161     int *dset = dss[ds];
 162     int size = dfasiz[ds];
 163
 164     for ( i = 1; i <= size; ++i )
 165         {
 166         register rule_num = rule_linenum[assoc_rule[dset[i]]];
 167
 168         for ( j = 1; j <= num_associated_rules; ++j )
 169             if ( rule_num == rule_set[j] )
 170                 break;
 171
 172         if ( j > num_associated_rules )
 173             { /* new rule */
 174             if ( num_associated_rules < MAX_ASSOC_RULES )
 175                 rule_set[++num_associated_rules] = rule_num;
 176             }
 177         }
 178
 179     bubble( rule_set, num_associated_rules );
 180
 181     fprintf( file, " associated rule line numbers:" );
 182
 183     for ( i = 1; i <= num_associated_rules; ++i )
 184         {
 185         if ( i % 8 == 1 )
 186             putc( '\n', file );
 187
 188         fprintf( file, "\t%d", rule_set[i] );
 189         }
 190
 191     putc( '\n', file );
 192     }
 193
 194
 195 /* dump_transitions - list the transitions associated with a DFA state
 196  *
 197  * synopisis
 198  *     int state[numecs];
 199  *     FILE *file;
 200  *     dump_transitions( file, state );
 201  *
 202  * goes through the set of out-transitions and lists them in human-readable
 203  * form (i.e., not as equivalence classes); also lists jam transitions
 204  * (i.e., all those which are not out-transitions, plus EOF).  The dump
 205  * is done to the given file.
 206  */
 207
 208 void dump_transitions( file, state )
 209 FILE *file;
 210 int state[];
 211
 212     {
 213     register int i, ec;
 214     int out_char_set[CSIZE];
 215
 216     for ( i = 0; i < csize; ++i )
 217         {
 218         ec = abs( ecgroup[i] );
 219         out_char_set[i] = state[ec];
 220         }
 221
 222     fprintf( file, " out-transitions: " );
 223
 224     list_character_set( file, out_char_set );
 225
 226     /* now invert the members of the set to get the jam transitions */
 227     for ( i = 0; i < csize; ++i )
 228         out_char_set[i] = ! out_char_set[i];
 229
 230     fprintf( file, "\n jam-transitions: EOF " );
 231
 232     list_character_set( file, out_char_set );
 233
 234     putc( '\n', file );
 235     }
 236
 237
 238 /* epsclosure - construct the epsilon closure of a set of ndfa states
 239  *
 240  * synopsis
 241  *    int t[current_max_dfa_size], numstates, accset[num_rules + 1], nacc;
 242  *    int hashval;
 243  *    int *epsclosure();
 244  *    t = epsclosure( t, &numstates, accset, &nacc, &hashval );
 245  *
 246  * NOTES
 247  *    the epsilon closure is the set of all states reachable by an arbitrary
 248  *  number of epsilon transitions which themselves do not have epsilon
 249  *  transitions going out, unioned with the set of states which have non-null
 250  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
 251  *  Upon return, t holds the epsilon closure and numstates is updated.  accset
 252  *  holds a list of the accepting numbers, and the size of accset is given
 253  *  by nacc.  t may be subjected to reallocation if it is not large enough
 254  *  to hold the epsilon closure.
 255  *
 256  *    hashval is the hash value for the dfa corresponding to the state set
 257  */
 258
 259 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
 260 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
 261
 262     {
 263     register int stkpos, ns, tsp;
 264     int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
 265     int stkend, nstate;
 266     static int did_stk_init = false, *stk;
 267
 268 #define MARK_STATE(state) \
 269         trans1[state] = trans1[state] - MARKER_DIFFERENCE;
 270
 271 #define IS_MARKED(state) (trans1[state] < 0)
 272
 273 #define UNMARK_STATE(state) \
 274         trans1[state] = trans1[state] + MARKER_DIFFERENCE;
 275
 276 #define CHECK_ACCEPT(state) \
 277         { \
 278         nfaccnum = accptnum[state]; \
 279         if ( nfaccnum != NIL ) \
 280             accset[++nacc] = nfaccnum; \
 281         }
 282
 283 #define DO_REALLOCATION \
 284         { \
 285         current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
 286         ++num_reallocs; \
 287         t = reallocate_integer_array( t, current_max_dfa_size ); \
 288         stk = reallocate_integer_array( stk, current_max_dfa_size ); \
 289         } \
 290
 291 #define PUT_ON_STACK(state) \
 292         { \
 293         if ( ++stkend >= current_max_dfa_size ) \
 294             DO_REALLOCATION \
 295         stk[stkend] = state; \
 296         MARK_STATE(state) \
 297         }
 298
 299 #define ADD_STATE(state) \
 300         { \
 301         if ( ++numstates >= current_max_dfa_size ) \
 302             DO_REALLOCATION \
 303         t[numstates] = state; \
 304         hashval = hashval + state; \
 305         }
 306
 307 #define STACK_STATE(state) \
 308         { \
 309         PUT_ON_STACK(state) \
 310         CHECK_ACCEPT(state) \
 311         if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
 312             ADD_STATE(state) \
 313         }
 314
 315     if ( ! did_stk_init )
 316         {
 317         stk = allocate_integer_array( current_max_dfa_size );
 318         did_stk_init = true;
 319         }
 320
 321     nacc = stkend = hashval = 0;
 322
 323     for ( nstate = 1; nstate <= numstates; ++nstate )
 324         {
 325         ns = t[nstate];
 326
 327         /* the state could be marked if we've already pushed it onto
 328          * the stack
 329          */
 330         if ( ! IS_MARKED(ns) )
 331             PUT_ON_STACK(ns)
 332
 333         CHECK_ACCEPT(ns)
 334         hashval = hashval + ns;
 335         }
 336
 337     for ( stkpos = 1; stkpos <= stkend; ++stkpos )
 338         {
 339         ns = stk[stkpos];
 340         transsym = transchar[ns];
 341
 342         if ( transsym == SYM_EPSILON )
 343             {
 344             tsp = trans1[ns] + MARKER_DIFFERENCE;
 345
 346             if ( tsp != NO_TRANSITION )
 347                 {
 348                 if ( ! IS_MARKED(tsp) )
 349                     STACK_STATE(tsp)
 350
 351                 tsp = trans2[ns];
 352
 353                 if ( tsp != NO_TRANSITION )
 354                     if ( ! IS_MARKED(tsp) )
 355                         STACK_STATE(tsp)
 356                 }
 357             }
 358         }
 359
 360     /* clear out "visit" markers */
 361
 362     for ( stkpos = 1; stkpos <= stkend; ++stkpos )
 363         {
 364         if ( IS_MARKED(stk[stkpos]) )
 365             {
 366             UNMARK_STATE(stk[stkpos])
 367             }
 368         else
 369             flexfatal( "consistency check failed in epsclosure()" );
 370         }
 371
 372     *ns_addr = numstates;
 373     *hv_addr = hashval;
 374     *nacc_addr = nacc;
 375
 376     return ( t );
 377     }
 378
 379
 380 /* increase_max_dfas - increase the maximum number of DFAs */
 381
 382 void increase_max_dfas()
 383
 384     {
 385     current_max_dfas += MAX_DFAS_INCREMENT;
 386
 387     ++num_reallocs;
 388
 389     base = reallocate_integer_array( base, current_max_dfas );
 390     def = reallocate_integer_array( def, current_max_dfas );
 391     dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
 392     accsiz = reallocate_integer_array( accsiz, current_max_dfas );
 393     dhash = reallocate_integer_array( dhash, current_max_dfas );
 394     dss = reallocate_int_ptr_array( dss, current_max_dfas );
 395     dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
 396
 397     if ( nultrans )
 398         nultrans = reallocate_integer_array( nultrans, current_max_dfas );
 399     }
 400
 401
 402 /* ntod - convert an ndfa to a dfa
 403  *
 404  * synopsis
 405  *    ntod();
 406  *
 407  *  creates the dfa corresponding to the ndfa we've constructed.  the
 408  *  dfa starts out in state #1.
 409  */
 410
 411 void ntod()
 412
 413     {
 414     int *accset, ds, nacc, newds;
 415     int sym, hashval, numstates, dsize;
 416     int num_full_table_rows;    /* used only for -f */
 417     int *nset, *dset;
 418     int targptr, totaltrans, i, comstate, comfreq, targ;
 419     int *epsclosure(), snstods(), symlist[CSIZE + 1];
 420     int num_start_states;
 421     int todo_head, todo_next;
 422
 423     /* note that the following are indexed by *equivalence classes*
 424      * and not by characters.  Since equivalence classes are indexed
 425      * beginning with 1, even if the scanner accepts NUL's, this
 426      * means that (since every character is potentially in its own
 427      * equivalence class) these arrays must have room for indices
 428      * from 1 to CSIZE, so their size must be CSIZE + 1.
 429      */
 430     int duplist[CSIZE + 1], state[CSIZE + 1];
 431     int targfreq[CSIZE + 1], targstate[CSIZE + 1];
 432
 433     /* this is so find_table_space(...) will know where to start looking in
 434      * chk/nxt for unused records for space to put in the state
 435      */
 436     if ( fullspd )
 437         firstfree = 0;
 438
 439     accset = allocate_integer_array( num_rules + 1 );
 440     nset = allocate_integer_array( current_max_dfa_size );
 441
 442     /* the "todo" queue is represented by the head, which is the DFA
 443      * state currently being processed, and the "next", which is the
 444      * next DFA state number available (not in use).  We depend on the
 445      * fact that snstods() returns DFA's \in increasing order/, and thus
 446      * need only know the bounds of the dfas to be processed.
 447      */
 448     todo_head = todo_next = 0;
 449
 450     for ( i = 0; i <= csize; ++i )
 451         {
 452         duplist[i] = NIL;
 453         symlist[i] = false;
 454         }
 455
 456     for ( i = 0; i <= num_rules; ++i )
 457         accset[i] = NIL;
 458
 459     if ( trace )
 460         {
 461         dumpnfa( scset[1] );
 462         fputs( "\n\nDFA Dump:\n\n", stderr );
 463         }
 464
 465     inittbl();
 466
 467     /* check to see whether we should build a separate table for transitions
 468      * on NUL characters.  We don't do this for full-speed (-F) scanners,
 469      * since for them we don't have a simple state number lying around with
 470      * which to index the table.  We also don't bother doing it for scanners
 471      * unless (1) NUL is in its own equivalence class (indicated by a
 472      * positive value of ecgroup[NUL]), (2) NUL's equilvalence class is
 473      * the last equivalence class, and (3) the number of equivalence classes
 474      * is the same as the number of characters.  This latter case comes about
 475      * when useecs is false or when its true but every character still
 476      * manages to land in its own class (unlikely, but it's cheap to check
 477      * for).  If all these things are true then the character code needed
 478      * to represent NUL's equivalence class for indexing the tables is
 479      * going to take one more bit than the number of characters, and therefore
 480      * we won't be assured of being able to fit it into a YY_CHAR variable.
 481      * This rules out storing the transitions in a compressed table, since
 482      * the code for interpreting them uses a YY_CHAR variable (perhaps it
 483      * should just use an integer, though; this is worth pondering ... ###).
 484      *
 485      * Finally, for full tables, we want the number of entries in the
 486      * table to be a power of two so the array references go fast (it
 487      * will just take a shift to compute the major index).  If encoding
 488      * NUL's transitions in the table will spoil this, we give it its
 489      * own table (note that this will be the case if we're not using
 490      * equivalence classes).
 491      */
 492
 493     /* note that the test for ecgroup[0] == numecs below accomplishes
 494      * both (1) and (2) above
 495      */
 496     if ( ! fullspd && ecgroup[0] == numecs )
 497         { /* NUL is alone in its equivalence class, which is the last one */
 498         int use_NUL_table = (numecs == csize);
 499
 500         if ( fulltbl && ! use_NUL_table )
 501             { /* we still may want to use the table if numecs is a power of 2 */
 502             int power_of_two;
 503
 504             for ( power_of_two = 1; power_of_two <= csize; power_of_two *= 2 )
 505                 if ( numecs == power_of_two )
 506                     {
 507                     use_NUL_table = true;
 508                     break;
 509                     }
 510             }
 511
 512         if ( use_NUL_table )
 513             nultrans = allocate_integer_array( current_max_dfas );
 514             /* from now on, nultrans != nil indicates that we're
 515              * saving null transitions for later, separate encoding
 516              */
 517         }
 518
 519
 520     if ( fullspd )
 521         {
 522         for ( i = 0; i <= numecs; ++i )
 523             state[i] = 0;
 524         place_state( state, 0, 0 );
 525         }
 526
 527     else if ( fulltbl )
 528         {
 529         if ( nultrans )
 530             /* we won't be including NUL's transitions in the table,
 531              * so build it for entries from 0 .. numecs - 1
 532              */
 533             num_full_table_rows = numecs;
 534
 535         else
 536             /* take into account the fact that we'll be including
 537              * the NUL entries in the transition table.  Build it
 538              * from 0 .. numecs.
 539              */
 540             num_full_table_rows = numecs + 1;
 541
 542         /* declare it "short" because it's a real long-shot that that
 543          * won't be large enough.
 544          */
 545         printf( "static short int yy_nxt[][%d] =\n    {\n",
 546                 /* '}' so vi doesn't get too confused */
 547                 num_full_table_rows );
 548
 549         /* generate 0 entries for state #0 */
 550         for ( i = 0; i < num_full_table_rows; ++i )
 551             mk2data( 0 );
 552
 553         /* force ',' and dataflush() next call to mk2data */
 554         datapos = NUMDATAITEMS;
 555
 556         /* force extra blank line next dataflush() */
 557         dataline = NUMDATALINES;
 558         }
 559
 560     /* create the first states */
 561
 562     num_start_states = lastsc * 2;
 563
 564     for ( i = 1; i <= num_start_states; ++i )
 565         {
 566         numstates = 1;
 567
 568         /* for each start condition, make one state for the case when
 569          * we're at the beginning of the line (the '%' operator) and
 570          * one for the case when we're not
 571          */
 572         if ( i % 2 == 1 )
 573             nset[numstates] = scset[(i / 2) + 1];
 574         else
 575             nset[numstates] = mkbranch( scbol[i / 2], scset[i / 2] );
 576
 577         nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
 578
 579         if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
 580             {
 581             numas += nacc;
 582             totnst += numstates;
 583             ++todo_next;
 584
 585             if ( variable_trailing_context_rules && nacc > 0 )
 586                 check_trailing_context( nset, numstates, accset, nacc );
 587             }
 588         }
 589
 590     if ( ! fullspd )
 591         {
 592         if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
 593             flexfatal( "could not create unique end-of-buffer state" );
 594
 595         ++numas;
 596         ++num_start_states;
 597         ++todo_next;
 598         }
 599
 600     while ( todo_head < todo_next )
 601         {
 602         targptr = 0;
 603         totaltrans = 0;
 604
 605         for ( i = 1; i <= numecs; ++i )
 606             state[i] = 0;
 607
 608         ds = ++todo_head;
 609
 610         dset = dss[ds];
 611         dsize = dfasiz[ds];
 612
 613         if ( trace )
 614             fprintf( stderr, "state # %d:\n", ds );
 615
 616         sympartition( dset, dsize, symlist, duplist );
 617
 618         for ( sym = 1; sym <= numecs; ++sym )
 619             {
 620             if ( symlist[sym] )
 621                 {
 622                 symlist[sym] = 0;
 623
 624                 if ( duplist[sym] == NIL )
 625                     { /* symbol has unique out-transitions */
 626                     numstates = symfollowset( dset, dsize, sym, nset );
 627                     nset = epsclosure( nset, &numstates, accset,
 628                                        &nacc, &hashval );
 629
 630                     if ( snstods( nset, numstates, accset,
 631                                   nacc, hashval, &newds ) )
 632                         {
 633                         totnst = totnst + numstates;
 634                         ++todo_next;
 635                         numas += nacc;
 636
 637                         if ( variable_trailing_context_rules && nacc > 0 )
 638                             check_trailing_context( nset, numstates,
 639                                 accset, nacc );
 640                         }
 641
 642                     state[sym] = newds;
 643
 644                     if ( trace )
 645                         fprintf( stderr, "\t%d\t%d\n", sym, newds );
 646
 647                     targfreq[++targptr] = 1;
 648                     targstate[targptr] = newds;
 649                     ++numuniq;
 650                     }
 651
 652                 else
 653                     {
 654                     /* sym's equivalence class has the same transitions
 655                      * as duplist(sym)'s equivalence class
 656                      */
 657                     targ = state[duplist[sym]];
 658                     state[sym] = targ;
 659
 660                     if ( trace )
 661                         fprintf( stderr, "\t%d\t%d\n", sym, targ );
 662
 663                     /* update frequency count for destination state */
 664
 665                     i = 0;
 666                     while ( targstate[++i] != targ )
 667                         ;
 668
 669                     ++targfreq[i];
 670                     ++numdup;
 671                     }
 672
 673                 ++totaltrans;
 674                 duplist[sym] = NIL;
 675                 }
 676             }
 677
 678         numsnpairs = numsnpairs + totaltrans;
 679
 680         if ( caseins && ! useecs )
 681             {
 682             register int j;
 683
 684             for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
 685                 state[i] = state[j];
 686             }
 687
 688         if ( ds > num_start_states )
 689             check_for_backtracking( ds, state );
 690
 691         if ( nultrans )
 692             {
 693             nultrans[ds] = state[NUL_ec];
 694             state[NUL_ec] = 0;  /* remove transition */
 695             }
 696
 697         if ( fulltbl )
 698             {
 699             /* supply array's 0-element */
 700             if ( ds == end_of_buffer_state )
 701                 mk2data( -end_of_buffer_state );
 702             else
 703                 mk2data( end_of_buffer_state );
 704
 705             for ( i = 1; i < num_full_table_rows; ++i )
 706                 /* jams are marked by negative of state number */
 707                 mk2data( state[i] ? state[i] : -ds );
 708
 709             /* force ',' and dataflush() next call to mk2data */
 710             datapos = NUMDATAITEMS;
 711
 712             /* force extra blank line next dataflush() */
 713             dataline = NUMDATALINES;
 714             }
 715
 716         else if ( fullspd )
 717             place_state( state, ds, totaltrans );
 718
 719         else if ( ds == end_of_buffer_state )
 720             /* special case this state to make sure it does what it's
 721              * supposed to, i.e., jam on end-of-buffer
 722              */
 723             stack1( ds, 0, 0, JAMSTATE );
 724
 725         else /* normal, compressed state */
 726             {
 727             /* determine which destination state is the most common, and
 728              * how many transitions to it there are
 729              */
 730
 731             comfreq = 0;
 732             comstate = 0;
 733
 734             for ( i = 1; i <= targptr; ++i )
 735                 if ( targfreq[i] > comfreq )
 736                     {
 737                     comfreq = targfreq[i];
 738                     comstate = targstate[i];
 739                     }
 740
 741             bldtbl( state, ds, totaltrans, comstate, comfreq );
 742             }
 743         }
 744
 745     if ( fulltbl )
 746         dataend();
 747
 748     else if ( ! fullspd )
 749         {
 750         cmptmps();  /* create compressed template entries */
 751
 752         /* create tables for all the states with only one out-transition */
 753         while ( onesp > 0 )
 754             {
 755             mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
 756                     onedef[onesp] );
 757             --onesp;
 758             }
 759
 760         mkdeftbl();
 761         }
 762     }
 763
 764
 765 /* snstods - converts a set of ndfa states into a dfa state
 766  *
 767  * synopsis
 768  *    int sns[numstates], numstates, newds, accset[num_rules + 1], nacc, hashval;
 769  *    int snstods();
 770  *    is_new_state = snstods( sns, numstates, accset, nacc, hashval, &newds );
 771  *
 772  * on return, the dfa state number is in newds.
 773  */
 774
 775 int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
 776 int sns[], numstates, accset[], nacc, hashval, *newds_addr;
 777
 778     {
 779     int didsort = 0;
 780     register int i, j;
 781     int newds, *oldsns;
 782
 783     for ( i = 1; i <= lastdfa; ++i )
 784         if ( hashval == dhash[i] )
 785             {
 786             if ( numstates == dfasiz[i] )
 787                 {
 788                 oldsns = dss[i];
 789
 790                 if ( ! didsort )
 791                     {
 792                     /* we sort the states in sns so we can compare it to
 793                      * oldsns quickly.  we use bubble because there probably
 794                      * aren't very many states
 795                      */
 796                     bubble( sns, numstates );
 797                     didsort = 1;
 798                     }
 799
 800                 for ( j = 1; j <= numstates; ++j )
 801                     if ( sns[j] != oldsns[j] )
 802                         break;
 803
 804                 if ( j > numstates )
 805                     {
 806                     ++dfaeql;
 807                     *newds_addr = i;
 808                     return ( 0 );
 809                     }
 810
 811                 ++hshcol;
 812                 }
 813
 814             else
 815                 ++hshsave;
 816             }
 817
 818     /* make a new dfa */
 819
 820     if ( ++lastdfa >= current_max_dfas )
 821         increase_max_dfas();
 822
 823     newds = lastdfa;
 824
 825     dss[newds] = (int *) malloc( (unsigned) ((numstates + 1) * sizeof( int )) );
 826
 827     if ( ! dss[newds] )
 828         flexfatal( "dynamic memory failure in snstods()" );
 829
 830     /* if we haven't already sorted the states in sns, we do so now, so that
 831      * future comparisons with it can be made quickly
 832      */
 833
 834     if ( ! didsort )
 835         bubble( sns, numstates );
 836
 837     for ( i = 1; i <= numstates; ++i )
 838         dss[newds][i] = sns[i];
 839
 840     dfasiz[newds] = numstates;
 841     dhash[newds] = hashval;
 842
 843     if ( nacc == 0 )
 844         {
 845         if ( reject )
 846             dfaacc[newds].dfaacc_set = (int *) 0;
 847         else
 848             dfaacc[newds].dfaacc_state = 0;
 849
 850         accsiz[newds] = 0;
 851         }
 852
 853     else if ( reject )
 854         {
 855         /* we sort the accepting set in increasing order so the disambiguating
 856          * rule that the first rule listed is considered match in the event of
 857          * ties will work.  We use a bubble sort since the list is probably
 858          * quite small.
 859          */
 860
 861         bubble( accset, nacc );
 862
 863         dfaacc[newds].dfaacc_set =
 864             (int *) malloc( (unsigned) ((nacc + 1) * sizeof( int )) );
 865
 866         if ( ! dfaacc[newds].dfaacc_set )
 867             flexfatal( "dynamic memory failure in snstods()" );
 868
 869         /* save the accepting set for later */
 870         for ( i = 1; i <= nacc; ++i )
 871             dfaacc[newds].dfaacc_set[i] = accset[i];
 872
 873         accsiz[newds] = nacc;
 874         }
 875
 876     else
 877         { /* find lowest numbered rule so the disambiguating rule will work */
 878         j = num_rules + 1;
 879
 880         for ( i = 1; i <= nacc; ++i )
 881             if ( accset[i] < j )
 882                 j = accset[i];
 883
 884         dfaacc[newds].dfaacc_state = j;
 885         }
 886
 887     *newds_addr = newds;
 888
 889     return ( 1 );
 890     }
 891
 892
 893 /* symfollowset - follow the symbol transitions one step
 894  *
 895  * synopsis
 896  *    int ds[current_max_dfa_size], dsize, transsym;
 897  *    int nset[current_max_dfa_size], numstates;
 898  *    numstates = symfollowset( ds, dsize, transsym, nset );
 899  */
 900
 901 int symfollowset( ds, dsize, transsym, nset )
 902 int ds[], dsize, transsym, nset[];
 903
 904     {
 905     int ns, tsp, sym, i, j, lenccl, ch, numstates;
 906     int ccllist;
 907
 908     numstates = 0;
 909
 910     for ( i = 1; i <= dsize; ++i )
 911         { /* for each nfa state ns in the state set of ds */
 912         ns = ds[i];
 913         sym = transchar[ns];
 914         tsp = trans1[ns];
 915
 916         if ( sym < 0 )
 917             { /* it's a character class */
 918             sym = -sym;
 919             ccllist = cclmap[sym];
 920             lenccl = ccllen[sym];
 921
 922             if ( cclng[sym] )
 923                 {
 924                 for ( j = 0; j < lenccl; ++j )
 925                     { /* loop through negated character class */
 926                     ch = ccltbl[ccllist + j];
 927
 928                     if ( ch == 0 )
 929                         ch = NUL_ec;
 930
 931                     if ( ch > transsym )
 932                         break;  /* transsym isn't in negated ccl */
 933
 934                     else if ( ch == transsym )
 935                         /* next 2 */ goto bottom;
 936                     }
 937
 938                 /* didn't find transsym in ccl */
 939                 nset[++numstates] = tsp;
 940                 }
 941
 942             else
 943                 for ( j = 0; j < lenccl; ++j )
 944                     {
 945                     ch = ccltbl[ccllist + j];
 946
 947                     if ( ch == 0 )
 948                         ch = NUL_ec;
 949
 950                     if ( ch > transsym )
 951                         break;
 952
 953                     else if ( ch == transsym )
 954                         {
 955                         nset[++numstates] = tsp;
 956                         break;
 957                         }
 958                     }
 959             }
 960
 961         else if ( sym >= 'A' && sym <= 'Z' && caseins )
 962             flexfatal( "consistency check failed in symfollowset" );
 963
 964         else if ( sym == SYM_EPSILON )
 965             { /* do nothing */
 966             }
 967
 968         else if ( abs( ecgroup[sym] ) == transsym )
 969             nset[++numstates] = tsp;
 970
 971 bottom:
 972         ;
 973         }
 974
 975     return ( numstates );
 976     }
 977
 978
 979 /* sympartition - partition characters with same out-transitions
 980  *
 981  * synopsis
 982  *    integer ds[current_max_dfa_size], numstates, duplist[numecs];
 983  *    symlist[numecs];
 984  *    sympartition( ds, numstates, symlist, duplist );
 985  */
 986
 987 void sympartition( ds, numstates, symlist, duplist )
 988 int ds[], numstates, duplist[];
 989 int symlist[];
 990
 991     {
 992     int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
 993
 994     /* partitioning is done by creating equivalence classes for those
 995      * characters which have out-transitions from the given state.  Thus
 996      * we are really creating equivalence classes of equivalence classes.
 997      */
 998
 999     for ( i = 1; i <= numecs; ++i )
1000         { /* initialize equivalence class list */
1001         duplist[i] = i - 1;
1002         dupfwd[i] = i + 1;
1003         }
1004
1005     duplist[1] = NIL;
1006     dupfwd[numecs] = NIL;
1007
1008     for ( i = 1; i <= numstates; ++i )
1009         {
1010         ns = ds[i];
1011         tch = transchar[ns];
1012
1013         if ( tch != SYM_EPSILON )
1014             {
1015             if ( tch < -lastccl || tch > csize )
1016                 {
1017                 if ( tch > csize && tch <= CSIZE )
1018                     flexerror( "scanner requires -8 flag" );
1019
1020                 else
1021                     flexfatal(
1022                         "bad transition character detected in sympartition()" );
1023                 }
1024
1025             if ( tch >= 0 )
1026                 { /* character transition */
1027                 /* abs() needed for fake %t ec's */
1028                 int ec = abs( ecgroup[tch] );
1029
1030                 mkechar( ec, dupfwd, duplist );
1031                 symlist[ec] = 1;
1032                 }
1033
1034             else
1035                 { /* character class */
1036                 tch = -tch;
1037
1038                 lenccl = ccllen[tch];
1039                 cclp = cclmap[tch];
1040                 mkeccl( ccltbl + cclp, lenccl, dupfwd, duplist, numecs,
1041                         NUL_ec );
1042
1043                 if ( cclng[tch] )
1044                     {
1045                     j = 0;
1046
1047                     for ( k = 0; k < lenccl; ++k )
1048                         {
1049                         ich = ccltbl[cclp + k];
1050
1051                         if ( ich == 0 )
1052                             ich = NUL_ec;
1053
1054                         for ( ++j; j < ich; ++j )
1055                             symlist[j] = 1;
1056                         }
1057
1058                     for ( ++j; j <= numecs; ++j )
1059                         symlist[j] = 1;
1060                     }
1061
1062                 else
1063                     for ( k = 0; k < lenccl; ++k )
1064                         {
1065                         ich = ccltbl[cclp + k];
1066
1067                         if ( ich == 0 )
1068                             ich = NUL_ec;
1069
1070                         symlist[ich] = 1;
1071                         }
1072                 }
1073             }
1074         }
1075     }