pd.if.org Git - pccts/blob - h/AParser.cpp

   1 /* ANTLRParser.C
   2  *
   3  * SOFTWARE RIGHTS
   4  *
   5  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
   6  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
   7  * company may do whatever they wish with source code distributed with
   8  * PCCTS or the code generated by PCCTS, including the incorporation of
   9  * PCCTS, or its output, into commerical software.
  10  *
  11  * We encourage users to develop software with PCCTS.  However, we do ask
  12  * that credit is given to us for developing PCCTS.  By "credit",
  13  * we mean that if you incorporate our source code into one of your
  14  * programs (commercial product, research project, or otherwise) that you
  15  * acknowledge this fact somewhere in the documentation, research report,
  16  * etc...  If you like PCCTS and have developed a nice tool with the
  17  * output, please mention that you developed it using PCCTS.  In
  18  * addition, we ask that this header remain intact in our source code.
  19  * As long as these guidelines are kept, we expect to continue enhancing
  20  * this system and expect to make other tools available as they are
  21  * completed.
  22  *
  23  * ANTLR 1.33
  24  * Terence Parr
  25  * Parr Research Corporation
  26  * with Purdue University and AHPCRC, University of Minnesota
  27  * 1989-1995
  28  */
  29 #include <stdlib.h>
  30 #include <stdarg.h>
  31 #include <string.h>
  32 #include <stdio.h>
  33
  34 /* I have to put this here due to C++ limitation
  35  * that you can't have a 'forward' decl for enums.
  36  * I hate C++!!!!!!!!!!!!!!!
  37  * Of course, if I could use real templates, this would go away.
  38  */
  39 enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED };
  40
  41 #define ANTLR_SUPPORT_CODE
  42
  43 #include "config.h"
  44 #include ATOKEN_H
  45
  46 #include ATOKENBUFFER_H
  47 #include APARSER_H
  48
  49 static const zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;
  50 static const zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;
  51
  52                  /* L o o k a h e a d  M a c r o s */
  53
  54 /* maximum of 32 bits/unsigned int and must be 8 bits/byte;
  55  * we only use 8 bits of it.
  56  */
  57 SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
  58         0x00000001, 0x00000002, 0x00000004, 0x00000008,
  59         0x00000010, 0x00000020, 0x00000040, 0x00000080
  60 };
  61
  62 char ANTLRParser::eMsgBuffer[500] = "";
  63
  64 ANTLRParser::
  65 ~ANTLRParser()
  66 {
  67         delete [] token_type;
  68 }
  69
  70 ANTLRParser::
  71 ANTLRParser(ANTLRTokenBuffer *_inputTokens,
  72                         int k,
  73                         int use_inf_look,
  74                         int dlook,
  75                         int ssize)
  76 {
  77         LLk = k;
  78         can_use_inf_look = use_inf_look;
  79         demand_look = dlook;
  80         bsetsize = ssize;
  81
  82         guessing = 0;
  83         token_tbl = NULL;
  84         eofToken = (ANTLRTokenType)1;
  85
  86         // allocate lookahead buffer
  87         token_type = new ANTLRTokenType[LLk];
  88         lap = 0;
  89         labase = 0;
  90         dirty = 0;
  91
  92         /* prime lookahead buffer, point to inputTokens */
  93         this->inputTokens = _inputTokens;
  94         this->inputTokens->setMinTokens(k);
  95 }
  96
  97 void ANTLRParser::init()
  98 {
  99    prime_lookahead();
 100 }
 101
 102 int ANTLRParser::
 103 guess(ANTLRParserState *st)
 104 {
 105         saveState(st);
 106         guessing = 1;
 107         return setjmp(guess_start.state);
 108 }
 109
 110 void ANTLRParser::
 111 saveState(ANTLRParserState *buf)
 112 {
 113         buf->guess_start = guess_start;
 114         buf->guessing = guessing;
 115         buf->inf_labase = inf_labase;
 116         buf->inf_last = inf_last;
 117         buf->dirty = dirty;
 118 }
 119
 120 void ANTLRParser::
 121 restoreState(ANTLRParserState *buf)
 122 {
 123         int i;
 124
 125         guess_start = buf->guess_start;
 126         guessing = buf->guessing;
 127         inf_labase = buf->inf_labase;
 128         inf_last = buf->inf_last;
 129         dirty = buf->dirty;
 130
 131         // restore lookahead buffer from k tokens before restored TokenBuffer position
 132         // if demand_look, then I guess we don't look backwards for these tokens.
 133         for (i=1; i<=LLk; i++) token_type[i-1] =
 134                 inputTokens->bufferedToken(i-LLk)->getType();
 135         lap = 0;
 136         labase = 0;
 137 }
 138
 139 /* Get the next symbol from the input stream; put it into lookahead buffer;
 140  * fill token_type[] fast reference cache also.  NLA is the next place where
 141  * a lookahead ANTLRAbstractToken should go.
 142  */
 143 void ANTLRParser::
 144 consume()
 145 {
 146     NLA = inputTokens->getToken()->getType();
 147         dirty--;
 148         lap = (lap+1)&(LLk-1);
 149 }
 150
 151 _ANTLRTokenPtr ANTLRParser::
 152 LT(int i)
 153 {
 154 #ifdef DEBUG_TOKENBUFFER
 155         if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk )
 156         {
 157                 static char buf[2000];
 158                 sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
 159                 panic(buf);
 160         }
 161 #endif
 162         return inputTokens->bufferedToken(i-LLk);
 163 }
 164
 165 void
 166 ANTLRParser::
 167 look(int k)
 168 {
 169         int i, c = k - (LLk-dirty);
 170         for (i=1; i<=c; i++) consume();
 171 }
 172
 173 /* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
 174  */
 175 void
 176 ANTLRParser::
 177 prime_lookahead()
 178 {
 179         int i;
 180         for(i=1;i<=LLk; i++) consume();
 181         dirty=0;
 182         lap = 0;
 183         labase = 0;
 184 }
 185
 186 /* check to see if the current input symbol matches '_t'.
 187  * During NON demand lookahead mode, dirty will always be 0 and
 188  * hence the extra code for consuming tokens in _match is never
 189  * executed; the same routine can be used for both modes.
 190  */
 191 int ANTLRParser::
 192 _match(ANTLRTokenType _t, ANTLRChar **MissText,
 193            ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
 194            SetWordType **MissSet)
 195 {
 196         if ( dirty==LLk ) {
 197                 consume();
 198         }
 199         if ( LA(1)!=_t ) {
 200                 *MissText=NULL;
 201                 *MissTok= _t; *BadTok = LT(1);
 202                 *MissSet=NULL;
 203                 return 0;
 204         }
 205         dirty++;
 206         labase = (labase+1)&(LLk-1);    // labase maintained even if !demand look
 207         return 1;
 208 }
 209
 210 /* check to see if the current input symbol matches '_t'.
 211  * Used during exception handling.
 212  */
 213 int ANTLRParser::
 214 _match_wsig(ANTLRTokenType _t)
 215 {
 216         if ( dirty==LLk ) {
 217                 consume();
 218         }
 219         if ( LA(1)!=_t ) return 0;
 220         dirty++;
 221         labase = (labase+1)&(LLk-1);    // labase maintained even if !demand look
 222         return 1;
 223 }
 224
 225 /* check to see if the current input symbol matches any token in a set.
 226  * During NON demand lookahead mode, dirty will always be 0 and
 227  * hence the extra code for consuming tokens in _match is never
 228  * executed; the same routine can be used for both modes.
 229  */
 230 int ANTLRParser::
 231 _setmatch(SetWordType *tset, ANTLRChar **MissText,
 232            ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
 233            SetWordType **MissSet)
 234 {
 235         if ( dirty==LLk ) {
 236                 consume();
 237         }
 238         if ( !set_el(LA(1), tset) ) {
 239                 *MissText=NULL;
 240                 *MissTok= (ANTLRTokenType)0; *BadTok=LT(1);
 241                 *MissSet=tset;
 242                 return 0;
 243         }
 244         dirty++;
 245         labase = (labase+1)&(LLk-1);    // labase maintained even if !demand look
 246         return 1;
 247 }
 248
 249 int ANTLRParser::
 250 _setmatch_wsig(SetWordType *tset)
 251 {
 252         if ( dirty==LLk ) {
 253                 consume();
 254         }
 255         if ( !set_el(LA(1), tset) ) return 0;
 256         dirty++;
 257         labase = (labase+1)&(LLk-1);    // labase maintained even if !demand look
 258         return 1;
 259 }
 260
 261                    /* Exception handling routines */
 262
 263 void ANTLRParser::
 264 consumeUntil(SetWordType *st)
 265 {
 266         while ( !set_el(LA(1), st) ) { consume(); }
 267 }
 268
 269 void ANTLRParser::
 270 consumeUntilToken(int t)
 271 {
 272         while ( LA(1)!=t ) { consume(); }
 273 }
 274
 275
 276                         /* Old error stuff */
 277
 278 void ANTLRParser::
 279 resynch(SetWordType *wd,SetWordType mask)
 280 {
 281         static int consumed = 1;
 282
 283         /* if you enter here without having consumed a token from last resynch
 284          * force a token consumption.
 285          */
 286         if ( !consumed ) {consume(); consumed=1; return;}
 287
 288         /* if current token is in resynch set, we've got what we wanted */
 289         if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;}
 290
 291         /* scan until we find something in the resynch set */
 292         while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
 293         consumed=1;
 294 }
 295
 296 /* standard error reporting function that assumes DLG-based scanners;
 297  * you should redefine in subclass to change it or if you use your
 298  * own scanner.
 299  */
 300 void ANTLRParser::
 301 syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
 302         ANTLRTokenType etok, int k)
 303 {
 304         int line;
 305
 306         line = LT(1)->getLine();
 307
 308         fprintf(stderr, "line %d: syntax error at \"%s\"",
 309                                         line, LT(1)->getText());
 310         if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
 311         if ( k==1 ) fprintf(stderr, " missing");
 312         else
 313         {
 314                 fprintf(stderr, "; \"%s\" not", LT(1)->getText());
 315                 if ( set_deg(eset)>1 ) fprintf(stderr, " in");
 316         }
 317         if ( set_deg(eset)>0 ) edecode(eset);
 318         else fprintf(stderr, " %s", token_tbl[etok]);
 319         if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup);
 320         fprintf(stderr, "\n");
 321 }
 322
 323 /* is b an element of set p? */
 324 int ANTLRParser::
 325 set_el(ANTLRTokenType b, SetWordType *p)
 326 {
 327         return( p[DIVWORD(b)] & bitmask[MODWORD(b)] );
 328 }
 329
 330 int ANTLRParser::
 331 set_deg(SetWordType *a)
 332 {
 333         /* Fast compute degree of a set... the number
 334            of elements present in the set.  Assumes
 335            that all word bits are used in the set
 336         */
 337         register SetWordType *p = a;
 338         register SetWordType *endp = &(a[bsetsize]);
 339         register int degree = 0;
 340
 341         if ( a == NULL ) return 0;
 342         while ( p < endp )
 343         {
 344                 register SetWordType t = *p;
 345                 register SetWordType *b = &(bitmask[0]);
 346                 do {
 347                         if (t & *b) ++degree;
 348                 } while (++b < &(bitmask[sizeof(SetWordType)*8]));
 349                 p++;
 350         }
 351
 352         return(degree);
 353 }
 354
 355 void ANTLRParser::
 356 edecode(SetWordType *a)
 357 {
 358         register SetWordType *p = a;
 359         register SetWordType *endp = &(p[bsetsize]);
 360         register unsigned e = 0;
 361
 362         if ( set_deg(a)>1 ) fprintf(stderr, " {");
 363         do {
 364                 register SetWordType t = *p;
 365                 register SetWordType *b = &(bitmask[0]);
 366                 do {
 367                         if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]);
 368                         e++;
 369                 } while (++b < &(bitmask[sizeof(SetWordType)*8]));
 370         } while (++p < endp);
 371         if ( set_deg(a)>1 ) fprintf(stderr, " }");
 372 }
 373
 374 /* input looks like:
 375  *      zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
 376  * where the zzMiss stuff is set here to the token that did not match
 377  * (and which set wasn't it a member of).
 378  */
 379 void
 380 ANTLRParser::FAIL(int k, ...)
 381 {
 382     static char text[1000];     // dangerous, but I don't care right now
 383     static SetWordType *f[20];
 384     SetWordType **miss_set;
 385     ANTLRChar **miss_text;
 386     _ANTLRTokenPtr *bad_tok;
 387     ANTLRChar **bad_text;
 388     unsigned *err_k;
 389     int i;
 390     va_list ap;
 391
 392     va_start(ap, k);
 393
 394     text[0] = '\0';
 395         if ( k>20 ) panic("FAIL: overflowed buffer");
 396     for (i=1; i<=k; i++)    /* collect all lookahead sets */
 397     {
 398         f[i-1] = va_arg(ap, SetWordType *);
 399     }
 400     for (i=1; i<=k; i++)    /* look for offending token */
 401     {
 402         if ( i>1 ) strcat(text, " ");
 403         strcat(text, LT(i)->getText());
 404         if ( !set_el(LA(i), f[i-1]) ) break;
 405     }
 406     miss_set = va_arg(ap, SetWordType **);
 407     miss_text = va_arg(ap, ANTLRChar **);
 408     bad_tok = va_arg(ap, _ANTLRTokenPtr *);
 409     bad_text = va_arg(ap, ANTLRChar **);
 410     err_k = va_arg(ap, unsigned *);
 411     if ( i>k )
 412     {
 413         /* bad; lookahead is permutation that cannot be matched,
 414          * but, the ith token of lookahead is valid at the ith position
 415          * (The old LL sub 1 (k) versus LL(k) parsing technique)
 416          */
 417         *miss_set = NULL;
 418         *miss_text = LT(1)->getText();
 419         *bad_tok = LT(1);
 420         *bad_text = (*bad_tok)->getText();
 421         *err_k = k;
 422         return;
 423     }
 424 /*  fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
 425     *miss_set = f[i-1];
 426     *miss_text = text;
 427     *bad_tok = LT(i);
 428     *bad_text = (*bad_tok)->getText();
 429     if ( i==1 ) *err_k = 1;
 430     else *err_k = k;
 431 }
 432
 433 int ANTLRParser::
 434 _match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
 435 {
 436         if ( dirty==LLk ) consume();
 437
 438         if ( LA(1)!=tokenWanted )
 439         {
 440                 fprintf(stderr,
 441                                 "line %d: syntax error at \"%s\" missing %s\n",
 442                                 LT(1)->getLine(),
 443                                 (LA(1)==eofToken)?"<eof>":LT(1)->getText(),
 444                                 token_tbl[tokenWanted]);
 445                 consumeUntil( whatFollows );
 446                 return 0;
 447         }
 448         else {
 449                 dirty++;
 450                 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
 451 /*              if ( !demand_look ) consume(); */
 452                 return 1;
 453         }
 454 }
 455
 456
 457 int ANTLRParser::
 458 _setmatch_wdfltsig(SetWordType *tokensWanted,
 459                                         ANTLRTokenType tokenTypeOfSet,
 460                                         SetWordType *whatFollows)
 461 {
 462         if ( dirty==LLk ) consume();
 463         if ( !set_el(LA(1), tokensWanted) )
 464         {
 465                 fprintf(stderr,
 466                                 "line %d: syntax error at \"%s\" missing %s\n",
 467                                 LT(1)->getLine(),
 468                                 (LA(1)==eofToken)?"<eof>":LT(1)->getText(),
 469                                 token_tbl[tokenTypeOfSet]);
 470                 consumeUntil( whatFollows );
 471                 return 0;
 472         }
 473         else {
 474                 dirty++;
 475                 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
 476 /*              if ( !demand_look ) consume(); */
 477                 return 1;
 478         }
 479 }
 480
 481 char *ANTLRParser::
 482 eMsgd(char *err,int d)
 483 {
 484         sprintf(eMsgBuffer, err, d);    // dangerous, but I don't care
 485         return eMsgBuffer;
 486 }
 487
 488 char *ANTLRParser::
 489 eMsg(char *err, char *s)
 490 {
 491         sprintf(eMsgBuffer, err, s);
 492         return eMsgBuffer;
 493 }
 494
 495 char *ANTLRParser::
 496 eMsg2(char *err,char *s, char *t)
 497 {
 498         sprintf(eMsgBuffer, err, s, t);
 499         return eMsgBuffer;
 500 }
 501
 502 void ANTLRParser::
 503 panic(char *msg)
 504 {
 505         fprintf(stderr, "ANTLR panic: %s\n", msg);
 506         exit(EXIT_FAILURE);
 507 }