5 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
6 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
7 * company may do whatever they wish with source code distributed with
8 * PCCTS or the code generated by PCCTS, including the incorporation of
9 * PCCTS, or its output, into commerical software.
11 * We encourage users to develop software with PCCTS. However, we do ask
12 * that credit is given to us for developing PCCTS. By "credit",
13 * we mean that if you incorporate our source code into one of your
14 * programs (commercial product, research project, or otherwise) that you
15 * acknowledge this fact somewhere in the documentation, research report,
16 * etc... If you like PCCTS and have developed a nice tool with the
17 * output, please mention that you developed it using PCCTS. In
18 * addition, we ask that this header remain intact in our source code.
19 * As long as these guidelines are kept, we expect to continue enhancing
20 * this system and expect to make other tools available as they are
25 * Parr Research Corporation
26 * with Purdue University and AHPCRC, University of Minnesota
34 /* I have to put this here due to C++ limitation
35 * that you can't have a 'forward' decl for enums.
36 * I hate C++!!!!!!!!!!!!!!!
37 * Of course, if I could use real templates, this would go away.
39 enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED };
41 #define ANTLR_SUPPORT_CODE
46 #include ATOKENBUFFER_H
/* Sizing constants.  Going by their names these are the default capacity and
 * the growth increment (both in tokens) of the infinite-lookahead token
 * buffer -- TODO confirm; no use of either constant is visible in this part
 * of the file.
 *
 * The explicit `int` is required: ISO C++ has no implicit-int rule, so a
 * declaration without a type specifier is rejected by conforming compilers.
 */
static const int zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;
static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;
52 /* L o o k a h e a d M a c r o s */
54 /* maximum of 32 bits/unsigned int and must be 8 bits/byte;
55 * we only use 8 bits of it.
57 SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
58 0x00000001, 0x00000002, 0x00000004, 0x00000008,
59 0x00000010, 0x00000020, 0x00000040, 0x00000080
62 char ANTLRParser::eMsgBuffer[500] = "";
71 ANTLRParser(ANTLRTokenBuffer *_inputTokens,
78 can_use_inf_look = use_inf_look;
84 eofToken = (ANTLRTokenType)1;
86 // allocate lookahead buffer
87 token_type = new ANTLRTokenType[LLk];
92 /* prime lookahead buffer, point to inputTokens */
93 this->inputTokens = _inputTokens;
94 this->inputTokens->setMinTokens(k);
97 void ANTLRParser::init()
103 guess(ANTLRParserState *st)
107 return setjmp(guess_start.state);
111 saveState(ANTLRParserState *buf)
113 buf->guess_start = guess_start;
114 buf->guessing = guessing;
115 buf->inf_labase = inf_labase;
116 buf->inf_last = inf_last;
121 restoreState(ANTLRParserState *buf)
125 guess_start = buf->guess_start;
126 guessing = buf->guessing;
127 inf_labase = buf->inf_labase;
128 inf_last = buf->inf_last;
131 // restore lookahead buffer from k tokens before restored TokenBuffer position
132 // if demand_look, then I guess we don't look backwards for these tokens.
133 for (i=1; i<=LLk; i++) token_type[i-1] =
134 inputTokens->bufferedToken(i-LLk)->getType();
139 /* Get the next symbol from the input stream; put it into lookahead buffer;
140 * fill token_type[] fast reference cache also. NLA is the next place where
141 * a lookahead ANTLRAbstractToken should go.
146 NLA = inputTokens->getToken()->getType();
148 lap = (lap+1)&(LLk-1);
151 _ANTLRTokenPtr ANTLRParser::
154 #ifdef DEBUG_TOKENBUFFER
155 if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk )
157 static char buf[2000];
158 sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
162 return inputTokens->bufferedToken(i-LLk);
169 int i, c = k - (LLk-dirty);
170 for (i=1; i<=c; i++) consume();
173 /* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
180 for(i=1;i<=LLk; i++) consume();
186 /* check to see if the current input symbol matches '_t'.
187 * During NON demand lookahead mode, dirty will always be 0 and
188 * hence the extra code for consuming tokens in _match is never
189 * executed; the same routine can be used for both modes.
192 _match(ANTLRTokenType _t, ANTLRChar **MissText,
193 ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
194 SetWordType **MissSet)
201 *MissTok= _t; *BadTok = LT(1);
206 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
210 /* check to see if the current input symbol matches '_t'.
211 * Used during exception handling.
214 _match_wsig(ANTLRTokenType _t)
219 if ( LA(1)!=_t ) return 0;
221 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
225 /* check to see if the current input symbol matches any token in a set.
226 * During NON demand lookahead mode, dirty will always be 0 and
227 * hence the extra code for consuming tokens in _setmatch is never
228 * executed; the same routine can be used for both modes.
231 _setmatch(SetWordType *tset, ANTLRChar **MissText,
232 ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
233 SetWordType **MissSet)
238 if ( !set_el(LA(1), tset) ) {
240 *MissTok= (ANTLRTokenType)0; *BadTok=LT(1);
245 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
250 _setmatch_wsig(SetWordType *tset)
255 if ( !set_el(LA(1), tset) ) return 0;
257 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
261 /* Exception handling routines */
264 consumeUntil(SetWordType *st)
266 while ( !set_el(LA(1), st) ) { consume(); }
270 consumeUntilToken(int t)
272 while ( LA(1)!=t ) { consume(); }
276 /* Old error stuff */
279 resynch(SetWordType *wd,SetWordType mask)
281 static int consumed = 1;
283 /* if you enter here without having consumed a token from last resynch
284 * force a token consumption.
286 if ( !consumed ) {consume(); consumed=1; return;}
288 /* if current token is in resynch set, we've got what we wanted */
289 if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;}
291 /* scan until we find something in the resynch set */
292 while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
296 /* standard error reporting function that assumes DLG-based scanners;
297 * you should redefine in subclass to change it or if you use your
301 syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
302 ANTLRTokenType etok, int k)
306 line = LT(1)->getLine();
308 fprintf(stderr, "line %d: syntax error at \"%s\"",
309 line, LT(1)->getText());
310 if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
311 if ( k==1 ) fprintf(stderr, " missing");
314 fprintf(stderr, "; \"%s\" not", LT(1)->getText());
315 if ( set_deg(eset)>1 ) fprintf(stderr, " in");
317 if ( set_deg(eset)>0 ) edecode(eset);
318 else fprintf(stderr, " %s", token_tbl[etok]);
319 if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup);
320 fprintf(stderr, "\n");
323 /* is b an element of set p? */
325 set_el(ANTLRTokenType b, SetWordType *p)
327 return( p[DIVWORD(b)] & bitmask[MODWORD(b)] );
331 set_deg(SetWordType *a)
333 /* Fast compute degree of a set... the number
334 of elements present in the set. Assumes
335 that all word bits are used in the set
337 register SetWordType *p = a;
338 register SetWordType *endp = &(a[bsetsize]);
339 register int degree = 0;
341 if ( a == NULL ) return 0;
344 register SetWordType t = *p;
345 register SetWordType *b = &(bitmask[0]);
347 if (t & *b) ++degree;
348 } while (++b < &(bitmask[sizeof(SetWordType)*8]));
356 edecode(SetWordType *a)
358 register SetWordType *p = a;
359 register SetWordType *endp = &(p[bsetsize]);
360 register unsigned e = 0;
362 if ( set_deg(a)>1 ) fprintf(stderr, " {");
364 register SetWordType t = *p;
365 register SetWordType *b = &(bitmask[0]);
367 if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]);
369 } while (++b < &(bitmask[sizeof(SetWordType)*8]));
370 } while (++p < endp);
371 if ( set_deg(a)>1 ) fprintf(stderr, " }");
375 * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
376 * where the zzMiss stuff is set here to the token that did not match
377 * (and which set it was not a member of).
380 ANTLRParser::FAIL(int k, ...)
382 static char text[1000]; // dangerous, but I don't care right now
383 static SetWordType *f[20];
384 SetWordType **miss_set;
385 ANTLRChar **miss_text;
386 _ANTLRTokenPtr *bad_tok;
387 ANTLRChar **bad_text;
395 if ( k>20 ) panic("FAIL: overflowed buffer");
396 for (i=1; i<=k; i++) /* collect all lookahead sets */
398 f[i-1] = va_arg(ap, SetWordType *);
400 for (i=1; i<=k; i++) /* look for offending token */
402 if ( i>1 ) strcat(text, " ");
403 strcat(text, LT(i)->getText());
404 if ( !set_el(LA(i), f[i-1]) ) break;
406 miss_set = va_arg(ap, SetWordType **);
407 miss_text = va_arg(ap, ANTLRChar **);
408 bad_tok = va_arg(ap, _ANTLRTokenPtr *);
409 bad_text = va_arg(ap, ANTLRChar **);
410 err_k = va_arg(ap, unsigned *);
413 /* bad; the lookahead is a permutation that cannot be matched,
414 * but the ith token of lookahead is valid at the ith position
415 * (the old LL sub 1 (k) versus LL(k) parsing technique)
418 *miss_text = LT(1)->getText();
420 *bad_text = (*bad_tok)->getText();
424 /* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
428 *bad_text = (*bad_tok)->getText();
429 if ( i==1 ) *err_k = 1;
434 _match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
436 if ( dirty==LLk ) consume();
438 if ( LA(1)!=tokenWanted )
441 "line %d: syntax error at \"%s\" missing %s\n",
443 (LA(1)==eofToken)?"<eof>":LT(1)->getText(),
444 token_tbl[tokenWanted]);
445 consumeUntil( whatFollows );
450 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
451 /* if ( !demand_look ) consume(); */
458 _setmatch_wdfltsig(SetWordType *tokensWanted,
459 ANTLRTokenType tokenTypeOfSet,
460 SetWordType *whatFollows)
462 if ( dirty==LLk ) consume();
463 if ( !set_el(LA(1), tokensWanted) )
466 "line %d: syntax error at \"%s\" missing %s\n",
468 (LA(1)==eofToken)?"<eof>":LT(1)->getText(),
469 token_tbl[tokenTypeOfSet]);
470 consumeUntil( whatFollows );
475 labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
476 /* if ( !demand_look ) consume(); */
482 eMsgd(char *err,int d)
484 sprintf(eMsgBuffer, err, d); // dangerous, but I don't care
489 eMsg(char *err, char *s)
491 sprintf(eMsgBuffer, err, s);
496 eMsg2(char *err,char *s, char *t)
498 sprintf(eMsgBuffer, err, s, t);
505 fprintf(stderr, "ANTLR panic: %s\n", msg);