/* ANTLRParser.C * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. * * ANTLR 1.33 * Terence Parr * Parr Research Corporation * with Purdue University and AHPCRC, University of Minnesota * 1989-1995 */ #include #include #include #include /* I have to put this here due to C++ limitation * that you can't have a 'forward' decl for enums. * I hate C++!!!!!!!!!!!!!!! * Of course, if I could use real templates, this would go away. */ enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED }; #define ANTLR_SUPPORT_CODE #include "config.h" #include ATOKEN_H #include ATOKENBUFFER_H #include APARSER_H static const zzINF_DEF_TOKEN_BUFFER_SIZE = 2000; static const zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000; /* L o o k a h e a d M a c r o s */ /* maximum of 32 bits/unsigned int and must be 8 bits/byte; * we only use 8 bits of it. */ SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = { 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080 }; char ANTLRParser::eMsgBuffer[500] = ""; ANTLRParser:: ~ANTLRParser() { delete [] token_type; } ANTLRParser:: ANTLRParser(ANTLRTokenBuffer *_inputTokens, int k, int use_inf_look, int dlook, int ssize) { LLk = k; can_use_inf_look = use_inf_look; demand_look = dlook; bsetsize = ssize; guessing = 0; token_tbl = NULL; eofToken = (ANTLRTokenType)1; // allocate lookahead buffer token_type = new ANTLRTokenType[LLk]; lap = 0; labase = 0; dirty = 0; /* prime lookahead buffer, point to inputTokens */ this->inputTokens = _inputTokens; this->inputTokens->setMinTokens(k); } void ANTLRParser::init() { prime_lookahead(); } int ANTLRParser:: guess(ANTLRParserState *st) { saveState(st); guessing = 1; return setjmp(guess_start.state); } void ANTLRParser:: saveState(ANTLRParserState *buf) { buf->guess_start = guess_start; buf->guessing = guessing; buf->inf_labase = inf_labase; buf->inf_last = inf_last; buf->dirty = dirty; } void ANTLRParser:: restoreState(ANTLRParserState *buf) { int i; guess_start = buf->guess_start; guessing = buf->guessing; inf_labase = buf->inf_labase; inf_last = buf->inf_last; dirty = buf->dirty; // restore lookahead buffer from k tokens before restored TokenBuffer position // if demand_look, then I guess we don't look backwards for these tokens. for (i=1; i<=LLk; i++) token_type[i-1] = inputTokens->bufferedToken(i-LLk)->getType(); lap = 0; labase = 0; } /* Get the next symbol from the input stream; put it into lookahead buffer; * fill token_type[] fast reference cache also. NLA is the next place where * a lookahead ANTLRAbstractToken should go. */ void ANTLRParser:: consume() { NLA = inputTokens->getToken()->getType(); dirty--; lap = (lap+1)&(LLk-1); } _ANTLRTokenPtr ANTLRParser:: LT(int i) { #ifdef DEBUG_TOKENBUFFER if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk ) { static char buf[2000]; sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i); panic(buf); } #endif return inputTokens->bufferedToken(i-LLk); } void ANTLRParser:: look(int k) { int i, c = k - (LLk-dirty); for (i=1; i<=c; i++) consume(); } /* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK); */ void ANTLRParser:: prime_lookahead() { int i; for(i=1;i<=LLk; i++) consume(); dirty=0; lap = 0; labase = 0; } /* check to see if the current input symbol matches '_t'. * During NON demand lookahead mode, dirty will always be 0 and * hence the extra code for consuming tokens in _match is never * executed; the same routine can be used for both modes. */ int ANTLRParser:: _match(ANTLRTokenType _t, ANTLRChar **MissText, ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, SetWordType **MissSet) { if ( dirty==LLk ) { consume(); } if ( LA(1)!=_t ) { *MissText=NULL; *MissTok= _t; *BadTok = LT(1); *MissSet=NULL; return 0; } dirty++; labase = (labase+1)&(LLk-1); // labase maintained even if !demand look return 1; } /* check to see if the current input symbol matches '_t'. * Used during exception handling. */ int ANTLRParser:: _match_wsig(ANTLRTokenType _t) { if ( dirty==LLk ) { consume(); } if ( LA(1)!=_t ) return 0; dirty++; labase = (labase+1)&(LLk-1); // labase maintained even if !demand look return 1; } /* check to see if the current input symbol matches any token in a set. * During NON demand lookahead mode, dirty will always be 0 and * hence the extra code for consuming tokens in _match is never * executed; the same routine can be used for both modes. */ int ANTLRParser:: _setmatch(SetWordType *tset, ANTLRChar **MissText, ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, SetWordType **MissSet) { if ( dirty==LLk ) { consume(); } if ( !set_el(LA(1), tset) ) { *MissText=NULL; *MissTok= (ANTLRTokenType)0; *BadTok=LT(1); *MissSet=tset; return 0; } dirty++; labase = (labase+1)&(LLk-1); // labase maintained even if !demand look return 1; } int ANTLRParser:: _setmatch_wsig(SetWordType *tset) { if ( dirty==LLk ) { consume(); } if ( !set_el(LA(1), tset) ) return 0; dirty++; labase = (labase+1)&(LLk-1); // labase maintained even if !demand look return 1; } /* Exception handling routines */ void ANTLRParser:: consumeUntil(SetWordType *st) { while ( !set_el(LA(1), st) ) { consume(); } } void ANTLRParser:: consumeUntilToken(int t) { while ( LA(1)!=t ) { consume(); } } /* Old error stuff */ void ANTLRParser:: resynch(SetWordType *wd,SetWordType mask) { static int consumed = 1; /* if you enter here without having consumed a token from last resynch * force a token consumption. */ if ( !consumed ) {consume(); consumed=1; return;} /* if current token is in resynch set, we've got what we wanted */ if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;} /* scan until we find something in the resynch set */ while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();} consumed=1; } /* standard error reporting function that assumes DLG-based scanners; * you should redefine in subclass to change it or if you use your * own scanner. */ void ANTLRParser:: syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset, ANTLRTokenType etok, int k) { int line; line = LT(1)->getLine(); fprintf(stderr, "line %d: syntax error at \"%s\"", line, LT(1)->getText()); if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} if ( k==1 ) fprintf(stderr, " missing"); else { fprintf(stderr, "; \"%s\" not", LT(1)->getText()); if ( set_deg(eset)>1 ) fprintf(stderr, " in"); } if ( set_deg(eset)>0 ) edecode(eset); else fprintf(stderr, " %s", token_tbl[etok]); if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup); fprintf(stderr, "\n"); } /* is b an element of set p? */ int ANTLRParser:: set_el(ANTLRTokenType b, SetWordType *p) { return( p[DIVWORD(b)] & bitmask[MODWORD(b)] ); } int ANTLRParser:: set_deg(SetWordType *a) { /* Fast compute degree of a set... the number of elements present in the set. Assumes that all word bits are used in the set */ register SetWordType *p = a; register SetWordType *endp = &(a[bsetsize]); register int degree = 0; if ( a == NULL ) return 0; while ( p < endp ) { register SetWordType t = *p; register SetWordType *b = &(bitmask[0]); do { if (t & *b) ++degree; } while (++b < &(bitmask[sizeof(SetWordType)*8])); p++; } return(degree); } void ANTLRParser:: edecode(SetWordType *a) { register SetWordType *p = a; register SetWordType *endp = &(p[bsetsize]); register unsigned e = 0; if ( set_deg(a)>1 ) fprintf(stderr, " {"); do { register SetWordType t = *p; register SetWordType *b = &(bitmask[0]); do { if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]); e++; } while (++b < &(bitmask[sizeof(SetWordType)*8])); } while (++p < endp); if ( set_deg(a)>1 ) fprintf(stderr, " }"); } /* input looks like: * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk) * where the zzMiss stuff is set here to the token that did not match * (and which set wasn't it a member of). */ void ANTLRParser::FAIL(int k, ...) { static char text[1000]; // dangerous, but I don't care right now static SetWordType *f[20]; SetWordType **miss_set; ANTLRChar **miss_text; _ANTLRTokenPtr *bad_tok; ANTLRChar **bad_text; unsigned *err_k; int i; va_list ap; va_start(ap, k); text[0] = '\0'; if ( k>20 ) panic("FAIL: overflowed buffer"); for (i=1; i<=k; i++) /* collect all lookahead sets */ { f[i-1] = va_arg(ap, SetWordType *); } for (i=1; i<=k; i++) /* look for offending token */ { if ( i>1 ) strcat(text, " "); strcat(text, LT(i)->getText()); if ( !set_el(LA(i), f[i-1]) ) break; } miss_set = va_arg(ap, SetWordType **); miss_text = va_arg(ap, ANTLRChar **); bad_tok = va_arg(ap, _ANTLRTokenPtr *); bad_text = va_arg(ap, ANTLRChar **); err_k = va_arg(ap, unsigned *); if ( i>k ) { /* bad; lookahead is permutation that cannot be matched, * but, the ith token of lookahead is valid at the ith position * (The old LL sub 1 (k) versus LL(k) parsing technique) */ *miss_set = NULL; *miss_text = LT(1)->getText(); *bad_tok = LT(1); *bad_text = (*bad_tok)->getText(); *err_k = k; return; } /* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ *miss_set = f[i-1]; *miss_text = text; *bad_tok = LT(i); *bad_text = (*bad_tok)->getText(); if ( i==1 ) *err_k = 1; else *err_k = k; } int ANTLRParser:: _match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows) { if ( dirty==LLk ) consume(); if ( LA(1)!=tokenWanted ) { fprintf(stderr, "line %d: syntax error at \"%s\" missing %s\n", LT(1)->getLine(), (LA(1)==eofToken)?"":LT(1)->getText(), token_tbl[tokenWanted]); consumeUntil( whatFollows ); return 0; } else { dirty++; labase = (labase+1)&(LLk-1); // labase maintained even if !demand look /* if ( !demand_look ) consume(); */ return 1; } } int ANTLRParser:: _setmatch_wdfltsig(SetWordType *tokensWanted, ANTLRTokenType tokenTypeOfSet, SetWordType *whatFollows) { if ( dirty==LLk ) consume(); if ( !set_el(LA(1), tokensWanted) ) { fprintf(stderr, "line %d: syntax error at \"%s\" missing %s\n", LT(1)->getLine(), (LA(1)==eofToken)?"":LT(1)->getText(), token_tbl[tokenTypeOfSet]); consumeUntil( whatFollows ); return 0; } else { dirty++; labase = (labase+1)&(LLk-1); // labase maintained even if !demand look /* if ( !demand_look ) consume(); */ return 1; } } char *ANTLRParser:: eMsgd(char *err,int d) { sprintf(eMsgBuffer, err, d); // dangerous, but I don't care return eMsgBuffer; } char *ANTLRParser:: eMsg(char *err, char *s) { sprintf(eMsgBuffer, err, s); return eMsgBuffer; } char *ANTLRParser:: eMsg2(char *err,char *s, char *t) { sprintf(eMsgBuffer, err, s, t); return eMsgBuffer; } void ANTLRParser:: panic(char *msg) { fprintf(stderr, "ANTLR panic: %s\n", msg); exit(EXIT_FAILURE); }