X-Git-Url: https://pd.if.org/git/?p=pccts;a=blobdiff_plain;f=h%2FAParser.cpp;fp=h%2FAParser.cpp;h=51d85492b9c776d222699bf62ca3e1eae8067cf3;hp=0000000000000000000000000000000000000000;hb=cb15b978c765a661bf3154d865fa3e2401d649f5;hpb=c4e55222e892b8762e11f2425a64611e898ef20e diff --git a/h/AParser.cpp b/h/AParser.cpp new file mode 100755 index 0000000..51d8549 --- /dev/null +++ b/h/AParser.cpp @@ -0,0 +1,507 @@ +/* ANTLRParser.C + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#include +#include +#include + +/* I have to put this here due to C++ limitation + * that you can't have a 'forward' decl for enums. + * I hate C++!!!!!!!!!!!!!!! + * Of course, if I could use real templates, this would go away. + */ +enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED }; + +#define ANTLR_SUPPORT_CODE + +#include "config.h" +#include ATOKEN_H + +#include ATOKENBUFFER_H +#include APARSER_H + +static const zzINF_DEF_TOKEN_BUFFER_SIZE = 2000; +static const zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000; + + /* L o o k a h e a d M a c r o s */ + +/* maximum of 32 bits/unsigned int and must be 8 bits/byte; + * we only use 8 bits of it. + */ +SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080 +}; + +char ANTLRParser::eMsgBuffer[500] = ""; + +ANTLRParser:: +~ANTLRParser() +{ + delete [] token_type; +} + +ANTLRParser:: +ANTLRParser(ANTLRTokenBuffer *_inputTokens, + int k, + int use_inf_look, + int dlook, + int ssize) +{ + LLk = k; + can_use_inf_look = use_inf_look; + demand_look = dlook; + bsetsize = ssize; + + guessing = 0; + token_tbl = NULL; + eofToken = (ANTLRTokenType)1; + + // allocate lookahead buffer + token_type = new ANTLRTokenType[LLk]; + lap = 0; + labase = 0; + dirty = 0; + + /* prime lookahead buffer, point to inputTokens */ + this->inputTokens = _inputTokens; + this->inputTokens->setMinTokens(k); +} + +void ANTLRParser::init() +{ + prime_lookahead(); +} + +int ANTLRParser:: +guess(ANTLRParserState *st) +{ + saveState(st); + guessing = 1; + return setjmp(guess_start.state); +} + +void ANTLRParser:: +saveState(ANTLRParserState *buf) +{ + buf->guess_start = guess_start; + buf->guessing = guessing; + buf->inf_labase = inf_labase; + buf->inf_last = inf_last; + buf->dirty = dirty; +} + +void ANTLRParser:: +restoreState(ANTLRParserState *buf) +{ + int i; + + guess_start = buf->guess_start; + guessing = buf->guessing; + inf_labase = buf->inf_labase; + inf_last = buf->inf_last; + dirty = buf->dirty; + + // restore lookahead buffer from k tokens before restored TokenBuffer position + // if demand_look, then I guess we don't look backwards for these tokens. + for (i=1; i<=LLk; i++) token_type[i-1] = + inputTokens->bufferedToken(i-LLk)->getType(); + lap = 0; + labase = 0; +} + +/* Get the next symbol from the input stream; put it into lookahead buffer; + * fill token_type[] fast reference cache also. NLA is the next place where + * a lookahead ANTLRAbstractToken should go. + */ +void ANTLRParser:: +consume() +{ + NLA = inputTokens->getToken()->getType(); + dirty--; + lap = (lap+1)&(LLk-1); +} + +_ANTLRTokenPtr ANTLRParser:: +LT(int i) +{ +#ifdef DEBUG_TOKENBUFFER + if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk ) + { + static char buf[2000]; + sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i); + panic(buf); + } +#endif + return inputTokens->bufferedToken(i-LLk); +} + +void +ANTLRParser:: +look(int k) +{ + int i, c = k - (LLk-dirty); + for (i=1; i<=c; i++) consume(); +} + +/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK); + */ +void +ANTLRParser:: +prime_lookahead() +{ + int i; + for(i=1;i<=LLk; i++) consume(); + dirty=0; + lap = 0; + labase = 0; +} + +/* check to see if the current input symbol matches '_t'. + * During NON demand lookahead mode, dirty will always be 0 and + * hence the extra code for consuming tokens in _match is never + * executed; the same routine can be used for both modes. + */ +int ANTLRParser:: +_match(ANTLRTokenType _t, ANTLRChar **MissText, + ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, + SetWordType **MissSet) +{ + if ( dirty==LLk ) { + consume(); + } + if ( LA(1)!=_t ) { + *MissText=NULL; + *MissTok= _t; *BadTok = LT(1); + *MissSet=NULL; + return 0; + } + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + +/* check to see if the current input symbol matches '_t'. + * Used during exception handling. + */ +int ANTLRParser:: +_match_wsig(ANTLRTokenType _t) +{ + if ( dirty==LLk ) { + consume(); + } + if ( LA(1)!=_t ) return 0; + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + +/* check to see if the current input symbol matches any token in a set. + * During NON demand lookahead mode, dirty will always be 0 and + * hence the extra code for consuming tokens in _match is never + * executed; the same routine can be used for both modes. + */ +int ANTLRParser:: +_setmatch(SetWordType *tset, ANTLRChar **MissText, + ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, + SetWordType **MissSet) +{ + if ( dirty==LLk ) { + consume(); + } + if ( !set_el(LA(1), tset) ) { + *MissText=NULL; + *MissTok= (ANTLRTokenType)0; *BadTok=LT(1); + *MissSet=tset; + return 0; + } + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + +int ANTLRParser:: +_setmatch_wsig(SetWordType *tset) +{ + if ( dirty==LLk ) { + consume(); + } + if ( !set_el(LA(1), tset) ) return 0; + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + + /* Exception handling routines */ + +void ANTLRParser:: +consumeUntil(SetWordType *st) +{ + while ( !set_el(LA(1), st) ) { consume(); } +} + +void ANTLRParser:: +consumeUntilToken(int t) +{ + while ( LA(1)!=t ) { consume(); } +} + + + /* Old error stuff */ + +void ANTLRParser:: +resynch(SetWordType *wd,SetWordType mask) +{ + static int consumed = 1; + + /* if you enter here without having consumed a token from last resynch + * force a token consumption. + */ + if ( !consumed ) {consume(); consumed=1; return;} + + /* if current token is in resynch set, we've got what we wanted */ + if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;} + + /* scan until we find something in the resynch set */ + while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();} + consumed=1; +} + +/* standard error reporting function that assumes DLG-based scanners; + * you should redefine in subclass to change it or if you use your + * own scanner. + */ +void ANTLRParser:: +syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset, + ANTLRTokenType etok, int k) +{ + int line; + + line = LT(1)->getLine(); + + fprintf(stderr, "line %d: syntax error at \"%s\"", + line, LT(1)->getText()); + if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} + if ( k==1 ) fprintf(stderr, " missing"); + else + { + fprintf(stderr, "; \"%s\" not", LT(1)->getText()); + if ( set_deg(eset)>1 ) fprintf(stderr, " in"); + } + if ( set_deg(eset)>0 ) edecode(eset); + else fprintf(stderr, " %s", token_tbl[etok]); + if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup); + fprintf(stderr, "\n"); +} + +/* is b an element of set p? */ +int ANTLRParser:: +set_el(ANTLRTokenType b, SetWordType *p) +{ + return( p[DIVWORD(b)] & bitmask[MODWORD(b)] ); +} + +int ANTLRParser:: +set_deg(SetWordType *a) +{ + /* Fast compute degree of a set... the number + of elements present in the set. Assumes + that all word bits are used in the set + */ + register SetWordType *p = a; + register SetWordType *endp = &(a[bsetsize]); + register int degree = 0; + + if ( a == NULL ) return 0; + while ( p < endp ) + { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if (t & *b) ++degree; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + p++; + } + + return(degree); +} + +void ANTLRParser:: +edecode(SetWordType *a) +{ + register SetWordType *p = a; + register SetWordType *endp = &(p[bsetsize]); + register unsigned e = 0; + + if ( set_deg(a)>1 ) fprintf(stderr, " {"); + do { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]); + e++; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + } while (++p < endp); + if ( set_deg(a)>1 ) fprintf(stderr, " }"); +} + +/* input looks like: + * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk) + * where the zzMiss stuff is set here to the token that did not match + * (and which set wasn't it a member of). + */ +void +ANTLRParser::FAIL(int k, ...) +{ + static char text[1000]; // dangerous, but I don't care right now + static SetWordType *f[20]; + SetWordType **miss_set; + ANTLRChar **miss_text; + _ANTLRTokenPtr *bad_tok; + ANTLRChar **bad_text; + unsigned *err_k; + int i; + va_list ap; + + va_start(ap, k); + + text[0] = '\0'; + if ( k>20 ) panic("FAIL: overflowed buffer"); + for (i=1; i<=k; i++) /* collect all lookahead sets */ + { + f[i-1] = va_arg(ap, SetWordType *); + } + for (i=1; i<=k; i++) /* look for offending token */ + { + if ( i>1 ) strcat(text, " "); + strcat(text, LT(i)->getText()); + if ( !set_el(LA(i), f[i-1]) ) break; + } + miss_set = va_arg(ap, SetWordType **); + miss_text = va_arg(ap, ANTLRChar **); + bad_tok = va_arg(ap, _ANTLRTokenPtr *); + bad_text = va_arg(ap, ANTLRChar **); + err_k = va_arg(ap, unsigned *); + if ( i>k ) + { + /* bad; lookahead is permutation that cannot be matched, + * but, the ith token of lookahead is valid at the ith position + * (The old LL sub 1 (k) versus LL(k) parsing technique) + */ + *miss_set = NULL; + *miss_text = LT(1)->getText(); + *bad_tok = LT(1); + *bad_text = (*bad_tok)->getText(); + *err_k = k; + return; + } +/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ + *miss_set = f[i-1]; + *miss_text = text; + *bad_tok = LT(i); + *bad_text = (*bad_tok)->getText(); + if ( i==1 ) *err_k = 1; + else *err_k = k; +} + +int ANTLRParser:: +_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows) +{ + if ( dirty==LLk ) consume(); + + if ( LA(1)!=tokenWanted ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + LT(1)->getLine(), + (LA(1)==eofToken)?"":LT(1)->getText(), + token_tbl[tokenWanted]); + consumeUntil( whatFollows ); + return 0; + } + else { + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look +/* if ( !demand_look ) consume(); */ + return 1; + } +} + + +int ANTLRParser:: +_setmatch_wdfltsig(SetWordType *tokensWanted, + ANTLRTokenType tokenTypeOfSet, + SetWordType *whatFollows) +{ + if ( dirty==LLk ) consume(); + if ( !set_el(LA(1), tokensWanted) ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + LT(1)->getLine(), + (LA(1)==eofToken)?"":LT(1)->getText(), + token_tbl[tokenTypeOfSet]); + consumeUntil( whatFollows ); + return 0; + } + else { + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look +/* if ( !demand_look ) consume(); */ + return 1; + } +} + +char *ANTLRParser:: +eMsgd(char *err,int d) +{ + sprintf(eMsgBuffer, err, d); // dangerous, but I don't care + return eMsgBuffer; +} + +char *ANTLRParser:: +eMsg(char *err, char *s) +{ + sprintf(eMsgBuffer, err, s); + return eMsgBuffer; +} + +char *ANTLRParser:: +eMsg2(char *err,char *s, char *t) +{ + sprintf(eMsgBuffer, err, s, t); + return eMsgBuffer; +} + +void ANTLRParser:: +panic(char *msg) +{ + fprintf(stderr, "ANTLR panic: %s\n", msg); + exit(EXIT_FAILURE); +}