--- /dev/null
+/* ANTLRParser.C
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS. In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+
+/* I have to put this here due to C++ limitation
+ * that you can't have a 'forward' decl for enums.
+ * I hate C++!!!!!!!!!!!!!!!
+ * Of course, if I could use real templates, this would go away.
+ */
+enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED };
+
+#define ANTLR_SUPPORT_CODE
+
+#include "config.h"
+#include ATOKEN_H
+
+#include ATOKENBUFFER_H
+#include APARSER_H
+
+static const zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;
+static const zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;
+
+ /* L o o k a h e a d M a c r o s */
+
+/* maximum of 32 bits/unsigned int and must be 8 bits/byte;
+ * we only use 8 bits of it.
+ */
+SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080
+};
+
+char ANTLRParser::eMsgBuffer[500] = "";
+
+ANTLRParser::
+~ANTLRParser()
+{
+ delete [] token_type;
+}
+
+ANTLRParser::
+ANTLRParser(ANTLRTokenBuffer *_inputTokens,
+ int k,
+ int use_inf_look,
+ int dlook,
+ int ssize)
+{
+ LLk = k;
+ can_use_inf_look = use_inf_look;
+ demand_look = dlook;
+ bsetsize = ssize;
+
+ guessing = 0;
+ token_tbl = NULL;
+ eofToken = (ANTLRTokenType)1;
+
+ // allocate lookahead buffer
+ token_type = new ANTLRTokenType[LLk];
+ lap = 0;
+ labase = 0;
+ dirty = 0;
+
+ /* prime lookahead buffer, point to inputTokens */
+ this->inputTokens = _inputTokens;
+ this->inputTokens->setMinTokens(k);
+}
+
+void ANTLRParser::init()
+{
+ prime_lookahead();
+}
+
+int ANTLRParser::
+guess(ANTLRParserState *st)
+{
+ saveState(st);
+ guessing = 1;
+ return setjmp(guess_start.state);
+}
+
+void ANTLRParser::
+saveState(ANTLRParserState *buf)
+{
+ buf->guess_start = guess_start;
+ buf->guessing = guessing;
+ buf->inf_labase = inf_labase;
+ buf->inf_last = inf_last;
+ buf->dirty = dirty;
+}
+
+void ANTLRParser::
+restoreState(ANTLRParserState *buf)
+{
+ int i;
+
+ guess_start = buf->guess_start;
+ guessing = buf->guessing;
+ inf_labase = buf->inf_labase;
+ inf_last = buf->inf_last;
+ dirty = buf->dirty;
+
+ // restore lookahead buffer from k tokens before restored TokenBuffer position
+ // if demand_look, then I guess we don't look backwards for these tokens.
+ for (i=1; i<=LLk; i++) token_type[i-1] =
+ inputTokens->bufferedToken(i-LLk)->getType();
+ lap = 0;
+ labase = 0;
+}
+
+/* Get the next symbol from the input stream; put it into lookahead buffer;
+ * fill token_type[] fast reference cache also. NLA is the next place where
+ * a lookahead ANTLRAbstractToken should go.
+ */
+void ANTLRParser::
+consume()
+{
+ NLA = inputTokens->getToken()->getType();
+ dirty--;
+ lap = (lap+1)&(LLk-1);
+}
+
+_ANTLRTokenPtr ANTLRParser::
+LT(int i)
+{
+#ifdef DEBUG_TOKENBUFFER
+ if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk )
+ {
+ static char buf[2000];
+ sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
+ panic(buf);
+ }
+#endif
+ return inputTokens->bufferedToken(i-LLk);
+}
+
+void
+ANTLRParser::
+look(int k)
+{
+ int i, c = k - (LLk-dirty);
+ for (i=1; i<=c; i++) consume();
+}
+
+/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
+ */
+void
+ANTLRParser::
+prime_lookahead()
+{
+ int i;
+ for(i=1;i<=LLk; i++) consume();
+ dirty=0;
+ lap = 0;
+ labase = 0;
+}
+
+/* check to see if the current input symbol matches '_t'.
+ * During NON demand lookahead mode, dirty will always be 0 and
+ * hence the extra code for consuming tokens in _match is never
+ * executed; the same routine can be used for both modes.
+ */
+int ANTLRParser::
+_match(ANTLRTokenType _t, ANTLRChar **MissText,
+ ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
+ SetWordType **MissSet)
+{
+ if ( dirty==LLk ) {
+ consume();
+ }
+ if ( LA(1)!=_t ) {
+ *MissText=NULL;
+ *MissTok= _t; *BadTok = LT(1);
+ *MissSet=NULL;
+ return 0;
+ }
+ dirty++;
+ labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+ return 1;
+}
+
+/* check to see if the current input symbol matches '_t'.
+ * Used during exception handling.
+ */
+int ANTLRParser::
+_match_wsig(ANTLRTokenType _t)
+{
+ if ( dirty==LLk ) {
+ consume();
+ }
+ if ( LA(1)!=_t ) return 0;
+ dirty++;
+ labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+ return 1;
+}
+
+/* check to see if the current input symbol matches any token in a set.
+ * During NON demand lookahead mode, dirty will always be 0 and
+ * hence the extra code for consuming tokens in _match is never
+ * executed; the same routine can be used for both modes.
+ */
+int ANTLRParser::
+_setmatch(SetWordType *tset, ANTLRChar **MissText,
+ ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
+ SetWordType **MissSet)
+{
+ if ( dirty==LLk ) {
+ consume();
+ }
+ if ( !set_el(LA(1), tset) ) {
+ *MissText=NULL;
+ *MissTok= (ANTLRTokenType)0; *BadTok=LT(1);
+ *MissSet=tset;
+ return 0;
+ }
+ dirty++;
+ labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+ return 1;
+}
+
+int ANTLRParser::
+_setmatch_wsig(SetWordType *tset)
+{
+ if ( dirty==LLk ) {
+ consume();
+ }
+ if ( !set_el(LA(1), tset) ) return 0;
+ dirty++;
+ labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+ return 1;
+}
+
+ /* Exception handling routines */
+
+void ANTLRParser::
+consumeUntil(SetWordType *st)
+{
+ while ( !set_el(LA(1), st) ) { consume(); }
+}
+
+void ANTLRParser::
+consumeUntilToken(int t)
+{
+ while ( LA(1)!=t ) { consume(); }
+}
+
+
+ /* Old error stuff */
+
+void ANTLRParser::
+resynch(SetWordType *wd,SetWordType mask)
+{
+ static int consumed = 1;
+
+ /* if you enter here without having consumed a token from last resynch
+ * force a token consumption.
+ */
+ if ( !consumed ) {consume(); consumed=1; return;}
+
+ /* if current token is in resynch set, we've got what we wanted */
+ if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;}
+
+ /* scan until we find something in the resynch set */
+ while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
+ consumed=1;
+}
+
+/* standard error reporting function that assumes DLG-based scanners;
+ * you should redefine in subclass to change it or if you use your
+ * own scanner.
+ */
+void ANTLRParser::
+syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
+ ANTLRTokenType etok, int k)
+{
+ int line;
+
+ line = LT(1)->getLine();
+
+ fprintf(stderr, "line %d: syntax error at \"%s\"",
+ line, LT(1)->getText());
+ if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
+ if ( k==1 ) fprintf(stderr, " missing");
+ else
+ {
+ fprintf(stderr, "; \"%s\" not", LT(1)->getText());
+ if ( set_deg(eset)>1 ) fprintf(stderr, " in");
+ }
+ if ( set_deg(eset)>0 ) edecode(eset);
+ else fprintf(stderr, " %s", token_tbl[etok]);
+ if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup);
+ fprintf(stderr, "\n");
+}
+
+/* is b an element of set p? */
+int ANTLRParser::
+set_el(ANTLRTokenType b, SetWordType *p)
+{
+ return( p[DIVWORD(b)] & bitmask[MODWORD(b)] );
+}
+
+int ANTLRParser::
+set_deg(SetWordType *a)
+{
+ /* Fast compute degree of a set... the number
+ of elements present in the set. Assumes
+ that all word bits are used in the set
+ */
+ register SetWordType *p = a;
+ register SetWordType *endp = &(a[bsetsize]);
+ register int degree = 0;
+
+ if ( a == NULL ) return 0;
+ while ( p < endp )
+ {
+ register SetWordType t = *p;
+ register SetWordType *b = &(bitmask[0]);
+ do {
+ if (t & *b) ++degree;
+ } while (++b < &(bitmask[sizeof(SetWordType)*8]));
+ p++;
+ }
+
+ return(degree);
+}
+
+void ANTLRParser::
+edecode(SetWordType *a)
+{
+ register SetWordType *p = a;
+ register SetWordType *endp = &(p[bsetsize]);
+ register unsigned e = 0;
+
+ if ( set_deg(a)>1 ) fprintf(stderr, " {");
+ do {
+ register SetWordType t = *p;
+ register SetWordType *b = &(bitmask[0]);
+ do {
+ if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]);
+ e++;
+ } while (++b < &(bitmask[sizeof(SetWordType)*8]));
+ } while (++p < endp);
+ if ( set_deg(a)>1 ) fprintf(stderr, " }");
+}
+
+/* input looks like:
+ * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
+ * where the zzMiss stuff is set here to the token that did not match
+ * (and which set wasn't it a member of).
+ */
+void
+ANTLRParser::FAIL(int k, ...)
+{
+ static char text[1000]; // dangerous, but I don't care right now
+ static SetWordType *f[20];
+ SetWordType **miss_set;
+ ANTLRChar **miss_text;
+ _ANTLRTokenPtr *bad_tok;
+ ANTLRChar **bad_text;
+ unsigned *err_k;
+ int i;
+ va_list ap;
+
+ va_start(ap, k);
+
+ text[0] = '\0';
+ if ( k>20 ) panic("FAIL: overflowed buffer");
+ for (i=1; i<=k; i++) /* collect all lookahead sets */
+ {
+ f[i-1] = va_arg(ap, SetWordType *);
+ }
+ for (i=1; i<=k; i++) /* look for offending token */
+ {
+ if ( i>1 ) strcat(text, " ");
+ strcat(text, LT(i)->getText());
+ if ( !set_el(LA(i), f[i-1]) ) break;
+ }
+ miss_set = va_arg(ap, SetWordType **);
+ miss_text = va_arg(ap, ANTLRChar **);
+ bad_tok = va_arg(ap, _ANTLRTokenPtr *);
+ bad_text = va_arg(ap, ANTLRChar **);
+ err_k = va_arg(ap, unsigned *);
+ if ( i>k )
+ {
+ /* bad; lookahead is permutation that cannot be matched,
+ * but, the ith token of lookahead is valid at the ith position
+ * (The old LL sub 1 (k) versus LL(k) parsing technique)
+ */
+ *miss_set = NULL;
+ *miss_text = LT(1)->getText();
+ *bad_tok = LT(1);
+ *bad_text = (*bad_tok)->getText();
+ *err_k = k;
+ return;
+ }
+/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
+ *miss_set = f[i-1];
+ *miss_text = text;
+ *bad_tok = LT(i);
+ *bad_text = (*bad_tok)->getText();
+ if ( i==1 ) *err_k = 1;
+ else *err_k = k;
+}
+
+int ANTLRParser::
+_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
+{
+ if ( dirty==LLk ) consume();
+
+ if ( LA(1)!=tokenWanted )
+ {
+ fprintf(stderr,
+ "line %d: syntax error at \"%s\" missing %s\n",
+ LT(1)->getLine(),
+ (LA(1)==eofToken)?"<eof>":LT(1)->getText(),
+ token_tbl[tokenWanted]);
+ consumeUntil( whatFollows );
+ return 0;
+ }
+ else {
+ dirty++;
+ labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+/* if ( !demand_look ) consume(); */
+ return 1;
+ }
+}
+
+
+int ANTLRParser::
+_setmatch_wdfltsig(SetWordType *tokensWanted,
+ ANTLRTokenType tokenTypeOfSet,
+ SetWordType *whatFollows)
+{
+ if ( dirty==LLk ) consume();
+ if ( !set_el(LA(1), tokensWanted) )
+ {
+ fprintf(stderr,
+ "line %d: syntax error at \"%s\" missing %s\n",
+ LT(1)->getLine(),
+ (LA(1)==eofToken)?"<eof>":LT(1)->getText(),
+ token_tbl[tokenTypeOfSet]);
+ consumeUntil( whatFollows );
+ return 0;
+ }
+ else {
+ dirty++;
+ labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+/* if ( !demand_look ) consume(); */
+ return 1;
+ }
+}
+
+char *ANTLRParser::
+eMsgd(char *err,int d)
+{
+ sprintf(eMsgBuffer, err, d); // dangerous, but I don't care
+ return eMsgBuffer;
+}
+
+char *ANTLRParser::
+eMsg(char *err, char *s)
+{
+ sprintf(eMsgBuffer, err, s);
+ return eMsgBuffer;
+}
+
+char *ANTLRParser::
+eMsg2(char *err,char *s, char *t)
+{
+ sprintf(eMsgBuffer, err, s, t);
+ return eMsgBuffer;
+}
+
+void ANTLRParser::
+panic(char *msg)
+{
+ fprintf(stderr, "ANTLR panic: %s\n", msg);
+ exit(EXIT_FAILURE);
+}