/* ANTLRParser.C
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 * 
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * ANTLR 1.33
 * Terence Parr
 * Parr Research Corporation
 * with Purdue University and AHPCRC, University of Minnesota
 * 1989-1995
 */
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>

/* I have to put this here due to C++ limitation
 * that you can't have a 'forward' decl for enums.
 * I hate C++!!!!!!!!!!!!!!!
 * Of course, if I could use real templates, this would go away.
 */
enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED };

#define ANTLR_SUPPORT_CODE

#include "config.h"
#include ATOKEN_H

#include ATOKENBUFFER_H
#include APARSER_H

static const zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;
static const zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;

                 /* L o o k a h e a d  M a c r o s */

/* maximum of 32 bits/unsigned int and must be 8 bits/byte;
 * we only use 8 bits of it.
 */
SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
	0x00000001, 0x00000002, 0x00000004, 0x00000008,
	0x00000010, 0x00000020, 0x00000040, 0x00000080
};

char ANTLRParser::eMsgBuffer[500] = "";

ANTLRParser::
~ANTLRParser()
{
	delete [] token_type;
}

ANTLRParser::
ANTLRParser(ANTLRTokenBuffer *_inputTokens,
			int k,
			int use_inf_look,
			int dlook,
			int ssize)
{
	LLk = k;
	can_use_inf_look = use_inf_look;
	demand_look = dlook;
	bsetsize = ssize;

	guessing = 0;
	token_tbl = NULL;
	eofToken = (ANTLRTokenType)1;

	// allocate lookahead buffer
	token_type = new ANTLRTokenType[LLk];
	lap = 0;
	labase = 0;
	dirty = 0;

	/* prime lookahead buffer, point to inputTokens */
	this->inputTokens = _inputTokens;
	this->inputTokens->setMinTokens(k);
}

void ANTLRParser::init()
{
   prime_lookahead();
}

int ANTLRParser::
guess(ANTLRParserState *st)
{
	saveState(st);
	guessing = 1;
	return setjmp(guess_start.state);
}

void ANTLRParser::
saveState(ANTLRParserState *buf)
{
	buf->guess_start = guess_start;
	buf->guessing = guessing;
	buf->inf_labase = inf_labase;
	buf->inf_last = inf_last;
	buf->dirty = dirty;
}

void ANTLRParser::
restoreState(ANTLRParserState *buf)
{
	int i;

	guess_start = buf->guess_start;
	guessing = buf->guessing;
	inf_labase = buf->inf_labase;
	inf_last = buf->inf_last;
	dirty = buf->dirty;

	// restore lookahead buffer from k tokens before restored TokenBuffer position
	// if demand_look, then I guess we don't look backwards for these tokens.
	for (i=1; i<=LLk; i++) token_type[i-1] =
		inputTokens->bufferedToken(i-LLk)->getType();
	lap = 0;
	labase = 0;
}

/* Get the next symbol from the input stream; put it into lookahead buffer;
 * fill token_type[] fast reference cache also.  NLA is the next place where
 * a lookahead ANTLRAbstractToken should go.
 */
void ANTLRParser::
consume()
{
    NLA = inputTokens->getToken()->getType();
	dirty--;
	lap = (lap+1)&(LLk-1);
}

_ANTLRTokenPtr ANTLRParser::
LT(int i)
{
#ifdef DEBUG_TOKENBUFFER
	if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk )
	{
		static char buf[2000];
		sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
		panic(buf);
	}
#endif
	return inputTokens->bufferedToken(i-LLk);
}

void
ANTLRParser::
look(int k)
{
	int i, c = k - (LLk-dirty);
	for (i=1; i<=c; i++) consume();
}

/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK);
 */
void
ANTLRParser::
prime_lookahead()
{
	int i;
	for(i=1;i<=LLk; i++) consume();
	dirty=0;
	lap = 0;
	labase = 0;
}

/* check to see if the current input symbol matches '_t'.
 * During NON demand lookahead mode, dirty will always be 0 and
 * hence the extra code for consuming tokens in _match is never
 * executed; the same routine can be used for both modes.
 */
int ANTLRParser::
_match(ANTLRTokenType _t, ANTLRChar **MissText,
	   ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
	   SetWordType **MissSet)
{
	if ( dirty==LLk ) {
		consume();
	}
	if ( LA(1)!=_t ) {
		*MissText=NULL;
		*MissTok= _t; *BadTok = LT(1);
		*MissSet=NULL;
		return 0;
	}
	dirty++;
	labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
	return 1;
}

/* check to see if the current input symbol matches '_t'.
 * Used during exception handling.
 */
int ANTLRParser::
_match_wsig(ANTLRTokenType _t)
{
	if ( dirty==LLk ) {
		consume();
	}
	if ( LA(1)!=_t ) return 0;
	dirty++;
	labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
	return 1;
}

/* check to see if the current input symbol matches any token in a set.
 * During NON demand lookahead mode, dirty will always be 0 and
 * hence the extra code for consuming tokens in _match is never
 * executed; the same routine can be used for both modes.
 */
int ANTLRParser::
_setmatch(SetWordType *tset, ANTLRChar **MissText,
	   ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
	   SetWordType **MissSet)
{
	if ( dirty==LLk ) {
		consume();
	}
	if ( !set_el(LA(1), tset) ) {
		*MissText=NULL;
		*MissTok= (ANTLRTokenType)0; *BadTok=LT(1);
		*MissSet=tset;
		return 0;
	}
	dirty++;
	labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
	return 1;
}

int ANTLRParser::
_setmatch_wsig(SetWordType *tset)
{
	if ( dirty==LLk ) {
		consume();
	}
	if ( !set_el(LA(1), tset) ) return 0;
	dirty++;
	labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
	return 1;
}

                   /* Exception handling routines */

void ANTLRParser::
consumeUntil(SetWordType *st)
{
	while ( !set_el(LA(1), st) ) { consume(); }
}

void ANTLRParser::
consumeUntilToken(int t)
{
	while ( LA(1)!=t ) { consume(); }
}


                        /* Old error stuff */

void ANTLRParser::
resynch(SetWordType *wd,SetWordType mask)
{
	static int consumed = 1;

	/* if you enter here without having consumed a token from last resynch
	 * force a token consumption.
	 */
	if ( !consumed ) {consume(); consumed=1; return;}

	/* if current token is in resynch set, we've got what we wanted */
	if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;}
	
	/* scan until we find something in the resynch set */
	while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
	consumed=1;
}

/* standard error reporting function that assumes DLG-based scanners;
 * you should redefine in subclass to change it or if you use your
 * own scanner.
 */
void ANTLRParser::
syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
	ANTLRTokenType etok, int k)
{
	int line;

	line = LT(1)->getLine();

	fprintf(stderr, "line %d: syntax error at \"%s\"",
					line, LT(1)->getText());
	if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
	if ( k==1 ) fprintf(stderr, " missing");
	else
	{
		fprintf(stderr, "; \"%s\" not", LT(1)->getText());
		if ( set_deg(eset)>1 ) fprintf(stderr, " in");
	}
	if ( set_deg(eset)>0 ) edecode(eset);
	else fprintf(stderr, " %s", token_tbl[etok]);
	if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup);
	fprintf(stderr, "\n");
}

/* is b an element of set p? */
int ANTLRParser::
set_el(ANTLRTokenType b, SetWordType *p)
{
	return( p[DIVWORD(b)] & bitmask[MODWORD(b)] );
}

int ANTLRParser::
set_deg(SetWordType *a)
{
	/* Fast compute degree of a set... the number
	   of elements present in the set.  Assumes
	   that all word bits are used in the set
	*/
	register SetWordType *p = a;
	register SetWordType *endp = &(a[bsetsize]);
	register int degree = 0;

	if ( a == NULL ) return 0;
	while ( p < endp )
	{
		register SetWordType t = *p;
		register SetWordType *b = &(bitmask[0]);
		do {
			if (t & *b) ++degree;
		} while (++b < &(bitmask[sizeof(SetWordType)*8]));
		p++;
	}

	return(degree);
}

void ANTLRParser::
edecode(SetWordType *a)
{
	register SetWordType *p = a;
	register SetWordType *endp = &(p[bsetsize]);
	register unsigned e = 0;

	if ( set_deg(a)>1 ) fprintf(stderr, " {");
	do {
		register SetWordType t = *p;
		register SetWordType *b = &(bitmask[0]);
		do {
			if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]);
			e++;
		} while (++b < &(bitmask[sizeof(SetWordType)*8]));
	} while (++p < endp);
	if ( set_deg(a)>1 ) fprintf(stderr, " }");
}

/* input looks like:
 *      zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
 * where the zzMiss stuff is set here to the token that did not match
 * (and which set wasn't it a member of).
 */
void
ANTLRParser::FAIL(int k, ...)
{
    static char text[1000];	// dangerous, but I don't care right now
    static SetWordType *f[20];
    SetWordType **miss_set;
    ANTLRChar **miss_text;
    _ANTLRTokenPtr *bad_tok;
    ANTLRChar **bad_text;
    unsigned *err_k;
    int i;
    va_list ap;

    va_start(ap, k);

    text[0] = '\0';
	if ( k>20 ) panic("FAIL: overflowed buffer");
    for (i=1; i<=k; i++)    /* collect all lookahead sets */
    {
        f[i-1] = va_arg(ap, SetWordType *);
    }
    for (i=1; i<=k; i++)    /* look for offending token */
    {
        if ( i>1 ) strcat(text, " ");
        strcat(text, LT(i)->getText());
        if ( !set_el(LA(i), f[i-1]) ) break;
    }
    miss_set = va_arg(ap, SetWordType **);
    miss_text = va_arg(ap, ANTLRChar **);
    bad_tok = va_arg(ap, _ANTLRTokenPtr *);
    bad_text = va_arg(ap, ANTLRChar **);
    err_k = va_arg(ap, unsigned *);
    if ( i>k )
    {
        /* bad; lookahead is permutation that cannot be matched,
         * but, the ith token of lookahead is valid at the ith position
         * (The old LL sub 1 (k) versus LL(k) parsing technique)
         */
        *miss_set = NULL;
        *miss_text = LT(1)->getText();
        *bad_tok = LT(1);
        *bad_text = (*bad_tok)->getText();
        *err_k = k;
        return;
    }
/*  fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
    *miss_set = f[i-1];
    *miss_text = text;
    *bad_tok = LT(i);
    *bad_text = (*bad_tok)->getText();
    if ( i==1 ) *err_k = 1;
    else *err_k = k;
}

int ANTLRParser::
_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
{
	if ( dirty==LLk ) consume();

	if ( LA(1)!=tokenWanted )
	{
		fprintf(stderr,
				"line %d: syntax error at \"%s\" missing %s\n",
				LT(1)->getLine(),
				(LA(1)==eofToken)?"<eof>":LT(1)->getText(),
				token_tbl[tokenWanted]);
		consumeUntil( whatFollows );
		return 0;
	}
	else {
		dirty++;
		labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
/*		if ( !demand_look ) consume(); */
		return 1;
	}
}


int ANTLRParser::
_setmatch_wdfltsig(SetWordType *tokensWanted,
					ANTLRTokenType tokenTypeOfSet,
					SetWordType *whatFollows)
{
	if ( dirty==LLk ) consume();
	if ( !set_el(LA(1), tokensWanted) )
	{
		fprintf(stderr,
				"line %d: syntax error at \"%s\" missing %s\n",
				LT(1)->getLine(),
				(LA(1)==eofToken)?"<eof>":LT(1)->getText(),
				token_tbl[tokenTypeOfSet]);
		consumeUntil( whatFollows );
		return 0;
	}
	else {
		dirty++;
		labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
/*		if ( !demand_look ) consume(); */
		return 1;
	}
}

char *ANTLRParser::
eMsgd(char *err,int d)
{
	sprintf(eMsgBuffer, err, d);	// dangerous, but I don't care
	return eMsgBuffer;
}

char *ANTLRParser::
eMsg(char *err, char *s)
{
	sprintf(eMsgBuffer, err, s);
	return eMsgBuffer;
}

char *ANTLRParser::
eMsg2(char *err,char *s, char *t)
{
	sprintf(eMsgBuffer, err, s, t);
	return eMsgBuffer;
}

void ANTLRParser::
panic(char *msg)
{
	fprintf(stderr, "ANTLR panic: %s\n", msg);
	exit(EXIT_FAILURE);
}