X-Git-Url: https://pd.if.org/git/?p=pccts;a=blobdiff_plain;f=h%2FAParser.cpp;fp=h%2FAParser.cpp;h=51d85492b9c776d222699bf62ca3e1eae8067cf3;hp=0000000000000000000000000000000000000000;hb=cb15b978c765a661bf3154d865fa3e2401d649f5;hpb=c4e55222e892b8762e11f2425a64611e898ef20e

diff --git a/h/AParser.cpp b/h/AParser.cpp
new file mode 100755
index 0000000..51d8549
--- /dev/null
+++ b/h/AParser.cpp
@@ -0,0 +1,507 @@
+/* ANTLRParser.C
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ * 
+ * We encourage users to develop software with PCCTS.  However, we do ask
+ * that credit is given to us for developing PCCTS.  By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc...  If you like PCCTS and have developed a nice tool with the
+ * output, please mention that you developed it using PCCTS.  In
+ * addition, we ask that this header remain intact in our source code.
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+
+/* ANTLRTokenType has to be defined right here because C++ provides no
+ * 'forward' declaration for enums.  The two enumerators are dummy
+ * placeholders (presumably the real token numbers are supplied by the
+ * generated grammar code -- TODO confirm).  As the original author noted,
+ * real templates would make this workaround unnecessary. */
+enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED };
+
+#define ANTLR_SUPPORT_CODE
+
+#include "config.h"
+#include ATOKEN_H
+
+#include ATOKENBUFFER_H
+#include APARSER_H
+
+static const int zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;	/* explicit 'int': implicit-int declarations are ill-formed in ISO C++ */
+static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;	/* NOTE(review): neither constant is referenced in the visible source -- presumably infinite-lookahead buffer sizes */
+
+                 /* L o o k a h e a d  M a c r o s */
+
+/* Single-bit masks for bit positions 0..7 of a set word.  The array is sized
+ * sizeof(SetWordType)*8, but only these 8 entries are given values; any
+ * further entries are zero-initialized, so only 8 bits per word are used. */
+SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
+	0x00000001, 0x00000002, 0x00000004, 0x00000008,
+	0x00000010, 0x00000020, 0x00000040, 0x00000080
+};
+
+char ANTLRParser::eMsgBuffer[500] = "";	// shared 500-byte scratch buffer written and returned by eMsgd/eMsg/eMsg2
+
+/* Destructor: releases the token_type[] cache allocated with new[] by the constructor. */
+ANTLRParser::~ANTLRParser()
+{
+	delete [] token_type;
+}
+
+/* Build a parser that reads from _inputTokens with k symbols of lookahead.
+ * Only records the settings and allocates the lookahead cache; the cache is
+ * not filled until init() runs prime_lookahead().
+ */
+ANTLRParser::ANTLRParser(ANTLRTokenBuffer *_inputTokens, int k,
+						 int use_inf_look, int dlook, int ssize)
+{
+	// parser configuration handed in by the caller
+	LLk = k;
+	can_use_inf_look = use_inf_look;
+	demand_look = dlook;
+	bsetsize = ssize;
+
+	// not guessing, no token-name table yet, EOF is token type 1
+	guessing = 0;
+	token_tbl = NULL;
+	eofToken = (ANTLRTokenType)1;
+
+	// empty lookahead cache holding LLk token types
+	token_type = new ANTLRTokenType[LLk];
+	lap = labase = dirty = 0;
+
+	// remember the token source and pass k on to its setMinTokens()
+	inputTokens = _inputTokens;
+	inputTokens->setMinTokens(k);
+}
+
+void ANTLRParser::init()	/* loads the lookahead buffer by delegating to prime_lookahead() */
+{
+	this->prime_lookahead();
+}
+
+int ANTLRParser::	// guess(): save the parser state into *st, switch on guessing, return setjmp's result
+guess(ANTLRParserState *st)
+{
+	saveState(st);	// snapshot is taken before 'guessing' is changed below
+	guessing = 1;
+	return setjmp(guess_start.state);	// NOTE(review): this frame is gone once we return; ISO C leaves a longjmp into a returned frame undefined -- confirm how callers use guess_start
+}
+
+/* Copy the guess-related parser fields into *buf so restoreState() can put them back. */
+void ANTLRParser::saveState(ANTLRParserState *buf)
+{
+	buf->guessing    = guessing;
+	buf->guess_start = guess_start;
+	buf->dirty       = dirty;
+	buf->inf_labase  = inf_labase;
+	buf->inf_last    = inf_last;
+}
+
+/* Reload the fields captured by saveState() from *buf, then rebuild the
+ * token_type[] cache from the token buffer and reset lap/labase. */
+void ANTLRParser::restoreState(ANTLRParserState *buf)
+{
+	int slot;
+
+	guessing = buf->guessing;
+	guess_start = buf->guess_start;
+	dirty = buf->dirty;
+	inf_labase = buf->inf_labase;
+	inf_last = buf->inf_last;
+
+	// refill token_type[0..LLk-1] from bufferedToken(1-LLk) .. bufferedToken(0);
+	// if demand_look, then I guess we don't look backwards for these tokens.
+	for (slot=0; slot<LLk; slot++)
+		token_type[slot] = inputTokens->bufferedToken(slot+1-LLk)->getType();
+	lap = labase = 0;
+}
+
+/* Fetch the next token from inputTokens, store its type through NLA (the
+ * next place a lookahead token type should go), then decrement dirty and
+ * advance lap; the '&(LLk-1)' wrap assumes LLk is a power of two (TODO confirm).
+ */
+void ANTLRParser::
+consume()
+{
+    NLA = inputTokens->getToken()->getType();
+	dirty--;
+	lap = (lap+1)&(LLk-1);
+}
+
+/* Return the token at lookahead position i, read as inputTokens->bufferedToken(i-LLk). */
+_ANTLRTokenPtr ANTLRParser::LT(int i)
+{
+#ifdef DEBUG_TOKENBUFFER
+	if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() < LLk )	// '<', not '<=': the constructor requests setMinTokens(k) with k == LLk, so '<=' would fire on every call
+	{
+		static char buf[2000];
+		sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i);
+		panic(buf);
+	}
+#endif
+	return inputTokens->bufferedToken(i-LLk);
+}
+
+/* Top the lookahead up to k symbols: LLk-dirty slots are taken as already
+ * valid, so consume() runs k-(LLk-dirty) times (not at all if that is <= 0). */
+void ANTLRParser::look(int k)
+{
+	int shortfall = k - (LLk-dirty);
+	while ( shortfall-- > 0 ) consume();
+}
+
+/* Load LLk symbols into the lookahead buffer (even if DEMAND_LOOK), then
+ * reset dirty, lap and labase to zero. */
+void ANTLRParser::prime_lookahead()
+{
+	int remaining = LLk;
+
+	while ( remaining-- > 0 ) consume();
+
+	dirty = 0;
+	lap = 0;
+	labase = 0;
+}
+
+/* Test the current input symbol against '_t'.  On a match, count one more
+ * dirty lookahead slot, advance labase and return 1; otherwise report the
+ * wanted and the offending token through the out-parameters and return 0.
+ * In NON demand lookahead mode dirty is always 0 on entry, so the consume()
+ * below never runs; the same routine serves both modes. */
+int ANTLRParser::
+_match(ANTLRTokenType _t, ANTLRChar **MissText,
+	   ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
+	   SetWordType **MissSet)
+{
+	if ( dirty==LLk ) consume();
+	if ( LA(1)==_t ) {
+		dirty++;
+		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
+		return 1;
+	}
+
+	*MissText = NULL;
+	*MissTok = _t;
+	*BadTok = LT(1);
+	*MissSet = NULL;
+	return 0;
+}
+
+/* Signal-returning match used during exception handling: returns 1 and
+ * advances when the current input symbol equals '_t', otherwise returns 0
+ * without filling in any error details. */
+int ANTLRParser::
+_match_wsig(ANTLRTokenType _t)
+{
+	if ( dirty==LLk ) consume();
+	if ( LA(1)==_t ) {
+		dirty++;
+		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
+		return 1;
+	}
+	return 0;
+}
+
+/* Test the current input symbol against the token set 'tset'.  On success
+ * count one more dirty lookahead slot, advance labase and return 1; on
+ * failure hand back the offending token and the set through the
+ * out-parameters and return 0.  In NON demand lookahead mode dirty is
+ * always 0 on entry, so the consume() below never runs. */
+int ANTLRParser::
+_setmatch(SetWordType *tset, ANTLRChar **MissText,
+	   ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok,
+	   SetWordType **MissSet)
+{
+	if ( dirty==LLk ) consume();
+	if ( set_el(LA(1), tset) ) {
+		dirty++;
+		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
+		return 1;
+	}
+
+	*MissText = NULL;
+	*MissTok = (ANTLRTokenType)0;
+	*BadTok = LT(1);
+	*MissSet = tset;
+	return 0;
+}
+
+/* Signal-returning set match: 1 (and advance) when LA(1) is a member of tset, else 0. */
+int ANTLRParser::_setmatch_wsig(SetWordType *tset)
+{
+	if ( dirty==LLk ) consume();
+	if ( set_el(LA(1), tset) ) {
+		dirty++;
+		labase = (labase+1)&(LLk-1);	// labase maintained even if !demand look
+		return 1;
+	}
+	return 0;
+}
+
+                   /* Exception handling routines */
+
+/* Skip tokens until LA(1) is in set st or is eofToken; the EOF test keeps the scan from spinning forever at end of input. */
+void ANTLRParser::consumeUntil(SetWordType *st)
+{
+	while ( LA(1)!=eofToken && !set_el(LA(1), st) ) { consume(); }
+}
+
+/* Skip tokens until LA(1) equals t or is eofToken; the EOF test keeps the scan from spinning forever at end of input. */
+void ANTLRParser::consumeUntilToken(int t)
+{
+	while ( LA(1)!=eofToken && LA(1)!=t ) { consume(); }
+}
+
+
+                        /* Old error stuff */
+
+/* Old-style recovery: skip input until wd[LA(1)] has a bit of 'mask' set or
+ * LA(1) is eofToken.  A function-static flag records whether the previous
+ * call returned without consuming, so the next call can force progress. */
+void ANTLRParser::resynch(SetWordType *wd,SetWordType mask)
+{
+	static int consumed = 1;
+
+	// nothing was consumed by the last resynch: eat one token and return
+	if ( consumed==0 ) { consume(); consumed = 1; return; }
+
+	// already on a resynch token (or EOF): record that nothing was consumed
+	if ( (wd[LA(1)]&mask) || LA(1)==eofToken ) { consumed = 0; return; }
+
+	// otherwise discard tokens up to the next resynch token or EOF
+	while ( !(wd[LA(1)]&mask) && LA(1)!=eofToken ) consume();
+	consumed = 1;
+}
+
+/* Standard syntax-error report on stderr; assumes a DLG-based scanner.  Redefine
+ * it in a subclass to change the report or if you use your own scanner.
+ *   tok    - not consulted: line number and text always come from LT(1)
+ *   egroup - text printed after " in " at the end; skipped when NULL or empty
+ *   eset   - expected-token set; if it has no members (or is NULL) the single
+ *            token name token_tbl[etok] is printed instead
+ *   k      - 1 prints " missing"; any other value prints "; \"<text>\" not [in]" */
+void ANTLRParser::
+syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset,
+	ANTLRTokenType etok, int k)
+{
+	int line = LT(1)->getLine();
+	fprintf(stderr, "line %d: syntax error at \"%s\"", line, LT(1)->getText());
+	if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
+	if ( k==1 ) fprintf(stderr, " missing");
+	else
+	{
+		fprintf(stderr, "; \"%s\" not", LT(1)->getText());
+		if ( set_deg(eset)>1 ) fprintf(stderr, " in");
+	}
+	if ( set_deg(eset)>0 ) edecode(eset);
+	else fprintf(stderr, " %s", token_tbl[etok]);
+	// strlen(NULL) is undefined, so test the pointer before looking at the text
+	if ( egroup != NULL && egroup[0] != '\0' ) fprintf(stderr, " in %s", egroup);
+	fprintf(stderr, "\n");
+}
+
+/* Membership test: nonzero when the bit for b is set in bit set p, 0 otherwise. */
+int ANTLRParser::set_el(ANTLRTokenType b, SetWordType *p)
+{
+	SetWordType word = p[DIVWORD(b)];
+	return( word & bitmask[MODWORD(b)] );
+}
+
+/* Degree of a set: the number of elements present in it.  Scans bsetsize
+ * words starting at a and counts the bits matching a bitmask[] entry;
+ * assumes all word bits are used in the set.  Returns 0 for a NULL set.
+ */
+int ANTLRParser::
+set_deg(SetWordType *a)
+{
+	// test for NULL before doing any pointer arithmetic on 'a'
+	if ( a == NULL ) return 0;
+
+	// no 'register' qualifiers: the specifier was removed in ISO C++17
+	SetWordType *const endp = &(a[bsetsize]);
+	SetWordType *const maskEnd = &(bitmask[sizeof(SetWordType)*8]);
+	int degree = 0;
+
+	for (SetWordType *p = a; p < endp; p++)
+	{
+		const SetWordType t = *p;
+		for (SetWordType *b = &(bitmask[0]); b < maskEnd; b++)
+			if (t & *b) ++degree;
+	}
+
+	return(degree);
+}
+
+/* Print to stderr the token_tbl[] name of every member of set a, wrapped in
+ * " {" ... " }" when the set has more than one member. */
+void ANTLRParser::edecode(SetWordType *a)
+{
+	// no 'register' (removed in ISO C++17); set_deg(a) is evaluated once, not twice
+	SetWordType *p = a;
+	SetWordType *const endp = &(p[bsetsize]);
+	SetWordType *const maskEnd = &(bitmask[sizeof(SetWordType)*8]);
+	const int braces = ( set_deg(a)>1 );
+	unsigned e = 0;		// token number matching the bit under test
+	if ( braces ) fprintf(stderr, " {");
+	do {
+		const SetWordType t = *p;
+		for (SetWordType *b = &(bitmask[0]); b < maskEnd; b++, e++)
+			if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]);
+	} while (++p < endp);
+	if ( braces ) fprintf(stderr, " }");
+}
+
+/* input looks like:
+ *      zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
+ * where e1..ek are the k lookahead sets and the zzMiss/zzBad outputs are set here
+ * to the token that did not match (and which set wasn't it a member of).
+ */
+void
+ANTLRParser::FAIL(int k, ...)
+{
+    static char text[1000];	// static: *miss_text may still point here after we return
+    static SetWordType *f[20];
+    SetWordType **miss_set;
+    ANTLRChar **miss_text;
+    _ANTLRTokenPtr *bad_tok;
+    ANTLRChar **bad_text;
+    unsigned *err_k;
+    int i;
+    va_list ap;
+
+    va_start(ap, k);
+
+    text[0] = '\0';
+	if ( k>20 ) panic("FAIL: overflowed buffer");
+    for (i=1; i<=k; i++)    /* collect all lookahead sets */
+    {
+        f[i-1] = va_arg(ap, SetWordType *);
+    }
+    for (i=1; i<=k; i++)    /* look for offending token; strncat keeps text[] in bounds */
+    {
+        if ( i>1 ) strncat(text, " ", sizeof(text)-strlen(text)-1);
+        strncat(text, LT(i)->getText(), sizeof(text)-strlen(text)-1);
+        if ( !set_el(LA(i), f[i-1]) ) break;
+    }
+    miss_set = va_arg(ap, SetWordType **);
+    miss_text = va_arg(ap, ANTLRChar **);
+    bad_tok = va_arg(ap, _ANTLRTokenPtr *);
+    bad_text = va_arg(ap, ANTLRChar **);
+    err_k = va_arg(ap, unsigned *);
+    va_end(ap);             /* all arguments read; every va_start needs its va_end */
+    if ( i>k )
+    {
+        /* bad; lookahead is permutation that cannot be matched,
+         * but, the ith token of lookahead is valid at the ith position
+         * (The old LL sub 1 (k) versus LL(k) parsing technique)
+         */
+        *miss_set = NULL;
+        *miss_text = LT(1)->getText();
+        *bad_tok = LT(1);
+        *bad_text = (*bad_tok)->getText();
+        *err_k = k;
+        return;
+    }
+    *miss_set = f[i-1];
+    *miss_text = text;
+    *bad_tok = LT(i);
+    *bad_text = (*bad_tok)->getText();
+    if ( i==1 ) *err_k = 1;
+    else *err_k = k;
+}
+
+/* Match tokenWanted against LA(1) with default error handling: on a match
+ * advance and return 1; on a mismatch print a "missing" message to stderr,
+ * skip input with consumeUntil(whatFollows) and return 0.
+ */
+int ANTLRParser::
+_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows)
+{
+	if ( dirty==LLk ) consume();
+	if ( LA(1)==tokenWanted ) {
+		dirty++;
+		labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+		return 1;
+	}
+
+	fprintf(stderr,
+			"line %d: syntax error at \"%s\" missing %s\n",
+			LT(1)->getLine(),
+			(LA(1)==eofToken)?"<eof>":LT(1)->getText(),
+			token_tbl[tokenWanted]);
+	consumeUntil( whatFollows );
+	return 0;
+}
+
+
+/* Match LA(1) against the set tokensWanted with default error handling: on a
+ * match advance and return 1; on a mismatch print a "missing" message naming
+ * token_tbl[tokenTypeOfSet], skip input with consumeUntil(whatFollows) and
+ * return 0. */
+int ANTLRParser::
+_setmatch_wdfltsig(SetWordType *tokensWanted,
+					ANTLRTokenType tokenTypeOfSet,
+					SetWordType *whatFollows)
+{
+	if ( dirty==LLk ) consume();
+	if ( set_el(LA(1), tokensWanted) ) {
+		dirty++;
+		labase = (labase+1)&(LLk-1); // labase maintained even if !demand look
+		return 1;
+	}
+	fprintf(stderr,
+			"line %d: syntax error at \"%s\" missing %s\n",
+			LT(1)->getLine(),
+			(LA(1)==eofToken)?"<eof>":LT(1)->getText(),
+			token_tbl[tokenTypeOfSet]);
+	consumeUntil( whatFollows );
+	return 0;
+}
+
+/* Format err (a printf-style format taking one int) with d into the shared eMsgBuffer and return it. */
+char *ANTLRParser::eMsgd(char *err,int d)
+{
+	snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, d);	// bounded: an overlong message is truncated, not written past the buffer
+	return eMsgBuffer;
+}
+
+/* Format err (a printf-style format taking one string) with s into the shared eMsgBuffer and return it. */
+char *ANTLRParser::eMsg(char *err, char *s)
+{
+	snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, s);	// bounded: an overlong message is truncated, not written past the buffer
+	return eMsgBuffer;
+}
+
+/* Format err (a printf-style format taking two strings) with s and t into the shared eMsgBuffer and return it. */
+char *ANTLRParser::eMsg2(char *err,char *s, char *t)
+{
+	snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, s, t);	// bounded: an overlong message is truncated, not written past the buffer
+	return eMsgBuffer;
+}
+
+/* Print "ANTLR panic: <msg>" on stderr, then terminate the program with EXIT_FAILURE. */
+void ANTLRParser::panic(char *msg)
+{
+	fprintf(stderr, "ANTLR panic: %s\n", msg);
+	exit(EXIT_FAILURE);
+}