From: Terence Parr <> Date: Fri, 6 Oct 1995 00:40:12 +0000 (-0500) Subject: auto commit for import X-Git-Url: https://pd.if.org/git/?p=pccts;a=commitdiff_plain;h=cb15b978c765a661bf3154d865fa3e2401d649f5 auto commit for import --- diff --git a/h/AParser.cpp b/h/AParser.cpp new file mode 100755 index 0000000..51d8549 --- /dev/null +++ b/h/AParser.cpp @@ -0,0 +1,507 @@ +/* ANTLRParser.C + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#include +#include +#include + +/* I have to put this here due to C++ limitation + * that you can't have a 'forward' decl for enums. + * I hate C++!!!!!!!!!!!!!!! + * Of course, if I could use real templates, this would go away. 
+ */ +enum ANTLRTokenType { TER_HATES_CPP, ITS_TOO_COMPLICATED }; + +#define ANTLR_SUPPORT_CODE + +#include "config.h" +#include ATOKEN_H + +#include ATOKENBUFFER_H +#include APARSER_H + +static const zzINF_DEF_TOKEN_BUFFER_SIZE = 2000; +static const zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000; + + /* L o o k a h e a d M a c r o s */ + +/* maximum of 32 bits/unsigned int and must be 8 bits/byte; + * we only use 8 bits of it. + */ +SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, + 0x00000010, 0x00000020, 0x00000040, 0x00000080 +}; + +char ANTLRParser::eMsgBuffer[500] = ""; + +ANTLRParser:: +~ANTLRParser() +{ + delete [] token_type; +} + +ANTLRParser:: +ANTLRParser(ANTLRTokenBuffer *_inputTokens, + int k, + int use_inf_look, + int dlook, + int ssize) +{ + LLk = k; + can_use_inf_look = use_inf_look; + demand_look = dlook; + bsetsize = ssize; + + guessing = 0; + token_tbl = NULL; + eofToken = (ANTLRTokenType)1; + + // allocate lookahead buffer + token_type = new ANTLRTokenType[LLk]; + lap = 0; + labase = 0; + dirty = 0; + + /* prime lookahead buffer, point to inputTokens */ + this->inputTokens = _inputTokens; + this->inputTokens->setMinTokens(k); +} + +void ANTLRParser::init() +{ + prime_lookahead(); +} + +int ANTLRParser:: +guess(ANTLRParserState *st) +{ + saveState(st); + guessing = 1; + return setjmp(guess_start.state); +} + +void ANTLRParser:: +saveState(ANTLRParserState *buf) +{ + buf->guess_start = guess_start; + buf->guessing = guessing; + buf->inf_labase = inf_labase; + buf->inf_last = inf_last; + buf->dirty = dirty; +} + +void ANTLRParser:: +restoreState(ANTLRParserState *buf) +{ + int i; + + guess_start = buf->guess_start; + guessing = buf->guessing; + inf_labase = buf->inf_labase; + inf_last = buf->inf_last; + dirty = buf->dirty; + + // restore lookahead buffer from k tokens before restored TokenBuffer position + // if demand_look, then I guess we don't look backwards for these tokens. 
+ for (i=1; i<=LLk; i++) token_type[i-1] = + inputTokens->bufferedToken(i-LLk)->getType(); + lap = 0; + labase = 0; +} + +/* Get the next symbol from the input stream; put it into lookahead buffer; + * fill token_type[] fast reference cache also. NLA is the next place where + * a lookahead ANTLRAbstractToken should go. + */ +void ANTLRParser:: +consume() +{ + NLA = inputTokens->getToken()->getType(); + dirty--; + lap = (lap+1)&(LLk-1); +} + +_ANTLRTokenPtr ANTLRParser:: +LT(int i) +{ +#ifdef DEBUG_TOKENBUFFER + if ( i >= inputTokens->bufferSize() || inputTokens->minTokens() <= LLk ) + { + static char buf[2000]; + sprintf(buf, "The minimum number of tokens you requested that the\nANTLRTokenBuffer buffer is not enough to satisfy your\nLT(%d) request; increase 'k' argument to constructor for ANTLRTokenBuffer\n", i); + panic(buf); + } +#endif + return inputTokens->bufferedToken(i-LLk); +} + +void +ANTLRParser:: +look(int k) +{ + int i, c = k - (LLk-dirty); + for (i=1; i<=c; i++) consume(); +} + +/* fill the lookahead buffer up with k symbols (even if DEMAND_LOOK); + */ +void +ANTLRParser:: +prime_lookahead() +{ + int i; + for(i=1;i<=LLk; i++) consume(); + dirty=0; + lap = 0; + labase = 0; +} + +/* check to see if the current input symbol matches '_t'. + * During NON demand lookahead mode, dirty will always be 0 and + * hence the extra code for consuming tokens in _match is never + * executed; the same routine can be used for both modes. + */ +int ANTLRParser:: +_match(ANTLRTokenType _t, ANTLRChar **MissText, + ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, + SetWordType **MissSet) +{ + if ( dirty==LLk ) { + consume(); + } + if ( LA(1)!=_t ) { + *MissText=NULL; + *MissTok= _t; *BadTok = LT(1); + *MissSet=NULL; + return 0; + } + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + +/* check to see if the current input symbol matches '_t'. + * Used during exception handling. 
+ */ +int ANTLRParser:: +_match_wsig(ANTLRTokenType _t) +{ + if ( dirty==LLk ) { + consume(); + } + if ( LA(1)!=_t ) return 0; + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + +/* check to see if the current input symbol matches any token in a set. + * During NON demand lookahead mode, dirty will always be 0 and + * hence the extra code for consuming tokens in _match is never + * executed; the same routine can be used for both modes. + */ +int ANTLRParser:: +_setmatch(SetWordType *tset, ANTLRChar **MissText, + ANTLRTokenType *MissTok, _ANTLRTokenPtr *BadTok, + SetWordType **MissSet) +{ + if ( dirty==LLk ) { + consume(); + } + if ( !set_el(LA(1), tset) ) { + *MissText=NULL; + *MissTok= (ANTLRTokenType)0; *BadTok=LT(1); + *MissSet=tset; + return 0; + } + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + +int ANTLRParser:: +_setmatch_wsig(SetWordType *tset) +{ + if ( dirty==LLk ) { + consume(); + } + if ( !set_el(LA(1), tset) ) return 0; + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look + return 1; +} + + /* Exception handling routines */ + +void ANTLRParser:: +consumeUntil(SetWordType *st) +{ + while ( !set_el(LA(1), st) ) { consume(); } +} + +void ANTLRParser:: +consumeUntilToken(int t) +{ + while ( LA(1)!=t ) { consume(); } +} + + + /* Old error stuff */ + +void ANTLRParser:: +resynch(SetWordType *wd,SetWordType mask) +{ + static int consumed = 1; + + /* if you enter here without having consumed a token from last resynch + * force a token consumption. 
+ */ + if ( !consumed ) {consume(); consumed=1; return;} + + /* if current token is in resynch set, we've got what we wanted */ + if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;} + + /* scan until we find something in the resynch set */ + while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();} + consumed=1; +} + +/* standard error reporting function that assumes DLG-based scanners; + * you should redefine in subclass to change it or if you use your + * own scanner. + */ +void ANTLRParser:: +syn(_ANTLRTokenPtr tok, ANTLRChar *egroup, SetWordType *eset, + ANTLRTokenType etok, int k) +{ + int line; + + line = LT(1)->getLine(); + + fprintf(stderr, "line %d: syntax error at \"%s\"", + line, LT(1)->getText()); + if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} + if ( k==1 ) fprintf(stderr, " missing"); + else + { + fprintf(stderr, "; \"%s\" not", LT(1)->getText()); + if ( set_deg(eset)>1 ) fprintf(stderr, " in"); + } + if ( set_deg(eset)>0 ) edecode(eset); + else fprintf(stderr, " %s", token_tbl[etok]); + if ( strlen(egroup) > 0 ) fprintf(stderr, " in %s", egroup); + fprintf(stderr, "\n"); +} + +/* is b an element of set p? */ +int ANTLRParser:: +set_el(ANTLRTokenType b, SetWordType *p) +{ + return( p[DIVWORD(b)] & bitmask[MODWORD(b)] ); +} + +int ANTLRParser:: +set_deg(SetWordType *a) +{ + /* Fast compute degree of a set... the number + of elements present in the set. 
Assumes + that all word bits are used in the set + */ + register SetWordType *p = a; + register SetWordType *endp = &(a[bsetsize]); + register int degree = 0; + + if ( a == NULL ) return 0; + while ( p < endp ) + { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if (t & *b) ++degree; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + p++; + } + + return(degree); +} + +void ANTLRParser:: +edecode(SetWordType *a) +{ + register SetWordType *p = a; + register SetWordType *endp = &(p[bsetsize]); + register unsigned e = 0; + + if ( set_deg(a)>1 ) fprintf(stderr, " {"); + do { + register SetWordType t = *p; + register SetWordType *b = &(bitmask[0]); + do { + if ( t & *b ) fprintf(stderr, " %s", token_tbl[e]); + e++; + } while (++b < &(bitmask[sizeof(SetWordType)*8])); + } while (++p < endp); + if ( set_deg(a)>1 ) fprintf(stderr, " }"); +} + +/* input looks like: + * zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk) + * where the zzMiss stuff is set here to the token that did not match + * (and which set wasn't it a member of). + */ +void +ANTLRParser::FAIL(int k, ...) 
+{ + static char text[1000]; // dangerous, but I don't care right now + static SetWordType *f[20]; + SetWordType **miss_set; + ANTLRChar **miss_text; + _ANTLRTokenPtr *bad_tok; + ANTLRChar **bad_text; + unsigned *err_k; + int i; + va_list ap; + + va_start(ap, k); + + text[0] = '\0'; + if ( k>20 ) panic("FAIL: overflowed buffer"); + for (i=1; i<=k; i++) /* collect all lookahead sets */ + { + f[i-1] = va_arg(ap, SetWordType *); + } + for (i=1; i<=k; i++) /* look for offending token */ + { + if ( i>1 ) strcat(text, " "); + strcat(text, LT(i)->getText()); + if ( !set_el(LA(i), f[i-1]) ) break; + } + miss_set = va_arg(ap, SetWordType **); + miss_text = va_arg(ap, ANTLRChar **); + bad_tok = va_arg(ap, _ANTLRTokenPtr *); + bad_text = va_arg(ap, ANTLRChar **); + err_k = va_arg(ap, unsigned *); + if ( i>k ) + { + /* bad; lookahead is permutation that cannot be matched, + * but, the ith token of lookahead is valid at the ith position + * (The old LL sub 1 (k) versus LL(k) parsing technique) + */ + *miss_set = NULL; + *miss_text = LT(1)->getText(); + *bad_tok = LT(1); + *bad_text = (*bad_tok)->getText(); + *err_k = k; + return; + } +/* fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/ + *miss_set = f[i-1]; + *miss_text = text; + *bad_tok = LT(i); + *bad_text = (*bad_tok)->getText(); + if ( i==1 ) *err_k = 1; + else *err_k = k; +} + +int ANTLRParser:: +_match_wdfltsig(ANTLRTokenType tokenWanted, SetWordType *whatFollows) +{ + if ( dirty==LLk ) consume(); + + if ( LA(1)!=tokenWanted ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + LT(1)->getLine(), + (LA(1)==eofToken)?"":LT(1)->getText(), + token_tbl[tokenWanted]); + consumeUntil( whatFollows ); + return 0; + } + else { + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look +/* if ( !demand_look ) consume(); */ + return 1; + } +} + + +int ANTLRParser:: +_setmatch_wdfltsig(SetWordType *tokensWanted, + ANTLRTokenType tokenTypeOfSet, + SetWordType *whatFollows) 
+{ + if ( dirty==LLk ) consume(); + if ( !set_el(LA(1), tokensWanted) ) + { + fprintf(stderr, + "line %d: syntax error at \"%s\" missing %s\n", + LT(1)->getLine(), + (LA(1)==eofToken)?"":LT(1)->getText(), + token_tbl[tokenTypeOfSet]); + consumeUntil( whatFollows ); + return 0; + } + else { + dirty++; + labase = (labase+1)&(LLk-1); // labase maintained even if !demand look +/* if ( !demand_look ) consume(); */ + return 1; + } +} + +char *ANTLRParser:: +eMsgd(char *err,int d) +{ + sprintf(eMsgBuffer, err, d); // dangerous, but I don't care + return eMsgBuffer; +} + +char *ANTLRParser:: +eMsg(char *err, char *s) +{ + sprintf(eMsgBuffer, err, s); + return eMsgBuffer; +} + +char *ANTLRParser:: +eMsg2(char *err,char *s, char *t) +{ + sprintf(eMsgBuffer, err, s, t); + return eMsgBuffer; +} + +void ANTLRParser:: +panic(char *msg) +{ + fprintf(stderr, "ANTLR panic: %s\n", msg); + exit(EXIT_FAILURE); +} diff --git a/h/ATokPtr.cpp b/h/ATokPtr.cpp new file mode 100755 index 0000000..d60e446 --- /dev/null +++ b/h/ATokPtr.cpp @@ -0,0 +1,76 @@ +/* ATokPtr.C + * + * ANTLRToken MUST be defined before entry to this file. + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. 
In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Written by Russell Quong June 30, 1995 + * Adapted by Terence Parr to ANTLR stuff + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#include "ATokPtr.h" + +void ANTLRTokenPtr::ref() const +{ + if (ptr_ != NULL) { + ptr_->ref(); + } +} + +#include + +void ANTLRTokenPtr::deref() +{ + if (ptr_ != NULL) + { + ptr_->deref(); + if ( ptr_->nref()==0 ) + { + delete ptr_; + ptr_ = NULL; + } + } +} + +ANTLRTokenPtr::~ANTLRTokenPtr() +{ + deref(); +} + +void ANTLRTokenPtr::operator = (const ANTLRTokenPtr lhs) +{ + lhs.ref(); // protect against "xp = xp"; ie same underlying object + deref(); + ptr_ = lhs.ptr_; +} + +void ANTLRTokenPtr::operator = (ANTLRAbstractToken *addr) +{ + if (addr != NULL) { + addr->ref(); + } + deref(); + ptr_ = addr; +} diff --git a/h/ATokPtr.h b/h/ATokPtr.h new file mode 100755 index 0000000..ebcb965 --- /dev/null +++ b/h/ATokPtr.h @@ -0,0 +1,69 @@ +/* ATokPtr.h + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... 
If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Written by Russell Quong June 30, 1995 + * Adapted by Terence Parr to ANTLR stuff + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ATokPtr_h +#define ATokPtr_h + +// pointer to a reference counted object +// robust in that an unused ANTLRTokenPtr can point to NULL. + +class ANTLRAbstractToken; + +class ANTLRTokenPtr { +public: + ANTLRTokenPtr(ANTLRAbstractToken *addr=NULL){ptr_ = addr; ref();} + ANTLRTokenPtr(const ANTLRTokenPtr &lhs) {ptr_ = lhs.ptr_; lhs.ref();} + ~ANTLRTokenPtr(); + + // use ANTLRTokenPtr as a pointer to ANTLRToken + ANTLRAbstractToken *operator-> () { return ptr_; } + void operator = (const ANTLRTokenPtr lhs); + void operator = (ANTLRAbstractToken *addr); + unsigned operator != (const ANTLRTokenPtr &q) + { return this->ptr_ != q.ptr_; } + unsigned operator == (const ANTLRTokenPtr &q) + { return this->ptr_ == q.ptr_; } + void ref() const; + void deref(); + +protected: + ANTLRAbstractToken *ptr_; +}; + +//typedef ANTLRTokenPtr _ANTLRTokenPtr; + +/* + * Since you cannot redefine operator->() to return one of the user's + * token object types, we must down cast. This is a drag. Here's + * a macro that helps. template: "mytoken(a-smart-ptr)->myfield". 
+ */ +#define mytoken(tk) ((ANTLRToken *)(tk.operator->())) + +#endif diff --git a/h/AToken.h b/h/AToken.h new file mode 100755 index 0000000..6c9e333 --- /dev/null +++ b/h/AToken.h @@ -0,0 +1,213 @@ +/* ANTLRToken.h + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ATOKEN_H_GATE +#define ATOKEN_H_GATE + +#include +#include +#include + +#ifndef ANTLRCommonTokenTEXTSIZE +#define ANTLRCommonTokenTEXTSIZE 100 +#endif + +#ifdef DBG_REFCOUNTTOKEN +#include +#endif + +/* must define what a char looks like; can make this a class too */ +typedef char ANTLRChar; + +/* D E F I N E S M A R T P O I N T E R S */ +#include "config.h" +//#include ATOKPTR_H not tested yet, leave out +class ANTLRAbstractToken; +typedef ANTLRAbstractToken *_ANTLRTokenPtr; + +class ANTLRAbstractToken { +public: + virtual ~ANTLRAbstractToken() {;} + virtual ANTLRTokenType getType() = 0; + virtual void setType(ANTLRTokenType t) = 0; + virtual int getLine() = 0; + virtual void setLine(int line) = 0; + virtual ANTLRChar *getText() = 0; + virtual void setText(ANTLRChar *) = 0; + + /* This function will disappear when I can use templates */ + virtual ANTLRAbstractToken *makeToken(ANTLRTokenType tt, + ANTLRChar *text, + int line) = 0; + + /* define to satisfy ANTLRTokenBuffer's need to determine whether or + not a token object can be destroyed. If nref()==0, no one has + a reference, and the object may be destroyed. This function defaults + to 1, hence, if you use deleteTokens() message with a token object + not derived from ANTLRCommonRefCountToken, the parser will compile + but will not delete objects after they leave the token buffer. + */ + virtual unsigned nref() { return 1; } + virtual void ref() {;} + virtual void deref() {;} + + virtual void panic(char *msg) + { + fprintf(stderr, "ANTLRAbstractToken panic: %s\n", msg); + exit(PCCTS_EXIT_FAILURE); + } +}; + +/* This class should be subclassed. 
It cannot store token type or text */ + +class ANTLRRefCountToken : public ANTLRAbstractToken { +public: +#ifdef DBG_REFCOUNTTOKEN + static int ctor; + static int dtor; +#endif +protected: + unsigned refcnt_; +#ifdef DBG_REFCOUNTTOKEN + char object[200]; +#endif + +public: + ANTLRRefCountToken(ANTLRTokenType t, ANTLRChar *s) +#ifndef DBG_REFCOUNTTOKEN + { + refcnt_ = 0; + } +#else + { + ctor++; + refcnt_ = 0; + if ( t==1 ) sprintf(object,"tok_EOF"); + else sprintf(object,"tok_%s",s); + fprintf(stderr, "ctor %s #%d\n",object,ctor); + } +#endif + ANTLRRefCountToken() +#ifndef DBG_REFCOUNTTOKEN + { refcnt_ = 0; } +#else + { + ctor++; + refcnt_ = 0; + sprintf(object,"tok_blank"); + fprintf(stderr, "ctor %s #%d\n",object,ctor); + } + virtual ~ANTLRRefCountToken() + { + dtor++; + if ( dtor>ctor ) fprintf(stderr, "WARNING: dtor>ctor\n"); + fprintf(stderr, "dtor %s #%d\n", object, dtor); + object[0]='\0'; + } +#endif + + // reference counting stuff needed by ANTLRTokenPtr. + // User should not access these; for C++ language reasons, we had + // to make these public. Yuck. 
+ void ref() { refcnt_++; } + void deref() { refcnt_--; } + unsigned nref() { return refcnt_; } + + virtual ANTLRAbstractToken *makeToken(ANTLRTokenType tt, + ANTLRChar *txt, + int line) + { + panic("call to ANTLRRefCountToken::makeToken()\n"); + return NULL; + } +}; + +class ANTLRCommonNoRefCountToken : public ANTLRAbstractToken { +protected: + ANTLRTokenType _type; + int _line; + ANTLRChar _text[ANTLRCommonTokenTEXTSIZE+1]; + +public: + ANTLRCommonNoRefCountToken(ANTLRTokenType t, ANTLRChar *s) + { setType(t); _line = 0; setText(s); } + ANTLRCommonNoRefCountToken() + { setType((ANTLRTokenType)0); _line = 0; setText(""); } + + ANTLRTokenType getType() { return _type; } + void setType(ANTLRTokenType t) { _type = t; } + virtual int getLine() { return _line; } + void setLine(int line) { _line = line; } + ANTLRChar *getText() { return _text; } + void setText(ANTLRChar *s) + { strncpy((char *)_text, (char *)s, ANTLRCommonTokenTEXTSIZE); } + virtual ANTLRAbstractToken *makeToken(ANTLRTokenType tt, + ANTLRChar *txt, + int line) + { + ANTLRAbstractToken *t = new ANTLRCommonNoRefCountToken; + t->setType(tt); t->setText(txt); t->setLine(line); + return t; + } +}; + +class ANTLRCommonToken : public ANTLRRefCountToken { +protected: + ANTLRTokenType _type; + int _line; + ANTLRChar _text[ANTLRCommonTokenTEXTSIZE+1]; + +public: + ANTLRCommonToken(ANTLRTokenType t, ANTLRChar *s) : ANTLRRefCountToken(t,s) + { setType(t); _line = 0; setText(s); } + ANTLRCommonToken() + { setType((ANTLRTokenType)0); _line = 0; setText(""); } + virtual ~ANTLRCommonToken() {;} + + ANTLRTokenType getType() { return _type; } + void setType(ANTLRTokenType t) { _type = t; } + virtual int getLine() { return _line; } + void setLine(int line) { _line = line; } + ANTLRChar *getText() { return _text; } + void setText(ANTLRChar *s) + { strncpy((char *)_text, (char *)s, ANTLRCommonTokenTEXTSIZE); } + virtual ANTLRAbstractToken *makeToken(ANTLRTokenType tt, + ANTLRChar *txt, + int line) + { + ANTLRAbstractToken 
*t = new ANTLRCommonToken(tt,txt); + t->setLine(line); + return t; + } +}; + +// used for backward compatibility +typedef ANTLRCommonToken ANTLRCommonBacktrackingToken; + +#endif diff --git a/h/ATokenBuffer.cpp b/h/ATokenBuffer.cpp new file mode 100755 index 0000000..fb4d4dd --- /dev/null +++ b/h/ATokenBuffer.cpp @@ -0,0 +1,325 @@ +/* ANTLRTokenBuffer.C + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +typedef int ANTLRTokenType; // fool AToken.h into compiling + +#define ANTLR_SUPPORT_CODE + +#include "config.h" +#include ATOKENBUFFER_H +typedef ANTLRAbstractToken *_ANTLRTokenPtr; + +#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) +static unsigned char test[1000]; +#endif + +#ifdef DBG_REFCOUNTTOKEN +int ANTLRCommonToken::ctor = 0; +int ANTLRCommonToken::dtor = 0; +#endif + +ANTLRTokenBuffer:: +ANTLRTokenBuffer(ANTLRTokenStream *_input, int _k, int _cs) +{ + this->input = _input; + this->k = _k; + buffer_size = chunk_size = _cs; + buffer = (_ANTLRTokenPtr *) + calloc(chunk_size+1,sizeof(_ANTLRTokenPtr )); + if ( buffer == NULL ) { + panic("cannot alloc token buffer"); + } + buffer++; // leave the first elem empty so tp-1 is valid ptr + + tp = &buffer[0]; + last = tp-1; + next = &buffer[0]; + num_markers = 0; + end_of_buffer = &buffer[buffer_size-1]; + threshold = &buffer[(int)(buffer_size*(1.0/2.0))]; + _deleteTokens = 1; // assume we delete tokens +} + +static void f() {;} +ANTLRTokenBuffer:: +~ANTLRTokenBuffer() +{ + f(); + // Delete all remaining tokens (from 0..last inclusive) + if ( _deleteTokens ) + { + _ANTLRTokenPtr *z; + for (z=buffer; z<=last; z++) + { + (*z)->deref(); +// z->deref(); +#ifdef DBG_REFCOUNTTOKEN + fprintf(stderr, "##########dtor: deleting token '%s' (ref %d)\n", + ((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); +#endif + if ( (*z)->nref()==0 ) + { + delete (*z); + } + } + } + + if ( buffer!=NULL ) free((char *)(buffer-1)); +} + +#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) +#include +#endif + +_ANTLRTokenPtr ANTLRTokenBuffer:: +getToken() +{ + if ( tp <= last ) // is there any buffered lookahead still to be read? 
+ { + return *tp++; // read buffered lookahead + } + // out of buffered lookahead, get some more "real" + // input from getANTLRToken() + if ( num_markers==0 ) + { + if( next > threshold ) + { +#ifdef DBG_TBUF +fprintf(stderr,"getToken: next > threshold (high water is %d)\n", threshold-buffer); +#endif + makeRoom(); + } + } + else { + if ( next > end_of_buffer ) + { +#ifdef DBG_TBUF +fprintf(stderr,"getToken: next > end_of_buffer (size is %d)\n", buffer_size); +#endif + extendBuffer(); + } + } + *next = getANTLRToken(); + (*next)->ref(); // say we have a copy of this pointer in buffer + last = next; + next++; + tp = last; + return *tp++; +} + +void ANTLRTokenBuffer:: +rewind(int pos) +{ +#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) + fprintf(stderr, "rewind(%d)[nm=%d,from=%d,%d.n=%d]\n", pos, num_markers, tp-buffer,pos,test[pos]); + test[pos]--; +#endif + tp = &buffer[pos]; + num_markers--; +} + +/* + * This function is used to specify that the token pointers read + * by the ANTLRTokenBuffer should be buffered up (to be reused later). + */ +int ANTLRTokenBuffer:: +mark() +{ +#if defined(DBG_TBUF)||defined(DBG_TBUF_MARK_REW) + test[tp-buffer]++; + fprintf(stderr,"mark(%d)[nm=%d,%d.n=%d]\n",tp-buffer,num_markers+1,tp-buffer,test[tp-buffer]); +#endif + num_markers++; + return tp - buffer; +} + +/* + * returns the token pointer n positions ahead. + * This implies that bufferedToken(1) gets the NEXT symbol of lookahead. + * This is used in conjunction with the ANTLRParser lookahead buffer. + * + * No markers are set or anything. A bunch of input is buffered--that's all. + * The tp pointer is left alone as the lookahead has not been advanced + * with getToken(). The next call to getToken() will find a token + * in the buffer and won't have to call getANTLRToken(). + * + * If this is called before a consume() is done, how_many_more_i_need is + * set to 'n'. + */ +_ANTLRTokenPtr ANTLRTokenBuffer:: +bufferedToken(int n) +{ +// int how_many_more_i_need = (last-tp < 0) ? 
n : n-(last-tp)-1; + int how_many_more_i_need = (tp > last) ? n : n-(last-tp)-1; + // Make sure that at least n tokens are available in the buffer +#ifdef DBG_TBUF + fprintf(stderr, "bufferedToken(%d)\n", n); +#endif + for (int i=1; i<=how_many_more_i_need; i++) + { + if ( next > end_of_buffer ) // buffer overflow? + { + extendBuffer(); + } + *next = getANTLRToken(); + (*next)->ref(); // say we have a copy of this pointer in buffer + last = next; + next++; + } + return tp[n - 1]; +} + +/* If no markers are set, the none of the input needs to be saved (except + * for the lookahead Token pointers). We save only k-1 token pointers as + * we are guaranteed to do a getANTLRToken() right after this because otherwise + * we wouldn't have needed to extend the buffer. + * + * If there are markers in the buffer, we need to save things and so + * extendBuffer() is called. + */ +void ANTLRTokenBuffer:: +makeRoom() +{ +#ifdef DBG_TBUF + fprintf(stderr, "in makeRoom.................\n"); + fprintf(stderr, "num_markers==%d\n", num_markers); +#endif +/* + if ( num_markers == 0 ) + { +*/ +#ifdef DBG_TBUF + fprintf(stderr, "moving lookahead and resetting next\n"); + + _ANTLRTokenPtr *r; + fprintf(stderr, "tbuf = ["); + for (r=buffer; r<=last; r++) + { + if ( *r==NULL ) fprintf(stderr, " xxx"); + else fprintf(stderr, " '%s'", ((ANTLRCommonToken *)*r)->getText()); + } + fprintf(stderr, " ]\n"); + + fprintf(stderr, + "before: tp=%d, last=%d, next=%d, threshold=%d\n",tp-buffer,last-buffer,next-buffer,threshold-buffer); +#endif + + // Delete all tokens from 0..last-(k-1) inclusive + if ( _deleteTokens ) + { + _ANTLRTokenPtr *z; + for (z=buffer; z<=last-(k-1); z++) + { + (*z)->deref(); +// z->deref(); +#ifdef DBG_REFCOUNTTOKEN + fprintf(stderr, "##########makeRoom: deleting token '%s' (ref %d)\n", + ((ANTLRCommonToken *)*z)->getText(), (*z)->nref()); +#endif + if ( (*z)->nref()==0 ) + { + delete (*z); + } + } + } + + // reset the buffer to initial conditions, but move k-1 symbols + // to 
the beginning of buffer and put new input symbol at k + _ANTLRTokenPtr *p = buffer, *q = last-(k-1)+1; +// ANTLRAbstractToken **p = buffer, **q = end_of_buffer-(k-1)+1; +#ifdef DBG_TBUF + fprintf(stderr, "lookahead buffer = ["); +#endif + for (int i=1; i<=(k-1); i++) + { + *p++ = *q++; +#ifdef DBG_TBUF + fprintf(stderr, + " '%s'", ((ANTLRCommonToken *)buffer[i-1])->getText()); +#endif + } +#ifdef DBG_TBUF + fprintf(stderr, " ]\n"); +#endif + next = &buffer[k-1]; + tp = &buffer[k-1]; // tp points to what will be filled in next + last = tp-1; +#ifdef DBG_TBUF + fprintf(stderr, + "after: tp=%d, last=%d, next=%d\n", + tp-buffer, last-buffer, next-buffer); +#endif +/* + } + else { + extendBuffer(); + } +*/ +} + +/* This function extends 'buffer' by chunk_size and returns with all + * pointers at the same relative positions in the buffer (the buffer base + * address could have changed in realloc()) except that 'next' comes + * back set to where the next token should be stored. All other pointers + * are untouched. 
+ */
+void
+ANTLRTokenBuffer::
+extendBuffer()
+{
+    // Remember where each cursor sits relative to the buffer base; the base
+    // address may change when the storage is reallocated below.
+    int tp_offset = tp-buffer;
+    int last_offset = last-buffer;
+    int next_offset = next-buffer;
+#ifdef DBG_TBUF
+    fprintf(stderr, "extending physical buffer\n");
+#endif
+    buffer_size += chunk_size;
+    // The allocation starts one slot before 'buffer' (see buffer++ below),
+    // hence buffer-1 and buffer_size+1.
+    buffer = (_ANTLRTokenPtr *)
+        realloc((char *)(buffer-1),
+                (buffer_size+1)*sizeof(_ANTLRTokenPtr ));
+    if ( buffer == NULL ) {
+        panic("cannot alloc token buffer");
+    }
+    buffer++;               // leave the first elem empty so tp-1 is valid ptr
+
+    // re-seat every cursor at its old offset from the (possibly new) base
+    next = buffer + next_offset;
+    last = buffer + last_offset;
+    tp = buffer + tp_offset;
+    end_of_buffer = &buffer[buffer_size-1];
+    threshold = &buffer[buffer_size/2];     // halfway point of the buffer
+
+/*
+    // zero out new token ptrs so we'll know if something to delete in buffer
+    ANTLRAbstractToken **p = end_of_buffer-chunk_size+1;
+    for (; p<=end_of_buffer; p++) *p = NULL;
+*/
+}
+
+/* to avoid having to link in another file just for the smart token ptr
+ * stuff, we include it here.  Ugh.
+ */
+#include ATOKPTR_C
diff --git a/h/ATokenBuffer.h b/h/ATokenBuffer.h
new file mode 100755
index 0000000..6fdd632
--- /dev/null
+++ b/h/ATokenBuffer.h
@@ -0,0 +1,93 @@
+/* ANTLRTokenBuffer.h
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... 
If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef ATOKENBUFFER_H_GATE +#define ATOKENBUFFER_H_GATE + +#include "config.h" +#include ATOKEN_H +#include ATOKENSTREAM_H +#include + +/* + * The parser is "attached" to an ANTLRTokenBuffer via interface + * functions: getToken() and bufferedToken(). The object that actually + * consumes characters and constructs tokens is connected to the + * ANTLRTokenBuffer via interface function ANTLRTokenStream::getToken(); + * where ANTLRTokenStream is really just a behavior (class with no data). + * C++ does not have this abstraction and hence we simply have come up + * with a fancy name for "void *". See the note in ANTLRTokenStream.h on + * the "behavior" of ANTLRTokenStream. + */ + +class ANTLRTokenBuffer { +protected: + ANTLRTokenStream *input; // where do I get tokens + int buffer_size; + int chunk_size; + int num_markers; + int k; // Need at least this many tokens in buffer + _ANTLRTokenPtr *buffer; // buffer used for arbitrary lookahead + _ANTLRTokenPtr *tp; // pts into buffer; current token ptr + _ANTLRTokenPtr *last; // pts to last valid token in buffer + _ANTLRTokenPtr *next; // place to put token from getANTLRToken() + _ANTLRTokenPtr *end_of_buffer; + /* when you try to write a token past this and there are no markers + set, then move k-1 tokens back to the beginning of the buffer. 
+       We want to stay away from the end of the buffer because we have
+       to extend it if a marker is set and we reach the end (we cannot
+       move tokens to the beginning of the buffer in this case).
+     */
+    _ANTLRTokenPtr *threshold;
+    unsigned char _deleteTokens;    // nonzero => makeRoom() derefs (and may delete) discarded tokens
+
+    // This function is filled in by the subclass; it initiates fetch of input
+    virtual _ANTLRTokenPtr getANTLRToken() { return input->getToken(); }
+    void makeRoom();
+    void extendBuffer();
+
+public:
+    ANTLRTokenBuffer(ANTLRTokenStream *in, int k=1, int chksz=50);
+    virtual ~ANTLRTokenBuffer();
+    virtual _ANTLRTokenPtr getToken();
+    virtual void rewind(int pos);
+    virtual int mark();
+    virtual _ANTLRTokenPtr bufferedToken(int i);
+
+    // Switch token reclamation in makeRoom() off / on.
+    void noGarbageCollectTokens()   { _deleteTokens=0; }
+    void garbageCollectTokens()     { _deleteTokens=1; }
+
+    // Return type spelled out: C++ has no implicit-int rule, so a
+    // declaration without a type specifier is ill-formed.
+    virtual int bufferSize()        { return buffer_size; }
+    virtual int minTokens()         { return k; }
+    virtual void setMinTokens(int k_new)    { k = k_new; }
+
+    // Default handler terminates the program; 'msg' is not printed here.
+    virtual void panic(char *msg) { exit(PCCTS_EXIT_FAILURE); }
+};
+
+#endif
diff --git a/h/ATokenStream.h b/h/ATokenStream.h
new file mode 100755
index 0000000..ca3337b
--- /dev/null
+++ b/h/ATokenStream.h
@@ -0,0 +1,43 @@
+/* ANTLRTokenStream.h
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. By "credit",
+ * we mean that if you incorporate our source code into one of your
+ * programs (commercial product, research project, or otherwise) that you
+ * acknowledge this fact somewhere in the documentation, research report,
+ * etc... 
If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#ifndef ATOKENSTREAM_H_GATE +#define ATOKENSTREAM_H_GATE + +/* This is really a behavior or protocol; it merely indicates the behavior + * required of the input and output of an ANTLRTokenBuffer. You could + * subclass it, but you can also just pass any old pointer to ANTLRTokenBuffer + * with a type cast (in which case, your getANTLRToken() would have to + * explicitly cast the input pointer to your REAL type (typically your lexer)). + */ +class ANTLRTokenStream { +public: + virtual _ANTLRTokenPtr getToken() = 0; +}; + +#endif diff --git a/h/DLexer.cpp b/h/DLexer.cpp new file mode 100755 index 0000000..d12d077 --- /dev/null +++ b/h/DLexer.cpp @@ -0,0 +1,153 @@ +/* DLexer.c + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... 
If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#define ZZINC {if ( track_columns ) (++_endcol);} + +#define ZZGETC {ch = input->nextChar(); cl = ZZSHIFT(ch);} + +#define ZZNEWSTATE (newstate = dfa[state][cl]) + +#ifndef ZZCOPY +#define ZZCOPY \ + /* Truncate matching buffer to size (not an error) */ \ + if (nextpos < lastpos){ \ + *(nextpos++) = ch; \ + }else{ \ + bufovf = 1; \ + } +#endif + +void DLGLexer:: +mode( int m ) +{ + /* points to base of dfa table */ + if (m*actions[accepts[state]])(); + switch (add_erase) { + case 1: goto skip; + case 2: goto more; + } + return tk; +} + +void DLGLexer:: +advance() +{ + if ( input==NULL ) err_in(); + ZZGETC; charfull = 1; ZZINC; +} diff --git a/h/DLexerBase.cpp b/h/DLexerBase.cpp new file mode 100755 index 0000000..b505481 --- /dev/null +++ b/h/DLexerBase.cpp @@ -0,0 +1,213 @@ +/* DLGLexerBase.c + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. 
By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#include + +/* I have to put this here due to C++ limitation + * that you can't have a 'forward' decl for enums. + * I hate C++!!!!!!!!!!!!!!! + */ +enum ANTLRTokenType { TER_HATES_CPP, ITS_UTTER_GARBAGE, WITH_SOME_GOOD_IDEAS }; + +#define ANTLR_SUPPORT_CODE + +#include "config.h" +#include DLEXERBASE_H + +DLGLexerBase:: +DLGLexerBase(DLGInputStream *in, + unsigned bufsize, + int _interactive, + int _track_columns) +{ + this->_bufsize = bufsize; + this->_lextext = new DLGChar[_bufsize]; + if ( this->_lextext==NULL ) { + panic("text buffer is NULL"); + } + this->_begexpr = this->_endexpr = NULL; + this->ch = this->bufovf = 0; + this->nextpos = NULL; + this->cl = 0; + this->add_erase = 0; + this->input = in; + this->_begcol = 0; + this->_endcol = 0; + this->_line = 1; + this->charfull = 0; + this->automaton = 0; + this->token_to_fill = NULL; + this->interactive = _interactive; + this->track_columns = _track_columns; +} + +void DLGLexerBase:: +setInputStream( DLGInputStream *in ) +{ + this->input = in; + _line = 1; + charfull = 0; +} + +/* saves dlg state, but not what feeds dlg (such as file position) */ +void DLGLexerBase:: +saveState(DLGState *state) +{ + state->input = input; + state->interactive = interactive; + 
state->track_columns = track_columns; + state->auto_num = automaton; + state->add_erase = add_erase; + state->lookc = ch; + state->char_full = charfull; + state->begcol = _begcol; + state->endcol = _endcol; + state->line = _line; + state->lextext = _lextext; + state->begexpr = _begexpr; + state->endexpr = _endexpr; + state->bufsize = _bufsize; + state->bufovf = bufovf; + state->nextpos = nextpos; + state->class_num = cl; +} + +void DLGLexerBase:: +restoreState(DLGState *state) +{ + input = state->input; + interactive = state->interactive; + track_columns = state->track_columns; + automaton = state->auto_num; + add_erase = state->add_erase; + ch = state->lookc; + charfull = state->char_full; + _begcol = state->begcol; + _endcol = state->endcol; + _line = state->line; + _lextext = state->lextext; + _begexpr = state->begexpr; + _endexpr = state->endexpr; + _bufsize = state->bufsize; + bufovf = state->bufovf; + nextpos = state->nextpos; + cl = state->class_num; +} + +/* erase what is currently in the buffer, and get a new reg. expr */ +void DLGLexerBase:: +skip() +{ + add_erase = 1; +} + +/* don't erase what is in the lextext buffer, add on to it */ +void DLGLexerBase:: +more() +{ + add_erase = 2; +} + +/* substitute c for the reg. expr last matched and is in the buffer */ +void DLGLexerBase:: +replchar(DLGChar c) +{ + /* can't allow overwriting null at end of string */ + if (_begexpr < &_lextext[_bufsize-1]){ + *_begexpr = c; + *(_begexpr+1) = '\0'; + } + _endexpr = _begexpr; + nextpos = _begexpr + 1; +} + +/* replace the string s for the reg. 
expr last matched and in the buffer */
+/* A NULL 's' is treated like the empty string: the matched text is erased
+ * and no overflow is reported.  bufovf is set to 1 when 's' did not fit
+ * into _lextext and had to be truncated, 0 otherwise.
+ */
+void DLGLexerBase::
+replstr(register DLGChar *s)
+{
+    register DLGChar *l= &_lextext[_bufsize -1];   /* last slot of the text buffer */
+    int truncated = 0;
+
+    nextpos = _begexpr;
+    if (s){
+        while ((nextpos <= l) && (*(nextpos++) = *(s++))){
+            /* empty */
+        }
+        /* correct for NULL at end of string */
+        nextpos--;
+        /* The copy was complete only if the last char consumed from 's' was
+         * its terminator.  This test must stay inside the if(s) branch:
+         * with s==NULL there is nothing behind 's' to look at.
+         */
+        if (!((nextpos <= l) && (*(--s) == 0))) truncated = 1;
+    }
+    bufovf = truncated;
+    *(nextpos) = '\0';
+    _endexpr = nextpos - 1;
+}
+
+/* Report a lexical error on stderr; a NULL 's' selects a generic message. */
+void DLGLexerBase::
+errstd(char *s)
+{
+    fprintf(stderr,
+            "%s near line %d (text was '%s')\n",
+            ((s == NULL) ? "Lexical error" : s),
+            _line,_lextext);
+}
+
+int DLGLexerBase::
+err_in()
+{
+    fprintf(stderr,"No input stream, function, or string\n");
+    /* return eof to get out gracefully */
+    return EOF;
+}
+
+/* Default action for unmatched input: report it, consume one character and
+ * request that the current text be skipped.
+ */
+ANTLRTokenType DLGLexerBase::
+erraction()
+{
+    errstd("invalid token");
+    advance();
+    skip();
+    return (ANTLRTokenType) 0;  // bogus, but satisfies compiler
+}
+
+/* Fetch the next token type and wrap it, together with the current text and
+ * line number, in a token made by token_to_fill.
+ */
+_ANTLRTokenPtr DLGLexerBase::
+getToken()
+{
+    if ( token_to_fill==NULL ) panic("NULL token_to_fill");
+    ANTLRTokenType tt = nextTokenType();
+    _ANTLRTokenPtr tk = token_to_fill->makeToken(tt, _lextext,_line);
+    return tk;
+}
+
+void DLGLexerBase::
+panic(char *msg)
+{
+    fprintf(stderr, "DLG panic: %s\n", msg);
+    exit(EXIT_FAILURE);
+}
diff --git a/h/DLexerBase.h b/h/DLexerBase.h
new file mode 100755
index 0000000..48c50d4
--- /dev/null
+++ b/h/DLexerBase.h
@@ -0,0 +1,179 @@
+/* DLGLexerBase.h
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
+ * Set (PCCTS) -- PCCTS is in the public domain. An individual or
+ * company may do whatever they wish with source code distributed with
+ * PCCTS or the code generated by PCCTS, including the incorporation of
+ * PCCTS, or its output, into commerical software.
+ *
+ * We encourage users to develop software with PCCTS. However, we do ask
+ * that credit is given to us for developing PCCTS. 
By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#ifndef DLGX_H +#define DLGX_H + +#include +#include "config.h" +#include ATOKEN_H +#include ATOKENSTREAM_H + +/* must define what a char looks like; can make this a class too */ +typedef char DLGChar; + +/* Can have it as a class too: (ack this looks weird; is it right?) 
+class DLGChar {
+private:
+    int c;
+public:
+    DLGChar(int ch) { c = ch; }
+    int atom() { return c; }
+};
+*/
+
+/* user must subclass this */
+class DLGInputStream {
+public:
+    /* virtual so a stream may be destroyed through a DLGInputStream pointer */
+    virtual ~DLGInputStream() {}
+    /* returns the next input character, or EOF when the input is exhausted */
+    virtual int nextChar() = 0;
+};
+
+/* Predefined char stream: Input from FILE */
+class DLGFileInput : public DLGInputStream {
+private:
+    int found_eof;      /* set once getc() has returned EOF */
+    FILE *input;
+public:
+    DLGFileInput(FILE *f) { input = f; found_eof = 0; }
+    int nextChar() {
+        int c;
+        /* once EOF was seen keep answering EOF without touching the file */
+        if ( found_eof ) return EOF;
+        else {
+            c=getc(input);
+            if ( c==EOF ) found_eof = 1;
+            return c;
+        }
+    }
+};
+
+/* Predefined char stream: Input from string */
+class DLGStringInput : public DLGInputStream {
+private:
+    DLGChar *input;
+    DLGChar *p;         /* next character to hand out */
+public:
+    DLGStringInput(DLGChar *s) { input = s; p = &input[0];}
+    int nextChar()
+        {
+            /* Go through unsigned char so that characters >= 0x80 are
+             * returned as positive values (as getc() does in DLGFileInput)
+             * and can never be mistaken for EOF where plain char is signed.
+             */
+            if (*p) return (int) (unsigned char) *p++;
+            else return EOF;
+        }
+};
+
+/* Snapshot of the lexer fields copied by DLGLexerBase::saveState() and
+ * written back by restoreState().
+ */
+class DLGState {
+public:
+    DLGInputStream *input;
+    int interactive;
+    int track_columns;
+    int auto_num;
+    int add_erase;
+    int lookc;
+    int char_full;
+    int begcol, endcol;
+    int line;
+    DLGChar *lextext, *begexpr, *endexpr;
+    int bufsize;
+    int bufovf;
+    DLGChar *nextpos;
+    int class_num;
+};
+
+/* user must subclass this */
+class DLGLexerBase : public ANTLRTokenStream {
+public:
+    virtual ANTLRTokenType erraction();
+
+protected:
+    DLGInputStream *input;
+    int interactive;
+    int track_columns;
+    DLGChar *_lextext;  /* text of most recently matched token */
+    DLGChar *_begexpr;  /* beginning of last reg expr recogn. */
+    DLGChar *_endexpr;  /* end of last reg expr recogn. 
*/ + int _bufsize; /* number of characters in lextext */ + int _begcol; /* column that first character of token is in*/ + int _endcol; /* column that last character of token is in */ + int _line; /* line current token is on */ + int ch; /* character to determine next state */ + int bufovf; /* indicates that buffer too small for text */ + int charfull; + DLGChar *nextpos; /* points to next available position in lextext*/ + int cl; + int automaton; + int add_erase; + DLGChar ebuf[70]; + _ANTLRTokenPtr token_to_fill; + + virtual _ANTLRTokenPtr getToken(); + +public: + virtual void advance(void) = 0; + void skip(void); /* erase lextext, look for antoher token */ + void more(void); /* keep lextext, look for another token */ + void mode(int k); /* switch to automaton 'k' */ + void saveState(DLGState *); + void restoreState(DLGState *); + virtual ANTLRTokenType nextTokenType(void)=0;/* get next token */ + void replchar(DLGChar c); /* replace last recognized reg. expr. with + a character */ + void replstr(DLGChar *s); /* replace last recognized reg. expr. 
with + a string */ + int err_in(); + void errstd(char *); + + int line() { return _line; } + virtual void newline() { _line++; } + DLGChar *lextext() { return _lextext; } + + int begcol() { return _begcol; } + int endcol() { return _endcol; } + void set_begcol(int a) { _begcol=a; } + void set_endcol(int a) { _endcol=a; } + DLGChar *begexpr() { return _begexpr; } + DLGChar *endexpr() { return _endexpr; } + int bufsize() { return _bufsize; } + + void setToken(ANTLRAbstractToken *t) { token_to_fill = t; } + + void setInputStream(DLGInputStream *); + DLGLexerBase(DLGInputStream *in, + unsigned bufsize=2000, + int interactive=0, + int track_columns=0); + virtual ~DLGLexerBase() { delete [] _lextext; } + void panic(char *msg); + + void trackColumns() { + track_columns = 1; + this->_begcol = 0; + this->_endcol = 0; + } +}; + +#endif diff --git a/h/PBlackBox.h b/h/PBlackBox.h new file mode 100755 index 0000000..0706df0 --- /dev/null +++ b/h/PBlackBox.h @@ -0,0 +1,80 @@ +#ifndef PBLACKBOX_H +#define PBLACKBOX_H + +/* + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. 
+ * As long as these guidelines are kept, we expect to continue enhancing
+ * this system and expect to make other tools available as they are
+ * completed.
+ *
+ * ANTLR 1.33
+ * Terence Parr
+ * Parr Research Corporation
+ * with Purdue University and AHPCRC, University of Minnesota
+ * 1989-1995
+ */
+
+// NOTE(review): the header name was lost in extraction; <iostream.h> is
+// assumed because 'cerr' is used below -- confirm against the original.
+#include <iostream.h>
+
+/* Convenience wrapper that wires a FILE into a complete
+ * DLGFileInput -> Lexer -> ANTLRTokenBuffer -> Parser pipeline.
+ *
+ * Lexer, Parser and Token are the user's generated/derived classes.
+ * The destructor deletes every stage and closes the FILE.
+ */
+template<class Lexer, class Parser, class Token>
+class ParserBlackBox {
+protected:
+    DLGFileInput *in;
+    Lexer *scan;
+    _ANTLRTokenPtr tok;
+    ANTLRTokenBuffer *pipe;
+    Parser *_parser;
+    FILE *file;
+public:
+
+    /* Parse from an already open stream. */
+    ParserBlackBox(FILE *f)
+        {
+            file = f;
+            in = new DLGFileInput(f);
+            scan = new Lexer(in);
+            pipe = new ANTLRTokenBuffer(scan);
+            tok = new Token;
+            scan->setToken(tok);
+            _parser = new Parser(pipe);
+            _parser->init();
+        }
+    /* Parse from the named file.  If it cannot be opened a message is
+     * written to cerr and parser() returns NULL.
+     */
+    ParserBlackBox(char *fname)
+        {
+            FILE *f = fopen(fname, "r");
+            if ( f==NULL ) {
+                cerr << "cannot open " << fname << "\n";
+                /* leave the object in a state the destructor can handle */
+                file = NULL;
+                in = NULL;
+                scan = NULL;
+                pipe = NULL;
+                tok = NULL;
+                _parser = NULL;
+                return;
+            }
+            else {
+                file = f;
+                in = new DLGFileInput(f);
+                scan = new Lexer(in);
+                pipe = new ANTLRTokenBuffer(scan);
+                tok = new Token;
+                scan->setToken(tok);
+                _parser = new Parser(pipe);
+                _parser->init();
+            }
+        }
+    ~ParserBlackBox()
+        {
+            delete in; delete scan; delete pipe; delete _parser; delete tok;
+            /* fclose() must not be handed a null stream */
+            if ( file!=NULL ) fclose(file);
+        }
+
+    Parser *parser()    { return _parser; }
+};
+
+#endif
diff --git a/h/PCCTSAST.cpp b/h/PCCTSAST.cpp
new file mode 100755
index 0000000..12fd55e
--- /dev/null
+++ b/h/PCCTSAST.cpp
@@ -0,0 +1,641 @@
+/*
+ * PCCTSAST.C
+ *
+ * SOFTWARE RIGHTS
+ *
+ * We reserve no LEGAL rights to SORCERER -- SORCERER is in the public
+ * domain. An individual or company may do whatever they wish with
+ * source code distributed with SORCERER or the code generated by
+ * SORCERER, including the incorporation of SORCERER, or its output, into
+ * commerical software.
+ *
+ * We encourage users to develop software with SORCERER. However, we do
+ * ask that credit is given to us for developing SORCERER. 
By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like SORCERER and have developed a nice tool with the + * output, please mention that you developed it using SORCERER. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * SORCERER 1.00B14 and ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * AHPCRC, University of Minnesota + * 1992-1995 + */ + +#define ANTLR_SUPPORT_CODE + +#include "PCCTSAST.h" +#include +#include +//#include "SList.h" + + /* String Scanning/Parsing Stuff */ + +char *PCCTS_AST::scan_token_tbl[] = { + "invalid", /* 0 */ + "LPAREN", /* 1 */ + "RPAREN", /* 2 */ + "PERCENT", /* 3 */ + "INT", /* 4 */ + "COLON", /* 5 */ + "POUND", /* 6 */ + "PERIOD", /* 7 */ +}; + +void PCCTS_AST:: +addChild(PCCTS_AST *t) +{ + if ( t==NULL ) return; + PCCTS_AST *s = down(); + if ( s!=NULL ) + { + while ( s->right()!=NULL ) s = s->right(); + s->setRight(t); + } + else + this->setDown(t); +} + +void PCCTS_AST:: +lisp(FILE *f) +{ + if ( down() != NULL ) fprintf(f," ("); + lisp_action(f); + if ( down()!=NULL ) down()->lisp(f); + if ( down() != NULL ) fprintf(f," )"); + if ( right()!=NULL ) right()->lisp(f); +} + +/* build a tree (root child1 child2 ... NULL) + * If root is NULL, simply make the children siblings and return ptr + * to 1st sibling (child1). If root is not single node, return NULL. + * + * Siblings that are actually sibling lists themselves are handled + * correctly. For example #( NULL, #( NULL, A, B, C), D) results + * in the tree ( NULL A B C D ). + * + * Requires at least two parameters with the last one being NULL. If + * both are NULL, return NULL. 
+ *
+ * The down() and right() down/right pointers are used to make the tree.
+ */
+PCCTS_AST *PCCTS_AST::
+make(PCCTS_AST *rt, ...)
+{
+    va_list ap;
+    register PCCTS_AST *child, *sibling=NULL, *tail=NULL, *w;
+    PCCTS_AST *root = rt;
+
+    /* Reject a root that already has children before va_start(), so that
+     * every va_start() is paired with the va_end() below.
+     */
+    if ( root != NULL && root->down() != NULL ) return NULL;
+
+    va_start(ap, rt);
+    child = va_arg(ap, PCCTS_AST *);
+    while ( child != NULL )
+    {
+        /* find end of child */
+        for (w=child; w->right()!=NULL; w=w->right()) {;}
+        /* first argument starts the sibling list, later ones are chained
+         * after the tail of the previous argument
+         */
+        if ( sibling == NULL ) {sibling = child; tail = w;}
+        else {tail->setRight(child); tail = w;}
+        child = va_arg(ap, PCCTS_AST *);
+    }
+    if ( root==NULL ) root = sibling;
+    else root->setDown(sibling);
+    va_end(ap);
+    return root;
+}
+
+/* The following push and pop routines are only used by ast_find_all() */
+
+/* Push 'e'; the stack grows downward, '*sp' indexes the top element. */
+void PCCTS_AST::
+_push(PCCTS_AST **st, int *sp, PCCTS_AST *e)
+{
+    (*sp)--;
+    require((*sp)>=0, "stack overflow");
+    st[(*sp)] = e;
+}
+
+/* Pop and return the top element. */
+PCCTS_AST *PCCTS_AST::
+_pop(PCCTS_AST **st, int *sp)
+{
+    PCCTS_AST *e;
+    /* check before reading so an empty stack is never indexed */
+    require((*sp)<MaxTreeStackDepth, "stack underflow");
+    e = st[*sp];
+    (*sp)++;
+    return e;
+}
+
+/* Find all occurrences of u in t.
+ * 'cursor' must be initialized to 't'. It eventually
+ * returns NULL when no more occurrences of 'u' are found. 
+ *
+ * Each call returns the next node whose subtree matches 'u' (as decided by
+ * match_partial()) and stores in '*cursor' the place to resume from.
+ *
+ * The traversal stack and its index are function-level statics, so the
+ * state is shared by every caller: a search has to run to completion (or
+ * be abandoned) before another one is started.
+ */
+PCCTS_AST *PCCTS_AST::
+ast_find_all(PCCTS_AST *u, PCCTS_AST **cursor)
+{
+    PCCTS_AST *sib;
+    /* pending right-siblings still to be visited; filled from the top down */
+    static PCCTS_AST *template_stack[MaxTreeStackDepth];
+    static int tsp = MaxTreeStackDepth;
+    static int nesting = 0;     /* not referenced anywhere in this function */
+
+    if ( *cursor == NULL ) return NULL;
+    if ( *cursor!=this ) sib = *cursor;     /* resuming an earlier search */
+    else {
+        /* else, first time--start at top of template 't' */
+        tsp = MaxTreeStackDepth;
+        sib = this;
+        /* bottom of stack is always a NULL--"cookie" indicates "done" */
+        _push(template_stack, &tsp, NULL);
+    }
+
+keep_looking:
+    if ( sib==NULL )    /* hit end of sibling list */
+    {
+        /* continue with the most recently deferred sibling, if any */
+        sib = _pop(template_stack, &tsp);
+        if ( sib == NULL ) { *cursor = NULL; return NULL; }
+    }
+
+    if ( sib->type() != u->type() )
+    {
+        /* look for another match */
+        if ( sib->down()!=NULL )
+        {
+            /* descend first; remember the sibling for later */
+            if ( sib->right()!=NULL ) _push(template_stack, &tsp, sib->right());
+            sib=sib->down();
+            goto keep_looking;
+        }
+        /* nothing below to try, try next sibling */
+        sib=sib->right();
+        goto keep_looking;
+    }
+
+    /* found a matching root node, try to match what's below */
+    if ( match_partial(sib, u) )
+    {
+        /* record sibling cursor so we can pick up next from there */
+        if ( sib->down()!=NULL )
+        {
+            if ( sib->right()!=NULL ) _push(template_stack, &tsp, sib->right());
+            *cursor = sib->down();
+        }
+        else if ( sib->right()!=NULL ) *cursor = sib->right();
+        /* leaf with no sibling: resume from the stack; popping the NULL
+         * cookie here makes the next call return NULL immediately
+         */
+        else *cursor = _pop(template_stack, &tsp);
+        return sib;
+    }
+
+    /* no match, keep searching */
+    if ( sib->down()!=NULL )
+    {
+        if ( sib->right()!=NULL ) _push(template_stack, &tsp, sib->right());
+        sib=sib->down();
+    }
+    else sib = sib->right();    /* else, try to right if zip below */
+    goto keep_looking;
+}
+
+/* are two trees exactly alike? 
*/
+/* NOTE(review): the loop stops as soon as either sibling list ends and
+ * children of 'u' are only inspected where 'this' has children, so trees
+ * of different size can still compare equal -- confirm whether callers
+ * rely on that before tightening it.
+ */
+int PCCTS_AST::
+match(PCCTS_AST *u)
+{
+    PCCTS_AST *t = this;
+    PCCTS_AST *sib;
+
+    if ( u==NULL ) return 0;
+
+    for (sib=t; sib!=NULL&&u!=NULL; sib=sib->right(), u=u->right())
+    {
+        if ( sib->type() != u->type() ) return 0;
+        if ( sib->down()!=NULL )
+            if ( !sib->down()->match(u->down()) ) return 0;
+    }
+    return 1;
+}
+
+/* Is 'u' a subtree of 't' beginning at the root? */
+int PCCTS_AST::
+match_partial(PCCTS_AST *t, PCCTS_AST *u)
+{
+    PCCTS_AST *sib;
+
+    if ( u==NULL ) return 1;    /* an empty pattern matches anything */
+    if ( t==NULL ) return 0;    /* 'u' is known to be non-NULL here */
+
+    for (sib=t; sib!=NULL&&u!=NULL; sib=sib->right(), u=u->right())
+    {
+        if ( sib->type() != u->type() ) return 0;
+        if ( sib->down()!=NULL )
+            if ( !match_partial(sib->down(), u->down()) ) return 0;
+    }
+    return 1;
+}
+
+/* Walk the template tree 't' (matching against 'this'), filling in the
+ * 'labels' array, and setting 'n' according to how many labels were matched.
+ *
+ * Returns 1 if the template matched, 0 otherwise.  Labels assigned before a
+ * mismatch is detected stay assigned.
+ */
+int PCCTS_AST::
+scanmatch(ScanAST *t, PCCTS_AST **labels[], int *n)
+{
+    ScanAST *sib;
+    PCCTS_AST *u = this;
+
+    if ( u==NULL ) return 0;
+
+    for (sib=t; sib!=NULL&&u!=NULL; sib=sib->right(), u=u->right())
+    {
+        /* make sure tokens match; token of '0' means wildcard match */
+        if ( sib->type() != u->type() && sib->type()!=0 ) return 0;
+        /* we have a matched token here; set label pointers if exists */
+        if ( sib->label_num>0 )
+        {
+            require(labels!=NULL, "label found in template, but no array of labels");
+            (*n)++;
+            *(labels[sib->label_num-1]) = u;
+        }
+        /* match what's below if something there and current node is not wildcard */
+        if ( sib->down()!=NULL && sib->type()!=0 )
+        {
+            /* the template wants children but the tree node has none;
+             * testing here avoids a member call through a NULL pointer
+             */
+            if ( u->down()==NULL ) return 0;
+            if ( !u->down()->scanmatch(sib->down(), labels, n) ) return 0;
+        }
+    }
+    return 1;
+}
+
+void PCCTS_AST::
+insert_after(PCCTS_AST *b)
+{
+    PCCTS_AST *end;
+    if ( b==NULL ) return;
+    /* find end of b's child list */
+    for (end=b; end->right()!=NULL; end=end->right()) {;}
+    end->setRight(this->right());
+    this->setRight(b);
+}
+
+/* Hook 'b' onto the end of this node's sibling list. */
+void PCCTS_AST::
+append(PCCTS_AST *b)
+{
+    PCCTS_AST *node = this;
+    require(b!=NULL, "append: NULL input tree");
+    /* walk to the last sibling */
+    while ( node->right()!=NULL ) node = node->right();
+    node->setRight(b);
+}
+
+/* Return the last node of this node's sibling list. */
+PCCTS_AST *PCCTS_AST::
+tail()
+{
+    PCCTS_AST *node = this;
+    while ( node->right()!=NULL ) node = node->right();
+    return node;
+}
+
+/* Follow the down() links from this node and return the deepest node. */
+PCCTS_AST *PCCTS_AST::
+bottom()
+{
+    PCCTS_AST *node = this;
+    while ( node->down()!=NULL ) node = node->down();
+    return node;
+}
+
+/* Unlink and return the siblings strictly between 'a' and 'b'; afterwards
+ * 'a' points directly at 'b'.  Returns NULL when either argument is NULL
+ * or 'b' is not reachable from 'a'.
+ */
+PCCTS_AST *PCCTS_AST::
+cut_between(PCCTS_AST *a, PCCTS_AST *b)
+{
+    PCCTS_AST *prev, *removed;
+    if ( a==NULL ) return NULL;
+    if ( b==NULL ) return NULL;
+    /* locate the node whose right link is 'b' */
+    prev = a;
+    while ( prev->right()!=NULL && prev->right()!=b )
+    {
+        prev = prev->right();
+    }
+    if ( prev->right()==NULL ) return NULL;  //ast_cut_between: a,b not connected
+    prev->setRight(NULL);       /* terminate the list being cut out */
+    removed = a->right();
+    a->setRight(b);
+    return removed;
+}
+
+#ifdef NOT_YET
+SList *PCCTS_AST::
+to_slist()
+{
+    SList *result = new SList;
+    PCCTS_AST *node = this;
+
+    while ( node!=NULL )
+    {
+        result->add(node);
+        node = node->right();
+    }
+    return result;
+}
+#endif
+
+/* Delete this node together with everything below it and to its right. */
+void PCCTS_AST::
+tfree()
+{
+    if ( this->down()!=NULL ) this->down()->tfree();
+    if ( this->right()!=NULL ) this->right()->tfree();
+    delete this;
+}
+
+/* Number of nodes in the sibling list starting at this node. */
+int PCCTS_AST::
+nsiblings()
+{
+    int count = 0;
+    PCCTS_AST *node;
+
+    for (node=this; node!=NULL; node=node->right())
+    {
+        count++;
+    }
+    return count;
+}
+
+/* Return the i-th sibling (1 is this node), or NULL if there are fewer. */
+PCCTS_AST *PCCTS_AST::
+sibling_index(int i)
+{
+    PCCTS_AST *node;
+    int pos;
+    require(i>0, "sibling_index: i<=0");
+
+    for (node=this, pos=1; node!=NULL; node=node->right(), pos++)
+    {
+        if ( pos==i ) return node;
+    }
+    return NULL;
+}
+
+/* Assume this is a root node of a tree--
+ * duplicate that node and what's below; ignore siblings of root node.
+ */
+PCCTS_AST *PCCTS_AST::
+deepCopy()
+{
+    PCCTS_AST *u = this->shallowCopy();
+    /* Everything below the root must be copied in full, sibling lists
+     * included, hence deepCopyBushy() for the children.
+     */
+    if ( down()!=NULL ) u->setDown(down()->deepCopyBushy());
+    /* siblings of the root are not part of the copy */
+    u->setRight(NULL);
+    return u;
+}
+
+/* Copy all nodes including siblings of root. */
+PCCTS_AST *PCCTS_AST::
+deepCopyBushy()
+{
+    PCCTS_AST *u = this->shallowCopy();
+    /* copy the rest of the tree; recurse with deepCopyBushy() so that the
+     * sibling lists at every level are duplicated, not just their heads
+     */
+    if ( down()!=NULL ) u->setDown(down()->deepCopyBushy());
+    if ( right()!=NULL ) u->setRight(right()->deepCopyBushy());
+    return u;
+}
+
+/* Release a template tree built from new_scanast() nodes. */
+void PCCTS_AST::
+scanast_free(ScanAST *t)
+{
+    if ( t == NULL ) return;
+    scanast_free( t->down() );
+    scanast_free( t->right() );
+    free( t );
+}
+
+/*
+ * scan
+ *
+ * This function is like scanf(): it attempts to match a template
+ * against an input tree. A variable number of tree pointers
+ * may be set according to the '%i' labels in the template string.
+ * For example:
+ *
+ * t->ast_scan("#( 6 #(5 %1:4 %2:3) #(1 %3:3 %4:3) )",
+ * &w, &x, &y, &z);
+ *
+ * Naturally, you'd want this converted from
+ *
+ * t->ast_scan("#( RangeOp #(Minus %1:IConst %2:Var) #(Plus %3:Var %4:Var) )",
+ * &w, &x, &y, &z);
+ *
+ * by SORCERER.
+ *
+ * This function call must be done within a SORCERER file because SORCERER
+ * must convert the token references to the associated token number.
+ *
+ * This function parses the template and creates trees which are then
+ * matched against the input tree. The labels are set as they are
+ * encountered; hence, partial matches may leave some pointers set
+ * and some NULL. This routine initializes all argument pointers to NULL
+ * at the beginning.
+ *
+ * This function returns the number of labels matched.
+ */
+int PCCTS_AST::
+ast_scan(char *templ, ...)
+{ + va_list ap; + ScanAST *tmpl; + int n, i, found=0; + PCCTS_AST ***label_ptrs=NULL; + + va_start(ap, templ); + + /* make a ScanAST tree out of the template */ + tmpl = stringparser_parse_scanast(templ, &n); + + /* make an array out of the labels */ + if ( n>0 ) + { + label_ptrs = (PCCTS_AST ***) calloc(n, sizeof(PCCTS_AST **)); + require(label_ptrs!=NULL, "scan: out of memory"); + for (i=1; i<=n; i++) + { + label_ptrs[i-1] = va_arg(ap, PCCTS_AST **); + *(label_ptrs[i-1]) = NULL; + } + } + + /* match the input tree against the template */ + scanmatch(tmpl, label_ptrs, &found); + + scanast_free(tmpl); + free(label_ptrs); + + return found; +} + +ScanAST *PCCTS_AST:: +new_scanast(int tok) +{ + ScanAST *p = (ScanAST *) calloc(1, sizeof(ScanAST)); + if ( p == NULL ) {fprintf(stderr, "out of mem\n"); exit(EXIT_FAILURE);} + p->_token = tok; + return p; +} + +ScanAST *PCCTS_AST:: +stringparser_parse_scanast(char *templ, int *num_labels) +{ + StringLexer lex; + StringParser parser; + ScanAST *t; + + stringlexer_init(&lex, templ); + stringparser_init(&parser, &lex); + t = stringparser_parse_tree(&parser); + *num_labels = parser.num_labels; + return t; +} + +void PCCTS_AST:: +stringparser_match(StringParser *parser, int token) +{ + if ( parser->token != token ) panic("bad tree in scan()"); +} + +/* + * Match a tree of the form: + * (root child1 child2 ... childn) + * or, + * node + * + * where the elements are integers or labeled integers. 
+ */ +ScanAST *PCCTS_AST:: +stringparser_parse_tree(StringParser *parser) +{ + ScanAST *t=NULL, *root, *child, *last; + + if ( parser->token != __POUND ) + { + return stringparser_parse_element(parser); + } + stringparser_match(parser,__POUND); + parser->token = stringscan_gettok(parser->lexer); + stringparser_match(parser,__LPAREN); + parser->token = stringscan_gettok(parser->lexer); + root = stringparser_parse_element(parser); + while ( parser->token != __RPAREN ) + { + child = stringparser_parse_element(parser); + if ( t==NULL ) { t = child; last = t; } + else { last->_right = child; last = child; } + } + stringparser_match(parser,__RPAREN); + parser->token = stringscan_gettok(parser->lexer); + root->_down = t; + return root; +} + +ScanAST *PCCTS_AST:: +stringparser_parse_element(StringParser *parser) +{ + static char ebuf[100]; + int label = 0; + + if ( parser->token == __POUND ) + { + return stringparser_parse_tree(parser); + } + if ( parser->token == __PERCENT ) + { + parser->token = stringscan_gettok(parser->lexer); + stringparser_match(parser,__INT); + label = atoi(parser->lexer->text); + parser->num_labels++; + if ( label==0 ) panic("%%0 is an invalid label"); + parser->token = stringscan_gettok(parser->lexer); + stringparser_match(parser,__COLON); + parser->token = stringscan_gettok(parser->lexer); + /* can label tokens and wildcards */ + if ( parser->token != __INT && parser->token != __PERIOD ) + panic("can only label tokens"); + } + if ( parser->token == __INT ) + { + ScanAST *p = new_scanast(atoi(parser->lexer->text)); + parser->token = stringscan_gettok(parser->lexer); + p->label_num = label; + return p; + } + if ( parser->token == __PERIOD ) + { + ScanAST *p = new_scanast(0); /* token of 0 is wildcard */ + parser->token = stringscan_gettok(parser->lexer); + p->label_num = label; + return p; + } + sprintf(ebuf, "mismatch token in scan(): %s", scan_token_str(parser->token)); + panic(ebuf); + return NULL; +} + +void PCCTS_AST:: 
+stringparser_init(StringParser *parser, StringLexer *input) +{ + parser->lexer = input; + parser->token = stringscan_gettok(parser->lexer); + parser->num_labels = 0; +} + +void PCCTS_AST:: +stringlexer_init(StringLexer *scanner, char *input) +{ + scanner->text[0]='\0'; + scanner->input = input; + scanner->p = input; + stringscan_advance(scanner); +} + +void PCCTS_AST:: +stringscan_advance(StringLexer *scanner) +{ + if ( *(scanner->p) == '\0' ) scanner->c = __StringScanEOF; + scanner->c = *(scanner->p)++; +} + +int PCCTS_AST:: +stringscan_gettok(StringLexer *scanner) +{ + char *index = &scanner->text[0]; + static char ebuf[100]; + + while ( isspace(scanner->c) ) { stringscan_advance(scanner); } + if ( isdigit(scanner->c) ) + { + int tok = __INT; + while ( isdigit(scanner->c) ) { + *index++ = scanner->c; + stringscan_advance(scanner); + } + *index = '\0'; + return tok; + } + switch ( scanner->c ) + { + case '#' : stringscan_advance(scanner); return __POUND; + case '(' : stringscan_advance(scanner); return __LPAREN; + case ')' : stringscan_advance(scanner); return __RPAREN; + case '%' : stringscan_advance(scanner); return __PERCENT; + case ':' : stringscan_advance(scanner); return __COLON; + case '.' : stringscan_advance(scanner); return __PERIOD; + case '\0' : return __StringScanEOF; + case __StringScanEOF : return __StringScanEOF; + default : + sprintf(ebuf, "invalid char in scan: '%c'", scanner->c); + panic(ebuf); + } + return __StringScanEOF; // never reached +} + +char *PCCTS_AST:: +scan_token_str(int t) +{ + if ( VALID_SCAN_TOKEN(t) ) return scan_token_tbl[t]; + else if ( t==__StringScanEOF ) return ""; + else return ""; +} diff --git a/support/sym/sym.c b/support/sym/sym.c new file mode 100755 index 0000000..29340d8 --- /dev/null +++ b/support/sym/sym.c @@ -0,0 +1,366 @@ +/* + * Simple symbol table manager using coalesced chaining to resolve collisions + * + * Doubly-linked lists are used for fast removal of entries. 
 *
 * 'sym.h' must have a definition for typedef "Sym".  Sym must include at
 * minimum the following fields:
 *
 *		...
 *		char *symbol;
 *		struct ... *next, *prev, **head, *scope;
 *		unsigned int hash;
 *		...
 *
 * 'template.h' can be used as a template to create a 'sym.h'.
 *
 * 'head' is &(table[hash(itself)]).
 * The hash table is not resizable at run-time.
 * The scope field is used to link all symbols of a current scope together.
 * Scope() sets the current scope (linked list) to add symbols to.
 * Any number of scopes can be handled.  The user passes the address of
 * a pointer to a symbol table
 * entry (INITIALIZED TO NULL first time).
 *
 * Available Functions:
 *
 *	zzs_init(s1,s2)	-- Create hash table with size s1, string table size s2.
 *	zzs_done()		-- Free hash and string table created with zzs_init().
 *	zzs_add(key,rec)-- Add 'rec' with key 'key' to the symbol table.
 *	zzs_newadd(key)	-- create entry; add using 'key' to the symbol table.
 *	zzs_get(key)	-- Return pointer to last record entered under 'key'
 *					   Else return NULL
 *	zzs_del(p)		-- Unlink the entry associated with p.  This does
 *					   NOT free 'p' and DOES NOT remove it from a scope
 *					   list.  If it was a part of your intermediate code
 *					   tree or another structure, it will still be there.
 *					   It is only removed from further consideration
 *					   by the symbol table.
 *	zzs_keydel(s)	-- Unlink the entry associated with key s.
 *					   Calls zzs_del(p) to unlink.
 *	zzs_scope(sc)	-- Specifies that everything added to the symbol
 *					   table with zzs_add() is added to the list (scope)
 *					   'sc'.  'sc' is of 'Sym **sc' type and must be
 *					   initialized to NULL before trying to add anything
 *					   to it (passing it to zzs_scope()).  Scopes can be
 *					   switched at any time and merely links a set of
 *					   symbol table entries.  If a NULL pointer is
 *					   passed, the current scope is returned.
 *	zzs_rmscope(sc)	-- Remove (zzs_del()) all elements of scope 'sc'
 *					   from the symbol table.  The entries are NOT
 *					   free()'d.  A pointer to the first
 *					   element in the "scope" is returned.  The user
 *					   can then manipulate the list as he/she chooses
 *					   (such as freeing them all). NOTE that this
 *					   function sets your scope pointer to NULL,
 *					   but returns a pointer to the list for you to use.
 *	zzs_stat()		-- Print out the symbol table and some relevant stats.
 *	zzs_new(key)	-- Create a new record with calloc() of type Sym.
 *					   Add 'key' to the string table and make the new
 *					   records 'symbol' pointer point to it.
 *	zzs_strdup(s)	-- Add s to the string table and return a pointer
 *					   to it.  Very fast allocation routine
 *					   and does not require strlen() nor calloc().
 *
 * Example:
 *
 *	#include <stdio.h>
 *	#include "sym.h"
 *
 *	main()
 *	{
 *		Sym *scope1=NULL, *scope2=NULL, *a, *p;
 *
 *		zzs_init(101, 100);
 *
 *		a = zzs_new("Apple");	zzs_add(a->symbol, a);	-- No scope
 *		zzs_scope( &scope1 );	-- enter scope 1
 *		a = zzs_new("Plum");	zzs_add(a->symbol, a);
 *		zzs_scope( &scope2 );	-- enter scope 2
 *		a = zzs_new("Truck");	zzs_add(a->symbol, a);
 *
 *		p = zzs_get("Plum");
 *		if ( p == NULL ) fprintf(stderr, "Hmmm...Can't find 'Plum'\n");
 *
 *		p = zzs_rmscope(&scope1);
 *		for (; p!=NULL; p=p->scope) {printf("Scope1:  %s\n", p->symbol);}
 *		p = zzs_rmscope(&scope2);
 *		for (; p!=NULL; p=p->scope) {printf("Scope2:  %s\n", p->symbol);}
 *	}
 *
 * Terence Parr
 * Purdue University
 * February 1990
 *
 * CHANGES
 *
 *	Terence Parr
 *	May 1991
 *		Renamed functions to be consistent with ANTLR
 *		Made HASH macro
 *		Added zzs_keydel()
 *		Added zzs_newadd()
 *		Fixed up zzs_stat()
 *
 *	July 1991
 *		Made symbol table entry save its hash code for fast comparison
 *			during searching etc...
+ */ + +#include +#if __STDC__ == 1 +#include +#include +#else +#include "malloc.h" +#endif +#ifdef MEMCHK +#include "trax.h" +#endif +#include "sym.h" + +#define StrSame 0 + +static Sym **CurScope = NULL; +static unsigned size = 0; +static Sym **table=NULL; +static char *strings; +static char *strp; +static int strsize = 0; + +void +zzs_init(sz, strs) +int sz, strs; +{ + if ( sz <= 0 || strs <= 0 ) return; + table = (Sym **) calloc(sz, sizeof(Sym *)); + if ( table == NULL ) + { + fprintf(stderr, "Cannot allocate table of size %d\n", sz); + exit(1); + } + strings = (char *) calloc(strs, sizeof(char)); + if ( strings == NULL ) + { + fprintf(stderr, "Cannot allocate string table of size %d\n", strs); + exit(1); + } + size = sz; + strsize = strs; + strp = strings; +} + +void +zzs_done() +{ + if ( table != NULL ) free( table ); + if ( strings != NULL ) free( strings ); +} + +void +zzs_add(key, rec) +char *key; +register Sym *rec; +{ + register unsigned int h=0; + register char *p=key; + extern Sym *Globals; + + HASH(p, h); + rec->hash = h; /* save hash code for fast comp later */ + h %= size; + + if ( CurScope != NULL ) {rec->scope = *CurScope; *CurScope = rec;} + rec->next = table[h]; /* Add to doubly-linked list */ + rec->prev = NULL; + if ( rec->next != NULL ) (rec->next)->prev = rec; + table[h] = rec; + rec->head = &(table[h]); +} + +Sym * +zzs_get(key) +char *key; +{ + register unsigned int h=0; + register char *p=key; + register Sym *q; + + HASH(p, h); + + for (q = table[h%size]; q != NULL; q = q->next) + { + if ( q->hash == h ) /* do we even have a chance of matching? */ + if ( strcmp(key, q->symbol) == StrSame ) return( q ); + } + return( NULL ); +} + +/* + * Unlink p from the symbol table. Hopefully, it's actually in the + * symbol table. + * + * If p is not part of a bucket chain of the symbol table, bad things + * will happen. 
+ * + * Will do nothing if all list pointers are NULL + */ +void +zzs_del(p) +register Sym *p; +{ + if ( p == NULL ) {fprintf(stderr, "zzs_del(NULL)\n"); exit(1);} + if ( p->prev == NULL ) /* Head of list */ + { + register Sym **t = p->head; + + if ( t == NULL ) return; /* not part of symbol table */ + (*t) = p->next; + if ( (*t) != NULL ) (*t)->prev = NULL; + } + else + { + (p->prev)->next = p->next; + if ( p->next != NULL ) (p->next)->prev = p->prev; + } + p->next = p->prev = NULL; /* not part of symbol table anymore */ + p->head = NULL; +} + +void +zzs_keydel(key) +char *key; +{ + Sym *p = zzs_get(key); + + if ( p != NULL ) zzs_del( p ); +} + +/* S c o p e S t u f f */ + +/* Set current scope to 'scope'; return current scope if 'scope' == NULL */ +Sym ** +zzs_scope(scope) +Sym **scope; +{ + if ( scope == NULL ) return( CurScope ); + CurScope = scope; + return( scope ); +} + +/* Remove a scope described by 'scope'. Return pointer to 1st element in scope */ +Sym * +zzs_rmscope(scope) +register Sym **scope; +{ + register Sym *p; + Sym *start; + + if ( scope == NULL ) return(NULL); + start = p = *scope; + for (; p != NULL; p=p->scope) { zzs_del( p ); } + *scope = NULL; + return( start ); +} + +void +zzs_stat() +{ + static unsigned short count[20]; + unsigned int i,n=0,low=0, hi=0; + register Sym **p; + float avg=0.0; + + for (i=0; i<20; i++) count[i] = 0; + for (p=table; p<&(table[size]); p++) + { + register Sym *q = *p; + unsigned int len; + + if ( q != NULL && low==0 ) low = p-table; + len = 0; + if ( q != NULL ) printf("[%d]", p-table); + while ( q != NULL ) + { + len++; + n++; + printf(" %s", q->symbol); + q = q->next; + if ( q == NULL ) printf("\n"); + } + if ( len>=20 ) printf("zzs_stat: count table too small\n"); + else count[len]++; + if ( *p != NULL ) hi = p-table; + } + + printf("Storing %d recs used %d hash positions out of %d\n", + n, size-count[0], size); + printf("%f %% utilization\n", + ((float)(size-count[0]))/((float)size)); + for (i=0; i<20; i++) + 
{ + if ( count[i] != 0 ) + { + avg += (((float)(i*count[i]))/((float)n)) * i; + printf("Buckets of len %d == %d (%f %% of recs)\n", + i, count[i], 100.0*((float)(i*count[i]))/((float)n)); + } + } + printf("Avg bucket length %f\n", avg); + printf("Range of hash function: %d..%d\n", low, hi); +} + +/* + * Given a string, this function allocates and returns a pointer to a + * symbol table record whose "symbol" pointer is reset to a position + * in the string table. + */ +Sym * +zzs_new(text) +char *text; +{ + Sym *p; + char *zzs_strdup(); + + if ( (p = (Sym *) calloc(1,sizeof(Sym))) == 0 ) + { + fprintf(stderr,"Out of memory\n"); + exit(1); + } + p->symbol = zzs_strdup(text); + + return p; +} + +/* create a new symbol table entry and add it to the symbol table */ +Sym * +zzs_newadd(text) +char *text; +{ + Sym *p = zzs_new(text); + if ( p != NULL ) zzs_add(text, p); + return p; +} + +/* Add a string to the string table and return a pointer to it. + * Bump the pointer into the string table to next avail position. + */ +char * +zzs_strdup(s) +register char *s; +{ + register char *start=strp; + + while ( *s != '\0' ) + { + if ( strp >= &(strings[strsize-2]) ) + { + fprintf(stderr, "sym: string table overflow (%d chars)\n", strsize); + exit(-1); + } + *strp++ = *s++; + } + *strp++ = '\0'; + + return( start ); +}