3 * Define the generic ANTLRParser superclass, which is subclassed to
4 * define an actual parser.
6 * Before entry into this file: ANTLRTokenType must be set.
10 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
11 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
12 * company may do whatever they wish with source code distributed with
13 * PCCTS or the code generated by PCCTS, including the incorporation of
14 * PCCTS, or its output, into commercial software.
16 * We encourage users to develop software with PCCTS. However, we do ask
17 * that credit is given to us for developing PCCTS. By "credit",
18 * we mean that if you incorporate our source code into one of your
19 * programs (commercial product, research project, or otherwise) that you
20 * acknowledge this fact somewhere in the documentation, research report,
21 * etc... If you like PCCTS and have developed a nice tool with the
22 * output, please mention that you developed it using PCCTS. In
23 * addition, we ask that this header remain intact in our source code.
24 * As long as these guidelines are kept, we expect to continue enhancing
25 * this system and expect to make other tools available as they are
30 * Parr Research Corporation
31 * with Purdue University and AHPCRC, University of Minnesota
35 #ifndef APARSER_H_GATE
36 #define APARSER_H_GATE
42 #include ATOKENBUFFER_H
/* NLA ("next LA"): the token type held in the token_type cache at slot
 * lap & (LLk-1).  The mask behaves like lap % LLk only when LLk is a power
 * of two -- NOTE(review): confirm every parser is built with such an LLk. */
51 #define NLA (token_type[lap&(LLk-1)])/* --> next LA */
/* Storage unit of the bit sets: each set word is one unsigned char. */
53 typedef unsigned char SetWordType;
55 /* Define external bit set stuff (for SetWordType) */
/* EXT_WORDSIZE is the bit count of one set word, computed as sizeof(char)*8,
 * so it assumes 8-bit chars. */
56 #define EXT_WORDSIZE (sizeof(char)*8)
/* EXT_LOGWORDSIZE is log2 of that bit count (2^3 == 8); the two definitions
 * must be kept in step. */
57 #define EXT_LOGWORDSIZE 3
59 /* s y n t a c t i c p r e d i c a t e s t u f f */
61 typedef struct _zzjmp_buf {
65 /* these need to be macros not member functions */
/* Locals a rule declares for guessing: the saved parser state (zzst), the
 * setjmp result (zzrv) and the token-buffer marker (_marker). */
66 #define zzGUESS_BLOCK ANTLRParserState zzst; int zzrv; int _marker;
/* Prefix for a statement that must run only while the parser is not guessing. */
67 #define zzNON_GUESS_MODE if ( !guessing )
/* Abandon the current guess by calling guess_fail(). */
68 #define zzGUESS_FAIL guess_fail();
/* Finish a guess: set zzrv to 1, call inputTokens->rewind(_marker) and pass
 * the state saved in zzst to guess_done(). */
69 #define zzGUESS_DONE {zzrv=1; inputTokens->rewind(_marker); guess_done(&zzst);}
/* Begin a guess: save the parser state into zzst, store the result of
 * inputTokens->mark() in _marker, then setjmp on guess_start.state.  A
 * non-zero setjmp result runs zzGUESS_DONE.
 * NOTE(review): storing the setjmp result with a plain assignment is outside
 * the contexts ISO C lists for a setjmp call -- confirm this is acceptable on
 * every target compiler. */
70 #define zzGUESS saveState(&zzst); \
72 _marker = inputTokens->mark(); \
73 zzrv = setjmp(guess_start.state); \
74 if ( zzrv ) zzGUESS_DONE
76 /* a n t l r p a r s e r d e f */
78 struct ANTLRParserState {
80 zzjmp_buf guess_start;
91 * multiple inheritance is a cool way to include what stuff is needed
92 * in this structure (like guess stuff). however, i'm not convinced that
93 * multiple inheritance works correctly on all platforms. not that
94 * much space is used--just include all possibly useful members.
96 * the class should also be a template with arguments for the lookahead
97 * depth and so on. that way, more than one parser can be defined (as
98 * each will probably have different lookahead requirements). however,
99 * am i sure that templates work? no, i'm not sure.
101 * no attributes are maintained and, hence, the 'asp' variable is not
102 * needed. $i can still be referenced, but it refers to the token
103 * associated with that rule element. question: where are the tokens
104 * stored if not on the software stack? in local variables created
105 * and assigned to by antlr.
109 /* class variables */
// bitmask: one SetWordType entry per bit of a SetWordType
// (sizeof(SetWordType)*8 entries); the contents are defined elsewhere.
110 static SetWordType bitmask[sizeof(SetWordType)*8];
// eMsgBuffer: fixed 500-char static buffer; presumably the scratch area
// behind the static eMsg helpers -- TODO confirm against their definitions.
111 static char eMsgBuffer[500];
114 int LLk; // number of lookahead symbols (old LL_K)
116 ANTLRTokenType eofToken; // when do I stop during resynch()s
117 int bsetsize; // size of bitsets created by ANTLR in
118 // units of SetWordType
120 ANTLRTokenBuffer *inputTokens; //place to get input tokens
122 zzjmp_buf guess_start; // where to jump back to upon failure
123 int guessing; // if guessing (using (...)? predicate)
125 // infinite lookahead stuff
126 int can_use_inf_look; // set by subclass (generated by ANTLR)
132 ANTLRChar **token_tbl; // pointer to table of token type strings
134 int dirty; // used during demand lookahead
136 ANTLRTokenType *token_type; // fast reference cache of token.getType()
137 // ANTLRLightweightToken **token; // the token with all its attributes
142 void fill_inf_look();
// Abort the guess in progress: longjmp to the context stored in
// guess_start.state, so the matching setjmp returns 1.
145 void guess_fail() { longjmp(guess_start.state, 1); }
// End a guess by handing the saved state st to restoreState().
146 void guess_done(ANTLRParserState *st){ restoreState(st); }
147 int guess(ANTLRParserState *);
149 int _match(ANTLRTokenType, ANTLRChar **, ANTLRTokenType *,
150 _ANTLRTokenPtr *, SetWordType **);
151 int _setmatch(SetWordType *, ANTLRChar **, ANTLRTokenType *,
152 _ANTLRTokenPtr *, SetWordType **);
153 int _match_wsig(ANTLRTokenType);
154 int _setmatch_wsig(SetWordType *);
155 virtual void consume();
156 void resynch(SetWordType *wd,SetWordType mask);
157 void prime_lookahead();
158 virtual void tracein(char *r)
160 fprintf(stderr, "enter rule \"%s\"\n", r);
162 virtual void traceout(char *r)
164 fprintf(stderr, "exit rule \"%s\"\n", r);
// Bit-set index helpers.  They replace % and / by a mask and a shift, which
// is only valid while EXT_WORDSIZE == 1 << EXT_LOGWORDSIZE.
166 unsigned MODWORD(unsigned x) {return x & (EXT_WORDSIZE-1);} // x % EXT_WORDSIZE
167 unsigned DIVWORD(unsigned x) {return x >> EXT_LOGWORDSIZE;} // x / EXT_WORDSIZE
168 int set_deg(SetWordType *);
169 int set_el(ANTLRTokenType, SetWordType *);
170 void edecode(SetWordType *);
171 void FAIL(int k, ...);
174 ANTLRParser(ANTLRTokenBuffer *,
179 virtual ~ANTLRParser();
// Lookahead accessor: returns the cached token type for lookahead position i.
// The token_type slot is (base + i - 1) & (LLk-1), where base is labase when
// demand_look is set and lap otherwise.
183 ANTLRTokenType LA(int i)
185 return demand_look ? token_type[(labase+(i)-1)&(LLk-1)] :
186 token_type[(lap+(i)-1)&(LLk-1)];
188 _ANTLRTokenPtr LT(int i);
// Store t in eofToken (the member comment describes it as the point at which
// resynch() stops).
190 void setEofToken(ANTLRTokenType t) { eofToken = t; }
// The two calls below only forward to the token buffer held in inputTokens.
192 void noGarbageCollectTokens() { inputTokens->noGarbageCollectTokens(); }
193 void garbageCollectTokens() { inputTokens->garbageCollectTokens(); }
195 virtual void syn(_ANTLRTokenPtr tok, ANTLRChar *egroup,
196 SetWordType *eset, ANTLRTokenType etok, int k);
197 void saveState(ANTLRParserState *);
198 void restoreState(ANTLRParserState *);
200 virtual void panic(char *msg);
201 static char *eMsgd(char *,int);
202 static char *eMsg(char *,char *);
203 static char *eMsg2(char *,char *,char *);
205 void consumeUntil(SetWordType *st);
206 void consumeUntilToken(int t);
208 virtual int _setmatch_wdfltsig(SetWordType *tokensWanted,
209 ANTLRTokenType tokenTypeOfSet,
210 SetWordType *whatFollows);
211 virtual int _match_wdfltsig(ANTLRTokenType tokenWanted,
212 SetWordType *whatFollows);
/* Match token type _t through _match(); a zero result jumps to the `fail`
 * label.  The expansion site must have zzMissText, zzMissTok, zzBadTok and
 * zzMissSet in scope. */
216 #define zzmatch(_t) \
217 if ( !_match((ANTLRTokenType)_t, &zzMissText, &zzMissTok, \
218 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
/* Signal-raising variant: when _match_wsig() returns zero, a guessing parser
 * jumps to `fail`; otherwise _signal is set to MismatchedToken and control
 * goes to `handler`.
 * NOTE(review): the expansion is an unbraced if/if/else, so using it directly
 * under another if/else changes how the else clauses pair up. */
220 #define zzmatch_wsig(_t,handler) \
221 if ( !_match_wsig((ANTLRTokenType)_t) ) if ( guessing ) goto fail; else {_signal=MismatchedToken; goto handler;}
/* Like zzmatch, but calls _setmatch() with the token set _ts. */
223 #define zzsetmatch(_ts) \
224 if ( !_setmatch(_ts, &zzMissText, &zzMissTok, \
225 (_ANTLRTokenPtr *) &zzBadTok, &zzMissSet) ) goto fail;
/* Like zzmatch_wsig, but calls _setmatch_wsig() with the token set _ts; the
 * same unbraced if/if/else caveat applies. */
227 #define zzsetmatch_wsig(_ts, handler) \
228 if ( !_setmatch_wsig(_ts) ) if ( guessing ) goto fail; else {_signal=MismatchedToken; goto handler;}
230 /* For the dflt signal matchers, a FALSE indicates that an error occurred
231 * just like the other matchers, but in this case, the routine has already
232 * recovered--we do NOT want to consume another token. However, when
233 * the match was successful, we do want to consume hence _signal=0 so that
234 * a token is consumed by the "if (!_signal) consume(); _signal=NoSignal;"
// Default-signal set matcher: a zero result from _setmatch_wdfltsig() sets
// _signal to MismatchedToken; on success _signal is left untouched.
237 #define zzsetmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) \
238 if ( !_setmatch_wdfltsig(tokensWanted, tokenTypeOfSet, whatFollows) ) \
239 _signal = MismatchedToken;
// Single-token counterpart built on _match_wdfltsig().
241 #define zzmatch_wdfltsig(tokenWanted, whatFollows) \
242 if ( !_match_wdfltsig(tokenWanted, whatFollows) ) _signal = MismatchedToken;
245 #ifndef zzfailed_pred
246 #define zzfailed_pred(_p) \
247 zzNON_GUESS_MODE { fprintf(stderr, "line %d: semantic error; failed predicate: '%s'\n",LT(1)->getLine(), _p); }
251 SetWordType *zzMissSet=NULL; ANTLRTokenType zzMissTok=(ANTLRTokenType)0; \
252 _ANTLRTokenPtr zzBadTok; ANTLRChar *zzBadText=(ANTLRChar *)""; \
254 ANTLRChar *zzMissText=(ANTLRChar *)"";
258 /* S t a n d a r d E x c e p t i o n S i g n a l s */
/* Signal codes.  MismatchedToken is the value the *_wsig and *_wdfltsig match
 * macros assign to _signal; the code that raises the other two is outside
 * this section. */
261 #define MismatchedToken 1
262 #define NoViableAlt 2
263 #define NoSemViableAlt 3