1 /* This is the parser for the dlg
2 * This is a part of the Purdue Compiler Construction Tool Set
6 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
7 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
8 * company may do whatever they wish with source code distributed with
9 * PCCTS or the code generated by PCCTS, including the incorporation of
10 * PCCTS, or its output, into commerical software.
12 * We encourage users to develop software with PCCTS. However, we do ask
13 * that credit is given to us for developing PCCTS. By "credit",
14 * we mean that if you incorporate our source code into one of your
15 * programs (commercial product, research project, or otherwise) that you
16 * acknowledge this fact somewhere in the documentation, research report,
17 * etc... If you like PCCTS and have developed a nice tool with the
18 * output, please mention that you developed it using PCCTS. In
19 * addition, we ask that this header remain intact in our source code.
20 * As long as these guidelines are kept, we expect to continue enhancing
21 * this system and expect to make other tools available as they are
26 * With mods by Terence Parr; AHPCRC, University of Minnesota
39 int action_no = 0; /* keep track of actions outputed */
40 int nfa_allocated = 0; /* keeps track of number of nfa nodes */
41 nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */
42 nfa_node nfa_model_node; /* model to initialize new nodes */
43 set used_chars; /* used to label trans. arcs */
44 set used_classes; /* classes or chars used to label trans. arcs */
45 set normal_chars; /* mask to get rid elements that aren't used
47 int flag_paren = FALSE;
48 int flag_brace = FALSE;
49 int mode_counter = 0; /* keep track of number of %%names */
54 int func_action; /* should actions be turned into functions?*/
55 int lex_mode_counter = 0; /* keeps track of the number of %%names */
58 #token "[\r\t\ ]+" << zzskip(); >> /* Ignore white */
59 #token "\n" << zzline++; zzskip(); DAWDLE; >> /* Track Line # */
62 #token NAME_PER_PER "\%\%[a-zA-Z_][a-zA-Z0-9_]*"
63 << p_mode_def(&zzlextext[2],lex_mode_counter++); >>
66 fprintf(OUT,"\n%s %sact%d()\n{ ",
67 gen_cpp?"ANTLRTokenType":"static void",
68 gen_cpp?ClassName("::"):"", ++action_no);
69 zzmode(ACT); zzskip();
71 #token GREAT_GREAT "\>\>"
83 #token OCTAL_VALUE "\\0[0-7]*"
84 << {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>>
85 #token HEX_VALUE "\\0[Xx][0-9a-fA-F]+"
86 << {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>>
87 #token DEC_VALUE "\\[1-9][0-9]*"
88 << {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>>
89 #token TAB "\\t" << zzlextext[0] = '\t';>>
90 #token NL "\\n" << zzlextext[0] = '\n';>>
91 #token CR "\\r" << zzlextext[0] = '\r';>>
92 #token BS "\\b" << zzlextext[0] = '\b';>>
93 /* NOTE: this takes ANYTHING after the \ */
94 #token LIT "\\~[tnrb]" << zzlextext[0] = zzlextext[1];>>
95 /* NOTE: this takes ANYTHING that doesn't match the other tokens */
96 #token REGCHAR "~[\\]"
99 grammar : << p_head(); p_class_hdr(); func_action = FALSE;>> (ACTION)*
100 <<if ( gen_cpp ) p_includes();>>
102 << func_action = FALSE; p_tables(); p_tail(); >>
106 start_states : ( PER_PER do_conversion
107 | NAME_PER_PER do_conversion (NAME_PER_PER do_conversion)*)
111 do_conversion : <<new_automaton_mode(); func_action = TRUE;>>
114 dfa_class_nop[mode_counter] =
115 relabel($1.l,comp_level);
117 p_shift_table(mode_counter);
118 dfa_basep[mode_counter] = dfa_allocated+1;
119 make_dfa_model_node(dfa_class_nop[mode_counter]);
124 fprint_hash_stats(stderr);
129 rule_list : rule <<$$.l=$1.l; $$.r=$1.r;>>
135 /* all accept nodes "dead ends" */
141 <<$$.l = new_nfa_node(); $$.r = NULL;
142 warning("no regular expressions", zzline);
146 rule : reg_expr ACTION
147 <<$$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no;>>
149 <<$$.l = NULL; $$.r = NULL;
150 error("no expression for action ", zzline);
154 reg_expr : and_expr <<$$.l=$1.l; $$.r=$1.r;>>
156 <<{nfa_node *t1, *t2;
157 t1 = new_nfa_node(); t2 = new_nfa_node();
168 and_expr : repeat_expr <<$$.l=$1.l; $$.r=$1.r;>>
169 (repeat_expr <<($$.r)->trans[1]=$1.l; $$.r=$1.r;>>)*
172 repeat_expr : expr <<$$.l=$1.l; $$.r=$1.r;>>
174 <<{ nfa_node *t1,*t2;
175 ($$.r)->trans[0] = $$.l;
176 t1 = new_nfa_node(); t2 = new_nfa_node();
184 <<($$.r)->trans[0] = $$.l;>>
187 << error("no expression for *", zzline);>>
189 << error("no expression for +", zzline);>>
192 expr : << $$.l = new_nfa_node(); $$.r = new_nfa_node(); >>
193 L_BRACK atom_list R_BRACK
195 ($$.l)->trans[0] = $$.r;
196 ($$.l)->label = set_dup($2.label);
197 set_orin(&used_chars,($$.l)->label);
199 | NOT L_BRACK atom_list R_BRACK
201 ($$.l)->trans[0] = $$.r;
202 ($$.l)->label = set_dif(normal_chars,$3.label);
203 set_orin(&used_chars,($$.l)->label);
205 | L_PAR reg_expr R_PAR
207 ($$.l)->trans[0] = $2.l;
208 ($2.r)->trans[1] = $$.r;
210 | L_BRACE reg_expr R_BRACE
212 ($$.l)->trans[0] = $2.l;
213 ($$.l)->trans[1] = $$.r;
214 ($2.r)->trans[1] = $$.r;
218 ($$.l)->trans[0] = $$.r;
219 ($$.l)->label = set_dup($1.label);
220 set_orin(&used_chars,($$.l)->label);
224 atom_list : << set_free($$.label); >>
225 (near_atom <<set_orin(&($$.label),$1.label);>>)*
228 near_atom : << register int i;
229 register int i_prime;
232 <<$$.letter=$1.letter; $$.label=set_of($1.letter);
233 i_prime = $1.letter + MIN_CHAR;
234 if (case_insensitive && islower(i_prime))
235 set_orel(toupper(i_prime)-MIN_CHAR,
237 if (case_insensitive && isupper(i_prime))
238 set_orel(tolower(i_prime)-MIN_CHAR,
242 << if (case_insensitive){
243 i_prime = $$.letter+MIN_CHAR;
244 $$.letter = (islower(i_prime) ?
245 toupper(i_prime) : i_prime)-MIN_CHAR;
246 i_prime = $2.letter+MIN_CHAR;
247 $2.letter = (islower(i_prime) ?
248 toupper(i_prime) : i_prime)-MIN_CHAR;
250 /* check to see if range okay */
251 if ($$.letter > $2.letter){
252 error("invalid range ", zzline);
254 for (i=$$.letter; i<= (int)$2.letter; ++i){
255 set_orel(i,&($$.label));
256 i_prime = i+MIN_CHAR;
257 if (case_insensitive && islower(i_prime))
258 set_orel(toupper(i_prime)-MIN_CHAR,
260 if (case_insensitive && isupper(i_prime))
261 set_orel(tolower(i_prime)-MIN_CHAR,
268 atom : << register int i_prime;>>
270 <<$$.label = set_of($1.letter);
271 i_prime = $1.letter + MIN_CHAR;
272 if (case_insensitive && islower(i_prime))
273 set_orel(toupper(i_prime)-MIN_CHAR,
275 if (case_insensitive && isupper(i_prime))
276 set_orel(tolower(i_prime)-MIN_CHAR,
281 anychar : REGCHAR <<$$.letter = $1.letter - MIN_CHAR;>>
282 | OCTAL_VALUE <<$$.letter = $1.letter - MIN_CHAR;>>
283 | HEX_VALUE <<$$.letter = $1.letter - MIN_CHAR;>>
284 | DEC_VALUE <<$$.letter = $1.letter - MIN_CHAR;>>
285 | TAB <<$$.letter = $1.letter - MIN_CHAR;>>
286 | NL <<$$.letter = $1.letter - MIN_CHAR;>>
287 | CR <<$$.letter = $1.letter - MIN_CHAR;>>
288 | BS <<$$.letter = $1.letter - MIN_CHAR;>>
289 | LIT <<$$.letter = $1.letter - MIN_CHAR;>>
290 /* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/
291 | L_EOF <<$$.letter = 0;>>
294 <</* empty action */>>
297 #token "@" << error("unterminated action", zzline); zzmode(START); >>
299 << if (func_action) fprintf(OUT,"}\n\n");
302 #token "\>" << putc(zzlextext[0], OUT); zzskip(); >>
303 #token "\\\>" << putc('>', OUT); zzskip(); >>
304 #token "\\" << putc('\\', OUT); zzskip(); >>
305 #token "\n" << putc(zzlextext[0], OUT); ++zzline; zzskip(); >>
306 #token "~[\>\\@\n]+" << fprintf(OUT, "%s", &(zzlextext[0])); zzskip(); >>
309 /* adds a new nfa to the binary tree and returns a pointer to it */
310 nfa_node *new_nfa_node()
312 register nfa_node *t;
313 static int nfa_size=0; /* elements nfa_array[] can hold */
316 if (nfa_size<=nfa_allocated){
317 /* need to redo array */
319 /* need some to do inital allocation */
320 nfa_size=nfa_allocated+NFA_MIN;
321 nfa_array=(nfa_node **) malloc(sizeof(nfa_node*)*
324 /* need more space */
325 nfa_size=2*(nfa_allocated+1);
326 nfa_array=(nfa_node **) realloc(nfa_array,
327 sizeof(nfa_node*)*nfa_size);
330 /* fill out entry in array */
331 t = (nfa_node*) malloc(sizeof(nfa_node));
332 nfa_array[nfa_allocated] = t;
334 t->node_no = nfa_allocated;
339 /* initialize the model node used to fill in newly made nfa_nodes */
341 make_nfa_model_node()
343 nfa_model_node.node_no = -1; /* impossible value for real nfa node */
344 nfa_model_node.nfa_set = 0;
345 nfa_model_node.accept = 0; /* error state default*/
346 nfa_model_node.trans[0] = NULL;
347 nfa_model_node.trans[1] = NULL;
348 nfa_model_node.label = empty;
355 /* print out the pointer value and the node_number */
356 fprint_dfa_pair(f, p)
361 fprintf(f, "%x (%d)", p, p->node_no);
367 /* print out interest information on a set */
374 fprintf(f, "n = %d,", s.n);
376 fprintf(f, "setword = %x, ", s.setword);
377 /* print out all the elements in the set */
380 fprintf(f, "%d ", *x);
384 fprintf(f, "setword = (nil)");
388 /* code to be able to dump out the nfas
389 return 0 if okay dump
390 return 1 if screwed up
392 int dump_nfas(first_node, last_node)
399 for (i=first_node; i<=last_node; ++i){
402 fprintf(stderr, "nfa_node %d {\n", t->node_no);
403 fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set);
404 fprintf(stderr, "\taccept\t=\t%d\n", t->accept);
405 fprintf(stderr, "\ttrans\t=\t(");
406 fprint_dfa_pair(stderr, t->trans[0]);
407 fprintf(stderr, ",");
408 fprint_dfa_pair(stderr, t->trans[1]);
409 fprintf(stderr, ")\n");
410 fprintf(stderr, "\tlabel\t=\t{ ");
411 fprint_set(stderr, t->label);
412 fprintf(stderr, "\t}\n");
413 fprintf(stderr, "}\n\n");
421 /* DLG-specific syntax error message generator
422 * (define USER_ZZSYN when compiling so don't get 2 definitions)
426 zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
428 zzsyn(text, tok, egroup, eset, etok, k, bad_text)
429 char *text, *egroup, *bad_text;
436 fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline);
437 fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text);
438 if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
439 if ( k==1 ) fprintf(stderr, " missing");
442 fprintf(stderr, "; \"%s\" not", bad_text);
443 if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
445 if ( zzset_deg(eset)>0 ) zzedecode(eset);
446 else fprintf(stderr, " %s", zztokens[etok]);
447 if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup);
448 fprintf(stderr, "\n");