]> pd.if.org Git - pccts/blob - dlg/dlg_p.g
auto commit for import
[pccts] / dlg / dlg_p.g
1 /*  This is the parser for the dlg
2  *  This is a part of the Purdue Compiler Construction Tool Set
3  *
4  * SOFTWARE RIGHTS
5  *
6  * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
7  * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
8  * company may do whatever they wish with source code distributed with
9  * PCCTS or the code generated by PCCTS, including the incorporation of
10  * PCCTS, or its output, into commerical software.
11  * 
12  * We encourage users to develop software with PCCTS.  However, we do ask
13  * that credit is given to us for developing PCCTS.  By "credit",
14  * we mean that if you incorporate our source code into one of your
15  * programs (commercial product, research project, or otherwise) that you
16  * acknowledge this fact somewhere in the documentation, research report,
17  * etc...  If you like PCCTS and have developed a nice tool with the
18  * output, please mention that you developed it using PCCTS.  In
19  * addition, we ask that this header remain intact in our source code.
20  * As long as these guidelines are kept, we expect to continue enhancing
21  * this system and expect to make other tools available as they are
22  * completed.
23  *
24  * DLG 1.33
25  * Will Cohen
26  * With mods by Terence Parr; AHPCRC, University of Minnesota
27  * 1989-1995
28  */
29
30 #header <<
31 #include <ctype.h>
32 #include "dlg.h"
33 #ifdef MEMCHK
34 #include "trax.h"
35 #endif
36 >>
37
38 <<
39 int     action_no = 0;     /* keep track of actions outputed */
40 int     nfa_allocated = 0; /* keeps track of number of nfa nodes */
41 nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */
42 nfa_node nfa_model_node;   /* model to initialize new nodes */
43 set     used_chars;        /* used to label trans. arcs */
44 set     used_classes;      /* classes or chars used to label trans. arcs */
45 set     normal_chars;      /* mask to get rid elements that aren't used
46                               in set */
47 int     flag_paren = FALSE;
48 int     flag_brace = FALSE;
49 int     mode_counter = 0;  /* keep track of number of %%names */
50
51 >>
52
53 #lexaction <<
54 int     func_action;            /* should actions be turned into functions?*/
55 int     lex_mode_counter = 0;   /* keeps track of the number of %%names */
56 >>
57
58 #token "[\r\t\ ]+"      << zzskip(); >>                                         /* Ignore white */
59 #token "\n"                     << zzline++; zzskip(); DAWDLE; >>       /* Track Line # */
60 #token L_EOF            "\@"
61 #token PER_PER          "\%\%"
62 #token NAME_PER_PER     "\%\%[a-zA-Z_][a-zA-Z0-9_]*"
63                 << p_mode_def(&zzlextext[2],lex_mode_counter++); >>
64 #token ACTION           "\<\<"
65                 << if (func_action)
66                         fprintf(OUT,"\n%s %sact%d()\n{ ",
67                                         gen_cpp?"ANTLRTokenType":"static void",
68                                         gen_cpp?ClassName("::"):"", ++action_no);
69                    zzmode(ACT); zzskip();
70                 >>
71 #token GREAT_GREAT      "\>\>"
72 #token L_BRACE          "\{"
73 #token R_BRACE          "\}"
74 #token L_PAR            "\("
75 #token R_PAR            "\)"
76 #token L_BRACK          "\["
77 #token R_BRACK          "\]"
78 #token ZERO_MORE        "\*"
79 #token ONE_MORE         "\+"
80 #token OR               "\|"
81 #token RANGE            "\-"
82 #token NOT              "\~"
83 #token OCTAL_VALUE "\\0[0-7]*"          
84         << {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>>
85 #token HEX_VALUE   "\\0[Xx][0-9a-fA-F]+"
86         << {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>>
87 #token DEC_VALUE   "\\[1-9][0-9]*"
88         << {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>>
89 #token TAB              "\\t"           << zzlextext[0] = '\t';>>
90 #token NL               "\\n"           << zzlextext[0] = '\n';>>
91 #token CR               "\\r"           << zzlextext[0] = '\r';>>
92 #token BS               "\\b"           << zzlextext[0] = '\b';>>
93 /* NOTE: this takes ANYTHING after the \ */
94 #token LIT              "\\~[tnrb]"     << zzlextext[0] = zzlextext[1];>>
95 /* NOTE: this takes ANYTHING that doesn't match the other tokens */
96 #token REGCHAR          "~[\\]"
97
98
99 grammar         :   << p_head(); p_class_hdr(); func_action = FALSE;>> (ACTION)*
100                     <<if ( gen_cpp ) p_includes();>>
101                     start_states
102                     << func_action = FALSE; p_tables(); p_tail(); >>
103                     (ACTION)* "@"
104                 ;
105
106 start_states    : ( PER_PER do_conversion
107                   | NAME_PER_PER do_conversion (NAME_PER_PER do_conversion)*)
108                     PER_PER 
109                 ;
110
111 do_conversion   : <<new_automaton_mode(); func_action = TRUE;>>
112                         rule_list
113                         <<
114                                 dfa_class_nop[mode_counter] =
115                                         relabel($1.l,comp_level);
116                                 if (comp_level)
117                                         p_shift_table(mode_counter);
118                                 dfa_basep[mode_counter] = dfa_allocated+1;
119                                 make_dfa_model_node(dfa_class_nop[mode_counter]);
120                                 nfa_to_dfa($1.l);
121                                 ++mode_counter;
122                                 func_action = FALSE;
123 #ifdef HASH_STAT
124                                 fprint_hash_stats(stderr);
125 #endif
126                         >>
127                 ;
128
129 rule_list       : rule <<$$.l=$1.l; $$.r=$1.r;>>
130                         (rule
131                                 <<{nfa_node *t1;
132                                    t1 = new_nfa_node();
133                                    (t1)->trans[0]=$$.l;
134                                    (t1)->trans[1]=$1.l;
135                                    /* all accept nodes "dead ends" */
136                                    $$.l=t1; $$.r=NULL;
137                                    }
138                                 >>
139                         )*
140                 | /* empty */
141                         <<$$.l = new_nfa_node(); $$.r = NULL;
142                            warning("no regular expressions", zzline);
143                         >>
144                 ;
145
146 rule            : reg_expr ACTION
147                         <<$$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no;>>
148                 | ACTION
149                         <<$$.l = NULL; $$.r = NULL;
150                           error("no expression for action  ", zzline);
151                         >>
152                 ;
153
154 reg_expr        : and_expr <<$$.l=$1.l; $$.r=$1.r;>>
155                         (OR and_expr 
156                                 <<{nfa_node *t1, *t2;
157                                    t1 = new_nfa_node(); t2 = new_nfa_node();
158                                    (t1)->trans[0]=$$.l;
159                                    (t1)->trans[1]=$2.l;
160                                    ($$.r)->trans[1]=t2;
161                                    ($2.r)->trans[1]=t2;
162                                    $$.l=t1; $$.r=t2;
163                                   }
164                                 >>
165                         )*
166                 ;
167
168 and_expr        : repeat_expr <<$$.l=$1.l; $$.r=$1.r;>>
169                         (repeat_expr <<($$.r)->trans[1]=$1.l; $$.r=$1.r;>>)*
170                 ;
171
172 repeat_expr     : expr <<$$.l=$1.l; $$.r=$1.r;>>
173                         { ZERO_MORE
174                         <<{     nfa_node *t1,*t2;
175                                 ($$.r)->trans[0] = $$.l;
176                                 t1 = new_nfa_node(); t2 = new_nfa_node();
177                                 t1->trans[0]=$$.l;
178                                 t1->trans[1]=t2;
179                                 ($$.r)->trans[1]=t2;
180                                 $$.l=t1;$$.r=t2;
181                           }
182                         >>
183                         | ONE_MORE
184                         <<($$.r)->trans[0] = $$.l;>>
185                         }
186                 | ZERO_MORE
187                         << error("no expression for *", zzline);>>
188                 | ONE_MORE
189                         << error("no expression for +", zzline);>>
190                 ;
191
192 expr            : << $$.l = new_nfa_node(); $$.r = new_nfa_node(); >>
193                   L_BRACK atom_list R_BRACK
194                         <<
195                                 ($$.l)->trans[0] = $$.r;
196                                 ($$.l)->label = set_dup($2.label);
197                                 set_orin(&used_chars,($$.l)->label);
198                         >>
199                 | NOT L_BRACK atom_list R_BRACK
200                         <<
201                                 ($$.l)->trans[0] = $$.r;
202                                 ($$.l)->label = set_dif(normal_chars,$3.label);
203                                 set_orin(&used_chars,($$.l)->label);
204                         >>
205                 | L_PAR reg_expr R_PAR
206                         <<
207                                 ($$.l)->trans[0] = $2.l;
208                                 ($2.r)->trans[1] = $$.r;
209                         >>
210                 | L_BRACE reg_expr R_BRACE
211                         <<
212                                 ($$.l)->trans[0] = $2.l;
213                                 ($$.l)->trans[1] = $$.r;
214                                 ($2.r)->trans[1] = $$.r;
215                         >>
216                 | atom
217                         <<
218                                 ($$.l)->trans[0] = $$.r;
219                                 ($$.l)->label = set_dup($1.label);
220                                 set_orin(&used_chars,($$.l)->label);
221                         >>
222                 ;
223
224 atom_list       : << set_free($$.label); >>
225                         (near_atom <<set_orin(&($$.label),$1.label);>>)*
226                 ;
227
228 near_atom       : << register int i;
229                      register int i_prime;
230                   >>
231                   anychar
232                         <<$$.letter=$1.letter; $$.label=set_of($1.letter);
233                         i_prime = $1.letter + MIN_CHAR;
234                         if (case_insensitive && islower(i_prime))
235                                 set_orel(toupper(i_prime)-MIN_CHAR,
236                                         &($$.label));
237                         if (case_insensitive && isupper(i_prime))
238                                 set_orel(tolower(i_prime)-MIN_CHAR,
239                                         &($$.label));
240                         >>
241                         { RANGE anychar 
242                                 << if (case_insensitive){
243                                         i_prime = $$.letter+MIN_CHAR;
244                                         $$.letter = (islower(i_prime) ?
245                                                 toupper(i_prime) : i_prime)-MIN_CHAR;
246                                         i_prime = $2.letter+MIN_CHAR;
247                                         $2.letter = (islower(i_prime) ?
248                                                 toupper(i_prime) : i_prime)-MIN_CHAR;
249                                    }
250                                    /* check to see if range okay */
251                                    if ($$.letter > $2.letter){
252                                           error("invalid range  ", zzline);
253                                    }
254                                    for (i=$$.letter; i<= (int)$2.letter; ++i){
255                                         set_orel(i,&($$.label));
256                                         i_prime = i+MIN_CHAR;
257                                         if (case_insensitive && islower(i_prime))
258                                                 set_orel(toupper(i_prime)-MIN_CHAR,
259                                                         &($$.label));
260                                         if (case_insensitive && isupper(i_prime))
261                                                 set_orel(tolower(i_prime)-MIN_CHAR,
262                                                         &($$.label));
263                                         }
264                                 >>
265                         }
266                 ;
267
268 atom            : << register int i_prime;>>
269                   anychar
270                   <<$$.label = set_of($1.letter);
271                     i_prime = $1.letter + MIN_CHAR;
272                     if (case_insensitive && islower(i_prime))
273                         set_orel(toupper(i_prime)-MIN_CHAR,
274                                 &($$.label));
275                     if (case_insensitive && isupper(i_prime))
276                         set_orel(tolower(i_prime)-MIN_CHAR,
277                                 &($$.label));
278                   >>
279                 ;
280
281 anychar         : REGCHAR       <<$$.letter = $1.letter - MIN_CHAR;>>
282                 | OCTAL_VALUE   <<$$.letter = $1.letter - MIN_CHAR;>>
283                 | HEX_VALUE     <<$$.letter = $1.letter - MIN_CHAR;>>
284                 | DEC_VALUE     <<$$.letter = $1.letter - MIN_CHAR;>>
285                 | TAB           <<$$.letter = $1.letter - MIN_CHAR;>>
286                 | NL            <<$$.letter = $1.letter - MIN_CHAR;>>
287                 | CR            <<$$.letter = $1.letter - MIN_CHAR;>>
288                 | BS            <<$$.letter = $1.letter - MIN_CHAR;>>
289                 | LIT           <<$$.letter = $1.letter - MIN_CHAR;>>
290                 /* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/
291                 | L_EOF         <<$$.letter = 0;>>
292                 ;
293
294 <</* empty action */>>
295
296 #lexclass ACT
297 #token "@"      << error("unterminated action", zzline); zzmode(START); >>
298 #token ACTION "\>\>"
299                 << if (func_action) fprintf(OUT,"}\n\n");
300                    zzmode(START);
301                 >>
302 #token "\>"             << putc(zzlextext[0], OUT); zzskip(); >>
303 #token "\\\>"           << putc('>', OUT); zzskip(); >>
304 #token "\\"             << putc('\\', OUT); zzskip(); >>
305 #token "\n"             << putc(zzlextext[0], OUT); ++zzline; zzskip(); >>
306 #token "~[\>\\@\n]+"    << fprintf(OUT, "%s", &(zzlextext[0])); zzskip(); >>
307
308 <<
309 /* adds a new nfa to the binary tree and returns a pointer to it */
310 nfa_node *new_nfa_node()
311 {
312         register nfa_node *t;
313         static int nfa_size=0;  /* elements nfa_array[] can hold */
314
315         ++nfa_allocated;
316         if (nfa_size<=nfa_allocated){
317                 /* need to redo array */
318                 if (!nfa_array){
319                         /* need some to do inital allocation */
320                         nfa_size=nfa_allocated+NFA_MIN;
321                         nfa_array=(nfa_node **) malloc(sizeof(nfa_node*)*
322                                 nfa_size);
323                 }else{
324                         /* need more space */
325                         nfa_size=2*(nfa_allocated+1);
326                         nfa_array=(nfa_node **) realloc(nfa_array, 
327                                 sizeof(nfa_node*)*nfa_size);
328                 }
329         }
330         /* fill out entry in array */
331         t = (nfa_node*) malloc(sizeof(nfa_node));
332         nfa_array[nfa_allocated] = t;
333         *t = nfa_model_node;
334         t->node_no = nfa_allocated;
335         return t;
336 }
337
338
339 /* initialize the model node used to fill in newly made nfa_nodes */
340 void
341 make_nfa_model_node()
342 {
343         nfa_model_node.node_no = -1; /* impossible value for real nfa node */
344         nfa_model_node.nfa_set = 0;
345         nfa_model_node.accept = 0;   /* error state default*/
346         nfa_model_node.trans[0] = NULL;
347         nfa_model_node.trans[1] = NULL;
348         nfa_model_node.label = empty;
349 }
350 >>
351
352 <<
353 #ifdef DEBUG
354
355 /* print out the pointer value and the node_number */
356 fprint_dfa_pair(f, p)
357 FILE *f;
358 nfa_node *p;
359 {
360         if (p){
361                 fprintf(f, "%x (%d)", p, p->node_no);
362         }else{
363                 fprintf(f, "(nil)");
364         }
365 }
366
367 /* print out interest information on a set */
368 fprint_set(f,s)
369 FILE *f;
370 set s;
371 {
372         unsigned int *x;
373
374         fprintf(f, "n = %d,", s.n);
375         if (s.setword){
376                 fprintf(f, "setword = %x,   ", s.setword);
377                 /* print out all the elements in the set */
378                 x = set_pdq(s);
379                 while (*x!=nil){
380                         fprintf(f, "%d ", *x);
381                         ++x;
382                 }
383         }else{
384                 fprintf(f, "setword = (nil)");
385         }
386 }
387
388 /* code to be able to dump out the nfas
389         return 0 if okay dump
390         return 1 if screwed up
391  */
392 int dump_nfas(first_node, last_node)
393 int first_node;
394 int last_node;
395 {
396         register int i;
397         nfa_node *t;
398
399         for (i=first_node; i<=last_node; ++i){
400                 t = NFA(i);
401                 if (!t) break;
402                 fprintf(stderr, "nfa_node %d {\n", t->node_no);
403                 fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set);
404                 fprintf(stderr, "\taccept\t=\t%d\n", t->accept);
405                 fprintf(stderr, "\ttrans\t=\t(");
406                 fprint_dfa_pair(stderr, t->trans[0]);
407                 fprintf(stderr, ",");
408                 fprint_dfa_pair(stderr, t->trans[1]);
409                 fprintf(stderr, ")\n");
410                 fprintf(stderr, "\tlabel\t=\t{ ");
411                 fprint_set(stderr, t->label);
412                 fprintf(stderr, "\t}\n");
413                 fprintf(stderr, "}\n\n");
414         }
415         return 0;
416 }
417 #endif
418 >>
419
420 <<
421 /* DLG-specific syntax error message generator 
422  * (define USER_ZZSYN when compiling so don't get 2 definitions)
423  */
424 void
425 #ifdef __STDC__
426 zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text)
427 #else
428 zzsyn(text, tok, egroup, eset, etok, k, bad_text)
429 char *text, *egroup, *bad_text;
430 int tok;
431 int etok;
432 int k;
433 SetWordType *eset;
434 #endif
435 {
436         fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline);
437         fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text);
438         if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
439         if ( k==1 ) fprintf(stderr, " missing");
440         else
441         {
442                 fprintf(stderr, "; \"%s\" not", bad_text);
443                 if ( zzset_deg(eset)>1 ) fprintf(stderr, " in");
444         }
445         if ( zzset_deg(eset)>0 ) zzedecode(eset);
446         else fprintf(stderr, " %s", zztokens[etok]);
447         if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup);
448         fprintf(stderr, "\n");
449 }
450 >>