1 /* output.c, output generator for dlg
5 * DfaStates == number of dfa nodes in automaton (just a #define)
6 * DfaState == type large enough to index every node in automaton
7 * <256 unsigned char, <65536 unsigned short, etc.
9 * Thus, the elements in each of the automaton states (st%d) are of type DfaState
10 * and are sized appropriately, since they must be able to index the next
13 * dfa[] == a linear array that points to all the automaton states (st%d)
14 * (dfa_base[] should be the same, but isn't right now)
16 * accepts[] == Taking a closer look at this one, it probably shouldn't be type
17 * DfaState because there is no real requirement that the number of
18 * accept states is less than the number of DFA states. However, if
19 * the number of accept states was more than the number of DFA states
20 * then the lexical specification would be really ambiguous.
22 * Another note: it should be possible to fold accepts[] and
23 * actions[] together. If this is done, I would suggest getting rid of
24 * accepts[] and making actions[] have an entry for each state (st%d) in
27 * dfa_base[] == starting location for each lexical mode. This should be
28 * DfaState type (but isn't right now), since it points to the states
31 * dfa_class_no[] == indicates the number of columns each lexical mode has.
33 * b_class_no[] == pointer to the start of the translation array used to
34 * convert from input character to character class. This could cause
35 * problems if there are more than 256 classes
37 * shift%d[] == the actual translation arrays that convert the input character
38 * into the character class. These will have to change if there are
39 * more than 256 character classes.
43 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
44 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
45 * company may do whatever they wish with source code distributed with
46 * PCCTS or the code generated by PCCTS, including the incorporation of
47 * PCCTS, or its output, into commerical software.
49 * We encourage users to develop software with PCCTS. However, we do ask
50 * that credit is given to us for developing PCCTS. By "credit",
51 * we mean that if you incorporate our source code into one of your
52 * programs (commercial product, research project, or otherwise) that you
53 * acknowledge this fact somewhere in the documentation, research report,
54 * etc... If you like PCCTS and have developed a nice tool with the
55 * output, please mention that you developed it using PCCTS. In
56 * addition, we ask that this header remain intact in our source code.
57 * As long as these guidelines are kept, we expect to continue enhancing
58 * this system and expect to make other tools available as they are
63 * With mods by Terence Parr; AHPCRC, University of Minnesota
/* Lexical-mode bookkeeping: slot k pairs a mode's name (mode_name[k]) with
 * its number (mode_number[k]).  New entries are stored at index cur_mode and
 * the emitters in this file walk slots 0..cur_mode-1. */
79 static char *mode_name[MAX_MODES];
80 static int mode_number[MAX_MODES];
81 static int cur_mode=0;
83 int operation_no = 0; /* used to mark nodes so that infinite loops avoided */
84 int dfa_basep[MAX_MODES]; /* start of each group of states */
85 int dfa_class_nop[MAX_MODES]; /* number of elements in each group of states*/
87 int gen_ansi = FALSE; /* allows ansi code to be generated */
89 FILE *input_stream; /* where to read description from */
90 FILE *output_stream; /* where to put the output */
91 FILE *mode_stream; /* where to put the mode.h stuff */
92 FILE *class_stream; /* where to put the scan.h stuff (if gen_cpp) */
94 /* NOTE: This section is MACHINE DEPENDENT */
/* typesize[k] is the limit paired with the C type named by typevar[k];
 * minsize() scans typesize[] in order using `elements > typesize[i]`.
 * Two alternative tables follow; they differ only in the limit given for
 * "unsigned int" (0x7fff versus 0x7fffffff).
 * NOTE(review): the limits are the signed maxima (0x7f, 0x7fff, ...) even
 * though the type names are unsigned -- presumably deliberate headroom;
 * confirm before changing. */
96 #if defined(PC) && !defined(PC32)
/* table used when PC is defined and PC32 is not */
97 long typesize[DIF_SIZE] = { 0x7f, 0x7fff, 0x7fff, 0x7fffffff };
98 char t0[] = "unsigned char";
99 char t1[] = "unsigned short";
100 char t2[] = "unsigned int";
101 char t3[] = "unsigned long";
102 char *typevar[DIF_SIZE] = { t0, t1, t2, t3};
/* alternative table (presumably the #else branch of the test above --
 * the directive itself is not visible here; TODO confirm) */
104 long typesize[DIF_SIZE] = { 0x7f, 0x7fff, 0x7fffffff, 0x7fffffff };
105 char t0[] = "unsigned char";
106 char t1[] = "unsigned short";
107 char t2[] = "unsigned int";
108 char t3[] = "unsigned long";
109 char *typevar[DIF_SIZE] = { t0, t1, t2, t3};
118 /* Added by TJP August 1994 */
119 /* Take in MyLexer and return MyLexer_h */
/* The text is formatted into a function-local static buffer, so every call
 * overwrites what the previous call produced. */
122 gate_symbol(char *name)
128 static char buf[100];
/* NOTE(review): sprintf is unbounded -- a name longer than 97 characters
 * (100 bytes less "_h" and the terminator) would overrun buf. */
129 sprintf(buf, "%s_h", name);
133 /* Added by TJP August 1994 */
/* Allocates strlen(s)+1 bytes: room for the characters of s plus the
 * terminating NUL.
 * NOTE(review): no check of the malloc result is visible here -- confirm
 * how an allocation failure is handled. */
142 char *p = (char *)malloc(strlen(s)+1);
/* Writes the opening of the generated lexer class header to class_stream:
 * an #ifndef/#define include guard whose symbol comes from
 * gate_symbol(ClassName("")), then a banner comment naming the input file
 * (file_str[0]) and the DLG version.  Nothing is written when class_stream
 * is NULL. */
149 if ( class_stream == NULL ) return;
150 fprintf(class_stream, "#ifndef %s\n", gate_symbol(ClassName("")));
151 fprintf(class_stream, "#define %s\n", gate_symbol(ClassName("")));
152 fprintf(class_stream, "/*\n");
153 fprintf(class_stream, " * D L G L e x e r C l a s s D e f i n i t i o n\n");
154 fprintf(class_stream, " *\n");
155 fprintf(class_stream, " * Generated from:");
156 fprintf(class_stream, " %s", file_str[0]);
157 fprintf(class_stream, "\n");
158 fprintf(class_stream, " *\n");
159 fprintf(class_stream, " * 1989-1994 by Will Cohen, Terence Parr, and Hank Dietz\n");
160 fprintf(class_stream, " * Purdue University Electrical Engineering\n");
161 fprintf(class_stream, " * DLG Version %s\n", version);
162 fprintf(class_stream, " */\n\n");
/* Writes the declaration of the generated lexer class to class_stream and
 * finishes with the #endif text.  Nothing is written when class_stream is
 * NULL. */
169 if ( class_stream == NULL ) return;
170 fprintf(class_stream, "\n");
171 fprintf(class_stream, "#include \"%s\"\n\n", DLEXERBASE_H);
172 fprintf(class_stream, "class %s : public DLGLexerBase {\n", ClassName(""));
173 fprintf(class_stream, "public:\n");
174 fprintf(class_stream, "\tstatic const int MAX_MODE;\n");
175 fprintf(class_stream, "\tstatic const int DfaStates;\n");
/* one static const int member per recorded lexical mode */
176 for (i=0; i<cur_mode; i++) {
177 fprintf(class_stream, "\tstatic const int %s;\n", mode_name[i]);
/* DfaState is typedef'd to the type name chosen by minsize(dfa_allocated) */
180 fprintf(class_stream, "\ttypedef %s DfaState;\n\n", minsize(dfa_allocated));
/* constructor: forwards in, bufsize and the value of `interactive` to
 * DLGLexerBase; its body is a lone ';' */
181 fprintf(class_stream, "\t%s(DLGInputStream *in,\n",ClassName(""));
182 fprintf(class_stream, "\t\tunsigned bufsize=2000)\n");
183 fprintf(class_stream, "\t\t: DLGLexerBase(in, bufsize, %d)\n", interactive);
184 fprintf(class_stream, "\t{\n");
185 fprintf(class_stream, "\t;\n");
186 fprintf(class_stream, "\t}\n");
187 fprintf(class_stream, "\tvoid mode(int);\n");
188 fprintf(class_stream, "\tANTLRTokenType nextTokenType(void);\n");
189 fprintf(class_stream, "\tvoid advance(void);\n");
191 fprintf(class_stream, "protected:\n");
/* one member function act<i> for each i in 1..action_no */
192 for (i=1; i<=action_no; ++i) {
193 fprintf(class_stream, "\tANTLRTokenType act%d();\n", i);
/* state arrays: for each mode m before the last, the states numbered
 * dfa_basep[m] up to (not including) dfa_basep[m+1] are declared as
 * st<i-1> with dfa_class_nop[m]+1 elements */
196 for(m=0; m<(mode_counter-1); ++m){
197 for(i=dfa_basep[m]; i<dfa_basep[m+1]; ++i)
198 fprintf(class_stream, "\tstatic DfaState st%d[%d];\n", i-1, dfa_class_nop[m]+1);
/* states from dfa_basep[m] through dfa_allocated; presumably this runs
 * after the loop above with m left at the last mode -- TODO confirm */
200 for(i=dfa_basep[m]; i<=dfa_allocated; ++i)
201 fprintf(class_stream, "\tstatic DfaState st%d[%d];\n", i-1, dfa_class_nop[m]+1);
203 fprintf(class_stream, "\tstatic DfaState *dfa[%d];\n", dfa_allocated);
204 fprintf(class_stream, "\tstatic DfaState dfa_base[];\n");
205 /* fprintf(class_stream, "\tstatic int dfa_base_no[];\n"); */
206 fprintf(class_stream, "\tstatic unsigned char *b_class_no[];\n");
/* accepts[] and alternatives[] are declared with dfa_allocated+1 elements */
207 fprintf(class_stream, "\tstatic DfaState accepts[%d];\n",dfa_allocated+1);
208 fprintf(class_stream, "\tstatic DLGChar alternatives[%d];\n",dfa_allocated+1);
209 /* WARNING: should be ANTLRTokenType for action table, but g++ 2.5.6 is hosed */
210 fprintf(class_stream, "\tstatic ANTLRTokenType (%s::*actions[%d])();\n", ClassName(""), action_no+1);
/* one shift<m> translation table per mode */
211 for(m=0; m<mode_counter; ++m) {
212 fprintf(class_stream, "\tstatic unsigned char shift%d[%d];\n",
/* two alternative ZZSHIFT definitions: a lookup through
 * b_class_no[automaton], or the plain value 1+c (the test that selects
 * between them is not visible here) */
216 fprintf(class_stream, "\tint ZZSHIFT(int c) { return b_class_no[automaton][1+c]; }\n");
218 fprintf(class_stream, "\tint ZZSHIFT(int c) { return 1+c; }\n");
220 fprintf(class_stream, "};\n");
/* pointer-to-member typedef Ptr<Class>MemberFunc, emitted after the class */
222 fprintf(class_stream, "typedef ANTLRTokenType (%s::*Ptr%sMemberFunc)();\n",
223 ClassName(""), ClassName(""));
225 fprintf(class_stream, "#endif\n");
228 /* generate required header on output */
/* Emits a banner comment to OUT naming the input file (file_str[0]) and the
 * DLG version, followed by the #include lines the generated tables need. */
232 fprintf(OUT, "/*\n");
233 fprintf(OUT, " * D L G tables\n");
234 fprintf(OUT, " *\n");
235 fprintf(OUT, " * Generated from:");
236 fprintf(OUT, " %s", file_str[0]);
238 fprintf(OUT, " *\n");
239 fprintf(OUT, " * 1989-1994 by Will Cohen, Terence Parr, and Hank Dietz\n");
240 fprintf(OUT, " * Purdue University Electrical Engineering\n");
241 fprintf(OUT, " * DLG Version %s\n", version);
242 fprintf(OUT, " */\n\n");
/* with gen_cpp set <stdio.h> is included; otherwise the mode file is */
243 if ( gen_cpp ) fprintf(OUT, "#include <stdio.h>\n");
244 if ( !gen_cpp ) fprintf(OUT, "#include \"%s\"\n\n", mode_file);
/* parser header, lexer base header, then the generated class's own ".h"
 * (the condition guarding these three lines is not visible here) */
252 fprintf(OUT, "#include \"%s\"\n", APARSER_H);
253 fprintf(OUT, "#include \"%s\"\n", DLEXERBASE_H);
254 fprintf(OUT, "#include \"%s\"\n", ClassName(".h"));
257 /* generate code to tie up any loose ends */
/* When the class name differs from DEFAULT_CLASSNAME, DLGLexer is #define'd
 * to it before DLEXER_C is included. */
262 if ( strcmp(ClassName(""), DEFAULT_CLASSNAME)!=0 )
263 fprintf(OUT, "#define DLGLexer %s\n", ClassName(""));
264 fprintf(OUT, "#include \"%s\"\n", DLEXER_C);
/* two alternative ZZSHIFT macros: a lookup through b_class_no[zzauto], or
 * the plain value 1+c (the selecting test is not visible here) */
270 fprintf(OUT, "#define ZZSHIFT(c) (b_class_no[zzauto][1+c])\n");
272 fprintf(OUT, "#define ZZSHIFT(c) (1+c)\n");
/* MAX_MODE is emitted as a macro only when gen_cpp is not set */
273 if ( !gen_cpp ) fprintf(OUT, "#define MAX_MODE %d\n",mode_counter);
274 fprintf(OUT, "#include \"dlgauto.h\"\n");
278 /* output the table of DFA for general use */
/* DfaStates as a macro and DfaState as a typedef of the type name chosen by
 * minsize(dfa_allocated) */
285 fprintf(OUT, "#define DfaStates\t%d\n", dfa_allocated);
286 fprintf(OUT, "typedef %s DfaState;\n\n", minsize(dfa_allocated));
/* definitions of the class's static const members MAX_MODE and DfaStates
 * (the value arguments are on lines not visible here) */
292 fprintf(OUT, "const int %s::MAX_MODE=%d;\n",
295 fprintf(OUT, "const int %s::DfaStates=%d;\n",
/* one const int definition per recorded mode, valued mode_number[i] */
298 for (i=0; i<cur_mode; i++) {
299 fprintf(OUT, "const int %s::%s=%d;\n",
300 ClassName(""), mode_name[i], mode_number[i]);
/* the alternatives table is produced when either flag is set */
313 if (interactive || gen_cpp )
314 p_alternative_table();
318 /* figures out the smallest variable type that will hold the transitions
/* minsize: the visible loop keeps going while `elements` exceeds
 * typesize[i], i.e. it stops at the first table entry whose limit is at
 * least `elements`.
 * NOTE(review): no upper bound on i is visible in this condition --
 * confirm that `elements` can never exceed the last typesize[] entry. */
320 char *minsize(elements)
325 while (elements > typesize[i])
/* Emits every state array via p_single_node: for each mode m before the
 * last, states dfa_basep[m] up to (not including) dfa_basep[m+1], each with
 * dfa_class_nop[m] classes. */
336 for(m=0; m<(mode_counter-1); ++m){
337 for(i=dfa_basep[m]; i<dfa_basep[m+1]; ++i)
338 p_single_node(i,dfa_class_nop[m]);
/* states from dfa_basep[m] through dfa_allocated; presumably runs after
 * the loop above with m left at the last mode -- TODO confirm */
340 for(i=dfa_basep[m]; i<=dfa_allocated; ++i)
341 p_single_node(i,dfa_class_nop[m]);
/* p_single_node: writes the transition array for DFA state i to OUT as
 * st<i-1>, one entry per character class (`classes` of them). */
345 p_single_node(i,classes)
349 register int trans, items_on_line;
352 /* extra state (classes+1) for invalid characters */
/* two alternative array headers follow: one sized classes+1 (qualified
 * with ClassName("::") when gen_cpp is set, "static" otherwise) and one
 * sized classes; the test selecting between them is not visible here */
353 fprintf(OUT, "%sDfaState %sst%d[%d] = {\n ",
354 gen_cpp?ClassName("::"):"static ",
355 gen_cpp?ClassName("::"):"",(i-1), (classes+1));
357 fprintf(OUT, "static DfaState st%d[%d] = {\n ", (i-1), classes);
359 items_on_line = MAX_ON_LINE;
360 for(j=0; j<classes; ++j){
362 trans = DFA(i)->trans[j];
/* a missing transition is replaced by dfa_allocated+1, which is printed
 * as dfa_allocated after the -1 adjustment below */
363 if (trans == NIL_INDEX)
364 trans = dfa_allocated+1;
365 /* all of DFA moved down one in array */
366 fprintf(OUT, "%d", trans-1);
/* items_on_line counts down from MAX_ON_LINE and is reset at zero */
368 if (!(--items_on_line)){
370 items_on_line = MAX_ON_LINE;
374 /* put in jump to error state */
/* two alternative closings: with a final dfa_allocated entry, or without */
375 fprintf(OUT, "%d\n};\n\n", dfa_allocated);
377 fprintf(OUT, "\n};\n\n");
/* Emits dfa[], an array of dfa_allocated pointers to the st<i> arrays:
 * every entry but the last is written with a trailing comma, then the
 * entry for the value i holds after the loop is written without one.
 * NOTE(review): that final line is printed even if the loop body never
 * ran -- confirm dfa_allocated is at least 1 whenever this is reached. */
386 fprintf(OUT, "\n%sDfaState *%sdfa[%d] = {\n",
387 gen_cpp?ClassName("::"):"",gen_cpp?ClassName("::"):"", dfa_allocated);
388 for (i=0; i<(dfa_allocated-1); ++i){
389 fprintf(OUT, "\tst%d,\n", i);
391 fprintf(OUT, "\tst%d\n", i);
392 fprintf(OUT, "};\n\n");
/* Emits accepts[]: one "accept" value per DFA state written as "%d, ",
 * closed by a trailing 0 entry.  While doing so it warns on stderr about
 * DFA states whose NFA states carry more than one distinct non-zero accept
 * value. */
399 register int items_on_line = 0;
/* cleared below as soon as a state has both alternatives and a non-zero
 * accept */
400 int true_interactive = TRUE;
402 /* make sure element for one past (zzerraction) -WEC 12/16/92 */
403 fprintf(OUT,"\n%sDfaState %saccepts[%d] = {\n ",
404 gen_cpp?ClassName("::"):"",
405 gen_cpp?ClassName("::"):"",
407 /* don't do anything if no dfa nodes */
408 if (i>dfa_allocated) goto skip_accepts;
413 unsigned int *t, *nfa_i;
414 unsigned int *q, *regular_expr;
417 nfa_states = DFA(i)->nfa_states;
/* t keeps the start of the list returned by set_pdq while nfa_i walks it */
418 t = nfa_i = set_pdq(nfa_states);
419 /* NOTE: picks lowest accept because accepts monotonic */
420 /* with respect to nfa node numbers and set_pdq */
421 /* returns in that order */
/* stop at the end marker (nil) or at the first NFA node whose accept
 * field is non-zero; `accept` keeps that value */
422 while((*nfa_i != nil) && (!(accept = NFA(*nfa_i)->accept))){
426 /* figure out if more than one accept state there */
/* collect the chosen accept and those of the remaining NFA nodes */
428 set_orel(accept, &accept_set);
429 while(*nfa_i != nil){
430 set_orel(NFA(*nfa_i)->accept, &accept_set);
433 /* remove error action from consideration */
434 set_rm(0, accept_set);
/* more than one distinct accept left: report the ambiguity, listing each */
436 if( set_deg(accept_set)>1){
437 fprintf(stderr, "dlg warning: ambiguous regular expression ");
438 q = regular_expr = set_pdq(accept_set);
439 while(*regular_expr != nil){
440 fprintf(stderr," %d ", *regular_expr);
443 fprintf(stderr, "\n");
448 if ((DFA(i)->alternatives) && (accept != 0)){
449 true_interactive = FALSE;
451 fprintf(OUT, "%d, ", accept);
453 /* free up memory before we "break" below -ATG 4/6/95 */
455 set_free(accept_set);
/* advance to the next state; the test detects running past the last one */
457 if ((++i)>dfa_allocated)
459 if ((++items_on_line)>=MAX_ON_LINE){
465 set_free(accept_set);
468 /* make sure element for one past (zzerraction) -WEC 12/16/92 */
470 fprintf(OUT, "0\n};\n\n");
/* Emits actions[], sized action_no+1: the error action first, then act1
 * up to act<action_no-1> each with a trailing comma, then one final act<i>
 * line without a comma.  Each line exists in two alternative forms, a
 * class-qualified one using className and a plain C one; the tests that
 * select between them are not visible here.
 * NOTE(review): the final act<i> line is printed even when the loop never
 * runs, so with action_no == 0 it would name act1 -- confirm that case
 * cannot occur. */
477 char* className = ClassName("");
480 fprintf(OUT, "Ptr%sMemberFunc %s::actions[%d] = {\n", className,
481 className, action_no+1);
483 fprintf(OUT, "void (*actions[%d])() = {\n", action_no+1);
485 /* fprintf(OUT, "\t(Ptr%sMemberFunc)&%s::erraction,\n", className, className);*/
486 fprintf(OUT, "\t&%s::erraction,\n", className);
488 fprintf(OUT, "\tzzerraction,\n");
489 for (i=1; i<action_no; ++i) {
491 /* fprintf(OUT,"\t(Ptr%sMemberFunc)&%s::act%d,\n", className, className, i);*/
492 fprintf(OUT,"\t&%s::act%d,\n", className, i);
494 fprintf(OUT,"\tact%d,\n", i);
498 /* fprintf(OUT,"\t(Ptr%sMemberFunc)&%s::act%d\n", className, className, i);*/
499 fprintf(OUT,"\t&%s::act%d\n", className, i);
501 fprintf(OUT,"\tact%d\n", i);
502 fprintf(OUT, "};\n\n");
/* Emits shift<m>[CHAR_RANGE], the table that maps each input character to
 * the character class (partition) containing it. */
509 register int i = 0, j;
510 register int items_on_line = 0;
512 fprintf(OUT, "%s unsigned char %sshift%d[%d] = {\n ",
514 gen_cpp?ClassName("::"):"", m, CHAR_RANGE);
516 /* find which partition character i is in */
/* NOTE(review): the bound is dfa_class_nop[mode_counter] although the
 * table is named after m -- confirm the two indices always agree when this
 * code runs. */
517 for (j=0; j<dfa_class_nop[mode_counter]; ++j){
518 if (set_el(i,class_sets[j]))
/* step to the next character; the test detects the end of the range */
522 if ((++i)>=CHAR_RANGE)
525 if ((++items_on_line)>=MAX_ON_LINE){
530 fprintf(OUT, "\n};\n\n");
/* Emits dfa_base[]: for each mode, dfa_basep[m] minus one (the state
 * arrays are likewise emitted as st<i-1>).  All but the last mode get a
 * trailing comma; the last entry also closes the array. */
538 fprintf(OUT, "%sDfaState %sdfa_base[] = {\n",
539 gen_cpp?ClassName("::"):"static ",
540 gen_cpp?ClassName("::"):"");
541 for(m=0; m<(mode_counter-1); ++m)
542 fprintf(OUT, "\t%d,\n", dfa_basep[m]-1);
543 fprintf(OUT, "\t%d\n};\n\n", dfa_basep[m]-1);
/* Emits dfa_class_no[]: dfa_class_nop[m] for each mode, the last entry
 * written without a comma and followed by the closing brace. */
552 fprintf(OUT,"%s int %sdfa_class_no[] = {\n",
554 gen_cpp?ClassName("::"):"");
555 for(m=0; m<(mode_counter-1); ++m)
556 fprintf(OUT,"\t%d,\n", dfa_class_nop[m]);
557 fprintf(OUT,"\t%d\n};\n\n", dfa_class_nop[m]);
/* Emits b_class_no[]: one pointer per mode to that mode's shift<m> table,
 * the last entry written without a comma and followed by the closing
 * brace. */
566 fprintf(OUT,"%s unsigned char *%sb_class_no[] = {\n",
568 gen_cpp?ClassName("::"):"");
569 for(m=0; m<(mode_counter-1); ++m)
570 fprintf(OUT, "\tshift%d,\n", m);
571 fprintf(OUT, "\tshift%d\n};\n\n", m);
/* p_alternative_table: emits the per-state "alternatives" table, one
 * DFA(i)->alternatives value for each state 1..dfa_allocated followed by a
 * final 0 entry. */
575 p_alternative_table()
/* the ZZINTERACTIVE macro is emitted only when gen_cpp is not set */
579 if ( !gen_cpp ) fprintf(OUT, "#define ZZINTERACTIVE\n\n");
/* two alternative headers: DLGChar ...alternatives[] or a static
 * zzalternatives[] whose element type comes from minsize(dfa_allocated);
 * the selecting test is not visible here */
581 fprintf(OUT, "DLGChar %salternatives[%sDfaStates+1] = {\n",
585 fprintf(OUT, "static %s zzalternatives[DfaStates+1] = {\n",
586 minsize(dfa_allocated));
588 for(i=1; i<=dfa_allocated; ++i)
589 fprintf(OUT, "\t%d,\n", DFA(i)->alternatives);
590 fprintf(OUT, "/* must have 0 for zzalternatives[DfaStates] */\n");
591 fprintf(OUT, "\t0\n};\n\n");
/* Records mode name s (as a copy made by mystrdup) and its number m in
 * slot cur_mode of the mode tables, and writes "#define <s> <m>" to
 * mode_stream. */
601 mode_name[cur_mode] = mystrdup(s);
602 mode_number[cur_mode] = m;
606 fprintf(mode_stream, "#define %s %d\n", s, m);
/* Builds class_name followed by suffix in a function-local static buffer;
 * every call overwrites the text from the previous call.
 * NOTE(review): sprintf is unbounded -- class_name plus suffix must stay
 * under 200 bytes including the terminator. */
613 static char buf[200];
614 extern char *class_name;
616 sprintf(buf, "%s%s", class_name, suffix);
621 /* print out a particular nfa node that is pointed to by p */
/* Prints the node's number and accept value on stdout, then each of its
 * two transitions: trans[0] with the text " on " after the target, trans[1]
 * as an epsilon move; a NIL_INDEX transition is shown as "nil". */
625 register nfa_node *t;
628 printf("NFA state : %d\naccept state : %d\n",
629 NFA_NO(p),p->accept);
630 if (p->trans[0] != NIL_INDEX){
631 printf("trans[0] => %d on ", NFA_NO(p->trans[0]));
636 printf("trans[0] => nil\n");
637 if (p->trans[1] != NIL_INDEX)
638 printf("trans[1] => %d on epsilon\n",
639 NFA_NO(p->trans[1]));
641 printf("trans[1] => nil\n");
648 /* code to print out special structures when using a debugger */
651 nfa_node *p; /* state number also index into array */
653 /* each node has a marker on it so it only gets printed once */
/* a new operation_no means marks left by an earlier traversal no longer
 * match, so those nodes are treated as unvisited again */
655 operation_no++; /* get new number */
/* Recursive walk over the NFA graph: a node is handled only if it is not
 * NIL_INDEX and its nfa_set mark differs from the current operation_no;
 * it is then marked and both of its transitions are followed. */
660 nfa_node *p; /* state number also index into array */
662 if ((p != NIL_INDEX) && (p->nfa_set != operation_no)){
663 /* so it is only printed once */
664 p->nfa_set = operation_no;
666 s_p_nfa(p->trans[0]);
667 s_p_nfa(p->trans[1]);
/* Prints one DFA node on stdout: its number, the set of NFA states it was
 * built from (via p_set), and its class_no transition entries.
 * NOTE(review): the state number is obtained with NFA_NO(p) -- confirm
 * that macro gives the intended index for a DFA node. */
677 printf("DFA state :%d\n",NFA_NO(p));
682 printf("from nfa states : ");
683 p_set(p->nfa_states);
685 /* NOTE: trans arcs stored as ints rather than pointer*/
686 for (i=0; i<class_no; i++){
687 printf("%d ",p->trans[i]);
695 /* prints out all the dfa nodes actually allocated */
/* DFA node numbers start at 1 and run through dfa_allocated inclusive */
699 for (i = 1; i<=dfa_allocated; i++)
704 /* print out numbers in the set label */
/* t keeps the start of the list returned by set_pdq while e walks it;
 * each element is printed with MIN_CHAR added */
713 t = e = set_pdq(label);
715 printf("%d ", (*e+MIN_CHAR));