/* output.c, output generator for dlg * * Output Notes: * * DfaStates == number of dfa nodes in automaton (just a #define) * DfaState == type large enough to index every node in automaton * <256 unsigned char, <65536 unsigned short, etc. * * Thus, the elements in each of the automaton states (st%d) are type DfaState * and are size appropriately, since they must be able to index the next * automaton state. * * dfa[] == a linear array that points to all the automaton states (st%d) * (dfa_base[] should be the same, but isn't right now) * * accepts[] == Taking a closer look at this one, it probably shouldn't be type * DfaState because there is no real requirement that the number of * accepts states is less than the number of dfa state. However, if * the number of accept states was more than the number of DFA states * then the lexical specification would be really ambiguous. * * Another note. Is that is should be possible to fold accepts[] and * actions[] together. If this is done, I would suggest get rid of * accept[] and make actions[] have an entry for each state (st%d) in * the automaton. * * dfa_base[] == starting location for each lexical mode. This should be * Dfastate type (but isn't right now), since it points to the states * in the automaton. * * dfa_class_no[] == indicates the number of columns each lexical mode has. * * b_class_no[] == pointer to the start of the translation array used to * convert from input character to character class. This could cause * problems if there are more than 256 classes * * shift%d[] == the actual translation arrays that convert the input character * into the character class. These will have to change if there are * more than 256 character classes. * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. * * DLG 1.33 * Will Cohen * With mods by Terence Parr; AHPCRC, University of Minnesota * 1989-1995 */ #include #include "dlg.h" #ifdef MEMCHK #include "trax.h" #else #ifdef __STDC__ #include #else #include #endif /* __STDC__ */ #endif static char *mode_name[MAX_MODES]; static int mode_number[MAX_MODES]; static int cur_mode=0; int operation_no = 0; /* used to mark nodes so that infinite loops avoided */ int dfa_basep[MAX_MODES]; /* start of each group of states */ int dfa_class_nop[MAX_MODES]; /* number of elements in each group of states*/ int gen_ansi = FALSE; /* allows ansi code to be generated */ FILE *input_stream; /* where to read description from */ FILE *output_stream; /* where to put the output */ FILE *mode_stream; /* where to put the mode.h stuff */ FILE *class_stream; /* where to put the scan.h stuff (if gen_cpp) */ /* NOTE: This section is MACHINE DEPENDENT */ #define DIF_SIZE 4 #if defined(PC) && !defined(PC32) long typesize[DIF_SIZE] = { 0x7f, 0x7fff, 0x7fff, 0x7fffffff }; char t0[] = "unsigned char"; char t1[] = "unsigned short"; char t2[] = "unsigned int"; char t3[] = "unsigned long"; char *typevar[DIF_SIZE] = { t0, t1, t2, t3}; #else long typesize[DIF_SIZE] = { 0x7f, 0x7fff, 0x7fffffff, 0x7fffffff }; char t0[] = "unsigned char"; char t1[] = "unsigned short"; char t2[] = "unsigned int"; char t3[] = "unsigned long"; char *typevar[DIF_SIZE] = { t0, t1, t2, t3}; #endif #ifdef __STDC__ char *minsize(int); #else char *minsize(); #endif /* Added by TJP August 1994 */ /* Take in MyLexer and return MyLexer_h */ static char * #ifdef __USE_PROTOS gate_symbol(char *name) #else gate_symbol(name) char *name; #endif { static char buf[100]; sprintf(buf, "%s_h", name); return buf; } /* Added by TJP August 1994 */ static char * #ifdef __USE_PROTOS mystrdup(char *s) #else mystrdup(s) char *s; #endif { char *p = (char *)malloc(strlen(s)+1); strcpy(p, s); return p; } p_class_hdr() { if ( class_stream == NULL ) return; fprintf(class_stream, "#ifndef %s\n", gate_symbol(ClassName(""))); fprintf(class_stream, "#define %s\n", gate_symbol(ClassName(""))); fprintf(class_stream, "/*\n"); fprintf(class_stream, " * D L G L e x e r C l a s s D e f i n i t i o n\n"); fprintf(class_stream, " *\n"); fprintf(class_stream, " * Generated from:"); fprintf(class_stream, " %s", file_str[0]); fprintf(class_stream, "\n"); fprintf(class_stream, " *\n"); fprintf(class_stream, " * 1989-1994 by Will Cohen, Terence Parr, and Hank Dietz\n"); fprintf(class_stream, " * Purdue University Electrical Engineering\n"); fprintf(class_stream, " * DLG Version %s\n", version); fprintf(class_stream, " */\n\n"); } p_class_def() { int i, m; if ( class_stream == NULL ) return; fprintf(class_stream, "\n"); fprintf(class_stream, "#include \"%s\"\n\n", DLEXERBASE_H); fprintf(class_stream, "class %s : public DLGLexerBase {\n", ClassName("")); fprintf(class_stream, "public:\n"); fprintf(class_stream, "\tstatic const int MAX_MODE;\n"); fprintf(class_stream, "\tstatic const int DfaStates;\n"); for (i=0; i\n"); if ( !gen_cpp ) fprintf(OUT, "#include \"%s\"\n\n", mode_file); fprintf(OUT,"\n"); } p_includes() { int i; fprintf(OUT, "#include \"%s\"\n", APARSER_H); fprintf(OUT, "#include \"%s\"\n", DLEXERBASE_H); fprintf(OUT, "#include \"%s\"\n", ClassName(".h")); } /* generate code to tie up any loose ends */ p_tail() { if ( gen_cpp ) { if ( strcmp(ClassName(""), DEFAULT_CLASSNAME)!=0 ) fprintf(OUT, "#define DLGLexer %s\n", ClassName("")); fprintf(OUT, "#include \"%s\"\n", DLEXER_C); return; } fprintf(OUT, "\n"); fprintf(OUT, "\n"); if (comp_level) fprintf(OUT, "#define ZZSHIFT(c) (b_class_no[zzauto][1+c])\n"); else fprintf(OUT, "#define ZZSHIFT(c) (1+c)\n"); if ( !gen_cpp ) fprintf(OUT, "#define MAX_MODE %d\n",mode_counter); fprintf(OUT, "#include \"dlgauto.h\"\n"); } /* output the table of DFA for general use */ p_tables() { char *minsize(); if ( !gen_cpp ) { fprintf(OUT, "#define DfaStates\t%d\n", dfa_allocated); fprintf(OUT, "typedef %s DfaState;\n\n", minsize(dfa_allocated)); } if ( gen_cpp ) { int i; fprintf(OUT, "\n"); fprintf(OUT, "const int %s::MAX_MODE=%d;\n", ClassName(""), mode_counter); fprintf(OUT, "const int %s::DfaStates=%d;\n", ClassName(""), dfa_allocated); for (i=0; i typesize[i]) ++i; return typevar[i]; } p_node_table() { register int i; register int m = 0; for(m=0; m<(mode_counter-1); ++m){ for(i=dfa_basep[m]; itrans[j]; if (trans == NIL_INDEX) trans = dfa_allocated+1; /* all of DFA moved down one in array */ fprintf(OUT, "%d", trans-1); fprintf(OUT, ", "); if (!(--items_on_line)){ fprintf(OUT, "\n "); items_on_line = MAX_ON_LINE; } } #if 1 /* put in jump to error state */ fprintf(OUT, "%d\n};\n\n", dfa_allocated); #else fprintf(OUT, "\n};\n\n"); #endif } p_dfa_table() { register int i; fprintf(OUT, "\n%sDfaState *%sdfa[%d] = {\n", gen_cpp?ClassName("::"):"",gen_cpp?ClassName("::"):"", dfa_allocated); for (i=0; i<(dfa_allocated-1); ++i){ fprintf(OUT, "\tst%d,\n", i); } fprintf(OUT, "\tst%d\n", i); fprintf(OUT, "};\n\n"); } p_accept_table() { register int i = 1; register int items_on_line = 0; int true_interactive = TRUE; /* make sure element for one past (zzerraction) -WEC 12/16/92 */ fprintf(OUT,"\n%sDfaState %saccepts[%d] = {\n ", gen_cpp?ClassName("::"):"", gen_cpp?ClassName("::"):"", dfa_allocated+1); /* don't do anything if no dfa nodes */ if (i>dfa_allocated) goto skip_accepts; while (TRUE){ int accept; set accept_set; set nfa_states; unsigned int *t, *nfa_i; unsigned int *q, *regular_expr; accept_set = empty; nfa_states = DFA(i)->nfa_states; t = nfa_i = set_pdq(nfa_states); /* NOTE: picks lowest accept because accepts monotonic */ /* with respect to nfa node numbers and set_pdq */ /* returns in that order */ while((*nfa_i != nil) && (!(accept = NFA(*nfa_i)->accept))){ nfa_i++; } /* figure out if more than one accept state there */ if (warn_ambig ){ set_orel(accept, &accept_set); while(*nfa_i != nil){ set_orel(NFA(*nfa_i)->accept, &accept_set); nfa_i++; } /* remove error action from consideration */ set_rm(0, accept_set); if( set_deg(accept_set)>1){ fprintf(stderr, "dlg warning: ambiguous regular expression "); q = regular_expr = set_pdq(accept_set); while(*regular_expr != nil){ fprintf(stderr," %d ", *regular_expr); ++regular_expr; } fprintf(stderr, "\n"); free(q); } } if ((DFA(i)->alternatives) && (accept != 0)){ true_interactive = FALSE; } fprintf(OUT, "%d, ", accept); /* free up memory before we "break" below -ATG 4/6/95 */ free(t); set_free(accept_set); if ((++i)>dfa_allocated) break; if ((++items_on_line)>=MAX_ON_LINE){ fprintf(OUT,"\n "); items_on_line = 0; } /* free(t); set_free(accept_set); */ } /* make sure element for one past (zzerraction) -WEC 12/16/92 */ skip_accepts: fprintf(OUT, "0\n};\n\n"); } p_action_table() { register int i; char* className = ClassName(""); if ( gen_cpp ) fprintf(OUT, "Ptr%sMemberFunc %s::actions[%d] = {\n", className, className, action_no+1); else fprintf(OUT, "void (*actions[%d])() = {\n", action_no+1); if ( gen_cpp ) /* fprintf(OUT, "\t(Ptr%sMemberFunc)&%s::erraction,\n", className, className);*/ fprintf(OUT, "\t&%s::erraction,\n", className); else fprintf(OUT, "\tzzerraction,\n"); for (i=1; i=CHAR_RANGE) break; fprintf(OUT,", "); if ((++items_on_line)>=MAX_ON_LINE){ fprintf(OUT,"\n "); items_on_line = 0; } } fprintf(OUT, "\n};\n\n"); } p_base_table() { register int m; fprintf(OUT, "%sDfaState %sdfa_base[] = {\n", gen_cpp?ClassName("::"):"static ", gen_cpp?ClassName("::"):""); for(m=0; m<(mode_counter-1); ++m) fprintf(OUT, "\t%d,\n", dfa_basep[m]-1); fprintf(OUT, "\t%d\n};\n\n", dfa_basep[m]-1); } p_class_table() { #if 0 register int m; fprintf(OUT,"%s int %sdfa_class_no[] = {\n", gen_cpp?"":"static", gen_cpp?ClassName("::"):""); for(m=0; m<(mode_counter-1); ++m) fprintf(OUT,"\t%d,\n", dfa_class_nop[m]); fprintf(OUT,"\t%d\n};\n\n", dfa_class_nop[m]); #endif } p_bshift_table() { register int m; fprintf(OUT,"%s unsigned char *%sb_class_no[] = {\n", gen_cpp?"":"static", gen_cpp?ClassName("::"):""); for(m=0; m<(mode_counter-1); ++m) fprintf(OUT, "\tshift%d,\n", m); fprintf(OUT, "\tshift%d\n};\n\n", m); } p_alternative_table() { register int i; if ( !gen_cpp ) fprintf(OUT, "#define ZZINTERACTIVE\n\n"); if ( gen_cpp ) fprintf(OUT, "DLGChar %salternatives[%sDfaStates+1] = {\n", ClassName("::"), ClassName("::")); else fprintf(OUT, "static %s zzalternatives[DfaStates+1] = {\n", minsize(dfa_allocated)); for(i=1; i<=dfa_allocated; ++i) fprintf(OUT, "\t%d,\n", DFA(i)->alternatives); fprintf(OUT, "/* must have 0 for zzalternatives[DfaStates] */\n"); fprintf(OUT, "\t0\n};\n\n"); } p_mode_def(s,m) char *s; int m; { if ( gen_cpp ) { mode_name[cur_mode] = mystrdup(s); mode_number[cur_mode] = m; cur_mode++; } else fprintf(mode_stream, "#define %s %d\n", s, m); } char * ClassName(suffix) char *suffix; { static char buf[200]; extern char *class_name; sprintf(buf, "%s%s", class_name, suffix); return buf; } #ifdef DEBUG /* print out a particular nfa node that is pointed to by p */ p_nfa_node(p) nfa_node *p; { register nfa_node *t; if (p != NIL_INDEX){ printf("NFA state : %d\naccept state : %d\n", NFA_NO(p),p->accept); if (p->trans[0] != NIL_INDEX){ printf("trans[0] => %d on ", NFA_NO(p->trans[0])); p_set(p->label); printf("\n"); } else printf("trans[0] => nil\n"); if (p->trans[1] != NIL_INDEX) printf("trans[1] => %d on epsilon\n", NFA_NO(p->trans[1])); else printf("trans[1] => nil\n"); printf("\n"); } } #endif #ifdef DEBUG /* code to print out special structures when using a debugger */ p_nfa(p) nfa_node *p; /* state number also index into array */ { /* each node has a marker on it so it only gets printed once */ operation_no++; /* get new number */ s_p_nfa(p); } s_p_nfa(p) nfa_node *p; /* state number also index into array */ { if ((p != NIL_INDEX) && (p->nfa_set != operation_no)){ /* so it is only printed once */ p->nfa_set = operation_no; p_nfa_node(p); s_p_nfa(p->trans[0]); s_p_nfa(p->trans[1]); } } p_dfa_node(p) dfa_node *p; { int i; if (p != NIL_INDEX){ printf("DFA state :%d\n",NFA_NO(p)); if (p->done) printf("done\n"); else printf("undone\n"); printf("from nfa states : "); p_set(p->nfa_states); printf("\n"); /* NOTE: trans arcs stored as ints rather than pointer*/ for (i=0; itrans[i]); } printf("\n\n"); } } p_dfa() { /* prints out all the dfa nodes actually allocated */ int i; for (i = 1; i<=dfa_allocated; i++) p_dfa_node(NFA(i)); } /* print out numbers in the set label */ p_set(label) set label; { unsigned *t, *e; if (set_nil(label)){ printf("epsilon\n"); }else{ t = e = set_pdq(label); while(*e != nil){ printf("%d ", (*e+MIN_CHAR)); e++; } printf("\n"); free(t); } } #endif