From a5858ad3d85781b0412eac53847e35a1bf24e70d Mon Sep 17 00:00:00 2001 From: Terence Parr <> Date: Thu, 5 Oct 1995 19:40:04 -0500 Subject: [PATCH] auto commit for import --- antlr/README | 19 ++ antlr/antlr.mpw.r | 371 +++++++++++++++++++++++++++++++++++++ antlr/makefile.mpw | 92 +++++++++ antlr/watantlr.mak | 54 ++++++ dlg/dlg_p.g | 450 +++++++++++++++++++++++++++++++++++++++++++++ dlg/err.c | 83 +++++++++ dlg/main.c | 207 +++++++++++++++++++++ 7 files changed, 1276 insertions(+) create mode 100755 antlr/README create mode 100755 antlr/antlr.mpw.r create mode 100755 antlr/makefile.mpw create mode 100755 antlr/watantlr.mak create mode 100755 dlg/dlg_p.g create mode 100755 dlg/err.c create mode 100755 dlg/main.c diff --git a/antlr/README b/antlr/README new file mode 100755 index 0000000..d6364ec --- /dev/null +++ b/antlr/README @@ -0,0 +1,19 @@ + ANTLR 1.33 + +This directory contains the files necessary to build ANTLR. + +If you do a "make scrub", ANTLR will have to run on antlr.g and DLG +will have to run on parser.dlg. To do so, either: + +(1) ANTLR uses the previous antlr in that directory to rebuild itself, or +(2) an existing antlr must be found on the search path + +You will find that running "antlr -gh antlr.g" will result in about +10 ambiguity warnings. These are normal. Don't worry. + +If you do a "make clean" right after installation, ANTLR and DLG should +not need to run; only the C files will compile. + +Don't forget to go into the makefile to uncomment the appropriate +definitions for your OS/architecture/compiler or see the appropriate +NOTES.?? file. 
diff --git a/antlr/antlr.mpw.r b/antlr/antlr.mpw.r new file mode 100755 index 0000000..093b371 --- /dev/null +++ b/antlr/antlr.mpw.r @@ -0,0 +1,371 @@ +#include "cmdo.r" + +resource 'cmdo' (128, "Antlr") { + { + /* [1] */ + 295, + "ANTLR -- Purdue Compiler Construction Tool Set (PCCTS) LL(k) parser generator.", + { + /* [1] */ + NotDependent { }, MultiFiles { + "Grammar File(s)É", + "Choose the grammar specification files you wish to have ANTLR process.", + {25, 24, 44, 154}, + "Grammar specification:", + "", + MultiInputFiles { + { /* array MultiTypesArray: 1 elements */ + /* [1] */ + text + }, + ".g", + "Files ending in .g", + "All text files" + } + }, + /* [2] */ + NotDependent { }, Files { + DirOnly, + OptionalFile { + {56, 25, 72, 155}, + {77, 25, 96, 155}, + "Output Directory", + ":", + "-o", + "", + "Choose the directory where ANTLR will put its output.", + dim, + "Output DirectoryÉ", + "", + "" + }, + NoMore { + + } + }, + /* [3] */ + NotDependent { }, Redirection { + StandardOutput, + {126, 27} + }, + /* [4] */ + NotDependent { }, Redirection { + DiagnosticOutput, + {126, 178} + }, + /* [5] */ + NotDependent { }, TextBox { + gray, + {117, 20, 167, 300}, + "Redirection" + }, + /* [6] */ + NotDependent { }, NestedDialog { + 2, + {20, 324, 40, 460}, + "OptionsÉ", + "Various command line options may be set " + "with this button." + }, + /* [7] */ + NotDependent { }, NestedDialog { + 3, + {48, 324, 68, 460}, + "More OptionsÉ", + "Antlr has ALOT of options. There are even more to be found with this button." + }, + /* [8] */ + NotDependent { }, NestedDialog { + 4, + {76, 324, 96, 460}, + "Rename OptionsÉ", + "Options for renaming output files may be set with this button." + }, + /* [9] */ + NotDependent { }, VersionDialog { + VersionString { + "1.33" + }, + "PCCTS was written by Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995. 
" + "MPW port by Scott Haney.", + noDialog + } + }, + /* [2] */ + 295, + "Use this dialog to specify command line options.", + { + /* [1] */ + NotDependent { }, CheckOption { + NotSet, + {18, 25, 33, 225}, + "Generate C++ code", + "-CC", + "Generate C++ output from both ANTLR and DLG." + }, + /* [2] */ + NotDependent { }, CheckOption { + NotSet, + {38, 25, 53, 225}, + "Generate ASTs", + "-gt", + "Generate code for Abstract-Syntax-Trees (ASTs)." + }, + /* [3] */ + NotDependent { }, CheckOption { + NotSet, + {18, 235, 33, 435}, + "Support parse traces", + "-gd", + "If this option is checked, ANTLR inserts code in each parsing " + "function to provide for user-defined handling of a detailed parse trace. " + "The code consists of calls to zzTRACEIN and zzTRACEOUT." + }, + /* [4] */ + NotDependent { }, CheckOption { + NotSet, + {58, 25, 73, 225}, + "Generate line info", + "-gl", + "If this option is checked, ANTLR will generate line info about grammar " + "actions, thereby making debugging easier since " + "compile errors will point to the grammar file." + }, + /* [5] */ + NotDependent { }, CheckOption { + NotSet, + {38, 235, 53, 435}, + "Generate cross-references", + "-cr", + "If this option is checked, ANTLR will generate a cross reference for all " + "rules. For each rule it will print a list of all other rules that reference it." + }, + /* [6] */ + NotDependent { }, CheckOption { + NotSet, + {78, 25, 93, 225}, + "Generate error classes", + "-ge", + "If this option is checked, ANTLR will generate an error class for " + "each non-terminal." + }, + /* [7] */ + NotDependent { }, CheckOption { + NotSet, + {58, 235, 73, 435}, + "Hoist predicate context", + "-prc on", + "If this option is checked, ANTLR will turn on the computation and hoisting of " + "predicate context." + }, + /* [8] */ + NotDependent { }, CheckOption { + NotSet, + {98, 25, 113, 225}, + "Don't generate Code", + "-gc", + "If this option is checked, ANTLR will generate no code, i.e. 
" + "it will only perform analysis on the grammar." + }, + /* [9] */ + NotDependent { }, CheckOption { + NotSet, + {78, 235, 93, 435}, + "Don't create Lexer files", + "-gx", + "If this option is checked, ANTLR will not generate DLG-related output files. " + "This option should be used if one wants a custom lexical analyzer or if one " + "has made changes to the grammar not affecting the lexical structure." + }, + /* [10] */ + NotDependent { }, CheckOption { + NotSet, + {118, 25, 133, 225}, + "Delay lookahead fetches", + "-gk", + "If this option is checked, ANTLR will generate a parser that delays lookahead " + "fetches until needed." + }, + /* [11] */ + NotDependent { }, CheckOption { + NotSet, + {98, 235, 113, 460}, + "Don't generate token expr sets", + "-gs", + "If this option is checked, ANTLR will not generate sets for token expression " + "sets; instead, it will generate a || separated sequence of LA(1)==token #. " + }, + /* [12] */ + NotDependent { }, RegularEntry { + "Lookahead:", + {140, 25, 155, 150}, + {160, 25, 176, 150}, + "1", + keepCase, + "-k", + "This entry specifies the number of tokens of lookahead." + }, + /* [13] */ + NotDependent { }, RegularEntry { + "Compr lookahead:", + {140, 165, 155, 290}, + {160, 165, 176, 290}, + "", + keepCase, + "-ck", + "This entry specifies the number of tokens of lookahead when using compressed " + "(linear approximation) lookahead. In general, the compressed lookahead is much " + "deeper than the full lookahead." + }, + /* [14] */ + NotDependent { }, RegularEntry { + "Max tree nodes:", + {140, 310, 155, 435}, + {160, 305, 176, 435}, + "", + keepCase, + "-rl", + "This entry specifies the maximum number of tokens of tree nodes used by the grammar " + "analysis." 
+ } + }, + /* [3] */ + 295, + "Use this dialog to specify still more command line options.", + { + /* [1] */ + NotDependent { }, RadioButtons { + { /* array radioArray: 3 elements */ + /* [1] */ + {38, 25, 53, 105}, "None", "", Set, "When this option is selected, ANTLR " + "will not print the grammar to stdout.", + /* [2] */ + {38, 115, 53, 195}, "Yes", "-p", NotSet, "When this option is selected, ANTLR " + "will print the grammar, stripped of all actions and comments, to stdout.", + /* [3] */ + {38, 210, 53, 300}, "More", "-pa", NotSet, "When this option is selected, ANTLR " + "will print the grammar, stripped of all actions and comments, to stdout. " + "It will also annotate the output with the first sets determined from grammar " + "analysis." + } + }, + /* [2] */ + NotDependent { }, TextBox { + gray, + { 28, 15, 60, 310 }, + "Grammar Printing" + }, + /* [3] */ + NotDependent { }, RadioButtons { + { /* array radioArray: 3 elements */ + /* [1] */ + {88, 25, 103, 105}, "Low", "", Set, "When this option is selected, ANTLR " + "will show ambiguities/errors in low detail.", + /* [2] */ + {88, 115, 103, 195}, "Medium", "-e2", NotSet, "When this option is selected, ANTLR " + "will show ambiguities/errors in more detail.", + /* [3] */ + {88, 210, 103, 300}, "High", "-e3", NotSet, "When this option is selected, ANTLR " + "will show ambiguities/errors in excruciating detail." + } + }, + /* [4] */ + NotDependent { }, TextBox { + gray, + { 78, 15, 110, 310 }, + "Error reporting" + }, + /* [5] */ + NotDependent { }, CheckOption { + NotSet, + {128, 25, 143, 225}, + "More warnings", + "-w2", + "If this option is checked, ANTLR will warn if semantic predicates and/or " + "(É)? blocks are assumed to cover ambiguous alternatives." 
+ }, + + }, + /* [4] */ + 295, + "Use this dialog to specify command line options relating to renaming output files.", + { + /* [1] */ + NotDependent { }, RegularEntry { + "Errors file name:", + {35, 25, 50, 205}, + {35, 205, 51, 300}, + "err.c", + keepCase, + "-fe", + "This entry specifies the name ANTLR uses for " + "the errors file." + }, + /* [2] */ + NotDependent { }, RegularEntry { + "Lexical output name:", + {60, 25, 75, 205}, + {60, 205, 76, 300}, + "parser.dlg", + keepCase, + "-fl", + "This entry specifies the name ANTLR uses for " + "the lexical output file." + }, + /* [3] */ + NotDependent { }, RegularEntry { + "Lexical modes name:", + {85, 25, 100, 205}, + {85, 205, 101, 300}, + "mode.h", + keepCase, + "-fm", + "This entry specifies the name ANTLR uses for " + "the lexical mode definitions file." + }, + /* [4] */ + NotDependent { }, RegularEntry { + "Remap file name:", + {110, 25, 125, 205}, + {110, 205, 126, 300}, + "remap.h", + keepCase, + "-fr", + "This entry specifies the name ANTLR uses for " + "the file that remaps globally visible symbols." + }, + /* [5] */ + NotDependent { }, RegularEntry { + "Tokens file name:", + {135, 25, 150, 205}, + {135, 205, 151, 300}, + "tokens.h", + keepCase, + "-ft", + "This entry specifies the name ANTLR uses for " + "the tokens file." + }, + /* [6] */ + NotDependent{ }, CheckOption { + NotSet, + {160, 25, 175, 175}, + "Create std header", + "-gh", + "If this option is checked, ANTLR will create a standard header file named, " + "by default 'stdpccts.h'. This name can be altered using the entry right next door." + }, + /* [7] */ + Or { {6} }, RegularEntry { + "Std header file name:", + {160, 175, 175, 355}, + {160, 355, 176, 450}, + "stdpccts.h", + keepCase, + "-fh", + "This entry specifies the name ANTLR uses for " + "the standard header file." 
+ } + } + } +}; + diff --git a/antlr/makefile.mpw b/antlr/makefile.mpw new file mode 100755 index 0000000..c2f87c5 --- /dev/null +++ b/antlr/makefile.mpw @@ -0,0 +1,92 @@ +#////////////////////////////////////////////////////////////////////////// +#// +#// FILE NAME +#// antlr.make +#// +#// AUTHOR +#// Scott Haney +#// +#// CREATED +#// May 30, 1994 +#// +#// DESCRIPTION +#// Controls Macintosh builds of ANTLR. +#// +#////////////////////////////////////////////////////////////////////////// + +ObjDir = :Macintosh: +SetDir = ::support:set + +COptions = -mc68020 -model far -warnings off -d MPW -d __STDC__=1 -d USER_ZZSYN ¶ + -i {SetDir} -i ::h +LinkOptions = -d -c 'MPS ' -t MPST -mf -model far -br ON -srtsg ALL + +Objects = ¶ + {ObjDir}antlr.c.o ¶ + {ObjDir}bits.c.o ¶ + {ObjDir}build.c.o ¶ + {ObjDir}err.c.o ¶ + {ObjDir}fset.c.o ¶ + {ObjDir}fset2.c.o ¶ + {ObjDir}gen.c.o ¶ + {ObjDir}globals.c.o ¶ + {ObjDir}hash.c.o ¶ + {ObjDir}lex.c.o ¶ + {ObjDir}main.c.o ¶ + {ObjDir}misc.c.o ¶ + {ObjDir}pred.c.o ¶ + {ObjDir}scan.c.o ¶ + {SetDir}{ObjDir}set.c.o ¶ + "{CLibraries}"StdCLib.o ¶ + "{Libraries}"Stubs.o ¶ + "{Libraries}"Runtime.o ¶ + "{Libraries}"Interface.o ¶ + "{Libraries}"ToolLibs.o ¶ + +{ObjDir}antlr.c.o Ä antlr.c + +{ObjDir}bits.c.o Ä bits.c + +{ObjDir}build.c.o Ä build.c + +{ObjDir}err.c.o Ä err.c + +{ObjDir}fset.c.o Ä fset.c + +{ObjDir}fset2.c.o Ä fset2.c + +{ObjDir}gen.c.o Ä gen.c + +{ObjDir}globals.c.o Ä globals.c + +{ObjDir}hash.c.o Ä hash.c + +{ObjDir}lex.c.o Ä lex.c + +{ObjDir}main.c.o Ä main.c + +{ObjDir}misc.c.o Ä misc.c + +{ObjDir}pred.c.o Ä pred.c + +{ObjDir}scan.c.o Ä scan.c + +{SetDir}{ObjDir}set.c.o Ä {SetDir}:set.c + +{SetDir}{ObjDir} Ä {SetDir}: +{ObjDir} Ä : +{ObjDir} Ä {ObjDir} + +antlr ÄÄ {Objects} + Link {LinkOptions} -o antlr {Objects} + +antlr ÄÄ antlr.r + Rez antlr.r -o antlr -a + +Bootstrap Ä antlr.g + antlr -gh antlr.g + dlg -C2 parser.dlg scan.c + +Install Ä antlr + Duplicate antlr "{MPW}"Tools:antlr + diff --git a/antlr/watantlr.mak 
b/antlr/watantlr.mak new file mode 100755 index 0000000..37dc8c7 --- /dev/null +++ b/antlr/watantlr.mak @@ -0,0 +1,54 @@ +SET=..\support\set +PCCTS_H=..\h + +# +# Watcom +# +CC=wcl386 +ANTLR=..\bin\antlr +DLG=..\bin\dlg +CFLAGS= -I. -I$(SET) -I$(PCCTS_H) -DUSER_ZZSYN -DPC +OUT_OBJ = -o +OBJ_EXT = obj +LINK = wcl386 + +.c.obj : + $(CC) -c $[* $(CFLAGS) + +antlr.exe: antlr.obj scan.obj err.obj bits.obj build.obj fset2.obj & + fset.obj gen.obj globals.obj hash.obj lex.obj main.obj & + misc.obj set.obj pred.obj + $(LINK) -fe=antlr.exe *.obj -k14336 + copy *.exe ..\bin + +# *********** Target list of PC machines *********** +# +# Don't worry about the ambiguity messages coming from antlr +# for making antlr.c etc... [should be 10 of them, I think] +# +antlr.c stdpccts.h parser.dlg tokens.h err.c : antlr.g + $(ANTLR) antlr.g + +antlr.$(OBJ_EXT): antlr.c mode.h tokens.h + +scan.$(OBJ_EXT): scan.c mode.h tokens.h + +scan.c mode.h: parser.dlg + $(DLG) -C2 parser.dlg scan.c + +set.$(OBJ_EXT): $(SET)\set.c + $(CC) $(CFLAGS) -c set.$(OBJ_EXT) $(SET)\set.c + +# +# ****** These next targets are common to UNIX and PC world ******** +# + +#clean up all the intermediate files +clean: + del *.obj + +#remove everything in clean plus the PCCTS files generated +scrub: + del $(PCCTS_GEN) + del *.$(OBJ_EXT) +EOF_watantlr.mak diff --git a/dlg/dlg_p.g b/dlg/dlg_p.g new file mode 100755 index 0000000..750a7d0 --- /dev/null +++ b/dlg/dlg_p.g @@ -0,0 +1,450 @@ +/* This is the parser for the dlg + * This is a part of the Purdue Compiler Construction Tool Set + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. 
However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * DLG 1.33 + * Will Cohen + * With mods by Terence Parr; AHPCRC, University of Minnesota + * 1989-1995 + */ + +#header << +#include +#include "dlg.h" +#ifdef MEMCHK +#include "trax.h" +#endif +>> + +<< +int action_no = 0; /* keep track of actions outputed */ +int nfa_allocated = 0; /* keeps track of number of nfa nodes */ +nfa_node **nfa_array = NULL;/* root of binary tree that stores nfa array */ +nfa_node nfa_model_node; /* model to initialize new nodes */ +set used_chars; /* used to label trans. arcs */ +set used_classes; /* classes or chars used to label trans. 
arcs */ +set normal_chars; /* mask to get rid elements that aren't used + in set */ +int flag_paren = FALSE; +int flag_brace = FALSE; +int mode_counter = 0; /* keep track of number of %%names */ + +>> + +#lexaction << +int func_action; /* should actions be turned into functions?*/ +int lex_mode_counter = 0; /* keeps track of the number of %%names */ +>> + +#token "[\r\t\ ]+" << zzskip(); >> /* Ignore white */ +#token "\n" << zzline++; zzskip(); DAWDLE; >> /* Track Line # */ +#token L_EOF "\@" +#token PER_PER "\%\%" +#token NAME_PER_PER "\%\%[a-zA-Z_][a-zA-Z0-9_]*" + << p_mode_def(&zzlextext[2],lex_mode_counter++); >> +#token ACTION "\<\<" + << if (func_action) + fprintf(OUT,"\n%s %sact%d()\n{ ", + gen_cpp?"ANTLRTokenType":"static void", + gen_cpp?ClassName("::"):"", ++action_no); + zzmode(ACT); zzskip(); + >> +#token GREAT_GREAT "\>\>" +#token L_BRACE "\{" +#token R_BRACE "\}" +#token L_PAR "\(" +#token R_PAR "\)" +#token L_BRACK "\[" +#token R_BRACK "\]" +#token ZERO_MORE "\*" +#token ONE_MORE "\+" +#token OR "\|" +#token RANGE "\-" +#token NOT "\~" +#token OCTAL_VALUE "\\0[0-7]*" + << {int t; sscanf(&zzlextext[1],"%o",&t); zzlextext[0] = t;}>> +#token HEX_VALUE "\\0[Xx][0-9a-fA-F]+" + << {int t; sscanf(&zzlextext[3],"%x",&t); zzlextext[0] = t;}>> +#token DEC_VALUE "\\[1-9][0-9]*" + << {int t; sscanf(&zzlextext[1],"%d",&t); zzlextext[0] = t;}>> +#token TAB "\\t" << zzlextext[0] = '\t';>> +#token NL "\\n" << zzlextext[0] = '\n';>> +#token CR "\\r" << zzlextext[0] = '\r';>> +#token BS "\\b" << zzlextext[0] = '\b';>> +/* NOTE: this takes ANYTHING after the \ */ +#token LIT "\\~[tnrb]" << zzlextext[0] = zzlextext[1];>> +/* NOTE: this takes ANYTHING that doesn't match the other tokens */ +#token REGCHAR "~[\\]" + + +grammar : << p_head(); p_class_hdr(); func_action = FALSE;>> (ACTION)* + <> + start_states + << func_action = FALSE; p_tables(); p_tail(); >> + (ACTION)* "@" + ; + +start_states : ( PER_PER do_conversion + | NAME_PER_PER do_conversion (NAME_PER_PER 
do_conversion)*) + PER_PER + ; + +do_conversion : <> + rule_list + << + dfa_class_nop[mode_counter] = + relabel($1.l,comp_level); + if (comp_level) + p_shift_table(mode_counter); + dfa_basep[mode_counter] = dfa_allocated+1; + make_dfa_model_node(dfa_class_nop[mode_counter]); + nfa_to_dfa($1.l); + ++mode_counter; + func_action = FALSE; +#ifdef HASH_STAT + fprint_hash_stats(stderr); +#endif + >> + ; + +rule_list : rule <<$$.l=$1.l; $$.r=$1.r;>> + (rule + <<{nfa_node *t1; + t1 = new_nfa_node(); + (t1)->trans[0]=$$.l; + (t1)->trans[1]=$1.l; + /* all accept nodes "dead ends" */ + $$.l=t1; $$.r=NULL; + } + >> + )* + | /* empty */ + <<$$.l = new_nfa_node(); $$.r = NULL; + warning("no regular expressions", zzline); + >> + ; + +rule : reg_expr ACTION + <<$$.l=$1.l; $$.r=$1.r; ($1.r)->accept=action_no;>> + | ACTION + <<$$.l = NULL; $$.r = NULL; + error("no expression for action ", zzline); + >> + ; + +reg_expr : and_expr <<$$.l=$1.l; $$.r=$1.r;>> + (OR and_expr + <<{nfa_node *t1, *t2; + t1 = new_nfa_node(); t2 = new_nfa_node(); + (t1)->trans[0]=$$.l; + (t1)->trans[1]=$2.l; + ($$.r)->trans[1]=t2; + ($2.r)->trans[1]=t2; + $$.l=t1; $$.r=t2; + } + >> + )* + ; + +and_expr : repeat_expr <<$$.l=$1.l; $$.r=$1.r;>> + (repeat_expr <<($$.r)->trans[1]=$1.l; $$.r=$1.r;>>)* + ; + +repeat_expr : expr <<$$.l=$1.l; $$.r=$1.r;>> + { ZERO_MORE + <<{ nfa_node *t1,*t2; + ($$.r)->trans[0] = $$.l; + t1 = new_nfa_node(); t2 = new_nfa_node(); + t1->trans[0]=$$.l; + t1->trans[1]=t2; + ($$.r)->trans[1]=t2; + $$.l=t1;$$.r=t2; + } + >> + | ONE_MORE + <<($$.r)->trans[0] = $$.l;>> + } + | ZERO_MORE + << error("no expression for *", zzline);>> + | ONE_MORE + << error("no expression for +", zzline);>> + ; + +expr : << $$.l = new_nfa_node(); $$.r = new_nfa_node(); >> + L_BRACK atom_list R_BRACK + << + ($$.l)->trans[0] = $$.r; + ($$.l)->label = set_dup($2.label); + set_orin(&used_chars,($$.l)->label); + >> + | NOT L_BRACK atom_list R_BRACK + << + ($$.l)->trans[0] = $$.r; + ($$.l)->label = 
set_dif(normal_chars,$3.label); + set_orin(&used_chars,($$.l)->label); + >> + | L_PAR reg_expr R_PAR + << + ($$.l)->trans[0] = $2.l; + ($2.r)->trans[1] = $$.r; + >> + | L_BRACE reg_expr R_BRACE + << + ($$.l)->trans[0] = $2.l; + ($$.l)->trans[1] = $$.r; + ($2.r)->trans[1] = $$.r; + >> + | atom + << + ($$.l)->trans[0] = $$.r; + ($$.l)->label = set_dup($1.label); + set_orin(&used_chars,($$.l)->label); + >> + ; + +atom_list : << set_free($$.label); >> + (near_atom <>)* + ; + +near_atom : << register int i; + register int i_prime; + >> + anychar + <<$$.letter=$1.letter; $$.label=set_of($1.letter); + i_prime = $1.letter + MIN_CHAR; + if (case_insensitive && islower(i_prime)) + set_orel(toupper(i_prime)-MIN_CHAR, + &($$.label)); + if (case_insensitive && isupper(i_prime)) + set_orel(tolower(i_prime)-MIN_CHAR, + &($$.label)); + >> + { RANGE anychar + << if (case_insensitive){ + i_prime = $$.letter+MIN_CHAR; + $$.letter = (islower(i_prime) ? + toupper(i_prime) : i_prime)-MIN_CHAR; + i_prime = $2.letter+MIN_CHAR; + $2.letter = (islower(i_prime) ? 
+ toupper(i_prime) : i_prime)-MIN_CHAR; + } + /* check to see if range okay */ + if ($$.letter > $2.letter){ + error("invalid range ", zzline); + } + for (i=$$.letter; i<= (int)$2.letter; ++i){ + set_orel(i,&($$.label)); + i_prime = i+MIN_CHAR; + if (case_insensitive && islower(i_prime)) + set_orel(toupper(i_prime)-MIN_CHAR, + &($$.label)); + if (case_insensitive && isupper(i_prime)) + set_orel(tolower(i_prime)-MIN_CHAR, + &($$.label)); + } + >> + } + ; + +atom : << register int i_prime;>> + anychar + <<$$.label = set_of($1.letter); + i_prime = $1.letter + MIN_CHAR; + if (case_insensitive && islower(i_prime)) + set_orel(toupper(i_prime)-MIN_CHAR, + &($$.label)); + if (case_insensitive && isupper(i_prime)) + set_orel(tolower(i_prime)-MIN_CHAR, + &($$.label)); + >> + ; + +anychar : REGCHAR <<$$.letter = $1.letter - MIN_CHAR;>> + | OCTAL_VALUE <<$$.letter = $1.letter - MIN_CHAR;>> + | HEX_VALUE <<$$.letter = $1.letter - MIN_CHAR;>> + | DEC_VALUE <<$$.letter = $1.letter - MIN_CHAR;>> + | TAB <<$$.letter = $1.letter - MIN_CHAR;>> + | NL <<$$.letter = $1.letter - MIN_CHAR;>> + | CR <<$$.letter = $1.letter - MIN_CHAR;>> + | BS <<$$.letter = $1.letter - MIN_CHAR;>> + | LIT <<$$.letter = $1.letter - MIN_CHAR;>> + /* NOTE: LEX_EOF is ALWAYS shifted to 0 = MIN_CHAR - MIN_CHAR*/ + | L_EOF <<$$.letter = 0;>> + ; + +<> + +#lexclass ACT +#token "@" << error("unterminated action", zzline); zzmode(START); >> +#token ACTION "\>\>" + << if (func_action) fprintf(OUT,"}\n\n"); + zzmode(START); + >> +#token "\>" << putc(zzlextext[0], OUT); zzskip(); >> +#token "\\\>" << putc('>', OUT); zzskip(); >> +#token "\\" << putc('\\', OUT); zzskip(); >> +#token "\n" << putc(zzlextext[0], OUT); ++zzline; zzskip(); >> +#token "~[\>\\@\n]+" << fprintf(OUT, "%s", &(zzlextext[0])); zzskip(); >> + +<< +/* adds a new nfa to the binary tree and returns a pointer to it */ +nfa_node *new_nfa_node() +{ + register nfa_node *t; + static int nfa_size=0; /* elements nfa_array[] can hold */ + + ++nfa_allocated; 
+ if (nfa_size<=nfa_allocated){ + /* need to redo array */ + if (!nfa_array){ + /* need some to do initial allocation */ + nfa_size=nfa_allocated+NFA_MIN; + nfa_array=(nfa_node **) malloc(sizeof(nfa_node*)* + nfa_size); + }else{ + /* need more space */ + nfa_size=2*(nfa_allocated+1); + nfa_array=(nfa_node **) realloc(nfa_array, + sizeof(nfa_node*)*nfa_size); + } + } + /* fill out entry in array */ + t = (nfa_node*) malloc(sizeof(nfa_node)); + nfa_array[nfa_allocated] = t; + *t = nfa_model_node; + t->node_no = nfa_allocated; + return t; +} + + +/* initialize the model node used to fill in newly made nfa_nodes */ +void +make_nfa_model_node() +{ + nfa_model_node.node_no = -1; /* impossible value for real nfa node */ + nfa_model_node.nfa_set = 0; + nfa_model_node.accept = 0; /* error state default*/ + nfa_model_node.trans[0] = NULL; + nfa_model_node.trans[1] = NULL; + nfa_model_node.label = empty; +} +>> + +<< +#ifdef DEBUG + +/* print out the pointer value and the node_number */ +fprint_dfa_pair(f, p) +FILE *f; +nfa_node *p; +{ + if (p){ + fprintf(f, "%p (%d)", (void *)p, p->node_no); + }else{ + fprintf(f, "(nil)"); + } +} + +/* print out interest information on a set */ +fprint_set(f,s) +FILE *f; +set s; +{ + unsigned int *x; + + fprintf(f, "n = %d,", s.n); + if (s.setword){ + fprintf(f, "setword = %p, ", (void *)s.setword); + /* print out all the elements in the set */ + x = set_pdq(s); + while (*x!=nil){ + fprintf(f, "%d ", *x); + ++x; + } + }else{ + fprintf(f, "setword = (nil)"); + } +} + +/* code to be able to dump out the nfas + return 0 if okay dump + return 1 if screwed up + */ +int dump_nfas(first_node, last_node) +int first_node; +int last_node; +{ + register int i; + nfa_node *t; + + for (i=first_node; i<=last_node; ++i){ + t = NFA(i); + if (!t) break; + fprintf(stderr, "nfa_node %d {\n", t->node_no); + fprintf(stderr, "\n\tnfa_set = %d\n", t->nfa_set); + fprintf(stderr, "\taccept\t=\t%d\n", t->accept); + fprintf(stderr, "\ttrans\t=\t("); + fprint_dfa_pair(stderr, 
t->trans[0]); + fprintf(stderr, ","); + fprint_dfa_pair(stderr, t->trans[1]); + fprintf(stderr, ")\n"); + fprintf(stderr, "\tlabel\t=\t{ "); + fprint_set(stderr, t->label); + fprintf(stderr, "\t}\n"); + fprintf(stderr, "}\n\n"); + } + return 0; +} +#endif +>> + +<< +/* DLG-specific syntax error message generator + * (define USER_ZZSYN when compiling so don't get 2 definitions) + */ +void +#ifdef __STDC__ +zzsyn(char *text, int tok, char *egroup, SetWordType *eset, int etok, int k, char *bad_text) +#else +zzsyn(text, tok, egroup, eset, etok, k, bad_text) +char *text, *egroup, *bad_text; +int tok; +int etok; +int k; +SetWordType *eset; +#endif +{ + fprintf(stderr, ErrHdr, file_str[0]!=NULL?file_str[0]:"stdin", zzline); + fprintf(stderr, " syntax error at \"%s\"", (tok==zzEOF_TOKEN)?"EOF":text); + if ( !etok && !eset ) {fprintf(stderr, "\n"); return;} + if ( k==1 ) fprintf(stderr, " missing"); + else + { + fprintf(stderr, "; \"%s\" not", bad_text); + if ( zzset_deg(eset)>1 ) fprintf(stderr, " in"); + } + if ( zzset_deg(eset)>0 ) zzedecode(eset); + else fprintf(stderr, " %s", zztokens[etok]); + if ( strlen(egroup) > (size_t)0 ) fprintf(stderr, " in %s", egroup); + fprintf(stderr, "\n"); +} +>> diff --git a/dlg/err.c b/dlg/err.c new file mode 100755 index 0000000..cb23702 --- /dev/null +++ b/dlg/err.c @@ -0,0 +1,83 @@ +/* + * A n t l r S e t s / E r r o r F i l e H e a d e r + * + * Generated from: dlg_p.g + * + * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995 + * Parr Research Corporation + * with Purdue University Electrical Engineering + * With AHPCRC, University of Minnesota + * ANTLR Version 1.32 + */ + +#include +#define ANTLR_VERSION 132 + +#include +#include "dlg.h" +#ifdef MEMCHK +#include "trax.h" +#endif +#define zzSET_SIZE 8 +#include "antlr.h" +#include "tokens.h" +#include "dlgdef.h" +#include "err.h" + +ANTLRChar *zztokens[34]={ + /* 00 */ "Invalid", + /* 01 */ "@", + /* 02 */ "[\\r\\t\\ ]+", + /* 03 */ "\\n", + /* 04 */ "L_EOF", + /* 
05 */ "PER_PER", + /* 06 */ "NAME_PER_PER", + /* 07 */ "ACTION", + /* 08 */ "GREAT_GREAT", + /* 09 */ "L_BRACE", + /* 10 */ "R_BRACE", + /* 11 */ "L_PAR", + /* 12 */ "R_PAR", + /* 13 */ "L_BRACK", + /* 14 */ "R_BRACK", + /* 15 */ "ZERO_MORE", + /* 16 */ "ONE_MORE", + /* 17 */ "OR", + /* 18 */ "RANGE", + /* 19 */ "NOT", + /* 20 */ "OCTAL_VALUE", + /* 21 */ "HEX_VALUE", + /* 22 */ "DEC_VALUE", + /* 23 */ "TAB", + /* 24 */ "NL", + /* 25 */ "CR", + /* 26 */ "BS", + /* 27 */ "LIT", + /* 28 */ "REGCHAR", + /* 29 */ "\\>", + /* 30 */ "\\\\>", + /* 31 */ "\\", + /* 32 */ "\\n", + /* 33 */ "~[\\>\\@\\n]+" +}; +SetWordType zzerr1[8] = {0x60,0x0,0x0,0x0, 0x0,0x0,0x0,0x0}; +SetWordType zzerr2[8] = {0xf0,0xaa,0xf9,0x1f, 0x0,0x0,0x0,0x0}; +SetWordType zzerr3[8] = {0x90,0xaa,0xf9,0x1f, 0x0,0x0,0x0,0x0}; +SetWordType setwd1[34] = {0x0,0x3,0x0,0x0,0x98,0x64,0x64, + 0x1a,0x0,0x98,0x0,0x98,0x0,0x98,0x0, + 0x98,0x98,0x0,0x0,0x98,0x98,0x98,0x98, + 0x98,0x98,0x98,0x98,0x98,0x98,0x0,0x0, + 0x0,0x0,0x0}; +SetWordType zzerr4[8] = {0x10,0xaa,0xf9,0x1f, 0x0,0x0,0x0,0x0}; +SetWordType zzerr5[8] = {0x10,0x2a,0xf8,0x1f, 0x0,0x0,0x0,0x0}; +SetWordType setwd2[34] = {0x0,0x0,0x0,0x0,0xf5,0x1,0x1, + 0xab,0x0,0xb5,0xaa,0xb5,0xaa,0xb5,0x0, + 0xa5,0xa5,0xa8,0x0,0xb5,0xf5,0xf5,0xf5, + 0xf5,0xf5,0xf5,0xf5,0xf5,0xf5,0x0,0x0, + 0x0,0x0,0x0}; +SetWordType zzerr6[8] = {0x10,0x0,0xf0,0x1f, 0x0,0x0,0x0,0x0}; +SetWordType setwd3[34] = {0x0,0x0,0x0,0x0,0x1d,0x0,0x0, + 0x18,0x0,0x18,0x18,0x18,0x18,0x18,0x16, + 0x18,0x18,0x18,0x10,0x18,0x1d,0x1d,0x1d, + 0x1d,0x1d,0x1d,0x1d,0x1d,0x1d,0x0,0x0, + 0x0,0x0,0x0}; diff --git a/dlg/main.c b/dlg/main.c new file mode 100755 index 0000000..556398f --- /dev/null +++ b/dlg/main.c @@ -0,0 +1,207 @@ +/* Main function for dlg version + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. 
An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * DLG 1.33 + * Will Cohen + * With mods by Terence Parr; AHPCRC, University of Minnesota + * 1989-1995 + */ + +#include +#include "stdpccts.h" + +char program[] = "dlg"; +char version[] = "1.33"; +int numfiles = 0; +char *file_str[2] = {NULL, NULL}; +char *mode_file = "mode.h"; +char *class_name = DEFAULT_CLASSNAME; +char *outdir = TopDirectory; + +/* Option variables */ +int comp_level = 0; +int interactive = FALSE; +int case_insensitive = FALSE; +int warn_ambig = FALSE; +int gen_cpp = FALSE; + +/* Option List Stuff */ +void p_comp0() {comp_level = 0;} +void p_comp1() {comp_level = 1;} +void p_comp2() {comp_level = 2;} +void p_stdio() { file_str[numfiles++] = NULL;} +void p_file(s) char *s; { file_str[numfiles++] = s;} +void p_cl_name(s,t) + char *s, *t; + { + if ( gen_cpp ) { + class_name = t; + } + else { + warning("-cl only valid in C++ mode; -cl ignored...",0); + } + } +void p_mode_file(s,t) char *s,*t;{mode_file=t;} +void p_outdir(s,t) char *s,*t;{outdir=t;} +void p_ansi() {gen_ansi = TRUE;} +void p_interactive() {interactive = 
TRUE;} +void p_case_s() { case_insensitive = FALSE; } +void p_case_i() { case_insensitive = TRUE; } +void p_warn_ambig() { warn_ambig = TRUE; } +void p_cpp() { gen_cpp = TRUE; } + +typedef struct { + char *option; + int arg; + void (*process)(); + char *descr; + } Opt; + +Opt options[] = { + { "-CC", 0, p_cpp, "Generate C++ output" }, + { "-C0", 0, p_comp0, "No compression (default)" }, + { "-C1", 0, p_comp1, "Compression level 1" }, + { "-C2", 0, p_comp2, "Compression level 2" }, + { "-ga", 0, p_ansi, "Generate ansi C"}, + { "-Wambiguity", 0, p_warn_ambig, "Warn if expressions ambiguous"}, + { "-m", 1, p_mode_file, "Rename lexical mode output file"}, + { "-i", 0, p_interactive, "Build interactive scanner"}, + { "-ci", 0, p_case_i, "Make lexical analyzer case insensitive"}, + { "-cl", 1, p_cl_name, "Rename lexer class (DLGLexer); only used for -CC"}, + { "-cs", 0, p_case_s, "Make lexical analyzer case sensitive (default)"}, + { "-o", 1, p_outdir, OutputDirectoryOption}, + { "-", 0, p_stdio, "Use standard i/o rather than file"}, + { "*", 0, p_file, ""}, /* anything else is a file */ + { NULL, 0, NULL } +}; + +int main(argc,argv) +int argc; +char *argv[]; +{ + init(); + fprintf(stderr, "%s Version %s 1989-1995\n", &(program[0]), + &(version[0])); + if ( argc == 1 ) + { + Opt *p = options; + fprintf(stderr, "%s [options] f1 f2 ... 
fn\n",argv[0]); + while ( *(p->option) != '*' ) + { + fprintf(stderr, "\t%s %s\t%s\n", + p->option, + (p->arg)?"___":" ", + p->descr); + p++; + } + }else{ + ProcessArgs(argc-1, &(argv[1]), options); + if (input_stream = read_stream(file_str[0])) { + /* don't overwrite unless input okay */ + if ( gen_cpp ) { + output_stream = write_stream(ClassName(CPP_FILE_SUFFIX)); + if ( file_str[1]!=NULL ) { + warning("output file implicit in C++ mode; ignored...",0); + } + class_stream = write_stream(ClassName(".h")); + mode_stream = class_stream; + } + else { + output_stream = write_stream(file_str[1]); + mode_stream = write_stream(mode_file); + } + } + /* make sure that error reporting routines in grammar + know what the file really is */ + /* make sure that reading and writing somewhere */ + if (input_stream && output_stream && mode_stream){ + ANTLR(grammar(), input_stream); + } + p_class_def(); + } + if ( output_stream!=NULL ) fclose(output_stream); + if ( !gen_cpp && mode_stream!=NULL ) fclose(mode_stream); + if ( class_stream!=NULL ) fclose(class_stream); + exit(PCCTS_EXIT_SUCCESS); +} + + +ProcessArgs(argc, argv, options) +int argc; +char **argv; +Opt *options; +{ + Opt *p; + + while ( argc-- > 0 ) + { + p = options; + while ( p->option != NULL ) + { + if ( strcmp(p->option, "*") == 0 || + strcmp(p->option, *argv) == 0 ) + { + if ( p->arg ) + { + (*p->process)( *argv, *(argv+1) ); + argv++; + argc--; + } + else + (*p->process)( *argv ); + break; + } + p++; + } + argv++; + } +} + +/* initialize all the variables */ +init() +{ + register int i; + + special_inits(); + + used_chars = empty; + used_classes = empty; + /* make the valid character set */ + normal_chars = empty; + /* NOTE: MIN_CHAR is EOF */ + /* NOTE: EOF is not quite a valid char, it is special. Skip it*/ + for (i = 1; i