From 4c1585478dbadc88148f5417d75d6f58b8042c82 Mon Sep 17 00:00:00 2001 From: Terence Parr <> Date: Thu, 5 Oct 1995 19:39:58 -0500 Subject: [PATCH] auto commit for import --- antlr/generic.h | 228 ++++++++ antlr/globals.c | 386 ++++++++++++++ antlr/hash.c | 229 ++++++++ antlr/lex.c | 783 ++++++++++++++++++++++++++++ antlr/main.c | 1220 +++++++++++++++++++++++++++++++++++++++++++ antlr/misc.c | 1325 +++++++++++++++++++++++++++++++++++++++++++++++ antlr/pred.c | 607 ++++++++++++++++++++++ 7 files changed, 4778 insertions(+) create mode 100755 antlr/generic.h create mode 100755 antlr/globals.c create mode 100755 antlr/hash.c create mode 100755 antlr/lex.c create mode 100755 antlr/main.c create mode 100755 antlr/misc.c create mode 100755 antlr/pred.c diff --git a/antlr/generic.h b/antlr/generic.h new file mode 100755 index 0000000..68fd4d2 --- /dev/null +++ b/antlr/generic.h @@ -0,0 +1,228 @@ +/* + * generic.h -- generic include stuff for new PCCTS ANTLR. + * + * $Id: generic.h,v 1.2 95/06/15 18:06:55 parrt Exp $ + * $Revision: 1.2 $ + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#define StrSame 0 + +#define DefaultParserName "zzparser" + +#define ZZLEXBUFSIZE 4000 + +/* Tree/FIRST/FOLLOW defines -- valid only after all grammar has been read */ +#define ALT TokenNum+1 +#define SET TokenNum+2 +#define TREE_REF TokenNum+3 + + /* E r r o r M a c r o s */ + +#define fatal(err) fatalFL(err, __FILE__, __LINE__) +#define fatal_internal(err) fatal_intern(err, __FILE__, __LINE__) + + +#define eMsg1(s,a) eMsg3(s,a,NULL,NULL) +#define eMsg2(s,a,b) eMsg3(s,a,b,NULL) + + /* S a n i t y C h e c k i n g */ + +#ifndef require +#define require(expr, err) {if ( !(expr) ) fatal_internal(err);} +#endif + + /* L i s t N o d e s */ + +typedef struct _ListNode { + void *elem; /* pointer to any kind of element */ + struct _ListNode *next; + } ListNode; + +/* Define a Cycle node which is used to track lists of cycles for later + * reconciliation by ResolveFoCycles(). 
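+ *
+ * Illustrative sketch only (hypothetical usage -- the names 'r', 'deps'
+ * and 'k' are placeholders; the real bookkeeping lives in the FOLLOW
+ * computation code, not in this header):
+ *
+ *     Cycle *c = newCycle;               newCycle expands to a calloc()
+ *     c->croot = r;                      rule whose FOLLOW is incomplete
+ *     c->cyclicDep = set_dup(deps);      rules that FOLLOW(r) depends on
+ *     list_add(&(Cycles[k]), (void *)c);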
+ */ +typedef struct _c { + int croot; /* cycle root */ + set cyclicDep; /* cyclic dependents */ + unsigned deg; /* degree of FOLLOW set of croot */ + } Cycle; + +typedef struct _e { + int tok; /* error class name == TokenStr[tok] */ + ListNode *elist; /* linked list of elements in error set */ + set eset; + int setdeg; /* how big is the set */ + int lexclass; /* which lex class is it in? */ + } ECnode; + +typedef struct _TCnode { + int tok; /* token class name */ + ListNode *tlist; /* linked list of elements in token set */ + set tset; + int lexclass; /* which lex class is it in? */ + unsigned char dumped; /* this def has been been dumped */ + unsigned setnum; /* which set number is this guy? (if dumped) */ + } TCnode; + +typedef struct _ft { + char *token; /* id of token type to remap */ + int tnum; /* move token type to which token position */ + } ForcedToken; + +#define newListNode (ListNode *) calloc(1, sizeof(ListNode)); +#define newCycle (Cycle *) calloc(1, sizeof(Cycle)); +#define newECnode (ECnode *) calloc(1, sizeof(ECnode)); +#define newTCnode (TCnode *) calloc(1, sizeof(TCnode)); + + + /* H a s h T a b l e E n t r i e s */ + +typedef struct _t { /* Token name or expression */ + char *str; + struct _t *next; + int token; /* token number */ + unsigned char classname; /* is it a err/tok class name or token */ + TCnode *tclass; /* ptr to token class */ + char *action; + } TermEntry; + +typedef struct _r { /* Rule name and ptr to start of rule */ + char *str; + struct _t *next; + int rulenum; /* RulePtr[rulenum]== ptr to RuleBlk junction */ + unsigned char noAST;/* gen AST construction code? (def==gen code) */ + char *egroup; /* which error group (err reporting stuff) */ + ListNode *el_labels;/* list of element labels ref in all of rule */ + unsigned char has_rule_exception; + } RuleEntry; + +typedef struct _f { /* cache Fi/Fo set */ + char *str; /* key == (rulename, computation, k) */ + struct _f *next; + set fset; /* First/Follow of rule */ + set rk; /* set of k's remaining to be done after ruleref */ + int incomplete; /* only w/FOLLOW sets. Use only if complete */ + } CacheEntry; + +typedef struct _LabelEntry { /* element labels */ + char *str; + struct _f *next; + Node *elem; /* which element does it point to? */ + ExceptionGroup *ex_group; + /* Is there an exception attached to label? 
*/ + } LabelEntry; + +typedef struct _SignalEntry { + char *str; + struct _f *next; + int signum; /* unique signal number */ + } SignalEntry; + +#define newTermEntry(s) (TermEntry *) newEntry(s, sizeof(TermEntry)) +#define newRuleEntry(s) (RuleEntry *) newEntry(s, sizeof(RuleEntry)) +#define newCacheEntry(s) (CacheEntry *) newEntry(s, sizeof(CacheEntry)) +#define newLabelEntry(s) (LabelEntry *) newEntry(s, sizeof(LabelEntry)) +#define newSignalEntry(s) (SignalEntry *) newEntry(s, sizeof(SignalEntry)) + + +typedef struct _UserAction { + char *action; + int file, line; + } UserAction; + + + /* L e x i c a l C l a s s */ + +/* to switch lex classes, switch ExprStr and Texpr (hash table) */ +typedef struct _lc { + char *classnum, **exprs; + Entry **htable; + } LClass; + +typedef struct _exprOrder { + char *expr; + int lclass; + } Expr; + + +typedef Graph Attrib; + + /* M a x i m u m s */ + +#ifndef HashTableSize +#define HashTableSize 253 +#endif +#ifndef StrTableSize +#define StrTableSize 15000 /* all tokens, nonterminals, rexprs stored here */ +#endif +#define MaxLexClasses 50 /* how many automatons */ +/* TokenStart and EofToken are ignored if #tokdefs meta-op is used */ +#define TokenStart 2 /* MUST be in 1 + EofToken */ +#define EofToken 1 /* Always predefined to be 1 */ +#define MaxNumFiles 20 +#define MaxFileName 300 /* largest file name size */ +#define MaxRuleName 100 /* largest rule name size */ +#define TSChunk 100 /* how much to expand TokenStr/ExprStr each time */ +#define TIChunk TSChunk /* expand TokenInd by same as TokenStr to mirror them */ +#define FoStackSize 100 /* deepest FOLLOW recursion possible */ + +#define NumPredefinedSignals 3 + + /* S t a n d a r d S i g n a l s */ + +#define sigNoSignal 0 +#define sigMismatchedToken 1 +#define sigNoViableAlt 2 +#define sigNoSemViableAlt 3 + + + +/* AST token types */ +#define ASTexclude 0 +#define ASTchild 1 +#define ASTroot 2 +#define ASTinclude 3 /* include subtree made by rule ref */ + + +#define PredictionVariable "zzpr_expr" +#define PredictionLexClassSuffix "_zzpred" + +#define WildCardString "WildCard" + +#ifndef ANTLRm +#define ANTLRm(st, f, _m) zzbufsize = ZZLEXBUFSIZE;\ + zzmode(_m); \ + zzenterANTLR(f); \ + st; ++zzasp; \ + zzleaveANTLR(f); +#endif + +#include "proto.h" +#include "config.h" +#include diff --git a/antlr/globals.c b/antlr/globals.c new file mode 100755 index 0000000..8795fd9 --- /dev/null +++ b/antlr/globals.c @@ -0,0 +1,386 @@ +/* + * globals.c -- File containing all variables/tables visible to all files. + * + * $Id: globals.c,v 1.5 95/10/05 11:57:02 parrt Exp $ + * $Revision: 1.5 $ + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. 
+ * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "set.h" +#include "syn.h" +#include "hash.h" +#include "generic.h" + +char Version[] = "1.33" ; /* PCCTS version number */ +char VersionDef[] = "133"; /* same (except int equiv for preproc symbol) */ +char LexStartSymbol[] = "START";/* Name of starting lexical class/automaton */ + +char *RemapFileName = "remap.h"; +char *DlgFileName = "parser.dlg"; +char *DefFileName = "tokens.h"; +char *ErrFileName = "err.c"; +char *ModeFileName = "mode.h"; + +char *ParserName = DefaultParserName; + +/* list of PCCTS supplied support symbols; these are renamed when more than + * one ANTLR-generated parsers are linked together to avoid name conflicts. + * Can't use '##' ANSIC preprocessor concat operator with K&R and: + * #define zzskip zzparser ## skip + * will not work for ANSI/C++ as 'zzparserskip' is created w/o zzparser + * being substituted--ack!!! + */ +char *StandardSymbols[] = { +/* ANTLR stuff */ + "zzStackOvfMsg", + "zzasp", + "zzaStack", + "inf_tokens", + "inf_text", + "inf_text_buffer", + "inf_text_buffer_ptr", + "inf_text_buffer_size", + "inf_labase", + "inf_last", + "inf_lap", + "zztokenLA", + "zztextLA", + "zzlap", + "zzlabase", + "zztoktext", + "zztoken", + "zzdirty", + "zzguessing", + "zzguess_start", + "zzresynch", + "zzinf_tokens", + "zzinf_text", + "zzinf_text_buffer", + "zzinf_labase", + "zzinf_last", + "zzfill_inf_look", + "zzFAIL", + "zzsave_antlr_state", + "zzrestore_antlr_state", + "zzsyn", + "zzset_el", + "zzset_deg", + "zzedecode", + "_zzsetmatch", + "_zzmatch", + "_inf_zzgettok", + "zzconsumeUntil", + "zzconsumeUntilToken", + "_zzmatch_wsig", + "_zzsetmatch_wsig", + "_zzmatch_wdfltsig", + "_zzsetmatch_wdfltsig", + "zzdflthandlers", +/* DLG stuff */ + "zzreal_line", + "zzcharfull", + "zzerr", + "zzlextext", + "zzbegexpr", + "zzendexpr", + "zzbufsize", + "zzbegcol", + "zzendcol", + "zzline", + "zzchar", + "zzbufovf", + "zzrdstream", + "zzrdfunc", + "zzrdstr", + "zzclose_stream", + "zzsave_dlg_state", + "zzrestore_dlg_state", + "zzmode", + "zzskip", + "zzmore", + "zzreplchar", + "zzreplstr", + "zzgettok", + "zzadvance", + "zzerrstd", + "zzerr_in", + "zzconstr_attr", + "zzempty_attr", + "zzerraction", + "zztokens", /* list of token regular expressions */ + "dfa", + "accepts", + "actions", + NULL /* must be present */ +}; + +/* list of PCCTS supplied support functions; these are renamed when more than + * one ANTLR-generated parsers are linked together to avoid name conflicts. + */ +char *ASTSymbols[] = { + "AST", + "zzast_sp", + "zzastStack", + "zzlink", + "zzastnew", + "zzsubchild", + "zzsubroot", + "zzpre_ast", + "zzfree_ast", + "zztmake", + "zzdup_ast", + "zztfree", + "zzdouble_link", + NULL /* must be present */ +}; + +/* Current ambiguity examination information */ +int CurAmbigAlt1, CurAmbigAlt2, CurAmbigline, CurAmbigfile; +char *CurAmbigbtype; + + + /* M e t h o d T a b l e s */ +/* + * The following tables are used to fill syntax diagram nodes with the correct + * function pointers for computing FIRST sets and printing themselves. + */ + +/* fpTraverse[node type] == pointer to function that calculates trees + * representing the FIRST sets for that node (maintains spatial info). 
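+ * All three tables (fpTraverse/fpReach/fpPrint) are indexed by a node's
+ * type; a hypothetical sketch of one call (the real calls are hidden
+ * behind macros elsewhere in ANTLR, and 'p', 'k', 'rk' are placeholders):
+ *
+ *     Tree *t = (*fpTraverse[p->ntype])(p, k, &rk);
+ *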
+ * We use 'struct _tree' not 'tree' due to a g++ 2.4.3 bug. + */ +#ifdef __cplusplus +struct _tree *(*fpTraverse[NumNodeTypes+1])(... /* Node *, int, set * */) = { + NULL, + (struct _tree *(*)(...)) tJunc, + (struct _tree *(*)(...)) tRuleRef, + (struct _tree *(*)(...)) tToken, + (struct _tree *(*)(...)) tAction +}; +#else +Tree *(*fpTraverse[NumNodeTypes+1])() = { + NULL, + tJunc, + tRuleRef, + tToken, + tAction +}; +#endif + +/* fpReach[node type] == pointer to function that calculates FIRST set for + * that node. (r stands for reach). We use 'struct _set' not 'set' + * due to a g++ 2.4.3 bug. + */ +#ifdef __cplusplus +struct _set (*fpReach[NumNodeTypes+1])(... /* Node *, int, set * */) = { + NULL, + (struct _set (*)(...)) rJunc, + (struct _set (*)(...)) rRuleRef, + (struct _set (*)(...)) rToken, + (struct _set (*)(...)) rAction +}; +#else +set (*fpReach[NumNodeTypes+1])() = { + NULL, + rJunc, + rRuleRef, + rToken, + rAction +}; +#endif + +/* fpPrint[node type] == pointer to function that knows how to print that node. */ +#ifdef __cplusplus +void (*fpPrint[NumNodeTypes+1])(... /* Node * */) = { + NULL, + (void (*)(...)) pJunc, + (void (*)(...)) pRuleRef, + (void (*)(...)) pToken, + (void (*)(...)) pAction +}; +#else +void (*fpPrint[NumNodeTypes+1])() = { + NULL, + pJunc, + pRuleRef, + pToken, + pAction +}; +#endif + +char *decodeJType[] = { + "invalid", + "aSubBlk", + "aOptBlk", + "aLoopBlk", + "EndBlk", + "RuleBlk", + "Generic", + "EndRule", + "aPlusBlk", + "aLoopBegin" +}; + + + /* H a s h T a b l e s */ + +Entry **Tname, /* Table of all token names (maps name to tok num)*/ + **Texpr, /* Table of all token expressions + (maps expr to tok num) */ + **Rname, /* Table of all Rules (has ptr to start of rule) */ + **Fcache, /* Cache of First/Follow Computations */ + **Tcache; /* Tree cache; First/Follow for permute trees */ +Entry **Elabel; /* Table of all element label names */ +Entry **Sname; /* Signal names */ + + + /* V a r i a b l e s */ + +int EpToken=0; /* Imaginary Epsilon token number */ +int WildCardToken=0; +int CurFile= -1; /* Index into FileStr table */ +char *CurRule=NULL; /* Pointer to current rule name */ +RuleEntry *CurRuleNode=NULL;/* Pointer to current rule node in syntax tree */ +char *CurRetDef=NULL; /* Pointer to current return type definition */ +char *CurParmDef=NULL; /* Pointer to current parameter definition */ +Junction *CurRuleBlk=NULL; /* Pointer to current block node for enclosing block */ +ListNode *CurExGroups=NULL; /* Current list of exception groups for rule/alts */ +ListNode *CurElementLabels=NULL; +int CurBlockID=0; /* Unique int for each block */ +int CurAltNum=0; +Junction *CurAltStart = NULL; /* Junction node that starts the alt */ +int NumRules=0; /* Rules are from 1 to n */ +FILE *output=NULL; /* current parser output file */ +FILE *input=NULL; /* current grammar input file */ +char *FileStr[MaxNumFiles];/* Ptr to array of file names on command-line */ +int NumFiles=0; /* current grammar file number */ +#ifdef __cplusplus +void (**fpTrans)(...), /* array of ptrs to funcs that translate nodes */ + (**fpJTrans)(...); /* ... that translate junctions */ +#else +void (**fpTrans)(), /* array of ptrs to funcs that translate nodes */ + (**fpJTrans)(); /* ... that translate junctions */ +#endif +int **FoStack; /* Array of LL_k ptrs to stacks of rule numbers */ +int **FoTOS; /* FOLLOW stack top-of-stack pointers */ +Junction *SynDiag = NULL; /* Pointer to start of syntax diagram */ +int BlkLevel=1; /* Current block level. 
Set by antlr.g, used by + * scanner to translate $i.j attributes */ +set reserved_positions; /* set of token positions reserved by '#token T=i' cmds */ +set all_tokens; /* set of all token types */ +set imag_tokens; /* set of all imaginary token types (EpToken, errclasses...) */ +set tokclasses; /* set of all token class token types */ +ListNode *ForcedTokens = 0; /* list of token_id/token_num pairs to remap */ +ListNode *MetaTokenNodes=NULL; /* list of meta token refs such as token classes etc... */ +int *TokenInd=NULL; /* an indirection level between token num and position + * of that token def in TokenStr and ExprStr */ +int LastTokenCounted=0; /* ==TokenNum if no token renumbering (same as old TokenNum) */ +int TokenNum=TokenStart; +char **TokenStr=NULL; /* map token # to token name */ +char **ExprStr=NULL; /* map token # to expr */ +Junction **RulePtr=NULL; /* map rule # to RuleBlk node of rule */ +ListNode *ExprOrder=NULL; /* list of exprs as they are found in grammar */ +ListNode *BeforeActions=NULL;/* list of grammar actions before rules */ +ListNode *AfterActions=NULL;/* list of grammar actions after rules */ +ListNode *LexActions=NULL; /* list of lexical actions */ +ListNode **Cycles=NULL; /* list of cycles (for each k) found when + doing FOLLOWs */ +ListNode *eclasses=NULL; /* list of error classes */ +ListNode *tclasses=NULL; /* list of token classes */ +LClass lclass[MaxLexClasses]; /* array of lex class definitions */ +int CurrentLexClass; /* index into lclass */ +int NumLexClasses=0; /* in range 1..MaxLexClasses (init 0) */ + +char *HdrAction=NULL; /* action defined with #header */ +FILE *ErrFile; /* sets and error recovery stuff */ +FILE *DefFile=NULL; /* list of tokens, return value structs, setwd defs */ +int CannotContinue=FALSE; +int OutputLL_k = 1; /* LL_k for parsing must be power of 2 */ +int action_file; /* used to track start of action */ +int action_line; +int FoundGuessBlk=0; /* there is a (...)? block somewhere in grammar */ +int FoundException=0; /* there is an exception somewhere in grammar */ +int pLevel=0; /* print Level */ +int pAlt1,pAlt2; /* print "==>" in front of these alts */ + +/* C++ output stuff */ +FILE *Parser_h, /* where subclass of ANTLRParser goes */ + *Parser_c; /* where code for subclass of ANTLRParser goes */ +char Parser_h_Name[MaxFileName+1] = ""; +char Parser_c_Name[MaxFileName+1] = ""; + +/* list of actions inside the #class {...} defs */ +ListNode *class_before_actions=NULL; +ListNode *class_after_actions=NULL; + +char CurrentClassName[MaxRuleName]=""; +int no_classes_found=1; +char *UserTokenDefsFile; +int UserDefdTokens=0; /* found #tokdefs? */ +char *OutputDirectory=TopDirectory; +ExceptionGroup *DefaultExGroup = NULL; +int NumSignals = NumPredefinedSignals; +int ContextGuardTRAV=0; + + + /* C m d - L i n e O p t i o n s */ + +int LL_k=1; /* how many tokens of full lookahead */ +int CLL_k= -1; /* how many tokens of compressed lookahead */ +int PrintOut = FALSE; /* print out the grammar */ +int PrintAnnotate = FALSE;/* annotate printout with FIRST sets */ +int CodeGen=TRUE; /* Generate output code? */ +int LexGen=TRUE; /* Generate lexical files? (tokens.h, parser.dlg) */ +int GenAST=FALSE; /* Generate AST's? */ +int GenANSI=FALSE; /* Generate ANSI code where necessary */ +int GenExprSets=TRUE; /* use sets not (LA(1)==tok) expression lists */ +int GenCR=FALSE; /* Generate cross reference? */ +int GenLineInfo=FALSE; /* Generate # line "file" stuff? 
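+ *                           i.e. directives such as  # line 27 "grammar.g"
+ *                           (line number and file name made up here), so
+ *                           errors in the generated C map back to the
+ *                           grammar source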
*/ +int TraceGen=FALSE; /* Generate code to trace rule invocation */ +int elevel=1; /* error level for ambiguity messages */ +int GenEClasseForRules=0;/* don't generate eclass for each rule */ +int TreeResourceLimit= -1;/* don't limit tree resource */ +int DemandLookahead = 0;/* demand/delayed lookahead or not */ +char *RulePrefix = ""; /* prefix each generated rule with this */ +char *stdpccts = "stdpccts.h";/* where to generate std pccts include file */ +int GenStdPccts = 0; /* don't gen stdpccts.h? */ +int ParseWithPredicates = 1; +int WarningLevel = 1; +int HoistPredicateContext = 0; +int GenCC = 0; /* Generate C++ output */ + +/* DontCopyTokens and Pragma_DupLabeledTokens were a bad idea. I've just + turned them off rather than backpatching the code. Who knows? We + may need them in the future. + */ +int DontCopyTokens = 1; /* in C++, don't copy ANTLRToken passed to ANTLR */ diff --git a/antlr/hash.c b/antlr/hash.c new file mode 100755 index 0000000..6e8a278 --- /dev/null +++ b/antlr/hash.c @@ -0,0 +1,229 @@ +/* + * hash.c + * + * $Id: hash.c,v 1.3 95/10/05 11:57:07 parrt Exp $ + * $Revision: 1.3 $ + * + * Manage hash tables. + * + * The following functions are visible: + * + * char *mystrdup(char *); Make space and copy string + * Entry **newHashTable(); Create and return initialized hash table + * Entry *hash_add(Entry **, char *, Entry *) + * Entry *hash_get(Entry **, char *) + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#include +#include "config.h" +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "hash.h" +#ifdef __STDC__ +#include +#else +#ifdef VAXC +#include +#else +#include +#endif +#endif +#include + +#define StrSame 0 + +#define fatal(err) \ + {fprintf(stderr, "%s(%d):", __FILE__, __LINE__); \ + fprintf(stderr, " %s\n", err); exit(PCCTS_EXIT_FAILURE);} +#define require(expr, err) {if ( !(expr) ) fatal(err);} + +static unsigned size = HashTableSize; +static char *strings = NULL; +static char *strp; +static unsigned strsize = StrTableSize; + +/* create the hash table and string table for terminals (string table only once) */ +Entry ** +#ifdef __STDC__ +newHashTable( void ) +#else +newHashTable( ) +#endif +{ + Entry **table; + + table = (Entry **) calloc(size, sizeof(Entry *)); + require( table != NULL, "cannot allocate hash table"); + if ( strings == NULL ) + { + strings = (char *) calloc(strsize, sizeof(char)); + require( strings != NULL, "cannot allocate string table"); + strp = strings; + } + return table; +} + +void +#ifdef __STDC__ +killHashTable( Entry **table ) +#else +killHashTable( table ) +Entry **table; +#endif +{ + /* for now, just free table, forget entries */ + free( table ); +} + +/* Given a table, add 'rec' with key 'key' (add to front of list). return ptr to entry */ +Entry * +#ifdef __STDC__ +hash_add( Entry **table, char *key, Entry *rec ) +#else +hash_add( table, key, rec ) +Entry **table; +char *key; +Entry *rec; +#endif +{ + unsigned h=0; + char *p=key; + extern Entry *Globals; + require(table!=NULL && key!=NULL && rec!=NULL, "add: invalid addition"); + + Hash(p,h,size); + rec->next = table[h]; /* Add to singly-linked list */ + table[h] = rec; + return rec; +} + +/* Return ptr to 1st entry found in table under key (return NULL if none found) */ +Entry * +#ifdef __STDC__ +hash_get( Entry **table, char *key ) +#else +hash_get( table, key ) +Entry **table; +char *key; +#endif +{ + unsigned h=0; + char *p=key; + Entry *q; +/* require(table!=NULL && key!=NULL, "get: invalid table and/or key");*/ + if ( !(table!=NULL && key!=NULL) ) *((char *) 34) = 3; + + Hash(p,h,size); + for (q = table[h]; q != NULL; q = q->next) + { + if ( strcmp(key, q->str) == StrSame ) return( q ); + } + return( NULL ); +} + +#ifdef DEBUG_HASH +void +#ifdef __STDC__ +hashStat( Entry **table ) +#else +hashStat( table ) +Entry **table; +#endif +{ + static unsigned short count[20]; + int i,n=0,low=0, hi=0; + Entry **p; + float avg=0.0; + + for (i=0; i<20; i++) count[i] = 0; + for (p=table; p<&(table[size]); p++) + { + Entry *q = *p; + int len; + + if ( q != NULL && low==0 ) low = p-table; + len = 0; + if ( q != NULL ) fprintf(stderr, "[%d]", p-table); + while ( q != NULL ) + { + len++; + n++; + fprintf(stderr, " %s", q->str); + q = q->next; + if ( q == NULL ) fprintf(stderr, "\n"); + } + count[len]++; + if ( *p != NULL ) hi = p-table; + } + + fprintf(stderr, "Storing %d recs used %d hash positions out of %d\n", + n, size-count[0], size); + fprintf(stderr, "%f %% utilization\n", + ((float)(size-count[0]))/((float)size)); + for (i=0; i<20; i++) + { + if ( count[i] != 0 ) + { + avg += (((float)(i*count[i]))/((float)n)) * i; + fprintf(stderr, "Bucket len %d == %d (%f %% of recs)\n", + i, count[i], ((float)(i*count[i]))/((float)n)); + } + } + fprintf(stderr, "Avg bucket length %f\n", avg); + fprintf(stderr, "Range of hash function: 
%d..%d\n", low, hi); +} +#endif + +/* Add a string to the string table and return a pointer to it. + * Bump the pointer into the string table to next avail position. + */ +char * +#ifdef __STDC__ +mystrdup( char *s ) +#else +mystrdup( s ) +char *s; +#endif +{ + char *start=strp; + require(s!=NULL, "mystrdup: NULL string"); + + while ( *s != '\0' ) + { + require( strp <= &(strings[strsize-2]), + "string table overflow\nIncrease StrTableSize in hash.h and recompile hash.c\n"); + *strp++ = *s++; + } + *strp++ = '\0'; + + return( start ); +} diff --git a/antlr/lex.c b/antlr/lex.c new file mode 100755 index 0000000..78ed821 --- /dev/null +++ b/antlr/lex.c @@ -0,0 +1,783 @@ +/* + * lex.c -- Generate all of the lexical type files: parser.dlg tokens.h + * + * $Id: lex.c,v 1.4 95/09/26 12:58:36 parrt Exp $ + * $Revision: 1.4 $ + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#include +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "set.h" +#include "syn.h" +#include "hash.h" +#include "generic.h" + +#define DLGErrorString "invalid token" + +/* Generate a complete lexical description of the lexemes found in the grammar */ +void +#ifdef __STDC__ +genLexDescr( void ) +#else +genLexDescr( ) +#endif +{ + ListNode *p; + FILE *dlgFile = fopen(OutMetaName(DlgFileName), "w"); + require(dlgFile!=NULL, eMsg1("genLexFile: cannot open %s", OutMetaName(DlgFileName)) ); + special_fopen_actions(OutMetaName(DlgFileName)); + + fprintf(dlgFile, "<<\n"); + fprintf(dlgFile, "/* %s -- DLG Description of scanner\n", DlgFileName); + fprintf(dlgFile, " *\n"); + fprintf(dlgFile, " * Generated from:"); + {int i; for (i=0; i\n"); + fprintf(dlgFile, "#define ANTLR_VERSION %s\n", VersionDef); + if ( strcmp(ParserName, DefaultParserName)!=0 ) + fprintf(dlgFile, "#define %s %s\n", DefaultParserName, ParserName); + if ( strcmp(ParserName, DefaultParserName)!=0 ) + fprintf(dlgFile, "#include \"%s\"\n", RemapFileName); + if ( HdrAction != NULL ) dumpAction( HdrAction, dlgFile, 0, -1, 0, 1 ); + if ( FoundGuessBlk ) + { + fprintf(dlgFile, "#define ZZCAN_GUESS\n"); + fprintf(dlgFile, "#include \n"); + } + if ( OutputLL_k > 1 ) fprintf(dlgFile, "#define LL_K %d\n", OutputLL_k); + if ( DemandLookahead ) fprintf(dlgFile, "#define DEMAND_LOOK\n"); + fprintf(dlgFile, "#include \"antlr.h\"\n"); + if ( GenAST ) { + fprintf(dlgFile, "#include \"ast.h\"\n"); + } + if ( UserDefdTokens ) + fprintf(dlgFile, "#include %s\n", UserTokenDefsFile); + /* still need this one as it has the func prototypes */ + fprintf(dlgFile, "#include \"%s\"\n", DefFileName); + fprintf(dlgFile, "#include \"dlgdef.h\"\n"); + fprintf(dlgFile, "LOOKAHEAD\n"); + fprintf(dlgFile, "void zzerraction()\n"); + fprintf(dlgFile, "{\n"); + fprintf(dlgFile, "\t(*zzerr)(\"%s\");\n", DLGErrorString); + fprintf(dlgFile, "\tzzadvance();\n"); + fprintf(dlgFile, "\tzzskip();\n"); + fprintf(dlgFile, "}\n"); + } + fprintf(dlgFile, ">>\n\n"); + + /* dump all actions */ + if (LexActions != NULL) + { + for (p = LexActions->next; p!=NULL; p=p->next) + { + fprintf(dlgFile, "<<\n"); + dumpAction( (char *)p->elem, dlgFile, 0, -1, 0, 1 ); + fprintf(dlgFile, ">>\n"); + } + } + /* dump all regular expression rules/actions (skip sentinel node) */ + if ( ExprOrder == NULL ) { + warnNoFL("no regular expressions found in grammar"); + } + else dumpLexClasses(dlgFile); + fprintf(dlgFile, "%%%%\n"); + fclose( dlgFile ); +} + +/* For each lexical class, scan ExprOrder looking for expressions + * in that lexical class. Print out only those that match. + * Each element of the ExprOrder list has both an expr and an lclass + * field. 
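+ *
+ * Each matching expression is written to parser.dlg roughly as below
+ * (token name and pattern are made-up examples; C mode assigns NLA,
+ * C++ mode returns the token type instead):
+ *
+ *     [a-z]+
+ *         <<
+ *         NLA = ID;
+ *         >>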
+ */ +void +#ifdef __STDC__ +dumpLexClasses( FILE *dlgFile ) +#else +dumpLexClasses( dlgFile ) +FILE *dlgFile; +#endif +{ + int i; + TermEntry *t; + ListNode *p; + Expr *q; + + for (i=0; inext; p!=NULL; p=p->next) + { + q = (Expr *) p->elem; + if ( q->lclass != i ) continue; + lexmode(i); + t = (TermEntry *) hash_get(Texpr, q->expr); + require(t!=NULL, eMsg1("genLexDescr: rexpr %s not in hash table",q->expr) ); + if ( t->token == EpToken ) continue; + fprintf(dlgFile, "%s\n\t<<\n", StripQuotes(q->expr)); + /* replace " killed by StripQuotes() */ + q->expr[ strlen(q->expr) ] = '"'; + if ( !GenCC ) { + if ( TokenString(t->token) != NULL ) + fprintf(dlgFile, "\t\tNLA = %s;\n", TokenString(t->token)); + else + fprintf(dlgFile, "\t\tNLA = %d;\n", t->token); + } + if ( t->action != NULL ) dumpAction( t->action, dlgFile, 2,-1,0,1 ); + if ( GenCC ) { + if ( TokenString(t->token) != NULL ) + fprintf(dlgFile, "\t\treturn %s;\n", TokenString(t->token)); + else + fprintf(dlgFile, "\t\treturn (ANTLRTokenType)%d;\n", t->token); + } + fprintf(dlgFile, "\t>>\n\n"); + } + } +} + +/* Strip the leading path (if any) from a filename */ +char * +#ifdef __STDC__ +StripPath( char *fileName ) +#else +StripPath( fileName ) +char *fileName; +#endif +{ + char *p; + static char dirSym[2] = DirectorySymbol; + + if(NULL != (p = strrchr(fileName, dirSym[0]))) + p++; + else + p = fileName; + + return(p); +} + +/* Generate a list of #defines && list of struct definitions for + * aggregate retv's */ +void +#ifdef __STDC__ +genDefFile( void ) +#else +genDefFile( ) +#endif +{ + int i; + + /* If C++ mode and #tokdef used, then don't need anything in here since + * C++ puts all definitions in the class file name. + */ + if ( GenCC && UserTokenDefsFile ) return; + + DefFile = fopen(OutMetaName(DefFileName), "w"); + require(DefFile!=NULL, eMsg1("genDefFile: cannot open %s", OutMetaName(DefFileName)) ); + special_fopen_actions(OutMetaName(DefFileName)); + + fprintf(DefFile, "#ifndef %s\n", StripPath(gate_symbol(DefFileName))); + fprintf(DefFile, "#define %s\n", StripPath(gate_symbol(DefFileName))); + + fprintf(DefFile, "/* %s -- List of labelled tokens and stuff\n", DefFileName); + fprintf(DefFile, " *\n"); + fprintf(DefFile, " * Generated from:"); + for (i=0; i1 ) + { + int j; + /* look in all lexclasses for the reg expr */ + for (j=0; j=NumLexClasses ) + { + warnNoFL(eMsg1("token label has no associated rexpr: %s",TokenString(i))); + } + } + require((p=(TermEntry *)hash_get(Tname, TokenString(i))) != NULL, + "token not in sym tab when it should be"); + if ( !p->classname ) + { + if ( GenCC ) { + if ( !first ) fprintf(DefFile, ",\n"); + first = 0; + fprintf(DefFile, "\t%s=%d", TokenString(i), i); + } + else + fprintf(DefFile, "#define %s %d\n", TokenString(i), i); + } + } + } + if ( GenCC ) fprintf(DefFile, "};\n"); + } + + if ( !GenCC ) GenRulePrototypes(DefFile, SynDiag); + + fprintf(DefFile, "\n#endif\n"); +} + +void +#ifdef __STDC__ +GenRemapFile( void ) +#else +GenRemapFile( ) +#endif +{ + if ( strcmp(ParserName, DefaultParserName)!=0 ) + { + FILE *f; + int i; + + f = fopen(OutMetaName(RemapFileName), "w"); + require(f!=NULL, eMsg1("GenRemapFile: cannot open %s", OutMetaName(RemapFileName)) ); + special_fopen_actions(OutMetaName(RemapFileName)); + + fprintf(f, "/* %s -- List of symbols to remap\n", RemapFileName); + fprintf(f, " *\n"); + fprintf(f, " * Generated from:"); + for (i=0; irname, ParserName, p->rname); + p = (Junction *)p->p2; + } +} + +/* Generate a bunch of #defines that rename all standard symbols to be + * 
"ParserName_symbol". The list of standard symbols to change is in + * globals.c. + */ +void +#ifdef __STDC__ +GenPredefinedSymbolRedefs( FILE *f ) +#else +GenPredefinedSymbolRedefs( f ) +FILE *f; +#endif +{ + char **p; + + fprintf(f, "\n/* rename PCCTS-supplied symbols to be 'ParserName_symbol' */\n"); + for (p = &StandardSymbols[0]; *p!=NULL; p++) + { + fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p); + } +} + +/* Generate a bunch of #defines that rename all AST symbols to be + * "ParserName_symbol". The list of AST symbols to change is in + * globals.c. + */ +void +#ifdef __STDC__ +GenASTSymbolRedefs( FILE *f ) +#else +GenASTSymbolRedefs( f ) +FILE *f; +#endif +{ + char **p; + + fprintf(f, "\n/* rename PCCTS-supplied AST symbols to be 'ParserName_symbol' */\n"); + for (p = &ASTSymbols[0]; *p!=NULL; p++) + { + fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p); + } +} + +/* redefine all sets generated by ANTLR; WARNING: 'zzerr', 'setwd' must match + * use in bits.c (DumpSetWd() etc...) + */ +void +#ifdef __STDC__ +GenSetRedefs( FILE *f ) +#else +GenSetRedefs( f ) +FILE *f; +#endif +{ + int i; + + for (i=1; i<=wordnum; i++) + { + fprintf(f, "#define setwd%d %s_setwd%d\n", i, ParserName, i); + } + for (i=1; i<=esetnum; i++) + { + fprintf(f, "#define zzerr%d %s_err%d\n", i, ParserName, i); + } +} + +/* Find all return types/parameters that require structs and def + * all rules with ret types. + */ +void +#ifdef __STDC__ +GenRulePrototypes( FILE *f, Junction *p ) +#else +GenRulePrototypes( f, p ) +FILE *f; +Junction *p; +#endif +{ + int i; + + i = 1; + while ( p!=NULL ) + { + if ( p->ret != NULL ) + { + if ( HasComma(p->ret) ) + { + DumpRetValStruct(f, p->ret, i); + } + fprintf(f, "\n#ifdef __STDC__\n"); + if ( HasComma(p->ret) ) + { + fprintf(f, "extern struct _rv%d", i); + } + else + { + fprintf(f, "extern "); + DumpType(p->ret, f); + } + fprintf(f, " %s%s(", RulePrefix, p->rname); + DumpANSIFunctionArgDef(f,p); + fprintf(f, ";\n"); +#ifdef OLD + if ( p->pdecl != NULL || GenAST ) + { + if ( GenAST ) { + fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":""); + } + if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); + } + else fprintf(f, "void"); + fprintf(f, ");\n"); +#endif + fprintf(f, "#else\n"); + if ( HasComma(p->ret) ) + { + fprintf(f, "extern struct _rv%d", i); + } + else + { + fprintf(f, "extern "); + DumpType(p->ret, f); + } + fprintf(f, " %s%s();\n", RulePrefix, p->rname); + fprintf(f, "#endif\n"); + } + else + { + fprintf(f, "\n#ifdef __STDC__\n"); + fprintf(f, "void %s%s(", RulePrefix, p->rname); + DumpANSIFunctionArgDef(f,p); + fprintf(f, ";\n"); +#ifdef OLD + if ( p->pdecl != NULL || GenAST ) + { + if ( GenAST ) { + fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":""); + } + if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); + } + else fprintf(f, "void"); + fprintf(f, ");\n"); +#endif + fprintf(f, "#else\n"); + fprintf(f, "extern void %s%s();\n", RulePrefix, p->rname); + fprintf(f, "#endif\n"); + } + i++; + p = (Junction *)p->p2; + } +} + +/* Define all rules in the class.h file; generate any required + * struct definitions first, however. 
+ */ +void +#ifdef __STDC__ +GenRuleMemberDeclarationsForCC( FILE *f, Junction *q ) +#else +GenRuleMemberDeclarationsForCC( f, q ) +FILE *f; +Junction *q; +#endif +{ + Junction *p = q; + int i; + + fprintf(f, "private:\n"); + + /* Dump dflt handler declaration */ + fprintf(f, "\tvoid zzdflthandlers( int _signal, int *_retsignal );\n\n"); + + fprintf(f, "public:\n"); + + /* Dump return value structs */ + i = 1; + while ( p!=NULL ) + { + if ( p->ret != NULL ) + { + if ( HasComma(p->ret) ) + { + DumpRetValStruct(f, p->ret, i); + } + } + i++; + p = (Junction *)p->p2; + } + + /* Dump member func defs && CONSTRUCTOR */ + fprintf(f, "\t%s(ANTLRTokenBuffer *input);\n", CurrentClassName); +/* + fprintf(f, "\t%s(ANTLRTokenBuffer *input, ANTLRTokenType eof);\n", + CurrentClassName); +*/ + + i = 1; + p = q; + while ( p!=NULL ) + { + if ( p->ret != NULL ) + { + if ( HasComma(p->ret) ) + { + fprintf(f, "\tstruct _rv%d", i); + } + else + { + fprintf(f, "\t"); + DumpType(p->ret, f); + } + fprintf(f, " %s(", p->rname); + DumpANSIFunctionArgDef(f,p); + fprintf(f, ";\n"); +#ifdef OLD + if ( p->pdecl != NULL || GenAST ) + { + if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":""); + if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); + } + fprintf(f, ");\n"); +#endif + } + else + { + fprintf(f, "\tvoid %s(", p->rname); + DumpANSIFunctionArgDef(f,p); + fprintf(f, ";\n"); +#ifdef OLD + if ( p->pdecl != NULL || GenAST ) + { + if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":""); + if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); + } + fprintf(f, ");\n"); +#endif + } + i++; + p = (Junction *)p->p2; + } +} + +/* Given a list of ANSI-style parameter declarations, print out a + * comma-separated list of the symbols (w/o types). + * Basically, we look for a comma, then work backwards until start of + * the symbol name. Then print it out until 1st non-alnum char. Now, + * move on to next parameter. + */ +void +#ifdef __STDC__ +DumpListOfParmNames( char *pdecl, FILE *output ) +#else +DumpListOfParmNames( pdecl, output ) +char *pdecl; +FILE *output; +#endif +{ + int firstTime = 1, done = 0; + require(output!=NULL, "DumpListOfParmNames: NULL parm"); + + if ( pdecl == NULL ) return; + while ( !done ) + { + if ( !firstTime ) putc(',', output); + done = DumpNextNameInDef(&pdecl, output); + firstTime = 0; + } +} + +/* given a list of parameters or return values, dump the next + * name to output. Return 1 if last one just printed, 0 if more to go. + */ +int +#ifdef __STDC__ +DumpNextNameInDef( char **q, FILE *output ) +#else +DumpNextNameInDef( q, output ) +char **q; +FILE *output; +#endif +{ + char *p = *q; /* where did we leave off? */ + int done=0; + + while ( *p!='\0' && *p!=',' ) p++; /* find end of decl */ + if ( *p == '\0' ) done = 1; + while ( !isalnum(*p) && *p!='_' ) --p; /* scan back until valid var character */ + while ( isalnum(*p) || *p=='_' ) --p; /* scan back until beginning of variable */ + p++; /* move to start of variable */ + while ( isalnum(*p) || *p=='_' ) {putc(*p, output); p++;} + while ( *p!='\0' && *p!=',' ) p++; /* find end of decl */ + p++; /* move past this parameter */ + + *q = p; /* record where we left off */ + return done; +} + +/* Given a list of ANSI-style parameter declarations, dump K&R-style + * declarations, one per line for each parameter. Basically, convert + * comma to semi-colon, newline. 
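+ *
+ * For example, the ANSI-style list
+ *
+ *     int i, char *s
+ *
+ * comes out as
+ *
+ *     int i;
+ *     char *s;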
+ */ +void +#ifdef __STDC__ +DumpOldStyleParms( char *pdecl, FILE *output ) +#else +DumpOldStyleParms( pdecl, output ) +char *pdecl; +FILE *output; +#endif +{ + require(output!=NULL, "DumpOldStyleParms: NULL parm"); + + if ( pdecl == NULL ) return; + while ( *pdecl != '\0' ) + { + if ( *pdecl == ',' ) + { + pdecl++; + putc(';', output); putc('\n', output); + while ( *pdecl==' ' || *pdecl=='\t' || *pdecl=='\n' ) pdecl++; + } + else {putc(*pdecl, output); pdecl++;} + } + putc(';', output); + putc('\n', output); +} + +/* Take in a type definition (type + symbol) and print out type only */ +void +#ifdef __STDC__ +DumpType( char *s, FILE *f ) +#else +DumpType( s, f ) +char *s; +FILE *f; +#endif +{ + char *p, *end; + require(s!=NULL, "DumpType: invalid type string"); + + p = &s[strlen(s)-1]; /* start at end of string and work back */ + /* scan back until valid variable character */ + while ( !isalnum(*p) && *p!='_' ) --p; + /* scan back until beginning of variable */ + while ( isalnum(*p) || *p=='_' ) --p; + if ( p<=s ) + { + warnNoFL(eMsg1("invalid parameter/return value: '%s'",s)); + return; + } + end = p; /* here is where we stop printing alnum */ + p = s; + while ( p!=end ) {putc(*p, f); p++;} /* dump until just before variable */ + while ( *p!='\0' ) /* dump rest w/o variable */ + { + if ( !isalnum(*p) && *p!='_' ) putc(*p, f); + p++; + } +} + +/* check to see if string e is a word in string s */ +int +#ifdef __STDC__ +strmember( char *s, char *e ) +#else +strmember( s, e ) +char *s; +char *e; +#endif +{ + register char *p; + require(s!=NULL&&e!=NULL, "strmember: NULL string"); + + if ( *e=='\0' ) return 1; /* empty string is always member */ + do { + while ( *s!='\0' && !isalnum(*s) && *s!='_' ) + ++s; + p = e; + while ( *p!='\0' && *p==*s ) {p++; s++;} + if ( *p=='\0' ) { + if ( *s=='\0' ) return 1; + if ( !isalnum (*s) && *s != '_' ) return 1; + } + while ( isalnum(*s) || *s == '_' ) + ++s; + } while ( *s!='\0' ); + return 0; +} + +int +#ifdef __STDC__ +HasComma( char *s ) +#else +HasComma( s ) +char *s; +#endif +{ + while (*s!='\0') + if ( *s++ == ',' ) return 1; + return 0; +} + +void +#ifdef __STDC__ +DumpRetValStruct( FILE *f, char *ret, int i ) +#else +DumpRetValStruct( f, ret, i ) +FILE *f; +char *ret; +int i; +#endif +{ + fprintf(f, "\nstruct _rv%d {\n", i); + while ( *ret != '\0' ) + { + while ( *ret==' ' || *ret=='\t' ) ret++; /* ignore white */ + putc('\t', f); + while ( *ret!=',' && *ret!='\0' ) {putc(*ret,f); ret++;} + if ( *ret == ',' ) {putc(';', f); putc('\n', f); ret++;} + } + fprintf(f, ";\n};\n"); +} + +/* given "s" yield s -- DESTRUCTIVE (we modify s if starts with " else return s) */ +char * +#ifdef __STDC__ +StripQuotes( char *s ) +#else +StripQuotes( s ) +char *s; +#endif +{ + if ( *s == '"' ) + { + s[ strlen(s)-1 ] = '\0'; /* remove last quote */ + return( s+1 ); /* return address past initial quote */ + } + return( s ); +} + diff --git a/antlr/main.c b/antlr/main.c new file mode 100755 index 0000000..1d0284a --- /dev/null +++ b/antlr/main.c @@ -0,0 +1,1220 @@ +/* + * main.c -- main program for PCCTS ANTLR. + * + * $Id: main.c,v 1.7 95/10/05 11:57:08 parrt Exp $ + * $Revision: 1.7 $ + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. 
+ * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ + +#include +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "stdpccts.h" + +#define MAX_INT_STACK 50 +static int istack[MAX_INT_STACK]; /* Int stack */ +static int isp = MAX_INT_STACK; + +static int DontAcceptFiles = 0; /* if stdin, don't read files */ +static int DontAcceptStdin = 0; /* if files seen first, don't accept stdin */ + + + /* C m d - L i n e O p t i o n S t r u c t & F u n c s */ + +typedef struct _Opt { + char *option; + int arg; +#ifdef __cplusplus + void (*process)(...); +#else + void (*process)(); +#endif + char *descr; + } Opt; + +#ifdef __STDC__ +extern void ProcessArgs(int, char **, Opt *); +#else +extern void ProcessArgs(); +#endif + +static void +#ifdef __STDC__ +pStdin( void ) +#else +pStdin( ) +#endif +{ + if ( DontAcceptStdin ) + { + warnNoFL("'-' (stdin) ignored as files were specified first"); + return; + } + + require(NumFiles0 ) + { + warnNoFL("must have compressed lookahead >= full LL(k) lookahead (setting -ck to -k)"); + CLL_k = LL_k; + } + if ( CLL_k == -1 ) CLL_k = LL_k; + OutputLL_k = CLL_k; + if ( ((CLL_k-1)&CLL_k)!=0 ) { /* output ll(k) must be power of 2 */ + int n; + for(n=1; n1 && HdrAction == NULL ) + warnNoFL("no #header action was found"); + + EpToken = addTname(EPSTR); /* add imaginary token epsilon */ + set_orel(EpToken, &imag_tokens); + + /* this won't work for hand-built scanners since EofToken is not + * known. Forces EOF to be token type 1. + */ + set_orel(EofToken, &imag_tokens); + + set_size(NumWords(TokenNum-1)); + + /* compute the set of all known token types + * It represents the set of tokens from 1 to last_token_num + the + * reserved positions above that (if any). Don't include the set of + * imaginary tokens such as the token/error classes or EOF. 
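+ *
+ * E.g. (made-up numbers): with real tokens 2..6 defined, a '#token X=9'
+ * command reserving position 9, and EOF plus the error/token classes
+ * marked imaginary, all_tokens works out to { 2..6, 9 }.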
+ */ + { + set a; + a = set_dup(reserved_positions); + for (i=1; inext; p!=NULL; p=p->next) + { + UserAction *ua = (UserAction *)p->elem; + dumpAction( ua->action, Parser_h, 0, ua->file, ua->line, 1); + } + } + GenParser_c_Hdr(); + NewSetWd(); + TRANS(SynDiag); /* Translate to the target language */ + DumpSetWd(); + GenRuleMemberDeclarationsForCC(Parser_h, SynDiag); + if ( class_after_actions != NULL ) + { + ListNode *p; + for (p = class_after_actions->next; p!=NULL; p=p->next) + { + UserAction *ua = (UserAction *)p->elem; + dumpAction( ua->action, Parser_h, 0, ua->file, ua->line, 1); + } + } + DumpRemainingTokSets(); + fprintf(Parser_h, "};\n"); + fprintf(Parser_h, "\n#endif /* %s_h */\n", CurrentClassName); + fclose( Parser_h ); + fclose( Parser_c ); + } + } + + if ( PrintOut ) + { + if ( SynDiag == NULL ) {warnNoFL("no grammar description recognized");} + else PRINT(SynDiag); + } + + GenRemapFile(); /* create remap.h */ + + cleanUp(); + exit(PCCTS_EXIT_SUCCESS); +} + +static void +#ifdef __STDC__ +init( void ) +#else +init( ) +#endif +{ + SignalEntry *q; + + Tname = newHashTable(); + Rname = newHashTable(); + Fcache = newHashTable(); + Tcache = newHashTable(); + Sname = newHashTable(); + /* Add default signal names */ + q = (SignalEntry *)hash_add(Sname, + "NoViableAlt", + (Entry *)newSignalEntry("NoViableAlt")); + require(q!=NULL, "cannot alloc signal entry"); + q->signum = sigNoViableAlt; + q = (SignalEntry *)hash_add(Sname, + "MismatchedToken", + (Entry *)newSignalEntry("MismatchedToken")); + require(q!=NULL, "cannot alloc signal entry"); + q->signum = sigMismatchedToken; + q = (SignalEntry *)hash_add(Sname, + "NoSemViableAlt", + (Entry *)newSignalEntry("NoSemViableAlt")); + require(q!=NULL, "cannot alloc signal entry"); + q->signum = sigNoSemViableAlt; + + reserved_positions = empty; + all_tokens = empty; + imag_tokens = empty; + tokclasses = empty; + TokenStr = (char **) calloc(TSChunk, sizeof(char *)); + require(TokenStr!=NULL, "main: cannot allocate TokenStr"); + FoStack = (int **) calloc(CLL_k+1, sizeof(int *)); + require(FoStack!=NULL, "main: cannot allocate FoStack"); + FoTOS = (int **) calloc(CLL_k+1, sizeof(int *)); + require(FoTOS!=NULL, "main: cannot allocate FoTOS"); + Cycles = (ListNode **) calloc(CLL_k+1, sizeof(ListNode *)); + require(Cycles!=NULL, "main: cannot allocate Cycles List"); +} + +static void +#ifdef __STDC__ +help( void ) +#else +help( ) +#endif +{ + Opt *p = options; + fprintf(stderr, "antlr [options] f1 f2 ... fn\n"); + while ( *(p->option) != '*' ) + { + fprintf(stderr, "\t%-4s %s %s\n", + p->option, + (p->arg)?"___":" ", + p->descr); + p++; + } +} + +/* The RulePtr array is filled in here. RulePtr exists primarily + * so that sets of rules can be maintained for the FOLLOW caching + * mechanism found in rJunc(). RulePtr maps a rule num from 1 to n + * to a pointer to its RuleBlk junction where n is the number of rules. + */ +static void +#ifdef __STDC__ +buildRulePtr( void ) +#else +buildRulePtr( ) +#endif +{ + int r=1; + Junction *p = SynDiag; + RulePtr = (Junction **) calloc(NumRules+1, sizeof(Junction *)); + require(RulePtr!=NULL, "cannot allocate RulePtr array"); + + while ( p!=NULL ) + { + require(r<=NumRules, "too many rules???"); + RulePtr[r++] = p; + p = (Junction *)p->p2; + } +} + +void +#ifdef __STDC__ +dlgerror(const char *s) +#else +dlgerror(s) +char *s; +#endif +{ + fprintf(stderr, ErrHdr, FileStr[CurFile], zzline); + fprintf(stderr, " lexical error: %s (text was '%s')\n", + ((s == NULL) ? 
"Lexical error" : s), zzlextext); +} + +void +#ifdef __STDC__ +readDescr( void ) +#else +readDescr( ) +#endif +{ + zzerr = dlgerror; + input = NextFile(); + if ( input==NULL ) fatal("No grammar description found (exiting...)"); + ANTLR(grammar(), input); +} + +FILE * +#ifdef __STDC__ +NextFile( void ) +#else +NextFile( ) +#endif +{ + FILE *f; + + for (;;) + { + CurFile++; + if ( CurFile >= NumFiles ) return(NULL); + if ( strcmp(FileStr[CurFile],"stdin") == 0 ) return stdin; + f = fopen(FileStr[CurFile], "r"); + if ( f == NULL ) + { + warnNoFL( eMsg1("file %s doesn't exist; ignored", FileStr[CurFile]) ); + } + else + { + return(f); + } + } +} + +/* + * Return a string corresponding to the output file name associated + * with the input file name passed in. + * + * Observe the following rules: + * + * f.e --> f".c" + * f --> f".c" + * f. --> f".c" + * f.e.g --> f.e".c" + * + * Where f,e,g are arbitrarily long sequences of characters in a file + * name. + * + * In other words, if a ".x" appears on the end of a file name, make it + * ".c". If no ".x" appears, append ".c" to the end of the file name. + * + * C++ mode using .cpp not .c. + * + * Use malloc() for new string. + */ +char * +#ifdef __STDC__ +outname( char *fs ) +#else +outname( fs ) +char *fs; +#endif +{ + static char buf[MaxFileName+1]; + char *p; + require(fs!=NULL&&*fs!='\0', "outname: NULL filename"); + + p = buf; + strcpy(buf, fs); + while ( *p != '\0' ) {p++;} /* Stop on '\0' */ + while ( *p != '.' && p != buf ) {--p;} /* Find '.' */ + if ( p != buf ) *p = '\0'; /* Found '.' */ + require(strlen(buf) + 2 < (size_t)MaxFileName, "outname: filename too big"); + if ( GenCC ) strcat(buf, CPP_FILE_SUFFIX); + else strcat(buf, ".c"); + return( buf ); +} + +void +#ifdef __STDC__ +fatalFL( char *err_, char *f, int l ) +#else +fatalFL( err_, f, l ) +char *err_; +char *f; +int l; +#endif +{ + fprintf(stderr, ErrHdr, f, l); + fprintf(stderr, " %s\n", err_); + cleanUp(); + exit(PCCTS_EXIT_FAILURE); +} + +void +#ifdef __STDC__ +fatal_intern( char *err_, char *f, int l ) +#else +fatal_intern( err_, f, l ) +char *err_; +char *f; +int l; +#endif +{ + fprintf(stderr, ErrHdr, f, l); + fprintf(stderr, " #$%%*&@# internal error: %s\n", err_); + fprintf(stderr, ErrHdr, f, l); + fprintf(stderr, " [complain to nearest government official\n"); + fprintf(stderr, ErrHdr, f, l); + fprintf(stderr, " or send hate-mail to parrt@parr-research.com;\n"); + fprintf(stderr, ErrHdr, f, l); + fprintf(stderr, " please pray to the ``bug'' gods that there is a trival fix.]\n"); + cleanUp(); + exit(PCCTS_EXIT_FAILURE); +} + +void +#ifdef __STDC__ +cleanUp( void ) +#else +cleanUp( ) +#endif +{ + if ( DefFile != NULL) fclose( DefFile ); +} + +/* sprintf up to 3 strings */ +char * +#ifdef __STDC__ +eMsg3( char *s, char *a1, char *a2, char *a3 ) +#else +eMsg3( s, a1, a2, a3 ) +char *s; +char *a1; +char *a2; +char *a3; +#endif +{ + static char buf[250]; /* DANGEROUS as hell !!!!!! */ + + sprintf(buf, s, a1, a2, a3); + return( buf ); +} + +/* sprintf a decimal */ +char * +#ifdef __STDC__ +eMsgd( char *s, int d ) +#else +eMsgd( s, d ) +char *s; +int d; +#endif +{ + static char buf[250]; /* DANGEROUS as hell !!!!!! 
*/ + + sprintf(buf, s, d); + return( buf ); +} + +void +#ifdef __STDC__ +s_fprT( FILE *f, set e ) +#else +s_fprT( f, e ) +FILE *f; +set e; +#endif +{ + register unsigned *p; + unsigned *q; + + if ( set_nil(e) ) return; + if ( (q=p=set_pdq(e)) == NULL ) fatal_internal("Can't alloc space for set_pdq"); + fprintf(f, "{"); + while ( *p != nil ) + { + fprintf(f, " %s", TerminalString(*p)); + p++; + } + fprintf(f, " }"); + free((char *)q); +} + +/* Return the token name or regular expression for a token number. */ +char * +#ifdef __STDC__ +TerminalString( int token ) +#else +TerminalString( token ) +int token; +#endif +{ + int j; + + /* look in all lexclasses for the token */ + if ( TokenString(token) != NULL ) return TokenString(token); + for (j=0; j0, "pushint: stack overflow"); + istack[--isp] = i; +} + +int +#ifdef __STDC__ +popint( void ) +#else +popint( ) +#endif +{ + require(isp 0 ) + { + p = options; + while ( p->option != NULL ) + { + if ( strcmp(p->option, "*") == 0 || + strcmp(p->option, *argv) == 0 ) + { + if ( p->arg ) + { + (*p->process)( *argv, *(argv+1) ); + argv++; + argc--; + } + else + (*p->process)( *argv ); + break; + } + p++; + } + argv++; + } +} + +/* Go back into the syntax diagram and compute all meta tokens; i.e. + * turn all '.', ranges, token class refs etc... into actual token sets + */ +static void +CompleteTokenSetRefs() +{ + ListNode *p; + + if ( MetaTokenNodes==NULL ) return; + for (p = MetaTokenNodes->next; p!=NULL; p=p->next) + { + set a,b; + + TokNode *q = (TokNode *)p->elem; + if ( q->wild_card ) + { + q->tset = all_tokens; + } + else if ( q->tclass!=NULL ) + { + if ( q->complement ) q->tset = set_dif(all_tokens, q->tclass->tset); + else q->tset = q->tclass->tset; + } + else if ( q->upper_range!=0 ) + { + /* we have a range on our hands: make a set from q->token .. 
q->upper_range */ + int i; + a = empty; + for (i=q->token; i<=(int)q->upper_range; i++) { set_orel(i, &a); } + q->tset = a; + } + /* at this point, it can only be a complemented single token */ + else if ( q->complement ) + { + a = set_of(q->token); + b = set_dif(all_tokens, a); + set_free(a); + q->tset=b; + } + else fatal("invalid meta token"); + } +} + +char * +#ifdef __STDC__ +OutMetaName(char *n) +#else +OutMetaName(n) +char *n; +#endif +{ + static char buf[MaxFileName+1]; + + if ( strcmp(OutputDirectory,TopDirectory)==0 ) return n; + strcpy(buf, OutputDirectory); + if ( strcmp(&buf[strlen(buf) - 1], DirectorySymbol ) ) + strcat(buf, DirectorySymbol); + strcat(buf, n); + return buf; +} + +static void +#ifdef __STDC__ +ensure_no_C_file_collisions(char *class_c_file) +#else +ensure_no_C_file_collisions(class_c_file) +char *class_c_file; +#endif +{ + int i; + + for (i=0; i= NumFiles && CurFile >= 1 ) CurFile--; + fprintf(stderr, ErrHdr, FileStr[CurFile], zzline); + fprintf(stderr, " warning: %s\n", err); +} + +void +#ifdef __STDC__ +warnNoCR( char *err ) +#else +warnNoCR( err ) +char *err; +#endif +{ + /* back up the file number if we hit an error at the end of the last file */ + if ( CurFile >= NumFiles && CurFile >= 1 ) CurFile--; + fprintf(stderr, ErrHdr, FileStr[CurFile], zzline); + fprintf(stderr, " warning: %s", err); +} + +void +#ifdef __STDC__ +errNoFL(char *err) +#else +errNoFL(err) +char *err; +#endif +{ + fprintf(stderr, "error: %s\n", err); +} + +void +#ifdef __STDC__ +errFL(char *err,char *f,int l) +#else +errFL(err,f,l) +char *err; +char *f; +int l; +#endif +{ + fprintf(stderr, ErrHdr, f, l); + fprintf(stderr, " error: %s\n", err); +} + +void +#ifdef __STDC__ +err(char *err) +#else +err(err) +char *err; +#endif +{ + /* back up the file number if we hit an error at the end of the last file */ + if ( CurFile >= NumFiles && CurFile >= 1 ) CurFile--; + fprintf(stderr, ErrHdr, FileStr[CurFile], zzline); + fprintf(stderr, " error: %s\n", err); +} + +void +#ifdef __STDC__ +errNoCR( char *err ) +#else +errNoCR( err ) +char *err; +#endif +{ + /* back up the file number if we hit an error at the end of the last file */ + if ( CurFile >= NumFiles && CurFile >= 1 ) CurFile--; + fprintf(stderr, ErrHdr, FileStr[CurFile], zzline); + fprintf(stderr, " error: %s", err); +} + +UserAction * +#ifdef __STDC__ +newUserAction(char *s) +#else +newUserAction(s) +char *s; +#endif +{ + UserAction *ua = (UserAction *) calloc(1, sizeof(UserAction)); + require(ua!=NULL, "cannot allocate UserAction"); + + ua->action = (char *) calloc(strlen(LATEXT(1))+1, sizeof(char)); + strcpy(ua->action, s); + return ua; +} + +/* Added by TJP September 1994 */ +/* Take in file.h and return file_h; names w/o '.'s are left alone */ +char * +#ifdef __USE_PROTOS +gate_symbol(char *name) +#else +gate_symbol(name) +char *name; +#endif +{ + static char buf[100]; + char *p; + sprintf(buf, "%s", name); + + for (p=buf; *p!='\0'; p++) + { + if ( *p=='.' ) *p = '_'; + } + return buf; +} + +char * +#ifdef __USE_PROTOS +makeAltID(int blockid, int altnum) +#else +makeAltID(blockid, altnum) +int blockid; +int altnum; +#endif +{ + static char buf[100]; + char *p; + sprintf(buf, "_blk%d_alt%d", blockid, altnum); + p = (char *)malloc(strlen(buf)+1); + strcpy(p, buf); + return p; +} diff --git a/antlr/misc.c b/antlr/misc.c new file mode 100755 index 0000000..7dda4ec --- /dev/null +++ b/antlr/misc.c @@ -0,0 +1,1325 @@ +/* + * misc.c + * + * Manage tokens, regular expressions. 
+ * Print methods for debugging + * Compute follow lists onto tail ends of rules. + * + * The following functions are visible: + * + * int addTname(char *); Add token name + * int addTexpr(char *); Add token expression + * int Tnum(char *); Get number of expr/token + * void Tklink(char *, char *); Link a name with an expression + * int hasAction(expr); Does expr already have action assigned? + * void setHasAction(expr); Indicate that expr now has an action + * Entry *newEntry(char *,int); Create new table entry with certain size + * void list_add(ListNode **list, char *e) + * void list_apply(ListNode *list, void (*f)()) + * void lexclass(char *m); switch to new/old lexical class + * void lexmode(int i); switch to old lexical class i + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "set.h" +#include "syn.h" +#include "hash.h" +#include "generic.h" +#include "dlgdef.h" + +static int tsize=TSChunk; /* size of token str arrays */ + +static void +#ifdef __STDC__ +RemapForcedTokensInSyntaxDiagram(Node *); +#else +RemapForcedTokensInSyntaxDiagram(); +#endif + + /* T o k e n M a n i p u l a t i o n */ + +/* + * add token 't' to the TokenStr/Expr array. Make more room if necessary. + * 't' is either an expression or a token name. + * + * There is only one TokenStr array, but multiple ExprStr's. Therefore, + * for each lex class (element of lclass) we must extend the ExprStr array. + * ExprStr's and TokenStr are always all the same size. + * + * Also, there is a Texpr hash table for each automaton. + */ +static void +#ifdef __STDC__ +Ttrack( char *t ) +#else +Ttrack( t ) +char *t; +#endif +{ + if ( TokenNum >= tsize ) /* terminal table overflow? */ + { + char **p; + int i, more, j; + + more = TSChunk * (1 + ((TokenNum-tsize) / TSChunk)); + tsize += more; + TokenStr = (char **) realloc((char *)TokenStr, tsize*sizeof(char *)); + require(TokenStr != NULL, "Ttrack: can't extend TokenStr"); + for (i=0; iexpr = e; + p->lclass = CurrentLexClass; + return p; +} + +/* switch to lexical class/mode m. This amounts to creating a new + * lex mode if one does not already exist and making ExprStr point + * to the correct char string array. We must also switch Texpr tables. 
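A small self-contained sketch (not from the patch) of the chunked table growth used by Ttrack() above: when a token number outruns the table, grow by enough whole chunks to cover it. TSCHUNK, table and tsize are stand-ins for the real TSChunk, TokenStr/ExprStr and tsize.

#include <stdio.h>
#include <stdlib.h>

#define TSCHUNK 100

static char **table = NULL;
static int    tsize = 0;

static void ensure_capacity(int token_num)
{
    if (token_num >= tsize) {
        /* same rounding as Ttrack: 1 + (overflow / chunk) chunks */
        int more = TSCHUNK * (1 + ((token_num - tsize) / TSCHUNK));
        int i;
        table = (char **) realloc(table, (tsize + more) * sizeof(char *));
        if (table == NULL) { fprintf(stderr, "out of memory\n"); exit(1); }
        for (i = tsize; i < tsize + more; i++) table[i] = NULL;  /* clear new slots */
        tsize += more;
    }
}

int main(void)
{
    ensure_capacity(0);
    ensure_capacity(250);              /* grows 100 -> 300 in one step */
    printf("tsize=%d\n", tsize);
    return 0;
}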
+ * + * BTW, we need multiple ExprStr arrays because more than one automaton + * may have the same label for a token, but with different expressions. + * We need to track an expr for each automaton. If we disallowed this + * feature, only one ExprStr would be required. + */ +void +#ifdef __STDC__ +lexclass( char *m ) +#else +lexclass( m ) +char *m; +#endif +{ + int i; + TermEntry *p; + static char EOFSTR[] = "\"@\""; + + if ( hash_get(Tname, m) != NULL ) + { + warn(eMsg1("lexclass name conflicts with token/errclass label '%s'",m)); + } + /* does m already exist? */ + i = LexClassIndex(m); + if ( i != -1 ) {lexmode(i); return;} + /* must make new one */ + NumLexClasses++; + CurrentLexClass = NumLexClasses-1; + require(NumLexClasses<=MaxLexClasses, "number of allowable lexclasses exceeded\nIncrease MaxLexClasses in generic.h and recompile all C files"); + lclass[CurrentLexClass].classnum = m; + lclass[CurrentLexClass].exprs = (char **) calloc(tsize, sizeof(char *)); + require(lclass[CurrentLexClass].exprs!=NULL, + "lexclass: cannot allocate ExprStr"); + lclass[CurrentLexClass].htable = newHashTable(); + ExprStr = lclass[CurrentLexClass].exprs; + Texpr = lclass[CurrentLexClass].htable; + /* define EOF for each automaton */ + p = newTermEntry( EOFSTR ); + p->token = EofToken; /* couldn't have remapped tokens yet, use EofToken */ + hash_add(Texpr, EOFSTR, (Entry *)p); + list_add(&ExprOrder, (void *)newExpr(EOFSTR)); + /* note: we use the actual ExprStr array + * here as TokenInd doesn't exist yet + */ + ExprStr[EofToken] = EOFSTR; +} + +void +#ifdef __STDC__ +lexmode( int i ) +#else +lexmode( i ) +int i; +#endif +{ + require(iaction!=NULL); +} + +void +#ifdef __STDC__ +setHasAction( char *expr, char *action ) +#else +setHasAction( expr, action ) +char *expr; +char *action; +#endif +{ + TermEntry *p; + require(expr!=NULL, "setHasAction: invalid expr"); + + p = (TermEntry *) hash_get(Texpr, expr); + require(p!=NULL, eMsg1("setHasAction: expr '%s' doesn't exist",expr)); + p->action = action; +} + +ForcedToken * +#ifdef __STDC__ +newForcedToken(char *token, int tnum) +#else +newForcedToken(token, tnum) +char *token; +int tnum; +#endif +{ + ForcedToken *ft = (ForcedToken *) calloc(1, sizeof(ForcedToken)); + require(ft!=NULL, "out of memory"); + ft->token = token; + ft->tnum = tnum; + return ft; +} + +/* + * Make a token indirection array that remaps token numbers and then walk + * the appropriate symbol tables and SynDiag to change token numbers + */ +void +#ifdef __STDC__ +RemapForcedTokens(void) +#else +RemapForcedTokens() +#endif +{ + ListNode *p; + ForcedToken *q; + unsigned int max_token_number=0; + int i; + + if ( ForcedTokens == NULL ) return; + + /* find max token num */ + for (p = ForcedTokens->next; p!=NULL; p=p->next) + { + q = (ForcedToken *) p->elem; + if ( q->tnum > max_token_number ) max_token_number = q->tnum; + } + fprintf(stderr, "max token number is %d\n", max_token_number); + + /* make token indirection array */ + TokenInd = (int *) calloc(max_token_number+1, sizeof(int)); + LastTokenCounted = TokenNum; + TokenNum = max_token_number+1; + require(TokenInd!=NULL, "RemapForcedTokens: cannot allocate TokenInd"); + + /* fill token indirection array and change token id htable ; swap token indices */ + for (i=1; inext; p!=NULL; p=p->next) + { + TermEntry *te; + int old_pos, t; + + q = (ForcedToken *) p->elem; + fprintf(stderr, "%s forced to %d\n", q->token, q->tnum); + te = (TermEntry *) hash_get(Tname, q->token); + require(te!=NULL, "RemapForcedTokens: token not in hash table"); + 
old_pos = te->token; + fprintf(stderr, "Before: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); + fprintf(stderr, "Before: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); + q = (ForcedToken *) p->elem; + t = TokenInd[old_pos]; + TokenInd[old_pos] = q->tnum; + TokenInd[q->tnum] = t; + te->token = q->tnum; /* update token type id symbol table */ + fprintf(stderr, "After: TokenInd[old_pos==%d] is %d\n", old_pos, TokenInd[old_pos]); + fprintf(stderr, "After: TokenInd[target==%d] is %d\n", q->tnum, TokenInd[q->tnum]); + + /* Change the token number in the sym tab entry for the exprs + * at the old position of the token id and the target position + */ + /* update expr at target (if any) of forced token id */ + if ( q->tnum < TokenNum ) /* is it a valid position? */ + { + for (i=0; itnum]!=NULL ) + { + /* update the symbol table for this expr */ + TermEntry *e = (TermEntry *) hash_get(lclass[i].htable, lclass[i].exprs[q->tnum]); + require(e!=NULL, "RemapForcedTokens: expr not in hash table"); + e->token = old_pos; + fprintf(stderr, "found expr '%s' at target %d in lclass[%d]; changed to %d\n", + lclass[i].exprs[q->tnum], q->tnum, i, old_pos); + } + } + } + /* update expr at old position (if any) of forced token id */ + for (i=0; itoken = q->tnum; + fprintf(stderr, "found expr '%s' for id %s in lclass[%d]; changed to %d\n", + lclass[i].exprs[old_pos], q->token, i, q->tnum); + } + } + } + + /* Update SynDiag */ + RemapForcedTokensInSyntaxDiagram((Node *)SynDiag); +} + +static void +#ifdef __STDC__ +RemapForcedTokensInSyntaxDiagram(Node *p) +#else +RemapForcedTokensInSyntaxDiagram(p) +Node *p; +#endif +{ + Junction *j = (Junction *) p; + RuleRefNode *r = (RuleRefNode *) p; + TokNode *t = (TokNode *)p; + + if ( p==NULL ) return; + require(p->ntype>=1 && p->ntype<=NumNodeTypes, "Remap...: invalid diagram node"); + switch ( p->ntype ) + { + case nJunction : + if ( j->visited ) return; + if ( j->jtype == EndRule ) return; + j->visited = TRUE; + RemapForcedTokensInSyntaxDiagram( j->p1 ); + RemapForcedTokensInSyntaxDiagram( j->p2 ); + j->visited = FALSE; + return; + case nRuleRef : + RemapForcedTokensInSyntaxDiagram( r->next ); + return; + case nToken : + if ( t->remapped ) return; /* we've been here before */ + t->remapped = 1; + fprintf(stderr, "remapping %d to %d\n", t->token, TokenInd[t->token]); + t->token = TokenInd[t->token]; + RemapForcedTokensInSyntaxDiagram( t->next ); + return; + case nAction : + RemapForcedTokensInSyntaxDiagram( ((ActionNode *)p)->next ); + return; + default : + fatal_internal("invalid node type"); + } +} + +/* + * Add a token name. Return the token number associated with it. If it already + * exists, then return the token number assigned to it. + * + * Track the order in which tokens are found so that the DLG output maintains + * that order. It also lets us map token numbers to strings. + */ +int +#ifdef __STDC__ +addTname( char *token ) +#else +addTname( token ) +char *token; +#endif +{ + TermEntry *p; + require(token!=NULL, "addTname: invalid token name"); + + if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; + p = newTermEntry( token ); + Ttrack( p->str ); + p->token = TokenNum++; + hash_add(Tname, token, (Entry *)p); + return p->token; +} + +/* This is the same as addTname except we force the TokenNum to be tnum. + * We don't have to use the Forced token stuff as no tokens will have + * been defined with #tokens when this is called. This is only called + * when a #tokdefs meta-op is used. 
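The heart of RemapForcedTokens() above is the token indirection array: it starts out as the identity map, and forcing a token to a new position is just a swap of two entries, so every other token keeps a valid (possibly relocated) slot. A tiny standalone illustration of that swap, not part of the original source:

#include <stdio.h>

#define NTOK 8

int main(void)
{
    int TokenInd[NTOK];
    int i, old_pos = 3, target = 6, t;

    for (i = 0; i < NTOK; i++) TokenInd[i] = i;   /* identity map */

    t                 = TokenInd[old_pos];        /* swap the two entries, */
    TokenInd[old_pos] = target;                   /* as in RemapForcedTokens */
    TokenInd[target]  = t;

    for (i = 0; i < NTOK; i++) printf("%d->%d ", i, TokenInd[i]);
    printf("\n");
    return 0;
}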
+ */ +int +#ifdef __STDC__ +addForcedTname( char *token, int tnum ) +#else +addForcedTname( token, tnum ) +char *token; +int tnum; +#endif +{ + TermEntry *p; + require(token!=NULL, "addTname: invalid token name"); + + if ( (p=(TermEntry *)hash_get(Tname, token)) != NULL ) return p->token; + p = newTermEntry( token ); + Ttrack( p->str ); + p->token = tnum; + hash_add(Tname, token, (Entry *)p); + return p->token; +} + +/* + * Add a token expr. Return the token number associated with it. If it already + * exists, then return the token number assigned to it. + */ +int +#ifdef __STDC__ +addTexpr( char *expr ) +#else +addTexpr( expr ) +char *expr; +#endif +{ + TermEntry *p; + require(expr!=NULL, "addTexpr: invalid regular expression"); + + if ( (p=(TermEntry *)hash_get(Texpr, expr)) != NULL ) return p->token; + p = newTermEntry( expr ); + Ttrack( p->str ); + /* track the order in which they occur */ + list_add(&ExprOrder, (void *)newExpr(p->str)); + p->token = TokenNum++; + hash_add(Texpr, expr, (Entry *)p); + return p->token; +} + +/* return the token number of 'term'. Return 0 if no 'term' exists */ +int +#ifdef __STDC__ +Tnum( char *term ) +#else +Tnum( term ) +char *term; +#endif +{ + TermEntry *p; + require(term!=NULL, "Tnum: invalid terminal"); + + if ( *term=='"' ) p = (TermEntry *) hash_get(Texpr, term); + else p = (TermEntry *) hash_get(Tname, term); + if ( p == NULL ) return 0; + else return p->token; +} + +/* associate a Name with an expr. If both have been already assigned + * token numbers, then an error is reported. Add the token or expr + * that has not been added if no error. This 'represents' the #token + * ANTLR pseudo-op. If both have not been defined, define them both + * linked to same token number. + */ +void +#ifdef __STDC__ +Tklink( char *token, char *expr ) +#else +Tklink( token, expr ) +char *token; +char *expr; +#endif +{ + TermEntry *p, *q; + require(token!=NULL && expr!=NULL, "Tklink: invalid token name and/or expr"); + + p = (TermEntry *) hash_get(Tname, token); + q = (TermEntry *) hash_get(Texpr, expr); + if ( p != NULL && q != NULL ) /* both defined */ + { + warn( eMsg2("token name %s and rexpr %s already defined; ignored", + token, expr) ); + return; + } + if ( p==NULL && q==NULL ) /* both not defined */ + { + int t = addTname( token ); + q = newTermEntry( expr ); + hash_add(Texpr, expr, (Entry *)q); + q->token = t; + /* note: we use the actual ExprStr array + * here as TokenInd doesn't exist yet + */ + ExprStr[t] = q->str; + /* track the order in which they occur */ + list_add(&ExprOrder, (void *)newExpr(q->str)); + return; + } + if ( p != NULL ) /* one is defined, one is not */ + { + q = newTermEntry( expr ); + hash_add(Texpr, expr, (Entry *)q); + q->token = p->token; + ExprStr[p->token] = q->str; /* both expr and token str defined now */ + list_add(&ExprOrder, (void *)newExpr(q->str)); + } + else /* trying to associate name with expr here*/ + { + p = newTermEntry( token ); + hash_add(Tname, token, (Entry *)p); + p->token = q->token; + TokenStr[p->token] = p->str;/* both expr and token str defined now */ + } +} + +/* + * Given a string, this function allocates and returns a pointer to a + * hash table record of size 'sz' whose "str" pointer is reset to a position + * in the string table. 
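The reason one generic newEntry(text, sz) can serve every symbol-table record is that TermEntry, RuleEntry, CacheEntry and friends all begin with the same two fields (a string pointer and a next pointer), so the allocator fills those in and the caller casts to the concrete type. A hedged standalone sketch of that pattern (DemoTerm and new_entry are invented stand-ins, and strdup stands in for mystrdup):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct GenericEntry {
    char *str;
    struct GenericEntry *next;
    /* concrete entries add their own fields after these two */
} GenericEntry;

static GenericEntry *new_entry(const char *text, size_t sz)
{
    GenericEntry *p = (GenericEntry *) calloc(1, sz);
    if (p == NULL) { fprintf(stderr, "out of memory\n"); exit(1); }
    p->str = strdup(text);            /* the real code uses mystrdup() */
    return p;
}

typedef struct {                      /* a TermEntry-like record */
    char *str;
    void *next;
    int token;
} DemoTerm;

int main(void)
{
    DemoTerm *t = (DemoTerm *) new_entry("ID", sizeof(DemoTerm));
    t->token = 42;
    printf("%s -> %d\n", t->str, t->token);
    return 0;
}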
+ */ +Entry * +#ifdef __STDC__ +newEntry( char *text, int sz ) +#else +newEntry( text, sz ) +char *text; +int sz; +#endif +{ + Entry *p; + require(text!=NULL, "new: NULL terminal"); + + if ( (p = (Entry *) calloc(1,sz)) == 0 ) + { + fatal_internal("newEntry: out of memory for terminals\n"); + exit(PCCTS_EXIT_FAILURE); + } + p->str = mystrdup(text); + + return(p); +} + +/* + * add an element to a list. + * + * Any non-empty list has a sentinel node whose 'elem' pointer is really + * a pointer to the last element. (i.e. length(list) = #elemIn(list)+1). + * Elements are appended to the list. + */ +void +#ifdef __STDC__ +list_add( ListNode **list, void *e ) +#else +list_add( list, e ) +ListNode **list; +void *e; +#endif +{ + ListNode *p, *tail; + require(e!=NULL, "list_add: attempting to add NULL list element"); + + p = newListNode; + require(p!=NULL, "list_add: cannot alloc new list node"); + p->elem = e; + if ( *list == NULL ) + { + ListNode *sentinel = newListNode; + require(sentinel!=NULL, "list_add: cannot alloc sentinel node"); + *list=sentinel; + sentinel->next = p; + sentinel->elem = (char *)p; /* set tail pointer */ + } + else /* find end of list */ + { + tail = (ListNode *) (*list)->elem; /* get tail pointer */ + tail->next = p; + (*list)->elem = (char *) p; /* reset tail */ + } +} + +void +#ifdef __STDC__ +list_apply( ListNode *list, void (*f)(void *) ) +#else +list_apply( list, f ) +ListNode *list; +void (*f)(); +#endif +{ + ListNode *p; + require(f!=NULL, "list_apply: NULL function to apply"); + + if ( list == NULL ) return; + for (p = list->next; p!=NULL; p=p->next) (*f)( p->elem ); +} + + /* F O L L O W C y c l e S t u f f */ + +/* make a key based upon (rulename, computation, k value). + * Computation values are 'i'==FIRST, 'o'==FOLLOW. + */ +char * +#ifdef __STDC__ +Fkey( char *rule, int computation, int k ) +#else +Fkey( rule, computation, k ) +char *rule; +int computation; +int k; +#endif +{ + static char key[MaxRuleName+2+1]; + int i; + + if ( k > 255 ) + fatal("k>255 is too big for this implementation of ANTLR!\n"); + if ( (i=strlen(rule)) > MaxRuleName ) + fatal( eMsgd("rule name > max of %d\n", MaxRuleName) ); + strcpy(key,rule); + key[i] = (int) computation; + key[i+1] = (char) ((unsigned int) k); + key[i+2] = '\0'; + return key; +} + +/* Push a rule onto the kth FOLLOW stack */ +void +#ifdef __STDC__ +FoPush( char *rule, int k ) +#else +FoPush( rule, k ) +char *rule; +int k; +#endif +{ + RuleEntry *r; + require(rule!=NULL, "FoPush: tried to push NULL rule"); + require(k<=CLL_k, "FoPush: tried to access non-existent stack"); + + /*fprintf(stderr, "FoPush(%s)\n", rule);*/ + r = (RuleEntry *) hash_get(Rname, rule); + if ( r == NULL ) {fatal_internal( eMsg1("rule %s must be defined but isn't", rule) );} + if ( FoStack[k] == NULL ) /* Does the kth stack exist yet? 
*/ + { + /*fprintf(stderr, "allocating FoStack\n");*/ + FoStack[k] = (int *) calloc(FoStackSize, sizeof(int)); + require(FoStack[k]!=NULL, "FoPush: cannot allocate FOLLOW stack\n"); + } + if ( FoTOS[k] == NULL ) + { + FoTOS[k]=FoStack[k]; + *(FoTOS[k]) = r->rulenum; + } + else + { +#ifdef MEMCHK + require(valid(FoStack[k]), "FoPush: invalid FoStack"); +#endif + if ( FoTOS[k] >= &(FoStack[k][FoStackSize-1]) ) + fatal( eMsgd("exceeded max depth of FOLLOW recursion (%d)\n", + FoStackSize) ); + require(FoTOS[k]>=FoStack[k], + eMsg1("FoPush: FoStack stack-ptr is playing out of its sandbox", + rule)); + ++(FoTOS[k]); + *(FoTOS[k]) = r->rulenum; + } + { + /* + int *p; + fprintf(stderr, "FoStack[k=%d]:\n", k); + for (p=FoStack[k]; p<=FoTOS[k]; p++) + { + fprintf(stderr, "\t%s\n", RulePtr[*p]->rname); + } + */ + } +} + +/* Pop one rule off of the FOLLOW stack. TOS ptr is NULL if empty. */ +void +#ifdef __STDC__ +FoPop( int k ) +#else +FoPop( k ) +int k; +#endif +{ + require(k<=CLL_k, "FoPop: tried to access non-existent stack"); + /*fprintf(stderr, "FoPop\n");*/ + require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), + "FoPop: FoStack stack-ptr is playing out of its sandbox"); + if ( FoTOS[k] == FoStack[k] ) FoTOS[k] = NULL; + else (FoTOS[k])--; +} + +/* Compute FOLLOW cycle. + * Mark all FOLLOW sets for rules in cycle as incomplete. + * Then, save cycle on the cycle list (Cycles) for later resolution. + * The Cycle is stored in the form: + * (head of cycle==croot, rest of rules in cycle==cyclicDep) + * + * e.g. (Fo means "FOLLOW of", "-->" means requires or depends on) + * + * Fo(x)-->Fo(a)-->Fo(b)-->Fo(c)-->Fo(x) + * ^----Infinite recursion (cycle) + * + * the cycle would be: x -> {a,b,c} or stored as (x,{a,b,c}). Fo(x) depends + * on the FOLLOW of a,b, and c. The root of a cycle is always complete after + * Fo(x) finishes. Fo(a,b,c) however are not. It turns out that all rules + * in a FOLLOW cycle have the same FOLLOW set. 
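Since every rule in a FOLLOW cycle ends up with the FOLLOW set of the cycle root, ResolveFoCycles() below can simply OR the root's set into each cyclic dependent until nothing changes. A minimal standalone sketch of that fixed-point step, not from the patch, with bitmasks standing in for the set type and set_orin:

#include <stdio.h>

#define NRULES 4

int main(void)
{
    unsigned follow[NRULES] = { 0x3, 0x1, 0x0, 0x0 }; /* follow[0] is the cycle root */
    int dep[] = { 1, 2, 3 };                          /* rules cyclically dependent on rule 0 */
    int i, changed = 1;

    while (changed) {                 /* iterate until no FOLLOW set grows */
        changed = 0;
        for (i = 0; i < 3; i++) {
            unsigned before = follow[dep[i]];
            follow[dep[i]] |= follow[0];              /* set_orin(&g->fset, f->fset) */
            if (follow[dep[i]] != before) changed = 1;
        }
    }
    for (i = 0; i < NRULES; i++) printf("FOLLOW(r%d) = %#x\n", i, follow[i]);
    return 0;
}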
+ */ +void +#ifdef __STDC__ +RegisterCycle( char *rule, int k ) +#else +RegisterCycle( rule, k ) +char *rule; +int k; +#endif +{ + CacheEntry *f; + Cycle *c; + int *p; + RuleEntry *r; + require(rule!=NULL, "RegisterCycle: tried to register NULL rule"); + require(k<=CLL_k, "RegisterCycle: tried to access non-existent stack"); + + /*fprintf(stderr, "RegisterCycle(%s)\n", rule);*/ + /* Find cycle start */ + r = (RuleEntry *) hash_get(Rname, rule); + require(r!=NULL,eMsg1("rule %s must be defined but isn't", rule)); + require(FoTOS[k]>=FoStack[k]&&FoTOS[k]<=&(FoStack[k][FoStackSize-1]), + eMsg1("RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox", + rule)); +/* if ( FoTOS[k]&(FoStack[k][FoStackSize-1]) ) + { + fprintf(stderr, "RegisterCycle(%s): FoStack stack-ptr is playing out of its sandbox\n", + rule); + fprintf(stderr, "RegisterCycle: sp==0x%x out of bounds 0x%x...0x%x\n", + FoTOS[k], FoStack[k], &(FoStack[k][FoStackSize-1])); + exit(PCCTS_EXIT_FAILURE); + } +*/ +#ifdef MEMCHK + require(valid(FoStack[k]), "RegisterCycle: invalid FoStack"); +#endif + for (p=FoTOS[k]; *p != r->rulenum && p >= FoStack[k]; --p) {;} + require(p>=FoStack[k], "RegisterCycle: FoStack is screwed up beyond belief"); + if ( p == FoTOS[k] ) return; /* don't worry about cycles to oneself */ + + /* compute cyclic dependents (rules in cycle except head) */ + c = newCycle; + require(c!=NULL, "RegisterCycle: couldn't alloc new cycle"); + c->cyclicDep = empty; + c->croot = *p++; /* record root of cycle */ + for (; p<=FoTOS[k]; p++) + { + /* Mark all dependent rules as incomplete */ + f = (CacheEntry *) hash_get(Fcache, Fkey(RulePtr[*p]->rname,'o',k)); + if ( f==NULL ) + { + f = newCacheEntry( Fkey(RulePtr[*p]->rname,'o',k) ); + hash_add(Fcache, Fkey(RulePtr[*p]->rname,'o',k), (Entry *)f); + } + f->incomplete = TRUE; + + set_orel(*p, &(c->cyclicDep)); /* mark rule as dependent of croot */ + } + list_add(&(Cycles[k]), (void *)c); +} + +/* make all rules in cycle complete + * + * while ( some set has changed ) do + * for each cycle do + * if degree of FOLLOW set for croot > old degree then + * update all FOLLOW sets for rules in cyclic dependency + * change = TRUE + * endif + * endfor + * endwhile + */ +void +#ifdef __STDC__ +ResolveFoCycles( int k ) +#else +ResolveFoCycles( k ) +int k; +#endif +{ + ListNode *p, *q; + Cycle *c; + int changed = 1; + CacheEntry *f,*g; + int r,i; + unsigned d; + + /*fprintf(stderr, "Resolving following cycles for %d\n", k);*/ + while ( changed ) + { + changed = 0; + i = 0; + for (p = Cycles[k]->next; p!=NULL; p=p->next) + { + c = (Cycle *) p->elem; + /*fprintf(stderr, "cycle %d: %s -->", i++, RulePtr[c->croot]->rname);*/ + /*s_fprT(stderr, c->cyclicDep);*/ + /*fprintf(stderr, "\n");*/ + f = (CacheEntry *) + hash_get(Fcache, Fkey(RulePtr[c->croot]->rname,'o',k)); + require(f!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[c->croot]->rname) ); + if ( (d=set_deg(f->fset)) > c->deg ) + { + /*fprintf(stderr, "Fo(%s) has changed\n", RulePtr[c->croot]->rname);*/ + changed = 1; + c->deg = d; /* update cycle FOLLOW set degree */ + while ( !set_nil(c->cyclicDep) ) + { + r = set_int(c->cyclicDep); + set_rm(r, c->cyclicDep); + /*fprintf(stderr, "updating Fo(%s)\n", RulePtr[r]->rname);*/ + g = (CacheEntry *) + hash_get(Fcache, Fkey(RulePtr[r]->rname,'o',k)); + require(g!=NULL, eMsg1("FOLLOW(%s) must be in cache but isn't", RulePtr[r]->rname) ); + set_orin(&(g->fset), f->fset); + g->incomplete = FALSE; + } + } + } + if ( i == 1 ) changed = 0; /* if only 1 cycle, no need to repeat 
*/ + } + /* kill Cycle list */ + for (q = Cycles[k]->next; q != NULL; q=p) + { + p = q->next; + set_free( ((Cycle *)q->elem)->cyclicDep ); + free((char *)q); + } + free( (char *)Cycles[k] ); + Cycles[k] = NULL; +} + + + /* P r i n t i n g S y n t a x D i a g r a m s */ + +static void +#ifdef __STDC__ +pBlk( Junction *q, int btype ) +#else +pBlk( q, btype ) +Junction *q; +int btype; +#endif +{ + int k,a; + Junction *alt, *p; + + q->end->pvisited = TRUE; + if ( btype == aLoopBegin ) + { + require(q->p2!=NULL, "pBlk: invalid ()* block"); + PRINT(q->p1); + alt = (Junction *)q->p2; + PRINT(alt->p1); + if ( PrintAnnotate ) + { + printf(" /* Opt "); + k = 1; + while ( !set_nil(alt->fset[k]) ) + { + s_fprT(stdout, alt->fset[k]); + if ( k++ == CLL_k ) break; + if ( !set_nil(alt->fset[k]) ) printf(", "); + } + printf(" */\n"); + } + return; + } + for (a=1,alt=q; alt != NULL; alt= (Junction *) alt->p2, a++) + { + if ( alt->p1 != NULL ) PRINT(alt->p1); + if ( PrintAnnotate ) + { + printf( " /* [%d] ", alt->altnum); + k = 1; + while ( !set_nil(alt->fset[k]) ) + { + s_fprT(stdout, alt->fset[k]); + if ( k++ == CLL_k ) break; + if ( !set_nil(alt->fset[k]) ) printf(", "); + } + if ( alt->p2 == NULL && btype == aOptBlk ) + printf( " (optional branch) */\n"); + else printf( " */\n"); + } + + /* ignore implied empty alt of Plus blocks */ + if ( alt->p2 != NULL && ((Junction *)alt->p2)->ignore ) break; + + if ( alt->p2 != NULL && !(((Junction *)alt->p2)->p2==NULL && btype == aOptBlk) ) + { + if ( pLevel == 1 ) + { + printf("\n"); + if ( a+1==pAlt1 || a+1==pAlt2 ) printf("=>"); + printf("\t"); + } + else printf(" "); + printf("|"); + if ( pLevel == 1 ) + { + p = (Junction *) ((Junction *)alt->p2)->p1; + while ( p!=NULL ) + { + if ( p->ntype==nAction ) + { + p=(Junction *)((ActionNode *)p)->next; + continue; + } + if ( p->ntype!=nJunction ) + { + break; + } + if ( p->jtype==EndBlk || p->jtype==EndRule ) + { + p = NULL; + break; + } + p = (Junction *)p->p1; + } + if ( p==NULL ) printf("\n\t"); /* Empty alt? 
*/ + } + } + } + q->end->pvisited = FALSE; +} + +/* How to print out a junction */ +void +#ifdef __STDC__ +pJunc( Junction *q ) +#else +pJunc( q ) +Junction *q; +#endif +{ + int dum_k; + int doing_rule; + require(q!=NULL, "pJunc: NULL node"); + require(q->ntype==nJunction, "pJunc: not junction"); + + if ( q->pvisited == TRUE ) return; + q->pvisited = TRUE; + switch ( q->jtype ) + { + case aSubBlk : + if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); + if ( q->end->p1 != NULL && ((Junction *)q->end->p1)->ntype==nJunction && + ((Junction *)q->end->p1)->jtype == EndRule ) doing_rule = 1; + else doing_rule = 0; + pLevel++; + if ( pLevel==1 ) + { + if ( pAlt1==1 ) printf("=>"); + printf("\t"); + } + else printf(" "); + if ( doing_rule ) + { + if ( pLevel==1 ) printf(" "); + pBlk(q,q->jtype); + } + else { + printf("("); + if ( pLevel==1 ) printf(" "); + pBlk(q,q->jtype); + if ( pLevel>1 ) printf(" "); + printf(")"); + } + if ( q->guess ) printf("?"); + pLevel--; + if ( PrintAnnotate ) freeBlkFsets(q); + if ( q->end->p1 != NULL ) PRINT(q->end->p1); + break; + case aOptBlk : + if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); + pLevel++; + if ( pLevel==1 ) + { + if ( pAlt1==1 ) printf("=>"); + printf("\t"); + } + else printf(" "); + printf("{"); + if ( pLevel==1 ) printf(" "); + pBlk(q,q->jtype); + if ( pLevel>1 ) printf(" "); + else printf("\n\t"); + printf("}"); + pLevel--; + if ( PrintAnnotate ) freeBlkFsets(q); + if ( q->end->p1 != NULL ) PRINT(q->end->p1); + break; + case aLoopBegin : + if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); + pLevel++; + if ( pLevel==1 ) + { + if ( pAlt1==1 ) printf("=>"); + printf("\t"); + } + else printf(" "); + printf("("); + if ( pLevel==1 ) printf(" "); + pBlk(q,q->jtype); + if ( pLevel>1 ) printf(" "); + else printf("\n\t"); + printf(")*"); + pLevel--; + if ( PrintAnnotate ) freeBlkFsets(q); + if ( q->end->p1 != NULL ) PRINT(q->end->p1); + break; + case aLoopBlk : + if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); + pBlk(q,q->jtype); + if ( PrintAnnotate ) freeBlkFsets(q); + break; + case aPlusBlk : + if ( PrintAnnotate ) First(q, 1, q->jtype, &dum_k); + pLevel++; + if ( pLevel==1 ) + { + if ( pAlt1==1 ) printf("=>"); + printf("\t"); + } + else printf(" "); + printf("("); + if ( pLevel==1 ) printf(" "); + pBlk(q,q->jtype); + if ( pLevel>1 ) printf(" "); + printf(")+"); + pLevel--; + if ( PrintAnnotate ) freeBlkFsets(q); + if ( q->end->p1 != NULL ) PRINT(q->end->p1); + break; + case EndBlk : + break; + case RuleBlk : + printf( "\n%s :\n", q->rname); + PRINT(q->p1); + if ( q->p2 != NULL ) PRINT(q->p2); + break; + case Generic : + if ( q->p1 != NULL ) PRINT(q->p1); + q->pvisited = FALSE; + if ( q->p2 != NULL ) PRINT(q->p2); + break; + case EndRule : + printf( "\n\t;\n"); + break; + } + q->pvisited = FALSE; +} + +/* How to print out a rule reference node */ +void +#ifdef __STDC__ +pRuleRef( RuleRefNode *p ) +#else +pRuleRef( p ) +RuleRefNode *p; +#endif +{ + require(p!=NULL, "pRuleRef: NULL node"); + require(p->ntype==nRuleRef, "pRuleRef: not rule ref node"); + + printf( " %s", p->text); + PRINT(p->next); +} + +/* How to print out a terminal node */ +void +#ifdef __STDC__ +pToken( TokNode *p ) +#else +pToken( p ) +TokNode *p; +#endif +{ + require(p!=NULL, "pToken: NULL node"); + require(p->ntype==nToken, "pToken: not token node"); + + if ( p->wild_card ) printf(" ."); + printf( " %s", TerminalString(p->token)); + PRINT(p->next); +} + +/* How to print out a terminal node */ +void +#ifdef __STDC__ +pAction( ActionNode *p ) +#else +pAction( p ) +ActionNode 
*p; +#endif +{ + require(p!=NULL, "pAction: NULL node"); + require(p->ntype==nAction, "pAction: not action node"); + + PRINT(p->next); +} + + /* F i l l F o l l o w L i s t s */ + +/* + * Search all rules for all rule reference nodes, q to rule, r. + * Add q->next to follow list dangling off of rule r. + * i.e. + * + * r: -o-R-o-->o--> Ptr to node following rule r in another rule + * | + * o--> Ptr to node following another reference to r. + * + * This is the data structure employed to avoid FOLLOW set computation. We + * simply compute the FIRST (reach) of the EndRule Node which follows the + * list found at the end of all rules which are referenced elsewhere. Rules + * not invoked by other rules have no follow list (r->end->p1==NULL). + * Generally, only start symbols are not invoked by another rule. + * + * Note that this mechanism also gives a free cross-reference mechanism. + * + * The entire syntax diagram is layed out like this: + * + * SynDiag + * | + * v + * o-->R1--o + * | + * o-->R2--o + * | + * ... + * | + * o-->Rn--o + * + */ +void +#ifdef __STDC__ +FoLink( Node *p ) +#else +FoLink( p ) +Node *p; +#endif +{ + RuleEntry *q; + Junction *j = (Junction *) p; + RuleRefNode *r = (RuleRefNode *) p; + + if ( p==NULL ) return; + require(p->ntype>=1 && p->ntype<=NumNodeTypes, + eMsgd("FoLink: invalid diagram node: ntype==%d",p->ntype)); + switch ( p->ntype ) + { + case nJunction : + if ( j->fvisited ) return; + if ( j->jtype == EndRule ) return; + j->fvisited = TRUE; + FoLink( j->p1 ); + FoLink( j->p2 ); + return; + case nRuleRef : + if ( r->linked ) return; + q = (RuleEntry *) hash_get(Rname, r->text); + if ( q == NULL ) + { + warnFL( eMsg1("rule %s not defined",r->text), FileStr[r->file], r->line ); + } + else + { + if ( r->parms!=NULL && RulePtr[q->rulenum]->pdecl==NULL ) + { + warnFL( eMsg1("rule %s accepts no parameter(s)", r->text), + FileStr[r->file], r->line ); + } + if ( r->parms==NULL && RulePtr[q->rulenum]->pdecl!=NULL ) + { + warnFL( eMsg1("rule %s requires parameter(s)", r->text), + FileStr[r->file], r->line ); + } + if ( r->assign!=NULL && RulePtr[q->rulenum]->ret==NULL ) + { + warnFL( eMsg1("rule %s yields no return value(s)", r->text), + FileStr[r->file], r->line ); + } + if ( r->assign==NULL && RulePtr[q->rulenum]->ret!=NULL ) + { + warnFL( eMsg1("rule %s returns a value(s)", r->text), + FileStr[r->file], r->line ); + } + if ( !r->linked ) + { + addFoLink( r->next, r->rname, RulePtr[q->rulenum] ); + r->linked = TRUE; + } + } + FoLink( r->next ); + return; + case nToken : + FoLink( ((TokNode *)p)->next ); + return; + case nAction : + FoLink( ((ActionNode *)p)->next ); + return; + default : + fatal_internal("invalid node type"); + } +} + +/* + * Add a reference to the end of a rule. + * + * 'r' points to the RuleBlk node in a rule. r->end points to the last node + * (EndRule jtype) in a rule. + * + * Initial: + * r->end --> o + * + * After: + * r->end --> o-->o--> Ptr to node following rule r in another rule + * | + * o--> Ptr to node following another reference to r. + * + * Note that the links are added to the head of the list so that r->end->p1 + * always points to the most recently added follow-link. At the end, it should + * point to the last reference found in the grammar (starting from the 1st rule). 
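The follow-list described above is built by head insertion: each new follow-link is pushed on the front of the list hanging off the rule's EndRule node, so the head always names the most recent reference. A small self-contained sketch of that insertion (Link and push_link are invented names; next plays the role of the p2 chain):

#include <stdio.h>
#include <stdlib.h>

typedef struct Link {
    const char *referencing_rule;
    struct Link *next;                /* the p2 chain in the real diagram */
} Link;

static Link *push_link(Link *head, const char *rule)
{
    Link *j = (Link *) malloc(sizeof(Link));
    if (j == NULL) { fprintf(stderr, "out of memory\n"); exit(1); }
    j->referencing_rule = rule;
    j->next = head;                   /* point at the old head ...        */
    return j;                         /* ... and become the new head      */
}

int main(void)
{
    Link *follow = NULL, *p;
    follow = push_link(follow, "stat");    /* first reference found          */
    follow = push_link(follow, "expr");    /* later reference ends up first  */
    for (p = follow; p != NULL; p = p->next)
        printf("referenced from %s\n", p->referencing_rule);
    return 0;
}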
+ */ +void +#ifdef __STDC__ +addFoLink( Node *p, char *rname, Junction *r ) +#else +addFoLink( p, rname, r ) +Node *p; +char *rname; +Junction *r; +#endif +{ + Junction *j; + require(r!=NULL, "addFoLink: incorrect rule graph"); + require(r->end!=NULL, "addFoLink: incorrect rule graph"); + require(r->end->jtype==EndRule, "addFoLink: incorrect rule graph"); + require(p!=NULL, "addFoLink: NULL FOLLOW link"); + + j = newJunction(); + j->rname = rname; /* rname on follow links point to target rule */ + j->p1 = p; /* link to other rule */ + j->p2 = (Node *) r->end->p1;/* point to head of list */ + r->end->p1 = (Node *) j; /* reset head to point to new node */ +} + +void +#ifdef __STDC__ +GenCrossRef( Junction *p ) +#else +GenCrossRef( p ) +Junction *p; +#endif +{ + set a; + Junction *j; + RuleEntry *q; + unsigned e; + require(p!=NULL, "GenCrossRef: why are you passing me a null grammar?"); + + printf("Cross Reference:\n\n"); + a = empty; + for (; p!=NULL; p = (Junction *)p->p2) + { + printf("Rule %11s referenced by {", p->rname); + /* make a set of rules for uniqueness */ + for (j = (Junction *)(p->end)->p1; j!=NULL; j = (Junction *)j->p2) + { + q = (RuleEntry *) hash_get(Rname, j->rname); + require(q!=NULL, "GenCrossRef: FoLinks are screwed up"); + set_orel(q->rulenum, &a); + } + for (; !set_nil(a); set_rm(e, a)) + { + e = set_int(a); + printf(" %s", RulePtr[e]->rname); + } + printf(" }\n"); + } + set_free( a ); +} diff --git a/antlr/pred.c b/antlr/pred.c new file mode 100755 index 0000000..0c32795 --- /dev/null +++ b/antlr/pred.c @@ -0,0 +1,607 @@ +/* + * pred.c -- source for predicate detection, manipulation + * + * $Id: pred.c,v 1.6 95/09/26 12:58:44 parrt Exp $ + * $Revision: 1.6 $ + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. 
+ * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "set.h" +#include "syn.h" +#include "hash.h" +#include "generic.h" +#include "dlgdef.h" +#include + +#ifdef __STDC__ +static void complete_context_sets(RuleRefNode *, Predicate *); +static void complete_context_trees(RuleRefNode *, Predicate *); +#else +static void complete_context_sets(); +static void complete_context_trees(); +#endif + +static Predicate pred_empty = { + NULL,NULL,NULL,NULL,NULL,NULL,0, + {set_init,set_init},set_init +}; + +char *PRED_AND_LIST = "AND"; +char *PRED_OR_LIST = "OR"; + +/* + * In C mode, return the largest constant integer found as the + * sole argument to LATEXT(i). + * + * In C++ mode, return the largest constant integer found as the + * sole argument to LT(i) given that the char before is nonalpha. + */ +static int +#ifdef __STDC__ +predicateLookaheadDepth(ActionNode *a) +#else +predicateLookaheadDepth(a) +ActionNode *a; +#endif +{ + int max_k=0; + + if ( GenCC ) + { + /* scan for LT(i) */ + int k = 0; + char *p = a->action; + while ( p!=NULL ) + { + p = strstr(p, "LT("); + if ( p!=NULL ) + { + if ( p>=a->action && !isalpha(*(p-1)) ) + { + k = atoi(p+strlen("LT(")); + if ( k>max_k ) max_k=k; + } + p += strlen("LT("); + } + } + } + else { + /* scan for LATEXT(i) */ + int k = 0; + char *p = a->action; + while ( p!=NULL ) + { + p = strstr(p, "LATEXT("); + if ( p!=NULL ) + { + p += strlen("LATEXT("); + k = atoi(p); + if ( k>max_k ) max_k=k; + } + } + } + + if ( max_k==0 ) + { + if ( !a->frmwarned ) + { + a->frmwarned = 1; + warnFL(eMsg1("predicate: %s missing, bad, or with i=0; assuming i=1", + GenCC?"LT(i)":"LATEXT(i)"), + FileStr[a->file], a->line); + } + max_k = 1; + } + + return max_k; +} + +/* Find all predicates in a block of alternatives. 
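A standalone sketch (not from the patch) of the scan that predicateLookaheadDepth() above performs in C++ mode: find every "LT(i)" whose preceding character is not alphabetic and keep the largest i, defaulting to 1 when none is found. max_LT_depth is an invented name.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

static int max_LT_depth(const char *action)
{
    int max_k = 0;
    const char *p = action;

    while ((p = strstr(p, "LT(")) != NULL) {
        /* reject identifiers such as SPLT(...) by checking the char before */
        if (p == action || !isalpha((unsigned char) *(p - 1))) {
            int k = atoi(p + strlen("LT("));
            if (k > max_k) max_k = k;
        }
        p += strlen("LT(");
    }
    return max_k ? max_k : 1;         /* the real code warns and assumes i=1 */
}

int main(void)
{
    printf("%d\n", max_LT_depth("LT(1)->getType()==ID && LT(3)!=NULL"));  /* prints 3 */
    return 0;
}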
DO NOT find predicates + * behind the block because that predicate could depend on things set in + * one of the nonoptional blocks + */ +Predicate * +#ifdef __STDC__ +find_in_aSubBlk( Junction *alt ) +#else +find_in_aSubBlk( alt ) +Junction *alt; +#endif +{ + Predicate *a, *head=NULL, *tail, *root=NULL; + Junction *p = alt; + + for (; p!=NULL; p=(Junction *)p->p2) + { + /* ignore empty alts */ + if ( p->p1->ntype != nJunction || + ((Junction *)p->p1)->jtype != EndBlk ) + { + a = find_predicates(p->p1); /* get preds for this alt */ + if ( a==NULL ) continue; + + /* make an OR list of predicates */ + if ( head==NULL ) + { + root = new_pred(); + root->expr = PRED_OR_LIST; + head = tail = a; + root->down = head; + } + else { + tail->right = a; + a->left = tail; + a->up = tail->up; + tail = a; + } + } + } + + /* if just one pred, remove OR root */ + if ( root!=NULL && root->down->right == NULL ) + { + Predicate *d = root->down; + free(root); + return d; + } + + return root; +} + +Predicate * +#ifdef __STDC__ +find_in_aOptBlk( Junction *alt ) +#else +find_in_aOptBlk( alt ) +Junction *alt; +#endif +{ + return find_in_aSubBlk( alt ); +} + +Predicate * +#ifdef __STDC__ +find_in_aLoopBegin( Junction *alt ) +#else +find_in_aLoopBegin( alt ) +Junction *alt; +#endif +{ + return find_in_aSubBlk( (Junction *) alt->p1 ); /* get preds in alts */ +} + +Predicate * +#ifdef __STDC__ +find_in_aPlusBlk( Junction *alt ) +#else +find_in_aPlusBlk( alt ) +Junction *alt; +#endif +{ + require(alt!=NULL&&alt->p2!=NULL, "invalid aPlusBlk"); + return find_in_aSubBlk( alt ); +} + +/* Look for a predicate; + * + * Do not pass anything but Junction nodes; no Actions, Tokens, RuleRefs. + * This means that a "hoisting distance" of zero is the only distance + * allowable. Init actions are ignored. + * + * WARNING: + * Assumes no (..)? block after predicate for the moment. + * Does not check to see if pred is in production that can generate + * a sequence contained in the set of ambiguous tuples. + * + * Return the predicate found if any. 
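The shape built by find_in_aSubBlk() above is an "OR" root with one hoisted predicate per alternative strung along the down/right links, and a root with a single child is collapsed away. A hedged standalone sketch of that construction (P, new_p and or_of are invented stand-ins for Predicate, new_pred and the in-line loop):

#include <stdio.h>
#include <stdlib.h>

typedef struct P {
    const char *expr;                 /* "OR", "AND", or the predicate text */
    struct P *down, *right;
} P;

static P *new_p(const char *expr)
{
    P *p = (P *) calloc(1, sizeof(P));
    if (p == NULL) { fprintf(stderr, "out of memory\n"); exit(1); }
    p->expr = expr;
    return p;
}

static P *or_of(P *kids[], int n)     /* kids: one hoisted pred per alt */
{
    P *root, *tail = NULL;
    int i;
    if (n == 0) return NULL;
    if (n == 1) return kids[0];       /* single pred: no OR root needed */
    root = new_p("OR");
    for (i = 0; i < n; i++) {
        if (tail == NULL) root->down = kids[i];
        else tail->right = kids[i];
        tail = kids[i];
    }
    return root;
}

int main(void)
{
    P *kids[2] = { new_p("isType(LT(1))"), new_p("inScope(LT(1))") };
    P *root = or_of(kids, 2), *c;
    printf("%s:", root->expr);
    for (c = root->down; c != NULL; c = c->right) printf(" <<%s>>?", c->expr);
    printf("\n");
    return 0;
}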
+ */ +Predicate * +#ifdef __STDC__ +find_predicates( Node *alt ) +#else +find_predicates( alt ) +Node *alt; +#endif +{ +#ifdef DBG_PRED + Junction *j; + RuleRefNode *r; + TokNode *t; +#endif + Predicate *pred; + + if ( alt==NULL ) return NULL; + +#ifdef DBG_PRED + switch ( alt->ntype ) + { + case nJunction : + j = (Junction *) alt; + fprintf(stderr, "Junction(in %s)", j->rname); + switch ( j->jtype ) + { + case aSubBlk : + fprintf(stderr,"aSubBlk\n"); + break; + case aOptBlk : + fprintf(stderr,"aOptBlk\n"); + break; + case aLoopBegin : + fprintf(stderr,"aLoopBeginBlk\n"); + break; + case aLoopBlk : + fprintf(stderr,"aLoopBlk\n"); + break; + case aPlusBlk : + fprintf(stderr,"aPlusBlk\n"); + break; + case EndBlk : + fprintf(stderr,"EndBlk\n"); + break; + case RuleBlk : + fprintf(stderr,"RuleBlk\n"); + break; + case Generic : + fprintf(stderr,"Generic\n"); + break; + case EndRule : + fprintf(stderr,"EndRule\n"); + break; + } + break; + case nRuleRef : + r = (RuleRefNode *) alt; + fprintf(stderr, "RuleRef(in %s)\n", r->rname); + break; + case nToken : + t = (TokNode *) alt; + fprintf(stderr, "TokenNode(in %s)%s\n", t->rname, TokenString(t->token)); + break; + case nAction : + fprintf(stderr, "Action\n"); + break; + } +#endif + + switch ( alt->ntype ) + { + case nJunction : + { + Predicate *a, *b; + Junction *p = (Junction *) alt; + + /* lock nodes */ + if ( p->jtype==aLoopBlk || p->jtype==RuleBlk || + p->jtype==aPlusBlk || p->jtype==EndRule ) + { + require(p->pred_lock!=NULL, "rJunc: lock array is NULL"); + if ( p->pred_lock[1] ) + { + return NULL; + } + p->pred_lock[1] = TRUE; + } + + switch ( p->jtype ) + { + case aSubBlk : + a = find_in_aSubBlk(p); + return a; /* nothing is visible past this guy */ + case aOptBlk : + a = find_in_aOptBlk(p); + return a; + case aLoopBegin : + a = find_in_aLoopBegin(p); + return a; + case aLoopBlk : + a = find_in_aSubBlk(p); + p->pred_lock[1] = FALSE; + return a; + case aPlusBlk : + a = find_in_aPlusBlk(p); + p->pred_lock[1] = FALSE; + return a; /* nothing is visible past this guy */ + case RuleBlk : + a = find_predicates(p->p1); + p->pred_lock[1] = FALSE; + return a; + case Generic : + a = find_predicates(p->p1); + b = find_predicates(p->p2); + if ( p->pred_lock!=NULL ) p->pred_lock[1] = FALSE; + if ( a==NULL ) return b; + if ( b==NULL ) return a; + /* otherwise OR the two preds together */ + { + fatal_internal("hit unknown situation during predicate hoisting"); + } + case EndBlk : + case EndRule : /* Find no predicates after a rule ref */ + return NULL; + default: + fatal_internal("this cannot be printed\n"); + break; + } + } + case nAction : + { + ActionNode *p = (ActionNode *) alt; + if ( p->init_action ) return find_predicates(p->next); + if ( p->is_predicate ) + { + Tree *t; +#ifdef DBG_PRED + fprintf(stderr, "predicate: <<%s>>?\n", p->action); +#endif + if ( p->guardpred!=NULL ) + { + pred = p->guardpred; + } + else + { + pred = new_pred(); + pred->k = predicateLookaheadDepth(p); + pred->source = p; + pred->expr = p->action; + if ( HoistPredicateContext && pred->k > 1 ) + { + if ( first_item_is_guess_block((Junction *)p->next) ) + { + warnFL("cannot compute context of predicate in front of (..)? 
block", FileStr[p->file], p->line); + } + else + { + ConstrainSearch = 0; + TRAV(p->next, + pred->k, + &(pred->completion), t); + pred->tcontext = t; +#ifdef DBG_PRED + fprintf(stderr, "LL(%d) context:", pred->k); + preorder(t); + fprintf(stderr, "\n"); +#endif + } + } + else if ( HoistPredicateContext && pred->k == 1 ) + { + pred->scontext[1] = empty; + if ( first_item_is_guess_block((Junction *)p->next) ) + { + warnFL("cannot compute context of predicate in front of (..)? block", FileStr[p->file], p->line); + } + else + { + REACH((Junction *)p->next, + 1, + &(pred->completion), + pred->scontext[1]); +#ifdef DBG_PRED + fprintf(stderr, "LL(1) context:"); + s_fprT(stderr, pred->scontext[1]); + fprintf(stderr, "\n"); +#endif + } + } + } + { + Predicate *d = find_predicates(p->next), *root; +/* Warning: Doesn't seem like the up pointers will all be set correctly; + * TJP: that's ok, we're not using them now. + */ + if ( d!=NULL ) + { + root = new_pred(); + root->expr = PRED_AND_LIST; + root->down = pred; + pred->right = d; + pred->up = root; + d->left = pred; + d->up = pred->up; + return root; + } + } + return pred; + } + return NULL; + } + case nRuleRef : + { + Predicate *a; + RuleRefNode *p = (RuleRefNode *) alt; + Junction *r; + int save_halt; + RuleEntry *q = (RuleEntry *) hash_get(Rname, p->text); + if ( q == NULL ) + { + warnFL( eMsg1("rule %s not defined",p->text), FileStr[p->file], p->line ); + return NULL; + } + r = RulePtr[q->rulenum]; + if ( r->pred_lock[1] ) + { + /* infinite left-recursion; ignore 'cause LL sup 1 (k) analysis + * must have seen it earlier. + */ + return NULL; + } + save_halt = r->end->halt; + r->end->halt = TRUE; +/* a = find_predicates((Node *)r->p1);*/ + a = find_predicates((Node *)r); + r->end->halt = save_halt; + if ( a==NULL ) return NULL; + /* attempt to compute the "local" FOLLOW just like in normal lookahead + * computation if needed + */ + complete_context_sets(p,a); + complete_context_trees(p,a); + return a; + } + case nToken : + break; + } + + return NULL; +} + +Predicate * +#ifdef __STDC__ +new_pred( void ) +#else +new_pred( ) +#endif +{ + Predicate *p = (Predicate *) malloc(sizeof(Predicate)); + require(p!=NULL, "new_pred: cannot alloc predicate"); + *p = pred_empty; + return p; +} + +static void +#ifdef __STDC__ +complete_context_sets( RuleRefNode *p, Predicate *a ) +#else +complete_context_sets( p, a ) +RuleRefNode *p; +Predicate *a; +#endif +{ + set rk2, b; + int k2; + +#ifdef DBG_PRED + fprintf(stderr, "enter complete_context_sets\n"); +#endif + for (; a!=NULL; a=a->right) + { + if ( a->expr == PRED_AND_LIST || a->expr == PRED_OR_LIST ) + { + complete_context_sets(p, a->down); + continue; + } + rk2 = b = empty; + while ( !set_nil(a->completion) ) + { + k2 = set_int(a->completion); + set_rm(k2, a->completion); + REACH(p->next, k2, &rk2, b); + set_orin(&(a->scontext[1]), b); + set_free(b); + } + set_orin(&(a->completion), rk2);/* remember what we couldn't do */ + set_free(rk2); +#ifdef DBG_PRED + fprintf(stderr, "LL(1) context for %s(addr 0x%x) after ruleref:", a->expr, a); + s_fprT(stderr, a->scontext[1]); + fprintf(stderr, "\n"); +#endif +/* complete_context_sets(p, a->down);*/ + } +#ifdef DBG_PRED + fprintf(stderr, "exit complete_context_sets\n"); +#endif +} + +static void +#ifdef __STDC__ +complete_context_trees( RuleRefNode *p, Predicate *a ) +#else +complete_context_trees( p, a ) +RuleRefNode *p; +Predicate *a; +#endif +{ + set rk2; + int k2; + Tree *u; + +#ifdef DBG_PRED + fprintf(stderr, "enter complete_context_trees\n"); +#endif + for (; 
a!=NULL; a=a->right) + { + if ( a->expr == PRED_AND_LIST || a->expr == PRED_OR_LIST ) + { + complete_context_trees(p, a->down); + continue; + } + rk2 = empty; + /* any k left to do? if so, link onto tree */ + while ( !set_nil(a->completion) ) + { + k2 = set_int(a->completion); + set_rm(k2, a->completion); + u = NULL; + TRAV(p->next, k2, &rk2, u); + /* any subtrees missing k2 tokens, add u onto end */ + a->tcontext = tlink(a->tcontext, u, k2); + } + set_orin(&(a->completion), rk2);/* remember what we couldn't do */ + set_free(rk2); +#ifdef DBG_PRED + fprintf(stderr, "LL(i<%d) context after ruleref:", LL_k); + preorder(a->tcontext); + fprintf(stderr, "\n"); +#endif +/* complete_context_trees(p, a->down);*/ + } +#ifdef DBG_PRED + fprintf(stderr, "exit complete_context_trees\n"); +#endif +} + +/* Walk a list of predicates and return the set of all tokens in scontext[1]'s */ +set +#ifdef __STDC__ +covered_set( Predicate *p ) +#else +covered_set( p ) +Predicate *p; +#endif +{ + set a; + + a = empty; + for (; p!=NULL; p=p->right) + { + if ( p->expr == PRED_AND_LIST || p->expr == PRED_OR_LIST ) + { + set_orin(&a, covered_set(p->down)); + continue; + } + set_orin(&a, p->scontext[1]); + set_orin(&a, covered_set(p->down)); + } + return a; +} -- 2.40.0
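A closing sketch (not part of the patch) of the recursion in covered_set() above: walk the right-linked predicate list, descend through AND/OR nodes, and OR together every LL(1) context set. Pred, scontext1 and covered are invented stand-ins; bitmasks replace the PCCTS set type.

#include <stdio.h>

typedef struct Pred {
    int is_operator;                  /* AND/OR node vs. leaf predicate */
    unsigned scontext1;               /* leaf: its LL(1) context set    */
    struct Pred *down, *right;
} Pred;

static unsigned covered(const Pred *p)
{
    unsigned a = 0;
    for (; p != NULL; p = p->right) {
        if (!p->is_operator) a |= p->scontext1;
        a |= covered(p->down);        /* operators contribute via children */
    }
    return a;
}

int main(void)
{
    Pred leaf1 = { 0, 0x3, NULL, NULL };
    Pred leaf2 = { 0, 0xc, NULL, NULL };
    Pred root  = { 1, 0,   &leaf1, NULL };
    leaf1.right = &leaf2;
    printf("covered = %#x\n", covered(&root));   /* prints 0xf */
    return 0;
}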