/* * bits.c -- manage creation and output of bit sets used by the parser. * * $Id: bits.c,v 1.3 95/09/26 12:58:38 parrt Exp $ * $Revision: 1.3 $ * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. * * ANTLR 1.33 * Terence Parr * Parr Research Corporation * with Purdue University and AHPCRC, University of Minnesota * 1989-1995 */ #include #include #ifdef __cplusplus #ifndef __STDC__ #define __STDC__ #endif #endif #include "set.h" #include "syn.h" #include "hash.h" #include "generic.h" #include "dlgdef.h" /* char is only thing that is pretty much always known == 8 bits * This allows output of antlr (set stuff, anyway) to be androgynous (portable) */ typedef unsigned char SetWordType; #define BitsPerByte 8 #define BitsPerWord BitsPerByte*sizeof(SetWordType) static SetWordType *setwd = NULL; int setnum = -1; int wordnum = 0; int esetnum = 0; /* Used to convert native wordsize, which ANTLR uses (via set.c) to manipulate sets, to bytes that are most portable size-wise. */ void #ifdef __STDC__ DumpIntAsChars( FILE *f, char *format, unsigned wd ) #else DumpIntAsChars( f, format, wd ) FILE *f; char *format; unsigned wd; #endif { int i; /* uses max of 32 bit unsigned integer for the moment */ static unsigned long byte_mask[sizeof(unsigned long)] = { 0xFF, 0xFF00, 0xFF0000, 0xFF000000 }; /* 0xFF00000000, 0xFF0000000000, 0xFF000000000000, 0xFF00000000000000 };*/ /* for each byte in the word */ for (i=0; i>(i*BitsPerByte)); if ( itok))); return empty; } r = RulePtr[q->rulenum]; r->end->halt = TRUE; /* don't let reach fall off end of rule here */ rk = empty; REACH(r, 1, &rk, a); r->end->halt = FALSE; return a; } /* * scan the list of tokens/eclasses/nonterminals filling the new eclass * with the set described by the list. Note that an eclass can be * quoted to allow spaces etc... However, an eclass must not conflict * with a reg expr found elsewhere. The reg expr will be taken over * the eclass name. */ static void #ifdef __STDC__ doEclass( char *eclass ) #else doEclass( eclass ) char *eclass; #endif { TermEntry *q; ECnode *p; ListNode *e; unsigned int t; unsigned deg=0; set a; require(eclass!=NULL, "doEclass: NULL eset"); p = (ECnode *) eclass; lexmode(p->lexclass); /* switch to lexclass where errclass is defined */ p->eset = empty; for (e = (p->elist)->next; e!=NULL; e=e->next) { if ( islower( *((char *)e->elem) ) ) /* is it a rule ref? (alias FIRST request) */ { a = Efirst((char *)e->elem, p); set_orin(&p->eset, a); deg += set_deg(a); set_free( a ); continue; } else if ( *((char *)e->elem)=='"' ) { t = 0; q = (TermEntry *) hash_get(Texpr, (char *) e->elem); if ( q == NULL ) { /* if quoted and not an expr look for eclass name */ q = (TermEntry *) hash_get(Tname, *((char **)&(e->elem))=StripQuotes((char *)e->elem)); if ( q != NULL ) t = q->token; } else t = q->token; } else /* labelled token/eclass/tokclass */ { q = (TermEntry *) hash_get(Tname, (char *)e->elem); if ( q != NULL ) { if ( strcmp((char *)e->elem, TokenString(p->tok))==0 ) { warnNoFL(eMsg1("self-referential error class '%s'; ignored", (char *)e->elem)); continue; } else t = q->token; } else t=0; } if ( t!=0 ) { set_orel(t, &p->eset); deg++; } else warnNoFL(eMsg2("undefined token '%s' referenced in errclass '%s'; ignored", (char *)e->elem, TokenString(p->tok))); } p->setdeg = deg; } void #ifdef __STDC__ ComputeErrorSets( void ) #else ComputeErrorSets( ) #endif { #ifdef __cplusplus list_apply(eclasses, (void (*)(void *)) doEclass); #else #ifdef __STDC__ list_apply(eclasses, (void (*)(void *)) doEclass); #else list_apply(eclasses, doEclass); #endif #endif } void #ifdef __STDC__ ComputeTokSets( void ) #else ComputeTokSets( ) #endif { ListNode *t, *e = NULL; int something_changed; TCnode *p; TermEntry *q; if ( tclasses == NULL ) return; /* turn lists of token/tokclass references into sets */ for (t = tclasses->next; t!=NULL; t=t->next) { p = (TCnode *) t->elem; /* if wild card, then won't have entries in tclass, assume all_tokens */ if ( p->tok == WildCardToken ) { p->tset = set_dup(all_tokens); continue; } lexmode(p->lexclass); /* switch to lexclass where tokclass is defined */ p->tset = empty; /* instantiate all tokens/token_classes into the tset */ for (e = (p->tlist)->next; e!=NULL; e=e->next) { char *tokstr; tokstr = (char *)e->elem; if ( *tokstr == '"' ) q = (TermEntry *) hash_get(Texpr, tokstr); else q = (TermEntry *) hash_get(Tname, tokstr); require(q!=NULL, "ComputeTokSets: no token def"); set_orel(q->token, &p->tset); } } /* Go thru list of tokclasses again looking for tokclasses in sets */ again: something_changed = 0; for (t = tclasses->next; t!=NULL; t=t->next) { set tcl; p = (TCnode *) t->elem; tcl = set_and(p->tset, tokclasses); if ( !set_nil(tcl) ) { int tk; /* replace refs to tokclasses with the associated set of tokens */ something_changed = 1; while ( !set_nil(tcl) ) { tk = set_int(tcl); /* grab one of the tok class refs */ set_rm(tk, tcl); if ( p->tok != tk ) /* tokclass ref to yourself? */ { q = (TermEntry *) hash_get(Tname, TokenString(tk)); require(q!=NULL, "#tokclass not in hash table"); set_orin(&p->tset, q->tclass->tset); } set_rm(tk, p->tset); /* remove ref that we replaced */ } } set_free(tcl); } if ( something_changed ) goto again; } void DumpRemainingTokSets() { TCnode *p; ListNode *t; /* Go thru tclasses (for the last time) and dump the sets not dumped * during code gen; yes, this is a bogus way to do this, but ComputeTokSets() * can't dump the defs as the error file and tok file has not been created * yet etc... */ if ( tclasses==NULL ) return; for (t = tclasses->next; t!=NULL; t=t->next) { unsigned e; p = (TCnode *) t->elem; if ( p->dumped ) continue; e = DefErrSet(&(p->tset), 0, TokenString(p->tok)); p->dumped = 1; p->setnum = e; } } /* replace a subset of an error set with an error class name if a subset is found * repeat process until no replacements made */ void #ifdef __STDC__ SubstErrorClass( set *f ) #else SubstErrorClass( f ) set *f; #endif { int max, done = 0; ListNode *p; ECnode *ec, *maxclass = NULL; set a; require(f!=NULL, "SubstErrorClass: NULL eset"); if ( eclasses == NULL ) return; while ( !done ) { max = 0; maxclass = NULL; for (p=eclasses->next; p!=NULL; p=p->next) /* chk all error classes */ { ec = (ECnode *) p->elem; if ( ec->setdeg > max ) { if ( set_sub(ec->eset, *f) || set_equ(ec->eset, *f) ) {maxclass = ec; max=ec->setdeg;} } } if ( maxclass != NULL ) /* if subset found, replace with token */ { a = set_dif(*f, maxclass->eset); set_orel((unsigned)maxclass->tok, &a); set_free(*f); *f = a; } else done = 1; } } int #ifdef __STDC__ DefErrSet( set *f, int subst, char *name ) #else DefErrSet( f, subst, name ) set *f; int subst; /* should be substitute error classes? */ char *name; #endif { if ( GenCC ) return DefErrSetForCC( f, subst, name ); else return DefErrSetForC( f, subst, name ); } /* Define a new error set. WARNING...set-implementation dependent. */ int #ifdef __STDC__ DefErrSetForC( set *f, int subst, char *name ) #else DefErrSetForC( f, subst, name ) set *f; int subst; /* should be substitute error classes? */ char *name; #endif { unsigned *p, *endp; int e=1; require(!set_nil(*f), "DefErrSet: nil set to dump?"); if ( subst ) SubstErrorClass(f); p = f->setword; endp = &(f->setword[f->n]); esetnum++; if ( name!=NULL ) fprintf(DefFile, "extern SetWordType %s_set[];\n", name); else fprintf(DefFile, "extern SetWordType zzerr%d[];\n", esetnum); if ( name!=NULL ) { fprintf(ErrFile, "SetWordType %s_set[%d] = {", name, NumWords(TokenNum-1)*sizeof(unsigned)); } else { fprintf(ErrFile, "SetWordType zzerr%d[%d] = {", esetnum, NumWords(TokenNum-1)*sizeof(unsigned)); } while ( p < endp ) { if ( e > 1 ) fprintf(ErrFile, ", "); DumpIntAsChars(ErrFile, "0x%x", *p++); if ( e == 3 ) { DAWDLE; if ( p < endp ) fprintf(ErrFile, ","); fprintf(ErrFile, "\n\t"); e=1; } else e++; } fprintf(ErrFile, "};\n"); return esetnum; } /* Define a new error set. WARNING...set-implementation dependent; * Only used when -CC on. */ int #ifdef __STDC__ DefErrSetForCC( set *f, int subst, char *name ) #else DefErrSetForCC( f, subst, name ) set *f; int subst; /* should be substitute error classes? */ char *name; #endif { unsigned *p, *endp; int e=1; require(!set_nil(*f), "DefErrSet: nil set to dump?"); if ( subst ) SubstErrorClass(f); p = f->setword; endp = &(f->setword[f->n]); esetnum++; if ( name!=NULL ) { fprintf(Parser_h, "\tstatic SetWordType %s_set[%d];\n", name, NumWords(TokenNum-1)*sizeof(unsigned)); fprintf(Parser_c, "SetWordType %s::%s_set[%d] = {", CurrentClassName, name, NumWords(TokenNum-1)*sizeof(unsigned)); } else { fprintf(Parser_c, "SetWordType %s::err%d[%d] = {", CurrentClassName, esetnum, NumWords(TokenNum-1)*sizeof(unsigned)); fprintf(Parser_h, "\tstatic SetWordType err%d[%d];\n", esetnum, NumWords(TokenNum-1)*sizeof(unsigned)); } while ( p < endp ) { if ( e > 1 ) fprintf(Parser_c, ", "); DumpIntAsChars(Parser_c, "0x%x", *p++); if ( e == 3 ) { if ( p < endp ) fprintf(Parser_c, ","); fprintf(Parser_c, "\n\t"); e=1; } else e++; } fprintf(Parser_c, "};\n"); return esetnum; } void #ifdef __STDC__ GenParser_c_Hdr(void) #else GenParser_c_Hdr() #endif { int i,j; fprintf(Parser_c, "/*\n"); fprintf(Parser_c, " * %s: P a r s e r S u p p o r t\n", CurrentClassName); fprintf(Parser_c, " *\n"); fprintf(Parser_c, " * Generated from:"); for (i=0; i\n"); fprintf(Parser_c, "#define ANTLR_VERSION %s\n", VersionDef); fprintf(Parser_c, "#define ANTLR_SUPPORT_CODE\n"); if ( UserTokenDefsFile != NULL ) fprintf(Parser_c, "#include %s\n", UserTokenDefsFile); else fprintf(Parser_c, "#include \"%s\"\n", DefFileName); fprintf(Parser_c, "#include \"%s.h\"\n", CurrentClassName); /* Dump a Parser::tokens for each automaton */ fprintf(Parser_c, "\nANTLRChar *%s::_token_tbl[]={\n", CurrentClassName); fprintf(Parser_c, "\t/* 00 */\t\"Invalid\""); for (i=1; i=LastTokenCounted ) { fprintf(Parser_c, ",\n\t/* %02d */\t\"invalid\"", i); continue; } if ( TokenString(i) != NULL ) fprintf(Parser_c, ",\n\t/* %02d */\t\"%s\"", i, TokenString(i)); else { /* look in all lexclasses for the reg expr */ for (j=0; j=NumLexClasses ) { if ( UserDefdTokens ) { fprintf(Parser_c, ",\n\t/* %02d */\t\"\"", i); } else fatal_internal(eMsgd("No label or expr for token %d",i)); } } } fprintf(Parser_c, "\n};\n"); /* Build constructors */ fprintf(Parser_c, "\n%s::", CurrentClassName); fprintf(Parser_c, "%s(ANTLRTokenBuffer *input) : ANTLRParser(input,%d,%d,%d,%d)\n", CurrentClassName, OutputLL_k, FoundGuessBlk, DemandLookahead, NumWords(TokenNum-1)*sizeof(unsigned)); fprintf(Parser_c, "{\n"); fprintf(Parser_c, "\ttoken_tbl = _token_tbl;\n"); fprintf(Parser_c, "}\n\n"); } void #ifdef __STDC__ GenParser_h_Hdr(void) #else GenParser_h_Hdr() #endif { int i; fprintf(Parser_h, "/*\n"); fprintf(Parser_h, " * %s: P a r s e r H e a d e r \n", CurrentClassName); fprintf(Parser_h, " *\n"); fprintf(Parser_h, " * Generated from:"); for (i=0; i\n"); fprintf(ErrFile, "#define ANTLR_VERSION %s\n", VersionDef); if ( strcmp(ParserName, DefaultParserName)!=0 ) fprintf(ErrFile, "#define %s %s\n", DefaultParserName, ParserName); if ( strcmp(ParserName, DefaultParserName)!=0 ) fprintf(ErrFile, "#include \"%s\"\n", RemapFileName); if ( HdrAction != NULL ) dumpAction( HdrAction, ErrFile, 0, -1, 0, 1 ); if ( FoundGuessBlk ) { fprintf(ErrFile, "#define ZZCAN_GUESS\n"); fprintf(ErrFile, "#include \n"); } if ( OutputLL_k > 1 ) fprintf(ErrFile, "#define LL_K %d\n", OutputLL_k); #ifdef DUM if ( LexGen ) fprintf(ErrFile, "#define zzEOF_TOKEN %d\n", (TokenInd!=NULL?TokenInd[EofToken]:EofToken)); #endif fprintf(ErrFile, "#define zzSET_SIZE %d\n", NumWords(TokenNum-1)*sizeof(unsigned)); if ( DemandLookahead ) fprintf(ErrFile, "#define DEMAND_LOOK\n"); fprintf(ErrFile, "#include \"antlr.h\"\n"); if ( GenAST ) fprintf(ErrFile, "#include \"ast.h\"\n"); if ( UserDefdTokens ) fprintf(ErrFile, "#include %s\n", UserTokenDefsFile); /* still need this one as it has the func prototypes */ fprintf(ErrFile, "#include \"%s\"\n", DefFileName); fprintf(ErrFile, "#include \"dlgdef.h\"\n"); fprintf(ErrFile, "#include \"err.h\"\n\n"); /* Dump a zztokens for each automaton */ if ( strcmp(ParserName, DefaultParserName)!=0 ) { fprintf(ErrFile, "ANTLRChar *%s_zztokens[%d]={\n", ParserName, TokenNum-1); } else { fprintf(ErrFile, "ANTLRChar *zztokens[%d]={\n", TokenNum-1); } fprintf(ErrFile, "\t/* 00 */\t\"Invalid\""); for (i=1; i=LastTokenCounted ) { fprintf(ErrFile, ",\n\t/* %02d */\t\"invalid\"", i); continue; } if ( TokenString(i) != NULL ) fprintf(ErrFile, ",\n\t/* %02d */\t\"%s\"", i, TokenString(i)); else { /* look in all lexclasses for the reg expr */ for (j=0; j=NumLexClasses ) { if ( UserDefdTokens ) { fprintf(ErrFile, ",\n\t/* %02d */\t\"\"", i); } else fatal_internal(eMsgd("No label or expr for token %d",i)); } } } fprintf(ErrFile, "\n};\n"); } void #ifdef __STDC__ dumpExpr( FILE *f, char *e ) #else dumpExpr( f, e ) FILE *f; char *e; #endif { while ( *e!='\0' ) { if ( *e=='\\' && *(e+1)=='\\' ) {putc('\\', f); putc('\\', f); e+=2;} else if ( *e=='\\' && *(e+1)=='"' ) {putc('\\', f); putc('"', f); e+=2;} else if ( *e=='\\' ) {putc('\\', f); putc('\\', f); e++;} else {putc(*e, f); e++;} } }