X-Git-Url: https://pd.if.org/git/?p=pccts;a=blobdiff_plain;f=antlr%2Fbits.c;fp=antlr%2Fbits.c;h=ba71f816af00a1802e325424f191da1c52e19083;hp=0000000000000000000000000000000000000000;hb=129ce0f1c9d43c04ed8198ac184bce8d8be0042e;hpb=b5b3c41d4e99ca613b441d68458aa3cd873aa417 diff --git a/antlr/bits.c b/antlr/bits.c new file mode 100755 index 0000000..ba71f81 --- /dev/null +++ b/antlr/bits.c @@ -0,0 +1,826 @@ +/* + * bits.c -- manage creation and output of bit sets used by the parser. + * + * $Id: bits.c,v 1.3 95/09/26 12:58:38 parrt Exp $ + * $Revision: 1.3 $ + * + * SOFTWARE RIGHTS + * + * We reserve no LEGAL rights to the Purdue Compiler Construction Tool + * Set (PCCTS) -- PCCTS is in the public domain. An individual or + * company may do whatever they wish with source code distributed with + * PCCTS or the code generated by PCCTS, including the incorporation of + * PCCTS, or its output, into commerical software. + * + * We encourage users to develop software with PCCTS. However, we do ask + * that credit is given to us for developing PCCTS. By "credit", + * we mean that if you incorporate our source code into one of your + * programs (commercial product, research project, or otherwise) that you + * acknowledge this fact somewhere in the documentation, research report, + * etc... If you like PCCTS and have developed a nice tool with the + * output, please mention that you developed it using PCCTS. In + * addition, we ask that this header remain intact in our source code. + * As long as these guidelines are kept, we expect to continue enhancing + * this system and expect to make other tools available as they are + * completed. + * + * ANTLR 1.33 + * Terence Parr + * Parr Research Corporation + * with Purdue University and AHPCRC, University of Minnesota + * 1989-1995 + */ +#include +#include +#ifdef __cplusplus +#ifndef __STDC__ +#define __STDC__ +#endif +#endif +#include "set.h" +#include "syn.h" +#include "hash.h" +#include "generic.h" +#include "dlgdef.h" + +/* char is only thing that is pretty much always known == 8 bits + * This allows output of antlr (set stuff, anyway) to be androgynous (portable) + */ +typedef unsigned char SetWordType; +#define BitsPerByte 8 +#define BitsPerWord BitsPerByte*sizeof(SetWordType) + +static SetWordType *setwd = NULL; +int setnum = -1; +int wordnum = 0; + +int esetnum = 0; + +/* Used to convert native wordsize, which ANTLR uses (via set.c) to manipulate sets, + to bytes that are most portable size-wise. + */ +void +#ifdef __STDC__ +DumpIntAsChars( FILE *f, char *format, unsigned wd ) +#else +DumpIntAsChars( f, format, wd ) +FILE *f; +char *format; +unsigned wd; +#endif +{ + int i; + /* uses max of 32 bit unsigned integer for the moment */ + static unsigned long byte_mask[sizeof(unsigned long)] = + { 0xFF, 0xFF00, 0xFF0000, 0xFF000000 }; +/* 0xFF00000000, 0xFF0000000000, 0xFF000000000000, 0xFF00000000000000 };*/ + + /* for each byte in the word */ + for (i=0; i>(i*BitsPerByte)); + if ( itok))); + return empty; + } + r = RulePtr[q->rulenum]; + r->end->halt = TRUE; /* don't let reach fall off end of rule here */ + rk = empty; + REACH(r, 1, &rk, a); + r->end->halt = FALSE; + return a; +} + +/* + * scan the list of tokens/eclasses/nonterminals filling the new eclass + * with the set described by the list. Note that an eclass can be + * quoted to allow spaces etc... However, an eclass must not conflict + * with a reg expr found elsewhere. The reg expr will be taken over + * the eclass name. + */ +static void +#ifdef __STDC__ +doEclass( char *eclass ) +#else +doEclass( eclass ) +char *eclass; +#endif +{ + TermEntry *q; + ECnode *p; + ListNode *e; + unsigned int t; + unsigned deg=0; + set a; + require(eclass!=NULL, "doEclass: NULL eset"); + + p = (ECnode *) eclass; + lexmode(p->lexclass); /* switch to lexclass where errclass is defined */ + p->eset = empty; + for (e = (p->elist)->next; e!=NULL; e=e->next) + { + if ( islower( *((char *)e->elem) ) ) /* is it a rule ref? (alias FIRST request) */ + { + a = Efirst((char *)e->elem, p); + set_orin(&p->eset, a); + deg += set_deg(a); + set_free( a ); + continue; + } + else if ( *((char *)e->elem)=='"' ) + { + t = 0; + q = (TermEntry *) hash_get(Texpr, (char *) e->elem); + if ( q == NULL ) + { + /* if quoted and not an expr look for eclass name */ + q = (TermEntry *) hash_get(Tname, *((char **)&(e->elem))=StripQuotes((char *)e->elem)); + if ( q != NULL ) t = q->token; + } + else t = q->token; + } + else /* labelled token/eclass/tokclass */ + { + q = (TermEntry *) hash_get(Tname, (char *)e->elem); + if ( q != NULL ) + { + if ( strcmp((char *)e->elem, TokenString(p->tok))==0 ) + { + warnNoFL(eMsg1("self-referential error class '%s'; ignored", + (char *)e->elem)); + continue; + } + else + t = q->token; + } + else t=0; + } + if ( t!=0 ) + { + set_orel(t, &p->eset); + deg++; + } + else warnNoFL(eMsg2("undefined token '%s' referenced in errclass '%s'; ignored", + (char *)e->elem, TokenString(p->tok))); + } + p->setdeg = deg; +} + +void +#ifdef __STDC__ +ComputeErrorSets( void ) +#else +ComputeErrorSets( ) +#endif +{ +#ifdef __cplusplus + list_apply(eclasses, (void (*)(void *)) doEclass); +#else +#ifdef __STDC__ + list_apply(eclasses, (void (*)(void *)) doEclass); +#else + list_apply(eclasses, doEclass); +#endif +#endif +} + +void +#ifdef __STDC__ +ComputeTokSets( void ) +#else +ComputeTokSets( ) +#endif +{ + ListNode *t, *e = NULL; + int something_changed; + TCnode *p; + TermEntry *q; + + if ( tclasses == NULL ) return; + + /* turn lists of token/tokclass references into sets */ + for (t = tclasses->next; t!=NULL; t=t->next) + { + p = (TCnode *) t->elem; + + /* if wild card, then won't have entries in tclass, assume all_tokens */ + if ( p->tok == WildCardToken ) + { + p->tset = set_dup(all_tokens); + continue; + } + + lexmode(p->lexclass); /* switch to lexclass where tokclass is defined */ + p->tset = empty; + + /* instantiate all tokens/token_classes into the tset */ + for (e = (p->tlist)->next; e!=NULL; e=e->next) + { + char *tokstr; + tokstr = (char *)e->elem; + if ( *tokstr == '"' ) q = (TermEntry *) hash_get(Texpr, tokstr); + else q = (TermEntry *) hash_get(Tname, tokstr); + require(q!=NULL, "ComputeTokSets: no token def"); + set_orel(q->token, &p->tset); + } + } + + /* Go thru list of tokclasses again looking for tokclasses in sets */ +again: + something_changed = 0; + for (t = tclasses->next; t!=NULL; t=t->next) + { + set tcl; + p = (TCnode *) t->elem; + tcl = set_and(p->tset, tokclasses); + if ( !set_nil(tcl) ) + { + int tk; + /* replace refs to tokclasses with the associated set of tokens */ + something_changed = 1; + while ( !set_nil(tcl) ) + { + tk = set_int(tcl); /* grab one of the tok class refs */ + set_rm(tk, tcl); + if ( p->tok != tk ) /* tokclass ref to yourself? */ + { + q = (TermEntry *) hash_get(Tname, TokenString(tk)); + require(q!=NULL, "#tokclass not in hash table"); + set_orin(&p->tset, q->tclass->tset); + } + set_rm(tk, p->tset); /* remove ref that we replaced */ + } + } + set_free(tcl); + } + if ( something_changed ) goto again; +} + +void +DumpRemainingTokSets() +{ + TCnode *p; + ListNode *t; + + /* Go thru tclasses (for the last time) and dump the sets not dumped + * during code gen; yes, this is a bogus way to do this, but ComputeTokSets() + * can't dump the defs as the error file and tok file has not been created + * yet etc... + */ + if ( tclasses==NULL ) return; + for (t = tclasses->next; t!=NULL; t=t->next) + { + unsigned e; + p = (TCnode *) t->elem; + if ( p->dumped ) continue; + e = DefErrSet(&(p->tset), 0, TokenString(p->tok)); + p->dumped = 1; + p->setnum = e; + } +} + + +/* replace a subset of an error set with an error class name if a subset is found + * repeat process until no replacements made + */ +void +#ifdef __STDC__ +SubstErrorClass( set *f ) +#else +SubstErrorClass( f ) +set *f; +#endif +{ + int max, done = 0; + ListNode *p; + ECnode *ec, *maxclass = NULL; + set a; + require(f!=NULL, "SubstErrorClass: NULL eset"); + + if ( eclasses == NULL ) return; + while ( !done ) + { + max = 0; + maxclass = NULL; + for (p=eclasses->next; p!=NULL; p=p->next) /* chk all error classes */ + { + ec = (ECnode *) p->elem; + if ( ec->setdeg > max ) + { + if ( set_sub(ec->eset, *f) || set_equ(ec->eset, *f) ) + {maxclass = ec; max=ec->setdeg;} + } + } + if ( maxclass != NULL ) /* if subset found, replace with token */ + { + a = set_dif(*f, maxclass->eset); + set_orel((unsigned)maxclass->tok, &a); + set_free(*f); + *f = a; + } + else done = 1; + } +} + +int +#ifdef __STDC__ +DefErrSet( set *f, int subst, char *name ) +#else +DefErrSet( f, subst, name ) +set *f; +int subst; /* should be substitute error classes? */ +char *name; +#endif +{ + if ( GenCC ) return DefErrSetForCC( f, subst, name ); + else return DefErrSetForC( f, subst, name ); +} + +/* Define a new error set. WARNING...set-implementation dependent. + */ +int +#ifdef __STDC__ +DefErrSetForC( set *f, int subst, char *name ) +#else +DefErrSetForC( f, subst, name ) +set *f; +int subst; /* should be substitute error classes? */ +char *name; +#endif +{ + unsigned *p, *endp; + int e=1; + require(!set_nil(*f), "DefErrSet: nil set to dump?"); + + if ( subst ) SubstErrorClass(f); + p = f->setword; + endp = &(f->setword[f->n]); + esetnum++; + if ( name!=NULL ) + fprintf(DefFile, "extern SetWordType %s_set[];\n", name); + else + fprintf(DefFile, "extern SetWordType zzerr%d[];\n", esetnum); + if ( name!=NULL ) { + fprintf(ErrFile, "SetWordType %s_set[%d] = {", + name, + NumWords(TokenNum-1)*sizeof(unsigned)); + } + else { + fprintf(ErrFile, "SetWordType zzerr%d[%d] = {", + esetnum, + NumWords(TokenNum-1)*sizeof(unsigned)); + } + while ( p < endp ) + { + if ( e > 1 ) fprintf(ErrFile, ", "); + DumpIntAsChars(ErrFile, "0x%x", *p++); + if ( e == 3 ) + { + DAWDLE; + if ( p < endp ) fprintf(ErrFile, ","); + fprintf(ErrFile, "\n\t"); + e=1; + } + else e++; + } + fprintf(ErrFile, "};\n"); + + return esetnum; +} + +/* Define a new error set. WARNING...set-implementation dependent; + * Only used when -CC on. + */ +int +#ifdef __STDC__ +DefErrSetForCC( set *f, int subst, char *name ) +#else +DefErrSetForCC( f, subst, name ) +set *f; +int subst; /* should be substitute error classes? */ +char *name; +#endif +{ + unsigned *p, *endp; + int e=1; + require(!set_nil(*f), "DefErrSet: nil set to dump?"); + + if ( subst ) SubstErrorClass(f); + p = f->setword; + endp = &(f->setword[f->n]); + esetnum++; + + if ( name!=NULL ) { + fprintf(Parser_h, "\tstatic SetWordType %s_set[%d];\n", name, + NumWords(TokenNum-1)*sizeof(unsigned)); + fprintf(Parser_c, "SetWordType %s::%s_set[%d] = {", + CurrentClassName, + name, + NumWords(TokenNum-1)*sizeof(unsigned)); + } + else { + fprintf(Parser_c, "SetWordType %s::err%d[%d] = {", + CurrentClassName, + esetnum, + NumWords(TokenNum-1)*sizeof(unsigned)); + fprintf(Parser_h, "\tstatic SetWordType err%d[%d];\n", esetnum, + NumWords(TokenNum-1)*sizeof(unsigned)); + } + + while ( p < endp ) + { + if ( e > 1 ) fprintf(Parser_c, ", "); + DumpIntAsChars(Parser_c, "0x%x", *p++); + if ( e == 3 ) + { + if ( p < endp ) fprintf(Parser_c, ","); + fprintf(Parser_c, "\n\t"); + e=1; + } + else e++; + } + fprintf(Parser_c, "};\n"); + + return esetnum; +} + +void +#ifdef __STDC__ +GenParser_c_Hdr(void) +#else +GenParser_c_Hdr() +#endif +{ + int i,j; + + fprintf(Parser_c, "/*\n"); + fprintf(Parser_c, " * %s: P a r s e r S u p p o r t\n", CurrentClassName); + fprintf(Parser_c, " *\n"); + fprintf(Parser_c, " * Generated from:"); + for (i=0; i\n"); + fprintf(Parser_c, "#define ANTLR_VERSION %s\n", VersionDef); + fprintf(Parser_c, "#define ANTLR_SUPPORT_CODE\n"); + if ( UserTokenDefsFile != NULL ) + fprintf(Parser_c, "#include %s\n", UserTokenDefsFile); + else + fprintf(Parser_c, "#include \"%s\"\n", DefFileName); + + fprintf(Parser_c, "#include \"%s.h\"\n", CurrentClassName); + + /* Dump a Parser::tokens for each automaton */ + fprintf(Parser_c, "\nANTLRChar *%s::_token_tbl[]={\n", CurrentClassName); + fprintf(Parser_c, "\t/* 00 */\t\"Invalid\""); + + for (i=1; i=LastTokenCounted ) + { + fprintf(Parser_c, ",\n\t/* %02d */\t\"invalid\"", i); + continue; + } + if ( TokenString(i) != NULL ) + fprintf(Parser_c, ",\n\t/* %02d */\t\"%s\"", i, TokenString(i)); + else + { + /* look in all lexclasses for the reg expr */ + for (j=0; j=NumLexClasses ) + { + if ( UserDefdTokens ) + { + fprintf(Parser_c, ",\n\t/* %02d */\t\"\"", i); + } + else + fatal_internal(eMsgd("No label or expr for token %d",i)); + } + } + } + fprintf(Parser_c, "\n};\n"); + + /* Build constructors */ + fprintf(Parser_c, "\n%s::", CurrentClassName); + fprintf(Parser_c, "%s(ANTLRTokenBuffer *input) : ANTLRParser(input,%d,%d,%d,%d)\n", + CurrentClassName, + OutputLL_k, + FoundGuessBlk, + DemandLookahead, + NumWords(TokenNum-1)*sizeof(unsigned)); + fprintf(Parser_c, "{\n"); + fprintf(Parser_c, "\ttoken_tbl = _token_tbl;\n"); + fprintf(Parser_c, "}\n\n"); +} + +void +#ifdef __STDC__ +GenParser_h_Hdr(void) +#else +GenParser_h_Hdr() +#endif +{ + int i; + + fprintf(Parser_h, "/*\n"); + fprintf(Parser_h, " * %s: P a r s e r H e a d e r \n", CurrentClassName); + fprintf(Parser_h, " *\n"); + fprintf(Parser_h, " * Generated from:"); + for (i=0; i\n"); + fprintf(ErrFile, "#define ANTLR_VERSION %s\n", VersionDef); + if ( strcmp(ParserName, DefaultParserName)!=0 ) + fprintf(ErrFile, "#define %s %s\n", DefaultParserName, ParserName); + if ( strcmp(ParserName, DefaultParserName)!=0 ) + fprintf(ErrFile, "#include \"%s\"\n", RemapFileName); + if ( HdrAction != NULL ) dumpAction( HdrAction, ErrFile, 0, -1, 0, 1 ); + if ( FoundGuessBlk ) + { + fprintf(ErrFile, "#define ZZCAN_GUESS\n"); + fprintf(ErrFile, "#include \n"); + } + + if ( OutputLL_k > 1 ) fprintf(ErrFile, "#define LL_K %d\n", OutputLL_k); +#ifdef DUM + if ( LexGen ) fprintf(ErrFile, "#define zzEOF_TOKEN %d\n", (TokenInd!=NULL?TokenInd[EofToken]:EofToken)); +#endif + fprintf(ErrFile, "#define zzSET_SIZE %d\n", NumWords(TokenNum-1)*sizeof(unsigned)); + if ( DemandLookahead ) fprintf(ErrFile, "#define DEMAND_LOOK\n"); + fprintf(ErrFile, "#include \"antlr.h\"\n"); + if ( GenAST ) fprintf(ErrFile, "#include \"ast.h\"\n"); + + if ( UserDefdTokens ) fprintf(ErrFile, "#include %s\n", UserTokenDefsFile); + /* still need this one as it has the func prototypes */ + fprintf(ErrFile, "#include \"%s\"\n", DefFileName); + fprintf(ErrFile, "#include \"dlgdef.h\"\n"); + fprintf(ErrFile, "#include \"err.h\"\n\n"); + + /* Dump a zztokens for each automaton */ + if ( strcmp(ParserName, DefaultParserName)!=0 ) + { + fprintf(ErrFile, "ANTLRChar *%s_zztokens[%d]={\n", ParserName, TokenNum-1); + } + else + { + fprintf(ErrFile, "ANTLRChar *zztokens[%d]={\n", TokenNum-1); + } + fprintf(ErrFile, "\t/* 00 */\t\"Invalid\""); + for (i=1; i=LastTokenCounted ) + { + fprintf(ErrFile, ",\n\t/* %02d */\t\"invalid\"", i); + continue; + } + if ( TokenString(i) != NULL ) + fprintf(ErrFile, ",\n\t/* %02d */\t\"%s\"", i, TokenString(i)); + else + { + /* look in all lexclasses for the reg expr */ + for (j=0; j=NumLexClasses ) + { + if ( UserDefdTokens ) + { + fprintf(ErrFile, ",\n\t/* %02d */\t\"\"", i); + } + else + fatal_internal(eMsgd("No label or expr for token %d",i)); + } + } + } + fprintf(ErrFile, "\n};\n"); +} + +void +#ifdef __STDC__ +dumpExpr( FILE *f, char *e ) +#else +dumpExpr( f, e ) +FILE *f; +char *e; +#endif +{ + while ( *e!='\0' ) + { + if ( *e=='\\' && *(e+1)=='\\' ) + {putc('\\', f); putc('\\', f); e+=2;} + else if ( *e=='\\' && *(e+1)=='"' ) + {putc('\\', f); putc('"', f); e+=2;} + else if ( *e=='\\' ) {putc('\\', f); putc('\\', f); e++;} + else {putc(*e, f); e++;} + } +}