2 * bits.c -- manage creation and output of bit sets used by the parser.
4 * $Id: bits.c,v 1.3 95/09/26 12:58:38 parrt Exp $
9 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
10 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
11 * company may do whatever they wish with source code distributed with
12 * PCCTS or the code generated by PCCTS, including the incorporation of
13 * PCCTS, or its output, into commerical software.
15 * We encourage users to develop software with PCCTS. However, we do ask
16 * that credit is given to us for developing PCCTS. By "credit",
17 * we mean that if you incorporate our source code into one of your
18 * programs (commercial product, research project, or otherwise) that you
19 * acknowledge this fact somewhere in the documentation, research report,
20 * etc... If you like PCCTS and have developed a nice tool with the
21 * output, please mention that you developed it using PCCTS. In
22 * addition, we ask that this header remain intact in our source code.
23 * As long as these guidelines are kept, we expect to continue enhancing
24 * this system and expect to make other tools available as they are
29 * Parr Research Corporation
30 * with Purdue University and AHPCRC, University of Minnesota
46 /* char is the only type whose size is pretty much always known to be 8 bits.
47 * This allows the output of antlr (the set stuff, anyway) to be portable.
49 typedef unsigned char SetWordType;
51 #define BitsPerWord BitsPerByte*sizeof(SetWordType)
53 static SetWordType *setwd = NULL;
59 /* Used to convert words of the native size, which ANTLR uses (via set.c) to
60 manipulate sets, into bytes, which are the most portable unit size-wise.
64 DumpIntAsChars( FILE *f, char *format, unsigned wd )
66 DumpIntAsChars( f, format, wd )
73 /* handles at most a 32-bit unsigned integer for the moment */
74 static unsigned long byte_mask[sizeof(unsigned long)] =
75 { 0xFF, 0xFF00, 0xFF0000, 0xFF000000 };
76 /* 0xFF00000000, 0xFF0000000000, 0xFF000000000000, 0xFF00000000000000 };*/
78 /* for each byte in the word */
79 for (i=0; i<sizeof(unsigned); i++)
81 /* mask out the ith byte and shift down to the first 8 bits */
82 fprintf(f, format, (wd&byte_mask[i])>>(i*BitsPerByte));
83 if ( i<sizeof(unsigned)-1) fprintf(f, ",");
87 /* Create a new setwd (ignoring [Ep] token on end) */
99 setwd = (SetWordType *) calloc(TokenNum, sizeof(SetWordType));
100 require(setwd!=NULL, "NewSetWd: cannot alloc set wd\n");
102 for (p = setwd; p<&(setwd[TokenNum]); p++) {*p=0;}
113 if ( GenCC ) DumpSetWdForCC();
114 else DumpSetWdForC();
117 /* Dump the current setwd to ErrFile. 0..MaxTokenVal */
120 DumpSetWdForC( void )
127 if ( setwd==NULL ) return;
128 if ( !GenCC ) fprintf(DefFile, "extern SetWordType setwd%d[];\n", wordnum);
130 "SetWordType setwd%d[%d] = {", wordnum, TokenNum-1);
131 for (i=0; i<TokenNum-1; i++)
134 if ( i!=0 ) fprintf(ErrFile, ",");
135 if ( c == 8 ) {fprintf(ErrFile, "\n\t"); c=1;} else c++;
136 fprintf(ErrFile, "0x%x", setwd[i]);
138 fprintf(ErrFile, "};\n");
141 /* Dump the current setwd to Parser.C file. 0..MaxTokenVal;
142 * Only used if -CC on.
146 DumpSetWdForCC( void )
153 if ( setwd==NULL ) return;
154 fprintf(Parser_h, "\tstatic SetWordType setwd%d[%d];\n", wordnum, TokenNum-1);
156 "SetWordType %s::setwd%d[%d] = {", CurrentClassName, wordnum,
158 for (i=0; i<TokenNum-1; i++)
161 if ( i!=0 ) fprintf(Parser_c, ",");
162 if ( c == 8 ) {fprintf(Parser_c, "\n\t"); c=1;} else c++;
163 fprintf(Parser_c, "0x%x", setwd[i]);
165 fprintf(Parser_c, "};\n");
168 /* Make a new set. Dump old setwd and create new setwd if current setwd is full */
177 if ( setnum==BitsPerWord ) /* is current setwd full? */
179 DumpSetWd(); NewSetWd(); setnum = 0;
183 /* s is a set of tokens. Turn on bit at each token position in set 'setnum' */
192 SetWordType mask=(((unsigned)1)<<setnum);
195 while ( !set_nil(s) )
203 /* E r r o r C l a s s S t u f f */
205 /* compute the FIRST of a rule for the error class stuff */
208 Efirst( char *rule, ECnode *eclass )
210 Efirst( rule, eclass )
217 RuleEntry *q = (RuleEntry *) hash_get(Rname, rule);
221 warnNoFL(eMsg2("undefined rule '%s' referenced in errclass '%s'; ignored",
222 rule, TokenString(eclass->tok)));
225 r = RulePtr[q->rulenum];
226 r->end->halt = TRUE; /* don't let reach fall off end of rule here */
229 r->end->halt = FALSE;
234 * scan the list of tokens/eclasses/nonterminals filling the new eclass
235 * with the set described by the list. Note that an eclass can be
236 * quoted to allow spaces etc... However, an eclass must not conflict
237 * with a reg expr found elsewhere. The reg expr will be taken over
242 doEclass( char *eclass )
254 require(eclass!=NULL, "doEclass: NULL eset");
256 p = (ECnode *) eclass;
257 lexmode(p->lexclass); /* switch to lexclass where errclass is defined */
259 for (e = (p->elist)->next; e!=NULL; e=e->next)
261 if ( islower( *((char *)e->elem) ) ) /* is it a rule ref? (alias FIRST request) */
263 a = Efirst((char *)e->elem, p);
264 set_orin(&p->eset, a);
269 else if ( *((char *)e->elem)=='"' )
272 q = (TermEntry *) hash_get(Texpr, (char *) e->elem);
275 /* if quoted and not an expr look for eclass name */
276 q = (TermEntry *) hash_get(Tname, *((char **)&(e->elem))=StripQuotes((char *)e->elem));
277 if ( q != NULL ) t = q->token;
281 else /* labelled token/eclass/tokclass */
283 q = (TermEntry *) hash_get(Tname, (char *)e->elem);
286 if ( strcmp((char *)e->elem, TokenString(p->tok))==0 )
288 warnNoFL(eMsg1("self-referential error class '%s'; ignored",
299 set_orel(t, &p->eset);
302 else warnNoFL(eMsg2("undefined token '%s' referenced in errclass '%s'; ignored",
303 (char *)e->elem, TokenString(p->tok)));
310 ComputeErrorSets( void )
316 list_apply(eclasses, (void (*)(void *)) doEclass);
319 list_apply(eclasses, (void (*)(void *)) doEclass);
321 list_apply(eclasses, doEclass);
328 ComputeTokSets( void )
333 ListNode *t, *e = NULL;
334 int something_changed;
338 if ( tclasses == NULL ) return;
340 /* turn lists of token/tokclass references into sets */
341 for (t = tclasses->next; t!=NULL; t=t->next)
343 p = (TCnode *) t->elem;
345 /* if wild card, then won't have entries in tclass, assume all_tokens */
346 if ( p->tok == WildCardToken )
348 p->tset = set_dup(all_tokens);
352 lexmode(p->lexclass); /* switch to lexclass where tokclass is defined */
355 /* instantiate all tokens/token_classes into the tset */
356 for (e = (p->tlist)->next; e!=NULL; e=e->next)
359 tokstr = (char *)e->elem;
360 if ( *tokstr == '"' ) q = (TermEntry *) hash_get(Texpr, tokstr);
361 else q = (TermEntry *) hash_get(Tname, tokstr);
362 require(q!=NULL, "ComputeTokSets: no token def");
363 set_orel(q->token, &p->tset);
367 /* Go thru list of tokclasses again looking for tokclasses in sets */
369 something_changed = 0;
370 for (t = tclasses->next; t!=NULL; t=t->next)
373 p = (TCnode *) t->elem;
374 tcl = set_and(p->tset, tokclasses);
378 /* replace refs to tokclasses with the associated set of tokens */
379 something_changed = 1;
380 while ( !set_nil(tcl) )
382 tk = set_int(tcl); /* grab one of the tok class refs */
384 if ( p->tok != tk ) /* tokclass ref to yourself? */
386 q = (TermEntry *) hash_get(Tname, TokenString(tk));
387 require(q!=NULL, "#tokclass not in hash table");
388 set_orin(&p->tset, q->tclass->tset);
390 set_rm(tk, p->tset); /* remove ref that we replaced */
395 if ( something_changed ) goto again;
399 DumpRemainingTokSets()
404 /* Go through tclasses (for the last time) and dump the sets not dumped
405 * during code gen; yes, this is a bogus way to do this, but ComputeTokSets()
406 * can't dump the defs as the error file and tok file have not been created
409 if ( tclasses==NULL ) return;
410 for (t = tclasses->next; t!=NULL; t=t->next)
413 p = (TCnode *) t->elem;
414 if ( p->dumped ) continue;
415 e = DefErrSet(&(p->tset), 0, TokenString(p->tok));
422 /* Replace a subset of an error set with an error class name if such a subset
423 * is found; repeat the process until no more replacements are made.
427 SubstErrorClass( set *f )
435 ECnode *ec, *maxclass = NULL;
437 require(f!=NULL, "SubstErrorClass: NULL eset");
439 if ( eclasses == NULL ) return;
444 for (p=eclasses->next; p!=NULL; p=p->next) /* chk all error classes */
446 ec = (ECnode *) p->elem;
447 if ( ec->setdeg > max )
449 if ( set_sub(ec->eset, *f) || set_equ(ec->eset, *f) )
450 {maxclass = ec; max=ec->setdeg;}
453 if ( maxclass != NULL ) /* if subset found, replace with token */
455 a = set_dif(*f, maxclass->eset);
456 set_orel((unsigned)maxclass->tok, &a);
466 DefErrSet( set *f, int subst, char *name )
468 DefErrSet( f, subst, name )
470 int subst; /* should be substitute error classes? */
474 if ( GenCC ) return DefErrSetForCC( f, subst, name );
475 else return DefErrSetForC( f, subst, name );
478 /* Define a new error set. WARNING...set-implementation dependent.
482 DefErrSetForC( set *f, int subst, char *name )
484 DefErrSetForC( f, subst, name )
486 int subst; /* should be substitute error classes? */
492 require(!set_nil(*f), "DefErrSet: nil set to dump?");
494 if ( subst ) SubstErrorClass(f);
496 endp = &(f->setword[f->n]);
499 fprintf(DefFile, "extern SetWordType %s_set[];\n", name);
501 fprintf(DefFile, "extern SetWordType zzerr%d[];\n", esetnum);
503 fprintf(ErrFile, "SetWordType %s_set[%d] = {",
505 NumWords(TokenNum-1)*sizeof(unsigned));
508 fprintf(ErrFile, "SetWordType zzerr%d[%d] = {",
510 NumWords(TokenNum-1)*sizeof(unsigned));
514 if ( e > 1 ) fprintf(ErrFile, ", ");
515 DumpIntAsChars(ErrFile, "0x%x", *p++);
519 if ( p < endp ) fprintf(ErrFile, ",");
520 fprintf(ErrFile, "\n\t");
525 fprintf(ErrFile, "};\n");
530 /* Define a new error set. WARNING...set-implementation dependent;
531 * Only used when -CC on.
535 DefErrSetForCC( set *f, int subst, char *name )
537 DefErrSetForCC( f, subst, name )
539 int subst; /* should be substitute error classes? */
545 require(!set_nil(*f), "DefErrSet: nil set to dump?");
547 if ( subst ) SubstErrorClass(f);
549 endp = &(f->setword[f->n]);
553 fprintf(Parser_h, "\tstatic SetWordType %s_set[%d];\n", name,
554 NumWords(TokenNum-1)*sizeof(unsigned));
555 fprintf(Parser_c, "SetWordType %s::%s_set[%d] = {",
558 NumWords(TokenNum-1)*sizeof(unsigned));
561 fprintf(Parser_c, "SetWordType %s::err%d[%d] = {",
564 NumWords(TokenNum-1)*sizeof(unsigned));
565 fprintf(Parser_h, "\tstatic SetWordType err%d[%d];\n", esetnum,
566 NumWords(TokenNum-1)*sizeof(unsigned));
571 if ( e > 1 ) fprintf(Parser_c, ", ");
572 DumpIntAsChars(Parser_c, "0x%x", *p++);
575 if ( p < endp ) fprintf(Parser_c, ",");
576 fprintf(Parser_c, "\n\t");
581 fprintf(Parser_c, "};\n");
588 GenParser_c_Hdr(void)
595 fprintf(Parser_c, "/*\n");
596 fprintf(Parser_c, " * %s: P a r s e r S u p p o r t\n", CurrentClassName);
597 fprintf(Parser_c, " *\n");
598 fprintf(Parser_c, " * Generated from:");
599 for (i=0; i<NumFiles; i++) fprintf(Parser_c, " %s", FileStr[i]);
600 fprintf(Parser_c, "\n");
601 fprintf(Parser_c, " *\n");
602 fprintf(Parser_c, " * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995\n");
603 fprintf(Parser_c, " * Parr Research Corporation\n");
604 fprintf(Parser_c, " * with Purdue University Electrical Engineering\n");
605 fprintf(Parser_c, " * with AHPCRC, University of Minnesota\n");
606 fprintf(Parser_c, " * ANTLR Version %s\n", Version);
607 fprintf(Parser_c, " */\n\n");
608 fprintf(Parser_c, "#include <stdio.h>\n");
609 fprintf(Parser_c, "#define ANTLR_VERSION %s\n", VersionDef);
610 fprintf(Parser_c, "#define ANTLR_SUPPORT_CODE\n");
611 if ( UserTokenDefsFile != NULL )
612 fprintf(Parser_c, "#include %s\n", UserTokenDefsFile);
614 fprintf(Parser_c, "#include \"%s\"\n", DefFileName);
616 fprintf(Parser_c, "#include \"%s.h\"\n", CurrentClassName);
618 /* Dump a Parser::tokens for each automaton */
619 fprintf(Parser_c, "\nANTLRChar *%s::_token_tbl[]={\n", CurrentClassName);
620 fprintf(Parser_c, "\t/* 00 */\t\"Invalid\"");
622 for (i=1; i<TokenNum-1; i++)
625 if ( i == EpToken ) continue;
626 /* remapped to invalid token? */
627 if ( TokenInd!=NULL && TokenInd[i]>=LastTokenCounted )
629 fprintf(Parser_c, ",\n\t/* %02d */\t\"invalid\"", i);
632 if ( TokenString(i) != NULL )
633 fprintf(Parser_c, ",\n\t/* %02d */\t\"%s\"", i, TokenString(i));
636 /* look in all lexclasses for the reg expr */
637 for (j=0; j<NumLexClasses; j++)
640 if ( ExprString(i) != NULL )
642 fprintf(Parser_c, ",\n\t/* %02d */\t", i);
643 dumpExpr(Parser_c, ExprString(i));
647 if ( j>=NumLexClasses )
649 if ( UserDefdTokens )
651 fprintf(Parser_c, ",\n\t/* %02d */\t\"\"", i);
654 fatal_internal(eMsgd("No label or expr for token %d",i));
658 fprintf(Parser_c, "\n};\n");
660 /* Build constructors */
661 fprintf(Parser_c, "\n%s::", CurrentClassName);
662 fprintf(Parser_c, "%s(ANTLRTokenBuffer *input) : ANTLRParser(input,%d,%d,%d,%d)\n",
667 NumWords(TokenNum-1)*sizeof(unsigned));
668 fprintf(Parser_c, "{\n");
669 fprintf(Parser_c, "\ttoken_tbl = _token_tbl;\n");
670 fprintf(Parser_c, "}\n\n");
675 GenParser_h_Hdr(void)
682 fprintf(Parser_h, "/*\n");
683 fprintf(Parser_h, " * %s: P a r s e r H e a d e r \n", CurrentClassName);
684 fprintf(Parser_h, " *\n");
685 fprintf(Parser_h, " * Generated from:");
686 for (i=0; i<NumFiles; i++) fprintf(Parser_h, " %s", FileStr[i]);
687 fprintf(Parser_h, "\n");
688 fprintf(Parser_h, " *\n");
689 fprintf(Parser_h, " * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995\n");
690 fprintf(Parser_h, " * Parr Research Corporation\n");
691 fprintf(Parser_h, " * with Purdue University Electrical Engineering\n");
692 fprintf(Parser_h, " * with AHPCRC, University of Minnesota\n");
693 fprintf(Parser_h, " * ANTLR Version %s\n", Version);
694 fprintf(Parser_h, " */\n\n");
695 fprintf(Parser_h, "#ifndef %s_h\n", CurrentClassName);
696 fprintf(Parser_h, "#define %s_h\n", CurrentClassName);
697 if ( GenAST ) fprintf(Parser_h, "class ASTBase;\n");
698 fprintf(Parser_h, "#include \"%s\"\n\n", APARSER_H);
700 if ( HdrAction != NULL ) dumpAction( HdrAction, Parser_h, 0, -1, 0, 1);
702 fprintf(Parser_h, "class %s : public ANTLRParser {\n", CurrentClassName);
703 fprintf(Parser_h, "protected:\n");
704 fprintf(Parser_h, "\tstatic ANTLRChar *_token_tbl[];\n");
705 fprintf(Parser_h, "private:\n");
708 /* Currently, this is only used in !GenCC mode */
718 fprintf(ErrFile, "/*\n");
719 fprintf(ErrFile, " * A n t l r S e t s / E r r o r F i l e H e a d e r\n");
720 fprintf(ErrFile, " *\n");
721 fprintf(ErrFile, " * Generated from:");
722 for (i=0; i<NumFiles; i++) fprintf(ErrFile, " %s", FileStr[i]);
723 fprintf(ErrFile, "\n");
724 fprintf(ErrFile, " *\n");
725 fprintf(ErrFile, " * Terence Parr, Russell Quong, Will Cohen, and Hank Dietz: 1989-1995\n");
726 fprintf(ErrFile, " * Parr Research Corporation\n");
727 fprintf(ErrFile, " * with Purdue University Electrical Engineering\n");
728 fprintf(ErrFile, " * With AHPCRC, University of Minnesota\n");
729 fprintf(ErrFile, " * ANTLR Version %s\n", Version);
730 fprintf(ErrFile, " */\n\n");
731 fprintf(ErrFile, "#include <stdio.h>\n");
732 fprintf(ErrFile, "#define ANTLR_VERSION %s\n", VersionDef);
733 if ( strcmp(ParserName, DefaultParserName)!=0 )
734 fprintf(ErrFile, "#define %s %s\n", DefaultParserName, ParserName);
735 if ( strcmp(ParserName, DefaultParserName)!=0 )
736 fprintf(ErrFile, "#include \"%s\"\n", RemapFileName);
737 if ( HdrAction != NULL ) dumpAction( HdrAction, ErrFile, 0, -1, 0, 1 );
740 fprintf(ErrFile, "#define ZZCAN_GUESS\n");
741 fprintf(ErrFile, "#include <setjmp.h>\n");
744 if ( OutputLL_k > 1 ) fprintf(ErrFile, "#define LL_K %d\n", OutputLL_k);
746 if ( LexGen ) fprintf(ErrFile, "#define zzEOF_TOKEN %d\n", (TokenInd!=NULL?TokenInd[EofToken]:EofToken));
748 fprintf(ErrFile, "#define zzSET_SIZE %d\n", NumWords(TokenNum-1)*sizeof(unsigned));
749 if ( DemandLookahead ) fprintf(ErrFile, "#define DEMAND_LOOK\n");
750 fprintf(ErrFile, "#include \"antlr.h\"\n");
751 if ( GenAST ) fprintf(ErrFile, "#include \"ast.h\"\n");
753 if ( UserDefdTokens ) fprintf(ErrFile, "#include %s\n", UserTokenDefsFile);
754 /* still need this one as it has the func prototypes */
755 fprintf(ErrFile, "#include \"%s\"\n", DefFileName);
756 fprintf(ErrFile, "#include \"dlgdef.h\"\n");
757 fprintf(ErrFile, "#include \"err.h\"\n\n");
759 /* Dump a zztokens for each automaton */
760 if ( strcmp(ParserName, DefaultParserName)!=0 )
762 fprintf(ErrFile, "ANTLRChar *%s_zztokens[%d]={\n", ParserName, TokenNum-1);
766 fprintf(ErrFile, "ANTLRChar *zztokens[%d]={\n", TokenNum-1);
768 fprintf(ErrFile, "\t/* 00 */\t\"Invalid\"");
769 for (i=1; i<TokenNum-1; i++)
772 if ( i == EpToken ) continue;
773 /* remapped to invalid token? */
774 if ( TokenInd!=NULL && TokenInd[i]>=LastTokenCounted )
776 fprintf(ErrFile, ",\n\t/* %02d */\t\"invalid\"", i);
779 if ( TokenString(i) != NULL )
780 fprintf(ErrFile, ",\n\t/* %02d */\t\"%s\"", i, TokenString(i));
783 /* look in all lexclasses for the reg expr */
784 for (j=0; j<NumLexClasses; j++)
787 if ( ExprString(i) != NULL )
789 fprintf(ErrFile, ",\n\t/* %02d */\t", i);
790 dumpExpr(ErrFile, ExprString(i));
794 if ( j>=NumLexClasses )
796 if ( UserDefdTokens )
798 fprintf(ErrFile, ",\n\t/* %02d */\t\"\"", i);
801 fatal_internal(eMsgd("No label or expr for token %d",i));
805 fprintf(ErrFile, "\n};\n");
810 dumpExpr( FILE *f, char *e )
819 if ( *e=='\\' && *(e+1)=='\\' )
820 {putc('\\', f); putc('\\', f); e+=2;}
821 else if ( *e=='\\' && *(e+1)=='"' )
822 {putc('\\', f); putc('"', f); e+=2;}
823 else if ( *e=='\\' ) {putc('\\', f); putc('\\', f); e++;}
824 else {putc(*e, f); e++;}