2 * lex.c -- Generate all of the lexical type files: parser.dlg tokens.h
4 * $Id: lex.c,v 1.4 95/09/26 12:58:36 parrt Exp $
9 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
10 * Set (PCCTS) -- PCCTS is in the public domain. An individual or
11 * company may do whatever they wish with source code distributed with
12 * PCCTS or the code generated by PCCTS, including the incorporation of
13 * PCCTS, or its output, into commercial software.
15 * We encourage users to develop software with PCCTS. However, we do ask
16 * that credit is given to us for developing PCCTS. By "credit",
17 * we mean that if you incorporate our source code into one of your
18 * programs (commercial product, research project, or otherwise) that you
19 * acknowledge this fact somewhere in the documentation, research report,
20 * etc... If you like PCCTS and have developed a nice tool with the
21 * output, please mention that you developed it using PCCTS. In
22 * addition, we ask that this header remain intact in our source code.
23 * As long as these guidelines are kept, we expect to continue enhancing
24 * this system and expect to make other tools available as they are
29 * Parr Research Corporation
30 * with Purdue University and AHPCRC, University of Minnesota
45 #define DLGErrorString "invalid token"
47 /* Generate a complete lexical description of the lexemes found in the grammar */
56 FILE *dlgFile = fopen(OutMetaName(DlgFileName), "w");
57 require(dlgFile!=NULL, eMsg1("genLexFile: cannot open %s", OutMetaName(DlgFileName)) );
58 special_fopen_actions(OutMetaName(DlgFileName));
60 fprintf(dlgFile, "<<\n");
61 fprintf(dlgFile, "/* %s -- DLG Description of scanner\n", DlgFileName);
62 fprintf(dlgFile, " *\n");
63 fprintf(dlgFile, " * Generated from:");
64 {int i; for (i=0; i<NumFiles; i++) fprintf(dlgFile, " %s", FileStr[i]);}
65 fprintf(dlgFile, "\n");
66 fprintf(dlgFile, " *\n");
67 fprintf(dlgFile, " * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994\n");
68 fprintf(dlgFile, " * Purdue University Electrical Engineering\n");
69 fprintf(dlgFile, " * With AHPCRC, University of Minnesota\n");
70 fprintf(dlgFile, " * ANTLR Version %s\n", Version);
71 fprintf(dlgFile, " */\n\n");
74 if ( !UserDefdTokens ) fprintf(dlgFile, "#include \"%s\"\n", DefFileName);
75 else fprintf(dlgFile, "#include %s\n", UserTokenDefsFile);
76 fprintf(dlgFile, "#include \"%s\"\n", ATOKEN_H);
77 if ( GenAST ) fprintf(dlgFile, "#include \"%s\"\n", ASTBASE_H);
78 if ( HdrAction != NULL ) dumpAction( HdrAction, dlgFile, 0, -1, 0, 1 );
82 fprintf(dlgFile, "#include <stdio.h>\n");
83 fprintf(dlgFile, "#define ANTLR_VERSION %s\n", VersionDef);
84 if ( strcmp(ParserName, DefaultParserName)!=0 )
85 fprintf(dlgFile, "#define %s %s\n", DefaultParserName, ParserName);
86 if ( strcmp(ParserName, DefaultParserName)!=0 )
87 fprintf(dlgFile, "#include \"%s\"\n", RemapFileName);
88 if ( HdrAction != NULL ) dumpAction( HdrAction, dlgFile, 0, -1, 0, 1 );
91 fprintf(dlgFile, "#define ZZCAN_GUESS\n");
92 fprintf(dlgFile, "#include <setjmp.h>\n");
94 if ( OutputLL_k > 1 ) fprintf(dlgFile, "#define LL_K %d\n", OutputLL_k);
95 if ( DemandLookahead ) fprintf(dlgFile, "#define DEMAND_LOOK\n");
96 fprintf(dlgFile, "#include \"antlr.h\"\n");
98 fprintf(dlgFile, "#include \"ast.h\"\n");
100 if ( UserDefdTokens )
101 fprintf(dlgFile, "#include %s\n", UserTokenDefsFile);
102 /* still need this one as it has the func prototypes */
103 fprintf(dlgFile, "#include \"%s\"\n", DefFileName);
104 fprintf(dlgFile, "#include \"dlgdef.h\"\n");
105 fprintf(dlgFile, "LOOKAHEAD\n");
106 fprintf(dlgFile, "void zzerraction()\n");
107 fprintf(dlgFile, "{\n");
108 fprintf(dlgFile, "\t(*zzerr)(\"%s\");\n", DLGErrorString);
109 fprintf(dlgFile, "\tzzadvance();\n");
110 fprintf(dlgFile, "\tzzskip();\n");
111 fprintf(dlgFile, "}\n");
113 fprintf(dlgFile, ">>\n\n");
115 /* dump all actions */
116 if (LexActions != NULL)
118 for (p = LexActions->next; p!=NULL; p=p->next)
120 fprintf(dlgFile, "<<\n");
121 dumpAction( (char *)p->elem, dlgFile, 0, -1, 0, 1 );
122 fprintf(dlgFile, ">>\n");
125 /* dump all regular expression rules/actions (skip sentinel node) */
126 if ( ExprOrder == NULL ) {
127 warnNoFL("no regular expressions found in grammar");
129 else dumpLexClasses(dlgFile);
130 fprintf(dlgFile, "%%%%\n");
134 /* For each lexical class, scan ExprOrder looking for expressions
135 * in that lexical class. Print out only those that match.
136 * Each element of the ExprOrder list has both an expr and an lclass
141 dumpLexClasses( FILE *dlgFile )
143 dumpLexClasses( dlgFile )
152 for (i=0; i<NumLexClasses; i++)
154 fprintf(dlgFile, "\n%%%%%s\n\n", lclass[i].classnum);
155 for (p=ExprOrder->next; p!=NULL; p=p->next)
157 q = (Expr *) p->elem;
158 if ( q->lclass != i ) continue;
160 t = (TermEntry *) hash_get(Texpr, q->expr);
161 require(t!=NULL, eMsg1("genLexDescr: rexpr %s not in hash table",q->expr) );
162 if ( t->token == EpToken ) continue;
163 fprintf(dlgFile, "%s\n\t<<\n", StripQuotes(q->expr));
164 /* restore the closing '"' that StripQuotes() overwrote with '\0' */
165 q->expr[ strlen(q->expr) ] = '"';
167 if ( TokenString(t->token) != NULL )
168 fprintf(dlgFile, "\t\tNLA = %s;\n", TokenString(t->token));
170 fprintf(dlgFile, "\t\tNLA = %d;\n", t->token);
172 if ( t->action != NULL ) dumpAction( t->action, dlgFile, 2,-1,0,1 );
174 if ( TokenString(t->token) != NULL )
175 fprintf(dlgFile, "\t\treturn %s;\n", TokenString(t->token));
177 fprintf(dlgFile, "\t\treturn (ANTLRTokenType)%d;\n", t->token);
179 fprintf(dlgFile, "\t>>\n\n");
184 /* Strip the leading path (if any) from a filename */
187 StripPath( char *fileName )
189 StripPath( fileName )
194 static char dirSym[2] = DirectorySymbol;
196 if(NULL != (p = strrchr(fileName, dirSym[0])))
204 /* Generate a list of #defines and a list of struct definitions for
205 * aggregate return values */
215 /* If C++ mode and #tokdef used, then don't need anything in here since
216 * C++ puts all definitions in the class file name.
218 if ( GenCC && UserTokenDefsFile ) return;
220 DefFile = fopen(OutMetaName(DefFileName), "w");
221 require(DefFile!=NULL, eMsg1("genDefFile: cannot open %s", OutMetaName(DefFileName)) );
222 special_fopen_actions(OutMetaName(DefFileName));
224 fprintf(DefFile, "#ifndef %s\n", StripPath(gate_symbol(DefFileName)));
225 fprintf(DefFile, "#define %s\n", StripPath(gate_symbol(DefFileName)));
227 fprintf(DefFile, "/* %s -- List of labelled tokens and stuff\n", DefFileName);
228 fprintf(DefFile, " *\n");
229 fprintf(DefFile, " * Generated from:");
230 for (i=0; i<NumFiles; i++) fprintf(DefFile, " %s", FileStr[i]);
231 fprintf(DefFile, "\n");
232 fprintf(DefFile, " *\n");
233 fprintf(DefFile, " * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994\n");
234 fprintf(DefFile, " * Purdue University Electrical Engineering\n");
235 fprintf(DefFile, " * ANTLR Version %s\n", Version);
236 fprintf(DefFile, " */\n");
238 if ( !GenCC && LexGen ) {
239 fprintf(DefFile,"#define zzEOF_TOKEN %d\n",
240 TokenInd!=NULL?TokenInd[EofToken]:EofToken);
243 if ( !UserDefdTokens )
247 if ( GenCC ) fprintf(DefFile, "enum ANTLRTokenType {\n");
248 for (i=1; i<TokenNum; i++)
250 /* Don't do EpToken or expr w/o labels */
251 if ( TokenString(i)!=NULL && i != EpToken )
255 if ( WarningLevel>1 )
258 /* look in all lexclasses for the reg expr */
259 for (j=0; j<NumLexClasses; j++)
262 if ( ExprString(i)!=NULL ) break;
264 if ( j>=NumLexClasses )
266 warnNoFL(eMsg1("token label has no associated rexpr: %s",TokenString(i)));
269 require((p=(TermEntry *)hash_get(Tname, TokenString(i))) != NULL,
270 "token not in sym tab when it should be");
274 if ( !first ) fprintf(DefFile, ",\n");
276 fprintf(DefFile, "\t%s=%d", TokenString(i), i);
279 fprintf(DefFile, "#define %s %d\n", TokenString(i), i);
283 if ( GenCC ) fprintf(DefFile, "};\n");
286 if ( !GenCC ) GenRulePrototypes(DefFile, SynDiag);
288 fprintf(DefFile, "\n#endif\n");
298 if ( strcmp(ParserName, DefaultParserName)!=0 )
303 f = fopen(OutMetaName(RemapFileName), "w");
304 require(f!=NULL, eMsg1("GenRemapFile: cannot open %s", OutMetaName(RemapFileName)) );
305 special_fopen_actions(OutMetaName(RemapFileName));
307 fprintf(f, "/* %s -- List of symbols to remap\n", RemapFileName);
309 fprintf(f, " * Generated from:");
310 for (i=0; i<NumFiles; i++) fprintf(f, " %s", FileStr[i]);
313 fprintf(f, " * Terence Parr, Will Cohen, and Hank Dietz: 1989-1994\n");
314 fprintf(f, " * Purdue University Electrical Engineering\n");
315 fprintf(f, " * ANTLR Version %s\n", Version);
318 GenRuleFuncRedefs(f, SynDiag);
319 GenPredefinedSymbolRedefs(f);
320 if ( GenAST ) GenASTSymbolRedefs(f);
327 /* Generate a bunch of #defines that rename all functions to be "ParserName_func" */
330 GenRuleFuncRedefs( FILE *f, Junction *p )
332 GenRuleFuncRedefs( f, p )
337 fprintf(f, "\n/* rename rule functions to be 'ParserName_func' */\n");
340 fprintf(f, "#define %s %s_%s\n", p->rname, ParserName, p->rname);
341 p = (Junction *)p->p2;
345 /* Generate a bunch of #defines that rename all standard symbols to be
346 * "ParserName_symbol". The list of standard symbols to change is in
351 GenPredefinedSymbolRedefs( FILE *f )
353 GenPredefinedSymbolRedefs( f )
359 fprintf(f, "\n/* rename PCCTS-supplied symbols to be 'ParserName_symbol' */\n");
360 for (p = &StandardSymbols[0]; *p!=NULL; p++)
362 fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p);
366 /* Generate a bunch of #defines that rename all AST symbols to be
367 * "ParserName_symbol". The list of AST symbols to change is in
372 GenASTSymbolRedefs( FILE *f )
374 GenASTSymbolRedefs( f )
380 fprintf(f, "\n/* rename PCCTS-supplied AST symbols to be 'ParserName_symbol' */\n");
381 for (p = &ASTSymbols[0]; *p!=NULL; p++)
383 fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p);
387 /* redefine all sets generated by ANTLR; WARNING: 'zzerr', 'setwd' must match
388 * use in bits.c (DumpSetWd() etc...)
392 GenSetRedefs( FILE *f )
400 for (i=1; i<=wordnum; i++)
402 fprintf(f, "#define setwd%d %s_setwd%d\n", i, ParserName, i);
404 for (i=1; i<=esetnum; i++)
406 fprintf(f, "#define zzerr%d %s_err%d\n", i, ParserName, i);
410 /* Find all return types/parameters that require structs and declare
411 * all rules with return types.
415 GenRulePrototypes( FILE *f, Junction *p )
417 GenRulePrototypes( f, p )
427 if ( p->ret != NULL )
429 if ( HasComma(p->ret) )
431 DumpRetValStruct(f, p->ret, i);
433 fprintf(f, "\n#ifdef __STDC__\n");
434 if ( HasComma(p->ret) )
436 fprintf(f, "extern struct _rv%d", i);
440 fprintf(f, "extern ");
443 fprintf(f, " %s%s(", RulePrefix, p->rname);
444 DumpANSIFunctionArgDef(f,p);
447 if ( p->pdecl != NULL || GenAST )
450 fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":"");
452 if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl);
454 else fprintf(f, "void");
457 fprintf(f, "#else\n");
458 if ( HasComma(p->ret) )
460 fprintf(f, "extern struct _rv%d", i);
464 fprintf(f, "extern ");
467 fprintf(f, " %s%s();\n", RulePrefix, p->rname);
468 fprintf(f, "#endif\n");
472 fprintf(f, "\n#ifdef __STDC__\n");
473 fprintf(f, "void %s%s(", RulePrefix, p->rname);
474 DumpANSIFunctionArgDef(f,p);
477 if ( p->pdecl != NULL || GenAST )
480 fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":"");
482 if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl);
484 else fprintf(f, "void");
487 fprintf(f, "#else\n");
488 fprintf(f, "extern void %s%s();\n", RulePrefix, p->rname);
489 fprintf(f, "#endif\n");
492 p = (Junction *)p->p2;
496 /* Define all rules in the class.h file; generate any required
497 * struct definitions first, however.
501 GenRuleMemberDeclarationsForCC( FILE *f, Junction *q )
503 GenRuleMemberDeclarationsForCC( f, q )
511 fprintf(f, "private:\n");
513 /* Dump default handler declaration */
514 fprintf(f, "\tvoid zzdflthandlers( int _signal, int *_retsignal );\n\n");
516 fprintf(f, "public:\n");
518 /* Dump return value structs */
522 if ( p->ret != NULL )
524 if ( HasComma(p->ret) )
526 DumpRetValStruct(f, p->ret, i);
530 p = (Junction *)p->p2;
533 /* Dump member function declarations and constructors */
534 fprintf(f, "\t%s(ANTLRTokenBuffer *input);\n", CurrentClassName);
536 fprintf(f, "\t%s(ANTLRTokenBuffer *input, ANTLRTokenType eof);\n",
544 if ( p->ret != NULL )
546 if ( HasComma(p->ret) )
548 fprintf(f, "\tstruct _rv%d", i);
555 fprintf(f, " %s(", p->rname);
556 DumpANSIFunctionArgDef(f,p);
559 if ( p->pdecl != NULL || GenAST )
561 if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":"");
562 if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl);
569 fprintf(f, "\tvoid %s(", p->rname);
570 DumpANSIFunctionArgDef(f,p);
573 if ( p->pdecl != NULL || GenAST )
575 if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":"");
576 if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl);
582 p = (Junction *)p->p2;
586 /* Given a list of ANSI-style parameter declarations, print out a
587 * comma-separated list of the symbols (w/o types).
588 * Basically, we look for a comma, then work backwards until the start of
589 * the symbol name. Then print it out until the first character that is
590 * neither alphanumeric nor '_'. Now, move on to the next parameter.
594 DumpListOfParmNames( char *pdecl, FILE *output )
596 DumpListOfParmNames( pdecl, output )
601 int firstTime = 1, done = 0;
602 require(output!=NULL, "DumpListOfParmNames: NULL parm");
604 if ( pdecl == NULL ) return;
607 if ( !firstTime ) putc(',', output);
608 done = DumpNextNameInDef(&pdecl, output);
613 /* Given a list of parameters or return values, dump the next
614 * name to output. Return 1 if the last one was just printed, 0 if more to go.
618 DumpNextNameInDef( char **q, FILE *output )
620 DumpNextNameInDef( q, output )
625 char *p = *q; /* where did we leave off? */
628 while ( *p!='\0' && *p!=',' ) p++; /* find end of decl */
629 if ( *p == '\0' ) done = 1;
630 while ( !isalnum(*p) && *p!='_' ) --p; /* scan back until valid var character */
631 while ( isalnum(*p) || *p=='_' ) --p; /* scan back until beginning of variable */
632 p++; /* move to start of variable */
633 while ( isalnum(*p) || *p=='_' ) {putc(*p, output); p++;}
634 while ( *p!='\0' && *p!=',' ) p++; /* find end of decl */
635 p++; /* move past this parameter */
637 *q = p; /* record where we left off */
641 /* Given a list of ANSI-style parameter declarations, dump K&R-style
642 * declarations, one per line for each parameter. Basically, convert
643 * comma to semi-colon, newline.
647 DumpOldStyleParms( char *pdecl, FILE *output )
649 DumpOldStyleParms( pdecl, output )
654 require(output!=NULL, "DumpOldStyleParms: NULL parm");
656 if ( pdecl == NULL ) return;
657 while ( *pdecl != '\0' )
662 putc(';', output); putc('\n', output);
663 while ( *pdecl==' ' || *pdecl=='\t' || *pdecl=='\n' ) pdecl++;
665 else {putc(*pdecl, output); pdecl++;}
671 /* Take in a type definition (type + symbol) and print out type only */
674 DumpType( char *s, FILE *f )
682 require(s!=NULL, "DumpType: invalid type string");
684 p = &s[strlen(s)-1]; /* start at end of string and work back */
685 /* scan back until valid variable character */
686 while ( !isalnum(*p) && *p!='_' ) --p;
687 /* scan back until beginning of variable */
688 while ( isalnum(*p) || *p=='_' ) --p;
691 warnNoFL(eMsg1("invalid parameter/return value: '%s'",s));
694 end = p; /* here is where we stop printing alnum */
696 while ( p!=end ) {putc(*p, f); p++;} /* dump until just before variable */
697 while ( *p!='\0' ) /* dump rest w/o variable */
699 if ( !isalnum(*p) && *p!='_' ) putc(*p, f);
704 /* check to see if string e is a word in string s */
707 strmember( char *s, char *e )
715 require(s!=NULL&&e!=NULL, "strmember: NULL string");
717 if ( *e=='\0' ) return 1; /* empty string is always member */
719 while ( *s!='\0' && !isalnum(*s) && *s!='_' )
722 while ( *p!='\0' && *p==*s ) {p++; s++;}
724 if ( *s=='\0' ) return 1;
725 if ( !isalnum (*s) && *s != '_' ) return 1;
727 while ( isalnum(*s) || *s == '_' )
729 } while ( *s!='\0' );
742 if ( *s++ == ',' ) return 1;
748 DumpRetValStruct( FILE *f, char *ret, int i )
750 DumpRetValStruct( f, ret, i )
756 fprintf(f, "\nstruct _rv%d {\n", i);
757 while ( *ret != '\0' )
759 while ( *ret==' ' || *ret=='\t' ) ret++; /* ignore white */
761 while ( *ret!=',' && *ret!='\0' ) {putc(*ret,f); ret++;}
762 if ( *ret == ',' ) {putc(';', f); putc('\n', f); ret++;}
764 fprintf(f, ";\n};\n");
767 /* Given a string of the form "s", yield s -- DESTRUCTIVE: if s starts with ", its last character is overwritten with '\0' and s+1 is returned; else return s */
770 StripQuotes( char *s )
778 s[ strlen(s)-1 ] = '\0'; /* remove last quote */
779 return( s+1 ); /* return address past initial quote */