/* * lex.c -- Generate all of the lexical type files: parser.dlg tokens.h * * $Id: lex.c,v 1.4 95/09/26 12:58:36 parrt Exp $ * $Revision: 1.4 $ * * SOFTWARE RIGHTS * * We reserve no LEGAL rights to the Purdue Compiler Construction Tool * Set (PCCTS) -- PCCTS is in the public domain. An individual or * company may do whatever they wish with source code distributed with * PCCTS or the code generated by PCCTS, including the incorporation of * PCCTS, or its output, into commerical software. * * We encourage users to develop software with PCCTS. However, we do ask * that credit is given to us for developing PCCTS. By "credit", * we mean that if you incorporate our source code into one of your * programs (commercial product, research project, or otherwise) that you * acknowledge this fact somewhere in the documentation, research report, * etc... If you like PCCTS and have developed a nice tool with the * output, please mention that you developed it using PCCTS. In * addition, we ask that this header remain intact in our source code. * As long as these guidelines are kept, we expect to continue enhancing * this system and expect to make other tools available as they are * completed. 
* * ANTLR 1.33 * Terence Parr * Parr Research Corporation * with Purdue University and AHPCRC, University of Minnesota * 1989-1995 */ #include #include #ifdef __cplusplus #ifndef __STDC__ #define __STDC__ #endif #endif #include "set.h" #include "syn.h" #include "hash.h" #include "generic.h" #define DLGErrorString "invalid token" /* Generate a complete lexical description of the lexemes found in the grammar. Opens OutMetaName(DlgFileName) for writing (the result is checked with require()), prints a leading << ... >> action containing #define and #include lines (several of them conditional) and the text of a zzerraction() function, then one << ... >> action per LexActions entry, then the lexical classes through dumpLexClasses() (or a warning when ExprOrder is NULL), then a %% line, and finally closes the file. NOTE(review): in this copy the targets of the first two #include directives and the body of the loop that begins with for (i=0; i seem to have been lost in extraction -- restore them from a pristine source before relying on this text. */ void #ifdef __STDC__ genLexDescr( void ) #else genLexDescr( ) #endif { ListNode *p; FILE *dlgFile = fopen(OutMetaName(DlgFileName), "w"); require(dlgFile!=NULL, eMsg1("genLexFile: cannot open %s", OutMetaName(DlgFileName)) ); special_fopen_actions(OutMetaName(DlgFileName)); fprintf(dlgFile, "<<\n"); fprintf(dlgFile, "/* %s -- DLG Description of scanner\n", DlgFileName); fprintf(dlgFile, " *\n"); fprintf(dlgFile, " * Generated from:"); {int i; for (i=0; i\n"); fprintf(dlgFile, "#define ANTLR_VERSION %s\n", VersionDef); if ( strcmp(ParserName, DefaultParserName)!=0 ) fprintf(dlgFile, "#define %s %s\n", DefaultParserName, ParserName); if ( strcmp(ParserName, DefaultParserName)!=0 ) fprintf(dlgFile, "#include \"%s\"\n", RemapFileName); if ( HdrAction != NULL ) dumpAction( HdrAction, dlgFile, 0, -1, 0, 1 ); if ( FoundGuessBlk ) { fprintf(dlgFile, "#define ZZCAN_GUESS\n"); fprintf(dlgFile, "#include \n"); } if ( OutputLL_k > 1 ) fprintf(dlgFile, "#define LL_K %d\n", OutputLL_k); if ( DemandLookahead ) fprintf(dlgFile, "#define DEMAND_LOOK\n"); fprintf(dlgFile, "#include \"antlr.h\"\n"); if ( GenAST ) { fprintf(dlgFile, "#include \"ast.h\"\n"); } if ( UserDefdTokens ) fprintf(dlgFile, "#include %s\n", UserTokenDefsFile); /* still need this one as it has the func prototypes */ fprintf(dlgFile, "#include \"%s\"\n", DefFileName); fprintf(dlgFile, "#include \"dlgdef.h\"\n"); fprintf(dlgFile, "LOOKAHEAD\n"); fprintf(dlgFile, "void zzerraction()\n"); fprintf(dlgFile, "{\n"); fprintf(dlgFile, "\t(*zzerr)(\"%s\");\n", DLGErrorString); fprintf(dlgFile, "\tzzadvance();\n"); 
fprintf(dlgFile, "\tzzskip();\n"); fprintf(dlgFile, "}\n"); } fprintf(dlgFile, ">>\n\n"); /* dump all actions */ if (LexActions != NULL) { for (p = LexActions->next; p!=NULL; p=p->next) { fprintf(dlgFile, "<<\n"); dumpAction( (char *)p->elem, dlgFile, 0, -1, 0, 1 ); fprintf(dlgFile, ">>\n"); } } /* dump all regular expression rules/actions (skip sentinel node) */ if ( ExprOrder == NULL ) { warnNoFL("no regular expressions found in grammar"); } else dumpLexClasses(dlgFile); fprintf(dlgFile, "%%%%\n"); fclose( dlgFile ); } /* For each lexical class, scan ExprOrder looking for expressions * in that lexical class. Print out only those that match. * Each element of the ExprOrder list has both an expr and an lclass * field. * For every printed expression the rule text is StripQuotes(q->expr), followed by an action that assigns NLA when GenCC is off or returns the token when GenCC is on; the user action t->action, if any, is dumped in between. * Entries whose token equals EpToken are skipped, and an expr missing from the Texpr hash table trips require(). * NOTE(review): the loop headers that follow for (i=0; i appear truncated in this copy. */ void #ifdef __STDC__ dumpLexClasses( FILE *dlgFile ) #else dumpLexClasses( dlgFile ) FILE *dlgFile; #endif { int i; TermEntry *t; ListNode *p; Expr *q; for (i=0; inext; p!=NULL; p=p->next) { q = (Expr *) p->elem; if ( q->lclass != i ) continue; lexmode(i); t = (TermEntry *) hash_get(Texpr, q->expr); require(t!=NULL, eMsg1("genLexDescr: rexpr %s not in hash table",q->expr) ); if ( t->token == EpToken ) continue; fprintf(dlgFile, "%s\n\t<<\n", StripQuotes(q->expr)); /* replace " killed by StripQuotes(). NOTE(review): this restore is unconditional, but StripQuotes() only removes the last character when the string starts with a double quote; for any other expr the store below would overwrite the terminating NUL -- confirm that every q->expr is quoted. */ q->expr[ strlen(q->expr) ] = '"'; if ( !GenCC ) { if ( TokenString(t->token) != NULL ) fprintf(dlgFile, "\t\tNLA = %s;\n", TokenString(t->token)); else fprintf(dlgFile, "\t\tNLA = %d;\n", t->token); } if ( t->action != NULL ) dumpAction( t->action, dlgFile, 2,-1,0,1 ); if ( GenCC ) { if ( TokenString(t->token) != NULL ) fprintf(dlgFile, "\t\treturn %s;\n", TokenString(t->token)); else fprintf(dlgFile, "\t\treturn (ANTLRTokenType)%d;\n", t->token); } fprintf(dlgFile, "\t>>\n\n"); } } } /* Strip the leading path (if any) from a filename. Returns a pointer into fileName just past the last occurrence of the first character of DirectorySymbol, or fileName itself when that character does not occur; nothing is copied or modified. */ char * #ifdef __STDC__ StripPath( char *fileName ) #else StripPath( fileName ) char *fileName; #endif { char *p; static char dirSym[2] = DirectorySymbol; if(NULL != (p = strrchr(fileName, dirSym[0]))) p++; else p = 
fileName; return(p); } /* Generate a list of #defines && list of struct definitions for * aggregate retv's. * Returns without writing anything when both GenCC and UserTokenDefsFile are set. Otherwise opens OutMetaName(DefFileName) into the global DefFile (checked with require()), writes an include guard named StripPath(gate_symbol(DefFileName)), the token definitions (an enum-style list when GenCC is set, #define lines otherwise), the rule prototypes through GenRulePrototypes() when GenCC is off, and the closing #endif. * NOTE(review): part of the body after for (i=0; i is missing from this copy. DefFile is not closed in the visible tail of the function -- presumably it is closed elsewhere; confirm. */ void #ifdef __STDC__ genDefFile( void ) #else genDefFile( ) #endif { int i; /* If C++ mode and #tokdef used, then don't need anything in here since * C++ puts all definitions in the class file name. */ if ( GenCC && UserTokenDefsFile ) return; DefFile = fopen(OutMetaName(DefFileName), "w"); require(DefFile!=NULL, eMsg1("genDefFile: cannot open %s", OutMetaName(DefFileName)) ); special_fopen_actions(OutMetaName(DefFileName)); fprintf(DefFile, "#ifndef %s\n", StripPath(gate_symbol(DefFileName))); fprintf(DefFile, "#define %s\n", StripPath(gate_symbol(DefFileName))); fprintf(DefFile, "/* %s -- List of labelled tokens and stuff\n", DefFileName); fprintf(DefFile, " *\n"); fprintf(DefFile, " * Generated from:"); for (i=0; i1 ) { int j; /* look in all lexclasses for the reg expr */ for (j=0; j=NumLexClasses ) { warnNoFL(eMsg1("token label has no associated rexpr: %s",TokenString(i))); } } require((p=(TermEntry *)hash_get(Tname, TokenString(i))) != NULL, "token not in sym tab when it should be"); if ( !p->classname ) { if ( GenCC ) { if ( !first ) fprintf(DefFile, ",\n"); first = 0; fprintf(DefFile, "\t%s=%d", TokenString(i), i); } else fprintf(DefFile, "#define %s %d\n", TokenString(i), i); } } } if ( GenCC ) fprintf(DefFile, "};\n"); } if ( !GenCC ) GenRulePrototypes(DefFile, SynDiag); fprintf(DefFile, "\n#endif\n"); } /* Write the symbol remap file OutMetaName(RemapFileName); all of the work is skipped when ParserName equals DefaultParserName. The open is checked with require(). NOTE(review): the text after for (i=0; i is missing from this copy, and the statements that follow it (ending with p = (Junction *)p->p2) look like the tail of a separate routine that walks a Junction list -- restore from a pristine source. */ void #ifdef __STDC__ GenRemapFile( void ) #else GenRemapFile( ) #endif { if ( strcmp(ParserName, DefaultParserName)!=0 ) { FILE *f; int i; f = fopen(OutMetaName(RemapFileName), "w"); require(f!=NULL, eMsg1("GenRemapFile: cannot open %s", OutMetaName(RemapFileName)) ); special_fopen_actions(OutMetaName(RemapFileName)); fprintf(f, "/* %s -- List of symbols to remap\n", RemapFileName); fprintf(f, " *\n"); fprintf(f, " * Generated from:"); for (i=0; irname, ParserName, p->rname); p = (Junction *)p->p2; } } /* Generate a bunch of #defines that rename 
all standard symbols to be * "ParserName_symbol". The list of standard symbols to change is in * globals.c. */ void #ifdef __STDC__ GenPredefinedSymbolRedefs( FILE *f ) #else GenPredefinedSymbolRedefs( f ) FILE *f; #endif { char **p; fprintf(f, "\n/* rename PCCTS-supplied symbols to be 'ParserName_symbol' */\n"); for (p = &StandardSymbols[0]; *p!=NULL; p++) { fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p); } } /* Generate a bunch of #defines that rename all AST symbols to be * "ParserName_symbol". The list of AST symbols to change is in * globals.c. */ void #ifdef __STDC__ GenASTSymbolRedefs( FILE *f ) #else GenASTSymbolRedefs( f ) FILE *f; #endif { char **p; fprintf(f, "\n/* rename PCCTS-supplied AST symbols to be 'ParserName_symbol' */\n"); for (p = &ASTSymbols[0]; *p!=NULL; p++) { fprintf(f, "#define %s %s_%s\n", *p, ParserName, *p); } } /* redefine all sets generated by ANTLR; WARNING: 'zzerr', 'setwd' must match * use in bits.c (DumpSetWd() etc...) */ void #ifdef __STDC__ GenSetRedefs( FILE *f ) #else GenSetRedefs( f ) FILE *f; #endif { int i; for (i=1; i<=wordnum; i++) { fprintf(f, "#define setwd%d %s_setwd%d\n", i, ParserName, i); } for (i=1; i<=esetnum; i++) { fprintf(f, "#define zzerr%d %s_err%d\n", i, ParserName, i); } } /* Find all return types/parameters that require structs and def * all rules with ret types. 
*/ void #ifdef __STDC__ GenRulePrototypes( FILE *f, Junction *p ) #else GenRulePrototypes( f, p ) FILE *f; Junction *p; #endif { int i; i = 1; while ( p!=NULL ) { if ( p->ret != NULL ) { if ( HasComma(p->ret) ) { DumpRetValStruct(f, p->ret, i); } fprintf(f, "\n#ifdef __STDC__\n"); if ( HasComma(p->ret) ) { fprintf(f, "extern struct _rv%d", i); } else { fprintf(f, "extern "); DumpType(p->ret, f); } fprintf(f, " %s%s(", RulePrefix, p->rname); DumpANSIFunctionArgDef(f,p); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) { fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":""); } if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } else fprintf(f, "void"); fprintf(f, ");\n"); #endif fprintf(f, "#else\n"); if ( HasComma(p->ret) ) { fprintf(f, "extern struct _rv%d", i); } else { fprintf(f, "extern "); DumpType(p->ret, f); } fprintf(f, " %s%s();\n", RulePrefix, p->rname); fprintf(f, "#endif\n"); } else { fprintf(f, "\n#ifdef __STDC__\n"); fprintf(f, "void %s%s(", RulePrefix, p->rname); DumpANSIFunctionArgDef(f,p); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) { fprintf(f, "AST **%s",(p->pdecl!=NULL)?",":""); } if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } else fprintf(f, "void"); fprintf(f, ");\n"); #endif fprintf(f, "#else\n"); fprintf(f, "extern void %s%s();\n", RulePrefix, p->rname); fprintf(f, "#endif\n"); } i++; p = (Junction *)p->p2; } } /* Define all rules in the class.h file; generate any required * struct definitions first, however. 
*/ void #ifdef __STDC__ GenRuleMemberDeclarationsForCC( FILE *f, Junction *q ) #else GenRuleMemberDeclarationsForCC( f, q ) FILE *f; Junction *q; #endif { Junction *p = q; int i; fprintf(f, "private:\n"); /* Dump dflt handler declaration */ fprintf(f, "\tvoid zzdflthandlers( int _signal, int *_retsignal );\n\n"); fprintf(f, "public:\n"); /* Dump return value structs */ i = 1; while ( p!=NULL ) { if ( p->ret != NULL ) { if ( HasComma(p->ret) ) { DumpRetValStruct(f, p->ret, i); } } i++; p = (Junction *)p->p2; } /* Dump member func defs && CONSTRUCTOR */ fprintf(f, "\t%s(ANTLRTokenBuffer *input);\n", CurrentClassName); /* fprintf(f, "\t%s(ANTLRTokenBuffer *input, ANTLRTokenType eof);\n", CurrentClassName); */ i = 1; p = q; while ( p!=NULL ) { if ( p->ret != NULL ) { if ( HasComma(p->ret) ) { fprintf(f, "\tstruct _rv%d", i); } else { fprintf(f, "\t"); DumpType(p->ret, f); } fprintf(f, " %s(", p->rname); DumpANSIFunctionArgDef(f,p); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":""); if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } fprintf(f, ");\n"); #endif } else { fprintf(f, "\tvoid %s(", p->rname); DumpANSIFunctionArgDef(f,p); fprintf(f, ";\n"); #ifdef OLD if ( p->pdecl != NULL || GenAST ) { if ( GenAST ) fprintf(f, "ASTBase **%s",(p->pdecl!=NULL)?",":""); if ( p->pdecl!=NULL ) fprintf(f, "%s", p->pdecl); } fprintf(f, ");\n"); #endif } i++; p = (Junction *)p->p2; } } /* Given a list of ANSI-style parameter declarations, print out a * comma-separated list of the symbols (w/o types). * Basically, we look for a comma, then work backwards until start of * the symbol name. Then print it out until 1st non-alnum char. Now, * move on to next parameter. 
*/ void #ifdef __STDC__ DumpListOfParmNames( char *pdecl, FILE *output ) #else DumpListOfParmNames( pdecl, output ) char *pdecl; FILE *output; #endif { int firstTime = 1, done = 0; require(output!=NULL, "DumpListOfParmNames: NULL parm"); if ( pdecl == NULL ) return; while ( !done ) { if ( !firstTime ) putc(',', output); done = DumpNextNameInDef(&pdecl, output); firstTime = 0; } } /* given a list of parameters or return values, dump the next * name to output. Return 1 if last one just printed, 0 if more to go. */ int #ifdef __STDC__ DumpNextNameInDef( char **q, FILE *output ) #else DumpNextNameInDef( q, output ) char **q; FILE *output; #endif { char *p = *q; /* where did we leave off? */ int done=0; while ( *p!='\0' && *p!=',' ) p++; /* find end of decl */ if ( *p == '\0' ) done = 1; while ( !isalnum(*p) && *p!='_' ) --p; /* scan back until valid var character */ while ( isalnum(*p) || *p=='_' ) --p; /* scan back until beginning of variable */ p++; /* move to start of variable */ while ( isalnum(*p) || *p=='_' ) {putc(*p, output); p++;} while ( *p!='\0' && *p!=',' ) p++; /* find end of decl */ p++; /* move past this parameter */ *q = p; /* record where we left off */ return done; } /* Given a list of ANSI-style parameter declarations, dump K&R-style * declarations, one per line for each parameter. Basically, convert * comma to semi-colon, newline. 
*/
/* pdecl:  text of the parameter declarations; NULL prints nothing.
 * output: destination stream; must not be NULL (checked with require()).
 *
 * White space (blank, tab, newline) directly after a comma is dropped and a
 * final ";\n" is always written, even for an empty non-NULL pdecl.
 */
void
#ifdef __STDC__
DumpOldStyleParms( char *pdecl, FILE *output )
#else
DumpOldStyleParms( pdecl, output )
char *pdecl;
FILE *output;
#endif
{
	require(output!=NULL, "DumpOldStyleParms: NULL parm");

	if ( pdecl == NULL ) return;
	while ( *pdecl != '\0' )
	{
		if ( *pdecl == ',' )
		{
			pdecl++;
			putc(';', output); putc('\n', output);
			while ( *pdecl==' ' || *pdecl=='\t' || *pdecl=='\n' ) pdecl++;
		}
		else {putc(*pdecl, output); pdecl++;}
	}
	putc(';', output);
	putc('\n', output);
}

/* Take in a type definition (type + symbol) and print out type only
 *
 * s: declaration text such as "int *x"; must not be NULL.
 * f: stream receiving the type.
 *
 * The symbol is the last run of [A-Za-z0-9_] characters in s.  Everything in
 * front of the character that precedes the symbol is copied verbatim; from
 * that character on only non-identifier characters are copied, which drops
 * the symbol but keeps trailing punctuation.  A warning is issued and nothing
 * is printed when fewer than two characters precede the symbol (no room for
 * a type) or when s holds no identifier at all.
 *
 * The backward scans are bounded by s, so an empty string or a string
 * consisting of a bare identifier no longer causes a read in front of the
 * buffer.  Characters are converted to unsigned char before being handed to
 * isalnum(), which is undefined for negative arguments other than EOF.
 */
void
#ifdef __STDC__
DumpType( char *s, FILE *f )
#else
DumpType( s, f )
char *s;
FILE *f;
#endif
{
	char *p, *end;
	require(s!=NULL, "DumpType: invalid type string");

	p = s + strlen(s);		/* start one past the end and work back */
	/* scan back until valid variable character */
	while ( p>s && !isalnum((unsigned char)p[-1]) && p[-1]!='_' ) --p;
	/* scan back until beginning of variable */
	while ( p>s && (isalnum((unsigned char)p[-1]) || p[-1]=='_') ) --p;
	/* p is now the first character of the variable (or s if none) */
	if ( p-s < 2 )
	{
		warnNoFL(eMsg1("invalid parameter/return value: '%s'",s));
		return;
	}
	end = p-1;			/* here is where we stop printing alnum */
	for (p = s; p!=end; p++) putc(*p, f);	/* dump until just before variable */
	for (; *p!='\0'; p++)			/* dump rest w/o variable */
	{
		if ( !isalnum((unsigned char)*p) && *p!='_' ) putc(*p, f);
	}
}

/* check to see if string e is a word in string s
 *
 * A word is a maximal run of [A-Za-z0-9_] characters.  Returns 1 when some
 * word of s equals e, or when e is empty; returns 0 otherwise.  Both
 * arguments must be non-NULL (checked with require()).
 */
int
#ifdef __STDC__
strmember( char *s, char *e )
#else
strmember( s, e )
char *s;
char *e;
#endif
{
	register char *p;
	require(s!=NULL&&e!=NULL, "strmember: NULL string");

	if ( *e=='\0' ) return 1;   /* empty string is always member */
	do {
		/* skip separators so that s sits on the start of a word */
		while ( *s!='\0' && !isalnum((unsigned char)*s) && *s!='_' )
			++s;
		p = e;
		while ( *p!='\0' && *p==*s ) {p++; s++;}
		if ( *p=='\0' ) {
			/* all of e matched; accept only on a word boundary */
			if ( *s=='\0' ) return 1;
			if ( !isalnum((unsigned char)*s) && *s != '_' ) return 1;
		}
		/* skip whatever is left of the current word */
		while ( isalnum((unsigned char)*s) || *s == '_' )
			++s;
	} while ( *s!='\0' );
	return 0;
}

/* Return 1 if s contains at least one comma, 0 otherwise.  s must not be
 * NULL.
 */
int
#ifdef __STDC__
HasComma( char *s )
#else
HasComma( s )
char *s;
#endif
{
	while (*s!='\0')
		if ( *s++ == ',' ) return 1;
	return 0;
}

/* Write the definition of "struct _rv<i>" to f, with one member per
 * comma-separated entry of ret.  Leading blanks and tabs of each entry are
 * dropped; every member is written on its own line, indented by a tab and
 * terminated with a semicolon.
 */
void
#ifdef __STDC__
DumpRetValStruct( FILE *f, char *ret, int i )
#else
DumpRetValStruct( f, ret, i )
FILE *f;
char *ret;
int i;
#endif
{
	fprintf(f, "\nstruct _rv%d {\n", i);
	while ( *ret != '\0' )
	{
		 while ( *ret==' ' || *ret=='\t' ) ret++; /* ignore white */
		 putc('\t', f);
		 while ( *ret!=',' && *ret!='\0' ) {putc(*ret,f); ret++;}
		 if ( *ret == ',' ) {putc(';', f); putc('\n', f); ret++;}
	}
	/* the last member gets its semicolon here, then the struct is closed */
	fprintf(f, ";\n};\n");
}

/* given "s" yield s -- DESTRUCTIVE (we modify s if starts with " else return s)
 *
 * When s begins with a double quote its last character is overwritten with
 * NUL (without checking that it really is a quote) and s+1 is returned;
 * otherwise s is returned untouched.  Callers that want the original text
 * back have to restore the overwritten character themselves.
 */
char *
#ifdef __STDC__
StripQuotes( char *s )
#else
StripQuotes( s )
char *s;
#endif
{
	if ( *s == '"' )
	{
		s[ strlen(s)-1 ] = '\0';    /* remove last quote */
		return( s+1 );				/* return address past initial quote */
	}
	return( s );
}