pd.if.org Git - lice/blob - lexer.h

   1 #ifndef LICE_LEXER_HDR
   2 #define LICE_LEXER_HDR
   3 /*
   4  * File: lexer.h
   5  *  Implements the interface for LICE's lexer
   6  */
   7 #include <stdbool.h>
   8
   9 /*
  10  * Type: lexer_token_type_t
  11  *  Type to describe a token's type.
  12  *
  13  *  Remarks:
  14  *   Implemented as a typedef of an enumeration, lexer_token_type_t
  15  *   is used to describe the type of a lexer token. The following
  16  *   token types exist (as constants).
  17  *
  18  *  Tokens:
  19  *    LEXER_TOKEN_IDENTIFIER        - Identifier
  20  *    LEXER_TOKEN_PUNCT             - Language punctuation
  21  *    LEXER_TOKEN_CHAR              - Character literal
  22  *    LEXER_TOKEN_STRING            - String literal
  23  *    LEXER_TOKEN_NUMBER            - Number (of any type)
  24  *    LEXER_TOKEN_EQUAL             - Equal
  25  *    LEXER_TOKEN_LEQUAL            - Lesser-or-equal
  26  *    LEXER_TOKEN_GEQUAL            - Greater-or-equal
  27  *    LEXER_TOKEN_NEQUAL            - Not-equal
  28  *    LEXER_TOKEN_INCREMENT         - Pre/post increment
  29  *    LEXER_TOKEN_DECREMENT         - Pre/post decrement
  30  *    LEXER_TOKEN_ARROW             - Pointer arrow `->`
  31  *    LEXER_TOKEN_LSHIFT            - Left shift
  32  *    LEXER_TOKEN_RSHIFT            - Right shift
  33  *    LEXER_TOKEN_COMPOUND_ADD      - Compound-assignment addition
  34  *    LEXER_TOKEN_COMPOUND_SUB      - Compound-assignment subtraction
  35  *    LEXER_TOKEN_COMPOUND_MUL      - Compound-assignment multiplication
  36  *    LEXER_TOKEN_COMPOUND_DIV      - Compound-assignment division
  37  *    LEXER_TOKEN_COMPOUND_MOD      - Compound-assignment modulus
      *    LEXER_TOKEN_COMPOUND_AND      - Compound-assignment bit-and
  38  *    LEXER_TOKEN_COMPOUND_OR       - Compound-assignment bit-or
  39  *    LEXER_TOKEN_COMPOUND_XOR      - Compound-assignment bit-xor
  40  *    LEXER_TOKEN_COMPOUND_LSHIFT   - Compound-assignment left-shift
  41  *    LEXER_TOKEN_COMPOUND_RSHIFT   - Compound-assignment right-shift
  42  *    LEXER_TOKEN_AND               - Logical and
  43  *    LEXER_TOKEN_OR                - Logical or
  44  */
  45 typedef enum {
  46     LEXER_TOKEN_IDENTIFIER,
  47     LEXER_TOKEN_PUNCT,
  48     LEXER_TOKEN_CHAR,
  49     LEXER_TOKEN_STRING,
  50     LEXER_TOKEN_NUMBER,
  51     LEXER_TOKEN_EQUAL,
  52     LEXER_TOKEN_LEQUAL,
  53     LEXER_TOKEN_GEQUAL,
  54     LEXER_TOKEN_NEQUAL,
  55     LEXER_TOKEN_INCREMENT,
  56     LEXER_TOKEN_DECREMENT,
  57     LEXER_TOKEN_ARROW,
  58     LEXER_TOKEN_LSHIFT,
  59     LEXER_TOKEN_RSHIFT,
  60     LEXER_TOKEN_COMPOUND_ADD,
  61     LEXER_TOKEN_COMPOUND_SUB,
  62     LEXER_TOKEN_COMPOUND_MUL,
  63     LEXER_TOKEN_COMPOUND_DIV,
  64     LEXER_TOKEN_COMPOUND_MOD,
  65     LEXER_TOKEN_COMPOUND_AND,
  66     LEXER_TOKEN_COMPOUND_OR,
  67     LEXER_TOKEN_COMPOUND_XOR,
  68     LEXER_TOKEN_COMPOUND_LSHIFT,
  69     LEXER_TOKEN_COMPOUND_RSHIFT,
  70     LEXER_TOKEN_AND,
  71     LEXER_TOKEN_OR
  72 } lexer_token_type_t;
  73
  74 /*
  75  * Class: lexer_token_t
  76  *  Describes a token in the token stream: a type tag plus a payload union.
  77  */
  78 typedef struct {
  79     /*
  80      * Variable: type
  81      *  The token type
  82      */
  83     lexer_token_type_t type;
  84
  85     union { /* anonymous union: members share storage and are accessed directly on the token */
  86         long  integer;
  87         int   punct;
  88         char *string;
  89         char  character;
  90     }; /* NOTE(review): which member is valid presumably depends on `type`; the mapping is not stated in this header -- confirm in the lexer implementation */
  91 } lexer_token_t;
  92
  93 /*
  94  * Function: lexer_islong
  95  *  Checks whether a given string is a long-integer literal.
  96  *
  97  * Parameters:
  98  *  string  - The string to check
  99  *
 100  * Remarks:
 101  *  Returns `true` if the string is a long literal,
 102  *  `false` otherwise.
 103  */
 104 bool lexer_islong(char *string);
 105
 106 /*
 107  * Function: lexer_isint
 108  *  Checks whether a given string is an int-integer literal.
 109  *
 110  * Parameters:
 111  *  string  - The string to check
 112  *
 113  * Remarks:
 114  *  Returns `true` if the string is an int literal,
 115  *  `false` otherwise.
 116  */
 117 bool lexer_isint (char *string);
 118
 119 /*
 120  * Function: lexer_isfloat
 121  *  Checks whether a given string is a floating-point literal.
 122  *
 123  * Parameters:
 124  *  string  - The string to check
 125  *
 126  * Remarks:
 127  *  Returns `true` if the string is a floating-point literal,
 128  *  `false` otherwise.
 129  */
 130 bool lexer_isfloat(char *string);
 131
 132 /*
 133  * Function: lexer_ispunct
 134  *  Checks if a given token is language punctuation and matches *c*.
 135  *
 136  * Parameters:
 137  *  token   - The token to test
 138  *  c       - The punctuation to match the token against
 139  *
 140  * Remarks:
 141  *  Returns `true` if the given token is language punctuation and
 142  *  matches *c*.
 143  */
 144 bool lexer_ispunct(lexer_token_t *token, int c);
 145
 146 /*
 147  * Function: lexer_unget
 148  *  Undo the given token in the token stream.
 149  *
 150  * Parameters:
 151  *  token   - The token to unget; NOTE(review): whether NULL is accepted is not stated here -- confirm
 152  */
 153 void lexer_unget(lexer_token_t *token);
 154
 155 /*
 156  * Function: lexer_next
 157  *  Get the next token in the token stream.
 158  *
 159  * Returns:
 160  *  The next token in the token stream, or NULL
 161  *  on failure or EOF.
 162  */
 163 lexer_token_t *lexer_next(void); /* NOTE(review): ownership of the returned token is not documented here -- confirm who releases it */
 164
 165 /*
 166  * Function: lexer_peek
 167  *  Look at the next token without advancing the stream.
 168  *
 169  * Returns:
 170  *  The next token, without advancing the token stream, or NULL on failure
 171  *  or EOF.
 172  *
 173  * Remarks:
 174  *  The function peeks ahead to see the next token in the stream
 175  *  and leaves the lexer state as it was.
 176  */
 177 lexer_token_t *lexer_peek(void); /* NOTE(review): ownership of the returned token is not documented here -- confirm who releases it */
 178
 179 /*
 180  * Function: lexer_token_string
 181  *  Convert a token to a human-readable representation.
 182  *
 183  * Parameters:
 184  *  token   - The token to convert
 185  *
 186  * Returns:
 187  *  A string representation of the token, or NULL on failure.
 188  */
 189 char *lexer_token_string(lexer_token_t *token); /* NOTE(review): ownership of the returned string is not documented here -- confirm whether the caller must free it */
 190
 191 /*
 192  * Function: lexer_marker
 193  *  Get a marker describing where the lexer currently is.
 194  *
 195  * Remarks:
 196  *  Currently returns `file.c:line`; will later be extended to also include
 197  *  a column marker. This is used in error reporting.
 198  */
 199 char *lexer_marker(void); /* NOTE(review): ownership of the returned string is not documented here -- confirm whether the caller must free it */
 200
 201 #endif