#include <stdio.h>
#include "gram.h"

/* START HEADER */
struct token {
	int type;
	int rtype; /* a "real type", e.g. In, NAME, etc. */
	int length;
	int delimiter;
	int nondigits;
	int nonnames;
	struct token *next;
	char text[2048];
};

/*
 * Tokenizer state: the input source, one character of lookahead, and the
 * recognition flags for the token currently being built.
 */
struct token_state {
	FILE *input; /* stream read with fgetc() by nextchar() when is_string is 0 */
	struct token *tok; /* token being filled in; set by get_token() */

	char *strinput; /* bytes returned by nextchar() when is_string is nonzero */
	int is_string; /* nonzero: read from strinput instead of input; cleared by ts_init() */
	int strlen; /* nextchar() returns EOF once cursor reaches this value */
	int cursor; /* index into strinput of the next byte nextchar() returns */

	int pc; /* previous char */
	int cc; /* current char */
	int nc; /* next char */
	int push_back; /* true if next char is valid, i.e. nc was fetched by peek() and not yet consumed by nextch() */

	int prev_op; /* previous character was used as part of an operator */
	int prev_word; /* previous character is part of a word */
	int quoting; /* type of quoting: the quote character that opened it, 0 when not quoting */

	char buf[2048]; /* only buf[0] is touched by the code visible here (cleared by ts_init()) */
	int buflen; /* not referenced by the code visible here */
};

/* Prepare ts to read tokens from the stream f. Returns ts, or NULL when ts is NULL. */
struct token_state *ts_init(struct token_state *ts, FILE *f);
/* Read the next token into tok and return its type (EOF once the input is exhausted). */
int get_token(struct token_state *ts, struct token *tok);

/* END HEADER */

#if DEBUG_TOKEN
static void ptoken(struct token_state *ts) {
	fprintf(stderr, "token %d = %s\n", ts->tok->type, ts->tok->text);
}
#endif

/*
 * Forward declarations for the character-level helpers and process_char(),
 * some of which are called before they are defined.
 */
static int nextch(struct token_state *ts);
static int peek(struct token_state *ts);
static int nextchar(struct token_state *ts); 
static int process_char(struct token_state *ts);

/*
 * Return the next input character without consuming it. The character is
 * fetched with nextchar() at most once and cached in ts->nc until nextch()
 * consumes it.
 */
static int peek(struct token_state *ts) {
	if (ts->push_back) {
		/* lookahead already holds the next character */
		return ts->nc;
	}

	ts->nc = nextchar(ts);
	ts->push_back = 1;
	return ts->nc;
}

/*
 * Consume one character: take the cached lookahead if peek() fetched one,
 * otherwise read a fresh character. The old current character is kept in
 * ts->pc. Returns the new current character.
 */
static int nextch(struct token_state *ts) {
	int incoming = ts->push_back ? ts->nc : nextchar(ts);

	ts->nc = incoming;
	ts->pc = ts->cc;
	ts->cc = incoming;
	ts->push_back = 0;
	return incoming;
}

#if 0
/*
 * Step back one character: the current character becomes the lookahead and
 * the previous character becomes current. Returns the new current character.
 * Currently compiled out by the surrounding #if 0.
 *
 * NOTE(review): ts->push_back is not set here, so a following peek() or
 * nextch() would still call nextchar() and overwrite the restored nc; ts->pc
 * is also left unchanged. Confirm the intended behaviour before enabling.
 */
static int pushback(struct token_state *ts) {
	/* TODO error if already pushed back? */
	ts->nc = ts->cc;
	ts->cc = ts->pc;
	return ts->cc;
}
#endif

static int add_to_token(struct token_state *ts, int ch) {
	ts->tok->text[ts->tok->length++] = (char)ch;
#if DEBUG_TOKEN
	ptoken(ts);
#endif
	return ts->tok->length;
}

static int start_token(struct token_state *ts, int type, int ch) {
	int i;
	for (i=0;i<sizeof ts->tok->text; i++) {
		ts->tok->text[i] = 0;
	}
	ts->tok->type = type;
	ts->tok->text[0] = (char)ch;
	ts->tok->length = 1;
#if DEBUG_TOKEN
	fprintf(stderr, "starting ");
	ptoken(ts);
#endif
	return 1;
}

/*
 * Initialise a caller-allocated tokenizer state so that it reads from the
 * stream f.
 *
 * Every field is given a defined value: nextch() copies ts->cc into ts->pc
 * on its very first call, so leaving the character slots (or the string
 * input fields) uninitialised would mean reading indeterminate values.
 *
 * Returns ts, or NULL when ts is NULL.
 */
struct token_state *ts_init(struct token_state *ts, FILE *f) {
	if (!ts) return NULL;
	/* TODO malloc one */

	ts->input = f;
	ts->tok = NULL;

	/* string input is disabled until a caller sets it up */
	ts->strinput = NULL;
	ts->is_string = 0;
	ts->strlen = 0;
	ts->cursor = 0;

	/* no characters have been read yet */
	ts->pc = 0;
	ts->cc = 0;
	ts->nc = 0;
	ts->push_back = 0;

	ts->prev_op = 0;
	ts->prev_word = 0;
	ts->quoting = 0;

	ts->buf[0] = 0;
	ts->buflen = 0;
	return ts;
}

/*
 * Read the next token from ts into tok.
 *
 * The per-token fields of tok (type, length, delimiter, nondigits, nonnames
 * and text) and the per-token recognition flags of ts are reset first, so
 * nothing left over from a previous token, or from an uninitialised struct,
 * is read while the new token is recognised. rtype and next are not touched.
 *
 * Returns the token type, which is EOF once the input is exhausted.
 */
int get_token(struct token_state *ts, struct token *tok) {
	size_t i;

	ts->tok = tok;
	ts->prev_op = 0;
	ts->prev_word = 0;
	ts->quoting = 0;

	tok->type = 0;
	tok->length = 0;
	tok->delimiter = 0;
	tok->nondigits = 0; /* delimit() reads this when classifying IO numbers */
	tok->nonnames = 0;
	for (i = 0; i < sizeof tok->text; i++) {
		tok->text[i] = 0;
	}

	/* feed characters to the recogniser until it reports a complete token */
	while (!process_char(ts)) {
	}
	return tok->type;
}

/*
 * Check whether ch can extend an operator token of the given type into a
 * longer operator. Returns the type of the longer operator, or 0 when the
 * combination does not form one.
 */
static int can_op(int type, int ch) {
	/* TODO */
	if (type == TOKEN_Ampersand) {
		return ch == '&' ? TOKEN_AND_IF : 0;
	}
	if (type == TOKEN_Pipe) {
		return ch == '|' ? TOKEN_OR_IF : 0;
	}
	if (type == TOKEN_Semicolon) {
		return ch == ';' ? TOKEN_DSEMI : 0;
	}
	if (type == TOKEN_DLESS) {
		return ch == '-' ? TOKEN_DLESSDASH : 0;
	}
	if (type == TOKEN_Lessthan) {
		if (ch == '<') return TOKEN_DLESS;
		if (ch == '&') return TOKEN_LESSAND;
		if (ch == '>') return TOKEN_LESSGREAT;
		return 0;
	}
	if (type == TOKEN_Greaterthan) {
		if (ch == '>') return TOKEN_DGREAT;
		if (ch == '&') return TOKEN_GREATAND;
		if (ch == '|') return TOKEN_CLOBBER;
		return 0;
	}
	return 0;
}

static int valid_token(struct token_state *ts) {
	return ts->tok && ts->tok->length;
}

static int delimit(struct token_state *ts, int ch) {
	if (valid_token(ts)) {
		ts->tok->delimiter = ch;
		if (ts->tok->length == 1 && ts->tok->text[0] == '\n') {
			ts->tok->type = TOKEN_NEWLINE;
		} else if (!ts->tok->nondigits && (ts->tok->delimiter == '>' || ts->tok->delimiter == '<')) {
			ts->tok->type = TOKEN_IO_NUMBER;
		}
#if DEBUG_TOKEN
	fprintf(stderr, "delimited ");
	ptoken(ts);
#endif
		return 1;
	}

	return 0;
}

/*
 * Begin a new operator token of the given type whose first character is cc.
 * If a token is already in progress it is delimited instead, and cc stays in
 * the lookahead for the next call.
 * Returns 1 if a token was delimited, 0 if the operator token was started.
 */
static int begin_operator(struct token_state *ts, int type, int cc) {
	if (delimit(ts, cc)) return 1;
	nextch(ts);
	ts->prev_op = 1;
	start_token(ts, type, cc);
	return 0;
}

/*
 * Look at the next input character and apply one step of the POSIX token
 * recognition rules to the token being built in ts->tok.
 *
 * Returns 1 once the current token is complete (it was delimited, or the EOF
 * token was produced) and 0 when the caller should call again.
 *
 * Quoting is tracked in ts->quoting: a backslash quotes exactly the next
 * character, a single quote runs to the next single quote, and a double
 * quote runs to the next double quote that is not preceded by a backslash.
 * Quoted characters are appended to the word unmodified.
 * TODO: backslash-newline line joining is not performed.
 */
static int process_char(struct token_state *ts) {
	/*
	 * ts->quoting value meaning "inside double quotes and the previous
	 * character was a backslash". Input characters are EOF or 0..255, so
	 * this value cannot collide with a real quote character.
	 */
	enum { DQUOTE_ESCAPE = 0x100 + '"' };
	int cc;

	cc = peek(ts);
#if DEBUG_TOKEN
	if (cc == EOF) {
		fprintf(stderr, "looking at EOF\n");
	} else {
		fprintf(stderr, "looking at a '%c'\n", cc);
	}
#endif

/*
 * If the end of input is recognized, the current token shall be delimited. If
 * there is no current token, the end-of-input indicator shall be returned as
 * the token.
 */
	if (cc == EOF) {
		if (delimit(ts,cc)) {
			return 1;
		}
#if DEBUG_TOKEN
		fprintf(stderr, "EOF Token\n");
#endif
		ts->tok->type = EOF;
		return 1;
	}

/*
 * A quote opened by an earlier character is still in effect: the current
 * character belongs to the word whatever it is. The character that opened
 * the quote already started or joined a word, so there is a token to append
 * to. Work out here whether this character ends the quoting.
 */
	if (ts->quoting) {
		switch (ts->quoting) {
			case '\\':
				/* a backslash quotes a single character */
				ts->quoting = 0;
				break;
			case '\'':
				if (cc == '\'') ts->quoting = 0;
				break;
			case '"':
				if (cc == '"') {
					ts->quoting = 0;
				} else if (cc == '\\') {
					ts->quoting = DQUOTE_ESCAPE;
				}
				break;
			default:
				/* DQUOTE_ESCAPE: this character is escaped, stay in double quotes */
				ts->quoting = '"';
				break;
		}
		add_to_token(ts, cc);
		nextch(ts);
		return 0;
	}

/*
 * If the previous character was used as part of an operator and the current
 * character is not quoted and can be used with the current characters to form
 * an operator, it shall be used as part of that (operator) token.
 */
	if (ts->prev_op && !ts->quoting) {
		int newop = can_op(ts->tok->type, cc);
		if (newop) {
			ts->tok->type = newop;
			add_to_token(ts,cc);
			nextch(ts);
			return 0;
		}
	}

/*
 * If the previous character was used as part of an operator and the current
 * character cannot be used with the current characters to form an
 * operator, the operator containing the previous character shall be
 * delimited.
 */
	if (ts->prev_op && ! can_op(ts->tok->type, cc)) {
		if (delimit(ts,cc)) return 1;
		/* TODO error here, should be impossible */
	}

	/* match quote */
/*
 * If the current character is <backslash>, single-quote, or double-quote and
 * it is not quoted, it shall affect quoting for subsequent characters up to
 * the end of the quoted text. The rules for quoting are as described in
 * Quoting . During token recognition no substitutions shall be actually
 * performed, and the result token shall contain exactly the characters that
 * appear in the input (except for <newline> joining), unmodified, including
 * any embedded or enclosing quotes or substitution operators, between the
 * <quotation-mark> and the end of the quoted text. The token shall not be
 * delimited by the end of the quoted field.
 */
	if (!ts->quoting) {
		if (cc == '\\' || cc == '\'' || cc == '"') {
			ts->quoting = cc;
		}
	}

	/* match expansion */
/*
 * If the current character is an unquoted '$' or '`', the shell shall identify
 * the start of any candidates for parameter expansion (Parameter Expansion),
 * command substitution (Command Substitution), or arithmetic expansion
 * (Arithmetic Expansion) from their introductory unquoted character sequences:
 * '$' or "${", "$(" or '`', and "$((", respectively. The shell shall read
 * sufficient input to determine the end of the unit to be expanded (as
 * explained in the cited sections). While processing the characters, if
 * instances of expansions or quoting are found nested within the substitution,
 * the shell shall recursively process them in the manner specified for the
 * construct that is found. The characters found from the beginning of the
 * substitution to its end, allowing for any recursion necessary to recognize
 * embedded constructs, shall be included unmodified in the result token,
 * including any embedded or enclosing substitution operators or quotes.  The
 * token shall not be delimited by the end of the substitution.
 */
#if 0
	if (!ts->quoting) {
		if (cc == '$' || cc == '`') {
			int nc;
			nc = peek(ts);
			switch (nc) {
				case '{':
					ts->expanding = EXP_BRACKET; break;
				case '(':
					ts->expanding = EXP_COMMAND; break;
				default:
					ts->expanding = EXP_WORDVAR; break;
			}
			do_expansion(ts, cc);
		} else if (cc == '`') {
			ts->expanding = EXP_COMMAND;
		}
	}
#endif

/*
 * If the current character is not quoted and can be used as the first
 * character of a new operator, the current token (if any) shall be delimited.
 * The current character shall be used as the beginning of the next (operator)
 * token.
 */

/*
 * If the current character is an unquoted <newline>, the current token shall
 * be delimited.
 */

/*
 * If the current character is an unquoted <blank>, any token containing the
 * previous character is delimited and the current character shall be
 * discarded.
 */

/*
 * The application shall quote the following characters if they are to
 * represent themselves:
 *
 * |  &  ;  <  >  (  )  $  `  \  "  '  <space>  <tab>  <newline>
 */

	if (! ts->quoting) {
		switch (cc) {
			case '&':
				return begin_operator(ts, TOKEN_Ampersand, cc);
			case '|':
				return begin_operator(ts, TOKEN_Pipe, cc);
			case ';':
				return begin_operator(ts, TOKEN_Semicolon, cc);
			case '<':
				return begin_operator(ts, TOKEN_Lessthan, cc);
			case '>':
				return begin_operator(ts, TOKEN_Greaterthan, cc);
			case '\n':
				if (delimit(ts,cc)) return 1;
#if DEBUG_TOKEN
				fprintf(stderr, "newline\n");
#endif
				nextch(ts);
				start_token(ts, TOKEN_NEWLINE,cc);
				/* single-character token: complete as soon as it is started */
				if (delimit(ts,cc)) return 1;
				break;
			case '(':
				if (delimit(ts,cc)) return 1;
				nextch(ts);
				start_token(ts, TOKEN_Lparen,cc);
				if (delimit(ts,cc)) return 1;
				break;
			case ')':
				if (delimit(ts,cc)) return 1;
				nextch(ts);
				start_token(ts, TOKEN_Rparen,cc);
				if (delimit(ts,cc)) return 1;
				break;
			case ' ':
			case '\t':
				if (delimit(ts,cc)) return 1;
				/* no token in progress: discard the blank */
				nextch(ts);
				return 0;
			default:
				break;
		}
	}

/*
 * If the previous character was part of a word, the current character shall
 * be appended to that word.
 */
	if (ts->prev_word) {
		add_to_token(ts, cc);
		nextch(ts);
		return 0;
	}

/*
 * If the current character is a '#', it and all subsequent characters up
 * to, but excluding, the next <newline> shall be discarded as a comment.
 * The <newline> that ends the line is not considered part of the comment.
 */
	if (cc == '#') {
		do {
			nextch(ts);
			cc = peek(ts);
		} while (cc != '\n' && cc != EOF);
		return 0;
	}

/*
 * The current character is used as the start of a new word.
 */
	ts->prev_word = 1;
	start_token(ts, TOKEN_WORD, cc);
	nextch(ts);
	return 0;
}

/*
 * Fetch one raw character from the active input source: the in-memory
 * string when ts->is_string is set, otherwise the FILE stream.
 * Returns the character as a non-negative int, or EOF when the source is
 * exhausted.
 */
static int nextchar(struct token_state *ts) {
	unsigned char byte;

	if (!ts->is_string) {
		return fgetc(ts->input);
	}

	if (ts->cursor < ts->strlen) {
		byte = (unsigned char)ts->strinput[ts->cursor];
		ts->cursor++;
		return (int)byte;
	}
	return EOF;
}