#include "lexeme-parser.h"

#define _CRT_SECURE_NO_WARNINGS
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "mem-buffer.h"

// -------------------------------------------------------------------------------------------- //
// * Implementation *
// -------------------------------------------------------------------------------------------- //

// Constructor: the body is intentionally empty, no setup is performed here.
scm::LexemeParser::LexemeParser() {}

// Destructor: the body is intentionally empty, no cleanup is performed here.
scm::LexemeParser::~LexemeParser() {}
// -------------------------------------------------------------------------------------------- //

// * run * //
// -------------------------------------------------------------------------------------------- //
// Splits the text file `filename` into lexemes and stores them in `list`.
//
// `list` is reset first. The file is then read one character at a time and
// driven through a small state machine (`state`):
//   - IS_NULL            : between tokens; dispatches on the current character
//   - IS_ASSIGNMENT etc. : one-character lookahead for '==', '!=', '&&', '||',
//                          '<=', '>='
//   - IS_COMMENT_LINE    : skips everything from '#' to the end of the line
//   - IS_STRING          : collects characters up to the closing '"'
//   - IS_NAME            : identifiers, keywords and '__' built-ins
//   - IS_NUMBER / IS_FRAC_NUMBER / IS_EXP_NUMBER : integer and float literals
// When a token is terminated by a character that belongs to the next token,
// `hold_sym` makes the next iteration re-process that character instead of
// reading a new one.
//
// Returns true if the whole file was tokenized. Returns false if the file
// could not be opened or read, or at the first lexical error; a diagnostic is
// printed to stdout in each case. Lexemes added before the failure stay in
// `list`.
//
// NOTE(review): the exponent of a number is not validated, so input such as
// "1e" or "1e+" is still emitted as an IS_FLOAT lexeme.
bool scm::LexemeParser::run(const char* filename, LexemeList& list)
{
	// --- resetting list
	list.reset();
	// ----------------------------------------------------- //

	// --- opening file
	FILE *ptr = fopen(filename, "rt");
	if (ptr == NULL) {
		printf(" LEX-PARSE:> failed to open file: '%s'\n", filename);
		return false;
	}
	// ----------------------------------------------------- //

	// --- setting token buffer
	mem_buffer< char > token_buf;
	token_buf.reset();
	// ----------------------------------------------------- //

	int nline = 1;            // current line number (1-based)
	int token_line = 1;       // line on which the current string literal started
	int fsym = EOF;           // current character (int, so EOF is representable)
	STATE state = IS_NULL;
	bool hold_sym = false;    // true: re-process 'fsym' instead of reading
	bool at_eof = false;      // true once fgetc() has returned EOF
	bool status = true;
	// ----------------------------------------------------- //

	// --- reading file
	// The iteration that sees EOF is the last one: every state gets a chance
	// to close its pending token on EOF, then the loop ends.
	while (!at_eof)
	{
		// --- reading char from file
		if (!hold_sym) fsym = fgetc(ptr);
		else
			hold_sym = false;

		// fgetc() returns EOF both at end of file and on a read error.
		// Testing the returned value (instead of feof()) guarantees that the
		// loop terminates in both cases; the error case is reported after
		// the loop via ferror().
		if (fsym == EOF) at_eof = true;

		// --- processing null state
		if (state == IS_NULL)
		{
			// --- EOF
			if (fsym == EOF) continue;
			// --- newline
			if (fsym == '\n') {
				nline++;
				continue;
			}
			// --- spaces
			if (isspace(fsym)) continue;
			// --- comment line symbol
			if (fsym == '#') {
				state = IS_COMMENT_LINE;
				continue;
			}
			// --- string
			if (fsym == '"') {
				state = IS_STRING;
				token_line = nline;
				token_buf.append(fsym);
				continue;
			}
			// --- '='
			if (fsym == '=') {
				state = IS_ASSIGNMENT;
				token_buf.append(fsym);
				continue;
			}
			// --- '!'
			if (fsym == '!') {
				state = IS_NOT_EQUAL;
				token_buf.append(fsym);
				continue;
			}
			// --- '&'
			if (fsym == '&') {
				state = IS_LOGICAL_AND;
				token_buf.append(fsym);
				continue;
			}
			// --- '|'
			if (fsym == '|') {
				state = IS_LOGICAL_OR;
				token_buf.append(fsym);
				continue;
			}
			// --- '<'
			if (fsym == '<') {
				state = IS_LESS_THAN;
				token_buf.append(fsym);
				continue;
			}
			// --- '>'
			if (fsym == '>') {
				state = IS_GREATER_THAN;
				token_buf.append(fsym);
				continue;
			}
			// --- single-character lexemes
			// ('continue' inside the switch continues the reading loop)
			switch (fsym)
			{
			case '{': list.add("{", Lexeme::IS_BRACE_OPEN, nline); continue;
			case '}': list.add("}", Lexeme::IS_BRACE_CLOSE, nline); continue;
			case '(': list.add("(", Lexeme::IS_PAREN_OPEN, nline); continue;
			case ')': list.add(")", Lexeme::IS_PAREN_CLOSE, nline); continue;
			case '[': list.add("[", Lexeme::IS_BRACKET_OPEN, nline); continue;
			case ']': list.add("]", Lexeme::IS_BRACKET_CLOSE, nline); continue;
			case ';': list.add(";", Lexeme::IS_SEMICOLON, nline); continue;
			case ',': list.add(",", Lexeme::IS_COMMA, nline); continue;
			case '*': list.add("*", Lexeme::IS_OP_MUL, nline); continue;
			case '/': list.add("/", Lexeme::IS_OP_DIV, nline); continue;
			case '%': list.add("%", Lexeme::IS_OP_MOD, nline); continue;
			case '^': list.add("^", Lexeme::IS_OP_EXP, nline); continue;
			default: break;
			}
			// --- '+', '-'
			if ((fsym == '+') || (fsym == '-')) {
				// The sign is a binary operator when it follows a value, a
				// name, ')' or ']'; otherwise (including at the very start
				// of the list) it is a unary sign.
				bool is_binary = false;
				if (list.get_size() != 0)
				{
					is_binary =
						(list.is_value(list.get_size() - 1)) ||
						(list.get_type(list.get_size() - 1) == Lexeme::IS_NAME) ||
						(list.get_type(list.get_size() - 1) == Lexeme::IS_PAREN_CLOSE) ||
						(list.get_type(list.get_size() - 1) == Lexeme::IS_BRACKET_CLOSE);
				}

				if (fsym == '+') {
					if (is_binary) list.add("+", Lexeme::IS_OP_ADD, nline);
					else
						list.add("+", Lexeme::IS_OP_PLUS, nline);
				}
				else {
					if (is_binary) list.add("-", Lexeme::IS_OP_SUB, nline);
					else
						list.add("-", Lexeme::IS_OP_MINUS, nline);
				}
				continue;
			}
			// --- name
			if (isalpha(fsym) || fsym == '_')
			{
				state = IS_NAME;
				token_buf.append(fsym);
				continue;
			}
			// --- number
			if (isdigit(fsym))
			{
				state = IS_NUMBER;
				token_buf.append(fsym);
				continue;
			}

			// --- unknown symbol
			printf(" PARSE:> invalid symbol: '%c' (line, %i)\n", fsym, nline);
			status = false;
			break;
		}
		// ----------------------------------------------------- //

		// --- assignment || comparison (==)
		if (state == IS_ASSIGNMENT)
		{
			if (fsym == '=') {
				// found '=='
				token_buf.append(fsym);
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_EQ, nline);
			}
			else {
				// found '='; current symbol belongs to the next token
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_ASSIGNMENT, nline);

				hold_sym = true;
			}

			token_buf.reset();

			state = IS_NULL;
			continue;
		}
		// ----------------------------------------------------- //

		// --- comparison (!=)
		if (state == IS_NOT_EQUAL)
		{
			if (fsym == '=') {
				// found '!='
				token_buf.append(fsym);
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_NEQ, nline);

				token_buf.reset();
				state = IS_NULL;
				continue;
			}

			// --- input ended after '!' (EOF is not a printable symbol)
			if (fsym == EOF)
				printf(" PARSE:> unexpected end of file, expecting '=' (line, %i)\n", nline);
			// --- unknown symbol
			else
				printf(" PARSE:> invalid symbol: '%c', expecting '=' (line, %i)\n", fsym, nline);
			status = false;
			break;
		}
		// ----------------------------------------------------- //

		// --- logical (&&)
		if (state == IS_LOGICAL_AND)
		{
			if (fsym == '&') {
				// found '&&'
				token_buf.append(fsym);
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_LOGICAL_AND, nline);

				token_buf.reset();
				state = IS_NULL;
				continue;
			}

			// --- input ended after '&' (EOF is not a printable symbol)
			if (fsym == EOF)
				printf(" PARSE:> unexpected end of file, expecting '&' (line, %i)\n", nline);
			// --- unknown symbol
			else
				printf(" PARSE:> invalid symbol: '%c', expecting '&' (line, %i)\n", fsym, nline);
			status = false;
			break;
		}
		// ----------------------------------------------------- //

		// --- logical (||)
		if (state == IS_LOGICAL_OR)
		{
			if (fsym == '|') {
				// found '||'
				token_buf.append(fsym);
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_LOGICAL_OR, nline);

				token_buf.reset();
				state = IS_NULL;
				continue;
			}

			// --- input ended after '|' (EOF is not a printable symbol)
			if (fsym == EOF)
				printf(" PARSE:> unexpected end of file, expecting '|' (line, %i)\n", nline);
			// --- unknown symbol
			else
				printf(" PARSE:> invalid symbol: '%c', expecting '|' (line, %i)\n", fsym, nline);
			status = false;
			break;
		}
		// ----------------------------------------------------- //

		// --- comparison (<) || (<=)
		if (state == IS_LESS_THAN)
		{
			if (fsym == '=') {
				// found '<='
				token_buf.append(fsym);
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_LEQ, nline);
			}
			else {
				// found '<'; current symbol belongs to the next token
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_LT, nline);

				hold_sym = true;
			}

			token_buf.reset();

			state = IS_NULL;
			continue;
		}
		// ----------------------------------------------------- //

		// --- comparison (>) || (>=)
		if (state == IS_GREATER_THAN)
		{
			if (fsym == '=') {
				// found '>='
				token_buf.append(fsym);
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_GEQ, nline);
			}
			else {
				// found '>'; current symbol belongs to the next token
				token_buf.append('\0');
				list.add(token_buf.get_ptr(), Lexeme::IS_OP_GT, nline);

				hold_sym = true;
			}

			token_buf.reset();

			state = IS_NULL;
			continue;
		}
		// ----------------------------------------------------- //

		// --- processing comment line
		if (state == IS_COMMENT_LINE)
		{
			// --- EOF
			if (fsym == EOF) {
				state = IS_NULL;
				continue;
			}
			// --- new line
			if (fsym == '\n') {
				nline++;
				state = IS_NULL;
				continue;
			}

			// --- skipping all symbols
			continue;
		}
		// ----------------------------------------------------- //

		// --- processing string
		if (state == IS_STRING)
		{
			// --- EOF
			if (fsym == EOF) {
				// --- string was never closed; report the line it started on
				token_buf.append('\0');
				printf(" PARSE:> missing closing quotation mark (\") in string: '%s' (line, %i)\n",
					token_buf.get_ptr(), token_line);
				status = false;
				break;
			}
			// --- adding all symbols
			token_buf.append(fsym);
			// --- keeping the line counter in step with newlines inside
			//     the string, so later lexemes get correct line numbers
			if (fsym == '\n') nline++;
			// --- closing string
			if (fsym == '"')
			{
				token_buf.append('\0');

				// the string lexeme is reported at its opening line
				list.add(token_buf.get_ptr(), Lexeme::IS_STRING, token_line);
				token_buf.reset();
				state = IS_NULL;
			}
			continue;
		}
		// ----------------------------------------------------- //

		// --- processing name
		if (state == IS_NAME)
		{
			// --- adding alpha, numerals, '_' and '.'
			if (isalnum(fsym) || (fsym == '_') || (fsym == '.')) {
				token_buf.append(fsym);
				continue;
			}
			// --- checking if last element is dot
			if (token_buf.get_value(token_buf.get_size() - 1) == '.')
			{
				token_buf.append('\0');
				printf(" PARSE:> invalid name ending with '.': '%s' (line, %i)\n",
					token_buf.get_ptr(), nline);
				status = false;
				break;
			}
			// --- closing name
			token_buf.append('\0');

			// pointer is taken after the final append and used only until
			// the buffer is reset below
			const char* name = token_buf.get_ptr();

			// --- exact keyword / built-in matches come first; any other
			//     name starting with "__" is a command, the rest are names
			if ((!strcmp(name, "true")) || (!strcmp(name, "false")))
				list.add(name, Lexeme::IS_BOOL, nline);
			else if (!strcmp(name, "if"))
				list.add(name, Lexeme::IS_KEY_IF, nline);
			else if (!strcmp(name, "then"))
				list.add(name, Lexeme::IS_KEY_THEN, nline);
			else if (!strcmp(name, "else"))
				list.add(name, Lexeme::IS_KEY_ELSE, nline);
			else if (!strcmp(name, "endif"))
				list.add(name, Lexeme::IS_KEY_ENDIF, nline);
			else if (!strcmp(name, "while"))
				list.add(name, Lexeme::IS_KEY_WHILE, nline);
			else if (!strcmp(name, "do"))
				list.add(name, Lexeme::IS_KEY_DO, nline);
			else if (!strcmp(name, "enddo"))
				list.add(name, Lexeme::IS_KEY_ENDDO, nline);
			else if (!strcmp(name, "__sin"))
				list.add(name, Lexeme::IS_SIN_FUNCTION, nline);
			else if (!strcmp(name, "__cos"))
				list.add(name, Lexeme::IS_COS_FUNCTION, nline);
			else if (!strcmp(name, "__tan"))
				list.add(name, Lexeme::IS_TAN_FUNCTION, nline);
			else if (!strcmp(name, "__log"))
				list.add(name, Lexeme::IS_LOG_FUNCTION, nline);
			else if (!strcmp(name, "__sqrt"))
				list.add(name, Lexeme::IS_SQRT_FUNCTION, nline);
			else if (!strcmp(name, "__min"))
				list.add(name, Lexeme::IS_MIN_FUNCTION, nline);
			else if (!strcmp(name, "__max"))
				list.add(name, Lexeme::IS_MAX_FUNCTION, nline);
			else if (!strcmp(name, "__vecmin"))
				list.add(name, Lexeme::IS_VECMIN_FUNCTION, nline);
			else if (!strcmp(name, "__vecmax"))
				list.add(name, Lexeme::IS_VECMAX_FUNCTION, nline);
			else if (!strcmp(name, "__abs"))
				list.add(name, Lexeme::IS_ABS_FUNCTION, nline);
			else if (!strcmp(name, "__unirand"))
				list.add(name, Lexeme::IS_UNIRAND_FUNCTION, nline);
			else if (!strcmp(name, "__dot"))
				list.add(name, Lexeme::IS_DOT_PRODUCT_FUNCTION, nline);
			else if (!strcmp(name, "__l2norm"))
				list.add(name, Lexeme::IS_L2NORM_FUNCTION, nline);
			else if (!strcmp(name, "__cnorm"))
				list.add(name, Lexeme::IS_CNORM_FUNCTION, nline);
			else if (!strcmp(name, "__to_string"))
				list.add(name, Lexeme::IS_TO_STRING_FUNCTION, nline);
			else if (!strcmp(name, "__size"))
				list.add(name, Lexeme::IS_SIZE_FUNCTION, nline);
			else if (!strcmp(name, "__defined"))
				list.add(name, Lexeme::IS_DEFINED_FUNCTION, nline);
			else if (!strcmp(name, "__vector"))
				list.add(name, Lexeme::IS_VECTOR_CTOR, nline);
			else if (!strcmp(name, "__vector_const"))
				list.add(name, Lexeme::IS_VECTOR_CONST_CTOR, nline);
			else if (!strcmp(name, "__vector_unirand"))
				list.add(name, Lexeme::IS_VECTOR_UNIRAND_CTOR, nline);
			else if (!strncmp(name, "__", 2))
				list.add(name, Lexeme::IS_COMMAND, nline);
			else
				list.add(name, Lexeme::IS_NAME, nline);

			token_buf.reset();
			state = IS_NULL;
			hold_sym = true;	// terminating symbol starts the next token
			continue;
		}
		// ----------------------------------------------------- //

		// --- processing number
		if (state == IS_NUMBER)
		{
			// --- adding digits
			if (isdigit(fsym)) {
				token_buf.append(fsym);
				continue;
			}
			// --- changing state to fractional part
			if (fsym == '.') {
				token_buf.append(fsym);
				state = IS_FRAC_NUMBER;
				continue;
			}
			// --- changing state to exponent
			if ((fsym == 'e') || (fsym == 'E')) {
				token_buf.append(fsym);
				state = IS_EXP_NUMBER;
				continue;
			}

			// --- closing integer
			token_buf.append('\0');
			list.add(token_buf.get_ptr(), Lexeme::IS_INTEGER, nline);

			token_buf.reset();
			state = IS_NULL;
			hold_sym = true;	// terminating symbol starts the next token
			continue;
		}
		// ----------------------------------------------------- //

		// --- processing fractional part
		if (state == IS_FRAC_NUMBER)
		{
			// --- adding digits
			if (isdigit(fsym)) {
				token_buf.append(fsym);
				continue;
			}
			// --- changing state to exponent
			if ((fsym == 'e') || (fsym == 'E')) {
				token_buf.append(fsym);
				state = IS_EXP_NUMBER;
				continue;
			}
			// --- closing floating point
			token_buf.append('\0');
			list.add(token_buf.get_ptr(), Lexeme::IS_FLOAT, nline);

			token_buf.reset();
			state = IS_NULL;
			hold_sym = true;	// terminating symbol starts the next token
			continue;
		}
		// ----------------------------------------------------- //

		// --- processing exponent
		if (state == IS_EXP_NUMBER)
		{
			// --- adding digits
			if (isdigit(fsym)) {
				token_buf.append(fsym);
				continue;
			}
			// --- handling exponent sign (only directly after 'e' / 'E')
			if (((fsym == '+') || (fsym == '-')) &&
				((token_buf.get_value(token_buf.get_size() - 1) == 'e') ||
				(token_buf.get_value(token_buf.get_size() - 1) == 'E')))
			{
				token_buf.append(fsym);
				continue;
			}
			// --- closing floating point
			token_buf.append('\0');
			list.add(token_buf.get_ptr(), Lexeme::IS_FLOAT, nline);

			token_buf.reset();
			state = IS_NULL;
			hold_sym = true;	// terminating symbol starts the next token
			continue;
		}
		// ----------------------------------------------------- //

		// --- unknown state
		token_buf.append('\0');
		printf(" PARSE:> invalid parser state: '%s' (line, %i)\n",
			token_buf.get_ptr(), nline);
		status = false;
		break;
	}
	// ----------------------------------------------------- //

	// --- EOF caused by a read error (not by end of file) is a failure
	if (ferror(ptr)) {
		printf(" LEX-PARSE:> failed to read file: '%s'\n", filename);
		status = false;
	}

	fclose(ptr);
	return status;
}
// -------------------------------------------------------------------------------------------- //