#include "lexeme-parser.h"

#define _CRT_SECURE_NO_WARNINGS
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "mem-buffer.h"

// -------------------------------------------------------------------------------------------- //
// * Implementation *
// -------------------------------------------------------------------------------------------- //
scm::LexemeParser::LexemeParser()
{
}

scm::LexemeParser::~LexemeParser()
{
}
// -------------------------------------------------------------------------------------------- //

// * run * //
//
// Splits the text file 'filename' into lexemes and stores them in 'list'.
//
//  - 'list' is reset first, then filled in source order; every lexeme is added
//    together with the line number it was found on (lines are counted from 1).
//  - the file is consumed one character at a time by a small state machine;
//    'hold_sym' re-feeds the character that terminated a multi-character token
//    so that it is processed again from the null state.
//  - '#' starts a comment that runs to the end of the line.
//
// Returns true if the whole file was read and tokenized. Returns false (after
// printing a diagnostic to stdout) if the file cannot be opened, a read error
// occurs, or an invalid symbol / unterminated string / malformed name is met;
// in that case 'list' holds the lexemes added before the failure.
// -------------------------------------------------------------------------------------------- //
bool scm::LexemeParser::run(const char* filename, LexemeList& list)
{
    // --- resetting list
    list.reset();
    // ----------------------------------------------------- //

    // --- opening file
    FILE *ptr = fopen(filename, "rt");
    if (ptr == NULL) {
        printf(" LEX-PARSE:> failed to open file: '%s'\n", filename);
        return false;
    }
    // ----------------------------------------------------- //

    // --- setting token buffer
    mem_buffer< char > token_buf;
    token_buf.reset();
    // ----------------------------------------------------- //

    int nline = 1;          // current line number
    int str_line = 1;       // line on which the current string literal was opened
    int fsym = EOF;         // current symbol (always read before first use)
    STATE state = IS_NULL;
    bool hold_sym = false;  // true: process 'fsym' again instead of reading a new symbol
    bool status = true;
    // ----------------------------------------------------- //

    // --- reading file
    //     ferror() has to be checked as well: after a read error fgetc() keeps
    //     returning EOF while feof() stays false, which would loop forever
    while (!feof(ptr) && !ferror(ptr))
    {
        // --- reading char from file
        if (!hold_sym) fsym = fgetc(ptr);
        else
            hold_sym = false;

        // --- processing null state
        if (state == IS_NULL) {

            // --- EOF
            if (fsym == EOF) continue;

            // --- newline
            if (fsym == '\n') { nline++; continue; }

            // --- spaces
            if (isspace(fsym)) continue;

            // --- comment line symbol
            if (fsym == '#') { state = IS_COMMENT_LINE; continue; }

            // --- string
            if (fsym == '"') {
                state = IS_STRING;
                str_line = nline;
                token_buf.append(fsym);
                continue;
            }

            // --- '='
            if (fsym == '=') { state = IS_ASSIGNMENT; token_buf.append(fsym); continue; }
            // --- '!'
            if (fsym == '!') { state = IS_NOT_EQUAL; token_buf.append(fsym); continue; }
            // --- '&'
            if (fsym == '&') { state = IS_LOGICAL_AND; token_buf.append(fsym); continue; }
            // --- '|'
            if (fsym == '|') { state = IS_LOGICAL_OR; token_buf.append(fsym); continue; }
            // --- '<'
            if (fsym == '<') { state = IS_LESS_THAN; token_buf.append(fsym); continue; }
            // --- '>'
            if (fsym == '>') { state = IS_GREATER_THAN; token_buf.append(fsym); continue; }

            // --- '{','}'
            if (fsym == '{') { list.add("{", Lexeme::IS_BRACE_OPEN, nline); continue; }
            if (fsym == '}') { list.add("}", Lexeme::IS_BRACE_CLOSE, nline); continue; }
            // --- '(', ')'
            if (fsym == '(') { list.add("(", Lexeme::IS_PAREN_OPEN, nline); continue; }
            if (fsym == ')') { list.add(")", Lexeme::IS_PAREN_CLOSE, nline); continue; }
            // --- '[', ']'
            if (fsym == '[') { list.add("[", Lexeme::IS_BRACKET_OPEN, nline); continue; }
            if (fsym == ']') { list.add("]", Lexeme::IS_BRACKET_CLOSE, nline); continue; }

            // --- ';'
            if (fsym == ';') { list.add(";", Lexeme::IS_SEMICOLON, nline); continue; }
            // --- ','
            if (fsym == ',') { list.add(",", Lexeme::IS_COMMA, nline); continue; }

            // --- '+', '-'
            if ((fsym == '+') || (fsym == '-')) {

                // --- the sign is a binary operator only if the previous lexeme
                //     can end an operand: value, name, ')' or ']'; otherwise
                //     (including the very first lexeme) it is unary
                bool is_binary = false;
                if (list.get_size() != 0) {
                    is_binary =
                        (list.is_value(list.get_size() - 1)) ||
                        (list.get_type(list.get_size() - 1) == Lexeme::IS_NAME) ||
                        (list.get_type(list.get_size() - 1) == Lexeme::IS_PAREN_CLOSE) ||
                        (list.get_type(list.get_size() - 1) == Lexeme::IS_BRACKET_CLOSE);
                }

                if (fsym == '+') {
                    if (is_binary) list.add("+", Lexeme::IS_OP_ADD, nline);
                    else
                        list.add("+", Lexeme::IS_OP_PLUS, nline);
                }
                else
                {
                    if (is_binary) list.add("-", Lexeme::IS_OP_SUB, nline);
                    else
                        list.add("-", Lexeme::IS_OP_MINUS, nline);
                }
                continue;
            }

            // --- '*'
            if (fsym == '*') { list.add("*", Lexeme::IS_OP_MUL, nline); continue; }
            // --- '/'
            if (fsym == '/') { list.add("/", Lexeme::IS_OP_DIV, nline); continue; }
            // --- '%'
            if (fsym == '%') { list.add("%", Lexeme::IS_OP_MOD, nline); continue; }
            // --- '^'
            if (fsym == '^') { list.add("^", Lexeme::IS_OP_EXP, nline); continue; }

            // --- name
            if (isalpha(fsym) || fsym == '_') {
                state = IS_NAME;
                token_buf.append(fsym);
                continue;
            }

            // --- number
            if (isdigit(fsym)) {
                state = IS_NUMBER;
                token_buf.append(fsym);
                continue;
            }

            // --- unknown symbol
            printf(" PARSE:> invalid symbol: '%c' (line, %i)\n", fsym, nline);
            status = false;
            break;
        }
        // ----------------------------------------------------- //

        // --- assignment || comparison (==)
        if (state == IS_ASSIGNMENT) {
            if (fsym == '=') {
                // found '=='
                token_buf.append(fsym);
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_EQ, nline);
            }
            else
            {
                // found '=', the current symbol belongs to the next token
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_ASSIGNMENT, nline);
                hold_sym = true;
            }

            token_buf.reset();
            state = IS_NULL;
            continue;
        }
        // ----------------------------------------------------- //

        // --- comparison (!=)
        if (state == IS_NOT_EQUAL) {
            if (fsym == '=') {
                // found '!='
                token_buf.append(fsym);
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_NEQ, nline);

                token_buf.reset();
                state = IS_NULL;
                continue;
            }

            // --- unknown symbol
            printf(" PARSE:> invalid symbol: '%c', expecting '=' (line, %i)\n", fsym, nline);
            status = false;
            break;
        }
        // ----------------------------------------------------- //

        // --- logical (&&)
        if (state == IS_LOGICAL_AND) {
            if (fsym == '&') {
                // found '&&'
                token_buf.append(fsym);
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_LOGICAL_AND, nline);

                token_buf.reset();
                state = IS_NULL;
                continue;
            }

            // --- unknown symbol
            printf(" PARSE:> invalid symbol: '%c', expecting '&' (line, %i)\n", fsym, nline);
            status = false;
            break;
        }
        // ----------------------------------------------------- //

        // --- logical (||)
        if (state == IS_LOGICAL_OR) {
            if (fsym == '|') {
                // found '||'
                token_buf.append(fsym);
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_LOGICAL_OR, nline);

                token_buf.reset();
                state = IS_NULL;
                continue;
            }

            // --- unknown symbol
            printf(" PARSE:> invalid symbol: '%c', expecting '|' (line, %i)\n", fsym, nline);
            status = false;
            break;
        }
        // ----------------------------------------------------- //

        // --- comparison (<) || (<=)
        if (state == IS_LESS_THAN) {
            if (fsym == '=') {
                // found '<='
                token_buf.append(fsym);
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_LEQ, nline);
            }
            else
            {
                // found '<', the current symbol belongs to the next token
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_LT, nline);
                hold_sym = true;
            }

            token_buf.reset();
            state = IS_NULL;
            continue;
        }
        // ----------------------------------------------------- //

        // --- comparison (>) || (>=)
        if (state == IS_GREATER_THAN) {
            if (fsym == '=') {
                // found '>='
                token_buf.append(fsym);
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_GEQ, nline);
            }
            else
            {
                // found '>', the current symbol belongs to the next token
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_OP_GT, nline);
                hold_sym = true;
            }

            token_buf.reset();
            state = IS_NULL;
            continue;
        }
        // ----------------------------------------------------- //

        // --- processing comment line
        if (state == IS_COMMENT_LINE) {

            // --- EOF
            if (fsym == EOF) { state = IS_NULL; continue; }

            // --- new line
            if (fsym == '\n') { nline++; state = IS_NULL; continue; }

            // --- skipping all symbols
            continue;
        }
        // ----------------------------------------------------- //

        // --- processing string
        if (state == IS_STRING) {

            // --- EOF
            if (fsym == EOF) {
                // --- unterminated string, reported at the line it was opened on
                token_buf.append('\0');
                printf(" PARSE:> missing closing quotation mark (\") in string: '%s' (line, %i)\n",
                    token_buf.get_ptr(), str_line);
                status = false;
                break;
            }

            // --- keeping the line counter correct for lexemes that follow
            //     a string spanning several lines
            if (fsym == '\n') nline++;

            // --- adding all symbols
            token_buf.append(fsym);

            // --- closing string
            if (fsym == '"') {
                token_buf.append('\0');
                list.add(token_buf.get_ptr(), Lexeme::IS_STRING, str_line);

                token_buf.reset();
                state = IS_NULL;
            }

            continue;
        }
        // ----------------------------------------------------- //

        // --- processing name
        if (state == IS_NAME) {

            // --- adding alpha, numerals, '_' and '.'
            if (isalnum(fsym) || (fsym == '_') || (fsym == '.')) {
                token_buf.append(fsym);
                continue;
            }

            // --- checking if last element is dot
            if (token_buf.get_value(token_buf.get_size() - 1) == '.') {
                token_buf.append('\0');
                printf(" PARSE:> invalid name ending with '.': '%s' (line, %i)\n",
                    token_buf.get_ptr(), nline);
                status = false;
                break;
            }

            // --- closing name: nothing is appended to the buffer until
            //     the reset below, so the pointer is fetched once
            token_buf.append('\0');
            const char* text = token_buf.get_ptr();

            if ((!strcmp(text, "true")) || (!strcmp(text, "false")))
                list.add(text, Lexeme::IS_BOOL, nline);
            // --- keywords
            else if (!strcmp(text, "if")) list.add(text, Lexeme::IS_KEY_IF, nline);
            else if (!strcmp(text, "then")) list.add(text, Lexeme::IS_KEY_THEN, nline);
            else if (!strcmp(text, "else")) list.add(text, Lexeme::IS_KEY_ELSE, nline);
            else if (!strcmp(text, "endif")) list.add(text, Lexeme::IS_KEY_ENDIF, nline);
            else if (!strcmp(text, "while")) list.add(text, Lexeme::IS_KEY_WHILE, nline);
            else if (!strcmp(text, "do")) list.add(text, Lexeme::IS_KEY_DO, nline);
            else if (!strcmp(text, "enddo")) list.add(text, Lexeme::IS_KEY_ENDDO, nline);
            // --- built-in functions
            else if (!strcmp(text, "__sin")) list.add(text, Lexeme::IS_SIN_FUNCTION, nline);
            else if (!strcmp(text, "__cos")) list.add(text, Lexeme::IS_COS_FUNCTION, nline);
            else if (!strcmp(text, "__tan")) list.add(text, Lexeme::IS_TAN_FUNCTION, nline);
            else if (!strcmp(text, "__log")) list.add(text, Lexeme::IS_LOG_FUNCTION, nline);
            else if (!strcmp(text, "__sqrt")) list.add(text, Lexeme::IS_SQRT_FUNCTION, nline);
            else if (!strcmp(text, "__min")) list.add(text, Lexeme::IS_MIN_FUNCTION, nline);
            else if (!strcmp(text, "__max")) list.add(text, Lexeme::IS_MAX_FUNCTION, nline);
            else if (!strcmp(text, "__vecmin")) list.add(text, Lexeme::IS_VECMIN_FUNCTION, nline);
            else if (!strcmp(text, "__vecmax")) list.add(text, Lexeme::IS_VECMAX_FUNCTION, nline);
            else if (!strcmp(text, "__abs")) list.add(text, Lexeme::IS_ABS_FUNCTION, nline);
            else if (!strcmp(text, "__unirand")) list.add(text, Lexeme::IS_UNIRAND_FUNCTION, nline);
            else if (!strcmp(text, "__dot")) list.add(text, Lexeme::IS_DOT_PRODUCT_FUNCTION, nline);
            else if (!strcmp(text, "__l2norm")) list.add(text, Lexeme::IS_L2NORM_FUNCTION, nline);
            else if (!strcmp(text, "__cnorm")) list.add(text, Lexeme::IS_CNORM_FUNCTION, nline);
            else if (!strcmp(text, "__to_string")) list.add(text, Lexeme::IS_TO_STRING_FUNCTION, nline);
            else if (!strcmp(text, "__size")) list.add(text, Lexeme::IS_SIZE_FUNCTION, nline);
            else if (!strcmp(text, "__defined")) list.add(text, Lexeme::IS_DEFINED_FUNCTION, nline);
            // --- vector constructors
            else if (!strcmp(text, "__vector")) list.add(text, Lexeme::IS_VECTOR_CTOR, nline);
            else if (!strcmp(text, "__vector_const")) list.add(text, Lexeme::IS_VECTOR_CONST_CTOR, nline);
            else if (!strcmp(text, "__vector_unirand")) list.add(text, Lexeme::IS_VECTOR_UNIRAND_CTOR, nline);
            // --- any other name with the "__" prefix is a command
            else if (!strncmp(text, "__", 2)) list.add(text, Lexeme::IS_COMMAND, nline);
            else
                list.add(text, Lexeme::IS_NAME, nline);

            token_buf.reset();
            state = IS_NULL;
            hold_sym = true;
            continue;
        }
        // ----------------------------------------------------- //

        // --- processing number
        if (state == IS_NUMBER) {

            // --- adding digits
            if (isdigit(fsym)) { token_buf.append(fsym); continue; }

            // --- changing state to fractional part
            if (fsym == '.') {
                token_buf.append(fsym);
                state = IS_FRAC_NUMBER;
                continue;
            }

            // --- changing state to exponent
            if ((fsym == 'e') || (fsym == 'E')) {
                token_buf.append(fsym);
                state = IS_EXP_NUMBER;
                continue;
            }

            // --- closing integer
            token_buf.append('\0');
            list.add(token_buf.get_ptr(), Lexeme::IS_INTEGER, nline);

            token_buf.reset();
            state = IS_NULL;
            hold_sym = true;
            continue;
        }
        // ----------------------------------------------------- //

        // --- processing fractional part
        if (state == IS_FRAC_NUMBER) {

            // --- adding digits
            if (isdigit(fsym)) { token_buf.append(fsym); continue; }

            // --- changing state to exponent
            if ((fsym == 'e') || (fsym == 'E')) {
                token_buf.append(fsym);
                state = IS_EXP_NUMBER;
                continue;
            }

            // --- closing floating point
            token_buf.append('\0');
            list.add(token_buf.get_ptr(), Lexeme::IS_FLOAT, nline);

            token_buf.reset();
            state = IS_NULL;
            hold_sym = true;
            continue;
        }
        // ----------------------------------------------------- //

        // --- processing exponent
        if (state == IS_EXP_NUMBER) {

            // --- adding digits
            if (isdigit(fsym)) { token_buf.append(fsym); continue; }

            // --- handling exponent sign: accepted only right after 'e' / 'E'
            if (((fsym == '+') || (fsym == '-')) &&
                ((token_buf.get_value(token_buf.get_size() - 1) == 'e') ||
                 (token_buf.get_value(token_buf.get_size() - 1) == 'E')))
            {
                token_buf.append(fsym);
                continue;
            }

            // --- closing floating point
            token_buf.append('\0');
            list.add(token_buf.get_ptr(), Lexeme::IS_FLOAT, nline);

            token_buf.reset();
            state = IS_NULL;
            hold_sym = true;
            continue;
        }
        // ----------------------------------------------------- //

        // unknown state
        token_buf.append('\0');
        printf(" PARSE:> invalid parser state: '%s' (line, %i)\n", token_buf.get_ptr(), nline);
        status = false;
        break;
    }

    // --- a read error ends the loop the same way as EOF but the input is incomplete
    if (ferror(ptr)) {
        printf(" LEX-PARSE:> failed to read file: '%s'\n", filename);
        status = false;
    }

    fclose(ptr);
    return status;
}
// -------------------------------------------------------------------------------------------- //