#include #include #include #include #include "lexer.h" #include "misc.h" atom get_token_id(const char c, const int dbg) { switch (c) { case '(': return ATOM_LBRACK; case ')': return ATOM_RBRACK; case '/': return ATOM_SLASH; case '+': return ATOM_PLUS; case '-': return ATOM_MINUS; case '*': return ATOM_ASTR; case '%': return ATOM_PRCNT; case '&': return ATOM_AMPR; case '|': return ATOM_PIPE; case '^': return ATOM_CARROT; case '#': return ATOM_HASH; case ':': return ATOM_COL; case ';': return ATOM_SCOL; case ' ': return ATOM_SPACE; case '_': return ATOM_USCORE; case '=': return ATOM_EQUAL; case '.': return ATOM_DOT; case '?': return ATOM_QMARK; case '!': return ATOM_BANG; case '<': return ATOM_LT; case '>': return ATOM_GT; case '%': return ATOM_PERCENT; case ',': return ATOM_COMMA; case '\\': return ATOM_BSLASH; case '\"': return ATOM_QUOTE; case '\'': return ATOM_SQUOTE; case '\t': return ATOM_TAB; case '\n': return ATOM_NLINE; default: if (isalpha(c)) { return ATOM_ALPHA; } else if (isdigit(c)) { return ATOM_NUM; } break; } return ATOM_NONE; } int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) { int i; for (i = 0; str[i] != '\0'; ++i) { const enum atom atom = get_atom_id(str[i], dbg); for (int j = 0; atoms[j] != ATOM_NONE; ++j) { const int is_done = (inv) ? (atom == atoms[j]) : (atom != atoms[j]); if (is_done) { return i; } } } return i; } int lex(char *str, int dbg) { int in_inst_stmt = 0; lexeme *lex_start = NULL; lexeme *lex_end = NULL; for (int i = 0; str[i] != '\0'; ++i) { atom atom_id = get_atom_id(str[i], dbg); switch (atom_id) { case ATOM_PERCENT: if (get_atom_id(str[++i]) == ATOM_PERCENT) { in_inst_stmt = !in_inst_stmt; } break; case ATOM_LBRACK: break; case ATOM_ALPHA: do { const int ident_len = get_atom_span(&str[i], (const atom []) { ATOM_ALPHA, ATOM_USCORE, ATOM_NUM, ATOM_NONE }, 0, dbg); char *ident = calloc(ident_len+1, sizeof(char)); } while(0); break; } } }