summary refs log tree commit diff
path: root/igen/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'igen/lexer.c')
-rw-r--r-- igen/lexer.c 91
1 files changed, 91 insertions, 0 deletions
diff --git a/igen/lexer.c b/igen/lexer.c
new file mode 100644
index 0000000..275bcdd
--- /dev/null
+++ b/igen/lexer.c
@@ -0,0 +1,91 @@
+#include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+#include "misc.h"
+
+/*
+ * Classify a single character into its atom kind.
+ *
+ * c:   character to classify.
+ * dbg: debug flag; accepted for interface compatibility, currently unused.
+ *
+ * Returns the ATOM_* constant for the punctuation/whitespace characters
+ * listed below, ATOM_ALPHA for letters, ATOM_NUM for decimal digits and
+ * ATOM_NONE for anything else.
+ */
+atom get_token_id(const char c, const int dbg) {
+    (void)dbg;
+
+    switch (c) {
+    case '(': return ATOM_LBRACK;
+    case ')': return ATOM_RBRACK;
+    case '/': return ATOM_SLASH;
+    case '+': return ATOM_PLUS;
+    case '-': return ATOM_MINUS;
+    case '*': return ATOM_ASTR;
+    case '&': return ATOM_AMPR;
+    case '|': return ATOM_PIPE;
+    case '^': return ATOM_CARROT;
+    case '#': return ATOM_HASH;
+    case ':': return ATOM_COL;
+    case ';': return ATOM_SCOL;
+    case ' ': return ATOM_SPACE;
+    case '_': return ATOM_USCORE;
+    case '=': return ATOM_EQUAL;
+    case '.': return ATOM_DOT;
+    case '?': return ATOM_QMARK;
+    case '!': return ATOM_BANG;
+    case '<': return ATOM_LT;
+    case '>': return ATOM_GT;
+    /*
+     * '%' used to appear twice (ATOM_PRCNT and ATOM_PERCENT); duplicate
+     * case labels do not compile. ATOM_PERCENT is kept because it is the
+     * constant lex() switches on.
+     */
+    case '%': return ATOM_PERCENT;
+    case ',': return ATOM_COMMA;
+    case '\\': return ATOM_BSLASH;
+    case '\"': return ATOM_QUOTE;
+    case '\'': return ATOM_SQUOTE;
+    case '\t': return ATOM_TAB;
+    case '\n': return ATOM_NLINE;
+    default:
+        /*
+         * <ctype.h> classifiers require a value representable as
+         * unsigned char (or EOF); a negative plain char would be
+         * undefined behaviour, hence the casts.
+         */
+        if (isalpha((unsigned char)c)) {
+            return ATOM_ALPHA;
+        } else if (isdigit((unsigned char)c)) {
+            return ATOM_NUM;
+        }
+        break;
+    }
+    return ATOM_NONE;
+}
+
+/*
+ * Measure the length of the leading run of characters in str, judged by
+ * atom kind.
+ *
+ * str:   NUL-terminated input to scan.
+ * atoms: set of atom kinds, terminated by ATOM_NONE.
+ * inv:   0  -> the run consists of characters whose kind IS in atoms;
+ *        !0 -> the run consists of characters whose kind is NOT in atoms.
+ * dbg:   debug flag, forwarded to the classifier.
+ *
+ * Returns the index of the first character that ends the run, or the
+ * length of str if the run reaches the terminator.
+ *
+ * NOTE(review): this calls get_atom_id(), while this file defines
+ * get_token_id() with the same signature -- confirm that get_atom_id() is
+ * declared in lexer.h and is the intended classifier.
+ */
+int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) {
+    int i;
+    for (i = 0; str[i] != '\0'; ++i) {
+        const atom id = get_atom_id(str[i], dbg);
+
+        /*
+         * Decide membership against the whole set before judging the
+         * character. Testing "differs from atoms[j]" entry by entry ends
+         * the run on the first non-matching entry, which makes every
+         * multi-entry set yield a span of 0.
+         */
+        int in_set = 0;
+        for (int j = 0; atoms[j] != ATOM_NONE; ++j) {
+            if (id == atoms[j]) {
+                in_set = 1;
+                break;
+            }
+        }
+
+        const int is_done = (inv) ? in_set : !in_set;
+        if (is_done) {
+            return i;
+        }
+    }
+    return i;
+}
+
+/*
+ * Walk str one character at a time and react to its atom kind.
+ *
+ * str: NUL-terminated input to lex.
+ * dbg: debug flag, forwarded to the classifier helpers.
+ *
+ * Currently handled:
+ *   - '%' : consumes the following character (if any); when that is also
+ *           '%', toggles the "inside instruction statement" flag.
+ *   - '(' : recognised, no action yet.
+ *   - letter : measures the identifier (letters, '_', digits) starting
+ *           here, takes a private copy of it and skips past it.
+ *
+ * Returns 0 when the whole string was scanned, -1 if an allocation fails.
+ * NOTE(review): the original had no return statement, so this convention
+ * is new -- confirm it against the declaration and callers in lexer.h.
+ * NOTE(review): calls get_atom_id(), while this file defines
+ * get_token_id() -- confirm which name lexer.h declares.
+ */
+int lex(char *str, int dbg) {
+    int in_inst_stmt = 0;
+    lexeme *lex_start = NULL;
+    lexeme *lex_end = NULL;
+
+    for (int i = 0; str[i] != '\0'; ++i) {
+        const atom atom_id = get_atom_id(str[i], dbg);
+        switch (atom_id) {
+        case ATOM_PERCENT:
+            /*
+             * Only step forward when there is a next character; stepping
+             * onto the terminator would let the loop increment walk past
+             * the end of the string.
+             */
+            if (str[i + 1] != '\0' &&
+                get_atom_id(str[++i], dbg) == ATOM_PERCENT) {
+                in_inst_stmt = !in_inst_stmt;
+            }
+            break;
+        case ATOM_LBRACK:
+            break;
+        case ATOM_ALPHA: {
+            const int ident_len = get_atom_span(&str[i], (const atom []) {
+                ATOM_ALPHA,
+                ATOM_USCORE,
+                ATOM_NUM,
+                ATOM_NONE
+            }, 0, dbg);
+            /* +1 keeps the copy NUL-terminated (calloc zero-fills). */
+            char *ident = calloc((size_t)ident_len + 1, sizeof *ident);
+            if (ident == NULL) {
+                return -1;
+            }
+            memcpy(ident, &str[i], (size_t)ident_len);
+
+            /* Nothing consumes the identifier yet; release it so it does
+             * not leak once per alphabetic character. */
+            free(ident);
+
+            /* Skip the rest of the identifier; the loop's ++i moves past
+             * its last character. */
+            if (ident_len > 1) {
+                i += ident_len - 1;
+            }
+            break;
+        }
+        default:
+            break;
+        }
+    }
+
+    /* State that is tracked but not consumed yet. */
+    (void)in_inst_stmt;
+    (void)lex_start;
+    (void)lex_end;
+    return 0;
+}