summaryrefslogtreecommitdiff
path: root/igen/lexer.c
blob: 275bcdd62fde4bf345f775bbc5e30c10fc02add8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lexer.h"
#include "misc.h"

/**
 * Classify a single character as a lexer atom.
 *
 * Punctuation and whitespace characters listed in the switch map to their
 * dedicated ATOM_* constant; any other character is classified with
 * isalpha()/isdigit() as ATOM_ALPHA or ATOM_NUM.
 *
 * NOTE(review): the callers in this file (get_atom_span() and lex()) spell
 * this function `get_atom_id` -- confirm which name lexer.h declares.
 *
 * @param c   Character to classify.
 * @param dbg Debug flag; not used by this function.
 * @return    The matching ATOM_* constant, or ATOM_NONE when the character
 *            falls into none of the known classes (this includes '\0').
 */
atom get_token_id(const char c, const int dbg) {
	(void)dbg;

	switch (c) {
		case '(': return ATOM_LBRACK;
		case ')': return ATOM_RBRACK;
		case '/': return ATOM_SLASH;
		case '+': return ATOM_PLUS;
		case '-': return ATOM_MINUS;
		case '*': return ATOM_ASTR;
		case '&': return ATOM_AMPR;
		case '|': return ATOM_PIPE;
		case '^': return ATOM_CARROT;
		case '#': return ATOM_HASH;
		case ':': return ATOM_COL;
		case ';': return ATOM_SCOL;
		case ' ': return ATOM_SPACE;
		case '_': return ATOM_USCORE;
		case '=': return ATOM_EQUAL;
		case '.': return ATOM_DOT;
		case '?': return ATOM_QMARK;
		case '!': return ATOM_BANG;
		case '<': return ATOM_LT;
		case '>': return ATOM_GT;
		/*
		 * '%' must appear exactly once: duplicate case labels do not
		 * compile. ATOM_PERCENT is the constant lex() switches on.
		 */
		case '%': return ATOM_PERCENT;
		case ',': return ATOM_COMMA;
		case '\\': return ATOM_BSLASH;
		case '\"': return ATOM_QUOTE;
		case '\'': return ATOM_SQUOTE;
		case '\t': return ATOM_TAB;
		case '\n': return ATOM_NLINE;
		default: {
			/*
			 * <ctype.h> functions require a value representable as
			 * unsigned char (or EOF); a negative plain char is
			 * undefined behaviour.
			 */
			const unsigned char uc = (unsigned char)c;
			if (isalpha(uc)) {
				return ATOM_ALPHA;
			}
			if (isdigit(uc)) {
				return ATOM_NUM;
			}
			break;
		}
	}
	return ATOM_NONE;
}

/**
 * Measure the leading run of characters in `str` selected by an atom set.
 *
 * With inv == 0 the run consists of characters whose atom IS a member of
 * `atoms` (strspn()-like); with inv != 0 it consists of characters whose
 * atom is NOT a member of `atoms` (strcspn()-like).
 *
 * A character only ends the run after it has been compared against the
 * whole set. Deciding on the first differing entry would make every
 * multi-element set yield a span of 0 in the non-inverted case.
 *
 * @param str   NUL-terminated string to scan.
 * @param atoms Array of atoms terminated by ATOM_NONE.
 * @param inv   Zero to span atoms inside the set, non-zero to span atoms
 *              outside the set.
 * @param dbg   Debug flag, forwarded to get_atom_id().
 * @return      Number of leading characters in the run; equals the string
 *              length when the run reaches the terminating NUL.
 */
int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) {
	int i;
	for (i = 0; str[i] != '\0'; ++i) {
		const atom cur = get_atom_id(str[i], dbg);
		int in_set = 0;

		for (int j = 0; atoms[j] != ATOM_NONE; ++j) {
			if (cur == atoms[j]) {
				in_set = 1;
				break;
			}
		}

		/* The run stops at the first character on the wrong side of the set. */
		if (inv ? in_set : !in_set) {
			return i;
		}
	}
	return i;
}

/**
 * Walk `str` one atom at a time.
 *
 * Currently recognised:
 *   - "%%"        toggles the in-instruction-statement flag;
 *   - '('         accepted, no action yet;
 *   - identifier  a run of letters, digits and underscores starting with a
 *                 letter; its text is copied into a temporary buffer.
 *
 * The function does not build any lexemes yet, so the identifier buffer is
 * released again before moving on.
 *
 * @param str NUL-terminated input to scan.
 * @param dbg Debug flag, forwarded to get_atom_id() and get_atom_span().
 * @return    0 once the whole string has been scanned, -1 if allocating
 *            the identifier buffer fails.
 */
int lex(char *str, int dbg) {
	int in_inst_stmt = 0;
	lexeme *lex_start = NULL;
	lexeme *lex_end = NULL;

	/* Not consumed yet; presumably the ends of the lexeme list -- TODO confirm. */
	(void)lex_start;
	(void)lex_end;

	for (int i = 0; str[i] != '\0'; ++i) {
		const atom atom_id = get_atom_id(str[i], dbg);
		switch (atom_id) {
			case ATOM_PERCENT:
				/*
				 * Peek instead of pre-incrementing: str[i] is not
				 * the NUL here, so str[i+1] is always readable, and
				 * the index only moves when a second '%' is really
				 * consumed. Pre-incrementing would let the loop's
				 * own ++i step past the terminator when the input
				 * ends in a single '%'.
				 */
				if (get_atom_id(str[i+1], dbg) == ATOM_PERCENT) {
					in_inst_stmt = !in_inst_stmt;
					++i;
				}
				break;
			case ATOM_LBRACK:
				break;
			case ATOM_ALPHA: {
				const int ident_len = get_atom_span(&str[i], (const atom []) {
					ATOM_ALPHA,
					ATOM_USCORE,
					ATOM_NUM,
					ATOM_NONE
				}, 0, dbg);
				char *ident = calloc((size_t)ident_len + 1, sizeof *ident);
				if (ident == NULL) {
					return -1;
				}
				/* calloc() zero-filled the buffer, so the copy stays NUL-terminated. */
				memcpy(ident, &str[i], (size_t)ident_len);

				/* TODO: hand `ident` to a lexeme; until then free it to avoid a leak. */
				free(ident);

				/*
				 * Skip the rest of the identifier; the loop's ++i
				 * supplies the final step. The guard keeps the index
				 * moving forward even if the span comes back as 0.
				 */
				if (ident_len > 1) {
					i += ident_len - 1;
				}
				break;
			}
			default:
				break;
		}
	}

	/* Tracked but not consumed by anything yet. */
	(void)in_inst_stmt;
	return 0;
}