blob: 275bcdd62fde4bf345f775bbc5e30c10fc02add8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lexer.h"
#include "misc.h"
/*
 * Classify a single character as a lexer atom.
 *
 * Punctuation and whitespace characters listed in the switch map to their
 * dedicated ATOM_* constant; any other alphabetic character yields ATOM_ALPHA
 * and any decimal digit yields ATOM_NUM.
 *
 * c:   character to classify.
 * dbg: debug flag; accepted for signature compatibility, not used here.
 *
 * Returns the matching atom, or ATOM_NONE when the character is not
 * recognised (this includes the NUL terminator).
 *
 * NOTE(review): the other functions in this file call get_atom_id(), not
 * get_token_id() -- confirm which name lexer.h actually declares.
 */
atom get_token_id(const char c, const int dbg) {
    (void)dbg;

    switch (c) {
    case '(': return ATOM_LBRACK;
    case ')': return ATOM_RBRACK;
    case '/': return ATOM_SLASH;
    case '+': return ATOM_PLUS;
    case '-': return ATOM_MINUS;
    case '*': return ATOM_ASTR;
    case '&': return ATOM_AMPR;
    case '|': return ATOM_PIPE;
    case '^': return ATOM_CARROT;
    case '#': return ATOM_HASH;
    case ':': return ATOM_COL;
    case ';': return ATOM_SCOL;
    case ' ': return ATOM_SPACE;
    case '_': return ATOM_USCORE;
    case '=': return ATOM_EQUAL;
    case '.': return ATOM_DOT;
    case '?': return ATOM_QMARK;
    case '!': return ATOM_BANG;
    case '<': return ATOM_LT;
    case '>': return ATOM_GT;
    /* '%' must appear exactly once: duplicate case labels do not compile.
     * ATOM_PERCENT is the constant lex() matches on. */
    case '%': return ATOM_PERCENT;
    case ',': return ATOM_COMMA;
    case '\\': return ATOM_BSLASH;
    case '\"': return ATOM_QUOTE;
    case '\'': return ATOM_SQUOTE;
    case '\t': return ATOM_TAB;
    case '\n': return ATOM_NLINE;
    default:
        break;
    }

    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour. */
    const unsigned char uc = (unsigned char)c;
    if (isalpha(uc)) {
        return ATOM_ALPHA;
    }
    if (isdigit(uc)) {
        return ATOM_NUM;
    }
    return ATOM_NONE;
}
/*
 * Measure the length of the leading run of `str` selected by an atom set.
 *
 * str:   NUL-terminated string to scan.
 * atoms: set of atoms, terminated by an ATOM_NONE entry.
 * inv:   0     -> count leading characters whose atom IS in `atoms`
 *                 (strspn-like);
 *        non-0 -> count leading characters whose atom is NOT in `atoms`
 *                 (strcspn-like).
 * dbg:   debug flag, forwarded to get_atom_id().
 *
 * Returns the number of characters in the run, i.e. the index of the first
 * character that ends it, or the string length if nothing ends it.
 */
int get_atom_span(const char *str, const atom *atoms, int inv, int dbg) {
    int i;
    for (i = 0; str[i] != '\0'; ++i) {
        const atom cur = get_atom_id(str[i], dbg);

        /* Decide membership against the whole set before stopping: comparing
         * against one entry at a time would end a non-inverted span as soon
         * as the atom differed from any single member of the set. */
        int in_set = 0;
        for (int j = 0; atoms[j] != ATOM_NONE; ++j) {
            if (cur == atoms[j]) {
                in_set = 1;
                break;
            }
        }

        /* Normal mode stops on the first non-member, inverted mode stops on
         * the first member. */
        if (in_set == (inv != 0)) {
            break;
        }
    }
    return i;
}
/*
 * Walk `str` one atom at a time.
 *
 * Currently handled:
 *   - "%%" toggles the in_inst_stmt flag; both characters are consumed.
 *     A lone '%' is consumed on its own and the following character is
 *     scanned normally.
 *   - '(' is recognised and ignored.
 *   - A run starting with a letter and continuing with letters, digits or
 *     underscores is measured with get_atom_span(), copied into a temporary
 *     NUL-terminated buffer, and skipped over.
 *
 * TODO: no lexeme is built from the scanned input yet; in_inst_stmt and the
 * identifier copy are computed but not consumed by anything in this function.
 *
 * str: NUL-terminated input; it is only read here.
 * dbg: debug flag, forwarded to get_atom_id() / get_atom_span().
 *
 * Returns 0 once the whole string has been scanned, or -1 if allocating the
 * identifier buffer fails.
 */
int lex(char *str, int dbg) {
    int in_inst_stmt = 0;

    for (int i = 0; str[i] != '\0'; ++i) {
        const atom atom_id = get_atom_id(str[i], dbg);

        switch (atom_id) {
        case ATOM_PERCENT:
            /* Peek instead of advancing: str[i] is not NUL, so str[i + 1] is
             * at worst the terminator. Only consume the second '%' when it is
             * really there, so the loop increment can never step past the
             * end of the string. */
            if (get_atom_id(str[i + 1], dbg) == ATOM_PERCENT) {
                in_inst_stmt = !in_inst_stmt;
                ++i;
            }
            break;

        case ATOM_LBRACK:
            break;

        case ATOM_ALPHA: {
            const int ident_len = get_atom_span(&str[i], (const atom []) {
                ATOM_ALPHA,
                ATOM_USCORE,
                ATOM_NUM,
                ATOM_NONE
            }, 0, dbg);

            /* calloc zero-fills, so the copy below stays NUL-terminated. */
            char *ident = calloc((size_t)ident_len + 1, sizeof *ident);
            if (ident == NULL) {
                return -1;
            }
            memcpy(ident, &str[i], (size_t)ident_len);

            /* Nothing takes ownership of the identifier yet, so release it
             * here rather than leaking one buffer per identifier. */
            free(ident);

            /* Leave the cursor on the identifier's last character; the loop
             * increment moves past it. Guarded so a span of 0 or 1 can never
             * move the cursor backwards. */
            if (ident_len > 1) {
                i += ident_len - 1;
            }
            break;
        }

        default:
            break;
        }
    }

    (void)in_inst_stmt;
    return 0;
}
|