summary refs log tree commit diff
path: root/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer.c')
-rw-r--r-- lexer.c 154
1 files changed, 80 insertions, 74 deletions
diff --git a/lexer.c b/lexer.c
index 5a79325..6ed6cf4 100644
--- a/lexer.c
+++ b/lexer.c
@@ -153,14 +153,9 @@ uint16_t get_comment(const char *com, uint8_t dbg) {
break;
}
}
- if (comment[i] == NULL) {
+ if (comment[i] == NULL || i == comidx) {
if (dbg) {
printf("get_comment(): oof, the index $%04X is NULL.\n", i);
- }
- return 0xFFFF;
- }
- if (i == comidx) {
- if (dbg) {
printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com);
}
return 0xFFFF;
@@ -175,25 +170,17 @@ uint16_t get_string(const char *str, uint8_t dbg) {
uint16_t i = 0;
uint8_t isstr = 0;
for (; i < stridx; i++) {
- if (string[i] != NULL) {
+ if (isstr || string[i] == NULL) {
+ break;
+ } else {
if (str[0] == string[i][0]) {
isstr = !strcmp(str, string[i]);
}
- } else {
- break;
- }
- if (isstr) {
- break;
}
}
- if (string[i] == NULL) {
+ if (string[i] == NULL || i == stridx) {
if (dbg) {
printf("get_string(): oof, the index $%04X is NULL.\n", i);
- }
- return 0xFFFF;
- }
- if (i == stridx) {
- if (dbg) {
printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str);
}
return 0xFFFF;
@@ -405,12 +392,10 @@ uint64_t update_addr(struct line *ln, uint64_t address, uint8_t fixup, uint16_t
uint16_t find_line(struct line *l, uint16_t ln, uint8_t dbg) {
uint16_t i = 0;
for (; i < lineidx && l[i].linenum != ln; i++);
- if (l[i].linenum == ln) {
- if (dbg) {
+ if (dbg) {
+ if (l[i].linenum == ln) {
printf("find_line(): Found line number %u, at line index %X.\n", ln, i);
}
- }
- if (dbg) {
printf("find_line(): linenum: %u, i: %X\n", l[i].linenum, i);
}
return i;
@@ -425,15 +410,22 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
uint16_t symid = 0;
uint16_t line = 0;
lex_type = 0xFF;
+
uint8_t k = 0;
+ uint8_t ch = 0;
uint8_t rs = 0;
- uint8_t isop = 0;
uint8_t base = 0;
+
+ uint8_t isop = 0;
int num = 0;
int isch = 0;
+ uint8_t isesc = 0;
+ uint8_t islinenum;
+
int16_t ln = -1;
+
char lnum[6];
- uint8_t islinenum;
+
uint8_t space = 0;
uint8_t tab = 0;
uint8_t isstart = 1;
@@ -505,11 +497,9 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
lexeme[j] = '\0';
if (!isop) {
for (k = 0; k < 6; k++) {
- if (tolower(lexeme[0]) == dir_t[k][0]) {
- if (!strcasecmp(lexeme, dir_t[k])) {
- lex_type = TOK_DIR;
- break;
- }
+ if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) {
+ lex_type = TOK_DIR;
+ break;
}
}
l[line].dir = k;
@@ -549,28 +539,18 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
string[strid] = malloc(j+1);
memcpy(string[strid], lexeme, j+1);
l[line].str = strid;
- if (dbg) {
- printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
- }
stridx += (line == lineidx);
} else {
l[line].str = strid;
- if (dbg) {
- printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
- }
+ }
+ if (dbg) {
+ printf("lex(): str[0x%04X]: %s\n", strid, string[strid]);
}
if (l[line].dir == DIR_INCLUDE) {
l[line].incl = strid;
}
lex_type = TOK_STRING;
break;
- case '#':
- lexeme[j] = '#';
- lexeme[j+1] = '\0';
- lexeme[j+2] = '\0';
- l[line].am = IMM;
- lex_type = TOK_IMM;
- break;
if (str[i] == '$') {
case '$': base = 16;
} else if (str[i] == '%') {
@@ -586,10 +566,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
if (l[line].cm != 0xFF) {
case TOK_PLUS :
case TOK_MINUS: l[line].aop = strtoull(lexeme, NULL, base);
- l[line].aopbase = (base & 16) ? TOK_HEX : TOK_BIN;
+ l[line].aopbase = (base & 16) ? BASE_HEX : BASE_BIN;
} else {
case TOK_SYM: l[line].op = strtoull(lexeme, NULL, base);
- l[line].opbase = (base & 16) ? TOK_HEX : TOK_BIN;
+ l[line].opbase = (base & 16) ? BASE_HEX : BASE_BIN;
}
if (lex_type == TOK_SYM) {
mksymbol(sym, l[line].op, 1, 0, 0, dbg);
@@ -604,23 +584,59 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
lex_type = (base & 16) ? TOK_HEX : TOK_BIN;
break;
- case '+':
- lexeme[j] = '+';
- lexeme[j+1] = '\0';
- l[line].cm = 0;
- lex_type = TOK_PLUS;
- break;
- case '-':
- lexeme[j] = '-';
- lexeme[j+1] = '\0';
- l[line].cm = 1;
- lex_type = TOK_MINUS;
+ case '\'':
+ i++;
+ k = j;
+ while (str[i] != '\'' || isesc) {
+ isesc = (str[i] == '\\' && str[i-1] != '\\');
+ lexeme[j++] = str[i++];
+ }
+ isesc = 0;
+ lexeme[j] = '\0';
+ switch (lexeme[k]) {
+ case '\\':
+ switch (lexeme[++k]) {
+ case 'n' : ch = '\n'; break;
+ case 'r' : ch = '\r'; break;
+ case 'b' : ch = '\b'; break;
+ case '\'': ch = '\''; break;
+ case '\"': ch = '\"'; break;
+ case '\\': ch = '\\'; break;
+ }
+ break;
+ default: ch = lexeme[k];
+ }
+ switch (lex_type) {
+ case TOK_PLUS :
+ case TOK_MINUS:
+ l[line].aop = ch;
+ l[line].aopbase = BASE_CHAR;
+ break;
+ default:
+ l[line].op = ch;
+ l[line].opbase = BASE_CHAR;
+ break;
+ }
+ lex_type = TOK_CHAR;
break;
case '(':
- lexeme[j] = '(';
- lexeme[j+1] = '\0';
- lexeme[j+2] = '\0';
- l[line].am = IND;
+ if (str[i] == '#' || str[i] == '(') {
+ if (str[i] == '#') {
+ case '#': lex_type = TOK_IMM;
+ }
+ l[line].am = (str[i] == '#') ? IMM : IND;
+ } else {
+ case '+':
+ case '-': l[line].cm = (str[i] == '-');
+ lex_type = (str[i] == '-') ? TOK_MINUS : TOK_PLUS;
+ }
+ memset(lexeme, 0, strlen(lexeme)+1);
+ lexeme[j] = str[i];
+ if (str[i] == '=') {
+ case '=': i++;
+ l[line].issym = 1;
+ lex_type = TOK_SYM;
+ }
break;
case ')':
i++;
@@ -679,13 +695,6 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
printf("lex(): isfixup: %u\n", isfixup);
}
break;
- case '=':
- i++;
- lexeme[j] = '=';
- lexeme[j+1] = 0;
- l[line].issym = 1;
- lex_type = TOK_SYM;
- break;
case ';':
i++;
while (str[i] != '\0' && str[i] != '\n') {
@@ -702,15 +711,12 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
comment[comid] = malloc(j+1);
memcpy(comment[comid], lexeme, j+1);
l[line].com = comid;
- if (dbg) {
- printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
- }
comidx += (line == lineidx);
} else {
l[line].com = comid;
- if (dbg) {
- printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
- }
+ }
+ if (dbg) {
+ printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]);
}
lex_type = TOK_COMMENT;
break;
@@ -794,10 +800,10 @@ uint64_t lex(char *str, struct line *l, uint64_t address, uint8_t dbg) {
if (l[line].cm != 0xFF) {
case TOK_PLUS :
case TOK_MINUS: l[line].aop = strtoull(lexeme, NULL, 10);
- l[line].aopbase = TOK_DEC;
+ l[line].aopbase = BASE_DEC;
} else {
case TOK_SYM: l[line].op = strtoull(lexeme, NULL, 10);
- l[line].opbase = TOK_DEC;
+ l[line].opbase = BASE_DEC;
}
if (lex_type == TOK_SYM) {
mksymbol(sym, l[line].op, 1, 0, 0, dbg);