From 96393257a43ac52f2b911594d106741245dec5f0 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Fri, 4 Dec 2020 15:20:28 -0500 Subject: - Started work on writing the new version of the assembler. - Did alot of stuff in the emulator. - Did alot of stuff in the SuB Suite. --- lexer/lexer.c | 311 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 lexer/lexer.c (limited to 'lexer/lexer.c') diff --git a/lexer/lexer.c b/lexer/lexer.c new file mode 100644 index 0000000..1bc7144 --- /dev/null +++ b/lexer/lexer.c @@ -0,0 +1,311 @@ +#include "asmmon.h" +#include "cpu/sux/cpu.h" +#include "lexer.h" + +/* Name: isidstart() + * Desc: Checks if the character is the start of a valid identifier. + * Args: + * c: The character to check. + * Return value: Returns true if the character is the start of a valid identifier, and false if not. + */ + +int isidstart(char c) { + return (isalpha(c) || c == '@' || c == '_'); +} + +/* Name: isidchar() + * Desc: Checks if the character is a valid identifier character. + * Args: + * c: The character to check. + * Return value: Returns true if the character is a valid identifier character, and false if not. + */ + +int isidchar(char c) { + return (isalnum(c) || c == '_'); +} + +/* Name: skip() + * Desc: Skips any whitespace within a string. + * Args: + * s: The string to check. + * Return value: Returns a pointer to the content after the whitespace. + */ + +char *skip(char *s) { + for (; isspace(*s); s++); + return s; +} + +/* Name: iseol() + * Desc: Checks if the character is an end of line character. + * Args: + * c: The character to check. + * Return value: Returns true if the character is an end of line character, and false if not. + */ + +int iseol(char c) { + return (c == '\0' || c == ';'); +} + +/* Name: eol() + * Desc: Checks for end of line, and issues an error, if end of line wasn't found. + * Args: + * s: The string to check. + * Return value: None. + */ + +void eol(char *s) { + if (ignore_trail) { + if (!iseol(*s) && !isspace(*s)) { + syntax_error(6); /* End of line was not found. */ + } + } else { + s = skip(s); + if (!iseol(*s)) { + syntax_error(6); /* End of line was not found. */ + } + } +} + +/* Name: isbadid() + * Desc: Checks if the string is an invalid identifier. + * Args: + * p: The string to check. + * len: Length of the string. + * Return value: Returns true if the string is an invalid identifier, and false if not. + */ + +int isbadid(char *p, int len) { + return (len == 1 && (*p == '@' || *p == '_')); +} + +/* Name: skip_operand() + * Desc: Skips the contents of an operand within a string, gives an error if there + * are either too many closing brackets, or missing closing brackets. + * Args: + * inst_op: Instruction operand flag. + * s: The string to check. + * Return value: Returns the content after the operand. + */ + +char *skip_operand(int inst_op, char *s) { + int brack_count = 0; /* Bracket count. */ + int done = 0; /* Loop done flag. */ + + for (char c = 0;; s++) { + c = *s; + switch (c) { + case '(': brack_count++; break; + case ')': + if (brack_count > 0) { + brack_count--; + } else { + syntax_error(3); /* Too many closing brackets. */ + } + break; + case '\'': + case '\"': s = skip_string(s, c, NULL) - 1; break; + case '\0': + case ';' : done = 1; break; + default : + if ((!inst_op || (inst_op && OPERSEP_COMMA)) && c == ',' && !brack_count) { + done = 1; + break; + } else if (inst_op && OPERSEP_WHITESPACE && isspace(c) && !brack_count) { + done = 1; + break; + } + break; + } + if (done) { + break; + } + } + if (brack_count) { + syntax_error(4); /* Missing closing brackets. */ + } + return s; +} + +/* Name: skip_local() + * Desc: Skips the contents of a local label within a string. + * Args: + * p: The string to check. + * Return value: Returns either a pointer to the content after the local label, or NULL. + */ + +char *skip_local(char *p) { + if (isidstart(*p) || isdigit(*p)) { + for (p++; isidchar(*p); p++); + } else { + p = NULL; + } + return p; +} + +/* Name: get_local_label() + * Desc: Finds a local label within a line. + * Args: + * start: The start of the line. + * Return value: Returns either the name of the local label, or NULL. + */ + +char *get_local_label(char **start) { + char *s = *start; + char *p = skip_local(s); + char *name = NULL; + if (p != NULL && *p == '@' && isidchar(p[1]) && isidstart(*s) && *s != '@') { + /* Skips the local part of a global@local label. */ + s = p+1; + p = skip_local(p); + name = make_local_label(*start, (s-1) - *start, s, p-s); + *start = skip(p); + } else if (p != NULL && p > s+1 && *s == '@') { /* @label */ + s++; + name = make_local_label(NULL, 0, s, p-s); + *start = skip(p); + } + return name; +} + +/* Name: parse_label_or_pc() + * Desc: Finds a global/local label, or the current pc character within a line. + * Args: + * start: The line to parse. + * Return value: Returns either the name of a global/local label, the current pc character, or NULL. + */ + +char *parse_label_or_pc(char **start) { + char *s = skip(*start); + char *name = parse_labeldef(start, 0); + if (name == NULL && *s == current_pc_char && !isidchar(s[1])) { + name = cnvstr(s, 1); + s = skip(s+1); + } + *start = (name) ? s : *start; + return name; +} + +/* Name: lex() + * Desc: Lexically analyze/Tokenize a line into a stream of tokens. + * Args: + * line: The line that will be lexed/tokenized. + * address: Current address of the program counter. + * bline: Current number of blank lines before the current line. + * dbg: Debugging flag. + * Return value: Returns the address of the next line. + */ + +uint64_t lex(char *line, uint64_t address, uint16_t bline, uint8_t dbg) { + char *s; + char *inst; + char *label_name; + char *ext[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1]; + char *op[MAX_OPERANDS]; + int ext_len[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1]; + int op_len[MAX_OPERANDS]; + int ext_cnt; + int op_cnt; + int inst_len; + s = line; + instruction *ip; + + while (isdelm(*s, dbg) != 1) { + label_name = parse_label_or_pc(&s); + if (label_name) { + /* We found a global/local label, or the current pc character. */ + symbol *label; + int equ_len = (*s == '='); + if (equ_len) { + /* Found an equate directive. */ + if (*label_name == current_pc_char) { + handle_org(skip(s+equ_len)); + continue; + } else { + s = skip(s+equ_len); + label = new_equate(label_name, parse_expr_tmplab(&s)); + } + } else { + /* It's just a label. */ + label = new_label_sym(0, label_name); + add_atom(0, new_label_atom(label)); + } + if (!is_local_label(label_name) && autoexport) { + label->flags |= EXPORT; + } + free(label_name); + } + /* Check for directives first. */ + s = skip(s); + if (*s == ';') { + continue; + } + if (*s == current_pc_char && s[1] == '=') { /* "*=" org directive. */ + handle_org(skip(s+2)); + continue; + } + if (handle_directive(s)) { + continue; + } + s = skip(s); + if (iseol(s)) { + continue; + } + /* Read the mnemonic name. */ + inst = s; + if (!isidstart(*s)) { + syntax_error(10); /* Identifier was expected. */ + continue; + } + #if !MAX_QUALLIFIERS + for (; *s && !isspace(*s); s++); + inst_len = s - inst; + #else + s = lex_inst(s, &inst_len, ext, ext_len, &ext_cnt); + #endif + if (!isspace(*s) && *s != '\0') { + syntax_error(2); /* No space before operand. */ + } + s = skip(s); + if (handle_struct(inst, inst_len, s)) { + continue; + } + /* Read the operand(s), separated by comma, or whitespace (unless within brackets). */ + op_cnt = 0; + while (!iseol(*s) && op_cnt < MAX_OPERANDS) { + op[op_cnt] = s; + s = skip_operand(1, s); + op_len[op_cnt] = oplen(s, op[op_cnt]); + op_cnt++; + if (ignore_trail) { + if (*s != ',') { + break; + } + s++; + } else { + s = skip(s); + if (OPERSEP_COMMA) { + if (*s == ',') { + s = skip(s+1); + } else if (!(OPERSEP_WHITESPACE)) { + break; + } + } + } + } + eol(s); + ip = new_inst(inst, inst_len, op_cnt, op, op_len); + if (ip) { + #if MAX_QUALIFIERS > 0 + int i; + for (i = 0; i < ext_cnt; i++) { + ip->qualifiers[i] = cnvstr(ext[i], ext_len[i]); + } + for (; i < MAX_QUALIFIERS; i++) { + ip->qualifiers[i] = NULL; + } + #endif + add_atom(0, new_inst_atom(ip)); + } + } +} -- cgit v1.2.3-13-gbd6f