summaryrefslogtreecommitdiff
path: root/lexer/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'lexer/lexer.c')
-rw-r--r--lexer/lexer.c311
1 files changed, 311 insertions, 0 deletions
diff --git a/lexer/lexer.c b/lexer/lexer.c
new file mode 100644
index 0000000..1bc7144
--- /dev/null
+++ b/lexer/lexer.c
@@ -0,0 +1,311 @@
+#include "asmmon.h"
+#include "cpu/sux/cpu.h"
+#include "lexer.h"
+
+/* Name: isidstart()
+ * Desc: Checks if the character can begin an identifier, i.e. it is
+ *       alphabetic (per isalpha()), an at sign, or an underscore.
+ * Args:
+ *     c: The character to check.
+ * Return value: Returns true (1) if the character is the start of a valid identifier, and false (0) if not.
+ */
+
+int isidstart(char c) {
+    /* The <ctype.h> functions are only defined for values representable as
+     * unsigned char (or EOF). Plain char may be negative, so convert first.
+     */
+    return (isalpha((unsigned char)c) || c == '@' || c == '_');
+}
+
+/* Name: isidchar()
+ * Desc: Checks if the character may appear inside an identifier, i.e. it is
+ *       alphanumeric (per isalnum()) or an underscore.
+ * Args:
+ *     c: The character to check.
+ * Return value: Returns true (1) if the character is a valid identifier character, and false (0) if not.
+ */
+
+int isidchar(char c) {
+    /* The <ctype.h> functions are only defined for values representable as
+     * unsigned char (or EOF). Plain char may be negative, so convert first.
+     */
+    return (isalnum((unsigned char)c) || c == '_');
+}
+
+/* Name: skip()
+ * Desc: Skips any leading whitespace (per isspace()) within a string.
+ * Args:
+ *     s: The string to check. Must be NUL terminated.
+ * Return value: Returns a pointer to the first non-whitespace character,
+ *               which is the terminating NUL if the rest of the string is blank.
+ */
+
+char *skip(char *s) {
+    /* The cast keeps negative char values out of isspace(), which is only
+     * defined for unsigned char values and EOF.
+     */
+    while (isspace((unsigned char)*s)) {
+        s++;
+    }
+    return s;
+}
+
+/* Name: iseol()
+ * Desc: Tells whether a character ends the meaningful part of a line:
+ *       either the string terminator, or a semicolon.
+ * Args:
+ *     c: The character to check.
+ * Return value: Returns true (1) if the character is an end of line character, and false (0) if not.
+ */
+
+int iseol(char c) {
+    switch (c) {
+        case '\0':
+        case ';' :
+            return 1;
+        default  :
+            return 0;
+    }
+}
+
+/* Name: eol()
+ * Desc: Checks for end of line, and issues syntax error 6 if it wasn't found.
+ *       When ignore_trail is set, only the character at s is examined, and
+ *       it may be either an end of line character or whitespace.
+ *       Otherwise, whitespace is skipped first, and the character after it
+ *       must be an end of line character.
+ * Args:
+ *     s: The string to check.
+ * Return value: None.
+ */
+
+void eol(char *s) {
+    if (!ignore_trail) {
+        /* Trailing whitespace is fine, but nothing else may follow it. */
+        s = skip(s);
+        if (!iseol(*s)) {
+            syntax_error(6); /* End of line was not found. */
+        }
+        return;
+    }
+    /* Cast before isspace(): plain char may be negative, which the
+     * <ctype.h> functions do not accept.
+     */
+    if (!iseol(*s) && !isspace((unsigned char)*s)) {
+        syntax_error(6); /* End of line was not found. */
+    }
+}
+
+/* Name: isbadid()
+ * Desc: Checks if the string is an invalid identifier, which is the case
+ *       when it consists of nothing but a single at sign, or a single underscore.
+ * Args:
+ *     p: The string to check. Only read when len is 1.
+ *     len: Length of the string.
+ * Return value: Returns true (1) if the string is an invalid identifier, and false (0) if not.
+ */
+
+int isbadid(char *p, int len) {
+    /* Anything that isn't exactly one character long is never rejected here. */
+    if (len != 1) {
+        return 0;
+    }
+    return (*p == '@' || *p == '_');
+}
+
+/* Name: skip_operand()
+ * Desc: Skips the contents of an operand within a string. Issues syntax error 3
+ *       for a closing bracket without a matching opening bracket, and
+ *       syntax error 4 if brackets are still open when the operand ends.
+ *       Quoted strings are stepped over with skip_string().
+ *       The operand ends at the string terminator, at a semicolon, or, outside of brackets:
+ *           - at a comma, if inst_op is zero, or OPERSEP_COMMA is set.
+ *           - at whitespace, if inst_op is non-zero, and OPERSEP_WHITESPACE is set.
+ * Args:
+ *     inst_op: Instruction operand flag.
+ *     s: The string to check.
+ * Return value: Returns a pointer to the character that ended the operand
+ *               (the character itself is not consumed).
+ */
+
+char *skip_operand(int inst_op, char *s) {
+    int brack_count = 0; /* Number of currently open brackets. */
+    int done = 0; /* Set once the end of the operand has been reached. */
+
+    for (;; s++) {
+        char c = *s;
+        switch (c) {
+            case '(': brack_count++; break;
+            case ')':
+                if (brack_count > 0) {
+                    brack_count--;
+                } else {
+                    syntax_error(3); /* Too many closing brackets. */
+                }
+                break;
+            case '\'':
+            /* The -1 compensates for the s++ of the loop. */
+            case '\"': s = skip_string(s, c, NULL) - 1; break;
+            case '\0':
+            case ';' : done = 1; break;
+            default  :
+                if ((!inst_op || (inst_op && OPERSEP_COMMA)) && c == ',' && !brack_count) {
+                    done = 1;
+                /* Cast before isspace(): plain char may be negative. */
+                } else if (inst_op && OPERSEP_WHITESPACE && isspace((unsigned char)c) && !brack_count) {
+                    done = 1;
+                }
+                break;
+        }
+        if (done) {
+            /* Leave s on the terminating character. */
+            break;
+        }
+    }
+    if (brack_count) {
+        syntax_error(4); /* Missing closing brackets. */
+    }
+    return s;
+}
+
+/* Name: skip_local()
+ * Desc: Skips the contents of a local label within a string.
+ *       The first character must satisfy isidstart(), or be a decimal digit,
+ *       and every following character that satisfies isidchar() is skipped.
+ * Args:
+ *     p: The string to check.
+ * Return value: Returns either a pointer to the content after the local label,
+ *               or NULL if the first character can't start a local label.
+ */
+
+char *skip_local(char *p) {
+    /* Cast before isdigit(): plain char may be negative, which the
+     * <ctype.h> functions do not accept.
+     */
+    if (!isidstart(*p) && !isdigit((unsigned char)*p)) {
+        return NULL;
+    }
+    for (p++; isidchar(*p); p++);
+    return p;
+}
+
+/* Name: get_local_label()
+ * Desc: Finds a local label at the given position within a line.
+ *       Two forms are recognized:
+ *           - global@local, where the global part starts with an identifier
+ *             start character other than the at sign.
+ *           - @local, with at least one character after the at sign.
+ *       The name is built by make_local_label().
+ * Args:
+ *     start: Pointer to the current position within the line. On success, it's
+ *            moved past the label, and any whitespace after it. Otherwise, it's left alone.
+ * Return value: Returns either the name of the local label, or NULL.
+ */
+
+char *get_local_label(char **start) {
+    char *begin = *start;
+    char *end = skip_local(begin);
+    char *name = NULL;
+
+    if (end == NULL) {
+        /* The line doesn't start with anything that could be a label. */
+        return NULL;
+    }
+    if (*end == '@' && isidchar(end[1]) && isidstart(*begin) && *begin != '@') {
+        /* global@local: end is on the at sign, so the local part starts right after it. */
+        char *local = end+1;
+        /* Skips the local part of a global@local label. */
+        end = skip_local(end);
+        name = make_local_label(*start, (local-1) - *start, local, end-local);
+        *start = skip(end);
+    } else if (end > begin+1 && *begin == '@') { /* @label */
+        char *local = begin+1;
+        name = make_local_label(NULL, 0, local, end-local);
+        *start = skip(end);
+    }
+    return name;
+}
+
+/* Name: parse_label_or_pc()
+ * Desc: Finds a global/local label, or the current pc character within a line.
+ *       A label definition is tried first, with parse_labeldef(). If there is none,
+ *       a lone current_pc_char (one not followed by an identifier character) is accepted.
+ * Args:
+ *     start: Pointer to the current position within the line. On success, it's
+ *            moved past what was parsed, and any whitespace after it. Otherwise, it's left alone.
+ * Return value: Returns either the name of a global/local label, the current pc character
+ *               (as a string made by cnvstr()), or NULL.
+ */
+
+char *parse_label_or_pc(char **start) {
+    char *name = parse_labeldef(start, 0);
+    /* Take the position only after parse_labeldef() has run. Taking it before
+     * would store the position in front of the label back into *start below.
+     * NOTE(review): assumes parse_labeldef() advances *start past a label it
+     * accepts, and leaves it alone when it returns NULL - confirm.
+     */
+    char *s = skip(*start);
+    if (name == NULL && *s == current_pc_char && !isidchar(s[1])) {
+        name = cnvstr(s, 1);
+        s = skip(s+1);
+    }
+    if (name) {
+        *start = s;
+    }
+    return name;
+}
+
+/* Name: lex()
+ * Desc: Lexically analyze/Tokenize a line into a stream of tokens.
+ *       Handles, in this order: a label, or current pc character (optionally followed
+ *       by an equate), the "*=" org directive, other directives (handle_directive()),
+ *       struct handling (handle_struct()), and finally an instruction with its operands.
+ *       Labels, and instructions are handed to add_atom().
+ * Args:
+ *     line: The line that will be lexed/tokenized.
+ *     address: Current address of the program counter.
+ *     bline: Current number of blank lines before the current line (not used by this function).
+ *     dbg: Debugging flag, passed on to isdelm().
+ * Return value: Returns address as it was passed in.
+ *               NOTE(review): nothing in this function advances address, so this may
+ *               not yet be "the address of the next line" - confirm with the caller.
+ */
+
+uint64_t lex(char *line, uint64_t address, uint16_t bline, uint8_t dbg) {
+    char *s = line;
+    char *inst;
+    char *label_name;
+    char *ext[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1];
+    char *op[MAX_OPERANDS];
+    int ext_len[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1];
+    int op_len[MAX_OPERANDS];
+    int ext_cnt = 0; /* Stays zero when no qualifiers are parsed. */
+    int op_cnt;
+    int inst_len;
+    instruction *ip;
+
+    /* NOTE(review): several paths below use continue without moving s.
+     * Whether the loop then terminates depends on isdelm(), and on the
+     * handlers, which aren't visible here - confirm.
+     */
+    while (isdelm(*s, dbg) != 1) {
+        label_name = parse_label_or_pc(&s);
+        if (label_name) {
+            /* We found a global/local label, or the current pc character. */
+            symbol *label;
+            int equ_len = (*s == '=');
+            if (equ_len) {
+                /* Found an equate directive. */
+                if (*label_name == current_pc_char) {
+                    handle_org(skip(s+equ_len));
+                    /* The name is freed on every other path, so free it here too. */
+                    free(label_name);
+                    continue;
+                } else {
+                    s = skip(s+equ_len);
+                    label = new_equate(label_name, parse_expr_tmplab(&s));
+                }
+            } else {
+                /* It's just a label. */
+                label = new_label_sym(0, label_name);
+                add_atom(0, new_label_atom(label));
+            }
+            if (!is_local_label(label_name) && autoexport) {
+                label->flags |= EXPORT;
+            }
+            free(label_name);
+        }
+        /* Check for directives first. */
+        s = skip(s);
+        if (*s == ';') {
+            continue;
+        }
+        if (*s == current_pc_char && s[1] == '=') { /* "*=" org directive. */
+            handle_org(skip(s+2));
+            continue;
+        }
+        if (handle_directive(s)) {
+            continue;
+        }
+        s = skip(s);
+        /* iseol() takes a character, not a pointer. */
+        if (iseol(*s)) {
+            continue;
+        }
+        /* Read the mnemonic name. */
+        inst = s;
+        if (!isidstart(*s)) {
+            syntax_error(10); /* Identifier was expected. */
+            continue;
+        }
+        /* The macro name has to be spelled correctly here. An unknown name
+         * evaluates to zero in #if, which would always select the first branch.
+         */
+        #if !MAX_QUALIFIERS
+        for (; *s && !isspace((unsigned char)*s); s++);
+        inst_len = s - inst;
+        #else
+        s = lex_inst(s, &inst_len, ext, ext_len, &ext_cnt);
+        #endif
+        if (!isspace((unsigned char)*s) && *s != '\0') {
+            syntax_error(2); /* No space before operand. */
+        }
+        s = skip(s);
+        if (handle_struct(inst, inst_len, s)) {
+            continue;
+        }
+        /* Read the operand(s), separated by comma, or whitespace (unless within brackets). */
+        op_cnt = 0;
+        while (!iseol(*s) && op_cnt < MAX_OPERANDS) {
+            op[op_cnt] = s;
+            s = skip_operand(1, s);
+            op_len[op_cnt] = oplen(s, op[op_cnt]);
+            op_cnt++;
+            if (ignore_trail) {
+                /* Only a comma directly after the operand continues the list. */
+                if (*s != ',') {
+                    break;
+                }
+                s++;
+            } else {
+                s = skip(s);
+                if (OPERSEP_COMMA) {
+                    if (*s == ',') {
+                        s = skip(s+1);
+                    } else if (!(OPERSEP_WHITESPACE)) {
+                        break;
+                    }
+                }
+            }
+        }
+        eol(s);
+        ip = new_inst(inst, inst_len, op_cnt, op, op_len);
+        if (ip) {
+            #if MAX_QUALIFIERS > 0
+            int i;
+            /* Copy the parsed qualifiers, and clear the unused slots. */
+            for (i = 0; i < ext_cnt; i++) {
+                ip->qualifiers[i] = cnvstr(ext[i], ext_len[i]);
+            }
+            for (; i < MAX_QUALIFIERS; i++) {
+                ip->qualifiers[i] = NULL;
+            }
+            #endif
+            add_atom(0, new_inst_atom(ip));
+        }
+    }
+    /* The function is declared to return a value, so return one explicitly. */
+    return address;
+}