#include "asmmon.h"
#include "cpu/sux/cpu.h"
#include "lexer.h"

/* Name: isidstart()
 * Desc: Checks if the character is the start of a valid identifier.
 * Args:
 *	c: The character to check.
 * Return value: Returns true if the character is a letter, '@', or '_',
 *               and false if not.
 */

int isidstart(char c) {
	/* <ctype.h> functions need a value representable as unsigned char. */
	return (isalpha((unsigned char)c) || c == '@' || c == '_');
}


/* Name: isidchar()
 * Desc: Checks if the character is a valid identifier character.
 * Args:
 *	c: The character to check.
 * Return value: Returns true if the character is alphanumeric, or '_',
 *               and false if not.
 */

int isidchar(char c) {
	return (isalnum((unsigned char)c) || c == '_');
}


/* Name: skip()
 * Desc: Skips any whitespace at the start of a string.
 * Args:
 *	s: The string to check.
 * Return value: Returns a pointer to the first non-whitespace character
 *               (which may be the terminating null character).
 */

char *skip(char *s) {
	while (isspace((unsigned char)*s)) {
		s++;
	}
	return s;
}


/* Name: iseol()
 * Desc: Checks if the character is an end of line character.
 * Args:
 *	c: The character to check.
 * Return value: Returns true if the character is either the null terminator,
 *               or ';', and false if not.
 */

int iseol(char c) {
	return (c == '\0' || c == ';');
}


/* Name: eol()
 * Desc: Checks for end of line, and issues an error, if end of line wasn't found.
 *       When ignore_trail is set, only the character at s is examined, and
 *       whitespace there is accepted, so anything after it is not checked.
 *       Otherwise leading whitespace is skipped first, and the next character
 *       has to be an end of line character.
 * Args:
 *	s: The string to check.
 * Return value: None.
 */

void eol(char *s) {
	if (ignore_trail) {
		if (!iseol(*s) && !isspace((unsigned char)*s)) {
			syntax_error(6); /* End of line was not found. */
		}
	} else {
		s = skip(s);
		if (!iseol(*s)) {
			syntax_error(6); /* End of line was not found. */
		}
	}
}


/* Name: isbadid()
 * Desc: Checks if the string is an invalid identifier.
 * Args:
 *	p: The string to check.
 *	len: Length of the string.
 * Return value: Returns true if the string is exactly one character long, and
 *               that character is '@', or '_', and false if not.
 */

int isbadid(char *p, int len) {
	return (len == 1 && (*p == '@' || *p == '_'));
}


/* Name: skip_operand()
 * Desc: Skips the contents of an operand within a string, gives an error if there
 *       are either too many closing brackets, or missing closing brackets.
 *       Scanning stops at the null terminator, at ';', at a ',' outside of
 *       brackets (for non-instruction operands, or when OPERSEP_COMMA is non-zero),
 *       or at whitespace outside of brackets (for instruction operands, when
 *       OPERSEP_WHITESPACE is non-zero). Quoted strings are stepped over with
 *       skip_string().
 * Args:
 *	inst_op: Instruction operand flag.
 *	s: The string to check.
 * Return value: Returns a pointer to the character that ended the operand.
 */

char *skip_operand(int inst_op, char *s) {
	int brack_count = 0;	/* Bracket count. */
	int done = 0;		/* Loop done flag. */

	for (char c = 0;; s++) {
		c = *s;
		switch (c) {
			case '(':
				brack_count++;
				break;
			case ')':
				if (brack_count > 0) {
					brack_count--;
				} else {
					syntax_error(3); /* Too many closing brackets. */
				}
				break;
			case '\'':
			case '\"':
				/* Minus one, because the loop increment steps past it again. */
				s = skip_string(s, c, NULL) - 1;
				break;
			case '\0':
			case ';' :
				done = 1;
				break;
			default  :
				if ((!inst_op || (inst_op && OPERSEP_COMMA)) && c == ',' && !brack_count) {
					done = 1;
				} else if (inst_op && OPERSEP_WHITESPACE && isspace((unsigned char)c) && !brack_count) {
					done = 1;
				}
				break;
		}
		if (done) {
			/* Leave before the increment, so that s stays on the terminator. */
			break;
		}
	}
	if (brack_count) {
		syntax_error(4); /* Missing closing brackets. */
	}
	return s;
}


/* Name: skip_local()
 * Desc: Skips the contents of a local label within a string.
 *       The first character has to be an identifier start character, or a digit,
 *       and every following identifier character is skipped.
 * Args:
 *	p: The string to check.
 * Return value: Returns either a pointer to the content after the local label, or NULL.
 */

char *skip_local(char *p) {
	if (!isidstart(*p) && !isdigit((unsigned char)*p)) {
		return NULL;
	}
	for (p++; isidchar(*p); p++);
	return p;
}


/* Name: get_local_label()
 * Desc: Finds a local label within a line.
 *       Two forms are recognized: "global@local", where the global part doesn't
 *       start with '@', and "@local", with at least one character after the '@'.
 *       On success, *start is moved past the label, and any whitespace after it.
 * Args:
 *	start: The start of the line.
 * Return value: Returns either the name of the local label
 *               (as made by make_local_label()), or NULL.
 */

char *get_local_label(char **start) {
	char *s = *start;
	char *p = skip_local(s);
	char *name = NULL;

	if (p == NULL) {
		return NULL;
	}

	if (*p == '@' && isidchar(p[1]) && isidstart(*s) && *s != '@') {
		/* Skips the local part of a global@local label. */
		s = p+1;
		p = skip_local(p);
		name = make_local_label(*start, (s-1) - *start, s, p-s);
		*start = skip(p);
	} else if (p > s+1 && *s == '@') {
		/* @label */
		s++;
		name = make_local_label(NULL, 0, s, p-s);
		*start = skip(p);
	}
	return name;
}


/* Name: parse_label_or_pc()
 * Desc: Finds a global/local label, or the current pc character within a line.
 *       On success, *start is moved past what was found, and any whitespace
 *       after it. On failure, *start is whatever parse_labeldef() left it as.
 * Args:
 *	start: The line to parse.
 * Return value: Returns either the name of a global/local label, the current pc character, or NULL.
 */

char *parse_label_or_pc(char **start) {
	char *name = parse_labeldef(start, 0);
	/* Has to be read after parse_labeldef(), otherwise a label that was found
	 * would be undone by resetting *start to the position before the label.
	 */
	char *s = skip(*start);

	if (name == NULL && *s == current_pc_char && !isidchar(s[1])) {
		name = cnvstr(s, 1);
		s = skip(s+1);
	}
	if (name != NULL) {
		*start = s;
	}
	return name;
}


/* Name: lex()
 * Desc: Lexically analyze/Tokenize a line into a stream of tokens.
 *       Handles an optional label/equate/org, then directives, and then a
 *       mnemonic with its operands, adding an atom for each label, and
 *       instruction that was found.
 * Args:
 *	line: The line that will be lexed/tokenized.
 *	address: Current address of the program counter.
 *	bline: Current number of blank lines before the current line.
 *	dbg: Debugging flag.
 * Return value: Returns the address of the next line.
 */

uint64_t lex(char *line, uint64_t address, uint16_t bline, uint8_t dbg) {
	char *s;
	char *inst;
	char *label_name;
	char *ext[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1];
	char *op[MAX_OPERANDS];
	int ext_len[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1];
	int op_len[MAX_OPERANDS];
	int ext_cnt;
	int op_cnt;
	int inst_len;
	instruction *ip;

	s = line;

	/* NOTE(review): Several of the continue paths below don't move s forward,
	 * so whether, or not this loop ends depends on isdelm(), and the handlers
	 * that get called. Confirm this against their definitions.
	 */
	while (isdelm(*s, dbg) != 1) {
		label_name = parse_label_or_pc(&s);
		if (label_name) {
			/* We found a global/local label, or the current pc character. */
			symbol *label;
			int equ_len = (*s == '=');
			if (equ_len) {
				/* Found an equate directive. */
				if (*label_name == current_pc_char) {
					handle_org(skip(s+equ_len));
					/* The name is freed on every other path, so free it here too. */
					free(label_name);
					continue;
				} else {
					s = skip(s+equ_len);
					label = new_equate(label_name, parse_expr_tmplab(&s));
				}
			} else {
				/* It's just a label. */
				label = new_label_sym(0, label_name);
				add_atom(0, new_label_atom(label));
			}
			if (!is_local_label(label_name) && autoexport) {
				label->flags |= EXPORT;
			}
			free(label_name);
		}

		/* Check for directives first. */
		s = skip(s);
		if (*s == ';') {
			continue;
		}

		if (*s == current_pc_char && s[1] == '=') {
			/* "*=" org directive. */
			handle_org(skip(s+2));
			continue;
		}

		if (handle_directive(s)) {
			continue;
		}

		s = skip(s);
		if (iseol(*s)) {
			continue;
		}

		/* Read the mnemonic name. */
		inst = s;
		ext_cnt = 0; /* No qualifiers, unless lex_inst() finds some. */
		if (!isidstart(*s)) {
			syntax_error(10); /* Identifier was expected. */
			continue;
		}
		#if !MAX_QUALIFIERS
		for (; *s && !isspace((unsigned char)*s); s++);
		inst_len = s - inst;
		#else
		s = lex_inst(s, &inst_len, ext, ext_len, &ext_cnt);
		#endif
		if (!isspace((unsigned char)*s) && *s != '\0') {
			syntax_error(2); /* No space before operand. */
		}
		s = skip(s);

		if (handle_struct(inst, inst_len, s)) {
			continue;
		}

		/* Read the operand(s), separated by comma, or whitespace (unless within brackets). */
		op_cnt = 0;
		while (!iseol(*s) && op_cnt < MAX_OPERANDS) {
			op[op_cnt] = s;
			s = skip_operand(1, s);
			op_len[op_cnt] = oplen(s, op[op_cnt]);
			op_cnt++;

			if (ignore_trail) {
				if (*s != ',') {
					break;
				}
				s++;
			} else {
				s = skip(s);
				if (OPERSEP_COMMA) {
					if (*s == ',') {
						s = skip(s+1);
					} else if (!(OPERSEP_WHITESPACE)) {
						break;
					}
				}
			}
		}
		eol(s);

		ip = new_inst(inst, inst_len, op_cnt, op, op_len);
		if (ip) {
			#if MAX_QUALIFIERS > 0
			int i;
			for (i = 0; i < ext_cnt; i++) {
				ip->qualifiers[i] = cnvstr(ext[i], ext_len[i]);
			}
			for (; i < MAX_QUALIFIERS; i++) {
				ip->qualifiers[i] = NULL;
			}
			#endif
			add_atom(0, new_inst_atom(ip));
		}
	}

	/* NOTE(review): Nothing in this function changes address, it's returned as is
	 * so that control never falls off the end of a non-void function.
	 * Confirm what value the caller actually expects.
	 */
	return address;
}