diff options
author | mrb0nk500 <b0nk@b0nk.xyz> | 2020-12-04 15:20:28 -0500 |
---|---|---|
committer | mrb0nk500 <b0nk@b0nk.xyz> | 2020-12-04 15:20:28 -0500 |
commit | 96393257a43ac52f2b911594d106741245dec5f0 (patch) | |
tree | 6b9d11c50ed3cd1920444c4dd0ee4027814ef4bd /lexer | |
parent | 83ce1151ee1f06ae6b1c5c1018cc2489494e5ea4 (diff) |
- Started work on writing the new version of the
assembler.
- Did a lot of stuff in the emulator.
- Did a lot of stuff in the SuB Suite.
Diffstat (limited to 'lexer')
-rw-r--r-- | lexer/asmmon.h | 794 | ||||
-rw-r--r-- | lexer/backup/asmmon.h | 794 | ||||
-rw-r--r-- | lexer/backup/assemble.c | 975 | ||||
-rw-r--r-- | lexer/backup/enums.h | 540 | ||||
-rw-r--r-- | lexer/backup/lexer.c | 937 | ||||
-rw-r--r-- | lexer/backup/lexer.h | 228 | ||||
-rw-r--r-- | lexer/cpu/sux/cpu.c | 30 | ||||
-rw-r--r-- | lexer/enums.h | 540 | ||||
-rw-r--r-- | lexer/lexer.c | 311 | ||||
-rw-r--r-- | lexer/lexer.h | 228 | ||||
-rw-r--r-- | lexer/misc.c | 14 | ||||
-rw-r--r-- | lexer/parse.c | 171 | ||||
-rw-r--r-- | lexer/symbol.c | 42 |
13 files changed, 5604 insertions, 0 deletions
diff --git a/lexer/asmmon.h b/lexer/asmmon.h new file mode 100644 index 0000000..430d37c --- /dev/null +++ b/lexer/asmmon.h @@ -0,0 +1,794 @@ +#include "opcode.h" +#include <ctype.h> +#include <string.h> + +#define MAX_TOK 0x1000 + +typedef struct tok token ; +typedef struct ln line ; +typedef struct sym symbol ; +typedef struct fix fixup ; +typedef struct inst instruction ; +typedef struct op operand ; + + +struct tok { + token *next; /* Pointer to the next token. */ + uint8_t id; /* Token ID. */ + uint8_t type; /* Token type ID. */ + + uint8_t tab; /* Number of tabs. */ + uint8_t space; /* Number of spaces. */ + + uint8_t subtab; /* Number of sub-token tabs. */ + uint8_t subspace; /* Number of sub-token spaces. */ + + uint8_t digits; /* Number of digits. */ + + /* Token value(s). */ + union { + symbol *sym; + char *str; + uint8_t byte ; + uint16_t word ; + uint32_t dword; + uint64_t qword; + }; +}; + +struct ln { + line *next; /* Pointer to the next line. */ + token *tok; /* The token(s) for this line. */ + uint16_t count; /* Total tokens for this line. */ + uint16_t bline; /* Number of blank lines. */ + uint32_t linenum; /* Line number. */ + uint64_t addr; /* The address of this line. */ +}; + + + +struct fix { + fixup *next; + symbol *s; + token *t; + uint64_t adr; +}; + +struct sym { + symbol *next; + symbol *prev; + symbol *down; + symbol *up; + uint16_t count; + uint64_t val; + uint8_t isstruct : 1; + uint8_t isanon : 1; + uint8_t def : 1; + char *name; + uint16_t id; +}; + +struct inst { + uint32_t am; /* Addressing modes. */ + uint8_t op; /* Base value used to get the actual opcode. */ +}; + +struct op { + uint8_t type; /* Operand Type. 0 = register, 1 = memory. */ + uint8_t id; /* Operand Type ID 1. 4 bits. */ + uint8_t id2[2]; /* Operand Type ID 2. 16 bits. */ + uint64_t value; /* Value of operand (used only by memory operands). 
*/ +}; + + +extern char lexeme[]; +extern char *string[]; +extern char *comment[]; +extern uint16_t incl[]; +extern line *lines; +extern line *last_line; +extern token *tokens; +extern token *last_tok; +extern symbol *symbols; +extern symbol *last_sym; +extern symbol *locals; +extern symbol *last_loc; +extern fixup *fixups; +extern fixup *last_fix; + +extern uint8_t lex_type; + +enum dir { + DIR_ORG, + DIR_BYTE, + DIR_WORD, + DIR_DWORD, + DIR_QWORD, + DIR_INCLUDE, + DIR_RES, + DIR_STRUCT, + DIR_UNION, + DIR_ENDSTRUCT, + DIR_ENDUNION +}; + +enum token { + TOK_DIR, + TOK_LOCAL, + TOK_LABEL, + TOK_SYM, + TOK_EXPR, + TOK_CSV, + TOK_STRING, + TOK_CHAR, + TOK_IND, + TOK_IMM, + TOK_BREG, + TOK_OPCODE, + TOK_EXTOP, + TOK_ORTHO, + TOK_REG, + TOK_MEM, + TOK_CC, + TOK_RS, + TOK_OF, + TOK_COMMENT, + TOK_HEX, + TOK_DEC, + TOK_BIN, + TOK_INCLUDE, + TOK_STRUCT, + TOK_UNION, + TOK_MEMBER + +}; + +enum pre_token { + PTOK_DOT, + PTOK_AT, + PTOK_COLON, + PTOK_EQU, + PTOK_PLUS, + PTOK_MINUS, + PTOK_GT, + PTOK_LT, + PTOK_PIPE, + PTOK_LBRACK, + PTOK_RBRACK, + PTOK_COMMA, + PTOK_B, + PTOK_E, + PTOK_X, + PTOK_Y, + PTOK_S, + PTOK_P, + PTOK_A, + PTOK_C, + PTOK_D, + PTOK_F, + PTOK_R, + PTOK_DQUOTE, + PTOK_SQUOTE, + PTOK_HASH, + PTOK_SCOLON, + PTOK_DOLLAR, + PTOK_PERCENT, + PTOK_NUMBER, + PTOK_ALPHA, + PTOK_OTHER +}; + +enum expr { + EXPR_PLUS, + EXPR_MINUS, + EXPR_LOW, + EXPR_HIGH, + EXPR_OR, + EXPR_LSHFT, + EXPR_RSHFT, + EXPR_NONE +}; + +enum addrmode { + AM_IMM = (1 << 0), + AM_ZM = (1 << 1), + AM_ZMX = (1 << 2), + AM_ZMY = (1 << 3), + AM_IND = (1 << 4), + AM_INDX = (1 << 5), + AM_INDY = (1 << 6), + AM_ABS = (1 << 7), + AM_REL = (1 << 8), + AM_BREG = (1 << 9), + AM_IMPL = (1 << 10), + AM_INDX2 = (1 << 11), + AM_ZM2 = (1 << 12), + AM_EIND = (1 << 13), + AM_EIND2 = (1 << 14), + AM_ABY = (1 << 15), + AM_ABX = (1 << 16), + AM_AIND = (1 << 17), + AM_AINDY = (1 << 18), + AM_AINDX = (1 << 19), + AM_ORTHO = (1 << 20), + AM_ORTHO2 = (1 << 21) +}; + +enum ind { + CMP_IND = 0, + CMP_IDY = 1, + 
CMP_IDX = 2, + CPB_IND = 3, + CPB_IDY = 4, + CPB_IDX = 5, + JMP_IND = 6, + JSR_IND = 7, + LDA_IND = 8, + LDA_IDY = 9, + LDB_IND = 10, + LDB_IDY = 11, + LDX_IND = 12, + LDY_IND = 13, + STA_IND = 14, + STA_IDY = 15, + STB_IND = 16, + STB_IDY = 17, + STX_IND = 18, + STY_IND = 19 +}; + +enum eind { + DEC_EIND, + INC_EIND, + STY_EIND, + STA_EIND, + STB_EIND, + LDX_EIND, + STX_EIND, + CPB_EIND, + CPX_EIND, + CPY_EIND +}; + +static const uint8_t ind_ops[20] = { + [CMP_IND] = CMP_IN, + [CMP_IDY] = CMP_IY, + [CMP_IDX] = CMP_IX, + [CPB_IND] = CPB_IN, + [CPB_IDY] = CPB_IY, + [CPB_IDX] = CPB_IX, + [JMP_IND] = JMP_IN, + [JSR_IND] = JSR_IN, + [LDA_IND] = LDA_IN, + [LDA_IDY] = LDA_IY, + [LDB_IND] = LDB_IN, + [LDB_IDY] = LDB_IY, + [LDX_IND] = LDX_IN, + [LDY_IND] = LDY_IN, + [STA_IND] = STA_IN, + [STA_IDY] = STA_IY, + [STB_IND] = STB_IN, + [STB_IDY] = STB_IY, + [STX_IND] = STX_IN, + [STY_IND] = STY_IN +}; + +static const uint8_t eind_base_ops[10] = { + [DEC_EIND] = DEC_E, + [INC_EIND] = INC_E, + [STY_EIND] = STY_E, + [STA_EIND] = STA_E, + [STB_EIND] = STB_E, + [LDX_EIND] = LDX_E, + [STX_EIND] = STX_E, + [CPB_EIND] = CPB_E, + [CPX_EIND] = CPX_E, + [CPY_EIND] = CPY_E +}; + +static const instruction inst[OPNUM] = { + [ADC] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x01}, + [AND] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x41}, + [ASR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x62}, + [BCC] = {(AM_REL), 0xA0}, + [BCS] = {(AM_REL), 0x90}, + [BEQ] = {(AM_REL), 0xB0}, + [BNE] = {(AM_REL), 0xC0}, + [BNG] = {(AM_REL), 0x80}, + [BPO] = {(AM_REL), 0x70}, + [BRA] = {(AM_REL), 0xF0}, + [BRK] = {(AM_IMPL), 0x69}, + [BVC] = {(AM_REL), 0xE0}, + [BVS] = {(AM_REL), 0xD0}, + [CLC] = {(AM_IMPL), 0x09}, + [CLI] = {(AM_IMPL), 0x29}, + [CLV] = {(AM_IMPL), 0x49}, + [CMP] = {(AM_IMM|AM_ZM|AM_IND|AM_INDY|AM_ABS|AM_BREG|AM_INDX2|AM_EIND|AM_ORTHO), 0x82}, + [CPB] = {(AM_IMM|AM_ZM|AM_IND|AM_INDY|AM_ABS|AM_INDX2|AM_EIND2), 0x04}, + [CPS] = {(AM_IMPL), 0x00}, + [CPX] = 
{(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x24}, + [CPY] = {(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x44}, + [DEB] = {(AM_IMPL), 0x99}, + [DEC] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND2|AM_ORTHO2), 0x84}, + [DEX] = {(AM_IMPL), 0xB9}, + [DEY] = {(AM_IMPL), 0x79}, + [DIV] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x42}, + [INB] = {(AM_IMPL), 0xA9}, + [INC] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND2|AM_ORTHO2), 0xA4}, + [INX] = {(AM_IMPL), 0xC9}, + [INY] = {(AM_IMPL), 0x89}, + [JMP] = {(AM_ABS|AM_IND|AM_ZM2|AM_EIND), 0x00}, + [JSR] = {(AM_ABS|AM_IND|AM_ZM2|AM_EIND), 0x20}, + [LDA] = {(AM_IMM|AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND), 0xC2}, + [LDB] = {(AM_IMM|AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND), 0xE2}, + [LDX] = {(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x64}, + [LDY] = {(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND), 0xA2}, + [LSL] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0xA1}, + [LSR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0xC1}, + [MUL] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x22}, + [NOP] = {(AM_IMPL), 0xEA}, + [ORA] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND), 0x61}, + [PHA] = {(AM_IMPL), 0x8E}, + [PHB] = {(AM_IMPL), 0xAE}, + [PHP] = {(AM_IMPL), 0x6E}, + [PHX] = {(AM_IMPL), 0xEE}, + [PHY] = {(AM_IMPL), 0xCE}, + [PLA] = {(AM_IMPL), 0x9E}, + [PLB] = {(AM_IMPL), 0xBE}, + [PLP] = {(AM_IMPL), 0x7E}, + [PLX] = {(AM_IMPL), 0xFE}, + [PLY] = {(AM_IMPL), 0xDE}, + [ROL] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0xE1}, + [ROR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x02}, + [RTI] = {(AM_IMPL), 0x60}, + [RTS] = {(AM_IMPL), 0x50}, + [SBC] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x21}, + [SEC] = {(AM_IMPL), 0x19}, + [SEI] = {(AM_IMPL), 0x39}, + [STA] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND2), 0x28}, + [STB] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND2), 0x48}, + [STX] = {(AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x68}, + [STY] = {(AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x08}, 
+ [TAB] = {(AM_IMPL), 0x0A}, + [TAX] = {(AM_IMPL), 0x4A}, + [TAY] = {(AM_IMPL), 0x2A}, + [TBA] = {(AM_IMPL), 0x1A}, + [TSX] = {(AM_IMPL), 0x8A}, + [TXA] = {(AM_IMPL), 0x5A}, + [TXS] = {(AM_IMPL), 0x9A}, + [TXY] = {(AM_IMPL), 0x7A}, + [TYA] = {(AM_IMPL), 0x3A}, + [TYX] = {(AM_IMPL), 0x6A}, + [WAI] = {(AM_IMPL), 0x59}, + [XOR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x81} +}; + +static const instruction ext_inst[EXT_OPNUM] = { + [LEA] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_ABX|AM_ABY|AM_AIND|AM_AINDX|AM_AINDY|AM_ORTHO), 0x03}, + [PEA] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_ABX|AM_ABY|AM_AIND|AM_AINDX|AM_AINDY|AM_ORTHO2), 0x23}, + [ADD] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO), 0x06}, + [SUB] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO), 0x26}, + [ADE] = {(AM_IMM|AM_ZM|AM_ABS), 0x46}, + [SBE] = {(AM_IMM|AM_ZM|AM_ABS), 0x66}, + [ADS] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x86}, + [SBS] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0xA6}, + [NOT] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 0xC6}, + [LLM] = {(AM_ZM|AM_ABS|AM_EIND), 0x48}, + [LRM] = {(AM_ZM|AM_ABS|AM_EIND), 0x68}, + [RLM] = {(AM_ZM|AM_ABS|AM_EIND), 0x88}, + [RRM] = {(AM_ZM|AM_ABS|AM_EIND), 0xA8}, + [ARM] = {(AM_ZM|AM_ABS|AM_EIND), 0xC8}, + [PHE] = {(AM_IMPL), 0x6B}, + [PLE] = {(AM_IMPL), 0x7B}, + [CPE] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x08}, + [ICE] = {(AM_ZM|AM_ABS|AM_EIND), 0x28}, + [LDS] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x40}, + [DEE] = {(AM_IMPL), 0x8B}, + [INE] = {(AM_IMPL), 0x9B}, + [DES] = {(AM_IMPL), 0xAB}, + [INS] = {(AM_IMPL), 0xBB}, + [STS] = {(AM_ZM|AM_ABS|AM_EIND), 0xA0}, + [STE] = {(AM_ZM|AM_ABS), 0xC0}, + [STZ] = {(AM_ZM|AM_ABS|AM_EIND), 0xE0}, + [SCO] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x60}, + [ECO] = {(AM_ZM|AM_ABS|AM_EIND), 0x80}, + [CLZ] = {(AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 0x05}, + [CLO] = {(AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 0x25}, + [BIT] = {(AM_ZM|AM_ABS|AM_EIND), 0x45}, + [MMV] = {(AM_IMPL), 0xCB}, + [SWP] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 
0xE6}, + [PCN] = {(AM_ZM|AM_ABS|AM_EIND|AM_ORTHO), 0xE8}, + [REP] = {(AM_REL), 0xBD}, + [REQ] = {(AM_REL), 0xCD}, + [RNE] = {(AM_REL), 0xDD}, + [LNG] = {(AM_IMM|AM_EIND2), 0x0D}, + [LPO] = {(AM_IMM|AM_EIND2), 0x2D}, + [LCS] = {(AM_IMM|AM_EIND2), 0x4D}, + [LCC] = {(AM_IMM|AM_EIND2), 0x6D}, + [LEQ] = {(AM_IMM|AM_EIND2), 0x8D}, + [LNE] = {(AM_IMM|AM_EIND2), 0xAD}, + [SNG] = {(AM_EIND2), 0x1D}, + [SPO] = {(AM_EIND2), 0x3D}, + [SCS] = {(AM_EIND2), 0x5D}, + [SCC] = {(AM_EIND2), 0x7D}, + [SEQ] = {(AM_EIND2), 0x9D}, + [SNE] = {(AM_EIND2), 0xBD} +}; + +static const instruction ortho_inst[ORTHO_OPNUM] = { + [MNG] = {(AM_ORTHO), 0x00}, + [MPO] = {(AM_ORTHO), 0x20}, + [MCS] = {(AM_ORTHO), 0x40}, + [MCC] = {(AM_ORTHO), 0x60}, + [MEQ] = {(AM_ORTHO), 0x80}, + [MNE] = {(AM_ORTHO), 0xA0}, + [MVS] = {(AM_ORTHO), 0xC0}, + [MVC] = {(AM_ORTHO), 0xE0}, + [OR ] = {(AM_ORTHO), 0x61}, + [MOV] = {(AM_ORTHO), 0xA2}, + [IML] = {(AM_ORTHO), 0xC2}, + [IDV] = {(AM_ORTHO), 0xE2}, + [PSH] = {(AM_ORTHO2), 0x04}, + [PUL] = {(AM_ORTHO2), 0x24}, + [NEG] = {(AM_ORTHO2), 0x64}, + [SET] = {(AM_ORTHO2), 0x05} +}; + +static const char *dir_t[11] = { + [ 0] = "org", + [ 1] = "byte", + [ 2] = "word", + [ 3] = "dword", + [ 4] = "qword", + [ 5] = "include", + [ 6] = "res", + [ 7] = "struct", + [ 8] = "union", + [ 9] = "endstruct", + [10] = "endunion" +}; + +static const char *rs_t[4] = { + [0] = "", + [1] = ".w", + [2] = ".d", + [3] = ".q" +}; + +static const char *lex_tok[] = { + [TOK_DIR ] = "TOK_DIR", + [TOK_LOCAL ] = "TOK_LOCAL", + [TOK_LABEL ] = "TOK_LABEL", + [TOK_SYM ] = "TOK_SYM", + [TOK_EXPR ] = "TOK_EXPR", + [TOK_CSV ] = "TOK_CSV", + [TOK_STRING ] = "TOK_STRING", + [TOK_CHAR ] = "TOK_CHAR", + [TOK_IND ] = "TOK_IND", + [TOK_IMM ] = "TOK_IMM", + [TOK_BREG ] = "TOK_BREG", + [TOK_OPCODE ] = "TOK_OPCODE", + [TOK_EXTOP ] = "TOK_EXTOP", + [TOK_ORTHO ] = "TOK_ORTHO", + [TOK_REG ] = "TOK_REG", + [TOK_MEM ] = "TOK_MEM", + [TOK_CC ] = "TOK_CC", + [TOK_RS ] = "TOK_RS", + [TOK_OF ] = "TOK_OF", + [TOK_COMMENT] = 
"TOK_COMMENT", + [TOK_HEX ] = "TOK_HEX", + [TOK_DEC ] = "TOK_DEC", + [TOK_BIN ] = "TOK_BIN", + [TOK_INCLUDE] = "TOK_INCLUDE", + [TOK_STRUCT ] = "TOK_STRUCT", + [TOK_UNION ] = "TOK_UNION", + [TOK_MEMBER ] = "TOK_MEMBER" +}; + +static const char *adrmode[] = { + [IMM ] = "IMM", + [ZM ] = "ZM", + [ZMX ] = "ZMX", + [ZMY ] = "ZMY", + [IND ] = "IND", + [INDX ] = "INDX", + [INDY ] = "INDY", + [ABS ] = "ABS", + [REL ] = "REL", + [BREG ] = "BREG", + [IMPL ] = "IMPL", + [ABSX ] = "ABSX", + [ABSY ] = "ABSY", + [AIND ] = "AIND", + [AINDX] = "AINDX", + [AINDY] = "AINDY", + [EIND ] = "EIND" +/* [ZMR ] = "ZMR", + [ZINDR] = "ZINDR", + [ZRIND] = "ZRIND", + [AINDR] = "AINDR", + [AINDY] = "ARIND",*/ +}; + +static const char *mne[OPNUM] = { + [ADC] = "ADC", + [AND] = "AND", + [ASR] = "ASR", + [BCC] = "BCC", + [BCS] = "BCS", + [BEQ] = "BEQ", + [BNE] = "BNE", + [BNG] = "BNG", + [BPO] = "BPO", + [BRA] = "BRA", + [BRK] = "BRK", + [BVC] = "BVC", + [BVS] = "BVS", + [CLC] = "CLC", + [CLI] = "CLI", + [CLV] = "CLV", + [CMP] = "CMP", + [CPB] = "CPB", + [CPS] = "CPS", + [CPX] = "CPX", + [CPY] = "CPY", + [DEB] = "DEB", + [DEC] = "DEC", + [DEX] = "DEX", + [DEY] = "DEY", + [DIV] = "DIV", + [INB] = "INB", + [INC] = "INC", + [INX] = "INX", + [INY] = "INY", + [JMP] = "JMP", + [JSR] = "JSR", + [LDA] = "LDA", + [LDB] = "LDB", + [LDX] = "LDX", + [LDY] = "LDY", + [LSL] = "LSL", + [LSR] = "LSR", + [MUL] = "MUL", + [NOP] = "NOP", + [ORA] = "ORA", + [PHA] = "PHA", + [PHB] = "PHB", + [PHP] = "PHP", + [PHX] = "PHX", + [PHY] = "PHY", + [PLA] = "PLA", + [PLB] = "PLB", + [PLP] = "PLP", + [PLX] = "PLX", + [PLY] = "PLY", + [ROL] = "ROL", + [ROR] = "ROR", + [RTI] = "RTI", + [RTS] = "RTS", + [SBC] = "SBC", + [SEC] = "SEC", + [SEI] = "SEI", + [STA] = "STA", + [STB] = "STB", + [STX] = "STX", + [STY] = "STY", + [TAB] = "TAB", + [TAX] = "TAX", + [TAY] = "TAY", + [TBA] = "TBA", + [TSX] = "TSX", + [TXA] = "TXA", + [TXS] = "TXS", + [TXY] = "TXY", + [TYA] = "TYA", + [TYX] = "TYX", + [WAI] = "WAI", + [XOR] = "XOR" +}; + 
+static const char *ext_mne[EXT_OPNUM] = { + [LEA] = "LEA", + [PEA] = "PEA", + [ADD] = "ADD", + [SUB] = "SUB", + [ADE] = "ADE", + [SBE] = "SBE", + [ADS] = "ADS", + [SBS] = "SBS", + [NOT] = "NOT", + [LLM] = "LLM", + [LRM] = "LRM", + [RLM] = "RLM", + [RRM] = "RRM", + [ARM] = "ARM", + [PHE] = "PHE", + [PLE] = "PLE", + [CPE] = "CPE", + [ICE] = "ICE", + [LDS] = "LDS", + [DEE] = "DEE", + [INE] = "INE", + [DES] = "DES", + [INS] = "INS", + [STS] = "STS", + [STE] = "STE", + [STZ] = "STZ", + [SCO] = "SCO", + [ECO] = "ECO", + [CLZ] = "CLZ", + [CLO] = "CLO", + [BIT] = "BIT", + [MMV] = "MMV", + [SWP] = "SWP", + [PCN] = "PCN", + [REP] = "REP", + [REQ] = "REQ", + [RNE] = "RNE", + [LNG] = "LNG", + [LPO] = "LPO", + [LCS] = "LCS", + [LCC] = "LCC", + [LEQ] = "LEQ", + [LNE] = "LNE", + [SNG] = "SNG", + [SPO] = "SPO", + [SCS] = "SCS", + [SCC] = "SCC", + [SEQ] = "SEQ", + [SNE] = "SNE" +}; + +static const char *ortho_mne[ORTHO_OPNUM] = { + [MNG] = "MNG", + [MPO] = "MPO", + [MCS] = "MCS", + [MCC] = "MCC", + [MEQ] = "MEQ", + [MNE] = "MNE", + [MVS] = "MVS", + [MVC] = "MVC", + [OR ] = "OR", + [MOV] = "MOV", + [IML] = "IML", + [IDV] = "IDV", + [PSH] = "PSH", + [PUL] = "PUL", + [NEG] = "NEG", + [SET] = "SET" +}; + +static const char *set_cc[8] = { + "NG", + "PO", + "CS", + "CC", + "EQ", + "NE", + "VS", + "VC" +}; + +static const char *instdesc[OPNUM] = { + [ADC] = "ADd accumulator, with operand, Carry if needed.", + [AND] = "Bitwise AND accumulator, with operand.", + [ASR] = "Arithmetic Shift Right accumulator, with operand.", + [BCC] = "Branch if the Carry flag has been Cleared.", + [BCS] = "Branch if the Carry flag is Set.", + [BEQ] = "Branch if EQual (the zero flag has been set).", + [BNE] = "Branch if Not Equal (the zero flag has been cleared)", + [BNG] = "Branch if NeGative.", + [BPO] = "Branch if POsitive.", + [BRA] = "BRanch Always.", + [BRK] = "BReaKpoint", + [BVC] = "Branch if the oVerflow flag has been Cleared.", + [BVS] = "Branch if the oVerflow flag is Set.", + [CLC] = "CLear the 
Carry flag.", + [CLI] = "CLear the Interrupt flag.", + [CLV] = "CLear the oVerflow flag.", + [CMP] = "CoMPare acumulator, with operand.", + [CPB] = "ComPare the B register, with operand.", + [CPS] = "Clears the Processor Status register.", + [CPX] = "ComPare the X register, with operand.", + [CPY] = "ComPare the Y register, with operand.", + [DEB] = "DEcrement the B register.", + [DEC] = "DECrement accumulator, or memory.", + [DEX] = "DEcrement the X register.", + [DEY] = "DEcrement the Y register.", + [DIV] = "DIVide accumulator, with operand, and put the remainder into the B register.", + [INB] = "INcrement the B register.", + [INC] = "INCrement accumulator, or memory.", + [INX] = "INcrement the X register.", + [INY] = "INcrement the Y register.", + [JMP] = "JuMP to the address specified.", + [JSR] = "Jump to a SubRoutine.", + [LDA] = "LoaD the value from the operand, to the Accumulator.", + [LDB] = "LoaD the value from the operand, to the B register.", + [LDX] = "LoaD the value from the operand, to the X register.", + [LDY] = "LoaD the value from the operand, to the Y register.", + [LSL] = "Logical Shift Left accumulator, with operand.", + [LSR] = "Logical Shift Right accumulator, with operand.", + [MUL] = "MULtiply accumulator, with operand.", + [NOP] = "NO oPeration", + [ORA] = "Bitwise OR Accumulator, with operand.", + [PHA] = "PusH the number of bytes specified, from the Accumulator to the stack.", + [PHB] = "PusH the number of bytes specified, from the B register to the stack.", + [PHP] = "PusH the number of bytes specified, from the Processor status register to the stack.", + [PHX] = "PusH the number of bytes specified, from the X register to the stack.", + [PHY] = "PusH the number of bytes specified, from the Y register to the stack.", + [PLA] = "PuLl the number of bytes specified, from the stack, to the Accumulator.", + [PLB] = "PuLl the number of bytes specified, from the stack, to the B register.", + [PLP] = "PuLl the number of bytes specified, from 
the stack, to the Processor status register.", + [PLX] = "PuLl the number of bytes specified, from the stack, to the X register.", + [PLY] = "PuLl the number of bytes specified, from the stack, to the Y register.", + [ROL] = "ROtate Left accumulator, with operand.", + [ROR] = "ROtate Right accumulator, with operand.", + [RTI] = "ReTurn from an Interrupt.", + [RTS] = "ReTurn from a Subroutine.", + [SBC] = "SuBtract accumulator, with operand, Carry if needed", + [SEC] = "SEt the Carry flag.", + [SEI] = "SEt the Interrupt flag.", + [STA] = "STore the value from the Accumulator, in memory.", + [STB] = "STore the value from the B register, in memory.", + [STX] = "STore the value from the X register, in memory.", + [STY] = "STore the value from the Y register, in memory.", + [TAB] = "Transfer the value from the Accumulator, to the B register.", + [TAX] = "Transfer the value from the Accumulator, to the X register.", + [TAY] = "Transfer the value from the Accumulator, to the Y register.", + [TBA] = "Transfer the value from the Y register, to the Accumulator.", + [TSX] = "Transfer the value from the Stack pointer, to the X register.", + [TXA] = "Transfer the value from the X register, to the Accumulator.", + [TXS] = "Transfer the value from the X register, to the Stack pointer.", + [TXY] = "Transfer the value from the X register, to the Y register.", + [TYA] = "Transfer the value from the Y register, to the Accumulator.", + [TYX] = "Transfer the value from the Y register, to the X register.", + [WAI] = "WAIt for an interrupt", + [XOR] = "Bitwise XOR Accumulator, with operand." 
+}; + +static const uint8_t bitsize[4] = { + [0] = 0x07, + [1] = 0x0F, + [2] = 0x1F, + [3] = 0x3F +}; + +static const uint8_t amp[8] = { + [0] = 0x00, + [1] = 0x00, + [2] = 0x07, + [4] = 0x07, + [5] = 0x0B, + [6] = 0x0B, + [3] = 0x0F, + [7] = 0x0F +}; + + +extern uint16_t linenum; +extern uint16_t lineidx; +extern uint16_t stridx; +extern uint16_t comidx; +extern uint16_t inc_file; /* Number of included files. */ +extern uint16_t inc_count; + +struct bc { + uint64_t progsize; + uint64_t datasize; +}; + +typedef struct bc bytecount; + +extern uint8_t defined; +extern uint8_t isfixup; + +extern line *find_line(uint32_t ln, uint8_t dbg); +extern uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg); + +extern uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t dbg); +extern token *skip_expr(token *t, uint8_t dbg); +extern uint64_t parse_tokens(token *tm, line **l, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg); +extern token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s); +extern void assemble(line *ln, bytecount *bc, uint8_t dbg); +extern void fix_symtree(line *l); +extern void cleanup(); diff --git a/lexer/backup/asmmon.h b/lexer/backup/asmmon.h new file mode 100644 index 0000000..430d37c --- /dev/null +++ b/lexer/backup/asmmon.h @@ -0,0 +1,794 @@ +#include "opcode.h" +#include <ctype.h> +#include <string.h> + +#define MAX_TOK 0x1000 + +typedef struct tok token ; +typedef struct ln line ; +typedef struct sym symbol ; +typedef struct fix fixup ; +typedef struct inst instruction ; +typedef struct op operand ; + + +struct tok { + token *next; /* Pointer to the next token. */ + uint8_t id; /* Token ID. */ + uint8_t type; /* Token type ID. */ + + uint8_t tab; /* Number of tabs. */ + uint8_t space; /* Number of spaces. */ + + uint8_t subtab; /* Number of sub-token tabs. */ + uint8_t subspace; /* Number of sub-token spaces. */ + + uint8_t digits; /* Number of digits. 
*/ + + /* Token value(s). */ + union { + symbol *sym; + char *str; + uint8_t byte ; + uint16_t word ; + uint32_t dword; + uint64_t qword; + }; +}; + +struct ln { + line *next; /* Pointer to the next line. */ + token *tok; /* The token(s) for this line. */ + uint16_t count; /* Total tokens for this line. */ + uint16_t bline; /* Number of blank lines. */ + uint32_t linenum; /* Line number. */ + uint64_t addr; /* The address of this line. */ +}; + + + +struct fix { + fixup *next; + symbol *s; + token *t; + uint64_t adr; +}; + +struct sym { + symbol *next; + symbol *prev; + symbol *down; + symbol *up; + uint16_t count; + uint64_t val; + uint8_t isstruct : 1; + uint8_t isanon : 1; + uint8_t def : 1; + char *name; + uint16_t id; +}; + +struct inst { + uint32_t am; /* Addressing modes. */ + uint8_t op; /* Base value used to get the actual opcode. */ +}; + +struct op { + uint8_t type; /* Operand Type. 0 = register, 1 = memory. */ + uint8_t id; /* Operand Type ID 1. 4 bits. */ + uint8_t id2[2]; /* Operand Type ID 2. 16 bits. */ + uint64_t value; /* Value of operand (used only by memory operands). 
*/ +}; + + +extern char lexeme[]; +extern char *string[]; +extern char *comment[]; +extern uint16_t incl[]; +extern line *lines; +extern line *last_line; +extern token *tokens; +extern token *last_tok; +extern symbol *symbols; +extern symbol *last_sym; +extern symbol *locals; +extern symbol *last_loc; +extern fixup *fixups; +extern fixup *last_fix; + +extern uint8_t lex_type; + +enum dir { + DIR_ORG, + DIR_BYTE, + DIR_WORD, + DIR_DWORD, + DIR_QWORD, + DIR_INCLUDE, + DIR_RES, + DIR_STRUCT, + DIR_UNION, + DIR_ENDSTRUCT, + DIR_ENDUNION +}; + +enum token { + TOK_DIR, + TOK_LOCAL, + TOK_LABEL, + TOK_SYM, + TOK_EXPR, + TOK_CSV, + TOK_STRING, + TOK_CHAR, + TOK_IND, + TOK_IMM, + TOK_BREG, + TOK_OPCODE, + TOK_EXTOP, + TOK_ORTHO, + TOK_REG, + TOK_MEM, + TOK_CC, + TOK_RS, + TOK_OF, + TOK_COMMENT, + TOK_HEX, + TOK_DEC, + TOK_BIN, + TOK_INCLUDE, + TOK_STRUCT, + TOK_UNION, + TOK_MEMBER + +}; + +enum pre_token { + PTOK_DOT, + PTOK_AT, + PTOK_COLON, + PTOK_EQU, + PTOK_PLUS, + PTOK_MINUS, + PTOK_GT, + PTOK_LT, + PTOK_PIPE, + PTOK_LBRACK, + PTOK_RBRACK, + PTOK_COMMA, + PTOK_B, + PTOK_E, + PTOK_X, + PTOK_Y, + PTOK_S, + PTOK_P, + PTOK_A, + PTOK_C, + PTOK_D, + PTOK_F, + PTOK_R, + PTOK_DQUOTE, + PTOK_SQUOTE, + PTOK_HASH, + PTOK_SCOLON, + PTOK_DOLLAR, + PTOK_PERCENT, + PTOK_NUMBER, + PTOK_ALPHA, + PTOK_OTHER +}; + +enum expr { + EXPR_PLUS, + EXPR_MINUS, + EXPR_LOW, + EXPR_HIGH, + EXPR_OR, + EXPR_LSHFT, + EXPR_RSHFT, + EXPR_NONE +}; + +enum addrmode { + AM_IMM = (1 << 0), + AM_ZM = (1 << 1), + AM_ZMX = (1 << 2), + AM_ZMY = (1 << 3), + AM_IND = (1 << 4), + AM_INDX = (1 << 5), + AM_INDY = (1 << 6), + AM_ABS = (1 << 7), + AM_REL = (1 << 8), + AM_BREG = (1 << 9), + AM_IMPL = (1 << 10), + AM_INDX2 = (1 << 11), + AM_ZM2 = (1 << 12), + AM_EIND = (1 << 13), + AM_EIND2 = (1 << 14), + AM_ABY = (1 << 15), + AM_ABX = (1 << 16), + AM_AIND = (1 << 17), + AM_AINDY = (1 << 18), + AM_AINDX = (1 << 19), + AM_ORTHO = (1 << 20), + AM_ORTHO2 = (1 << 21) +}; + +enum ind { + CMP_IND = 0, + CMP_IDY = 1, + 
CMP_IDX = 2, + CPB_IND = 3, + CPB_IDY = 4, + CPB_IDX = 5, + JMP_IND = 6, + JSR_IND = 7, + LDA_IND = 8, + LDA_IDY = 9, + LDB_IND = 10, + LDB_IDY = 11, + LDX_IND = 12, + LDY_IND = 13, + STA_IND = 14, + STA_IDY = 15, + STB_IND = 16, + STB_IDY = 17, + STX_IND = 18, + STY_IND = 19 +}; + +enum eind { + DEC_EIND, + INC_EIND, + STY_EIND, + STA_EIND, + STB_EIND, + LDX_EIND, + STX_EIND, + CPB_EIND, + CPX_EIND, + CPY_EIND +}; + +static const uint8_t ind_ops[20] = { + [CMP_IND] = CMP_IN, + [CMP_IDY] = CMP_IY, + [CMP_IDX] = CMP_IX, + [CPB_IND] = CPB_IN, + [CPB_IDY] = CPB_IY, + [CPB_IDX] = CPB_IX, + [JMP_IND] = JMP_IN, + [JSR_IND] = JSR_IN, + [LDA_IND] = LDA_IN, + [LDA_IDY] = LDA_IY, + [LDB_IND] = LDB_IN, + [LDB_IDY] = LDB_IY, + [LDX_IND] = LDX_IN, + [LDY_IND] = LDY_IN, + [STA_IND] = STA_IN, + [STA_IDY] = STA_IY, + [STB_IND] = STB_IN, + [STB_IDY] = STB_IY, + [STX_IND] = STX_IN, + [STY_IND] = STY_IN +}; + +static const uint8_t eind_base_ops[10] = { + [DEC_EIND] = DEC_E, + [INC_EIND] = INC_E, + [STY_EIND] = STY_E, + [STA_EIND] = STA_E, + [STB_EIND] = STB_E, + [LDX_EIND] = LDX_E, + [STX_EIND] = STX_E, + [CPB_EIND] = CPB_E, + [CPX_EIND] = CPX_E, + [CPY_EIND] = CPY_E +}; + +static const instruction inst[OPNUM] = { + [ADC] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x01}, + [AND] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x41}, + [ASR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x62}, + [BCC] = {(AM_REL), 0xA0}, + [BCS] = {(AM_REL), 0x90}, + [BEQ] = {(AM_REL), 0xB0}, + [BNE] = {(AM_REL), 0xC0}, + [BNG] = {(AM_REL), 0x80}, + [BPO] = {(AM_REL), 0x70}, + [BRA] = {(AM_REL), 0xF0}, + [BRK] = {(AM_IMPL), 0x69}, + [BVC] = {(AM_REL), 0xE0}, + [BVS] = {(AM_REL), 0xD0}, + [CLC] = {(AM_IMPL), 0x09}, + [CLI] = {(AM_IMPL), 0x29}, + [CLV] = {(AM_IMPL), 0x49}, + [CMP] = {(AM_IMM|AM_ZM|AM_IND|AM_INDY|AM_ABS|AM_BREG|AM_INDX2|AM_EIND|AM_ORTHO), 0x82}, + [CPB] = {(AM_IMM|AM_ZM|AM_IND|AM_INDY|AM_ABS|AM_INDX2|AM_EIND2), 0x04}, + [CPS] = {(AM_IMPL), 0x00}, + [CPX] = 
{(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x24}, + [CPY] = {(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x44}, + [DEB] = {(AM_IMPL), 0x99}, + [DEC] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND2|AM_ORTHO2), 0x84}, + [DEX] = {(AM_IMPL), 0xB9}, + [DEY] = {(AM_IMPL), 0x79}, + [DIV] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x42}, + [INB] = {(AM_IMPL), 0xA9}, + [INC] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND2|AM_ORTHO2), 0xA4}, + [INX] = {(AM_IMPL), 0xC9}, + [INY] = {(AM_IMPL), 0x89}, + [JMP] = {(AM_ABS|AM_IND|AM_ZM2|AM_EIND), 0x00}, + [JSR] = {(AM_ABS|AM_IND|AM_ZM2|AM_EIND), 0x20}, + [LDA] = {(AM_IMM|AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND), 0xC2}, + [LDB] = {(AM_IMM|AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND), 0xE2}, + [LDX] = {(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x64}, + [LDY] = {(AM_IMM|AM_ZM|AM_IND|AM_ABS|AM_EIND), 0xA2}, + [LSL] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0xA1}, + [LSR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0xC1}, + [MUL] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x22}, + [NOP] = {(AM_IMPL), 0xEA}, + [ORA] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND), 0x61}, + [PHA] = {(AM_IMPL), 0x8E}, + [PHB] = {(AM_IMPL), 0xAE}, + [PHP] = {(AM_IMPL), 0x6E}, + [PHX] = {(AM_IMPL), 0xEE}, + [PHY] = {(AM_IMPL), 0xCE}, + [PLA] = {(AM_IMPL), 0x9E}, + [PLB] = {(AM_IMPL), 0xBE}, + [PLP] = {(AM_IMPL), 0x7E}, + [PLX] = {(AM_IMPL), 0xFE}, + [PLY] = {(AM_IMPL), 0xDE}, + [ROL] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0xE1}, + [ROR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x02}, + [RTI] = {(AM_IMPL), 0x60}, + [RTS] = {(AM_IMPL), 0x50}, + [SBC] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x21}, + [SEC] = {(AM_IMPL), 0x19}, + [SEI] = {(AM_IMPL), 0x39}, + [STA] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND2), 0x28}, + [STB] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_EIND2), 0x48}, + [STX] = {(AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x68}, + [STY] = {(AM_ZM|AM_IND|AM_ABS|AM_EIND2), 0x08}, 
+ [TAB] = {(AM_IMPL), 0x0A}, + [TAX] = {(AM_IMPL), 0x4A}, + [TAY] = {(AM_IMPL), 0x2A}, + [TBA] = {(AM_IMPL), 0x1A}, + [TSX] = {(AM_IMPL), 0x8A}, + [TXA] = {(AM_IMPL), 0x5A}, + [TXS] = {(AM_IMPL), 0x9A}, + [TXY] = {(AM_IMPL), 0x7A}, + [TYA] = {(AM_IMPL), 0x3A}, + [TYX] = {(AM_IMPL), 0x6A}, + [WAI] = {(AM_IMPL), 0x59}, + [XOR] = {(AM_IMM|AM_ZM|AM_ABS|AM_BREG|AM_EIND|AM_ORTHO), 0x81} +}; + +static const instruction ext_inst[EXT_OPNUM] = { + [LEA] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_ABX|AM_ABY|AM_AIND|AM_AINDX|AM_AINDY|AM_ORTHO), 0x03}, + [PEA] = {(AM_ZM|AM_ZMX|AM_ZMY|AM_IND|AM_INDX|AM_INDY|AM_ABS|AM_ABX|AM_ABY|AM_AIND|AM_AINDX|AM_AINDY|AM_ORTHO2), 0x23}, + [ADD] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO), 0x06}, + [SUB] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO), 0x26}, + [ADE] = {(AM_IMM|AM_ZM|AM_ABS), 0x46}, + [SBE] = {(AM_IMM|AM_ZM|AM_ABS), 0x66}, + [ADS] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x86}, + [SBS] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0xA6}, + [NOT] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 0xC6}, + [LLM] = {(AM_ZM|AM_ABS|AM_EIND), 0x48}, + [LRM] = {(AM_ZM|AM_ABS|AM_EIND), 0x68}, + [RLM] = {(AM_ZM|AM_ABS|AM_EIND), 0x88}, + [RRM] = {(AM_ZM|AM_ABS|AM_EIND), 0xA8}, + [ARM] = {(AM_ZM|AM_ABS|AM_EIND), 0xC8}, + [PHE] = {(AM_IMPL), 0x6B}, + [PLE] = {(AM_IMPL), 0x7B}, + [CPE] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x08}, + [ICE] = {(AM_ZM|AM_ABS|AM_EIND), 0x28}, + [LDS] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x40}, + [DEE] = {(AM_IMPL), 0x8B}, + [INE] = {(AM_IMPL), 0x9B}, + [DES] = {(AM_IMPL), 0xAB}, + [INS] = {(AM_IMPL), 0xBB}, + [STS] = {(AM_ZM|AM_ABS|AM_EIND), 0xA0}, + [STE] = {(AM_ZM|AM_ABS), 0xC0}, + [STZ] = {(AM_ZM|AM_ABS|AM_EIND), 0xE0}, + [SCO] = {(AM_IMM|AM_ZM|AM_ABS|AM_EIND), 0x60}, + [ECO] = {(AM_ZM|AM_ABS|AM_EIND), 0x80}, + [CLZ] = {(AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 0x05}, + [CLO] = {(AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 0x25}, + [BIT] = {(AM_ZM|AM_ABS|AM_EIND), 0x45}, + [MMV] = {(AM_IMPL), 0xCB}, + [SWP] = {(AM_IMPL|AM_ZM|AM_ABS|AM_EIND|AM_ORTHO2), 
0xE6}, + [PCN] = {(AM_ZM|AM_ABS|AM_EIND|AM_ORTHO), 0xE8}, + [REP] = {(AM_REL), 0xBD}, + [REQ] = {(AM_REL), 0xCD}, + [RNE] = {(AM_REL), 0xDD}, + [LNG] = {(AM_IMM|AM_EIND2), 0x0D}, + [LPO] = {(AM_IMM|AM_EIND2), 0x2D}, + [LCS] = {(AM_IMM|AM_EIND2), 0x4D}, + [LCC] = {(AM_IMM|AM_EIND2), 0x6D}, + [LEQ] = {(AM_IMM|AM_EIND2), 0x8D}, + [LNE] = {(AM_IMM|AM_EIND2), 0xAD}, + [SNG] = {(AM_EIND2), 0x1D}, + [SPO] = {(AM_EIND2), 0x3D}, + [SCS] = {(AM_EIND2), 0x5D}, + [SCC] = {(AM_EIND2), 0x7D}, + [SEQ] = {(AM_EIND2), 0x9D}, + [SNE] = {(AM_EIND2), 0xBD} +}; + +static const instruction ortho_inst[ORTHO_OPNUM] = { + [MNG] = {(AM_ORTHO), 0x00}, + [MPO] = {(AM_ORTHO), 0x20}, + [MCS] = {(AM_ORTHO), 0x40}, + [MCC] = {(AM_ORTHO), 0x60}, + [MEQ] = {(AM_ORTHO), 0x80}, + [MNE] = {(AM_ORTHO), 0xA0}, + [MVS] = {(AM_ORTHO), 0xC0}, + [MVC] = {(AM_ORTHO), 0xE0}, + [OR ] = {(AM_ORTHO), 0x61}, + [MOV] = {(AM_ORTHO), 0xA2}, + [IML] = {(AM_ORTHO), 0xC2}, + [IDV] = {(AM_ORTHO), 0xE2}, + [PSH] = {(AM_ORTHO2), 0x04}, + [PUL] = {(AM_ORTHO2), 0x24}, + [NEG] = {(AM_ORTHO2), 0x64}, + [SET] = {(AM_ORTHO2), 0x05} +}; + +static const char *dir_t[11] = { + [ 0] = "org", + [ 1] = "byte", + [ 2] = "word", + [ 3] = "dword", + [ 4] = "qword", + [ 5] = "include", + [ 6] = "res", + [ 7] = "struct", + [ 8] = "union", + [ 9] = "endstruct", + [10] = "endunion" +}; + +static const char *rs_t[4] = { + [0] = "", + [1] = ".w", + [2] = ".d", + [3] = ".q" +}; + +static const char *lex_tok[] = { + [TOK_DIR ] = "TOK_DIR", + [TOK_LOCAL ] = "TOK_LOCAL", + [TOK_LABEL ] = "TOK_LABEL", + [TOK_SYM ] = "TOK_SYM", + [TOK_EXPR ] = "TOK_EXPR", + [TOK_CSV ] = "TOK_CSV", + [TOK_STRING ] = "TOK_STRING", + [TOK_CHAR ] = "TOK_CHAR", + [TOK_IND ] = "TOK_IND", + [TOK_IMM ] = "TOK_IMM", + [TOK_BREG ] = "TOK_BREG", + [TOK_OPCODE ] = "TOK_OPCODE", + [TOK_EXTOP ] = "TOK_EXTOP", + [TOK_ORTHO ] = "TOK_ORTHO", + [TOK_REG ] = "TOK_REG", + [TOK_MEM ] = "TOK_MEM", + [TOK_CC ] = "TOK_CC", + [TOK_RS ] = "TOK_RS", + [TOK_OF ] = "TOK_OF", + [TOK_COMMENT] = 
"TOK_COMMENT", + [TOK_HEX ] = "TOK_HEX", + [TOK_DEC ] = "TOK_DEC", + [TOK_BIN ] = "TOK_BIN", + [TOK_INCLUDE] = "TOK_INCLUDE", + [TOK_STRUCT ] = "TOK_STRUCT", + [TOK_UNION ] = "TOK_UNION", + [TOK_MEMBER ] = "TOK_MEMBER" +}; + +static const char *adrmode[] = { + [IMM ] = "IMM", + [ZM ] = "ZM", + [ZMX ] = "ZMX", + [ZMY ] = "ZMY", + [IND ] = "IND", + [INDX ] = "INDX", + [INDY ] = "INDY", + [ABS ] = "ABS", + [REL ] = "REL", + [BREG ] = "BREG", + [IMPL ] = "IMPL", + [ABSX ] = "ABSX", + [ABSY ] = "ABSY", + [AIND ] = "AIND", + [AINDX] = "AINDX", + [AINDY] = "AINDY", + [EIND ] = "EIND" +/* [ZMR ] = "ZMR", + [ZINDR] = "ZINDR", + [ZRIND] = "ZRIND", + [AINDR] = "AINDR", + [AINDY] = "ARIND",*/ +}; + +static const char *mne[OPNUM] = { + [ADC] = "ADC", + [AND] = "AND", + [ASR] = "ASR", + [BCC] = "BCC", + [BCS] = "BCS", + [BEQ] = "BEQ", + [BNE] = "BNE", + [BNG] = "BNG", + [BPO] = "BPO", + [BRA] = "BRA", + [BRK] = "BRK", + [BVC] = "BVC", + [BVS] = "BVS", + [CLC] = "CLC", + [CLI] = "CLI", + [CLV] = "CLV", + [CMP] = "CMP", + [CPB] = "CPB", + [CPS] = "CPS", + [CPX] = "CPX", + [CPY] = "CPY", + [DEB] = "DEB", + [DEC] = "DEC", + [DEX] = "DEX", + [DEY] = "DEY", + [DIV] = "DIV", + [INB] = "INB", + [INC] = "INC", + [INX] = "INX", + [INY] = "INY", + [JMP] = "JMP", + [JSR] = "JSR", + [LDA] = "LDA", + [LDB] = "LDB", + [LDX] = "LDX", + [LDY] = "LDY", + [LSL] = "LSL", + [LSR] = "LSR", + [MUL] = "MUL", + [NOP] = "NOP", + [ORA] = "ORA", + [PHA] = "PHA", + [PHB] = "PHB", + [PHP] = "PHP", + [PHX] = "PHX", + [PHY] = "PHY", + [PLA] = "PLA", + [PLB] = "PLB", + [PLP] = "PLP", + [PLX] = "PLX", + [PLY] = "PLY", + [ROL] = "ROL", + [ROR] = "ROR", + [RTI] = "RTI", + [RTS] = "RTS", + [SBC] = "SBC", + [SEC] = "SEC", + [SEI] = "SEI", + [STA] = "STA", + [STB] = "STB", + [STX] = "STX", + [STY] = "STY", + [TAB] = "TAB", + [TAX] = "TAX", + [TAY] = "TAY", + [TBA] = "TBA", + [TSX] = "TSX", + [TXA] = "TXA", + [TXS] = "TXS", + [TXY] = "TXY", + [TYA] = "TYA", + [TYX] = "TYX", + [WAI] = "WAI", + [XOR] = "XOR" +}; + 
+static const char *ext_mne[EXT_OPNUM] = { + [LEA] = "LEA", + [PEA] = "PEA", + [ADD] = "ADD", + [SUB] = "SUB", + [ADE] = "ADE", + [SBE] = "SBE", + [ADS] = "ADS", + [SBS] = "SBS", + [NOT] = "NOT", + [LLM] = "LLM", + [LRM] = "LRM", + [RLM] = "RLM", + [RRM] = "RRM", + [ARM] = "ARM", + [PHE] = "PHE", + [PLE] = "PLE", + [CPE] = "CPE", + [ICE] = "ICE", + [LDS] = "LDS", + [DEE] = "DEE", + [INE] = "INE", + [DES] = "DES", + [INS] = "INS", + [STS] = "STS", + [STE] = "STE", + [STZ] = "STZ", + [SCO] = "SCO", + [ECO] = "ECO", + [CLZ] = "CLZ", + [CLO] = "CLO", + [BIT] = "BIT", + [MMV] = "MMV", + [SWP] = "SWP", + [PCN] = "PCN", + [REP] = "REP", + [REQ] = "REQ", + [RNE] = "RNE", + [LNG] = "LNG", + [LPO] = "LPO", + [LCS] = "LCS", + [LCC] = "LCC", + [LEQ] = "LEQ", + [LNE] = "LNE", + [SNG] = "SNG", + [SPO] = "SPO", + [SCS] = "SCS", + [SCC] = "SCC", + [SEQ] = "SEQ", + [SNE] = "SNE" +}; + +static const char *ortho_mne[ORTHO_OPNUM] = { + [MNG] = "MNG", + [MPO] = "MPO", + [MCS] = "MCS", + [MCC] = "MCC", + [MEQ] = "MEQ", + [MNE] = "MNE", + [MVS] = "MVS", + [MVC] = "MVC", + [OR ] = "OR", + [MOV] = "MOV", + [IML] = "IML", + [IDV] = "IDV", + [PSH] = "PSH", + [PUL] = "PUL", + [NEG] = "NEG", + [SET] = "SET" +}; + +static const char *set_cc[8] = { + "NG", + "PO", + "CS", + "CC", + "EQ", + "NE", + "VS", + "VC" +}; + +static const char *instdesc[OPNUM] = { + [ADC] = "ADd accumulator, with operand, Carry if needed.", + [AND] = "Bitwise AND accumulator, with operand.", + [ASR] = "Arithmetic Shift Right accumulator, with operand.", + [BCC] = "Branch if the Carry flag has been Cleared.", + [BCS] = "Branch if the Carry flag is Set.", + [BEQ] = "Branch if EQual (the zero flag has been set).", + [BNE] = "Branch if Not Equal (the zero flag has been cleared)", + [BNG] = "Branch if NeGative.", + [BPO] = "Branch if POsitive.", + [BRA] = "BRanch Always.", + [BRK] = "BReaKpoint", + [BVC] = "Branch if the oVerflow flag has been Cleared.", + [BVS] = "Branch if the oVerflow flag is Set.", + [CLC] = "CLear the 
Carry flag.", + [CLI] = "CLear the Interrupt flag.", + [CLV] = "CLear the oVerflow flag.", + [CMP] = "CoMPare acumulator, with operand.", + [CPB] = "ComPare the B register, with operand.", + [CPS] = "Clears the Processor Status register.", + [CPX] = "ComPare the X register, with operand.", + [CPY] = "ComPare the Y register, with operand.", + [DEB] = "DEcrement the B register.", + [DEC] = "DECrement accumulator, or memory.", + [DEX] = "DEcrement the X register.", + [DEY] = "DEcrement the Y register.", + [DIV] = "DIVide accumulator, with operand, and put the remainder into the B register.", + [INB] = "INcrement the B register.", + [INC] = "INCrement accumulator, or memory.", + [INX] = "INcrement the X register.", + [INY] = "INcrement the Y register.", + [JMP] = "JuMP to the address specified.", + [JSR] = "Jump to a SubRoutine.", + [LDA] = "LoaD the value from the operand, to the Accumulator.", + [LDB] = "LoaD the value from the operand, to the B register.", + [LDX] = "LoaD the value from the operand, to the X register.", + [LDY] = "LoaD the value from the operand, to the Y register.", + [LSL] = "Logical Shift Left accumulator, with operand.", + [LSR] = "Logical Shift Right accumulator, with operand.", + [MUL] = "MULtiply accumulator, with operand.", + [NOP] = "NO oPeration", + [ORA] = "Bitwise OR Accumulator, with operand.", + [PHA] = "PusH the number of bytes specified, from the Accumulator to the stack.", + [PHB] = "PusH the number of bytes specified, from the B register to the stack.", + [PHP] = "PusH the number of bytes specified, from the Processor status register to the stack.", + [PHX] = "PusH the number of bytes specified, from the X register to the stack.", + [PHY] = "PusH the number of bytes specified, from the Y register to the stack.", + [PLA] = "PuLl the number of bytes specified, from the stack, to the Accumulator.", + [PLB] = "PuLl the number of bytes specified, from the stack, to the B register.", + [PLP] = "PuLl the number of bytes specified, from 
the stack, to the Processor status register.", + [PLX] = "PuLl the number of bytes specified, from the stack, to the X register.", + [PLY] = "PuLl the number of bytes specified, from the stack, to the Y register.", + [ROL] = "ROtate Left accumulator, with operand.", + [ROR] = "ROtate Right accumulator, with operand.", + [RTI] = "ReTurn from an Interrupt.", + [RTS] = "ReTurn from a Subroutine.", + [SBC] = "SuBtract accumulator, with operand, Carry if needed", + [SEC] = "SEt the Carry flag.", + [SEI] = "SEt the Interrupt flag.", + [STA] = "STore the value from the Accumulator, in memory.", + [STB] = "STore the value from the B register, in memory.", + [STX] = "STore the value from the X register, in memory.", + [STY] = "STore the value from the Y register, in memory.", + [TAB] = "Transfer the value from the Accumulator, to the B register.", + [TAX] = "Transfer the value from the Accumulator, to the X register.", + [TAY] = "Transfer the value from the Accumulator, to the Y register.", + [TBA] = "Transfer the value from the Y register, to the Accumulator.", + [TSX] = "Transfer the value from the Stack pointer, to the X register.", + [TXA] = "Transfer the value from the X register, to the Accumulator.", + [TXS] = "Transfer the value from the X register, to the Stack pointer.", + [TXY] = "Transfer the value from the X register, to the Y register.", + [TYA] = "Transfer the value from the Y register, to the Accumulator.", + [TYX] = "Transfer the value from the Y register, to the X register.", + [WAI] = "WAIt for an interrupt", + [XOR] = "Bitwise XOR Accumulator, with operand." 
+}; + +static const uint8_t bitsize[4] = { + [0] = 0x07, + [1] = 0x0F, + [2] = 0x1F, + [3] = 0x3F +}; + +static const uint8_t amp[8] = { + [0] = 0x00, + [1] = 0x00, + [2] = 0x07, + [4] = 0x07, + [5] = 0x0B, + [6] = 0x0B, + [3] = 0x0F, + [7] = 0x0F +}; + + +extern uint16_t linenum; +extern uint16_t lineidx; +extern uint16_t stridx; +extern uint16_t comidx; +extern uint16_t inc_file; /* Number of included files. */ +extern uint16_t inc_count; + +struct bc { + uint64_t progsize; + uint64_t datasize; +}; + +typedef struct bc bytecount; + +extern uint8_t defined; +extern uint8_t isfixup; + +extern line *find_line(uint32_t ln, uint8_t dbg); +extern uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg); + +extern uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t dbg); +extern token *skip_expr(token *t, uint8_t dbg); +extern uint64_t parse_tokens(token *tm, line **l, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg); +extern token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s); +extern void assemble(line *ln, bytecount *bc, uint8_t dbg); +extern void fix_symtree(line *l); +extern void cleanup(); diff --git a/lexer/backup/assemble.c b/lexer/backup/assemble.c new file mode 100644 index 0000000..28825d1 --- /dev/null +++ b/lexer/backup/assemble.c @@ -0,0 +1,975 @@ +#include "asmmon.h" + +#define AM_ADDR (AM_ZM | AM_ZMX | AM_ZMY | \ + AM_IND | AM_INDX | AM_INDY | \ + AM_ABS | AM_ABX | AM_ABY | \ + AM_AIND | AM_AINDX | AM_AINDY | \ + AM_INDX2 | AM_ZM2) + +static const uint64_t mem_size = 0x04000000; /* Size of address space. 
*/ + +token *tok_global; + +uint8_t isexpr(uint8_t type, uint8_t dbg) { + switch (type) { + case EXPR_PLUS: + case EXPR_MINUS: + case EXPR_LOW: + case EXPR_HIGH: + case EXPR_OR: + case EXPR_LSHFT: + case EXPR_RSHFT: + return 1; + default: + return 0; + } +} + +uint8_t get_rs(token *t, uint8_t dbg) { + if (t->id == TOK_RS) { + return t->type; + } else { + return 0xFF; + } +} + +uint8_t get_of(token *t, uint8_t dbg) { + if (t->id == TOK_OF) { + return t->type; + } else { + return 0xFF; + } +} + +uint8_t get_ind(uint8_t mne, uint8_t am, uint8_t dbg) { + uint8_t base_idx = 0; + uint8_t offset = 0; + switch (mne) { + case CMP: base_idx = CMP_IND; break; + case CPB: base_idx = CPB_IND; break; + case JMP: base_idx = JMP_IND; break; + case JSR: base_idx = JSR_IND; break; + case LDA: base_idx = LDA_IND; break; + case LDB: base_idx = LDB_IND; break; + case LDX: base_idx = LDX_IND; break; + case LDY: base_idx = LDY_IND; break; + case STA: base_idx = STA_IND; break; + case STB: base_idx = STB_IND; break; + case STX: base_idx = STX_IND; break; + case STY: base_idx = STY_IND; break; + } + switch (am) { + case IND : offset = 0; break; + case INDY: offset += 1; break; + case INDX: offset += 2; break; + } + return base_idx + offset; +} + +uint8_t get_eind(uint8_t mne, uint8_t dbg) { + switch (mne) { + case DEC: return DEC_EIND; + case INC: return INC_EIND; + case STY: return STY_EIND; + case STA: return STA_EIND; + case STB: return STB_EIND; + case LDX: return LDX_EIND; + case STX: return STX_EIND; + case CPB: return CPB_EIND; + case CPX: return CPX_EIND; + case CPY: return CPY_EIND; + } + return 0xFF; +} + +static void write_value(uint64_t value, uint64_t address, uint8_t size) { + if (address < mem_size) { + size = (size > 7) ? 
7 : size; + #if 1 + if (size < 7) { + uint64_t mask = (-(uint64_t)1 >> ((7 - size) * 8)); + *(uint64_t *)(addr+address) = (*(uint64_t *)(addr+address) & ~mask) | (value & mask); + } else { + *(uint64_t *)(addr+address) = value; + } + #else + memcpy(addr+address, &value, size+1); + #endif + } +} + +uint64_t get_val(token *t, uint64_t addr, uint8_t size, uint8_t end_expr, uint8_t stop_reg, uint8_t dbg) { + uint64_t value = 0; + uint64_t tmp_val = 0; + uint8_t type = EXPR_NONE; + uint8_t isstart = 1; + do { + if (t->id == TOK_EXPR) { + type = t->type; + t = t->next; + } + if (stop_reg && t->id == TOK_REG) { + break; + } + switch (t->id) { + case TOK_HEX: + case TOK_DEC: + case TOK_BIN: + case TOK_CHAR: tmp_val = t->qword; t = t->next; break; + case TOK_SYM: + case TOK_LABEL: + for (; t->sym && t->sym->isstruct && t->next && t->next->id == TOK_SYM; t = t->next); + tmp_val = (t->sym) ? t->sym->val : addr; + t = t->next; + break; + } + if (end_expr != 0xFF && type == end_expr) { + break; + } + switch (type) { + case EXPR_PLUS : (isstart) ? (value = tmp_val) : (value += tmp_val); break; + case EXPR_MINUS: (isstart) ? 
(value = -tmp_val) : (value -= tmp_val); break; + case EXPR_OR : value |= tmp_val; break; + case EXPR_LSHFT: value <<= tmp_val; break; + case EXPR_RSHFT: value >>= tmp_val; break; + case EXPR_LOW : + value = tmp_val; + switch (size) { + default: + case 2 : value &= 0xFFFFFFFF; break; + case 1 : value &= 0x0000FFFF; break; + case 0 : value &= 0x000000FF; break; + } + break; + case EXPR_HIGH : + value = tmp_val; + switch (size) { + default: + case 2 : value >>= 0x20; break; + case 1 : value >>= 0x10; break; + case 0 : value >>= 0x08; break; + } + break; + case EXPR_NONE : value = tmp_val; break; + } + isstart = 0; + if (dbg) { + printf("get_val(): Value: $%"PRIX64", Expression type: $%X, Expression Value: $%"PRIX64".\n", value, type, tmp_val); + } + } while (t && t->id == TOK_EXPR && isexpr(t->type, dbg)); + return value; +} + +token *skip_expr(token *t, uint8_t end_expr, uint8_t stop_reg, uint8_t dbg) { + do { + t = (t->id == TOK_EXPR) ? t->next : t; + if (stop_reg && t->id == TOK_REG) { + break; + } + switch (t->id) { + case TOK_HEX : + case TOK_DEC : + case TOK_BIN : + case TOK_CHAR : + case TOK_SYM : + case TOK_LABEL: t = t->next; break; + } + if (end_expr != 0xFF && t->id == TOK_EXPR && t->type == end_expr) { + break; + } + } while (t && t->id == TOK_EXPR && isexpr(t->type, dbg)); + return t; +} + + + +uint8_t get_directivesize(uint8_t type, uint8_t dbg) { + switch (type) { + case DIR_QWORD: return 3; + case DIR_DWORD: return 2; + case DIR_WORD : return 1; + case DIR_BYTE : return 0; + } + return 0; +} + +uint16_t handle_struct(line **ln, uint64_t address, uint16_t offset, uint8_t dbg) { + uint8_t is_struct = 0; + uint8_t done = 0; + uint8_t ismember = 0; + uint16_t size = 0; + uint16_t member_size = 0; + line *l = *ln; + symbol *strct = NULL; + token *tok = l->tok; + + for (uint8_t found = 0; tok && !found; tok = tok->next) { + switch (tok->id) { + case TOK_DIR: + is_struct = (tok->type == DIR_STRUCT); + found = (tok->type == DIR_STRUCT || tok->type == 
DIR_UNION); + break; + case TOK_STRUCT: is_struct = 1; + case TOK_UNION : found = 1; break; + } + } + if (tok != NULL) { + strct = tok->sym; + } + + if (l && l->next) { + l = l->next; + } + + for (; l && !done; l = l->next) { + token *t = l->tok; + token *start = t; + symbol *member; + for (; t && !done; t = t->next) { + switch (t->id) { + case TOK_MEMBER: ismember = 1; member = t->sym; break; + case TOK_DIR : + ismember = (t->type == DIR_STRUCT || t->type == DIR_UNION) ? 1 : ismember; + done = ((is_struct && t->type == DIR_ENDSTRUCT) || (!is_struct && t->type == DIR_ENDUNION)); + if (!done && ismember) { + switch (t->type) { + case DIR_BYTE : member_size = 1; break; + case DIR_WORD : member_size = 2; break; + case DIR_DWORD : member_size = 4; break; + case DIR_QWORD : member_size = 8; break; + case DIR_UNION : + case DIR_STRUCT: member_size = handle_struct(&l, address, offset, dbg); break; + case DIR_RES : member_size = get_val(t, address, 3, 0xFF, 0, dbg); t = skip_expr(t, 0xFF, 0, dbg); break; + } + if (member && t->type != DIR_UNION && t->type != DIR_STRUCT) { + member->val = offset; + } + if (is_struct) { + size += member_size; + offset += member_size; + } else if (size < member_size) { + size = member_size; + } + } + ismember = 0; + break; + } + } + if (done) { + break; + } + } + *ln = l; + if (strct != NULL) { + strct->val = size; + } + return size; +} + +uint64_t handle_directive(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { + union reg val; + uint8_t c = 0; + uint8_t tmp = 0; + uint8_t type = t->type; + uint64_t tmpaddr = address; + t = t->next; + for (; t; t = t->next) { + tmp = 0; + switch (t->id) { + case TOK_HEX: + case TOK_DEC: + case TOK_BIN: + case TOK_CHAR: + case TOK_SYM: + case TOK_LABEL: + val.u64 = get_val(t, tmpaddr, get_directivesize(type, dbg), 0xFF, 0, dbg); + switch (type) { + case DIR_QWORD: tmp = 8; break; + case DIR_DWORD: tmp = 4; break; + case DIR_WORD : tmp = 2; break; + case DIR_BYTE : tmp = 1; break; + } 
+ write_value(val.u64, tmpaddr, tmp-1); + tmpaddr += tmp; + bc->datasize += tmp; + if (t->next && t->next->id == TOK_EXPR && isexpr(t->next->type, dbg)) { + t = skip_expr(t, 0xFF, 0, dbg); + } + break; + case TOK_STRING: + if (type == DIR_BYTE) { + for (uint16_t k = 0; t->str[k] != '\0'; k++) { + switch (t->str[k]) { + case '\\': + switch (t->str[k+1]) { + case 'n' : c = '\n'; break; + case 'r' : c = '\r'; break; + case 't' : c = '\t'; break; + case '\"': c = '\"'; break; + case '\'': c = '\''; break; + case '\\': c = '\\'; break; + case '0' : c = '\0'; break; + } + k++; + break; + default: c = t->str[k]; break; + } + if (isasm) { + addr[tmpaddr] = c; + } + tmpaddr++; + bc->datasize++; + } + if (isasm) { + addr[tmpaddr] = '\0'; + } + tmpaddr++; + bc->datasize++; + } + break; + } + if (t == NULL) { + break; + } + } + return tmpaddr; +} + +static uint8_t write_inst(uint8_t prefix, uint8_t ext_prefix, uint8_t opcode, uint64_t value, uint64_t address, uint8_t size, uint8_t isasm, uint8_t dbg) { + uint8_t inst_size = 0; + union reg ins; + if (prefix & 3) { + ins.u8[inst_size++] = prefix; + } + if ((ext_prefix & 0x0D) == 0x0D) { + ins.u8[inst_size++] = ext_prefix; + } + ins.u8[inst_size++] = opcode; + if (isasm) { + write_value(ins.u64, address, inst_size-1); + if (size) { + write_value(value, address+inst_size, size-1); + } + } + inst_size += size; + return inst_size; +} + +void get_operands(token *t, operand *op, uint64_t address, uint8_t rs, uint8_t dbg) { + uint8_t op_type; + uint8_t op_inst; + switch (t->id) { + case TOK_OPCODE: + case TOK_EXTOP : + case TOK_ORTHO : + op_type = tok->id; + op_inst = tok->byte; + t = t->next; + break; + } + int i = 0; + int old_i = -1; + uint8_t expr_type = 0xFF; + uint8_t stop_reg = 0; + uint64_t value = 0; + uint8_t reg = 0; + uint8_t got_value = 0; + uint8_t is_sib = 0; + for (; t; t = t->next) { + switch (t->id) { + case TOK_HEX : + case TOK_DEC : + case TOK_BIN : + case TOK_SYM : + case TOK_LABEL: + if (!got_value) { + expr_type 
= (expr_type == 0xFF && t->next && t->next->id == TOK_EXPR) ? t->next->type : expr_type; + switch (expr_type) { + default : stop_reg = 1; break; + case EXPR_MUL : stop_reg = 0; break; + } + is_sib = !stop_reg; + value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_reg) ? expr_type : 0xFF, stop_reg, dbg); + got_value = 1; + } else { + got_value = 0; + } + /* Falls Through. */ + case TOK_MEM: + if (old_i != i) { + op[i].type = 1; /* Set type to memory. */ + op[i].id = (t->id == TOK_MEM) ? t->type : op[i].id; + op[i].id = (is_sib) ? MEM_SIB : op[i].id; + op[i].id2[0] = (is_sib) ? value : op[i].id2[0]; + old_i = i; + } else { + } + op[i].value = (!is_sib && got_value) ? value : op[i].value; + if (stop_reg && got_value) { + t = skip_expr(t, 0xFF, stop_reg, dgb); + } + i += (t->next && t->next->id != TOK_EXPR); + break; + case TOK_REG: + if (old_i != i) { + op[i].type = 0; /* Set type to register. */ + op[i].id = t->type; + old_i = i; + } else { + op[i].id = (op[i].id == MEM_IND) ? MEM_RIND : op[i].id; + is_sib = (op[i].id == MEM_SIB); + op[i].id2[is_sib] = (t->type << (reg*4)); + reg++; + reg = (reg > 1) ? 0 : reg; + } + i += (t->next && t->next->id != TOK_EXPR); + break; + case TOK_EXPR: + expr_type = t->type; + switch (expr_type) { + default : stop_reg = 1; break; + case EXPR_MUL : stop_reg = 0; break; + } + if (!got_value) { + value = get_val(t, address, (rs != 0xFF) ? rs : 0, (!stop_reg) ? 
expr_type : 0xFF, stop_reg, dbg); + got_value = 1; + } else { + got_value = 0; + } + break; + } + } +} + +uint64_t handle_opcode(token *t, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { + union reg val; + uint8_t opsize; + uint8_t id; + uint8_t instr; + uint8_t opcode; + uint8_t ext_prefix = 0; + uint8_t type; + uint32_t am = 0; + uint8_t tmp = 0; + uint8_t prefix = 0; + uint8_t rs = 0; + uint8_t of = 0; + uint8_t tmp_prefix = 0; + uint8_t inst_size = 0; + val.u64 = 0; + instruction ins; + operand op[2]; + memset(op, 0xFF, sizeof(op)); + + for (; t; t = t->next) { + if (t->id == TOK_OPCODE || t->id == TOK_EXTOP) { + id = t->id; + instr = t->byte; + type = t->type; + } else { + break; + } + tmp = 0; + opsize = 1; + opcode = 0; + if (t->next) { + rs = get_rs(t->next, dbg); + t = (rs != 0xFF) ? t->next : t; + if (t->next) { + of = get_of(t->next, dbg); + t = (of != 0xFF) ? t->next : t; + } + } + get_operands(t, op, address, rs, dbg); + if (rs != 0xFF || of != 0xFF) { + tmp_prefix = (rs != 0xFF) ? (rs << 0) : tmp_prefix; + tmp_prefix |= (of != 0xFF) ? (of << 2) : tmp_prefix; + } + prefix = (tmp_prefix) ? 
((tmp_prefix << 4) | 3) : 0; + uint8_t isincdec = (instr == INC || instr == DEC); + uint8_t isimplied = (op[0].type == 0xFF); + switch (id) { + case TOK_OPCODE: ins = inst[instr]; break; + case TOK_EXTOP : ins = ext_inst[instr]; break; + case TOK_ORTHO : ins = ortho_inst[instr]; break; + } + am = ins.am; + uint8_t is_eind = (op[0].type && op[0].id == MEM_RIND && op[0].id2[0] == REG_E); + uint8_t is_mem = (op[0].type && op[0].id != MEM_IMM); + uint8_t is_idx = (is_mem && !op[1].type && (op[1].id == REG_X || op[1].id == REG_Y)); + if (id == TOK_EXTOP || (id == TOK_OPCODE && is_eind)) { + ext_prefix = 0x0D; + } else if (!is_idx) { + ext_prefix = 0x1D; + } + if ((am & AM_IMPL) && isimplied) { + type = IMPL; + } else { + if (ins.am & AM_REL) { + type = REL; + } + } + opcode = ins.op; + uint64_t saveaddr = address; + uint64_t max_val = 0; + uint8_t i = 0; + uint8_t j = 1; + uint8_t type2 = 0xFF; + + switch (type) { + case REG_B: + case IMPL: + case REG_E: + if (id == TOK_OPCODE && instr == CPS) { + rs = 0; + } + if ((am & (AM_IMPL|AM_BREG|AM_EIND|AM_EIND2))) { + if ((type == EIND) && (am & AM_EIND|AM_EIND2)) { + int eind_type = ((am & AM_EIND2) != 0); + switch (eind_type) { + case 0: opcode = (id == TOK_EXTOP) ? opcode+0x14 : opcode+0x10; break; + case 1: opcode = (id == TOK_EXTOP) ? opcode+0x01 : eind_base_ops[get_eind(instr, dbg)]; break; + } + } + opcode = ((am & AM_BREG) && type == BREG) ? opcode+0x14 : opcode; + } + break; + case REL: + case IMM: + if (am & (AM_IMM|AM_REL|AM_ORTHO|AM_ORTHO2)) { + rs = (rs != 0xFF) ? 
rs : 0; + tmp = (1 << rs); + if (type == REL) { + uint64_t max_sign = 0; + uint8_t offset = 1; + uint64_t tmp_val; + tmp_val = val.u64; + offset += (prefix != 0); + tmp_val -= offset+tmp; + tmp_val -= address; + switch (rs) { + default: max_sign = (int8_t )(1 << 7); break; + case 1 : max_sign = (int16_t)(1 << 15); break; + case 2 : max_sign = (int32_t)(1 << 31); break; + case 3 : max_sign = (int64_t)((uint64_t)1 << 63); break; + } + if ((int64_t)tmp_val > ~(int64_t)max_sign || (int64_t)tmp_val < (int64_t)max_sign) { + offset += (!rs); + rs += (rs <= 3); + tmp = (1 << rs); + tmp_val = val.u64; + tmp_val -= offset+tmp; + tmp_val -= address; + prefix = ((rs << 4) | 3); + } + val.u64 = tmp_val; + } + } + break; + default: + if (of != 0xFF) { + i = 8; + for (; i <= 64; i += 8, j++) { + max_val |= ((uint64_t)1 << (i-1)); + if ((int64_t)val.u64 >= ~(int64_t)(max_val) || (int64_t)val.u64 <= (int64_t)(max_val)) { + opsize = j; + break; + } + } + } else { + for (; i <= 64; i += 8, j++) { + max_val |= (0xFF << i); + if (val.u64 <= max_val) { + opsize = j; + break; + } + } + } + type2 = type; + if (type == 0xFF || (id == TOK_EXTOP && type2 != 0xFF)) { + switch (opsize-1) { + case 0: case 2: case 5: case 3: type = ZM ; break; + case 1: case 4: case 6: case 7: type = ABS; break; + } + } + switch (type2) { + case ZMX : type = (type == ABS) ? ABSX : type2; break; + case ZMY : type = (type == ABS) ? ABSY : type2; break; + case IND : type = (type == ABS) ? AIND : type2; break; + case INDX: type = (type == ABS) ? AINDX : type2; break; + case INDY: type = (type == ABS) ? 
AINDY : type2; break; + } + if (opsize) { + uint8_t is_abs = 0; + switch (type) { + case ABS : + case ABSX : + case ABSY : + case AIND : + case AINDX: + case AINDY: is_abs = 1; break; + + } + if (!is_abs || (type2 != 0xFF && type == type2)) { + switch (opsize) { + case 2: opsize = 3; break; + case 5: opsize = 6; break; + } + } + prefix |= amp[opsize-1]; + } + if (am & AM_ADDR|AM_ORTHO|AM_ORTHO2) { + switch (type) { + case ZM: + if (am & AM_ZM) { + opcode += 0x04; + } else if (am & AM_ZM2) { + opcode += 0x20; + } + break; + case ZMX: + if (am & AM_ZMX) { + opcode += (id == TOK_OPCODE) ? 0x06 : 0x54; + } + break; + case ZMY: + if (am & AM_ZMY) { + opcode += 0x14; + } + break; + case INDX: + if (am & AM_INDX) { + opcode += (id == TOK_OPCODE) ? 0x16 : 0x94; + break; + } + /* Falls Through. */ + case IND: + case INDY: + if ((id == TOK_OPCODE) && (am & (AM_IND|AM_INDY|AM_INDX2))) { + opcode = ind_ops[get_ind(instr, type, dbg)]; + } else { + opcode += (type == IND) ? 0x44 : 0x84; + } + break; + case ABS: + if (am & AM_ABS) { + opcode += 0x10; + } + break; + case ABSX: + if (am & AM_ABX) { + opcode += 0x50; + } + break; + case ABSY: + if (am & AM_ABY) { + opcode += 0x00; + } + break; + case AIND: + if (am & AM_AIND) { + opcode += 0x40; + } + break; + case AINDX: + if (am & AM_AINDX) { + opcode += 0x90; + } + break; + case AINDY: + if (am & AM_AINDY) { + opcode += 0x80; + } + break; + + } + tmp = opsize; + } + break; + } + inst_size = write_inst(prefix, ext_prefix, opcode, val.u64, address, tmp, isasm, dbg); + address += inst_size; + bc->progsize += inst_size; + } + return address; +} + +uint64_t parse_tokens(token *t, line **l, bytecount *bc, uint8_t isasm, uint64_t address, uint8_t dbg) { + for (; t; t = t->next) { + switch (t->id) { + case TOK_DIR: + switch (t->type) { + case DIR_STRUCT: + case DIR_UNION : handle_struct(l, address, 0, dbg); break; + case DIR_RES: t = t->next; address += get_val(t, address, 3, 0xFF, 0, dbg); break; + case DIR_ORG: t = t->next; address = 
get_val(t, address, 3, 0xFF, 0, dbg); break; + case DIR_BYTE: + case DIR_WORD: + case DIR_DWORD: + case DIR_QWORD: address = handle_directive(t, bc, isasm, address, dbg); break; + } + break; + case TOK_EXTOP : + case TOK_OPCODE: address = handle_opcode(t, bc, isasm, address, dbg); break; + case TOK_COMMENT: break; + } + } + return address; +} + +token *make_token(uint8_t id, uint8_t type, uint8_t space, uint8_t tab, uint64_t value, char *str, symbol *s) { + token *new_tok = malloc(sizeof(token)); + (last_tok) ? (last_tok->next = new_tok) : (tokens = new_tok); + + new_tok->id = id; + new_tok->type = type; + + new_tok->tab = tab; + new_tok->space = space; + + new_tok->subtab = 0; + new_tok->subspace = 0; + + new_tok->digits = 0; + + if (s) { + new_tok->sym = s; + } else if (str[0]) { + new_tok->str = str; + } else { + new_tok->qword = value; + } + new_tok->next = NULL; + last_tok = new_tok; + return new_tok; +} + +void assemble(line *ln, bytecount *bc, uint8_t dbg) { + uint64_t address = 0; + line *l = ln; + for (int i = 0; i < 2; i++) { + l = ln; + for (; l; l = l->next) { + l->addr = address; + token *t = l->tok; + address = parse_tokens(t, &l, bc, 0, address, dbg); + } + l = ln; + for (; l; l = l->next) { + address = l->addr; + token *t = l->tok; + for (; t; t = t->next) { + if (t->id == TOK_LABEL && t->sym->val != address) { + t->sym->val = l->addr; + } + } + } + } + l = ln; + bc->progsize = 0; + bc->datasize = 0; + for (; l; l = l->next) { + address = parse_tokens(l->tok, &l, bc, 1, address, dbg); + } +} + +static void find_dupsym() { + symbol *root = symbols; + symbol *s = symbols; + for (; s; s = s->next) { + root = symbols; + for (int i = 0; root; root = root->next) { + if (root == s) { + i++; + } + if (i > 1) { + printf("Found duplicate symbol, s->name: %s, root->name: %s\n", s->name, root->name); + i = 0; + } + } + } +} + +static symbol *find_fixup(token *t) { + fixup* f = fixups; + for (; f && t != f->t; f = f->next); + return (f && t == f->t) ? 
f->s : NULL; +} + + +static void print_symval(symbol *s) { + if (s) { + if (s->down) { + print_symval(s->down); + } + if (s->name) { + printf("s->name: %s, s->val: $%"PRIX64"\n", s->name, s->val); + } + print_symval(s->next); + } +} + +static void print_symtree(symbol *s, int depth) { + if (s) { + if (s->name != NULL) { + for (int i = depth; i; i--) { + printf("|%s", (i > 1) ? " " : "--"); + } + printf("%s: $%"PRIX64"\n", s->name, s->val); + } + if (s->down != NULL) { + print_symtree(s->down, depth+1); + } + print_symtree(s->next, depth); + } +} + +void fix_symtree(line *l) { + symbol *s = symbols; + symbol *cur_sym = NULL; + symbol *sym_struct = NULL; + symbols = NULL; + last_sym = NULL; + int islocal = 0; + int isanon = 0; + int is_struct = 0; + int is_structsym = 0; + for (; l; l = l->next) { + token *t = l->tok; + token *lt = NULL; + for (; t; t = t->next) { + int ismember = (t->id == TOK_MEMBER); + switch (t->id) { + case TOK_STRUCT: + case TOK_UNION : islocal = !(is_struct == 1 && lt && lt->id == TOK_DIR); + case TOK_SYM : + if (t->id == TOK_SYM && t != l->tok) { + break; + } + case TOK_MEMBER: + case TOK_LABEL : + if (symbols) { + (!islocal && s && !s->up) ? (last_sym = s) : (last_loc = s); + } + if (((t->type == 1 || ismember) && !islocal) || (islocal && ismember && is_structsym)) { + is_structsym = 0; + last_loc = NULL; + islocal = 1; + cur_sym = s; + s->down = t->sym; + s->down->up = s; + s = s->down; + if (s) { + s->next = NULL; + s->prev = NULL; + s->down = NULL; + } + locals = s; + } else if ((islocal || t->type == 0)) { + if (t->type == 0 && !is_struct && islocal && !ismember) { + islocal = 0; + if (s) { + s->up->down = locals; + s = s->up; + } + } + symbol *tmp = s; + s = t->sym; + if (s) { + s->prev = (tmp && tmp != s) ? tmp : NULL; + s->up = (s->prev) ? s->prev->up : s->up; + } + if (s && s->next) { + s->next = NULL; + } + } + if (!islocal) { + last_loc = NULL; + (last_sym) ? 
(last_sym->next = s) : (symbols = s); + cur_sym = s; + if (last_sym) { + last_sym->next->prev = last_sym; + last_sym->next->up = last_sym->up; + last_sym->next->down = NULL; + } + } else { + (last_loc) ? (last_loc->next = s) : (locals = s); + if (last_loc) { + last_loc->next->prev = last_loc; + last_loc->next->up = last_loc->up; + last_loc->next->down = NULL; + } else { + locals->prev = NULL; + locals->down = NULL; + } + } + break; + case TOK_DIR: + if (t->type == DIR_STRUCT || t->type == DIR_UNION) { + is_struct++; + is_structsym = (t->next && (t->next->id == TOK_STRUCT || t->next->id == TOK_UNION)); + if ((!is_structsym) || (isanon && is_structsym)) { + isanon++; + } + sym->name = NULL; + } + s = s->next; + free(sym); + sym = NULL; + free_symbols(s); + } +} + +static inline void free_fixups(fixup *f) { + fixup *fix; + if (f != NULL) { + fix = f; + f = f->next; + free(fix); + fix = NULL; + free_fixups(f); + } +} + +uint64_t get_tokmem(token *t) { + uint64_t i = 0; + for (; t; t = t->next, i++); + return i*sizeof(token); +} + +void get_linemem(line *l) { + uint64_t i = 0; + uint64_t j = 0; + for (; l; j += get_tokmem(l->tok), l = l->next, i++); + printf("Bytes per line: %"PRIu64", Bytes per token: %"PRIu64", Total size of line table in bytes: %"PRIu64"\n", sizeof(line), sizeof(token), j+(i*sizeof(line))); +} + +void cleanup() { + uint16_t i; + if (lines) { + /*get_linemem(lines);*/ + /*fix_symtree(lines);*/ + free_lines(lines); + lines = NULL; + } + if (symbols) { + /*print_symtree(symbols, 0);*/ + free_symbols(symbols); + symbols = NULL; + } + if (fixups) { + free_fixups(fixups); + fixups = NULL; + } + while (i < stridx || i < comidx) { + if (i < stridx && string[i]) { + free(string[i]); + string[i] = NULL; + } + if (i < comidx && comment[i]) { + free(comment[i]); + comment[i] = NULL; + } + i++; + } +} diff --git a/lexer/backup/enums.h b/lexer/backup/enums.h new file mode 100644 index 0000000..07338ee --- /dev/null +++ b/lexer/backup/enums.h @@ -0,0 +1,540 @@ 
+enum am { + /* Part of Base ISA. */ + IMM, /* Immediate Data. */ + ZM, /* Zero Matrix. */ + ZMX, /* Zero Matrix, indexed with X. */ + ZMY, /* Zero Matrix, indexed with Y. */ + IND, /* Indirect. */ + INDX, /* Indexed Indirect. */ + INDY, /* Indirect Indexed. */ + ABS, /* Absolute. */ + REL, /* Relative to Program Counter. */ + BREG, /* B Register. */ + IMPL, /* Implied. */ + /* Part of Base Extension. */ + ABSX, /* Absolute, Indexed with X. */ + ABSY, /* Absolute, Indexed with Y. */ + AIND, /* Absolute Indirect. */ + AINDX, /* Absolute Indexed Indirect. */ + AINDY, /* Absolute Indirect Indexed. */ + EIND, /* Effective Address Register, Indirect. */ +}; + +/* Part of the Orthogonal Extension. */ +enum ortho_reg { + REG_A, + REG_B, + REG_X, + REG_Y, + REG_E, + REG_C, + REG_D, + REG_S, + REG_F, + REG_SP, + REG_BP, + REG_R11, + REG_R12, + REG_R13, + REG_R14, + REG_R15, +}; + +enum ortho_mem { + MEM_ABS, /* Absolute. */ + MEM_ZM, /* Zero Matrix. */ + MEM_ABSR, /* Absolute, Indexed with register. */ + MEM_ZMR, /* Zero Matrix, Indexed with register. */ + MEM_ZINDR, /* Zero Matrix, Indirect Indexed Register. */ + MEM_ZRIND, /* Zero Matrix, Indexed Indirect Register. */ + MEM_AINDR, /* Absolute, Indirect Indexed Register. */ + MEM_ARIND, /* Absolute, Indexed Indirect Register. */ + MEM_RIND, /* Register Indirect. */ + MEM_SIB, /* Scale Index Base. 
*/ +}; + +enum mne { + ADC, + AND, + ASR, + BCC, + BCS, + BEQ, + BNE, + BNG, + BPO, + BRA, + BRK, + BVC, + BVS, + CLC, + CLI, + CLV, + CMP, + CPB, + CPS, + CPX, + CPY, + DEB, + DEC, + DEX, + DEY, + DIV, + INB, + INC, + INX, + INY, + JMP, + JSR, + LDA, + LDB, + LDX, + LDY, + LSL, + LSR, + MUL, + NOP, + ORA, + PHA, + PHB, + PHP, + PHX, + PHY, + PLA, + PLB, + PLP, + PLX, + PLY, + ROL, + ROR, + RTI, + RTS, + SBC, + SEC, + SEI, + STA, + STB, + STX, + STY, + TAB, + TAX, + TAY, + TBA, + TSX, + TXA, + TXS, + TXY, + TYA, + TYX, + WAI, + XOR +}; + +enum ext_mne { + LEA, + PEA, + ADD, + SUB, + ADE, + SBE, + ADS, + SBS, + NOT, + LLM, + LRM, + RLM, + RRM, + ARM, + PHE, + PLE, + CPE, + ICE, + LDS, + DEE, + INE, + DES, + INS, + STS, + STE, + STZ, + SCO, + ECO, + CLZ, + CLO, + BIT, + MMV, + SWP, + PCN, + REP, + REQ, + RNE, + LNG, + LPO, + LCS, + LCC, + LEQ, + LNE, + SNG, + SPO, + SCS, + SCC, + SEQ, + SNE +}; + +enum ortho_mne { + MNG, + MPO, + MCS, + MCC, + MEQ, + MNE, + MVS, + MVC, + OR , + MOV, + IML, + IDV, + PSH, + PUL, + NEG, + SET +}; + +enum base_isa { + CPS_IMP = 0x00, /* Clear Processor Status. */ + ADC_IMM = 0x01, /* ADd with Carry. */ + ROR_IMM = 0x02, /* ROtate Right. */ + CPB_IMM = 0x04, /* ComPare B register. */ + ADC_Z = 0x05, /* ADC Zero Matrix. */ + ROR_Z = 0x06, /* ROR Zero Matrix. */ + CPB_Z = 0x08, /* CPB Zero Matrix. */ + CLC_IMP = 0x09, /* CLear Carry flag. */ + TAB_IMP = 0x0A, /* Transfer Accumulator to B. */ + STY_Z = 0x0C, /* STore Y register. */ + JMP_AB = 0x10, /* JMP Absolute. */ + ADC_AB = 0x11, /* ADC Absolute. */ + ROR_AB = 0x12, /* ROR Absolute. */ + CPB_AB = 0x14, /* CPB Absolute. */ + ADC_B = 0x15, /* ADC B Register. */ + ROR_B = 0x16, /* ROR B Register. */ + STY_AB = 0x18, /* STY Absolute. */ + SEC_IMP = 0x19, /* SEt Carry flag. */ + TBA_IMP = 0x1A, /* Transfer B to Accumulator. */ + JMP_Z = 0x20, /* JuMP to memory location. */ + SBC_IMM = 0x21, /* SuBtract with Carry. */ + MUL_IMM = 0x22, /* MULtiply accumulator. 
*/ + CPX_IMM = 0x24, /* ComPare X register. */ + SBC_Z = 0x25, /* SBC Zero Matrix. */ + MUL_Z = 0x26, /* MUL Zero Matrix. */ + CPX_Z = 0x28, /* CPX Zero Matrix. */ + CLI_IMP = 0x29, /* CLear Interrupt flag. */ + TAY_IMP = 0x2A, /* Transfer Accumulator to Y. */ + STA_Z = 0x2C, /* STore Accumulator. */ + STA_ZX = 0x2E, /* STA Zero Matrix, Indexed with X. */ + JSR_AB = 0x30, /* JSR Absolute. */ + SBC_AB = 0x31, /* SBC Absolute. */ + MUL_AB = 0x32, /* MUL Absolute. */ + CPX_AB = 0x34, /* CPX Absolute. */ + SBC_B = 0x35, /* SBC B Register. */ + MUL_B = 0x36, /* MUL B Register. */ + STA_AB = 0x38, /* STA Absolute. */ + SEI_IMP = 0x39, /* SEt Interrupt flag. */ + TYA_IMP = 0x3A, /* Transfer Y to Accumulator. */ + STA_ZY = 0x3C, /* STA Zero Matrix, Indexed with Y. */ + STA_IX = 0x3E, /* STA Indexed Indirect. */ + JSR_Z = 0x40, /* Jump to SubRoutine. */ + AND_IMM = 0x41, /* bitwise AND with accumulator. */ + DIV_IMM = 0x42, /* DIVide with accumulator. */ + CPY_IMM = 0x44, /* ComPare Y register. */ + AND_Z = 0x45, /* AND Zero Matrix. */ + DIV_Z = 0x46, /* DIV Zero Matrix. */ + CPY_Z = 0x48, /* CPY Zero Matrix. */ + CLV_IMP = 0x49, /* CLear oVerflow flag. */ + TAX_IMP = 0x4A, /* Transfer Accumulator to X. */ + STB_Z = 0x4C, /* STore B register. */ + STB_ZX = 0x4E, /* STB Zero Matrix, Indexed with X. */ + RTS_IMP = 0x50, /* ReTurn from Subroutine. */ + AND_AB = 0x51, /* AND Absolute. */ + DIV_AB = 0x52, /* DIV Absolute. */ + CPY_AB = 0x54, /* CPY Absolute. */ + AND_B = 0x55, /* AND B Register. */ + DIV_B = 0x56, /* DIV B Register. */ + STB_AB = 0x58, /* STB Absolute. */ + WAI_IMP = 0x59, /* WAit for Interrupt. */ + TXA_IMP = 0x5A, /* Transfer X to Accumulator. */ + STB_ZY = 0x5C, /* STB Zero Matrix, Indexed with Y. */ + STB_IX = 0x5E, /* STB Indexed Indirect. */ + RTI_IMP = 0x60, /* ReTurn from Interrupt. */ + ORA_IMM = 0x61, /* bitwise OR with Accumulator. */ + ASR_IMM = 0x62, /* Arithmetic Shift Right. */ + LDX_IMM = 0x64, /* LoaD X register. 
*/ + ORA_Z = 0x65, /* ORA Zero Matrix. */ + ASR_Z = 0x66, /* ASR Zero Matrix. */ + LDX_Z = 0x68, /* LDX Zero Matrix. */ + BRK_IMP = 0x69, /* BReaK. */ + TYX_IMP = 0x6A, /* Transfer Y to X. */ + STX_Z = 0x6C, /* STore X register. */ + PHP_IMP = 0x6E, /* PusH Processor status to stack. */ + BPO_REL = 0x70, /* Branch if POsitive. */ + ORA_AB = 0x71, /* ORA Absolute. */ + ASR_AB = 0x72, /* ASR Absolute. */ + LDX_AB = 0x74, /* LDX Absolute. */ + ORA_B = 0x75, /* ORA B Register. */ + ASR_B = 0x76, /* ASR B Register. */ + STX_AB = 0x78, /* STX Absolute. */ + DEY_IMP = 0x79, /* DEcrement Y register. */ + TXY_IMP = 0x7A, /* Transfer X to Y. */ + CPB_IN = 0x7C, /* CPB Indirect */ + PLP_IMP = 0x7E, /* PuLl Processor status from stack. */ + BNG_REL = 0x80, /* Branch if NeGative. */ + XOR_IMM = 0x81, /* bitwise XOR with accumulator. */ + CMP_IMM = 0x82, /* CoMPare accumulator. */ + DEC_IMP = 0x84, /* DECrement accumulator. */ + XOR_Z = 0x85, /* XOR Zero Matrix. */ + CMP_Z = 0x86, /* CMP Zero Matrix. */ + DEC_Z = 0x88, /* DEC Zero Matrix. */ + INY_IMP = 0x89, /* INcrement Y register. */ + TSX_IMP = 0x8A, /* Transfer Stack pointer to X. */ + CMP_IN = 0x8C, /* CMP Indirect */ + PHA_IMP = 0x8E, /* PusH Accumulator to stack. */ + BCS_REL = 0x90, /* Branch if Carry Set. */ + XOR_AB = 0x91, /* XOR Absolute. */ + CMP_AB = 0x92, /* CMP Absolute. */ + DEC_AB = 0x94, /* DEC Absolute. */ + XOR_B = 0x95, /* XOR B Register. */ + CMP_B = 0x96, /* CMP B Register. */ + DEB_IMP = 0x99, /* Decrement B register. */ + TXS_IMP = 0x9A, /* Transfer X to Stack pointer. */ + STY_IN = 0x9C, /* STY Indirect */ + PLA_IMP = 0x9E, /* PuLl Accumulator from stack. */ + BCC_REL = 0xA0, /* Branch if Carry Clear. */ + LSL_IMM = 0xA1, /* Logical Shift Left. */ + LDY_IMM = 0xA2, /* LoaD Y register. */ + INC_IMP = 0xA4, /* INCrement accumulator. */ + LSL_Z = 0xA5, /* LSL Zero Matrix. */ + LDY_Z = 0xA6, /* LDY Zero Matrix. */ + INC_Z = 0xA8, /* INC Zero Matrix. */ + INB_IMP = 0xA9, /* Increment B register. 
*/ + CMP_IX = 0xAA, /* CMP Indexed Indirect. */ + LDY_IN = 0xAC, /* LDY Indirect */ + PHB_IMP = 0xAE, /* PusH B register to stack. */ + BEQ_REL = 0xB0, /* Branch if EQual. */ + LSL_AB = 0xB1, /* LSL Absolute. */ + LDY_AB = 0xB2, /* LDY Absolute. */ + INC_AB = 0xB4, /* INC Absolute. */ + LSL_B = 0xB5, /* LSL B Register. */ + DEX_IMP = 0xB9, /* DEcrement X register. */ + CPB_IX = 0xBA, /* CPB Indexed Indirect. */ + LDX_IN = 0xBC, /* LDX Indirect */ + PLB_IMP = 0xBE, /* PuLl B register from stack. */ + BNE_REL = 0xC0, /* Branch if Not Equal. */ + LSR_IMM = 0xC1, /* Logical Shift Right. */ + LDA_IMM = 0xC2, /* LoaD Accumulator. */ + LDA_IN = 0xC4, /* LDA Indirect */ + LSR_Z = 0xC5, /* LSR Zero Matrix. */ + LDA_Z = 0xC6, /* LDA Zero Matrix. */ + LDA_ZX = 0xC8, /* LDA Zero Matrix, Indexed with X. */ + INX_IMP = 0xC9, /* INcrement X register. */ + STA_IY = 0xCA, /* STA Indirect Indexed. */ + STX_IN = 0xCC, /* STX Indirect */ + PHY_IMP = 0xCE, /* PusH Y register to stack. */ + BVS_REL = 0xD0, /* Branch if oVerflow Set. */ + LSR_AB = 0xD1, /* LSR Absolute. */ + LDA_AB = 0xD2, /* LDA Absolute. */ + STA_IN = 0xD4, /* STA Indirect */ + LSR_B = 0xD5, /* LSR B Register. */ + LDA_ZY = 0xD6, /* LDA Zero Matrix, Indexed with Y. */ + LDA_IX = 0xD8, /* LDA Indexed Indirect. */ + LDA_IY = 0xD9, /* LDA Indirect Indexed. */ + STB_IY = 0xDA, /* STB Indirect Indexed. */ + JSR_IN = 0xDC, /* JSR Indirect */ + PLY_IMP = 0xDE, /* PuLl Y register from stack. */ + BVC_REL = 0xE0, /* Branch if oVerflow Clear. */ + ROL_IMM = 0xE1, /* ROtate Left. */ + LDB_IMM = 0xE2, /* LoaD B register. */ + LDB_IN = 0xE4, /* LDB Indirect */ + ROL_Z = 0xE5, /* ROL Zero Matrix. */ + LDB_Z = 0xE6, /* LDB Zero Matrix. */ + LDB_ZX = 0xE8, /* LDB Zero Matrix, Indexed with X. */ + LDB_IY = 0xE9, /* LDB Indirect Indexed. */ + NOP_IMP = 0xEA, /* No OPeration. */ + JMP_IN = 0xEC, /* JMP Indirect */ + PHX_IMP = 0xEE, /* PusH X register to stack. */ + BRA_REL = 0xF0, /* BRanch Always. */ + ROL_AB = 0xF1, /* ROL Absolute. 
*/ + LDB_AB = 0xF2, /* LDB Absolute. */ + STB_IN = 0xF4, /* STB Indirect */ + ROL_B = 0xF5, /* ROL B Register. */ + LDB_ZY = 0xF6, /* LDB Zero Matrix, Indexed with Y. */ + LDB_IX = 0xF8, /* LDB Indexed Indirect. */ + CMP_IY = 0xF9, /* CMP Indirect Indexed. */ + CPB_IY = 0xFA, /* CPB Indirect Indexed. */ + PLX_IMP = 0xFE /* PuLl X register from stack. */ +}; + +enum base_ext { + LEA_AY = 0x03, /* LEA Absolute, indexed with Y. */ + ADD_IMM = 0x06, /* ADD without carry. */ + LEA_Z = 0x07, /* Load Effective Address. */ + CPE_IMM = 0x08, /* ComPare Effective address register. */ + CLZ_Z = 0x09, /* Count Leading Zeros. */ + ADD_Z = 0x0A, /* ADD Zero Matrix. */ + STB_E = 0x0B, /* STB E Indirect. */ + CPE_Z = 0x0C, /* CPE Zero Matrix. */ + LNG_IMM = 0x0D, /* Load accumulator, if NeGative. */ + LNG_E = 0x0E, /* LNG E Indirect. */ + JMP_E = 0x10, /* JMP E Indirect. */ + ADC_E = 0x11, /* ADC E Indirect. */ + ROR_E = 0x12, /* ROR E Indirect. */ + LEA_AB = 0x13, /* LEA Absolute. */ + CLZ_AB = 0x15, /* CLZ Absolute. */ + ADD_AB = 0x16, /* ADD Absolute. */ + LEA_ZY = 0x17, /* LEA Zero Matrix, indexed with Y. */ + CPE_AB = 0x18, /* CPE Absolute. */ + CLZ_E = 0x19, /* CLZ E Indirect. */ + ADD_E = 0x1A, /* ADD E Indirect. */ + LDX_E = 0x1B, /* LDX E Indirect. */ + SNG_E = 0x1E, /* Store accumulator, if NeGative. */ + PEA_AY = 0x23, /* PEA Absolute, indexed with Y. */ + SUB_IMM = 0x26, /* SUBtract without carry. */ + PEA_Z = 0x27, /* Push Effective Address. */ + CLO_Z = 0x29, /* Count Leading Ones. */ + SUB_Z = 0x2A, /* SUB Zero Matrix. */ + STX_E = 0x2B, /* STX E Indirect. */ + ICE_Z = 0x2C, /* Interlocked Compare, and Exchange. */ + LPO_IMM = 0x2D, /* Load accumulator, if POsitive. */ + LPO_E = 0x2E, /* LPO E Indirect. */ + JSR_E = 0x30, /* JSR E Indirect. */ + SBC_E = 0x31, /* SBC E Indirect. */ + MUL_E = 0x32, /* MUL E Indirect. */ + PEA_AB = 0x33, /* PEA Absolute. */ + CLO_AB = 0x34, /* CLO Absolute. */ + SUB_AB = 0x35, /* SUB Absolute. 
*/ + PEA_ZY = 0x37, /* PEA Zero Matrix, indexed with Y. */ + ICE_AB = 0x38, /* ICE Absolute. */ + CLO_E = 0x39, /* CLO E Indirect. */ + SUB_E = 0x3A, /* SUB E Indirect. */ + CPB_E = 0x3B, /* CPB E Indirect. */ + ICE_E = 0x3C, /* ICE E Indirect. */ + SPO_E = 0x3E, /* Store accumulator, if POsitive. */ + LDS_IMM = 0x40, /* LoaD Stack pointer. */ + LEA_AI = 0x43, /* LEA Absolute Indirect. */ + LDS_Z = 0x44, /* LDS Zero Matrix. */ + ADE_IMM = 0x46, /* ADd Effective address register. */ + LEA_IN = 0x47, /* LEA Indirect. */ + BIT_Z = 0x49, /* BIt Test. */ + ADE_Z = 0x4A, /* ADE Zero Matrix. */ + CPX_E = 0x4B, /* CPX E Indirect. */ + LLM_Z = 0x4C, /* Logical shift Left, on Memory. */ + LCS_IMM = 0x4D, /* Load accumulator, if Carry Set. */ + LCS_E = 0x4E, /* LCS E Indirect. */ + LDS_AB = 0x50, /* LDS Absolute. */ + AND_E = 0x51, /* AND E Indirect. */ + DIV_E = 0x52, /* DIV E Indirect. */ + LEA_AX = 0x53, /* LEA Absolute, indexed with X. */ + LDS_E = 0x54, /* LDS E Indirect. */ + BIT_AB = 0x55, /* BIT Absolute. */ + ADE_AB = 0x56, /* ADE Absolute. */ + LEA_ZX = 0x57, /* LEA Zero Matrix, indexed with X. */ + LLM_AB = 0x58, /* LLM Absolute. */ + BIT_E = 0x59, /* BIT E Indirect. */ + CPY_E = 0x5B, /* CPY E Indirect. */ + LLM_E = 0x5C, /* LLM E Indirect. */ + SCS_E = 0x5E, /* Store accumulator, if Carry Set. */ + SCO_IMM = 0x60, /* Start one, or more COre(s). */ + PEA_AI = 0x63, /* PEA Absolute Indirect. */ + SCO_Z = 0x64, /* SCO Zero Matrix. */ + SBE_IMM = 0x66, /* SuBtract Effective address register. */ + PEA_IN = 0x67, /* PEA Indirect. */ + SBE_Z = 0x6A, /* SBE Zero Matrix. */ + PHE_IMP = 0x6B, /* PusH Effective address register to stack. */ + LRM_Z = 0x6C, /* Logical shift Right, on Memory. */ + LCC_IMM = 0x6D, /* Load accumulator, if Carry Clear. */ + LCC_E = 0x6E, /* LCC E Indirect. */ + SCO_AB = 0x70, /* SCO Absolute. */ + ORA_E = 0x71, /* ORA E Indirect. */ + ASR_E = 0x72, /* ASR E Indirect. */ + PEA_AX = 0x73, /* PEA Absolute, indexed with X. 
*/ + SCO_E = 0x74, /* SCO E Indirect. */ + SBE_AB = 0x76, /* SBE Absolute. */ + PEA_ZX = 0x77, /* PEA Zero Matrix, indexed with X. */ + LRM_AB = 0x78, /* LRM Absolute. */ + PLE_IMP = 0x7B, /* PuLl Effective address register from stack. */ + LRM_E = 0x7C, /* LRM E Indirect. */ + SCC_E = 0x7E, /* Store accumulator, if Carry Clear. */ + ECO_IMM = 0x80, /* End one, or more COre(s). */ + DEC_E = 0x82, /* DEC E Indirect. */ + LEA_AIY = 0x83, /* LEA Absolute Indirect Indexed. */ + ECO_Z = 0x84, /* ECO Zero Matrix. */ + ADS_IMM = 0x86, /* ADd Stack pointer. */ + LEA_IY = 0x87, /* LEA Indirect Indexed. */ + ADS_Z = 0x8A, /* ADS Zero Matrix. */ + DEE_IMP = 0x8B, /* DEcrement Effective address register. */ + RLM_Z = 0x8C, /* Rotate Left, on Memory. */ + LEQ_IMM = 0x8D, /* Load accumulator, if EQual. */ + LEQ_E = 0x8E, /* LEQ E Indirect. */ + ECO_AB = 0x90, /* ECO Absolute. */ + XOR_E = 0x91, /* XOR E Indirect. */ + CMP_E = 0x92, /* CMP E Indirect. */ + LEA_AIX = 0x93, /* LEA Absolute Indexed Indirect. */ + ECO_E = 0x94, /* ECO E Indirect. */ + ADS_AB = 0x96, /* ADS Absolute. */ + LEA_IX = 0x97, /* LEA Indexed Indirect. */ + RLM_AB = 0x98, /* RLM Absolute. */ + ADS_E = 0x9A, /* ADS E Indirect. */ + INE_IMP = 0x9B, /* INcrement Effective address register. */ + RLM_E = 0x9C, /* RLM E Indirect. */ + SEQ_E = 0x9E, /* Store accumulator, if EQual. */ + INC_E = 0xA2, /* INC E Indirect. */ + PEA_AIY = 0xA3, /* PEA Absolute Indirect Indexed. */ + STS_Z = 0xA4, /* STore Stack pointer. */ + SBS_IMM = 0xA6, /* SuBtract Stack pointer. */ + PEA_IY = 0xA7, /* PEA Indirect Indexed. */ + SBS_Z = 0xAA, /* SBS Zero Matrix. */ + DES_IMP = 0xAB, /* DEcrement Stack pointer. */ + RRM_Z = 0xAC, /* Rotate Right, on Memory. */ + LNE_IMM = 0xAD, /* Load accumulator, if Not Equal. */ + LNE_E = 0xAE, /* LNE E Indirect. */ + STS_AB = 0xB0, /* STS Absolute. */ + LSL_E = 0xB1, /* LSL E Indirect. */ + LDY_E = 0xB2, /* LDY E Indirect. */ + PEA_AIX = 0xB3, /* PEA Absolute Indexed Indirect. 
*/ + STS_E = 0xB4, /* STS E Indirect. */ + SBS_AB = 0xB6, /* SBS Absolute. */ + PEA_IX = 0xB7, /* PEA Indexed Indirect. */ + RRM_AB = 0xB8, /* RRM Absolute. */ + SBS_E = 0xBA, /* SBS E Indirect. */ + INS_IMP = 0xBB, /* INcrement Stack pointer. */ + RRM_E = 0xBC, /* RRM E Indirect. */ + REP_REL = 0xBD, /* REPeat until counter is zero. */ + SNE_E = 0xBE, /* Store accumulator, if Not Equal. */ + STY_E = 0xC2, /* STY E Indirect. */ + STE_Z = 0xC4, /* STore Effective address register. */ + NOT_A = 0xC6, /* bitwise NOT with accumulator. */ + NOT_Z = 0xCA, /* NOT Zero Matrix. */ + MMV_IMP = 0xCB, /* Memory MoVe. */ + ARM_Z = 0xCC, /* Arithmetic shift Right, on Memory. */ + REQ_REL = 0xCD, /* Repeat until either counter is zero, or zero flag isn't set. */ + STE_AB = 0xD0, /* STE Absolute. */ + LSR_E = 0xD1, /* LSR E Indirect. */ + LDA_E = 0xD2, /* LDA E Indirect. */ + NOT_AB = 0xD6, /* NOT Absolute. */ + ARM_AB = 0xD8, /* ARM Absolute. */ + NOT_E = 0xDA, /* NOT E Indirect. */ + ARM_E = 0xDC, /* ARM E Indirect. */ + RNE_REL = 0xDD, /* Repeat until either counter is zero, or zero flag is set. */ + STA_E = 0xE2, /* STA E Indirect. */ + STZ_Z = 0xE4, /* STore Zero. */ + SWP_A = 0xE6, /* SWaP lower half, with upper half. */ + SWP_Z = 0xEA, /* SWP Zero Matrix. */ + PCN_Z = 0xEC, /* Population CouNt. */ + STZ_AB = 0xF0, /* STZ Absolute. */ + ROL_E = 0xF1, /* ROL E Indirect. */ + LDB_E = 0xF2, /* LDB E Indirect. */ + STZ_E = 0xF4, /* STZ E Indirect. */ + SWP_AB = 0xF6, /* SWP Absolute. */ + PCN_AB = 0xF8, /* PCN Absolute. */ + SWP_E = 0xFA, /* SWP E Indirect. */ + PCN_E = 0xFC /* PCN E Indirect. 
*/ +}; diff --git a/lexer/backup/lexer.c b/lexer/backup/lexer.c new file mode 100644 index 0000000..1654f44 --- /dev/null +++ b/lexer/backup/lexer.c @@ -0,0 +1,937 @@ +#include "asmmon.h" +#include "lexer.h" + +uint8_t lex_type; +uint16_t sym_count = 0; +token *tokens = NULL; +token *last_tok = NULL; +symbol *locals = NULL; +symbol *last_loc = NULL; +symbol *cur_sym = NULL; +symbol *struct_sym = NULL; + +line *tmp_line = NULL; + +symbol *mksymbol(const char *name, uint64_t val, uint8_t def, uint8_t islocal, uint8_t useid, uint16_t id, uint8_t dbg) { + uint16_t i = 0; + symbol *s = (!islocal) ? symbols : locals; + uint8_t flag = 0; + for (; s; s = s->next, i++) { + if (!useid && name[0] != s->name[0]) { + continue; + } + flag = (useid) ? (id == s->id) : !strcmp(name, s->name); + if (flag) { + if (def) { + if (s->def) { + if (dbg) { + printf("mksymbol(): oof, you cannot redefine the symbol: %s\n", name); + } + defined = 1; + } else { + defined = 0; + } + s->def = def; + s->val = val; + s->id = i; + if (dbg) { + printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s\n", def, val, name); + printf("mksymbol(): i: $%X, id: $%04X\n", i, s->id); + } + } + return s; + } + } + size_t str_size = strlen(name)+1; + s = malloc(sizeof(symbol)); + s->down = NULL; + if (!islocal) { + (last_sym) ? (last_sym->next = s) : (symbols = s); + if (last_sym) { + last_sym->next->prev = last_sym; + last_sym->next->up = last_sym->up; + last_sym->next->down = NULL; + } else { + symbols->prev = NULL; + symbols->up = NULL; + symbols->down = NULL; + } + } else { + (last_loc) ? (last_loc->next = s) : (locals = s); + if (last_loc) { + last_loc->next->prev = last_loc; + last_loc->next->up = last_loc->up; + last_loc->next->down = NULL; + } else { + locals->prev = NULL; + locals->down = NULL; + } + } + s->name = malloc(str_size); + s->def = def; + s->val = val; + s->count = 0; + s->isstruct = 0; + memcpy(s->name, name, str_size); + s->next = NULL; + s->id = sym_count++; + (!islocal) ? 
(last_sym = s) : (last_loc = s); + if (!islocal) { + s->down = NULL; + /*if (def) { + locals = NULL; + last_loc = NULL; + }*/ + } else { + cur_sym->count++; + } + defined = 0; + if (dbg) { + printf("mksymbol(): def: %u, val: $%016"PRIX64", name: %s, id: $%04X\n", def, val, name, sym_count-1); + } + return s; +} + +uint16_t fixup_cnt = 0; +symbol *get_sym(const char *name, uint64_t val, token *t, uint8_t islocal, uint8_t dbg) { + symbol *s = mksymbol(name, 0, 0, islocal, 0, 0, dbg); + if (dbg) { + printf("get_sym(): Symbol ID: $%X.\n", s->id); + } + if (s->def) { + return s; + } else { + if (dbg) { + printf("get_sym(): oof, symbol %s, does not exist, yet.\n", name); + } + fixup *f = malloc(sizeof(fixup)); + (last_fix) ? (last_fix->next = f) : (fixups = f); + f->adr = val; + f->t = t; + f->s = s; + f->next = NULL; + last_fix = f; + fixup_cnt++; + return NULL; + } +} + +symbol *find_member(char *name, symbol* root, uint8_t dbg) { + /*for (; root->up; root = root->up);*/ + symbol *s = root; + if (s->down == NULL && s->up != NULL) { + s = s->up; + } + do { + s = s->down; + for (symbol *m = s; m; m = m->next) { + size_t len1 = strlen(name); + size_t len2 = strlen(m->name); + if (len1 == len2 && name[0] == m->name[0] && !strcmp(name, m->name)) { + return m; + } + } + for (; s->next && !s->down; s = s->next); + } while (s->down); + return NULL; +} + +uint16_t reslv_fixups(uint8_t dbg) { + fixup *f = fixups; + symbol *ls; + uint16_t i = 0, j = 0; + for (; f; f = f->next) { + if (f->s->def) { + if (dbg) { + printf("reslv_fixups(): Symbol ID: $%X, Symbol Name: %s, Symbol Value: $%"PRIX64".\n", f->s->id, f->s->name, f->s->val); + } + f->t->sym = f->s; + } else { + if (dbg) { + printf("reslv_fixups(): oof, undefined reference to '%s', at $%016"PRIX64".\n", f->s->name, f->adr); + } + i++; + } + } + return i; + +} + +uint16_t get_comment(const char *com, uint8_t dbg) { + uint16_t i = 0; + for (; comment[i] && i < comidx; i++) { + if (com[0] == comment[i][0] && !strcmp(com, 
comment[i])) { + break; + } + } + if (comment[i] == NULL) { + if (dbg) { + printf("get_comment(): oof, the index $%04X is NULL.\n", i); + printf("get_comment(): oof, the comment \"%s\", was not found in the comment table.\n", com); + } + size_t size = strlen(com)+1; + comment[comidx] = malloc(size); + memcpy(comment[comidx], com, size); + return comidx++; + + } + if (dbg) { + if (strcmp(com, comment[i])) { + printf("get_comment(): oof, the comment \"%s\" is somehow not in the comment table, even though it should be at index $%04X.\n", com, i); + } + printf("get_comment(): The return value of strcmp(com, comment[$%04X]) is %i.\n", i, strcmp(com, comment[i])); + printf("get_comment(): Found comment \"%s\", in the table, at index $%04X.\n", com, i); + } + return i; +} + +uint16_t get_string(const char *str, uint8_t dbg) { + uint16_t i = 0; + uint8_t isstr = 0; + for (; i < stridx; i++) { + if (isstr || string[i] == NULL) { + break; + } else if (str[0] == string[i][0]) { + isstr = !strcmp(str, string[i]); + } + } + if (string[i] == NULL || i == stridx) { + if (dbg) { + printf("get_string(): oof, the index $%04X is NULL.\n", i); + printf("get_string(): oof, the string \"%s\", was not found in the string table.\n", str); + } + return 0xFFFF; + } + if (dbg) { + printf("get_string(): Found string \"%s\", in the table, at index $%04X.\n", str, i); + } + return i; +} + +line *find_line(uint32_t ln, uint8_t dbg) { + uint32_t i = 0; + line *l = lines; + for (; l && l->linenum != ln; l = l->next); + if (l != NULL) { + if (l->linenum == ln) { + if (dbg) { + printf("find_line(): Found line number %u.\n", ln); + } + return l; + } + } else { + if (dbg) { + printf("find_line(): oof, could not find line number %u.\n", ln); + } + return NULL; + } + return l; +} + +int is_struct = 0; +int is_anon = 0; + +void create_struct(symbol *c_sym, line *l, token *t, token *lt, char *name, uint8_t dbg) { + uint8_t ismember = !(is_struct == 1 && lt && lt->id == TOK_DIR); + mksymbol(name, 0, 1, 
ismember, 0, 0, dbg); + if (isfixup) { + isfixup = reslv_fixups(dbg); + } + t->sym = get_sym(name, 0, t, ismember, dbg); + if (lt && lt->id == TOK_DIR) { + t->sym->isstruct = 1; + t->id = (lt->type == DIR_STRUCT) ? TOK_STRUCT : TOK_UNION; + tmp_line = l; + } else { + t->id = TOK_MEMBER; + t->sym->isanon = (is_anon > 0); + } + isfixup += (t->sym == NULL); + int is_top = (c_sym == NULL); + c_sym = (!ismember && !c_sym) ? last_sym : c_sym; + if (!ismember) { + if (!is_top) { + c_sym = t->sym; + locals = NULL; + last_loc = NULL; + } else { + c_sym->down = locals; + } + } else { + if (lt && lt->id == TOK_DIR) { + if (lt->type == DIR_UNION || lt->type == DIR_STRUCT) { + c_sym->down = locals; + c_sym->down->up = c_sym; + last_loc->up = c_sym; + c_sym = last_loc; + locals = NULL; + last_loc = NULL; + } + } + } + cur_sym = c_sym; +} + +void end_struct(symbol *c_sym, symbol *s_sym, uint8_t dbg) { + int skip = 0; + if (is_anon > 0) { + if ((c_sym && c_sym->isanon) || (c_sym->up && !c_sym->up->isanon) || (c_sym && s_sym->isanon)) { + is_anon--; + } else if (is_struct <= 0) { + is_anon = 0; + } + skip = (!is_anon); + } + if (((is_struct-is_anon) > 0 && !skip) || (is_anon <= 0 && is_struct <= 0)) { + symbol *s; + for (s = locals; s; s = s->next) { + if (s->up == NULL) { + s->up = c_sym; + } + if (dbg) { + printf("s: %p, s->up: %p, c_sym: %p, last_loc: %p\n", s, s->up, c_sym, last_loc); + } + } + if (c_sym->down == NULL) { + c_sym->down = locals; + } + } + if ((is_anon <= 0 || is_struct <= 0)) { + for (s_sym = c_sym; s_sym->prev && !s_sym->isanon; s_sym = s_sym->prev); + struct_sym = s_sym; + } + if ((is_struct-is_anon) > 0 && !skip) { + symbol *s = c_sym; + for (; s->prev; s = s->prev) { + if (s->up == NULL && c_sym->up) { + s->up = c_sym->up; + } + if (dbg) { + printf("s: %p, s->up: %p, c_sym->up: %p, last_loc: %p\n", s, s->up, c_sym->up, last_loc); + } + } + if (c_sym->up) { + cur_sym = c_sym->up; + } + for (locals = locals->up; locals->prev; locals = locals->prev); + for 
(last_loc = locals; last_loc->next; last_loc = last_loc->next); + } +} + +uint64_t lex(char *str, uint64_t address, uint16_t bline, uint8_t dbg) { + char sym[0x100]; + uint16_t i = 0; + uint16_t j = 0; + uint16_t comid = 0; + uint16_t strid = 0; + uint16_t symid = 0; + uint64_t value = 0; + lex_type = 0xFF; + + uint8_t k = 0; + uint8_t k2 = 0; + union reg ch; + ch.u64 = 0; + uint8_t rs = 0; + uint8_t of = 0; + uint8_t base = 0; + + uint8_t islocal = 0; + + uint8_t isop = 0; + int num = 0; + int isch = 0; + uint8_t isesc = 0; + uint8_t islinenum; + + int16_t ln = -1; + + char lnum[6]; + + uint8_t space = 0; + uint8_t tab = 0; + uint8_t fall = 0; + uint8_t done = 0; + + + /*uint8_t is_newcom = 0;*/ + line *l = NULL; + token *st = NULL; + token *t = NULL; + token *lt = NULL; + symbol *tmp_sym = NULL; + symbol *tsym = NULL; + + while (isdigit(str[i]) && isdelm(str[i], dbg) != 16) { + lnum[j++] = str[i++]; + } + islinenum = i; + if (i) { + lnum[j] = '\0'; + ln = strtol(lnum, NULL, 10); + j = 0; + l = find_line(ln, dbg); + } else { + ln = linenum; + l = NULL; + } + if (l) { + address = l->addr; + } else { + l = malloc(sizeof(line)); + (last_line) ? 
(last_line->next = l) : (lines = l); + l->tok = NULL; + l->next = NULL; + l->count = 0; + l->bline = bline; + last_line = l; + + } + l->addr = address; + while (isdelm(str[i], dbg) != 1) { + uint8_t offset = 0; + base = 0; + space = 0; + tab = 0; + while (isdelm(str[i+j], dbg) == 16) { + tab += str[i+j] == '\t'; + space += str[i+j] == ' '; + j++; + } + j = 0; + if (dbg) { + printf("lex(): tab: %u, space: %u\n", tab, space); + } + if (isdelm(str[i], dbg) == 16) { + for (; isdelm(str[i], dbg) == 16; i++); + } + uint8_t ptok = get_ptok(str[i], dbg); + if (is_altok(ptok, dbg)) { + offset++; + if (((ptok == PTOK_S || ptok == PTOK_B) && toupper(str[i+1]) == 'P') || (ptok == PTOK_P && toupper(str[i+1]) == 'C')) { + offset++; + } + switch (get_ptok(str[i+offset], dbg)) { + case PTOK_B : + case PTOK_E : + case PTOK_X : + case PTOK_Y : + case PTOK_S : + case PTOK_P : + case PTOK_A : + case PTOK_C : + case PTOK_D : + case PTOK_F : + case PTOK_R : + case PTOK_ALPHA : ptok = PTOK_ALPHA; break; + case PTOK_NUMBER: + if (ptok == PTOK_R) { + char reg_num[3]; + for (int isnum = 0; isdigit(str[i+offset]) && !(isdelm(str[i+offset], dbg) & 0x03) && isnum < 2; offset++, isnum++) { + reg_num[isnum] = str[i+offset]; + } + reg_num[isnum] = '\0'; + if (isnum == 2) { + int regnum = strtoul(reg_num, NULL, 10); + ptok = (regnum < 11 || regnum > 15) ? 
PTOK_ALPHA : ptok; + } else { + ptok = PTOK_ALPHA; + } + } else { + ptok = PTOK_ALPHA; + } + break; + } + if ((ptok == PTOK_S && str[i+1] && toupper(str[i+1]) != 'P') || (ptok == PTOK_P && toupper(str[i+1]) != 'C')) { + ptok = PTOK_ALPHA; + } + } + /*i = ptok_handler[ptok](str, i, lex_type, l, t, dbg);*/ + switch (ptok) { + case PTOK_DOT: + i++; + for (; !(isdelm(str[i+j], dbg) & 17); j++); + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; + if (!isop) { + for (k = 0; k < 11; k++) { + if (tolower(lexeme[0]) == dir_t[k][0] && !strcasecmp(lexeme, dir_t[k])) { + lex_type = TOK_DIR; + uint16_t tmp = j; + for (j = 0; isdelm(str[i+j], dbg) & 16; j++); + uint8_t ret = get_ptok(str[i+j], dbg); + j = tmp; + + if ((k == DIR_STRUCT || k == DIR_UNION) && (ret != PTOK_ALPHA || (is_anon && ret == PTOK_ALPHA))) { + is_anon++; + } + is_struct += (k == DIR_STRUCT || k == DIR_UNION); + is_struct -= (k == DIR_ENDSTRUCT || k == DIR_ENDUNION); + if ((k == DIR_ENDSTRUCT || k == DIR_ENDUNION)) { + end_struct(cur_sym, struct_sym, dbg); + } + break; + } + } + if (lex_type != TOK_DIR && lt && lt->id == TOK_SYM) { + lex_type = TOK_MEMBER; + i -= j; + } else { + l->count++; + t = make_token(lex_type, k, space, tab, 0, "", NULL); + } + } else { + lex_type = TOK_RS; + switch (tolower(lexeme[j-1])) { + case '2': + case 'w': + rs = 1; + break; + case '4': + case 'd': + rs = 2; + break; + case '8': + case 'q': + rs = 3; + break; + } + l->count++; + t = make_token(lex_type, rs, space, tab, 0, "", NULL); + isop = 0; + } + break; + case PTOK_DQUOTE: + i++; + for (; isdelm(str[i+j], dbg) != 4 || isesc; j++) { + isesc = (str[i+j] == '\\' && str[i+(j-1)] != '\\'); + } + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; + strid = get_string(lexeme, dbg); + if (strid == 0xFFFF) { + strid = stridx; + string[strid] = malloc(j+1); + memcpy(string[strid], lexeme, j+1); + stridx++; + } else { + } + if (dbg) { + printf("lex(): str[0x%04X]: %s\n", strid, string[strid]); + } + if (lt->id == TOK_DIR 
&& lt->type == DIR_INCLUDE) { + incl[inc_count+inc_file] = strid; + inc_file++; + } + lex_type = TOK_STRING; + l->count++; + t = make_token(lex_type, 0, space, tab, 0, string[strid], NULL); + break; + case PTOK_DOLLAR: + case PTOK_PERCENT: + case PTOK_NUMBER: + value = 0; + switch (ptok) { + case PTOK_DOLLAR : base = 16; lex_type = TOK_HEX; i++; break; + case PTOK_PERCENT: base = 2; lex_type = TOK_BIN; i++; break; + case PTOK_NUMBER : base = 10; lex_type = TOK_DEC; /**/ break; + } + for (; isxdigit(str[i+j]) && !(isdelm(str[i+j], dbg) & 0x03); j++); + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; + value = strtoull(lexeme, NULL, base); + if (lt->id == TOK_SYM) { + tsym = mksymbol(sym, value, 1, islocal, 0, 0, dbg); + if (lt) { + lt->sym = get_sym(sym, address, lt, islocal, dbg); + } + if (!islocal) { + cur_sym = last_sym; + } + tsym = NULL; + islocal = 0; + isfixup += (lt->sym == NULL); + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + } + l->count++; + t = make_token(lex_type, 0, space, tab, value, "", NULL); + t->digits = (lt->id != TOK_SYM) ? j : 0; + break; + case PTOK_SQUOTE: + i++; + k = 0; + j = 0; + while (isdelm(str[i], dbg) != 8 || isesc) { + isesc = (str[i] == '\\' && str[i-1] != '\\'); + lexeme[j++] = str[i++]; + } + isesc = 0; + lexeme[j] = '\0'; + for (j = 0; lexeme[k] != '\0' && j < 7; k++) { + switch (lexeme[k]) { + case '\\': + switch (lexeme[++k]) { + case 'n' : ch.u8[j++] = '\n'; break; + case 'r' : ch.u8[j++] = '\r'; break; + case 't' : ch.u8[j++] = '\t'; break; + case 'b' : ch.u8[j++] = '\b'; break; + case '\'': ch.u8[j++] = '\''; break; + case '\"': ch.u8[j++] = '\"'; break; + case '\\': ch.u8[j++] = '\\'; break; + } + break; + default: ch.u8[j++] = lexeme[k]; + } + } + lex_type = TOK_CHAR; + l->count++; + t = make_token(lex_type, 0, space, tab, ch.u64, "", NULL); + break; + case PTOK_LBRACK: + case PTOK_HASH : + /*l->tok->type = (ptok == PTOK_LBRACK) ? IND : IMM; + lex_type = (ptok == PTOK_LBRACK) ? 
TOK_IND : TOK_IMM;*/ + lex_type = TOK_MEM; + value = (ptok == PTOK_LBRACK) ? MEM_IND : MEM_IMM; + l->count++; + t = make_token(lex_type, value, space, tab, 0, "", NULL); + lex_type = (ptok == PTOK_LBRACK) ? TOK_IND : TOK_IMM; + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j++] = str[i]; + /*(t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab);*/ + break; + case PTOK_PLUS: + case PTOK_MINUS: + case PTOK_GT: + case PTOK_LT: + case PTOK_PIPE: + lex_type = TOK_EXPR; + switch (ptok) { + case PTOK_PLUS : value = EXPR_PLUS ; break; + case PTOK_MINUS: value = EXPR_MINUS; break; + case PTOK_PIPE : value = EXPR_OR ; break; + case PTOK_GT : value = (get_ptok(str[i+1], dbg) == PTOK_GT) ? (EXPR_RSHFT) : (EXPR_LOW) ; break; + case PTOK_LT : value = (get_ptok(str[i+1], dbg) == PTOK_LT) ? (EXPR_LSHFT) : (EXPR_HIGH); break; + } + l->count++; + t = make_token(lex_type, value, space, tab, 0, "", NULL); + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j++] = str[i]; + if (value == EXPR_LSHFT || value == EXPR_RSHFT) { + lexeme[j++] = str[++i]; + } + break; + case PTOK_EQU: + i++; + lex_type = TOK_SYM; + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j] = str[i]; + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab); + break; + case PTOK_RBRACK: + i++; + lex_type = TOK_IND; + lexeme[j] = ')'; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? 
(t->subtab = tab) : (lt->subtab = tab); + break; + case PTOK_COMMA: + i++; + if (lex_type != TOK_IND && lex_type != TOK_OF) { + lex_type = TOK_CSV; + } + lexeme[j] = ','; + lexeme[j+1] = '\0'; + lexeme[j+2] = '\0'; + break; + case PTOK_B: + case PTOK_E: + case PTOK_X: + case PTOK_Y: + case PTOK_S: + case PTOK_A: + case PTOK_C: + case PTOK_D: + case PTOK_F: + case PTOK_R: + lexeme[j+0] = str[i++]; + lexeme[j+1] = (ptok == PTOK_R || ((ptok == PTOK_S || ptok == PTOK_B) && get_ptok(str[i], dbg) == PTOK_P)) ? str[i++] : '\0'; + lexeme[j+2] = (ptok == PTOK_R) ? str[i++] : '\0'; + lexeme[j+3] = '\0'; + lex_type = TOK_REG; + switch (ptok) { + case PTOK_A: value = REG_A; break; + case PTOK_X: value = REG_X; break; + case PTOK_Y: value = REG_Y; break; + case PTOK_E: value = REG_E; break; + case PTOK_C: value = REG_C; break; + case PTOK_D: value = REG_D; break; + case PTOK_S: + case PTOK_B: + if (get_ptok(lexeme[j+1], dbg) == PTOK_P) { + value = (ptok == PTOK_S) ? REG_SP : REG_BP; + } else { + value = (ptok == PTOK_S) ? REG_S : REG_B; + } + break; + case PTOK_F: value = REG_F; break; + case PTOK_R: value = strtoull(lexeme+j+1, NULL, 10); break; + } + l->count++; + t = make_token(lex_type, value, space, tab, 0, "", NULL); + /*(t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? (t->subtab = tab) : (lt->subtab = tab);*/ + break; + case PTOK_P: + lexeme[j] = str[i++]; + lexeme[j+1] = (str[i] != ',') ? str[i++] : '\0'; + lexeme[j+2] = '\0'; + /*switch (ptok) { + case PTOK_S: of = 1; break; + case PTOK_P: of = 2; break; + }*/ + of = 2; + lex_type = TOK_OF; + l->count++; + t = make_token(lex_type, of, space, tab, 0, "", NULL); + break; + case PTOK_AT: + memset(lexeme, 0, strlen(lexeme)+1); + lexeme[j] = '@'; + islocal = 1; + lex_type = TOK_LOCAL; + if (lt || t) { + (t) ? (t->subspace = space) : (lt->subspace = space); + (t) ? 
(t->subtab = tab) : (lt->subtab = tab); + } + break; + case PTOK_COLON: + i++; + lexeme[j] = ':'; + lexeme[j+1] = '\0'; + lex_type = TOK_LABEL; + tsym = mksymbol(sym, address, 1, islocal, 0, 0, dbg); + if (isfixup) { + isfixup = reslv_fixups(dbg); + } + if (lt) { + lt->id = lex_type; + lt->type = islocal; + lt->sym = get_sym(sym, address, t, islocal, dbg); + isfixup += (lt->sym == NULL); + } + if (!islocal) { + cur_sym = last_sym; + locals = NULL; + last_loc = NULL; + } else if (cur_sym->down == NULL && cur_sym == last_sym) { + cur_sym->down = locals; + cur_sym->down->up = cur_sym; + } + tsym = NULL; + islocal = 0; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + break; + case PTOK_SCOLON: + i++; + for (; isdelm(str[i+j], dbg) != 1; j++); + if (!j) { + lexeme[j] = ' '; + lexeme[j+1] = '\0'; + + } else { + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; + comid = get_comment(lexeme, dbg); + /*is_newcom = (comid == 0xFFFF); + if (comid == 0xFFFF) { + if (line != lineidx && l[line].com != 0xFFFF) { + comid = l[line].com; + } else { + comid = comidx; + } + comid = comidx; + comment[comid] = malloc(j+1); + memcpy(comment[comid], lexeme, j+1); + comidx++; + }*/ + if (dbg) { + printf("lex(): com[0x%04X]: %s\n", comid, comment[comid]); + } + } + lex_type = TOK_COMMENT; + l->count++; + if (j) { + t = make_token(lex_type, 0, space, tab, 0, comment[comid], NULL); + } else { + t = make_token(lex_type, 0, space, tab, 0, "" , NULL); + } + + break; + case PTOK_ALPHA: + for (; !isdelm2(str[i+j], dbg); j++); + memcpy(lexeme, str+i, j); + lexeme[j] = '\0'; + i += j; + isch = 0; + isop = 0; + if (j == 3 && str[i] != ':' && !is_struct) { + for (k = 0; k < OPNUM; k++) { + int find_ext = (k < EXT_OPNUM); + int find_ortho = (k < ORTHO_OPNUM); + int upper = toupper(lexeme[0]); + int isbase = (upper == mne[k][0]); + int isext = (find_ext && upper == ext_mne[k][0]); + int isortho = (find_ortho && upper == ortho_mne[k][0]); + + if (isbase || isext || isortho) { + int 
is_base = !strcasecmp(lexeme, mne[k]); + int is_ext = (find_ext && !strcasecmp(lexeme, ext_mne[k])); + int is_ortho = (find_ortho && !strcasecmp(lexeme, ortho_mne[k])); + if (is_base || is_ext || is_ortho) { + lex_type = (is_base) ? TOK_OPCODE : lex_type; + lex_type = (is_ext) ? TOK_EXTOP : lex_type; + lex_type = (is_ortho) ? TOK_ORTHO : lex_type; + isop = 1; + l->count++; + t = make_token(lex_type, 0xFF, space, tab, k, "", NULL); + break; + } + } + } + } + if (!isop) { + uint8_t spaces = 0; + if (l->tok->type == TOK_ORTHO && l->tok->byte == SET) { + for (k = 0; k < 8; k++) { + int upper = toupper(lexeme[0]); + if (upper == set_cc[k][0]) { + if (!strcasecmp(lexeme, set_cc[k])) { + lex_type = TOK_CC; + l->count++; + t = make_token(lex_type, 0xFF, space, tab, k, "", NULL); + } + } + } + } + for (; isdelm(str[i+spaces], dbg) == 16; spaces++); + uint8_t ret = get_ptok(str[i+spaces], dbg); + if (ret == PTOK_COLON || ret == PTOK_EQU) { + islocal = (lex_type == TOK_LOCAL); + } + lex_type = TOK_SYM; + l->count++; + t = make_token(lex_type, islocal, space, tab, 0, "", NULL); + memcpy(sym, lexeme, j+1); + if (dbg) { + printf("lex(): spaces: %u\n", spaces); + } + if (is_struct) { + create_struct(cur_sym, l, t, lt, sym, dbg); + islocal = 0; + } else if ((str[i+spaces] != ':' && str[i+spaces] != '=')) { + uint8_t sym_struct = 0; + symbol *s; + /*tmp_sym = (s && s->isstruct) ? NULL : tmp_sym;*/ + if (tmp_sym) { + t->sym = find_member(lexeme, tmp_sym, dbg); + tmp_sym = NULL; + } else { + t->sym = get_sym(lexeme, address, t, islocal, dbg); + } + isfixup += (t && t->sym == NULL); + islocal = 0; + if (dbg) { + printf("lex(): isfixup: %u\n", isfixup); + } + } + if (!is_struct && t && t->sym && t->sym->isstruct) { + tmp_sym = t->sym; + } + } + break; + } + if (!l->tok && t) { + l->tok = tokens; + } + if (dbg) { + printf("lex(): lexeme: %s, lex_type: %s\n", lexeme, (lex_type != 0xFF) ? 
lex_tok[lex_type] : "TOK_NONE"); + } + j = 0; + if ((lex_type == TOK_OPCODE || lex_type == TOK_EXTOP) && !isop) { + j = 0; + } else if (lex_type == TOK_EXPR || (lex_type != TOK_MEMBER && !isdelm2(str[i], dbg))) { + i++; + } + switch (lex_type) { + default: + lex_type = 0xFF; + case TOK_CSV: + case TOK_IND: + case TOK_LOCAL: + memset(lexeme, 0, strlen(lexeme)+1); + case TOK_SYM: + break; + } + if (t) { + lt = t; + t = t->next; + } + } + if (i) { + l->tok = tokens; + token *tok = tokens; + if (tok->id == TOK_SYM && tok->next) { + symbol *s = tok->sym; + for (; tok; tok = tok->next) { + switch (tok->id) { + case TOK_HEX : + case TOK_BIN : + case TOK_DEC : + case TOK_CHAR: + case TOK_EXPR: + s->val = get_val(tok, address, 3, dbg); + if (tok->next) { + tok = skip_expr(tok, dbg); + } + break; + } + } + } + tokens = NULL; + last_tok = NULL; + bytecount dummy; + if (!is_struct) { + l = (tmp_line) ? tmp_line : l; + address = parse_tokens(l->tok, &l, &dummy, 0, address, dbg); + if (tmp_line) { + tmp_line = NULL; + } + } + if (dbg) { + printf("lex(): Next address: $%"PRIX64"\n", address); + } + if (ln > linenum || islinenum) { + l->linenum = ln; + if (ln > linenum) { + linenum+=(10+(ln & 10)); + } + } else if (!islinenum) { + l->linenum = linenum; + linenum += 10; + } + } + return address; +} diff --git a/lexer/backup/lexer.h b/lexer/backup/lexer.h new file mode 100644 index 0000000..2595158 --- /dev/null +++ b/lexer/backup/lexer.h @@ -0,0 +1,228 @@ +static uint8_t isdelm(char c, uint8_t dbg) { + switch (c) { + default : return 0x00; + case '\0': + case '\n': return 0x01; + case ',' : return 0x02; + case '\"': return 0x04; + case '\'': return 0x08; + case '\t': + case ' ' : return 0x10; + } +} + +static uint8_t isdelm2(char c, uint8_t dbg) { + switch (c) { + default : return 0; + case ')' : + case ',' : + case '.' 
: + case '+' : + case '<' : + case '|' : + case '>' : + case '-' : + case ':' : + case '=' : + case ';' : + case '\0': + case '\n': return 1; + case '\t': + case ' ' : return 2; + } +} + +static uint8_t get_ptok(char c, uint8_t dbg) { + switch (c) { + case '.' : return PTOK_DOT ; + case '@' : return PTOK_AT ; + case ':' : return PTOK_COLON ; + case '=' : return PTOK_EQU ; + case '+' : return PTOK_PLUS ; + case '-' : return PTOK_MINUS ; + case '>' : return PTOK_GT ; + case '<' : return PTOK_LT ; + case '|' : return PTOK_PIPE ; + case '(' : return PTOK_LBRACK ; + case ')' : return PTOK_RBRACK ; + case ',' : return PTOK_COMMA ; + case 'B': case 'b' : return PTOK_B ; + case 'E': case 'e' : return PTOK_E ; + case 'X': case 'x' : return PTOK_X ; + case 'Y': case 'y' : return PTOK_Y ; + case 'S': case 's' : return PTOK_S ; + case 'P': case 'p' : return PTOK_P ; + case 'A': case 'a' : return PTOK_A ; + case 'C': case 'c' : return PTOK_C ; + case 'D': case 'd' : return PTOK_D ; + case 'F': case 'f' : return PTOK_F ; + case 'R': case 'r' : return PTOK_R ; + case '\"': return PTOK_DQUOTE ; + case '\'': return PTOK_SQUOTE ; + case '#' : return PTOK_HASH ; + case ';' : return PTOK_SCOLON ; + case '$' : return PTOK_DOLLAR ; + case '%' : return PTOK_PERCENT; + default : + if (isdigit(c)) { + return PTOK_NUMBER; + } else if (isalpha(c) || c == '_') { + return PTOK_ALPHA; + } else { + return PTOK_OTHER; + } + } +} + +static uint8_t is_altok(uint8_t ptok, uint8_t dbg) { + switch (ptok) { + case PTOK_B: + case PTOK_E: + case PTOK_X: + case PTOK_Y: + case PTOK_S: + case PTOK_P: return 1; + default : return 0; + } +} + +#if 0 +static int handle_dot(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_at(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_colon(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_equ(char *str, int idx, uint8_t 
lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_plus(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_minus(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_gt(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_lt(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_pipe(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_lbrack(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_rbrack(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_comma(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_b(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_e(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_x(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_y(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_s(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_p(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_dquote(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_squote(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_hash(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_scolon(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_dollar(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + 
+static int handle_percent(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} +static int handle_number(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_alpha(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_other(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + return idx+1; +} + +typedef int (*ptok_func)(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg); + +static ptok_func ptok_handler[PTOK_OTHER+1] = { + [PTOK_DOT ] = handle_dot, + [PTOK_AT ] = handle_at, + [PTOK_COLON ] = handle_colon, + [PTOK_EQU ] = handle_equ, + [PTOK_PLUS ] = handle_plus, + [PTOK_MINUS ] = handle_minus, + [PTOK_GT ] = handle_gt, + [PTOK_LT ] = handle_lt, + [PTOK_PIPE ] = handle_pipe, + [PTOK_LBRACK ] = handle_lbrack, + [PTOK_RBRACK ] = handle_rbrack, + [PTOK_COMMA ] = handle_comma, + [PTOK_B ] = handle_b, + [PTOK_E ] = handle_e, + [PTOK_X ] = handle_x, + [PTOK_Y ] = handle_y, + [PTOK_S ] = handle_s, + [PTOK_P ] = handle_p, + [PTOK_DQUOTE ] = handle_dquote, + [PTOK_SQUOTE ] = handle_squote, + [PTOK_HASH ] = handle_hash, + [PTOK_SCOLON ] = handle_scolon, + [PTOK_DOLLAR ] = handle_dollar, + [PTOK_PERCENT] = handle_percent, + [PTOK_NUMBER ] = handle_number, + [PTOK_ALPHA ] = handle_alpha, + [PTOK_OTHER ] = handle_other +}; +#endif diff --git a/lexer/cpu/sux/cpu.c b/lexer/cpu/sux/cpu.c new file mode 100644 index 0000000..cd6d364 --- /dev/null +++ b/lexer/cpu/sux/cpu.c @@ -0,0 +1,30 @@
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Lowercased first character of the first qualifier found by the last call
 * to lex_inst(), or '\0' if that call ended with no qualifiers.
 */
static char current_ext;

/* Name: is_ext()
 * Desc: Checks if the character can be part of a mnemonic, or a qualifier.
 * Args:
 *	c: The character to check.
 * Return value: Returns true if the character is not '\0', not '.', and not
 *	whitespace, and false if not.
 */

int is_ext(char c) {
	/* isspace() is only defined for values that fit in an unsigned char. */
	return (c && c != '.' && !isspace((unsigned char)c));
}

/* Name: cpu_error()
 * Desc: Reports a CPU specific error. Currently an empty placeholder.
 * Args:
 *	code: The error code.
 * Return value: None.
 */

void cpu_error(int code) {

}

/* Name: lex_inst()
 * Desc: Splits an instruction into its mnemonic, and its '.' separated
 *	qualifiers.
 * Args:
 *	s: The string to lex, starting at the mnemonic.
 *	inst_len: Receives the length of the mnemonic.
 *	ext: Receives a pointer to the start of each qualifier.
 *	ext_len: Receives the length of each qualifier.
 *	ext_count: On entry, the index of the first free slot in ext, and
 *		ext_len. On return, the number of slots in use.
 * Return value: Returns a pointer to the first character that is not part of
 *	the mnemonic, or of one of its qualifiers.
 */

char *lex_inst(char *s, int *inst_len, char **ext, int *ext_len, int *ext_count) {
	char *inst = s;
	int count = *ext_count;

	for (; is_ext(*s); s++);
	*inst_len = s - inst;

	/* Only step over a character once it's known to be a '.'.
	 * Testing with "*s++ == '.'" also advanced on the failing test, which
	 * made the returned pointer skip the character after the instruction,
	 * and step past the terminator when the instruction ended the string.
	 */
	while (*s == '.' && count < MAX_QUALIFIERS) {
		s++;
		ext[count] = s;
		for (; is_ext(*s); s++);
		ext_len[count] = s - ext[count];
		/* An empty qualifier is an error, and doesn't use up a slot. */
		if (ext_len[count] <= 0) {
			cpu_error(34);
		} else {
			count++;
		}
	}
	*ext_count = count;
	current_ext = (count > 0) ? tolower((unsigned char)ext[0][0]) : '\0';
	return s;
}
diff --git a/lexer/enums.h b/lexer/enums.h new file mode 100644 index 0000000..07338ee --- /dev/null +++ b/lexer/enums.h @@ -0,0 +1,540 @@ +enum am { + /* Part of Base ISA. */ + IMM, /* Immediate Data. */ + ZM, /* Zero Matrix. */ + ZMX, /* Zero Matrix, indexed with X. */ + ZMY, /* Zero Matrix, indexed with Y. */ + IND, /* Indirect. */ + INDX, /* Indexed Indirect. */ + INDY, /* Indirect Indexed. */ + ABS, /* Absolute. */ + REL, /* Relative to Program Counter. */ + BREG, /* B Register. */ + IMPL, /* Implied. */ + /* Part of Base Extension. */ + ABSX, /* Absolute, Indexed with X. */ + ABSY, /* Absolute, Indexed with Y. */ + AIND, /* Absolute Indirect. */ + AINDX, /* Absolute Indexed Indirect. */ + AINDY, /* Absolute Indirect Indexed. */ + EIND, /* Effective Address Register, Indirect. */ +}; + +/* Part of the Orthogonal Extension. */ +enum ortho_reg { + REG_A, + REG_B, + REG_X, + REG_Y, + REG_E, + REG_C, + REG_D, + REG_S, + REG_F, + REG_SP, + REG_BP, + REG_R11, + REG_R12, + REG_R13, + REG_R14, + REG_R15, +}; + +enum ortho_mem { + MEM_ABS, /* Absolute. */ + MEM_ZM, /* Zero Matrix. */ + MEM_ABSR, /* Absolute, Indexed with register. */ + MEM_ZMR, /* Zero Matrix, Indexed with register. */ + MEM_ZINDR, /* Zero Matrix, Indirect Indexed Register. */ + MEM_ZRIND, /* Zero Matrix, Indexed Indirect Register. */ + MEM_AINDR, /* Absolute, Indirect Indexed Register. */ + MEM_ARIND, /* Absolute, Indexed Indirect Register. */ + MEM_RIND, /* Register Indirect. */ + MEM_SIB, /* Scale Index Base. 
*/ +}; + +enum mne { + ADC, + AND, + ASR, + BCC, + BCS, + BEQ, + BNE, + BNG, + BPO, + BRA, + BRK, + BVC, + BVS, + CLC, + CLI, + CLV, + CMP, + CPB, + CPS, + CPX, + CPY, + DEB, + DEC, + DEX, + DEY, + DIV, + INB, + INC, + INX, + INY, + JMP, + JSR, + LDA, + LDB, + LDX, + LDY, + LSL, + LSR, + MUL, + NOP, + ORA, + PHA, + PHB, + PHP, + PHX, + PHY, + PLA, + PLB, + PLP, + PLX, + PLY, + ROL, + ROR, + RTI, + RTS, + SBC, + SEC, + SEI, + STA, + STB, + STX, + STY, + TAB, + TAX, + TAY, + TBA, + TSX, + TXA, + TXS, + TXY, + TYA, + TYX, + WAI, + XOR +}; + +enum ext_mne { + LEA, + PEA, + ADD, + SUB, + ADE, + SBE, + ADS, + SBS, + NOT, + LLM, + LRM, + RLM, + RRM, + ARM, + PHE, + PLE, + CPE, + ICE, + LDS, + DEE, + INE, + DES, + INS, + STS, + STE, + STZ, + SCO, + ECO, + CLZ, + CLO, + BIT, + MMV, + SWP, + PCN, + REP, + REQ, + RNE, + LNG, + LPO, + LCS, + LCC, + LEQ, + LNE, + SNG, + SPO, + SCS, + SCC, + SEQ, + SNE +}; + +enum ortho_mne { + MNG, + MPO, + MCS, + MCC, + MEQ, + MNE, + MVS, + MVC, + OR , + MOV, + IML, + IDV, + PSH, + PUL, + NEG, + SET +}; + +enum base_isa { + CPS_IMP = 0x00, /* Clear Processor Status. */ + ADC_IMM = 0x01, /* ADd with Carry. */ + ROR_IMM = 0x02, /* ROtate Right. */ + CPB_IMM = 0x04, /* ComPare B register. */ + ADC_Z = 0x05, /* ADC Zero Matrix. */ + ROR_Z = 0x06, /* ROR Zero Matrix. */ + CPB_Z = 0x08, /* CPB Zero Matrix. */ + CLC_IMP = 0x09, /* CLear Carry flag. */ + TAB_IMP = 0x0A, /* Transfer Accumulator to B. */ + STY_Z = 0x0C, /* STore Y register. */ + JMP_AB = 0x10, /* JMP Absolute. */ + ADC_AB = 0x11, /* ADC Absolute. */ + ROR_AB = 0x12, /* ROR Absolute. */ + CPB_AB = 0x14, /* CPB Absolute. */ + ADC_B = 0x15, /* ADC B Register. */ + ROR_B = 0x16, /* ROR B Register. */ + STY_AB = 0x18, /* STY Absolute. */ + SEC_IMP = 0x19, /* SEt Carry flag. */ + TBA_IMP = 0x1A, /* Transfer B to Accumulator. */ + JMP_Z = 0x20, /* JuMP to memory location. */ + SBC_IMM = 0x21, /* SuBtract with Carry. */ + MUL_IMM = 0x22, /* MULtiply accumulator. 
*/ + CPX_IMM = 0x24, /* ComPare X register. */ + SBC_Z = 0x25, /* SBC Zero Matrix. */ + MUL_Z = 0x26, /* MUL Zero Matrix. */ + CPX_Z = 0x28, /* CPX Zero Matrix. */ + CLI_IMP = 0x29, /* CLear Interupt flag. */ + TAY_IMP = 0x2A, /* Transfer Accumulator to Y. */ + STA_Z = 0x2C, /* STore Accumulator. */ + STA_ZX = 0x2E, /* STA Zero Marrix, Indexed with X. */ + JSR_AB = 0x30, /* JSR Absolute. */ + SBC_AB = 0x31, /* SBC Absolute. */ + MUL_AB = 0x32, /* MUL Absolute. */ + CPX_AB = 0x34, /* CPX Absolute. */ + SBC_B = 0x35, /* SBC B Register. */ + MUL_B = 0x36, /* MUL B Register. */ + STA_AB = 0x38, /* STA Absolute. */ + SEI_IMP = 0x39, /* SEt Interupt flag. */ + TYA_IMP = 0x3A, /* Transfer Y to Accumulator. */ + STA_ZY = 0x3C, /* STA Zero Marrix, Indexed with Y. */ + STA_IX = 0x3E, /* STA Indexed Indirect. */ + JSR_Z = 0x40, /* Jump to SubRoutine. */ + AND_IMM = 0x41, /* bitwise AND with accumulator. */ + DIV_IMM = 0x42, /* DIVide with accumulator. */ + CPY_IMM = 0x44, /* ComPare Y register. */ + AND_Z = 0x45, /* AND Zero Matrix. */ + DIV_Z = 0x46, /* DIV Zero Matrix. */ + CPY_Z = 0x48, /* CPY Zero Matrix. */ + CLV_IMP = 0x49, /* CLear oVerflow flag. */ + TAX_IMP = 0x4A, /* Transfer Accumulator to X. */ + STB_Z = 0x4C, /* STore B register. */ + STB_ZX = 0x4E, /* STB Zero Marrix, Indexed with X. */ + RTS_IMP = 0x50, /* ReTurn from Subroutine. */ + AND_AB = 0x51, /* AND Absolute. */ + DIV_AB = 0x52, /* DIV Absolute. */ + CPY_AB = 0x54, /* CPY Absolute. */ + AND_B = 0x55, /* AND B Register. */ + DIV_B = 0x56, /* DIV B Register. */ + STB_AB = 0x58, /* STB Absolute. */ + WAI_IMP = 0x59, /* WAit for Interrupt. */ + TXA_IMP = 0x5A, /* Transfer X to Accumulator. */ + STB_ZY = 0x5C, /* STB Zero Marrix, Indexed with Y. */ + STB_IX = 0x5E, /* STB Indexed Indirect. */ + RTI_IMP = 0x60, /* ReTurn from Interrupt. */ + ORA_IMM = 0x61, /* bitwise OR with Accumulator. */ + ASR_IMM = 0x62, /* Arithmetic Shift Right. */ + LDX_IMM = 0x64, /* LoaD X register. 
*/ + ORA_Z = 0x65, /* ORA Zero Matrix. */ + ASR_Z = 0x66, /* ASR Zero Matrix. */ + LDX_Z = 0x68, /* LDX Zero Matrix. */ + BRK_IMP = 0x69, /* BReaK. */ + TYX_IMP = 0x6A, /* Transfer Y to X. */ + STX_Z = 0x6C, /* STore X register. */ + PHP_IMP = 0x6E, /* PusH Processor status to stack. */ + BPO_REL = 0x70, /* Branch if POsitive. */ + ORA_AB = 0x71, /* ORA Absolute. */ + ASR_AB = 0x72, /* ASR Absolute. */ + LDX_AB = 0x74, /* LDX Absolute. */ + ORA_B = 0x75, /* ORA B Register. */ + ASR_B = 0x76, /* ASR B Register. */ + STX_AB = 0x78, /* STX Absolute. */ + DEY_IMP = 0x79, /* DEcrement Y register. */ + TXY_IMP = 0x7A, /* Transfer X to Y. */ + CPB_IN = 0x7C, /* CPB Indirect */ + PLP_IMP = 0x7E, /* PuLl Processor status from stack. */ + BNG_REL = 0x80, /* Branch if NeGative. */ + XOR_IMM = 0x81, /* bitwise XOR with accumulator. */ + CMP_IMM = 0x82, /* CoMPare accumulator. */ + DEC_IMP = 0x84, /* DECrement accumulator. */ + XOR_Z = 0x85, /* XOR Zero Matrix. */ + CMP_Z = 0x86, /* CMP Zero Matrix. */ + DEC_Z = 0x88, /* DEC Zero Matrix. */ + INY_IMP = 0x89, /* INcrement Y register. */ + TSX_IMP = 0x8A, /* Transfer Stack pointer to X. */ + CMP_IN = 0x8C, /* CMP Indirect */ + PHA_IMP = 0x8E, /* PusH Accumulator to stack. */ + BCS_REL = 0x90, /* Branch if Carry Set. */ + XOR_AB = 0x91, /* XOR Absolute. */ + CMP_AB = 0x92, /* CMP Absolute. */ + DEC_AB = 0x94, /* DEC Absolute. */ + XOR_B = 0x95, /* XOR B Register. */ + CMP_B = 0x96, /* CMP B Register. */ + DEB_IMP = 0x99, /* Decrement B register. */ + TXS_IMP = 0x9A, /* Transfer X to Stack pointer. */ + STY_IN = 0x9C, /* STY Indirect */ + PLA_IMP = 0x9E, /* PuLl Accumulator from stack. */ + BCC_REL = 0xA0, /* Branch if Carry Clear. */ + LSL_IMM = 0xA1, /* Logical Shift Left. */ + LDY_IMM = 0xA2, /* LoaD Y register. */ + INC_IMP = 0xA4, /* INCrement accumulator. */ + LSL_Z = 0xA5, /* LSL Zero Matrix. */ + LDY_Z = 0xA6, /* LDY Zero Matrix. */ + INC_Z = 0xA8, /* INC Zero Matrix. */ + INB_IMP = 0xA9, /* Increment B register. 
*/ + CMP_IX = 0xAA, /* CMP Indexed Indirect. */ + LDY_IN = 0xAC, /* LDY Indirect */ + PHB_IMP = 0xAE, /* PusH B register to stack. */ + BEQ_REL = 0xB0, /* Branch if EQual. */ + LSL_AB = 0xB1, /* LSL Absolute. */ + LDY_AB = 0xB2, /* LDY Absolute. */ + INC_AB = 0xB4, /* INC Absolute. */ + LSL_B = 0xB5, /* LSL B Register. */ + DEX_IMP = 0xB9, /* DEcrement X register. */ + CPB_IX = 0xBA, /* CPB Indexed Indirect. */ + LDX_IN = 0xBC, /* LDX Indirect */ + PLB_IMP = 0xBE, /* PuLl B register to stack. */ + BNE_REL = 0xC0, /* Branch if Not Equal. */ + LSR_IMM = 0xC1, /* Logical Shift Right. */ + LDA_IMM = 0xC2, /* LoaD Accumulator. */ + LDA_IN = 0xC4, /* LDA Indirect */ + LSR_Z = 0xC5, /* LSR Zero Matrix. */ + LDA_Z = 0xC6, /* LDA Zero Matrix. */ + LDA_ZX = 0xC8, /* LDA Zero Marrix, Indexed with X. */ + INX_IMP = 0xC9, /* INcrement X register. */ + STA_IY = 0xCA, /* STA Indirect Indexed. */ + STX_IN = 0xCC, /* STX Indirect */ + PHY_IMP = 0xCE, /* PusH Y register to stack. */ + BVS_REL = 0xD0, /* Branch if oVerflow Set. */ + LSR_AB = 0xD1, /* LSR Absolute. */ + LDA_AB = 0xD2, /* LDA Absolute. */ + STA_IN = 0xD4, /* STA Indirect */ + LSR_B = 0xD5, /* LSR B Register. */ + LDA_ZY = 0xD6, /* LDA Zero Marrix, Indexed with Y. */ + LDA_IX = 0xD8, /* LDA Indexed Indirect. */ + LDA_IY = 0xD9, /* LDA Indirect Indexed. */ + STB_IY = 0xDA, /* STB Indirect Indexed. */ + JSR_IN = 0xDC, /* JSR Indirect */ + PLY_IMP = 0xDE, /* PuLl Y register from stack. */ + BVC_REL = 0xE0, /* Branch if oVerflow Clear. */ + ROL_IMM = 0xE1, /* ROtate Left. */ + LDB_IMM = 0xE2, /* LoaD B register. */ + LDB_IN = 0xE4, /* LDB Indirect */ + ROL_Z = 0xE5, /* ROL Zero Matrix. */ + LDB_Z = 0xE6, /* LDB Zero Matrix. */ + LDB_ZX = 0xE8, /* LDB Zero Marrix, Indexed with X. */ + LDB_IY = 0xE9, /* LDB Indirect Indexed. */ + NOP_IMP = 0xEA, /* No OPeration. */ + JMP_IN = 0xEC, /* JMP Indirect */ + PHX_IMP = 0xEE, /* PusH X register to stack. */ + BRA_REL = 0xF0, /* BRanch Always. */ + ROL_AB = 0xF1, /* ROL Absolute. 
*/ + LDB_AB = 0xF2, /* LDB Absolute. */ + STB_IN = 0xF4, /* STB Indirect */ + ROL_B = 0xF5, /* ROL B Register. */ + LDB_ZY = 0xF6, /* LDB Zero Marrix, Indexed with Y. */ + LDB_IX = 0xF8, /* LDB Indexed Indirect. */ + CMP_IY = 0xF9, /* CMP Indirect Indexed. */ + CPB_IY = 0xFA, /* CPB Indirect Indexed. */ + PLX_IMP = 0xFE /* PuLl X register from stack. */ +}; + +enum base_ext { + LEA_AY = 0x03, /* LEA Absolute, indexed with Y. */ + ADD_IMM = 0x06, /* ADD without carry. */ + LEA_Z = 0x07, /* Load Effective Address. */ + CPE_IMM = 0x08, /* ComPare Effective address register. */ + CLZ_Z = 0x09, /* Count Leading Zeros. */ + ADD_Z = 0x0A, /* ADD Zero Matrix. */ + STB_E = 0x0B, /* STB E Indirect. */ + CPE_Z = 0x0C, /* CPE Zero Matrix. */ + LNG_IMM = 0x0D, /* Load accumulator, if NeGative. */ + LNG_E = 0x0E, /* LNG E Indirect. */ + JMP_E = 0x10, /* JMP E Indirect. */ + ADC_E = 0x11, /* ADC E Indirect. */ + ROR_E = 0x12, /* ROR E Indirect. */ + LEA_AB = 0x13, /* LEA Absolute. */ + CLZ_AB = 0x15, /* CLZ Absolute. */ + ADD_AB = 0x16, /* ADD Absolute. */ + LEA_ZY = 0x17, /* LEA Zero Matrix, indexed with Y. */ + CPE_AB = 0x18, /* CPE Absolute. */ + CLZ_E = 0x19, /* CLZ E Indirect. */ + ADD_E = 0x1A, /* ADD E Indirect. */ + LDX_E = 0x1B, /* LDX E Indirect. */ + SNG_E = 0x1E, /* Store accumulator, if NeGative. */ + PEA_AY = 0x23, /* PEA Absolute, indexed with Y. */ + SUB_IMM = 0x26, /* SUBtract without carry. */ + PEA_Z = 0x27, /* Push Effective Address. */ + CLO_Z = 0x29, /* Count Leading Ones. */ + SUB_Z = 0x2A, /* SUB Zero Matrix. */ + STX_E = 0x2B, /* STX E Indirect. */ + ICE_Z = 0x2C, /* Interlocked Compare, and Exchange. */ + LPO_IMM = 0x2D, /* Load accumulator, if POsitive. */ + LPO_E = 0x2E, /* LPO E Indirect. */ + JSR_E = 0x30, /* JSR E Indirect. */ + SBC_E = 0x31, /* SBC E Indirect. */ + MUL_E = 0x32, /* MUL E Indirect. */ + PEA_AB = 0x33, /* PEA Absolute. */ + CLO_AB = 0x34, /* CLO Absolute. */ + SUB_AB = 0x35, /* SUB Absolute. 
*/ + PEA_ZY = 0x37, /* PEA Zero Matrix, indexed with Y. */ + ICE_AB = 0x38, /* ICE Absolute. */ + CLO_E = 0x39, /* CLO E Indirect. */ + SUB_E = 0x3A, /* SUB E Indirect. */ + CPB_E = 0x3B, /* CPB E Indirect. */ + ICE_E = 0x3C, /* ICE E Indirect. */ + SPO_E = 0x3E, /* Store accumulator, if POsitive. */ + LDS_IMM = 0x40, /* LoaD Stack pointer. */ + LEA_AI = 0x43, /* LEA Absolute Indirect. */ + LDS_Z = 0x44, /* LDS Zero Matrix. */ + ADE_IMM = 0x46, /* ADd Effective address register. */ + LEA_IN = 0x47, /* LEA Indirect. */ + BIT_Z = 0x49, /* BIt Test. */ + ADE_Z = 0x4A, /* ADE Zero Matrix. */ + CPX_E = 0x4B, /* CPX E Indirect. */ + LLM_Z = 0x4C, /* Logical shift Left, on Memory. */ + LCS_IMM = 0x4D, /* Load accumulator, if Carry Set. */ + LCS_E = 0x4E, /* LCS E Indirect. */ + LDS_AB = 0x50, /* LDS Absolute. */ + AND_E = 0x51, /* AND E Indirect. */ + DIV_E = 0x52, /* DIV E Indirect. */ + LEA_AX = 0x53, /* LEA Absolute, indexed with X. */ + LDS_E = 0x54, /* LDS E Indirect. */ + BIT_AB = 0x55, /* BIT Absolute. */ + ADE_AB = 0x56, /* ADE Absolute. */ + LEA_ZX = 0x57, /* LEA Zero Matrix, indexed with X. */ + LLM_AB = 0x58, /* LLM Absolute. */ + BIT_E = 0x59, /* BIT E Indirect. */ + CPY_E = 0x5B, /* CPY E Indirect. */ + LLM_E = 0x5C, /* LLM E Indirect. */ + SCS_E = 0x5E, /* Store accumulator, if Carry Set. */ + SCO_IMM = 0x60, /* Start one, or more COre(s). */ + PEA_AI = 0x63, /* PEA Absolute Indirect. */ + SCO_Z = 0x64, /* SCO Zero Matrix. */ + SBE_IMM = 0x66, /* SuBtract Effective address register. */ + PEA_IN = 0x67, /* PEA Indirect. */ + SBE_Z = 0x6A, /* SBE Zero Matrix. */ + PHE_IMP = 0x6B, /* PusH Effective address register to stack. */ + LRM_Z = 0x6C, /* Logical shift Right, on Memory. */ + LCC_IMM = 0x6D, /* Load accumulator, if Carry Clear. */ + LCC_E = 0x6E, /* LCC E Indirect. */ + SCO_AB = 0x70, /* SCO Absolute. */ + ORA_E = 0x71, /* ORA E Indirect. */ + ASR_E = 0x72, /* ASR E Indirect. */ + PEA_AX = 0x73, /* PEA Absolute, indexed with X. 
*/ + SCO_E = 0x74, /* SCO E Indirect. */ + SBE_AB = 0x76, /* SBE Absolute. */ + PEA_ZX = 0x77, /* PEA Zero Matrix, indexed with X. */ + LRM_AB = 0x78, /* LRM Absolute. */ + PLE_IMP = 0x7B, /* PuLl Effective address register from stack. */ + LRM_E = 0x7C, /* LRM E Indirect. */ + SCC_E = 0x7E, /* Store accumulator, if Carry Clear. */ + ECO_IMM = 0x80, /* End one, or more COre(s). */ + DEC_E = 0x82, /* DEC E Indirect. */ + LEA_AIY = 0x83, /* LEA Absolute Indirect Indexed. */ + ECO_Z = 0x84, /* ECO Zero Matrix. */ + ADS_IMM = 0x86, /* ADd Stack pointer. */ + LEA_IY = 0x87, /* LEA Indirect Indexed. */ + ADS_Z = 0x8A, /* ADS Zero Matrix. */ + DEE_IMP = 0x8B, /* DEcrement Effective address register. */ + RLM_Z = 0x8C, /* Rotate Left, on Memory. */ + LEQ_IMM = 0x8D, /* Load accumulator, if EQual. */ + LEQ_E = 0x8E, /* LEQ E Indirect. */ + ECO_AB = 0x90, /* ECO Absolute. */ + XOR_E = 0x91, /* XOR E Indirect. */ + CMP_E = 0x92, /* CMP E Indirect. */ + LEA_AIX = 0x93, /* LEA Absolute Indexed Indirect. */ + ECO_E = 0x94, /* ECO E Indirect. */ + ADS_AB = 0x96, /* ADS Absolute. */ + LEA_IX = 0x97, /* LEA Indexed Indirect. */ + RLM_AB = 0x98, /* RLM Absolute. */ + ADS_E = 0x9A, /* ADS E Indirect. */ + INE_IMP = 0x9B, /* INcrement Effective address register. */ + RLM_E = 0x9C, /* RLM E Indirect. */ + SEQ_E = 0x9E, /* Store accumulator, if EQual. */ + INC_E = 0xA2, /* INC E Indirect. */ + PEA_AIY = 0xA3, /* PEA Absolute Indirect Indexed. */ + STS_Z = 0xA4, /* STore Stack pointer. */ + SBS_IMM = 0xA6, /* SuBtract Stack pointer. */ + PEA_IY = 0xA7, /* PEA Indirect Indexed. */ + SBS_Z = 0xAA, /* SBS Zero Matrix. */ + DES_IMP = 0xAB, /* DEcrement Stack pointer. */ + RRM_Z = 0xAC, /* Rotate Right, on Memory. */ + LNE_IMM = 0xAD, /* Load accumulator, if Not Equal. */ + LNE_E = 0xAE, /* LNE E Indirect. */ + STS_AB = 0xB0, /* STS Absolute. */ + LSL_E = 0xB1, /* LSL E Indirect. */ + LDY_E = 0xB2, /* LDY E Indirect. */ + PEA_AIX = 0xB3, /* PEA Absolute Indexed Indirect. 
*/ + STS_E = 0xB4, /* STS E Indirect. */ + SBS_AB = 0xB6, /* SBS Absolute. */ + PEA_IX = 0xB7, /* PEA Indexed Indirect. */ + RRM_AB = 0xB8, /* RRM Absolute. */ + SBS_E = 0xBA, /* SBS E Indirect. */ + INS_IMP = 0xBB, /* INcrement Stack pointer. */ + RRM_E = 0xBC, /* RRM E Indirect. */ + REP_REL = 0xBD, /* REPeat until counter is zero. */ + SNE_E = 0xBE, /* Store accumulator, if Not Equal. */ + STY_E = 0xC2, /* STY E Indirect. */ + STE_Z = 0xC4, /* STore Effective address register. */ + NOT_A = 0xC6, /* bitwise NOT with accumulator. */ + NOT_Z = 0xCA, /* NOT Zero Matrix. */ + MMV_IMP = 0xCB, /* Memory MoVe. */ + ARM_Z = 0xCC, /* Arithmetic shift Right, on Memory. */ + REQ_REL = 0xCD, /* Repeat until either counter is zero, or zero flag isn't set. */ + STE_AB = 0xD0, /* STE Absolute. */ + LSR_E = 0xD1, /* LSR E Indirect. */ + LDA_E = 0xD2, /* LDA E Indirect. */ + NOT_AB = 0xD6, /* NOT Absolute. */ + ARM_AB = 0xD8, /* ARM Absolute. */ + NOT_E = 0xDA, /* NOT E Indirect. */ + ARM_E = 0xDC, /* ARM E Indirect. */ + RNE_REL = 0xDD, /* Repeat until either counter is zero, or zero flag is set. */ + STA_E = 0xE2, /* STA E Indirect. */ + STZ_Z = 0xE4, /* STore Zero. */ + SWP_A = 0xE6, /* SWaP lower half, with upper half. */ + SWP_Z = 0xEA, /* SWP Zero Matrix. */ + PCN_Z = 0xEC, /* Population CouNt. */ + STZ_AB = 0xF0, /* STZ Absolute. */ + ROL_E = 0xF1, /* ROL E Indirect. */ + LDB_E = 0xF2, /* LDB E Indirect. */ + STZ_E = 0xF4, /* STZ E Indirect. */ + SWP_AB = 0xF6, /* SWP Absolute. */ + PCN_AB = 0xF8, /* PCN Absolute. */ + SWP_E = 0xFA, /* SWP E Indirect. */ + PCN_E = 0xFC /* PCN E Indirect. */ +}; diff --git a/lexer/lexer.c b/lexer/lexer.c new file mode 100644 index 0000000..1bc7144 --- /dev/null +++ b/lexer/lexer.c @@ -0,0 +1,311 @@ +#include "asmmon.h" +#include "cpu/sux/cpu.h" +#include "lexer.h" + +/* Name: isidstart() + * Desc: Checks if the character is the start of a valid identifier. + * Args: + * c: The character to check. 
+ * Return value: Returns true if the character is the start of a valid identifier, and false if not. + */ + +int isidstart(char c) { + return (isalpha(c) || c == '@' || c == '_'); +} + +/* Name: isidchar() + * Desc: Checks if the character is a valid identifier character. + * Args: + * c: The character to check. + * Return value: Returns true if the character is a valid identifier character, and false if not. + */ + +int isidchar(char c) { + return (isalnum(c) || c == '_'); +} + +/* Name: skip() + * Desc: Skips any whitespace within a string. + * Args: + * s: The string to check. + * Return value: Returns a pointer to the content after the whitespace. + */ + +char *skip(char *s) { + for (; isspace(*s); s++); + return s; +} + +/* Name: iseol() + * Desc: Checks if the character is an end of line character. + * Args: + * c: The character to check. + * Return value: Returns true if the character is an end of line character, and false if not. + */ + +int iseol(char c) { + return (c == '\0' || c == ';'); +} + +/* Name: eol() + * Desc: Checks for end of line, and issues an error, if end of line wasn't found. + * Args: + * s: The string to check. + * Return value: None. + */ + +void eol(char *s) { + if (ignore_trail) { + if (!iseol(*s) && !isspace(*s)) { + syntax_error(6); /* End of line was not found. */ + } + } else { + s = skip(s); + if (!iseol(*s)) { + syntax_error(6); /* End of line was not found. */ + } + } +} + +/* Name: isbadid() + * Desc: Checks if the string is an invalid identifier. + * Args: + * p: The string to check. + * len: Length of the string. + * Return value: Returns true if the string is an invalid identifier, and false if not. + */ + +int isbadid(char *p, int len) { + return (len == 1 && (*p == '@' || *p == '_')); +} + +/* Name: skip_operand() + * Desc: Skips the contents of an operand within a string, gives an error if there + * are either too many closing brackets, or missing closing brackets. + * Args: + * inst_op: Instruction operand flag. 
+ * s: The string to check. + * Return value: Returns the content after the operand. + */ + +char *skip_operand(int inst_op, char *s) { + int brack_count = 0; /* Bracket count. */ + int done = 0; /* Loop done flag. */ + + for (char c = 0;; s++) { + c = *s; + switch (c) { + case '(': brack_count++; break; + case ')': + if (brack_count > 0) { + brack_count--; + } else { + syntax_error(3); /* Too many closing brackets. */ + } + break; + case '\'': + case '\"': s = skip_string(s, c, NULL) - 1; break; + case '\0': + case ';' : done = 1; break; + default : + if ((!inst_op || (inst_op && OPERSEP_COMMA)) && c == ',' && !brack_count) { + done = 1; + break; + } else if (inst_op && OPERSEP_WHITESPACE && isspace(c) && !brack_count) { + done = 1; + break; + } + break; + } + if (done) { + break; + } + } + if (brack_count) { + syntax_error(4); /* Missing closing brackets. */ + } + return s; +} + +/* Name: skip_local() + * Desc: Skips the contents of a local label within a string. + * Args: + * p: The string to check. + * Return value: Returns either a pointer to the content after the local label, or NULL. + */ + +char *skip_local(char *p) { + if (isidstart(*p) || isdigit(*p)) { + for (p++; isidchar(*p); p++); + } else { + p = NULL; + } + return p; +} + +/* Name: get_local_label() + * Desc: Finds a local label within a line. + * Args: + * start: The start of the line. + * Return value: Returns either the name of the local label, or NULL. + */ + +char *get_local_label(char **start) { + char *s = *start; + char *p = skip_local(s); + char *name = NULL; + if (p != NULL && *p == '@' && isidchar(p[1]) && isidstart(*s) && *s != '@') { + /* Skips the local part of a global@local label. 
*/ + s = p+1; + p = skip_local(p); + name = make_local_label(*start, (s-1) - *start, s, p-s); + *start = skip(p); + } else if (p != NULL && p > s+1 && *s == '@') { /* @label */ + s++; + name = make_local_label(NULL, 0, s, p-s); + *start = skip(p); + } + return name; +} + +/* Name: parse_label_or_pc() + * Desc: Finds a global/local label, or the current pc character within a line. + * Args: + * start: The line to parse. + * Return value: Returns either the name of a global/local label, the current pc character, or NULL. + */ + +char *parse_label_or_pc(char **start) { + char *s = skip(*start); + char *name = parse_labeldef(start, 0); + if (name == NULL && *s == current_pc_char && !isidchar(s[1])) { + name = cnvstr(s, 1); + s = skip(s+1); + } + *start = (name) ? s : *start; + return name; +} + +/* Name: lex() + * Desc: Lexically analyze/Tokenize a line into a stream of tokens. + * Args: + * line: The line that will be lexed/tokenized. + * address: Current address of the program counter. + * bline: Current number of blank lines before the current line. + * dbg: Debugging flag. + * Return value: Returns the address of the next line. + */ + +uint64_t lex(char *line, uint64_t address, uint16_t bline, uint8_t dbg) { + char *s; + char *inst; + char *label_name; + char *ext[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1]; + char *op[MAX_OPERANDS]; + int ext_len[MAX_QUALIFIERS ? MAX_QUALIFIERS : 1]; + int op_len[MAX_OPERANDS]; + int ext_cnt; + int op_cnt; + int inst_len; + s = line; + instruction *ip; + + while (isdelm(*s, dbg) != 1) { + label_name = parse_label_or_pc(&s); + if (label_name) { + /* We found a global/local label, or the current pc character. */ + symbol *label; + int equ_len = (*s == '='); + if (equ_len) { + /* Found an equate directive. */ + if (*label_name == current_pc_char) { + handle_org(skip(s+equ_len)); + continue; + } else { + s = skip(s+equ_len); + label = new_equate(label_name, parse_expr_tmplab(&s)); + } + } else { + /* It's just a label. 
*/ + label = new_label_sym(0, label_name); + add_atom(0, new_label_atom(label)); + } + if (!is_local_label(label_name) && autoexport) { + label->flags |= EXPORT; + } + free(label_name); + } + /* Check for directives first. */ + s = skip(s); + if (*s == ';') { + continue; + } + if (*s == current_pc_char && s[1] == '=') { /* "*=" org directive. */ + handle_org(skip(s+2)); + continue; + } + if (handle_directive(s)) { + continue; + } + s = skip(s); + if (iseol(s)) { + continue; + } + /* Read the mnemonic name. */ + inst = s; + if (!isidstart(*s)) { + syntax_error(10); /* Identifier was expected. */ + continue; + } + #if !MAX_QUALLIFIERS + for (; *s && !isspace(*s); s++); + inst_len = s - inst; + #else + s = lex_inst(s, &inst_len, ext, ext_len, &ext_cnt); + #endif + if (!isspace(*s) && *s != '\0') { + syntax_error(2); /* No space before operand. */ + } + s = skip(s); + if (handle_struct(inst, inst_len, s)) { + continue; + } + /* Read the operand(s), separated by comma, or whitespace (unless within brackets). 
*/ + op_cnt = 0; + while (!iseol(*s) && op_cnt < MAX_OPERANDS) { + op[op_cnt] = s; + s = skip_operand(1, s); + op_len[op_cnt] = oplen(s, op[op_cnt]); + op_cnt++; + if (ignore_trail) { + if (*s != ',') { + break; + } + s++; + } else { + s = skip(s); + if (OPERSEP_COMMA) { + if (*s == ',') { + s = skip(s+1); + } else if (!(OPERSEP_WHITESPACE)) { + break; + } + } + } + } + eol(s); + ip = new_inst(inst, inst_len, op_cnt, op, op_len); + if (ip) { + #if MAX_QUALIFIERS > 0 + int i; + for (i = 0; i < ext_cnt; i++) { + ip->qualifiers[i] = cnvstr(ext[i], ext_len[i]); + } + for (; i < MAX_QUALIFIERS; i++) { + ip->qualifiers[i] = NULL; + } + #endif + add_atom(0, new_inst_atom(ip)); + } + } +} diff --git a/lexer/lexer.h b/lexer/lexer.h new file mode 100644 index 0000000..2595158 --- /dev/null +++ b/lexer/lexer.h @@ -0,0 +1,228 @@ +static uint8_t isdelm(char c, uint8_t dbg) { + switch (c) { + default : return 0x00; + case '\0': + case '\n': return 0x01; + case ',' : return 0x02; + case '\"': return 0x04; + case '\'': return 0x08; + case '\t': + case ' ' : return 0x10; + } +} + +static uint8_t isdelm2(char c, uint8_t dbg) { + switch (c) { + default : return 0; + case ')' : + case ',' : + case '.' : + case '+' : + case '<' : + case '|' : + case '>' : + case '-' : + case ':' : + case '=' : + case ';' : + case '\0': + case '\n': return 1; + case '\t': + case ' ' : return 2; + } +} + +static uint8_t get_ptok(char c, uint8_t dbg) { + switch (c) { + case '.' 
: return PTOK_DOT ; + case '@' : return PTOK_AT ; + case ':' : return PTOK_COLON ; + case '=' : return PTOK_EQU ; + case '+' : return PTOK_PLUS ; + case '-' : return PTOK_MINUS ; + case '>' : return PTOK_GT ; + case '<' : return PTOK_LT ; + case '|' : return PTOK_PIPE ; + case '(' : return PTOK_LBRACK ; + case ')' : return PTOK_RBRACK ; + case ',' : return PTOK_COMMA ; + case 'B': case 'b' : return PTOK_B ; + case 'E': case 'e' : return PTOK_E ; + case 'X': case 'x' : return PTOK_X ; + case 'Y': case 'y' : return PTOK_Y ; + case 'S': case 's' : return PTOK_S ; + case 'P': case 'p' : return PTOK_P ; + case 'A': case 'a' : return PTOK_A ; + case 'C': case 'c' : return PTOK_C ; + case 'D': case 'd' : return PTOK_D ; + case 'F': case 'f' : return PTOK_F ; + case 'R': case 'r' : return PTOK_R ; + case '\"': return PTOK_DQUOTE ; + case '\'': return PTOK_SQUOTE ; + case '#' : return PTOK_HASH ; + case ';' : return PTOK_SCOLON ; + case '$' : return PTOK_DOLLAR ; + case '%' : return PTOK_PERCENT; + default : + if (isdigit(c)) { + return PTOK_NUMBER; + } else if (isalpha(c) || c == '_') { + return PTOK_ALPHA; + } else { + return PTOK_OTHER; + } + } +} + +static uint8_t is_altok(uint8_t ptok, uint8_t dbg) { + switch (ptok) { + case PTOK_B: + case PTOK_E: + case PTOK_X: + case PTOK_Y: + case PTOK_S: + case PTOK_P: return 1; + default : return 0; + } +} + +#if 0 +static int handle_dot(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_at(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_colon(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_equ(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_plus(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_minus(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int 
handle_gt(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_lt(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_pipe(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_lbrack(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_rbrack(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_comma(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_b(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_e(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_x(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_y(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_s(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_p(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_dquote(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_squote(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_hash(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_scolon(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_dollar(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_percent(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} +static int handle_number(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_alpha(char *str, int idx, uint8_t 
lex_type, line *l, token *t, uint8_t dbg) { + +} + +static int handle_other(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg) { + return idx+1; +} + +typedef int (*ptok_func)(char *str, int idx, uint8_t lex_type, line *l, token *t, uint8_t dbg); + +static ptok_func ptok_handler[PTOK_OTHER+1] = { + [PTOK_DOT ] = handle_dot, + [PTOK_AT ] = handle_at, + [PTOK_COLON ] = handle_colon, + [PTOK_EQU ] = handle_equ, + [PTOK_PLUS ] = handle_plus, + [PTOK_MINUS ] = handle_minus, + [PTOK_GT ] = handle_gt, + [PTOK_LT ] = handle_lt, + [PTOK_PIPE ] = handle_pipe, + [PTOK_LBRACK ] = handle_lbrack, + [PTOK_RBRACK ] = handle_rbrack, + [PTOK_COMMA ] = handle_comma, + [PTOK_B ] = handle_b, + [PTOK_E ] = handle_e, + [PTOK_X ] = handle_x, + [PTOK_Y ] = handle_y, + [PTOK_S ] = handle_s, + [PTOK_P ] = handle_p, + [PTOK_DQUOTE ] = handle_dquote, + [PTOK_SQUOTE ] = handle_squote, + [PTOK_HASH ] = handle_hash, + [PTOK_SCOLON ] = handle_scolon, + [PTOK_DOLLAR ] = handle_dollar, + [PTOK_PERCENT] = handle_percent, + [PTOK_NUMBER ] = handle_number, + [PTOK_ALPHA ] = handle_alpha, + [PTOK_OTHER ] = handle_other +}; +#endif diff --git a/lexer/misc.c b/lexer/misc.c new file mode 100644 index 0000000..9014421 --- /dev/null +++ b/lexer/misc.c @@ -0,0 +1,14 @@ +/* Name: cnvstr() + * Desc: Converts a pointer, and length pair into a null-terminated string. + * Args: + * s: Pointer to convert. + * len: Length to convert. + * Return value: Returns a null-terminated string. + */ + +char *cnvstr(char *s, int len) { + char *p = malloc(len+1); + memcpy(p, s, len); + p[len] = '\0'; + return p; +} diff --git a/lexer/parse.c b/lexer/parse.c new file mode 100644 index 0000000..0f26a0e --- /dev/null +++ b/lexer/parse.c @@ -0,0 +1,171 @@ +/* Name: handle_escape() + * Desc: Handles parsing an escape sequence. + * Args: + * s: The string to check. + * code: Pointer to save the handled escape code into, if not NULL. + * Return value: Returns the content after the escape sequence. 
+ */ + +char *handle_escape(char *s, char *code) { + char dummy; + int count; + char *end; + int base = 0; + unsigned int value; + + if (*s++ != '\\') { + ierror(0); /* Start of escape sequence not found. */ + } + if (code == NULL) { + code = &dummy; + } + if (!esc_sequences) { + *code = '\\'; + return s; + } + + switch (*s) { + case 'b' : *code = '\b'; return s+1; + case 'f' : *code = '\f'; return s+1; + case 'n' : *code = '\n'; return s+1; + case 'r' : *code = '\r'; return s+1; + case 't' : *code = '\t'; return s+1; + case '\\': *code = '\\'; return s+1; + case '\"': *code = '\"'; return s+1; + case '\'': *code = '\''; return s+1; + case 'e' : *code = '\x1B'; return s+1; + case 'x' : case 'X' : base = 16; s++; /* Falls Through. */ + case '0' : case '1' : case '2' : case '3' : case '4' : + case '5' : case '6' : case '7' : case '8' : case '9' : + base = (!base) ? 8 : base; + value = strtoull(s, &end, base); + *code = value; + return end; + default : + general_error(35, *s); /* No valid escape sequence was found. */ + return s; + } +} + +/* Name: skip_string() + * Desc: Skips the contents of some delimited string. + * Args: + * s: The string to check. + * delm: Delimiter of the string. + * size: Pointer to save the size of the string into, if not NULL. + * Return value: Returns the content after the string. + */ + +char *skip_string(char *s, char delm, size_t *size) { + size_t n = 0; + + if (*s != delm) { + general_error(6, delm); /* Delimiter was expected. */ + } else { + s++; + } + for (; *s; n++) { + if (*s == '\\') { + s = handle_escape(s, NULL); + } else { + if (*s++ = delm) { + if (*s == delm) { + s++; /* Allow multiple delimiters in a row to be recognized as a single delimiter. */ + } else { + break; + } + } + } + } + if (*(s-1) != delm) { + general_error(6, delm); /* Delimiter was expected. */ + } + if (size) { + *size = n; + } + return s; +} + +/* Name: skip_identifier() + * Desc: Skips the contents of an identifier within a string. 
+ * Args: + * s: The string to check. + * Return value: Returns either a pointer to the content after the identifier, or NULL. + */ + +char *skip_identifier(char *s) { + char *name = s; + if (isidstart(*s) || isdigit(*s)) { + for (s++; isidchar(*s); s++); + if (s) { + if (isbadid(name, s-name)) { + return s; + } + } + } + return NULL; +} + +/* Name: parse_identifier() + * Desc: Parses an indentifier within a line. + * Args: + * s: The line to be parsed. + * Return value: Returns either a pointer to the start of the identifier, or NULL. + */ + +char *parse_identifier(char **s) { + char *name = *s; + char *end_name = skip_identifier(name); + /*char *endgame;*/ /* LOL LE EPIC FUNNY MARVEL THANOS MEME. XDDDDDD */ + if (end_name) { + *s = end_name; + return cnvstr(name, end_name-name); + } + return NULL; +} + +/* Name: parse_symbol() + * Desc: Parses a symbol within a line. + * Args: + * s: The line to be parse. + * Return value: Returns either a pointer to an allocated local/global symbol string, or NULL. + */ + +char *parse_symbol(char **s) { + char *name = get_local_label(s); + name = (name == NULL) ? parse_identifier(s) : name; + return name; +} + +/* Name: parse_labeldef() + * Desc: Parses either a global, or local label definition, at the begining of a line. + * Args: + * line: The line that will be parsed. + * colreq: Require a trailing colon, when true. + * Return value: Returns a pointer to the allocated buffer, when valid. + */ + +char **parse_labeldef(char **line, int colreq) { + char *s = *line; + char *label_name; + + if (isspace(*s)) { + s = skip(s); + colreq = 1; /* Colon required, if label doesn't start at the first column. 
*/ + } + label_name = parse_symbol(&s); + if (label_name) { + s = skip(s); + if (*s == ':') { + s++; + colreq = 0; + } + if (colreq) { + free(label_name); + label_name = NULL; + } else { + *line = s; + } + } + return label_name; +} diff --git a/lexer/symbol.c b/lexer/symbol.c new file mode 100644 index 0000000..7a8001f --- /dev/null +++ b/lexer/symbol.c @@ -0,0 +1,42 @@ +/* Name: make_local_label() + * Desc: Constructs a local label of the form: + * " " + global_name + " " + local_name + * Args: + * global: Global label name. + * global_len: Length of Global label name. + * local: Local label name. + * local_len: Length of Local label name. + * Return value: Returns the constructed local label. + */ + +char make_local_label(char *global, int global_len, char *local, int local_len) { + char *name; + char *p; + + if (!global_len) { + /* Use last defined global. */ + global = last_global; + global_len = strlen(last_global); + } + name = malloc(local_len+global_len+3); + p = name; + *p++ = ' '; + if (global_len) { + memcpy(p, global, global_len); + p += global_len; + } + *p++ = ' '; + memcpy(p, local, local_len); + p[local_len] = '\0'; + return name; +} + +/* Name: + * Desc: + * Args: + * + * + * + * + * Return value: + */ |