; Lexer, and supporting routines for SuBAsm.
;
; Provenance (from the patch this file was introduced in):
;   Commit:  887802efcdb3b56263069cc6778a8f53ed89d599
;   Author:  mrb0nk500
;   Date:    Mon, 22 Jun 2020 17:56:52 -0400
;   Subject: Did some more stuff.
;     - Fixed some bugs in the emulator's assembler.
;     - Worked on SuBAsm's lexer some more.
;     - Created a new directory for the SuB suite, and moved all of the
;       SuB suite's files in there.
;   File:    programs/sub-suite/lexer.s (new file, index 0000000..315bee4)

; Enums.

; Directives.
DIR_ORG     = 0     ; Origin.
DIR_BYTE    = 1     ; Byte  =  8 bits.
DIR_WORD    = 2     ; Word  = 16 bits.
DIR_DWORD   = 3     ; Dword = 32 bits.
DIR_QWORD   = 4     ; Qword = 64 bits.
DIR_INCL    = 5     ; Include. Also the last directive ID (see ptok_dot).

; Tokens.
TOK_DIR     = 0     ; Directive.
TOK_LOCAL   = 1     ; Local symbol.
TOK_LABEL   = 2     ; Label.
TOK_SYM     = 3     ; Symbol.
TOK_EXPR    = 4     ; Expression.
TOK_CSV     = 5     ; Comma separated value.
TOK_STR     = 6     ; String.
TOK_CHAR    = 7     ; Character.
TOK_IND     = 8     ; Indirect addressing.
TOK_IMM     = 9     ; Immediate data.
TOK_MNE     = 10    ; Opcode/Mnemonic.
TOK_RS      = 11    ; Register size prefix.
TOK_COMM    = 12    ; Comment.
TOK_HEX     = 13    ; Hex value.
TOK_DEC     = 14    ; Decimal value.
TOK_BIN     = 15    ; Binary value.
TOK_INCL    = 16    ; Include file.

; Pre-Tokens.
; NOTE: the order here must match the order of the entries in swtab.
PTOK_DOT    = 0     ; .
PTOK_AT     = 1     ; @
PTOK_COLON  = 2     ; :
PTOK_EQU    = 3     ; =
PTOK_PLUS   = 4     ; +
PTOK_MINUS  = 5     ; -
PTOK_GT     = 6     ; >
PTOK_LT     = 7     ; <
PTOK_LBRAK  = 8     ; (
PTOK_RBRAK  = 9     ; )
PTOK_COMMA  = 10    ; ,
PTOK_X      = 11    ; x
PTOK_Y      = 12    ; y
PTOK_DQUOT  = 13    ; "
PTOK_SQUOT  = 14    ; '
PTOK_HASH   = 15    ; #
PTOK_SCOLN  = 16    ; ;
PTOK_DOLR   = 17    ; $
PTOK_PRCNT  = 18    ; %
PTOK_NUM    = 19    ; 0-9
PTOK_ALPH   = 20    ; a-z A-Z
PTOK_OTHR   = 21    ; Everything else.

; Expressions.
EXPR_PLUS   = 0     ; Plus.
EXPR_MINUS  = 1     ; Minus.
EXPR_LOW    = 2     ; Lower half of address.
EXPR_HIGH   = 3     ; Upper half of address.
EXPR_NONE   = 4     ; No expression.


; Data.
.org lexer_data
; Jump table for parsing pre-tokens.
; One 16-bit entry per PTOK_* value; parse_ptok indexes it with (pre-token * 2).
swtab:
	.word ptok_dot	; PTOK_DOT
	.word ptok_at	; PTOK_AT
	.word ptok_col	; PTOK_COLON
	.word ptok_equ	; PTOK_EQU
	.word ptok_plus	; PTOK_PLUS
	.word ptok_min	; PTOK_MINUS
	.word ptok_gt	; PTOK_GT
	.word ptok_lt	; PTOK_LT
	.word ptok_lbrk	; PTOK_LBRAK
	.word ptok_rbrk	; PTOK_RBRAK
	.word ptok_com	; PTOK_COMMA
	.word ptok_xr	; PTOK_X
	.word ptok_yr	; PTOK_Y
	.word ptok_dqu	; PTOK_DQUOT
	.word ptok_squ	; PTOK_SQUOT
	.word ptok_hash	; PTOK_HASH
	.word ptok_scol	; PTOK_SCOLN
	.word ptok_dolr	; PTOK_DOLR
	.word ptok_prcn	; PTOK_PRCNT
	.word ptok_num	; PTOK_NUM
	.word ptok_alph	; PTOK_ALPH
	.word ptok_othr	; PTOK_OTHR

; Data entry point for utility subroutines.
util_data:


; Program code.
.org lexer

; lex: Lex the line that the first pointer (ptr) points at.
;
; Clears idx0/idx1/idx2 and the isop flag (b), points the second pointer
; (ptr2) at the next line table entry, then walks the line one pre-token
; at a time, dispatching each one through parse_ptok.
;
; In:    ptr = the line to lex, Y = screen buffer index.
; Out:   Y is preserved (pushed on entry, pulled at @end).
; Uses:  A, B, X, idx0 (string index), idx1, idx2, b, ptr2, ctok.
;
; NOTE(review): several places below do "pha / <test> / pla / branch".
; That only works if pla leaves the flags untouched on this CPU - confirm.
; NOTE(review): most ptok_* handlers are still stubs that return without
; advancing idx0, so @loop will presumably spin on those characters.
lex:
	ldx #0		; Reset X.
	txa		; Reset A.
	phy #2		; Preserve the screen buffer index.
	txy		; Reset Y.
	sty.q idx0	; Clear the first index.
	sty.q idx1	; Clear the second index.
	sty.q idx2	; Clear the third index.
	sty b		; Clear the isop flag.
;	lda (ptr), y	; Get a character from the line.
;	pha #1		; Preserve the character.
;	jsr isdigit	; Is this character a digit?
;	pla #1		; Get the character back.
@getline:
	; NOTE(review): lda.w below is presumably expected to keep the bank
	; byte that was just shifted into the upper part of A - confirm.
	lda #2		; Get the third byte, of the line table address.
	lsl #$10	; Shift it by 2 bytes.
	ldb #1		; Set the second pointer
	lda.w ltok	; to the last line.
	jsr set_ptr	;
	lda.w (ptr2)	; Get the next line.
	jsr set_ptr	; Set the second pointer to the next line.
	sta.w ctok	; Make it the current line.
	and #0		; Reset A.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm	; Get the delimiter.
	cmp #1		; Are we at the end of the line?
	beq @end	; Yes, so we're done.
@spaces:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha #1		; Preserve the character.
	jsr isdelm	; Get the delimiter.
	and #$10	; Is this character, a space, or tab?
	pla #1		; Get the character back.
	beq @isstart	; No, so check for the start of the line.
	inc.w idx0	; Yes, so increment the string index.
	cmp #' '	; Is this character, a space?
	beq @incs	; Yes, so increment the starting space count.
	cmp #'\t'	; No, but is it a tab?
	beq @inct	; Yes, so increment the starting tab count.
	jmp @spaces	; No, so keep looping.
@incs:
	inc idx1	; Increment the space count (low byte of idx1).
	jmp @spaces	; Keep looping.
@inct:
	inc idx1+1	; Increment the tab count (second byte of idx1).
	jmp @spaces	; Keep looping.
@isstart:
	pha #2		; Preserve the character.
	lda.w idx1	; Was there any whitespace?
	pla #2		; Get the character back.
	beq @switch	; No, so start lexing.
	cpb #1		; Yes, and are we at the start of the line?
	bne @switch	; No, so start lexing.
@whtspace:
	ldy #2		; Yes, so set the line index to the starting whitespace counters.
	lda.w idx1	; Get both indices (space count, and tab count).
	sta.w (ptr2), y	; Save them in the line.
	and #0		; Reset A.
	sta.w idx1	; Reset the second index.
	deb		; Set the isstart flag to false.
@switch:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character.
	jsr get_ptok	; Get the pre-token.
	jsr parse_ptok	; Parse the pre-token.
;	beq @end	; We got to the end of the string.
	jmp @loop	; Keep looping.
@end:
	ply #2		; Get the screen buffer index back.
	rts		; End of lex.


; parse_ptok: Dispatch a pre-token to its handler, through swtab.
;
; In:    A = pre-token ID (one of the PTOK_* values).
; Out:   Jumps to the matching ptok_* handler with A = 0, B = 0, and Y
;        preserved. The handler's rts returns to parse_ptok's caller.
; Uses:  ptr3 (ends up holding the handler's address).
parse_ptok:
	pha #1		; Preserve the pre-token.
	ldb #2		; Set the third pointer
	lda.w #swtab	; to the start of the jump table.
	jsr set_ptr	;
	and #0		; Reset A.
	pla #1		; Get the pre-token back.
	phy #2		; Preserve Y.
	lsl #1		; Multiply the pre-token by two, to get the jump index.
	tay		; Get the index of the jump table.
	lda.w (ptr3), y	; Get the address to jump to.
	jsr set_ptr	; Set the third pointer to the case address.
	and #0		; Reset A.
	tab		; Reset B.
	ply #2		; Get Y back.
	jmp (ptr3)	; Jump to the case label.

; ptok_dot: Handle '.', which starts either a register size suffix (when it
; follows a mnemonic, i.e. the isop flag is set), or a directive.
;
; Copies the text after the dot into the lexeme buffer, then either decodes
; the suffix with get_rs, or searches the directive table (dir) for it.
; idx1 is reused as the directive ID, and idx2 as the byte offset into the
; directive table.
;
; NOTE(review): when no directive matches, lex_type is still TOK_DIR when
; make_tok is called - confirm that this is intended.
ptok_dot:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #$11	; Set the delimiter comparison value to whitespace.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@isop:
	lda b		; Has the isop flag been set?
	beq @dir	; No, so check for a directive.
@rs:
	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
	sta lex_type	;
	ldy.w idx1	; Get the lexeme index.
	dey		; Decrement the lexeme index.
	lda (ptr3), y	; Get the suffix character.
	jsr get_rs	; Get the register size.
	jmp @end	; We are done.
@dir:
	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
	sta lex_type	;
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index (used as the directive ID below).
	jsr set_lexptr	; Set up the lexeme buffer.
@dir_loop:
	lda.w #dir	; Get pointer to the start of the directive table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous string.
	pha #8		; Preserve the directive string pointer.
	jsr strcasecmp	; Is the lexeme buffer, the same as the directive string?
	pla #8		; Get the directive string pointer back.
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so get the directive ID.
	; The ID that was just compared is in B. Stop once that was the last
	; directive (DIR_INCL), otherwise the next pass would compare against
	; whatever follows the directive table.
	cpb #DIR_INCL	; Have we reached the end of the directive table?
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the directive ID.
@getlen:
	jsr strlen	; Get the string's length.
	inx		; Add one to the length (for the terminator).
	txa		; Place it in the accumulator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the string offset to the current length
	sta.w idx2	; Save the offset in the third index.
	jmp @dir_loop	; Keep looping.
@found:
	nop		;
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.

; ptok_at: Handle '@'. Not implemented yet.
ptok_at:
	rts		; End of parse_ptok.

; ptok_col: Handle ':'. Not implemented yet.
ptok_col:
	rts		; End of parse_ptok.
; The handlers below are pre-token cases that are reached through the
; pre-token jump table. Each one returns to the caller of parse_ptok.
; The ones that only contain an rts are not implemented yet.

; ptok_equ: Handle '='. Not implemented yet.
ptok_equ:
	rts		; End of parse_ptok.

; ptok_plus: Handle '+'. Not implemented yet.
ptok_plus:
	rts		; End of parse_ptok.

; ptok_min: Handle '-'. Not implemented yet.
ptok_min:
	rts		; End of parse_ptok.

; ptok_gt: Handle '>'. Not implemented yet.
ptok_gt:
	rts		; End of parse_ptok.

; ptok_lt: Handle '<'. Not implemented yet.
ptok_lt:
	rts		; End of parse_ptok.

; ptok_lbrk: Handle '('. Not implemented yet.
ptok_lbrk:
	rts		; End of parse_ptok.

; ptok_rbrk: Handle ')'. Not implemented yet.
ptok_rbrk:
	rts		; End of parse_ptok.

; ptok_com: Handle ','. Not implemented yet.
ptok_com:
	rts		; End of parse_ptok.

; ptok_xr: Handle 'x'. Not implemented yet.
ptok_xr:
	rts		; End of parse_ptok.

; ptok_yr: Handle 'y'. Not implemented yet.
ptok_yr:
	rts		; End of parse_ptok.

; ptok_dqu: Handle '"'. Copies the text after the opening quote into the
; lexeme buffer, up to the next double quote. No token is created yet.
;
; NOTE(review): delmcpy only stops on the double quote mask here, so a line
; without a closing quote presumably runs past the end of the line - confirm.
ptok_dqu:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #4		; Set the delimiter comparison value to a double quote.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@term:
	rts		; End of parse_ptok.

; ptok_squ: Handle "'". Not implemented yet.
ptok_squ:
	rts		; End of parse_ptok.

; ptok_hash: Handle '#'. Not implemented yet.
ptok_hash:
	rts		; End of parse_ptok.

; ptok_scol: Handle ';'. Not implemented yet.
ptok_scol:
	rts		; End of parse_ptok.

; ptok_dolr: Handle '$'. Not implemented yet.
ptok_dolr:
	rts		; End of parse_ptok.

; ptok_prcn: Handle '%'. Not implemented yet.
ptok_prcn:
	rts		; End of parse_ptok.

; ptok_num: Handle 0-9. Not implemented yet.
ptok_num:
	rts		; End of parse_ptok.

; ptok_alph: Handle a-z/A-Z. Copies the word into the lexeme buffer, then
; searches the instruction table (mne) for it.
;
; idx1 is reused as the instruction ID, and idx2 as the byte offset into
; the instruction table (instruction ID * 13).
; On a match, lex_type is set to TOK_MNE, and the isop flag (b) is set.
;
; NOTE(review): when nothing matches, lex_type is left as it was when
; make_tok is called - confirm that this is intended.
ptok_alph:
	ldb #0		; Do not let init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #1		; Stop at any possible delimiter, except whitespace.
	tba		; Use isdelm2 for the comparison.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	lda #0		; Reset A.
	sta b		; Clear the isop flag.
@isop:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index (used as the instruction ID below).
	jsr set_lexptr	; Set up the lexeme buffer.
@isop_loop:
	lda.w #mne	; Get pointer to the start of the instruction table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the offset of the current entry.
	jsr strcasecmp	; Is the lexeme buffer, the same as the mnemonic string?
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so get the instruction ID.
	cpb #OPNUM-1	; Have we reached the end of the instruction table?
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the instruction ID.
@offset:
	lda #13		; Get the base size of the instruction table.
	clc		; Prepare for a non carrying multiply.
	mul idx1	; Multiply the base offset, by the instruction ID.
	sta.w idx2	; Save the offset in the third index.
	jmp @isop_loop	; Keep looping.
@found:
	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
	sta lex_type	;
	inc b		; Set the isop flag.
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.

; ptok_othr: Handle everything else. Not implemented yet.
ptok_othr:
	rts		; End of parse_ptok.


; set_lexptr: Point one of the pointers at the lexeme buffer.
;
; In:    B = which pointer to set (passed straight through to set_ptr).
; Out:   A = 0, B = 0, idx1 = 0.
set_lexptr:
	lda.d #lexeme	; Set the pointer to the lexeme buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	sta.q idx1	; Reset the second index.
	rts		; End of set_lexptr.


; set_cmdbuf: Point the first pointer back at the command buffer.
;
; Out:   A = 0, B = 0.
set_cmdbuf:
	ldb #0		; Set the first pointer
	lda.d #cmd_buf	; to the command buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	rts		; End of set_cmdbuf.


; init_lex: Get ready to copy a lexeme out of the line.
;
; In:    B = non zero to skip the current character first (increments idx0).
; Out:   idx1 = 0, idx2 = 0, third pointer = lexeme buffer, A = 0, B = 0
;        (A and B are cleared by set_lexptr).
init_lex:
	cpb #0		; Do we need to increment the string index?
	beq @init	; No, so skip that step.
@inc_str:
	inc.w idx0	; Yes, so increment the string index.
@init:
	lda #0		; Reset A.
	sta.q idx1	; Reset the second index
	sta.q idx2	; Reset the third index
	ldb #2		; Make the lexeme buffer, the third pointer.
	jsr set_lexptr	; Set up the lexeme buffer.
	rts		; End of init_lex.


; delmcpy: Copy characters from the line (ptr), to the lexeme buffer (ptr3),
; until a delimiter is found, then terminate the lexeme buffer.
;
; In:    A = 0 to test with isdelm (stop when (isdelm & B) != 0),
;            non zero to test with isdelm2 (stop when isdelm2 == B).
;        B = delimiter comparison value.
;        idx0 = string index to start copying from.
;        idx1 = lexeme index to start copying to.
; Out:   idx0 = index of the delimiter that stopped the copy.
;        idx1 = length of the copied lexeme (the index of its terminator).
; Uses:  A, Y, and the scratch variables a, and c.
;
; NOTE(review): the "pha / lda a / pla / bne" sequence only works if pla
; leaves the flags untouched on this CPU - confirm.
delmcpy:
	sta a		; Save the delimiter check flag.
	stb c		; Save the delimiter comparison value.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha #1		; Preserve the character.
	lda a		; Are we calling isdelm2?
	pla #1		; Get the character back.
	bne @isdelm2	; Yes, so use isdelm2.
	jsr isdelm	; No, so get the delimiter value from isdelm.
	and c		; Do both delimiter values, have a bit in common?
	bne @end	; Yes, so we're done.
	jmp @copy	; No, so start copying the character.
@isdelm2:
	jsr isdelm2	; Get the delimiter value from isdelm2.
	cmp c		; Are both delimiter values, the same?
	beq @end	; Yes, so we're done.
@copy:
	lda (ptr), y	; Get a character from the line.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Copy the character to the lexeme buffer.
	inc.w idx0	; Increment the string index.
	inc.w idx1	; Increment the lexeme index.
	jmp @loop	; Keep looping.
@end:
	; Y still holds the string index (idx0) here, which is not the end of
	; the lexeme whenever characters were skipped before the copy started.
	; So, reload Y with the lexeme index before writing the terminator.
	ldy.w idx1	; Get the lexeme index.
	lda #0		; Terminate the lexeme buffer.
	sta (ptr3), y	;
	rts		; End of delmcpy.


; get_rs: Convert a register size suffix character, to its register size.
;
; In:    A = suffix character (any case).
; Out:   A = 1 for 'w', 2 for 'd', 3 for 'q', and 0 for anything else.
;        The isop flag (b) is cleared. B is preserved.
get_rs:
	phb #1		; Preserve B.
	ldb #0		; Set the isop flag to false.
	stb b		; (the flag lives in memory, so B has to be stored to it.)
	plb #1		; Get B back.
	jsr tolower	; Convert the character to lowercase.
	cmp #'w'	; Is it .w?
	beq @r1		; Yes, so return 1.
	cmp #'d'	; No, but was it .d?
	beq @r2		; Yes, so return 2.
	cmp #'q'	; No, but was it .q?
	beq @r3		; Yes, so return 3.
@r0:
	lda #0		; Return 0.
	rts		; End of get_rs.
@r1:
	lda #1		; Return 1.
	rts		; End of get_rs.
@r2:
	lda #2		; Return 2.
	rts		; End of get_rs.
@r3:
	lda #3		; Return 3.
	rts		; End of get_rs.


; make_tok: Create a token from the current lexeme. Not implemented yet.
make_tok:
	nop		;
@end:
	rts		; End of make_tok.

; Entry point for utility subroutines.
utils:

; (patch trailer: cgit v1.2.3-13-gbd6f)