From 460832c13c9d476d71e626a0c42de4eeff3feb63 Mon Sep 17 00:00:00 2001
From: mrb0nk500
Date: Thu, 11 Jun 2020 21:39:59 -0400
Subject: Did some more stuff.

- Fix some bugs with strings.
- Started to refactor the instruction functions.
- Added support for using RS prefixes on the memory based increment, and decrement instructions.
- Started work on SuBAsm's lexer.

Have fun looking at this, BieHDC. :)
---
 programs/lexer.s     | 231 +++++++++++++++++++++++++++++++++++++++++++++
 programs/subasm.s    |  92 +++++++++++------
 programs/subeditor.s |  20 ++--
 programs/utils.s     | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 556 insertions(+), 44 deletions(-)
 create mode 100644 programs/lexer.s

(limited to 'programs')

diff --git a/programs/lexer.s b/programs/lexer.s
new file mode 100644
index 0000000..cd5bb54
--- /dev/null
+++ b/programs/lexer.s
@@ -0,0 +1,231 @@
+; Lexer, and supporting routines for SuBAsm.
+
+; Enums.
+
+; Directives.
+DIR_ORG = 0	; Origin.
+DIR_BYTE = 1	; Byte = 8 bits.
+DIR_WORD = 2	; Word = 16 bits.
+DIR_DWORD = 3	; Dword = 32 bits.
+DIR_QWORD = 4	; Qword = 64 bits.
+DIR_INCL = 5	; Include.
+
+; Tokens.
+TOK_DIR = 0	; Directive.
+TOK_LOCAL = 1	; Local symbol.
+TOK_LABEL = 2	; Label.
+TOK_SYM = 3	; Symbol.
+TOK_EXPR = 4	; Expression.
+TOK_CSV = 5	; Comma separated value.
+TOK_STR = 6	; String.
+TOK_CHAR = 7	; Character.
+TOK_IND = 8	; Indirect addressing.
+TOK_IMM = 9	; Immediate data.
+TOK_MNE = 10	; Opcode/Mnemonic.
+TOK_RS = 11	; Register size prefix.
+TOK_COMM = 12	; Comment.
+TOK_HEX = 13	; Hex value.
+TOK_DEC = 14	; Decimal value.
+TOK_BIN = 15	; Binary value.
+TOK_INCL = 16	; Include file.
+
+; Pre-Tokens.
+PTOK_DOT = 0	; .
+PTOK_AT = 1	; @
+PTOK_COLON = 2	; :
+PTOK_EQU = 3	; =
+PTOK_PLUS = 4	; +
+PTOK_MINUS = 5	; -
+PTOK_GT = 6	; >
+PTOK_LT = 7	; <
+PTOK_LBRAK = 8	; (
+PTOK_RBRAK = 9	; )
+PTOK_COMMA = 10	; ,
+PTOK_X = 11	; x
+PTOK_Y = 12	; y
+PTOK_DQUOT = 13	; "
+PTOK_SQUOT = 14	; '
+PTOK_HASH = 15	; #
+PTOK_SCOLN = 16	; ;
+PTOK_DOLR = 17	; $
+PTOK_PRCNT = 18	; %
+PTOK_NUM = 19	; 0-9
+PTOK_ALPH = 20	; a-z A-Z
+PTOK_OTHR = 21	; Everything else.
+
+; Expressions.
+EXPR_PLUS = 0	; Plus.
+EXPR_MINUS = 1	; Minus.
+EXPR_LOW = 2	; Lower half of address.
+EXPR_HIGH = 3	; Upper half of address.
+EXPR_NONE = 4	; No expression.
+
+
+; Data.
+.org lexer_data
+; Jump table for parsing pre-tokens.
+swtab:
+	.word ptok_dot	; PTOK_DOT
+	.word ptok_at	; PTOK_AT
+	.word ptok_col	; PTOK_COLON
+	.word ptok_equ	; PTOK_EQU
+	.word ptok_plus	; PTOK_PLUS
+	.word ptok_min	; PTOK_MINUS
+	.word ptok_gt	; PTOK_GT
+	.word ptok_lt	; PTOK_LT
+	.word ptok_lbrk	; PTOK_LBRAK
+	.word ptok_rbrk	; PTOK_RBRAK
+	.word ptok_com	; PTOK_COMMA
+	.word ptok_xr	; PTOK_X
+	.word ptok_yr	; PTOK_Y
+	.word ptok_dqu	; PTOK_DQUOT
+	.word ptok_squ	; PTOK_SQUOT
+	.word ptok_hash	; PTOK_HASH
+	.word ptok_scol	; PTOK_SCOLN
+	.word ptok_dolr	; PTOK_DOLR
+	.word ptok_prcn	; PTOK_PRCNT
+	.word ptok_num	; PTOK_NUM
+	.word ptok_alph	; PTOK_ALPH
+	.word ptok_othr	; PTOK_OTHR
+
+; Data entry point for utility subroutines.
+util_data:
+
+
+; Program code.
+.org lexer
+lex:
+	ldx #0	; Reset X.
+	txa	; Reset A.
+	phy #2	; Preserve the screen buffer index.
+	txy	; Reset Y.
+	sty.q idx0	; Clear the first index.
+	sty.q idx1	; Clear the second index.
+	sty.q idx2	; Clear the third index.
+;	lda (ptr), y	; Get a character from the line.
+;	pha #1	; Preserve the character.
+;	jsr isdigit	; Is this character a digit?
+;	pla #1	; Get the character back.
+@getline:
+	lda #2	; Get the third byte, of the line table address.
+	lsl #$10	; Shift it by 2 bytes.
+	ldb #1	; Set the second pointer
+	lda.w ltok	; to the last line.
+	jsr set_ptr	;
+	lda.w (ptr2)	; Get the next line.
+	jsr set_ptr	; Set the second pointer to the next line.
+	sta.w ctok	; Make it the current line.
+	and #0	; Reset A.
+@loop:
+	ldy.w idx0	; Get the string index.
+	lda (ptr), y	; Get a character from the line.
+	jsr isdelm	; Get the delimiter.
+	cmp #1	; Are we at the end of the line?
+	beq @end	; Yes, so we're done.
+@spaces:
+	ldy.w idx0	; Get the string index.
+	inc.w idx0	; Increment the string index.
+	lda (ptr), y	; Get a character from the line.
+	pha #1	; Preserve the character.
+	jsr isdelm	; Get the delimiter.
+	and #$10	; Is this character, a space, or tab?
+	pla #1	; Get the character back.
+	beq @isstart	; No, so check for the start of the line.
+	cmp #' '	; Is this character, a space?
+	beq @incs	; Yes, so increment the starting space count.
+	cmp #'\t'	; No, but is it a tab?
+	beq @inct	; Yes, so increment the starting tab count.
+	jmp @spaces	; No, so keep looping.
+@incs:
+	inc idx1	; Increment the space count.
+	jmp @spaces	; Keep looping.
+@inct:
+	inc idx1+1	; Increment the tab count.
+	jmp @spaces	; Keep looping.
+@isstart:
+	pha #2	; Preserve the character.
+	lda.w idx1	; Was there any whitespace?
+	pla #2	; Get the character back.
+	beq @switch	; No, so start lexing.
+	cpb #1	; Yes, and are we at the start of the line?
+	bne @switch	; No, so start lexing.
+@whtspace:
+	ldy #2	; Yes, so set the line index to the starting whitespace counters.
+	lda.w idx1	; Get both indices.
+	sta.w (ptr2), y	; Save them in the line.
+	and #0	; Reset A.
+	sta.w idx1	; Reset the second index.
+	deb	; Set the isstart flag to false.
+@switch:
+	jsr get_ptok	; Get the pre-token.
+	jsr parse_ptok	; Parse the pre-token.
+	beq @end	; We got to the end of the string.
+	jmp @loop	; Keep looping.
+@end:
+	ply #2	; Get the screen buffer index back.
+	rts	; End of lex.
+
+parse_ptok:
+	pha #1	; Preserve the pre-token.
+	ldb #2	; Set the third pointer
+	lda.w #swtab	; to the start of the jump table.
+	jsr set_ptr	;
+	and #0	; Reset A.
+	pla #1	; Get the pre-token back.
+	phy #2	; Preserve Y.
+	lsl #1	; Multiply the pre-token by two, to get the jump index.
+	tay	; Get the index of the jump table.
+	lda.w (ptr3), y	; Get the address to jump to.
+	jsr set_ptr	; Set the third pointer to the case address.
+	and #0	; Reset A.
+	tab	; Reset B.
+	ply #2	; Get Y back.
+	jmp (ptr3)	; Jump to the case label.
+ptok_dot:
+	rts	; End of parse_ptok.
+ptok_at:
+	rts	; End of parse_ptok.
+ptok_col:
+	rts	; End of parse_ptok.
+ptok_equ:
+	rts	; End of parse_ptok.
+ptok_plus:
+	rts	; End of parse_ptok.
+ptok_min:
+	rts	; End of parse_ptok.
+ptok_gt:
+	rts	; End of parse_ptok.
+ptok_lt:
+	rts	; End of parse_ptok.
+ptok_lbrk:
+	rts	; End of parse_ptok.
+ptok_rbrk:
+	rts	; End of parse_ptok.
+ptok_com:
+	rts	; End of parse_ptok.
+ptok_xr:
+	rts	; End of parse_ptok.
+ptok_yr:
+	rts	; End of parse_ptok.
+ptok_dqu:
+	rts	; End of parse_ptok.
+ptok_squ:
+	rts	; End of parse_ptok.
+ptok_hash:
+	rts	; End of parse_ptok.
+ptok_scol:
+	rts	; End of parse_ptok.
+ptok_dolr:
+	rts	; End of parse_ptok.
+ptok_prcn:
+	rts	; End of parse_ptok.
+ptok_num:
+	rts	; End of parse_ptok.
+ptok_alph:
+	rts	; End of parse_ptok.
+ptok_othr:
+	rts	; End of parse_ptok.
+
+
+; Entry point for utility subroutines.
+utils:
diff --git a/programs/subasm.s b/programs/subasm.s
index 4cec219..3c8e767 100644
--- a/programs/subasm.s
+++ b/programs/subasm.s
@@ -5,6 +5,7 @@
 MAX_SYM = $800	; Max symbol size.


+.include "lexer.s"
 .include "utils.s"

 .org incl
@@ -16,7 +17,7 @@ asm_ver:

 ; Directives.
 dir:
-	.byte "org",
+	.byte "org"
 	.byte "byte"
 	.byte "word"
 	.byte "dword"
@@ -152,8 +153,8 @@ cmd_srt:
 	.word run
 	.word set

-; Data entry point for utility subroutines.
-util_data:
+; Data entry point for the lexer.
+lexer_data:


 ; Token table.
@@ -185,27 +186,25 @@ idx2:
 idx3:
 	.qword 0

+; Current token line.
+ctok:
+	.word 0
+
+; Last token line.
+ltok:
+	.word 0
+
 ; Lexeme string.
 lexeme:

-; Symbol tables.
+; Symbol table.
.org lexeme+$100 -sym_val: -.org sym_val+$4000 -sym_id: -.org sym_id+$1000 -sym_def: -.org sym_def+$100 -sym_name: +sym: ; Fixup table. ; Fixups are unresolved symbols. -.org sym_name+$1000 -fix_sym: -.org fix_sym+$1000 -fix_ln: -.org fix_ln+$1000 -fix_val: +.org sym+$8000 +fix: ; Start of program code. @@ -217,16 +216,12 @@ subasm: tba ; Reset A. tax ; Reset X. jsr chk_shcmd ; Did we get a shortend command? - bne parse_cmd ; Yes, so skip everything else. - jmp @end ; + bne @cmd ; Yes, so skip everything else. jsr chk_cmd ; No, but did we get a full command? - bne parse_cmd ; Yes, so skip everything else. - jsr lexer ; No, so start lexing this line. -@end: -subasm_end: - rts ; End of subasm. - -parse_cmd: + bne @cmd ; Yes, so skip everything else. + jsr lex ; No, so start lexing this line. + jmp @end ; We are done. +@cmd: ldb #1 ; Set the second pointer lda.d #cmd_srt ; to the command subroutine table. jsr set_ptr ; @@ -234,7 +229,7 @@ parse_cmd: tba ; Reset A. lda f ; Get the command ID. cmp #8 ; Is the command ID greater than the command count? - bcs subasm_end ; Yes, so we're done. + bcs @end ; Yes, so we're done. lsl #1 ; No, so multiply the command ID by two. phy #2 ; Preserve the screen buffer position. tay ; Set the index to the offset that we just calculated. @@ -244,7 +239,8 @@ parse_cmd: jsr set_ptr ; ldb #0 ; Reset B. jsr (ptr3) ; Run the command's subroutine. - jmp subasm_end ; We are done. +@end: + rts ; End of subasm. chk_shcmd: tba ; Reset A. @@ -259,6 +255,7 @@ chk_shcmd: beq @false ; Yes, so return that we failed. cmp #' ' ; No, but is this character, a space? beq @false ; Yes, so return that we failed. + jsr tolower ; No, so convert it to lowercase. @loop: ldb (ptr2), y ; Are we at the end of the table? beq @false ; Yes, so return that we failed. @@ -286,6 +283,41 @@ chk_shcmd: rts ; End of chk_shcmd. +chk_cmd: + tba ; Reset A. + tax ; Reset X. + sta.q idx0 ; Reset the first index. + sta.q idx1 ; Reset the second index. 
+@loop:
+	lda.w #cmds	; Get pointer to the start of the command table.
+	clc	; Prepare for a non carrying add.
+	adc.w idx0	; Offset the pointer, by the length of the previous string.
+	pha #8	; Preserve the command string pointer.
+	jsr strcasecmp	; Is the command buffer, the same as the command string?
+	pla #8	; Get the command string pointer back.
+	beq @true	; Yes, so return true.
+	ldb idx1	; No, so get the command ID.
+	cpb #7	; Have we reached the end of the command table?
+	beq @false	; Yes, so return false.
+	inc idx1	; No, so increment the command ID.
+@getlen:
+	jsr strlen	; Get the string's length.
+	inx	; Add one to the length.
+	txa	; Place it in the accumulator.
+	clc	; Prepare for a non carrying add.
+	adc.w idx0	; Add the string offset to the current length
+	sta.w idx0	; Save the offset in the first index.
+	jmp @loop	; Keep looping.
+@true:
+	ldb idx1	; Get the command ID.
+	stb f	; Return the command ID.
+	ldb #1	; Return true.
+	jmp @end	; We are done.
+@false:
+	ldb #0	; Return false.
+@end:
+	rts	; End of chk_cmd.
+
 viewmem:
 	lda.q prg_cnt	; Get the program counter.
 	sta.q idx0	; Save the address in the first index.
@@ -347,7 +379,5 @@ set:
 @end:
 	rts	; End of set.

-
 ; Entry point for utility subroutines.
-utils:
-
+lexer:
diff --git a/programs/subeditor.s b/programs/subeditor.s
index 26c6f7d..1768d62 100644
--- a/programs/subeditor.s
+++ b/programs/subeditor.s
@@ -1058,8 +1058,10 @@ update_pos:
 	adc scr_str	; with the starting line number to get the absolute line number.
 	tay	; Place it in the Y regster for now.
 	mul #maxcol+1	; Multiply the line number by the screen's max column count, plus 1.
+	clc	; Clear the carry flag.
 	adc scr_col	; Add the cursor's column number to get the screen index.
 	tay	; Place the index into the Y register.
+	tba	; Reset A.
 	lda #$1B	; Print an escape character
 	sta scr	; to the screen.
 	lda #'['	; Print '['
@@ -1072,27 +1074,19 @@ update_pos:

 getrow:
 	lda scr_row	; Get the cursor's y coordinate.
-	div #10	; Divide A by 10.
-	adc #'0'	; Convert it to ascii, and
-	sta scr	; print to the screen.
-	tba	; Get the remainder.
-	adc #'0'	; Convert it to ascii, and
-	sta scr	; print to the screen.
-	rts	; End of getrow.
-
+	jmp bcd	; Convert it to BCD.
 getcol:
 	lda #';'	; Print ';'
 	sta scr	; to the screen.
 	lda scr_col	; Get the cursor's x coordinate.
+bcd:
 	div #10	; Divide A by 10.
-	clc	;
-	adc #'0'	; Convert it to ascii, and
+	ora #'0'	; Convert it to ascii, and
 	sta scr	; print to the screen.
 	tba	; Get the remainder.
-	clc
-	adc #'0'	; Convert it to ascii, and
+	ora #'0'	; Convert it to ascii, and
 	sta scr	; print to the screen.
-	rts	; End of getrow.
+	rts	; End of bcd.

 scrl_down:
 	inc scr_str	; Increment the starting line of the screen.
diff --git a/programs/utils.s b/programs/utils.s
index 6032a47..d8d4aa2 100644
--- a/programs/utils.s
+++ b/programs/utils.s
@@ -5,6 +5,16 @@
 hex_char:
 	.byte "0123456789ABCDEF"

+; Compare, and return table for pre-tokens.
+ptok_tab:
+	.byte ".@:=+-><(),xy\"\'#;$%"
+; Compare, and return table for isdelm.
+dtab:
+	.byte "\n,\"\' "
+; Compare, and return table for isdelm2.
+dtab2:
+	.byte "),.+<>-=;\n"
+

 .org utils
 print_hi:
@@ -134,3 +144,250 @@ charcpy:
 	sta strbuf, x	; Save it in the string buffer.
 	inc idx3	; Increment the string index.
 	rts	; End of charcpy.
+
+
+strlen:
+	ldb #1	; Set the second pointer
+	jsr set_ptr	; to the passed pointer.
+	deb	; Reset B.
+	tba	; Reset A.
+	tax	; Reset X.
+	phy #2	; Preserve Y.
+	txy	; Reset Y.
+@loop:
+	lda (ptr2), y	; Are we at the end of the string?
+	beq @end	; Yes, so we're done.
+	iny	; No, so increment the index.
+	jmp @loop	; Keep looping.
+@end:
+	tyx	; Return the length in X.
+	ply #2	; Get the preserved value back.
+	rts	; End of strlen.
+
+
+strcmp:
+	ldb #1	; Set the second pointer
+	jsr set_ptr	; to the passed pointer.
+	deb	; Reset B.
+	tba	; Reset A.
+	phy #2	; Preserve Y.
+	tay	; Reset Y.
+@loop:
+	ldb #0	; Set the islong flag to false.
+	lda (ptr), y	; Are we at the end of the first string?
+	beq cmpr	; Yes, so check if we're too short, or too long.
+	ldb #1	; No, so set the islong flag to true.
+	cmp (ptr2), y	; Is the character of both strings, the same?
+	bne cmpr	; No, so check if we're too short, or too long.
+	iny	; Yes, so increment the index.
+	jmp @loop	; Keep looping.
+
+strcasecmp:
+	ldb #1	; Set the second pointer
+	jsr set_ptr	; to the passed pointer.
+	deb	; Reset B.
+	tba	; Reset A.
+	phy #2	; Preserve Y.
+	tay	; Reset Y.
+@loop:
+	ldb #0	; Set the islong flag to false.
+	lda (ptr), y	; Are we at the end of the first string?
+	beq cmpr	; Yes, so check if we're too short, or too long.
+	ldb #1	; No, so set the islong flag to true.
+	jsr tolower	; Convert the character of string 1 to lowercase.
+	phb #1	; Preserve the islong flag.
+	pha #1	; Preserve the converted character.
+	lda (ptr2), y	; Get the character of the second string.
+	jsr tolower	; Convert the character of string 2 to lowercase.
+	tab	; Place it in B.
+	pla #1	; Get the character of string 1 back.
+	cab	; Is the character of both strings, the same?
+	plb #1	; Get the islong flag back.
+	bne cmpr	; No, so check if we're too short, or too long.
+	iny	; Yes, so increment the index.
+	jmp @loop	; Keep looping.
+
+cmpr:
+	lda (ptr2), y	; Are we at the end of the second string?
+	beq @islong	; Yes, so check the islong flag.
+@isshort:
+	lda (ptr), y	; No, but are we at the end of the first string?
+	beq @short	; Yes, so return -1.
+@islong:
+	cpb #1	; Is the islong flag true?
+	bne @equ	; No, so return 0.
+@long:
+	lda #1	; Yes, so return 1.
+	jmp @end	; We are done.
+@equ:
+	lda #0	; Return 0.
+	jmp @end	; We are done.
+@short:
+	lda #$FF	; Return -1.
+@end:
+	ply #2	; Get the preserved value back.
+	rts	; End of strcmp.
+
+
+isdigit:
+	sec	; Prepare for a non carrying subtraction.
+	sbc #'0'	; Subtract $30 from the passed character.
+	and #$FF	; Make sure that we have only one byte.
+	cmp #10	; Is the subtracted value, less than 10?
+	bcs @false	; No, so return false.
+@true:
+	lda #1	; Yes, so return true.
+	jmp @end	; We are done.
+@false:
+	lda #0	; Return false.
+@end:
+	rts	; End of isdigit.
+
+isxdigit:
+	pha #1	; Preserve the character.
+	jsr isdigit	; Is this character, a decimal digit?
+	pla #1	; Get the character back.
+	bne @true	; Yes, so return true.
+@alpha:
+	sec	; No, so prepare for a non carrying subtract.
+	ora #$20	; Convert it to lowercase.
+	sbc #'a'	; Subtract $61 from the character.
+	and #$FF	; Make sure that we have only one byte.
+	cmp #6	; Is the subtracted value, less than 6?
+	bcs @false	; No, so return false.
+@true:
+	lda #1	; Yes, so return true.
+	jmp @end	; We are done.
+@false:
+	lda #0	; Return false.
+@end:
+	rts	; End of isxdigit.
+
+
+isupper:
+	sec	; Prepare for a non carrying subtraction.
+	sbc #'A'	; Subtract $41 from the passed character.
+	jmp isletter	; Check if it's less than 26.
+islower:
+	sec	; Prepare for a non carrying subtraction.
+	sbc #'a'	; Subtract $61 from the passed character.
+isletter:
+	and #$FF	; Make sure that we have only one byte.
+	cmp #26	; Is the subtracted value, less than 26?
+	bcs @false	; No, so return false.
+@true:
+	lda #1	; Yes, so return true.
+	jmp @end	; We are done.
+@false:
+	lda #0	; Return false.
+@end:
+	rts	; End of isletter.
+
+
+tolower:
+	pha #1	; Preserve the character.
+	jsr isupper	; Is this character, an uppercase character?
+	pla #1	; Get the character back.
+	beq @end	; No, so we're done.
+@lower:
+	ora #$20	; Yes, so convert it to lowercase.
+@end:
+	rts	; End of tolower.
+
+
+toupper:
+	pha #1	; Preserve the character.
+	jsr islower	; Is this character, a lowercase character?
+	pla #1	; Get the character back.
+	beq @end	; No, so we're done.
+@upper:
+	and #$5F	; Yes, so convert it to uppercase.
+@end:
+	rts	; End of toupper.
+
+
+isdelm2:
+	ldx #0	; Reset X.
+@loop:
+	ldb dtab2, x	; Get the compare value.
+	beq @other	; We hit the end of the table, so check for the others.
+	cab	; Are they the same?
+	beq @r1	; Yes, so return 1.
+	inx	; No, so increment the table index.
+	jmp @loop	; Keep looping.
+@other:
+	ldx #0	; Reset X.
+	cmp #0	; Is this a null terminator?
+	beq @r1	; Yes, so return 1.
+	cmp #'\t'	; No, but is it a tab?
+	beq @r2	; Yes, so return 2.
+	cmp #' '	; No, but is it a space?
+	beq @r2	; Yes, so also return 2.
+@r0:
+	lda #0	; Return 0.
+	rts	; End of isdelm2.
+@r1:
+	ldx #0	; Reset X.
+	lda #1	; Return 1.
+	rts	; End of isdelm2.
+@r2:
+	lda #2	; Return 2.
+	rts	; End of isdelm2.
+
+
+isdelm:
+	ldx #0	; Reset X.
+@loop:
+	ldb dtab, x	; Get the compare value.
+	beq @other	; We hit the end of the table, so check for the others.
+	cab	; Are they the same?
+	beq @rshft	; Yes, so return 1 << index.
+	inx	; No, so increment the table index.
+	jmp @loop	; Keep looping.
+@other:
+	ldx #0	; Reset X.
+	cmp #0	; Is this a null terminator?
+	beq @rshft	; Yes, so return 1.
+	ldx #4	; No, so set the shift amount to 4.
+	cmp #'\t'	; Is this a tab?
+	beq @rshft	; Yes, so return 16.
+	ldx #0	; No, so reset X.
+@r0:
+	lda #0	; Return 0.
+	rts	; End of isdelm.
+@rshft:
+	stx a	; Save the shift value.
+	ldx #0	; Reset X.
+	lda #1	; Set up the bitshift.
+	lsl a	; Return 1 << X.
+	rts	; End of isdelm.
+
+
+get_ptok:
+	ldx #0	; Reset X.
+	jsr tolower	; Convert the character to lowercase.
+@loop:
+	ldb ptok_tab, x	; Get the compare value.
+	beq @other	; We hit the end of the table, so check for the others.
+	cab	; Are they the same?
+	beq @rtab	; Yes, so return X.
+	inx	; No, so increment the table index.
+	jmp @loop	; Keep looping.
+@rtab:
+	txa	; Return X.
+	rts	; End of get_ptok.
+@other:
+	tab	; Preserve the character.
+	jsr isdigit	; Is this character a digit?
+	bne @rnum	; Yes, so return PTOK_NUM.
+	tba	; No, so get the character back.
+	jsr islower	; Is it an alphabetical character?
+	bne @ralph	; Yes, so return PTOK_ALPH.
+	lda #PTOK_OTHR	; No, so return PTOK_OTHR.
+	rts	; End of get_ptok.
+@rnum:
+	lda #PTOK_NUM	; Return PTOK_NUM.
+	rts	; End of get_ptok.
+@ralph:
+	lda #PTOK_ALPH	; Return PTOK_ALPH.
+	rts	; End of get_ptok.
-- 
cgit v1.2.3-13-gbd6f