diff options
Diffstat (limited to 'programs/sub-suite/lexer.s')
-rw-r--r-- | programs/sub-suite/lexer.s | 152 |
1 files changed, 36 insertions, 116 deletions
diff --git a/programs/sub-suite/lexer.s b/programs/sub-suite/lexer.s index cd7a33a..c144f9a 100644 --- a/programs/sub-suite/lexer.s +++ b/programs/sub-suite/lexer.s @@ -1,103 +1,9 @@ ; Lexer, and supporting routines for SuBAsm. -; Enums. - -; Directives. -DIR_ORG = 0 ; Origin. -DIR_BYTE = 1 ; Byte = 8 bits. -DIR_WORD = 2 ; Word = 16 bits. -DIR_DWORD = 3 ; Dword = 32 bits. -DIR_QWORD = 4 ; Qword = 64 bits. -DIR_INCL = 5 ; Include. - -; Tokens. -TOK_DIR = 0 ; Directive. -TOK_LOCAL = 1 ; Local syobol. -TOK_LABEL = 2 ; Label. -TOK_SYM = 3 ; Symbol. -TOK_EXPR = 4 ; Expression. -TOK_CSV = 5 ; Comma separated value. -TOK_STR = 6 ; String. -TOK_CHAR = 7 ; Character. -TOK_IND = 8 ; Indirect addressing. -TOK_IMM = 9 ; Immediate data. -TOK_MNE = 10 ; Opcode/Mnemonic. -TOK_RS = 11 ; Register size prefix. -TOK_COMM = 12 ; Comment. -TOK_HEX = 13 ; Hex value. -TOK_DEC = 14 ; Decimal value. -TOK_BIN = 15 ; Binary value. -TOK_INCL = 16 ; Include file. - -; Pre-Tokens. -PTOK_DOT = 0 ; . -PTOK_AT = 1 ; @ -PTOK_COLON = 2 ; : -PTOK_EQU = 3 ; = -PTOK_PLUS = 4 ; + -PTOK_MINUS = 5 ; - -PTOK_GT = 6 ; > -PTOK_LT = 7 ; < -PTOK_LBRAK = 8 ; ( -PTOK_RBRAK = 9 ; ) -PTOK_COMMA = 10 ; , -PTOK_X = 11 ; x -PTOK_Y = 12 ; y -PTOK_DQUOT = 13 ; " -PTOK_SQUOT = 14 ; ' -PTOK_HASH = 15 ; # -PTOK_SCOLN = 16 ; ; -PTOK_DOLR = 17 ; $ -PTOK_PRCNT = 18 ; % -PTOK_NUM = 19 ; 0-9 -PTOK_ALPH = 20 ; a-z A-Z -PTOK_OTHR = 21 ; Everything else. - -; Expressions. -EXPR_PLUS = 0 ; Plus. -EXPR_MINUS = 1 ; Minus. -EXPR_LOW = 2 ; Lower half of address. -EXPR_HIGH = 3 ; Upper half of address. -EXPR_NONE = 4 ; No expression. - - -; Data. -.org lexer_data -; Jump table for parsing pre-tokens. -swtab: - .word ptok_dot ; PTOK_DOT - .word ptok_at ; PTOK_AT - .word ptok_col ; PTOK_COLON - .word ptok_equ ; PTOK_EQU - .word ptok_plus ; PTOK_PLUS - .word ptok_min ; PTOK_MINUS - .word ptok_gt ; PTOK_GT - .word ptok_lt ; PTOK_LT - .word ptok_lbrk ; PTOK_LBRAK - .word ptok_rbrk ; PTOK_RBRAK - .word ptok_com ; PTOK_COMMA - .word ptok_xr ; PTOK_X - .word ptok_yr ; PTOK_Y - .word ptok_dqu ; PTOK_DQUOT - .word ptok_squ ; PTOK_SQUOT - .word ptok_hash ; PTOK_HASH - .word ptok_scol ; PTOK_SCOLN - .word ptok_dolr ; PTOK_DOLR - .word ptok_prcn ; PTOK_PRCNT - .word ptok_num ; PTOK_NUM - .word ptok_alph ; PTOK_ALPH - .word ptok_othr ; PTOK_OTHR - -; Data entry point for utility subroutines. -util_data: - - ; Program code. -.org lexer lex: ldx #0 ; Reset X. txa ; Reset A. - phy.w ; Preserve the screen buffer index. txy ; Reset Y. sty.q idx0 ; Clear the first index. sty.q idx1 ; Clear the second index. @@ -165,7 +71,9 @@ lex: ; beq @end ; We got to the end of the string. bra @loop ; Keep looping. @end: - ply.w ; Get the screen buffer index back. + jsr update_ptr ; Get the screen buffer index. + tay ; Save it in Y. + and #0 ; Reset A. rts ; End of lex. @@ -208,21 +116,20 @@ ptok_dot: stb.q idx1 ; Reset the first index. jsr set_lexptr ; Set up the lexeme buffer. @dir_loop: + ldb idx1 ; Get the directive ID. + cpb #7 ; Have we reached the end of the directive table? + beq @end ; Yes, so we're done. lda.w #dir ; Get pointer to the start of the directive table. clc ; Prepare for a non carrying add. adc.w idx2 ; Offset the pointer, by the length of the previous string. pha.q ; Preserve the directive string pointer. - jsr strcasecmp ; Is the lexeme buffer, the same as the directive string? + jsr strcaseptr ; Is the lexeme buffer, the same as the directive string? pla.q ; Get the directive string pointer back. beq @found ; Yes, so create a new token. - ldb idx1 ; No, so Get the directive ID. - cpb #6 ; Have we reached the end of the directive table? - beq @end ; Yes, so we're done. inc idx1 ; No, so increment the directive ID. @getlen: jsr strlen ; Get the string's length. - inx ; Add one to the length. - txa ; Place it in the accumulator. + inc ; Add one to the length. clc ; Prepare for a non carrying add. adc.w idx2 ; Add the string offset to the current length sta.w idx2 ; Save the offset in the third index. @@ -243,16 +150,24 @@ ptok_equ: inc.w idx0 ; rts ; End of parse_ptok. ptok_plus: - inc.w idx0 ; - rts ; End of parse_ptok. + lda #EXPR_PLUS ; Set the expresion type to EXPR_PLUS. + bra ptok_expr ; Set up the token. ptok_min: - inc.w idx0 ; - rts ; End of parse_ptok. + lda #EXPR_MINUS ; Set the expresion type to EXPR_MINUS. + bra ptok_expr ; Set up the token. ptok_gt: - inc.w idx0 ; - rts ; End of parse_ptok. + lda #EXPR_LOW ; Set the expresion type to EXPR_LOW. + bra ptok_expr ; Set up the token. ptok_lt: + lda #EXPR_HIGH ; Set the expresion type to EXPR_HIGH. +ptok_expr: + lda #TOK_EXPR ; Set the lexeme type to TOK_EXPR. + sta lex_type ; inc.w idx0 ; +; ldb #1 ; Make init_lex increment the string index. +; jsr init_lex ; Initialize the lexeme buffer for copying. + jsr make_tok ; Create the token. + jsr set_cmdbuf ; Set the first pointer to the command buffer. rts ; End of parse_ptok. ptok_lbrk: inc.w idx0 ; @@ -269,6 +184,12 @@ ptok_xr: ptok_yr: inc.w idx0 ; rts ; End of parse_ptok. +ptok_sp: + inc.w idx0 ; + rts ; End of parse_ptok. +ptok_pc: + inc.w idx0 ; + rts ; End of parse_ptok. ptok_dqu: ldb #1 ; Make init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. @@ -289,6 +210,7 @@ ptok_hash: ptok_scol: ldb #1 ; Make init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. + ldb #1 ; Set the delimiter to EOL. jsr delmcpy ; Copy the string, to the lexeme buffer, until EOL. @end: rts ; End of parse_ptok. @@ -312,10 +234,10 @@ ptok_num: ptok_num2: pha ; Preserve the base. jsr init_lex ; Initialize the lexeme buffer for copying. - ldb #3 ; Set the delimiter to both the EOL, or a comma. + ldb #3 ; Set the delimiter to both the EOL, and a comma. jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter. pla ; Get the base back. - jsr strtoull ; Convert the string into a numeric value. + jsr strtoullg ; Convert the string into a numeric value. jsr make_tok ; Create the token. jsr set_cmdbuf ; Set the first pointer to the command buffer. rts ; End of parse_ptok. @@ -335,14 +257,14 @@ ptok_alph: lda.w #mne ; Get pointer to the start of the instruction table. clc ; Prepare for a non carrying add. adc.w idx2 ; Offset the pointer, by the length of the previous string. - jsr strcasecmp ; Is the lexeme buffer, the same as the mnemonic string? + jsr strcaseg ; Is the lexeme buffer, the same as the mnemonic string? beq @found ; Yes, so create a new token. ldb idx1 ; No, so Get the instruction ID. cpb #OPNUM-1 ; Have we reached the end of the instruction table? beq @end ; Yes, so we're done. inc idx1 ; No, so increment the instruction ID. @offset: - lda #13 ; Get the base size of the instruction table. + lda #14 ; Get the base size of the instruction table. clc ; Prepare for a non carrying multiply. mul idx1 ; Multiply the base offset, by the instruction ID. sta.w idx2 ; Save the offset in the third index. @@ -371,7 +293,8 @@ set_lexptr: set_cmdbuf: - ldb #0 ; Set the first pointer + and #0 ; Reset A. + tab ; Reset B. lda.d #cmd_buf ; to the command buffer. jsr set_ptr ; and #0 ; Reset A. @@ -414,7 +337,7 @@ delmcpy: and #$FF ; Get the current byte. pha ; Preserve the character. lda a ; Are we calling isdelm2? - pla ; Get the character back. + pla ; Get the character back. bne @isdelm2 ; Yes, so use isdelm2. jsr isdelm ; No, so get the delimiter value from isdelm. @delmchk: @@ -504,6 +427,3 @@ make_tok: nop ; @end: rts ; End of make_tok. - -; Entry point for utility subroutines. -utils: |