; Lexer and supporting routines for SuBAsm.

; Enumerations.

; Directive IDs.
DIR_ORG		= 0	; Origin.
DIR_BYTE	= 1	; Byte (8 bits).
DIR_WORD	= 2	; Word (16 bits).
DIR_DWORD	= 3	; Dword (32 bits).
DIR_QWORD	= 4	; Qword (64 bits).
DIR_INCL	= 5	; Include.

; Token types.
TOK_DIR		= 0	; Directive.
TOK_LOCAL	= 1	; Local symbol.
TOK_LABEL	= 2	; Label.
TOK_SYM		= 3	; Symbol.
TOK_EXPR	= 4	; Expression.
TOK_CSV		= 5	; Comma separated value.
TOK_STR		= 6	; String.
TOK_CHAR	= 7	; Character.
TOK_IND		= 8	; Indirect addressing.
TOK_IMM		= 9	; Immediate data.
TOK_MNE		= 10	; Opcode/Mnemonic.
TOK_RS		= 11	; Register size prefix.
TOK_COMM	= 12	; Comment.
TOK_HEX		= 13	; Hex value.
TOK_DEC		= 14	; Decimal value.
TOK_BIN		= 15	; Binary value.
TOK_INCL	= 16	; Include file.

; Pre-token IDs, one per character class.
; These index the swtab jump table, so the order must match it.
PTOK_DOT	= 0	; .
PTOK_AT		= 1	; @
PTOK_COLON	= 2	; :
PTOK_EQU	= 3	; =
PTOK_PLUS	= 4	; +
PTOK_MINUS	= 5	; -
PTOK_GT		= 6	; >
PTOK_LT		= 7	; <
PTOK_LBRAK	= 8	; (
PTOK_RBRAK	= 9	; )
PTOK_COMMA	= 10	; ,
PTOK_X		= 11	; x
PTOK_Y		= 12	; y
PTOK_DQUOT	= 13	; "
PTOK_SQUOT	= 14	; '
PTOK_HASH	= 15	; #
PTOK_SCOLN	= 16	; ;
PTOK_DOLR	= 17	; $
PTOK_PRCNT	= 18	; %
PTOK_NUM	= 19	; 0-9
PTOK_ALPH	= 20	; a-z A-Z
PTOK_OTHR	= 21	; Everything else.

; Expression types.
EXPR_PLUS	= 0	; Plus.
EXPR_MINUS	= 1	; Minus.
EXPR_LOW	= 2	; Lower half of address.
EXPR_HIGH	= 3	; Upper half of address.
EXPR_NONE	= 4	; No expression.

; Data.
.org lexer_data
; Jump table for parsing pre-tokens.
swtab:
	.word ptok_dot	; PTOK_DOT
	.word ptok_at	; PTOK_AT
	.word ptok_col	; PTOK_COLON
	.word ptok_equ	; PTOK_EQU
	.word ptok_plus	; PTOK_PLUS
	.word ptok_min	; PTOK_MINUS
	.word ptok_gt	; PTOK_GT
	.word ptok_lt	; PTOK_LT
	.word ptok_lbrk	; PTOK_LBRAK
	.word ptok_rbrk	; PTOK_RBRAK
	.word ptok_com	; PTOK_COMMA
	.word ptok_xr	; PTOK_X
	.word ptok_yr	; PTOK_Y
	.word ptok_dqu	; PTOK_DQUOT
	.word ptok_squ	; PTOK_SQUOT
	.word ptok_hash	; PTOK_HASH
	.word ptok_scol	; PTOK_SCOLN
	.word ptok_dolr	; PTOK_DOLR
	.word ptok_prcn	; PTOK_PRCNT
	.word ptok_num	; PTOK_NUM
	.word ptok_alph	; PTOK_ALPH
	.word ptok_othr	; PTOK_OTHR

; Data entry point for utility subroutines.
util_data:


; Program code.
.org lexer

; lex: Lex the line that the first pointer (ptr) points to.
; Clears idx0-idx2 and the isop flag, moves the second pointer to the
; next line of the line table, then walks the text: leading spaces and
; tabs are counted into idx1 and idx1+1, and every other character is
; classified by get_ptok and dispatched through parse_ptok, until
; isdelm returns 1 for the current character.
; Y is preserved. Inside the loop, B is used as the start of line flag.
lex:
	ldx #0		; Reset X.
	txa		; Reset A.
	phy #2		; Preserve the screen buffer index.
	txy		; Reset Y.
	sty.q idx0	; Clear the first index.
	sty.q idx1	; Clear the second index.
	sty.q idx2	; Clear the third index.
	sty b		; Clear the isop flag.
;	lda (ptr), y	; Get a character from the line.
;	pha #1		; Preserve the character.
;	jsr isdigit	; Is this character a digit?
;	pla #1		; Get the character back.
@getline:
	lda #2		; Get the third byte, of the line table address.
	lsl #$10	; Shift it by 2 bytes.
	ldb #1		; Set the second pointer
	lda.w ltok	; to the last line.
	jsr set_ptr	;
	lda.w (ptr2)	; Get the next line.
	jsr set_ptr	; Set the second pointer to the next line.
	sta.w ctok	; Make it the current line.
	and #0		; Reset A.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm	; Get the delimiter.
	cmp #1		; Are we at the end of the line?
	beq @end	; Yes, so we're done.
@spaces:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha #1		; Preserve the character.
	jsr isdelm	; Get the delimiter.
	and #$10	; Is this character, a space, or tab?
	pla #1		; Get the character back.
	beq @isstart	; No, so check for the start of the line.
	inc.w idx0	; Yes, so increment the string index.
	cmp #' '	; Is this character, a space?
	beq @incs	; Yes, so increment the starting space count.
	cmp #'\t'	; No, but is it a tab?
	beq @inct	; Yes, so increment the starting tab count.
	jmp @spaces	; No, so Keep looping.
@incs:
	inc idx1	; Increment the space count.
	jmp @spaces	; Keep looping.
@inct:
	inc idx1+1	; Increment the tab count.
	jmp @spaces	; Keep looping.
@isstart:
	pha #2		; Preserve the character.
	lda.w idx1	; Was there any whitespace?
	pla #2		; Get the character back.
	beq @switch	; No, so start lexing.
	cpb #1		; Yes, and are we at the start of the line?
	bne @switch	; No, so start lexing.
@whtspace:
	ldy #2		; Yes, so set the line index to the starting whitespace counters.
	lda.w idx1	; Get both indices.
	sta.w (ptr2), y	; Save them in the line.
	and #0		; Reset A.
	sta.w idx1	; Reset the second index.
	deb		; Set the isstart flag to false.
@switch:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character.
	jsr get_ptok	; Get the pre-token.
	jsr parse_ptok	; Parse the pre-token.
;	beq @end	; We got to the end of the string.
	jmp @loop	; Keep looping.
@end:
	ply #2		; Get the screen buffer index back.
	rts		; End of lex.


; parse_ptok: Dispatch on the pre-token in A, through swtab.
; The third pointer is used as scratch, first for the table, and then
; for the handler address. Y is preserved, and A and B are both zero
; when the handler is entered. The handler is reached with a jmp, so
; its rts returns straight to the caller of parse_ptok.
parse_ptok:
	pha #1		; Preserve the pre-token.
	ldb #2		; Set the third pointer
	lda.w #swtab	; to the start of the jump table.
	jsr set_ptr	;
	and #0		; Reset A.
	pla #1		; Get the pre-token back.
	phy #2		; Preserve Y.
	lsl #1		; Multiply the pre-token by two, to get the jump index.
	tay		; Get the index of the jump table.
	lda.w (ptr3), y	; Get the address to jump to.
	jsr set_ptr	; Set the third pointer to the case address.
	and #0		; Reset A.
	tab		; Reset B.
	ply #2		; Get Y back.
	jmp (ptr3)	; Jump to the case label.

; ptok_dot: Handle a '.', which starts either a register size suffix,
; or a directive. The dot is skipped, and the text after it is copied
; to the lexeme buffer. If the isop flag is set, the last character of
; the lexeme is handed to get_rs. Otherwise, the lexeme is compared
; against each string of the directive table, leaving the ID of the
; string that was being compared in idx1.
ptok_dot:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #$11	; Set the delimiter comparison value to whitespace.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@isop:
	lda b		; Has the isop flag been set?
	beq @dir	; No, so check for a directive.
@rs:
	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
	sta lex_type	;
	ldy.w idx1	; Get the lexeme index.
	dey		; Decrement the lexeme index.
	lda (ptr3), y	; Get the suffix character.
	jsr get_rs	; Get the register size.
	jmp @end	; We are done.
@dir:
	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
	sta lex_type	;
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index.
	jsr set_lexptr	; Set up the lexeme buffer.
@dir_loop:
	lda.w #dir	; Get pointer to the start of the directive table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous string.
	pha #8		; Preserve the directive string pointer.
	jsr strcasecmp	; Is the lexeme buffer, the same as the directive string?
	pla #8		; Get the directive string pointer back.
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so Get the directive ID.
	cpb #DIR_INCL	; Was that the last directive of the directive table?
			; This check runs after the compare, so stopping
			; at the last ID keeps strcasecmp inside the table.
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the directive ID.
@getlen:
	jsr strlen	; Get the string's length.
	inx		; Add one to the length.
	txa		; Place it in the accumulator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the string offset to the current length
	sta.w idx2	; Save the offset in the third index.
	jmp @dir_loop	; Keep looping.
@found:
	nop		;
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.

; The handlers below are not implemented yet. Each one consumes its
; character, because lex re-reads the character at idx0 after every
; dispatch, and would otherwise never get past it.
ptok_at:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_col:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_equ:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_plus:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_min:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_gt:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_lt:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_lbrk:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_rbrk:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_com:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_xr:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_yr:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.

; ptok_dqu: Handle a double quote. The quote is skipped, and the text
; after it is copied to the lexeme buffer, until delmcpy stops.
; NOTE(review): this returns with idx0 still on the character that
; stopped the copy, and without calling make_tok. Confirm whether the
; closing quote should be skipped, and a TOK_STR token created here.
ptok_dqu:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #4		; Set the delimiter comparison value to a double quote.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@term:
	rts		; End of parse_ptok.

ptok_squ:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_hash:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_scol:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_dolr:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_prcn:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.
ptok_num:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.

; ptok_alph: Handle a letter. The text is copied to the lexeme buffer,
; starting with the current character, and is then compared against
; each string of the instruction table. On a match, the lexeme type is
; set to TOK_MNE, and the isop flag is set. The ID of the string that
; was being compared is left in idx1.
ptok_alph:
	ldb #0		; Do not let init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #1		; Stop at any possible delimiter, except whitespace.
	tba		; Use isdelm2 for the comparison.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	lda #0		; Reset A.
	sta b		; Clear the isop flag.
@isop:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index.
	jsr set_lexptr	; Set up the lexeme buffer.
@isop_loop:
	lda.w #mne	; Get pointer to the start of the instruction table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous string.
	jsr strcasecmp	; Is the lexeme buffer, the same as the mnemonic string?
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so Get the instruction ID.
	cpb #OPNUM-1	; Have we reached the end of the instruction table?
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the instruction ID.
@offset:
	lda #13		; Get the base size of the instruction table.
	clc		; Prepare for a non carrying multiply.
	mul idx1	; Multiply the base offset, by the instruction ID.
	sta.w idx2	; Save the offset in the third index.
	jmp @isop_loop	; Keep looping.
@found:
	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
	sta lex_type	;
	inc b		; Set the isop flag.
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.

ptok_othr:
	inc.w idx0	; Not handled yet, so skip the character.
	rts		; End of parse_ptok.


; set_lexptr: Point one of the pointers at the lexeme buffer.
; B is handed to set_ptr as is; the callers in this file load it with
; 0 for the first pointer, or 2 for the third pointer.
; A, B, and idx1 are all zero on return.
set_lexptr:
	lda.d #lexeme	; Set the pointer to the lexeme buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	sta.q idx1	; Reset the second index.
	rts		; End of set_lexptr.
; set_cmdbuf: Point the first pointer back at the command buffer.
; A, and B are both zero on return.
set_cmdbuf:
	ldb #0		; Set the first pointer
	lda.d #cmd_buf	; to the command buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	rts		; End of set_cmdbuf.


; init_lex: Get ready to copy a lexeme.
; If B is non zero, the string index is incremented first, which skips
; the character that started the pre-token. The second, and third
; indices are cleared, and the third pointer is set to the lexeme buffer.
init_lex:
	cpb #0		; Do we need to increment the string index?
	beq @init	; No, so skip that step.
@inc_str:
	inc.w idx0	; Yes, so increment the string index.
@init:
	lda #0		; Reset A.
	sta.q idx1	; Reset the second index
	sta.q idx2	; Reset the third index
	ldb #2		; Make the lexeme buffer, the third pointer.
	jsr set_lexptr	; Set up the lexeme buffer.
	rts		; End of init_lex.


; delmcpy: Copy characters from the line, to the lexeme buffer, until
; a delimiter is found, and then terminate the lexeme buffer.
; A selects the delimiter routine. Zero uses isdelm, and stops when its
; result has any bit of B set. Non zero uses isdelm2, and stops when
; its result is equal to B.
; On return, idx0 is the index of the character that stopped the copy,
; and idx1 is the length of the lexeme.
; This relies on isdelm, and isdelm2 leaving Y untouched.
delmcpy:
	sta a		; Save the delimiter check flag.
	stb c		; Save the delimiter comparison value.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha #1		; Preserve the character.
	lda a		; Are we calling isdelm2?
	pla #1		; Get the character back.
	bne @isdelm2	; Yes, so use isdelm2.
	jsr isdelm	; No, so get the delimiter value from isdelm.
	and c		; Are both delimiter values, the same?
	bne @end	; Yes, so we're done.
	jmp @copy	; No, so start copying the character.
@isdelm2:
	jsr isdelm2	; Get the delimiter value from isdelm2.
	cmp c		; Are both delimiter values, the same?
	beq @end	; Yes, so we're done.
@copy:
	lda (ptr), y	; Get a character from the line.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Copy the character to the lexeme buffer.
	inc.w idx0	; Increment the string index.
	inc.w idx1	; Increment the lexeme index.
	jmp @loop	; Keep looping.
@end:
	ldy.w idx1	; Get the lexeme index, since Y still has the string index.
	lda #0		; Terminate the lexeme buffer.
	sta (ptr3), y	;
	rts		; End of delmcpy.


; get_rs: Convert a register size suffix character, to its size ID.
; A is the suffix character, in either case.
; Returns 1 for 'w', 2 for 'd', 3 for 'q', and 0 for anything else.
; The isop flag is cleared, and B is preserved.
get_rs:
	phb #1		; Preserve B.
	ldb #0		; Set the isop flag to false.
	stb b		; The flag is in memory, so it has to be stored.
	plb #1		; Get B back.
	jsr tolower	; Convert the character to lowercase.
	cmp #'w'	; Is it .w?
	beq @r1		; Yes, so return 1.
	cmp #'d'	; No, but was it .d?
	beq @r2		; Yes, so return 2.
	cmp #'q'	; No, but was it .q?
	beq @r3		; Yes, so return 3.
@r0:
	lda #0		; Return 0.
	rts		; End of get_rs.
@r1:
	lda #1		; Return 1.
	rts		; End of get_rs.
@r2:
	lda #2		; Return 2.
	rts		; End of get_rs.
@r3:
	lda #3		; Return 3.
	rts		; End of get_rs.
; make_tok: Create a token from the current lexeme.
; This is still a placeholder, and does nothing but return.
make_tok:
	nop		; Nothing is created yet.
@end:
	rts		; End of make_tok.

; Entry point for utility subroutines.
utils: