; Lexer, and supporting routines for SuBAsm.

; ---------------------------------------------------------------------------
; Enumerations.
; ---------------------------------------------------------------------------

; Directive IDs.
DIR_ORG		= 0	; Origin.
DIR_BYTE	= 1	; Byte  =  8 bits.
DIR_WORD	= 2	; Word  = 16 bits.
DIR_DWORD	= 3	; Dword = 32 bits.
DIR_QWORD	= 4	; Qword = 64 bits.
DIR_INCL	= 5	; Include.

; Token types.
TOK_DIR		= 0	; Directive.
TOK_LOCAL	= 1	; Local symbol.
TOK_LABEL	= 2	; Label.
TOK_SYM		= 3	; Symbol.
TOK_EXPR	= 4	; Expression.
TOK_CSV		= 5	; Comma separated value.
TOK_STR		= 6	; String.
TOK_CHAR	= 7	; Character.
TOK_IND		= 8	; Indirect addressing.
TOK_IMM		= 9	; Immediate data.
TOK_MNE		= 10	; Opcode/Mnemonic.
TOK_RS		= 11	; Register size prefix.
TOK_COMM	= 12	; Comment.
TOK_HEX		= 13	; Hex value.
TOK_DEC		= 14	; Decimal value.
TOK_BIN		= 15	; Binary value.
TOK_INCL	= 16	; Include file.

; Pre-token types, one per character class.
; The values double as indices into the pre-token jump table (swtab).
PTOK_DOT	= 0	; .
PTOK_AT		= 1	; @
PTOK_COLON	= 2	; :
PTOK_EQU	= 3	; =
PTOK_PLUS	= 4	; +
PTOK_MINUS	= 5	; -
PTOK_GT		= 6	; >
PTOK_LT		= 7	; <
PTOK_LBRAK	= 8	; (
PTOK_RBRAK	= 9	; )
PTOK_COMMA	= 10	; ,
PTOK_X		= 11	; x
PTOK_Y		= 12	; y
PTOK_DQUOT	= 13	; "
PTOK_SQUOT	= 14	; '
PTOK_HASH	= 15	; #
PTOK_SCOLN	= 16	; ;
PTOK_DOLR	= 17	; $
PTOK_PRCNT	= 18	; %
PTOK_NUM	= 19	; 0-9
PTOK_ALPH	= 20	; a-z A-Z
PTOK_OTHR	= 21	; Everything else.

; Expression types.
EXPR_PLUS	= 0	; Plus.
EXPR_MINUS	= 1	; Minus.
EXPR_LOW	= 2	; Lower half of address.
EXPR_HIGH	= 3	; Upper half of address.
EXPR_NONE	= 4	; No expression.

; ---------------------------------------------------------------------------
; Data.
; ---------------------------------------------------------------------------
.org lexer_data

; Jump table for parsing pre-tokens.
swtab:
	.word ptok_dot	; PTOK_DOT
	.word ptok_at	; PTOK_AT
	.word ptok_col	; PTOK_COLON
	.word ptok_equ	; PTOK_EQU
	.word ptok_plus	; PTOK_PLUS
	.word ptok_min	; PTOK_MINUS
	.word ptok_gt	; PTOK_GT
	.word ptok_lt	; PTOK_LT
	.word ptok_lbrk	; PTOK_LBRAK
	.word ptok_rbrk	; PTOK_RBRAK
	.word ptok_com	; PTOK_COMMA
	.word ptok_xr	; PTOK_X
	.word ptok_yr	; PTOK_Y
	.word ptok_dqu	; PTOK_DQUOT
	.word ptok_squ	; PTOK_SQUOT
	.word ptok_hash	; PTOK_HASH
	.word ptok_scol	; PTOK_SCOLN
	.word ptok_dolr	; PTOK_DOLR
	.word ptok_prcn	; PTOK_PRCNT
	.word ptok_num	; PTOK_NUM
	.word ptok_alph	; PTOK_ALPH
	.word ptok_othr	; PTOK_OTHR

; Data entry point for utility subroutines.
util_data:


; Program code.
.org lexer

; lex: Lex the current line.
; Clears idx0-idx2 and the isop flag (b), records leading whitespace, then
; hands every character's pre-token to parse_ptok, until isdelm reports the
; end of the line (1).
; Out: Y is preserved (16 bit push/pull).
lex:
	ldx #0		; Reset X.
	txa		; Reset A.
	phy.w		; Preserve the screen buffer index.
	txy		; Reset Y.
	sty.q idx0	; Clear the first index.
	sty.q idx1	; Clear the second index.
	sty.q idx2	; Clear the third index.
	sty b		; Clear the isop flag.
;	lda (ptr), y	; Get a character from the line.
;	pha		; Preserve the character.
;	jsr isdigit	; Is this character a digit?
;	pla		; Get the character back.
@getline:
	lda #2		; Get the third byte, of the line table address.
	lsl #$10	; Shift it by 2 bytes.
	ldb #1		; Set the second pointer
	lda.w ltok	; to the last line.
	jsr set_ptr	;
	lda.w (ptr2)	; Get the next line.
	jsr set_ptr	; Set the second pointer to the next line.
	sta.w ctok	; Make it the current line.
	and #0		; Reset A.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm	; Get the delimiter.
	cmp #1		; Are we at the end of the line?
	beq @end	; Yes, so we're done.
@spaces:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha		; Preserve the character.
	jsr isdelm	; Get the delimiter.
	and #$10	; Is this character, a space, or tab?
	pla		; Get the character back. The branch below still tests the result of the and.
	beq @isstart	; No, so check for the start of the line.
	inc.w idx0	; Yes, so increment the string index.
	cmp #' '	; Is this character, a space?
	beq @incs	; Yes, so increment the starting space count.
	cmp #'\t'	; No, but is it a tab?
	beq @inct	; Yes, so increment the starting tab count.
	bra @spaces	; No, so keep looping.
@incs:
	inc idx1	; Increment the space count (low byte of idx1).
	bra @spaces	; Keep looping.
@inct:
	inc idx1+1	; Increment the tab count (second byte of idx1).
	bra @spaces	; Keep looping.
@isstart:
	pha.w		; Preserve the character.
	lda.w idx1	; Was there any whitespace?
	pla.w		; Get the character back.
	beq @switch	; No, so start lexing.
	cpb #1		; Yes, and are we at the start of the line?
	bne @switch	; No, so start lexing.
@whtspace:
	ldy #2		; Yes, so set the line index to the starting whitespace counters.
	lda.w idx1	; Get both counters.
	sta.w (ptr2), y	; Save them in the line.
	and #0		; Reset A.
	sta.w idx1	; Reset the second index.
	deb		; Set the isstart flag to false.
@switch:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character.
	jsr get_ptok	; Get the pre-token.
	jsr parse_ptok	; Parse the pre-token.
;	beq @end	; We got to the end of the string.
	bra @loop	; Keep looping.
@end:
	ply.w		; Get the screen buffer index back.
	rts		; End of lex.


; parse_ptok: Dispatch a pre-token to its handler.
; In:  A = pre-token (PTOK_*), used as an index into swtab (two bytes per entry).
; The handler is entered with A, and B cleared, and Y preserved. It is reached
; with a jmp, so the handler's rts returns to the caller of parse_ptok.
parse_ptok:
	pha		; Preserve the pre-token.
	ldb #2		; Set the third pointer
	lda.w #swtab	; to the start of the jump table.
	jsr set_ptr	;
	and #0		; Reset A.
	pla		; Get the pre-token back.
	phy.w		; Preserve Y.
	lsl #1		; Multiply the pre-token by two, to get the jump index.
	tay		; Get the index of the jump table.
	lda.w (ptr3), y	; Get the address to jump to.
	jsr set_ptr	; Set the third pointer to the case address.
	and #0		; Reset A.
	tab		; Reset B.
	ply.w		; Get Y back.
	jmp (ptr3)	; Jump to the case label.


; ptok_dot: Handle '.', which starts either a register size suffix (when the
; isop flag is set), or a directive.
ptok_dot:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #$11	; Set the delimiter comparison value to whitespace ($10), or EOL (1).
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@isop:
	lda b		; Has the isop flag been set?
	beq @dir	; No, so check for a directive.
@rs:
	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
	sta lex_type	;
	ldy.w idx1	; Get the lexeme index.
	dey		; Decrement the lexeme index.
	lda (ptr3), y	; Get the suffix character.
	jsr get_rs	; Get the register size.
	bra @end	; We are done.
@dir:
	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
	sta lex_type	;
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index (directive ID).
	jsr set_lexptr	; Set up the lexeme buffer.
@dir_loop:
	lda.w #dir	; Get pointer to the start of the directive table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous string.
	pha.q		; Preserve the directive string pointer.
	jsr strcasecmp	; Is the lexeme buffer, the same as the directive string?
	pla.q		; Get the directive string pointer back.
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so get the directive ID.
			; NOTE(review): the DIR_* enum has six IDs (0-5). With #6, one more
			; string past the sixth entry gets compared before the loop stops.
			; Confirm against the dir table, DIR_INCL may be what is wanted here.
	cpb #6		; Have we reached the end of the directive table?
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the directive ID.
@getlen:
	jsr strlen	; Get the string's length.
	inx		; Add one to the length.
	txa		; Place it in the accumulator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the string offset to the current length
	sta.w idx2	; Save the offset in the third index.
	bra @dir_loop	; Keep looping.
@found:
	nop		;
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.


; The handlers below do not build a token yet, they only step over the character.
ptok_at:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_col:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_equ:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_plus:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_min:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_gt:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_lt:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_lbrk:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_rbrk:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_com:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_xr:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.
ptok_yr:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.


; ptok_dqu: Copy a double quoted string into the lexeme buffer.
; NOTE(review): the mask has no EOL bit, so a missing closing quote is not
; stopped at the end of the line, and the closing quote itself is left
; unconsumed in idx0. Confirm that this is intended.
ptok_dqu:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #4		; Set the delimiter comparison value to a double quote.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@end:
	rts		; End of parse_ptok.


; ptok_squ: Copy a single quoted character/string into the lexeme buffer.
; NOTE(review): same open questions as ptok_dqu (no EOL bit, closing quote
; left unconsumed).
ptok_squ:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #8		; Set the delimiter comparison value to a single quote.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@end:
	rts		; End of parse_ptok.


ptok_hash:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.


; ptok_scol: Copy a comment (everything after ';') into the lexeme buffer.
ptok_scol:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #1		; Set the delimiter comparison value to EOL. init_lex returns with
			; B = 0, and a mask of zero would never stop the copy.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until EOL.
@end:
	rts		; End of parse_ptok.


; ptok_dolr, ptok_prcn, ptok_num: Lex a hexadecimal ('$'), binary ('%'), or
; decimal value. Each one sets lex_type, loads the base into A, and tells
; init_lex through B, whether the prefix character has to be skipped, then
; continues in ptok_num2.
ptok_dolr:
	lda #TOK_HEX	; Set the lexeme type to TOK_HEX.
	sta lex_type	;
	lda #$10	; Set the base to Hexadecimal.
	ldb #1		; Make init_lex increment the string index.
	bra ptok_num2	; Parse the value.
ptok_prcn:
	lda #TOK_BIN	; Set the lexeme type to TOK_BIN.
	sta lex_type	;
	lda #2		; Set the base to Binary.
	ldb #1		; Make init_lex increment the string index.
	bra ptok_num2	; Parse the value.
ptok_num:
	lda #TOK_DEC	; Set the lexeme type to TOK_DEC.
	sta lex_type	;
	lda #10		; Set the base to Decimal.
	ldb #0		; Do not let init_lex increment the string index.
; ptok_num2: Shared tail of the numeric handlers.
; In:  A = base, B = non-zero to skip one character before copying.
ptok_num2:
	pha		; Preserve the base.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #3		; Set the delimiter to both the EOL, or a comma.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	pla		; Get the base back.
	jsr strtoull	; Convert the string into a numeric value.
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.


; ptok_alph: Lex an alphabetic lexeme, and look it up in the mnemonic table.
; On a match, lex_type becomes TOK_MNE, and the isop flag (b) is set.
ptok_alph:
	ldb #0		; Do not let init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #3		; Stop at any possible delimiter.
	tba		; Use isdelm2 for the comparison (A is non-zero).
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	lda #0		; Reset A.
	sta b		; Clear the isop flag.
@isop:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index (instruction ID).
	jsr set_lexptr	; Set up the lexeme buffer.
@isop_loop:
	lda.w #mne	; Get pointer to the start of the instruction table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous string.
	jsr strcasecmp	; Is the lexeme buffer, the same as the mnemonic string?
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so get the instruction ID.
	cpb #OPNUM-1	; Have we reached the end of the instruction table?
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the instruction ID.
@offset:
	lda #13		; Get the base size of the instruction table.
	clc		; Prepare for a non carrying multiply.
	mul idx1	; Multiply the base offset, by the instruction ID.
	sta.w idx2	; Save the offset in the third index.
	bra @isop_loop	; Keep looping.
@found:
	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
	sta lex_type	;
	inc b		; Set the isop flag.
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.


ptok_othr:
	inc.w idx0	; Skip the character.
	rts		; End of parse_ptok.


; set_lexptr: Point one of the pointers at the lexeme buffer.
; In:  B = pointer selector, handed on to set_ptr.
; Out: A = 0, B = 0, idx1 = 0, idx2 = 0.
set_lexptr:
	lda.d #lexeme	; Set the pointer to the lexeme buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	sta.q idx1	; Reset the second index.
	sta.q idx2	; Reset the third index
	rts		; End of set_lexptr.


; set_cmdbuf: Point the first pointer back at the command buffer.
; Out: A = 0, B = 0.
set_cmdbuf:
	ldb #0		; Set the first pointer
	lda.d #cmd_buf	; to the command buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	rts		; End of set_cmdbuf.


; init_lex: Prepare the lexeme buffer for a new lexeme.
; In:  B = non-zero to increment the string index (idx0) first.
; Points the third pointer at the lexeme buffer (set_lexptr also clears idx1,
; and idx2), then zeroes the buffer up to its first zero byte.
; Out: A = 0, B = 0, Y is preserved.
init_lex:
	cpb #0		; Do we need to increment the string index?
	beq @init	; No, so skip that step.
@inc_str:
	inc.w idx0	; Yes, so increment the string index.
@init:
	ldb #2		; Make the lexeme buffer, the third pointer.
	jsr set_lexptr	; Set up the lexeme buffer.
	phy.w		; Preserve Y.
	tay		; Reset Y (A is zero after set_lexptr).
@loop:
	lda (ptr3), y	; Have we hit the end of the previous lexeme string?
	beq @end	; Yes, so we're done.
	lda #0		; No, so start clearing the character.
	sta (ptr3), y	; Clear the character.
	iny		; Increment the lexeme index.
	bra @loop	; Keep looping.
@end:
	ply.w		; Get Y back.
	rts		; End of init_lex.


; delmcpy: Copy characters from the line, (ptr) at idx0, to the lexeme buffer,
; (ptr3) at idx1, until a delimiter is hit, then zero terminate the lexeme.
; In:  A = zero to classify characters with isdelm, non-zero to use isdelm2.
;      B = delimiter mask. The copy stops at the first character, whose
;          classifier result ANDed with the mask is non-zero.
; Out: A = 0, B = 0, Y = idx0, which indexes the character that stopped the copy.
; Uses the variables a, c, and g as scratch space.
delmcpy:
	sta a		; Save the delimiter check flag.
	stb c		; Save the delimiter comparison value.
@loop:
	ldb #0		; Reset the B register.
	stb g		; Reset the byte count.
	ldy.w idx0	; Get the string index.
	lda.q (ptr), y	; Get eight bytes from the current line.
@loop1:
	pha.q		; Save the string buffer.
	and #$FF	; Get the current byte.
	pha		; Preserve the character.
	lda a		; Are we calling isdelm2?
	pla		; Get the character back. The branch below still tests the result of the lda.
	bne @isdelm2	; Yes, so use isdelm2.
	jsr isdelm	; No, so get the delimiter value from isdelm.
@delmchk:
	and c		; Does the delimiter value match the comparison value?
	pla.q		; Get back the string buffer.
	bne @end	; Yes, so we're done.
	bra @copy	; No, so start copying the character.
@isdelm2:
	jsr isdelm2	; Get the delimiter value from isdelm2.
	bra @delmchk	; Check the delimiter.
@copy:
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Copy one byte from the line, to the lexeme buffer.
	inc.w idx0	; Increment the string index.
	inc.w idx1	; Increment the lexeme index.
	lsr #8		; Shift in the next byte.
	inc g		; Increment the byte count.
	ldb g		; Get back the byte count.
	cpb #7		; Have we used up seven bytes of this load?
	beq @loop	; Yes, so reload from idx0, no character gets skipped.
	bra @loop1	; No, so keep shifting in more bytes.
@end:
	ldb #0		; Reset B.
	ldy.w idx1	; Get the lexeme index.
	stb (ptr3), y	; Terminate the lexeme buffer.
@end1:
	ldy.w idx0	; Get the string index.
	tba		; Reset A.
	rts		; End of delmcpy.

; Previous, byte at a time version of delmcpy.
;@loop:
;	ldy.w idx0	; Get the string index.
;	lda (ptr), y	; Get a character from the line.
;	pha		; Preserve the character.
;	lda a		; Are we calling isdelm2?
;	pla		; Get the character back.
;	bne @isdelm2	; Yes, so use isdelm2.
;	jsr isdelm	; No, so get the delimiter value from isdelm.
;@delmchk:
;	and c		; Are both delimiter values, the same?
;	bne @end	; Yes, so we're done.
;	bra @copy	; No, so start copying the character.
;@isdelm2:
;	jsr isdelm2	; Get the delimiter value from isdelm2.
;	bra @delmchk	; Check the delimiter.
;@copy:
;	lda (ptr), y	; Get a character from the line.
;	ldy.w idx1	; Get the lexeme index.
;	sta (ptr3), y	; Copy the character to the lexeme buffer.
;	inc.w idx0	; Increment the string index.
;	inc.w idx1	; Increment the lexeme index.
;	bra @loop	; Keep looping.
;@end:
;	ldy.w idx1	; Get the lexeme index.
;	lda #0		; Terminate the lexeme buffer.
;	sta (ptr3), y	;
;	ldy.w idx0	; Get the string index.
;	rts		; End of delmcpy.


; get_rs: Translate a register size suffix character into its size ID.
; In:  A = suffix character, upper, or lower case.
; Out: A = 1 for 'w', 2 for 'd', 3 for 'q', and 0 for anything else.
;      The isop flag (b) is cleared. B is preserved.
get_rs:
	phb		; Preserve B.
	ldb #0		; Set the isop flag to false.
	stb b		; The flag is the variable b, loading register B alone did not clear it.
	plb		; Get B back.
	jsr tolower	; Convert the character to lowercase.
	cmp #'w'	; Is it .w?
	beq @r1		; Yes, so return 1.
	cmp #'d'	; No, but was it .d?
	beq @r2		; Yes, so return 2.
	cmp #'q'	; No, but was it .q?
	beq @r3		; Yes, so return 3.
@r0:
	lda #0		; Return 0.
	rts		; End of get_rs.
@r1:
	lda #1		; Return 1.
	rts		; End of get_rs.
@r2:
	lda #2		; Return 2.
	rts		; End of get_rs.
@r3:
	lda #3		; Return 3.
	rts		; End of get_rs.


; make_tok: Placeholder, it does not create a token yet.
make_tok:
	nop		;
@end:
	rts		; End of make_tok.

; Entry point for utility subroutines.
utils: