; Lexer, and supporting routines for SuBAsm.

; Enums.

; Directives.
DIR_ORG     = 0         ; Origin.
DIR_BYTE    = 1         ; Byte  =  8 bits.
DIR_WORD    = 2         ; Word  = 16 bits.
DIR_DWORD   = 3         ; Dword = 32 bits.
DIR_QWORD   = 4         ; Qword = 64 bits.
DIR_INCL    = 5         ; Include.

; Tokens.
TOK_DIR     = 0         ; Directive.
TOK_LOCAL   = 1         ; Local symbol.
TOK_LABEL   = 2         ; Label.
TOK_SYM     = 3         ; Symbol.
TOK_EXPR    = 4         ; Expression.
TOK_CSV     = 5         ; Comma separated value.
TOK_STR     = 6         ; String.
TOK_CHAR    = 7         ; Character.
TOK_IND     = 8         ; Indirect addressing.
TOK_IMM     = 9         ; Immediate data.
TOK_MNE     = 10        ; Opcode/Mnemonic.
TOK_RS      = 11        ; Register size prefix.
TOK_COMM    = 12        ; Comment.
TOK_HEX     = 13        ; Hex value.
TOK_DEC     = 14        ; Decimal value.
TOK_BIN     = 15        ; Binary value.
TOK_INCL    = 16        ; Include file.

; Pre-Tokens.
; These values are also used as indices into the pre-token jump table
; (parse_ptok doubles the value to index 16-bit entries), so the numbering
; has to stay in step with the order of that table.
PTOK_DOT    = 0         ; .
PTOK_AT     = 1         ; @
PTOK_COLON  = 2         ; :
PTOK_EQU    = 3         ; =
PTOK_PLUS   = 4         ; +
PTOK_MINUS  = 5         ; -
PTOK_GT     = 6         ; >
PTOK_LT     = 7         ; <
PTOK_LBRAK  = 8         ; (
PTOK_RBRAK  = 9         ; )
PTOK_COMMA  = 10        ; ,
PTOK_X      = 11        ; x
PTOK_Y      = 12        ; y
PTOK_DQUOT  = 13        ; "
PTOK_SQUOT  = 14        ; '
PTOK_HASH   = 15        ; #
PTOK_SCOLN  = 16        ; ;
PTOK_DOLR   = 17        ; $
PTOK_PRCNT  = 18        ; %
PTOK_NUM    = 19        ; 0-9
PTOK_ALPH   = 20        ; a-z A-Z
PTOK_OTHR   = 21        ; Everything else.

; Expressions.
EXPR_PLUS   = 0         ; Plus.
EXPR_MINUS  = 1         ; Minus.
EXPR_LOW    = 2         ; Lower half of address.
EXPR_HIGH   = 3         ; Upper half of address.
EXPR_NONE   = 4         ; No expression.

; Data.
.org lexer_data

; Jump table for parsing pre-tokens.
; One 16-bit handler address per pre-token, listed in PTOK_* order.
; parse_ptok indexes this table with (pre-token * 2), so an entry's position
; must equal the numeric value of the PTOK_* constant named beside it.
swtab:
        .word ptok_dot          ; PTOK_DOT
        .word ptok_at           ; PTOK_AT
        .word ptok_col          ; PTOK_COLON
        .word ptok_equ          ; PTOK_EQU
        .word ptok_plus         ; PTOK_PLUS
        .word ptok_min          ; PTOK_MINUS
        .word ptok_gt           ; PTOK_GT
        .word ptok_lt           ; PTOK_LT
        .word ptok_lbrk         ; PTOK_LBRAK
        .word ptok_rbrk         ; PTOK_RBRAK
        .word ptok_com          ; PTOK_COMMA
        .word ptok_xr           ; PTOK_X
        .word ptok_yr           ; PTOK_Y
        .word ptok_dqu          ; PTOK_DQUOT
        .word ptok_squ          ; PTOK_SQUOT
        .word ptok_hash         ; PTOK_HASH
        .word ptok_scol         ; PTOK_SCOLN
        .word ptok_dolr         ; PTOK_DOLR
        .word ptok_prcn         ; PTOK_PRCNT
        .word ptok_num          ; PTOK_NUM
        .word ptok_alph         ; PTOK_ALPH
        .word ptok_othr         ; PTOK_OTHR

; Data entry point for utility subroutines.
util_data:

; Program code.
.org lexer

;-----------------------------------------------------------------------
; lex
; Driver loop of the lexer.
;
; What the code below does:
;   * Saves Y (2 bytes, the screen buffer index) on entry and restores it
;     at @end.
;   * Zeroes idx0, idx1 and idx2.
;   * @getline: calls set_ptr with B = 1 and the word at ltok, then again
;     with the word read through (ptr2); that word is stored in ctok.
;   * @loop: reads the character at (ptr), y with Y = idx0 and leaves via
;     @end when isdelm answers 1.
;   * @spaces: consumes spaces and tabs, counting spaces in idx1 and tabs
;     in idx1+1.
;   * @whtspace: when whitespace was counted and B = 1, stores the idx1
;     word at (ptr2), y with Y = 2, clears idx1 and decrements B.
;   * @switch: calls get_ptok then parse_ptok; leaves via @end when Z is
;     set on return, otherwise goes back to @loop.
;
; Overwrites A, B and X.
; Depends on symbols defined outside this section: idx0, idx1, idx2, ptr,
; ptr2, ltok, ctok, set_ptr, isdelm, get_ptok.
; NOTE(review): set_ptr appears to take the pointer selector in B and the
; address in A (inferred from the call-site comments) - confirm.
;-----------------------------------------------------------------------
lex:
        ldx #0                  ; Reset X.
        txa                     ; Reset A.
        phy #2                  ; Preserve the screen buffer index.
        txy                     ; Reset Y.
        sty.q idx0              ; Clear the first index.
        sty.q idx1              ; Clear the second index.
        sty.q idx2              ; Clear the third index.
;       lda (ptr), y            ; Get a character from the line.
;       pha #1                  ; Preserve the character.
;       jsr isdigit             ; Is this character a digit?
;       pla #1                  ; Get the character back.
@getline:
        lda #2                  ; Get the third byte, of the line table address.
        lsl #$10                ; Shift it by 2 bytes.
        ldb #1                  ; Set the second pointer
        lda.w ltok              ;  to the last line.
        jsr set_ptr             ;
        lda.w (ptr2)            ; Get the next line.
        jsr set_ptr             ; Set the second pointer to the next line.
        sta.w ctok              ; Make it the current line.
        and #0                  ; Reset A.
@loop:
        ldy.w idx0              ; Get the string index.
        lda (ptr), y            ; Get a character from the line.
        jsr isdelm              ; Get the delimiter.
        cmp #1                  ; Are we at the end of the line?
        beq @end                ; Yes, so we're done.
@spaces:
        ldy.w idx0              ; Get the string index.
        inc.w idx0              ; Increment the string index.
        lda (ptr), y            ; Get a character from the line.
        pha #1                  ; Preserve the character.
        jsr isdelm              ; Get the delimiter.
        and #$10                ; Is this character, a space, or tab?
        ; NOTE(review): the beq below is meant to test the result of the
        ; 'and' above, so this relies on pla leaving the flags untouched
        ; - confirm against the ISA.
        pla #1                  ; Get the character back.
        beq @isstart            ; No, so check for the start of the line.
        cmp #' '                ; Is this character, a space?
        beq @incs               ; Yes, so increment the starting space count.
        cmp #'\t'               ; No, but is it a tab?
        beq @inct               ; Yes, so increment the starting tab count.
        jmp @spaces             ; No, so keep looping.
@incs:
        inc idx1                ; Increment the space count (low byte of idx1).
        jmp @spaces             ; Keep looping.
@inct:
        inc idx1+1              ; Increment the tab count (second byte of idx1).
        jmp @spaces             ; Keep looping.
@isstart:
        pha #2                  ; Preserve the character.
        lda.w idx1              ; Was there any whitespace?
        ; NOTE(review): as in @spaces, the beq below is meant to test the
        ; lda above, not the pulled value - confirm pla preserves flags.
        pla #2                  ; Get the character back.
        beq @switch             ; No, so start lexing.
        ; NOTE(review): B is expected to still hold the 1 loaded in
        ; @getline, which requires set_ptr and isdelm to preserve B
        ; - TODO confirm.
        cpb #1                  ; Yes, and are we at the start of the line?
        bne @switch             ; No, so start lexing.
@whtspace:
        ldy #2                  ; Yes, so set the line index to the starting whitespace counters.
        lda.w idx1              ; Get both indices.
        sta.w (ptr2), y         ; Save them in the line.
        and #0                  ; Reset A.
        sta.w idx1              ; Reset the second index.
        deb                     ; Set the isstart flag to false.
@switch:
        jsr get_ptok            ; Get the pre-token.
        jsr parse_ptok          ; Parse the pre-token.
        ; NOTE(review): every ptok_* handler is currently a bare rts, so Z
        ; here is whatever parse_ptok's own instructions left behind
        ; - confirm the intended end-of-string signal.
        beq @end                ; We got to the end of the string.
        jmp @loop               ; Keep looping.
@end:
        ply #2                  ; Get the screen buffer index back.
        rts                     ; End of lex.

;-----------------------------------------------------------------------
; parse_ptok
; Dispatches to the handler for one pre-token.
;
; In:   A = pre-token (a PTOK_* value).
; Flow: points ptr3 at swtab via set_ptr (B = 2), reads the 16-bit entry at
;       offset (pre-token * 2), hands that address to set_ptr again, then
;       jumps through ptr3. The handler is entered with jmp, so its rts
;       returns to parse_ptok's caller.
; Y is saved and restored (2 bytes); A and B are zeroed before the jump.
; NOTE(review): the second set_ptr call reuses B = 2 from the first, which
; assumes set_ptr preserves B - confirm.
; NOTE(review): no range check is done on the pre-token before indexing.
;-----------------------------------------------------------------------
parse_ptok:
        pha #1                  ; Preserve the pre-token.
        ldb #2                  ; Set the third pointer
        lda.w #swtab            ;  to the start of the jump table.
        jsr set_ptr             ;
        and #0                  ; Reset A.
        pla #1                  ; Get the pre-token back.
        phy #2                  ; Preserve Y.
        lsl #1                  ; Multiply the pre-token by two, to get the jump index.
        tay                     ; Get the index of the jump table.
        lda.w (ptr3), y         ; Get the address to jump to.
        jsr set_ptr             ; Set the third pointer to the case address.
        and #0                  ; Reset A.
        tab                     ; Reset B.
        ply #2                  ; Get Y back.
        jmp (ptr3)              ; Jump to the case label.

; Pre-token handlers. Each one is currently an empty stub that returns
; straight away.
ptok_dot:                       ; PTOK_DOT
        rts                     ; End of parse_ptok.
ptok_at:                        ; PTOK_AT
        rts                     ; End of parse_ptok.
ptok_col:                       ; PTOK_COLON
        rts                     ; End of parse_ptok.
ptok_equ:                       ; PTOK_EQU
        rts                     ; End of parse_ptok.
ptok_plus:                      ; PTOK_PLUS
        rts                     ; End of parse_ptok.
ptok_min:                       ; PTOK_MINUS
        rts                     ; End of parse_ptok.
ptok_gt:                        ; PTOK_GT
        rts                     ; End of parse_ptok.
ptok_lt:                        ; PTOK_LT
        rts                     ; End of parse_ptok.
ptok_lbrk:                      ; PTOK_LBRAK
        rts                     ; End of parse_ptok.
ptok_rbrk:                      ; PTOK_RBRAK
        rts                     ; End of parse_ptok.
ptok_com:                       ; PTOK_COMMA
        rts                     ; End of parse_ptok.
ptok_xr:                        ; PTOK_X
        rts                     ; End of parse_ptok.
ptok_yr:                        ; PTOK_Y
        rts                     ; End of parse_ptok.
ptok_dqu:                       ; PTOK_DQUOT
        rts                     ; End of parse_ptok.
ptok_squ:                       ; PTOK_SQUOT
        rts                     ; End of parse_ptok.
; Pre-token handlers reached through the jump in parse_ptok. Each one is
; currently an empty stub; because parse_ptok enters them with jmp, the rts
; returns to parse_ptok's caller.
ptok_hash:                      ; PTOK_HASH
        rts                     ; End of parse_ptok.
ptok_scol:                      ; PTOK_SCOLN
        rts                     ; End of parse_ptok.
ptok_dolr:                      ; PTOK_DOLR
        rts                     ; End of parse_ptok.
ptok_prcn:                      ; PTOK_PRCNT
        rts                     ; End of parse_ptok.
ptok_num:                       ; PTOK_NUM
        rts                     ; End of parse_ptok.
ptok_alph:                      ; PTOK_ALPH
        rts                     ; End of parse_ptok.
ptok_othr:                      ; PTOK_OTHR
        rts                     ; End of parse_ptok.

; Entry point for utility subroutines.
utils: