diff options
Diffstat (limited to 'programs/lexer.s')
-rw-r--r-- | programs/lexer.s | 231 |
1 files changed, 231 insertions, 0 deletions
diff --git a/programs/lexer.s b/programs/lexer.s new file mode 100644 index 0000000..cd5bb54 --- /dev/null +++ b/programs/lexer.s @@ -0,0 +1,231 @@ +; Lexer, and supporting routines for SuBAsm. + +; Enums. + +; Directives. +DIR_ORG = 0 ; Origin. +DIR_BYTE = 1 ; Byte = 8 bits. +DIR_WORD = 2 ; Word = 16 bits. +DIR_DWORD = 3 ; Dword = 32 bits. +DIR_QWORD = 4 ; Qword = 64 bits. +DIR_INCL = 5 ; Include. + +; Tokens. +TOK_DIR = 0 ; Directive. +TOK_LOCAL = 1 ; Local syobol. +TOK_LABEL = 2 ; Label. +TOK_SYM = 3 ; Symbol. +TOK_EXPR = 4 ; Expression. +TOK_CSV = 5 ; Comma separated value. +TOK_STR = 6 ; String. +TOK_CHAR = 7 ; Character. +TOK_IND = 8 ; Indirect addressing. +TOK_IMM = 9 ; Immediate data. +TOK_MNE = 10 ; Opcode/Mnemonic. +TOK_RS = 11 ; Register size prefix. +TOK_COMM = 12 ; Comment. +TOK_HEX = 13 ; Hex value. +TOK_DEC = 14 ; Decimal value. +TOK_BIN = 15 ; Binary value. +TOK_INCL = 16 ; Include file. + +; Pre-Tokens. +PTOK_DOT = 0 ; . +PTOK_AT = 1 ; @ +PTOK_COLON = 2 ; : +PTOK_EQU = 3 ; = +PTOK_PLUS = 4 ; + +PTOK_MINUS = 5 ; - +PTOK_GT = 6 ; > +PTOK_LT = 7 ; < +PTOK_LBRAK = 8 ; ( +PTOK_RBRAK = 9 ; ) +PTOK_COMMA = 10 ; , +PTOK_X = 11 ; x +PTOK_Y = 12 ; y +PTOK_DQUOT = 13 ; " +PTOK_SQUOT = 14 ; ' +PTOK_HASH = 15 ; # +PTOK_SCOLN = 16 ; ; +PTOK_DOLR = 17 ; $ +PTOK_PRCNT = 18 ; % +PTOK_NUM = 19 ; 0-9 +PTOK_ALPH = 20 ; a-z A-Z +PTOK_OTHR = 21 ; Everything else. + +; Expressions. +EXPR_PLUS = 0 ; Plus. +EXPR_MINUS = 1 ; Minus. +EXPR_LOW = 2 ; Lower half of address. +EXPR_HIGH = 3 ; Upper half of address. +EXPR_NONE = 4 ; No expression. + + +; Data. +.org lexer_data +; Jump table for parsing pre-tokens. +swtab: + .word ptok_dot ; PTOK_DOT + .word ptok_at ; PTOK_AT + .word ptok_col ; PTOK_COLON + .word ptok_equ ; PTOK_EQU + .word ptok_plus ; PTOK_PLUS + .word ptok_min ; PTOK_MINUS + .word ptok_gt ; PTOK_GT + .word ptok_lt ; PTOK_LT + .word ptok_lbrk ; PTOK_LBRAK + .word ptok_rbrk ; PTOK_RBRAK + .word ptok_com ; PTOK_COMMA + .word ptok_xr ; PTOK_X + .word ptok_yr ; PTOK_Y + .word ptok_dqu ; PTOK_DQUOT + .word ptok_squ ; PTOK_SQUOT + .word ptok_hash ; PTOK_HASH + .word ptok_scol ; PTOK_SCOLN + .word ptok_dolr ; PTOK_DOLR + .word ptok_prcn ; PTOK_PRCNT + .word ptok_num ; PTOK_NUM + .word ptok_alph ; PTOK_ALPH + .word ptok_othr ; PTOK_OTHR + +; Data entry point for utility subroutines. +util_data: + + +; Program code. +.org lexer +lex: + ldx #0 ; Reset X. + txa ; Reset A. + phy #2 ; Preserve the screen buffer index. + txy ; Reset Y. + sty.q idx0 ; Clear the first index. + sty.q idx1 ; Clear the second index. + sty.q idx2 ; Clear the third index. +; lda (ptr), y ; Get a character from the line. +; pha #1 ; Preserve the character. +; jsr isdigit ; Is this character a digit? +; pla #1 ; Get the character back. +@getline: + lda #2 ; Get the third byte, of the line table address. + lsl #$10 ; Shift it by 2 bytes. + ldb #1 ; Set the second pointer + lda.w ltok ; to the last line. + jsr set_ptr ; + lda.w (ptr2) ; Get the next line. + jsr set_ptr ; Set the second pointer to the next line. + sta.w ctok ; Make it the current line. + and #0 ; Reset A. +@loop: + ldy.w idx0 ; Get the string index. + lda (ptr), y ; Get a character from the line. + jsr isdelm ; Get the delimiter. + cmp #1 ; Are we at the end of the line? + beq @end ; Yes, so we're done. +@spaces: + ldy.w idx0 ; Get the string index. + inc.w idx0 ; Increment the string index. + lda (ptr), y ; Get a character from the line. + pha #1 ; Preserve the character. + jsr isdelm ; Get the delimiter. + and #$10 ; Is this character, a space, or tab? + pla #1 ; Get the character back. + beq @isstart ; No, so check for the start of the line. + cmp #' ' ; Is this character, a space? + beq @incs ; Yes, so increment the starting space count. + cmp #'\t' ; No, but is it a tab? + beq @inct ; Yes, so increment the starting tab count. + jmp @spaces ; No, so keep looping. +@incs: + inc idx1 ; Increment the space count. + jmp @spaces ; Keep looping. +@inct: + inc idx1+1 ; Increment the tab count. + jmp @spaces ; Keep looping. +@isstart: + pha #2 ; Preserve the character. + lda.w idx1 ; Was there any whitespace? + pla #2 ; Get the character back. + beq @switch ; No, so start lexing. + cpb #1 ; Yes, and are we at the start of the line? + bne @switch ; No, so start lexing. +@whtspace: + ldy #2 ; Yes, so set the line index to the starting whitespace counters. + lda.w idx1 ; Get both indecies. + sta.w (ptr2), y ; Save them in the line. + and #0 ; Reset A. + sta.w idx1 ; Reset the second index. + deb ; Set the isstart flag to false. +@switch: + jsr get_ptok ; Get the pre-token. + jsr parse_ptok ; Parse the pre-token. + beq @end ; We got to the end of the string. + jmp @loop ; Keep looping. +@end: + ply #2 ; Get the screen buffer index back. + rts ; End of lex. + +parse_ptok: + pha #1 ; Preserve the pre-token. + ldb #2 ; Set the third pointer + lda.w #swtab ; to the start of the jump table. + jsr set_ptr ; + and #0 ; Reset A. + pla #1 ; Get the pre-token back. + phy #2 ; Preserve Y. + lsl #1 ; Multiply the pre-token by two, to get the jump index. + tay ; Get the index of the jump table. + lda.w (ptr3), y ; Get the address to jump to. + jsr set_ptr ; Set the third pointer to the case address. + and #0 ; Reset A. + tab ; Reset B. + ply #2 ; Get Y back. + jmp (ptr3) ; Jump to the case label. +ptok_dot: + rts ; End of parse_ptok. +ptok_at: + rts ; End of parse_ptok. +ptok_col: + rts ; End of parse_ptok. +ptok_equ: + rts ; End of parse_ptok. +ptok_plus: + rts ; End of parse_ptok. +ptok_min: + rts ; End of parse_ptok. +ptok_gt: + rts ; End of parse_ptok. +ptok_lt: + rts ; End of parse_ptok. +ptok_lbrk: + rts ; End of parse_ptok. +ptok_rbrk: + rts ; End of parse_ptok. +ptok_com: + rts ; End of parse_ptok. +ptok_xr: + rts ; End of parse_ptok. +ptok_yr: + rts ; End of parse_ptok. +ptok_dqu: + rts ; End of parse_ptok. +ptok_squ: + rts ; End of parse_ptok. +ptok_hash: + rts ; End of parse_ptok. +ptok_scol: + rts ; End of parse_ptok. +ptok_dolr: + rts ; End of parse_ptok. +ptok_prcn: + rts ; End of parse_ptok. +ptok_num: + rts ; End of parse_ptok. +ptok_alph: + rts ; End of parse_ptok. +ptok_othr: + rts ; End of parse_ptok. + + +; Entry point for utility subroutines. +utils: |