summaryrefslogtreecommitdiff
path: root/programs/lexer.s
diff options
context:
space:
mode:
Diffstat (limited to 'programs/lexer.s')
-rw-r--r--programs/lexer.s231
1 files changed, 231 insertions, 0 deletions
diff --git a/programs/lexer.s b/programs/lexer.s
new file mode 100644
index 0000000..cd5bb54
--- /dev/null
+++ b/programs/lexer.s
@@ -0,0 +1,231 @@
+; Lexer, and supporting routines for SuBAsm.
+
+; Enums.
+
+; Directive IDs: origin, the four data widths, and include.
+DIR_ORG = 0 ; Origin.
+DIR_BYTE = 1 ; Byte = 8 bits.
+DIR_WORD = 2 ; Word = 16 bits.
+DIR_DWORD = 3 ; Dword = 32 bits.
+DIR_QWORD = 4 ; Qword = 64 bits.
+DIR_INCL = 5 ; Include.
+
+; Token IDs.
+TOK_DIR = 0 ; Directive.
+TOK_LOCAL = 1 ; Local symbol.
+TOK_LABEL = 2 ; Label.
+TOK_SYM = 3 ; Symbol.
+TOK_EXPR = 4 ; Expression.
+TOK_CSV = 5 ; Comma separated value.
+TOK_STR = 6 ; String.
+TOK_CHAR = 7 ; Character.
+TOK_IND = 8 ; Indirect addressing.
+TOK_IMM = 9 ; Immediate data.
+TOK_MNE = 10 ; Opcode/Mnemonic.
+TOK_RS = 11 ; Register size prefix.
+TOK_COMM = 12 ; Comment.
+TOK_HEX = 13 ; Hex value.
+TOK_DEC = 14 ; Decimal value.
+TOK_BIN = 15 ; Binary value.
+TOK_INCL = 16 ; Include file.
+
+; Pre-token IDs. Each value is also the entry number of its handler in swtab, so the two lists must stay in the same order.
+PTOK_DOT = 0 ; .
+PTOK_AT = 1 ; @
+PTOK_COLON = 2 ; :
+PTOK_EQU = 3 ; =
+PTOK_PLUS = 4 ; +
+PTOK_MINUS = 5 ; -
+PTOK_GT = 6 ; >
+PTOK_LT = 7 ; <
+PTOK_LBRAK = 8 ; (
+PTOK_RBRAK = 9 ; )
+PTOK_COMMA = 10 ; ,
+PTOK_X = 11 ; x
+PTOK_Y = 12 ; y
+PTOK_DQUOT = 13 ; "
+PTOK_SQUOT = 14 ; '
+PTOK_HASH = 15 ; #
+PTOK_SCOLN = 16 ; ;
+PTOK_DOLR = 17 ; $
+PTOK_PRCNT = 18 ; %
+PTOK_NUM = 19 ; 0-9
+PTOK_ALPH = 20 ; a-z A-Z
+PTOK_OTHR = 21 ; Everything else.
+
+; Expression type IDs.
+EXPR_PLUS = 0 ; Plus.
+EXPR_MINUS = 1 ; Minus.
+EXPR_LOW = 2 ; Lower half of address.
+EXPR_HIGH = 3 ; Upper half of address.
+EXPR_NONE = 4 ; No expression.
+
+
+; Data.
+.org lexer_data
+; Jump table read by parse_ptok: one .word handler address per pre-token, in PTOK_* order.
+swtab:
+ .word ptok_dot ; 0: PTOK_DOT
+ .word ptok_at ; 1: PTOK_AT
+ .word ptok_col ; 2: PTOK_COLON
+ .word ptok_equ ; 3: PTOK_EQU
+ .word ptok_plus ; 4: PTOK_PLUS
+ .word ptok_min ; 5: PTOK_MINUS
+ .word ptok_gt ; 6: PTOK_GT
+ .word ptok_lt ; 7: PTOK_LT
+ .word ptok_lbrk ; 8: PTOK_LBRAK
+ .word ptok_rbrk ; 9: PTOK_RBRAK
+ .word ptok_com ; 10: PTOK_COMMA
+ .word ptok_xr ; 11: PTOK_X
+ .word ptok_yr ; 12: PTOK_Y
+ .word ptok_dqu ; 13: PTOK_DQUOT
+ .word ptok_squ ; 14: PTOK_SQUOT
+ .word ptok_hash ; 15: PTOK_HASH
+ .word ptok_scol ; 16: PTOK_SCOLN
+ .word ptok_dolr ; 17: PTOK_DOLR
+ .word ptok_prcn ; 18: PTOK_PRCNT
+ .word ptok_num ; 19: PTOK_NUM
+ .word ptok_alph ; 20: PTOK_ALPH
+ .word ptok_othr ; 21: PTOK_OTHR
+
+; Label marking where the data for the utility subroutines begins.
+util_data:
+
+; Program code.
+; lex: counts the leading spaces/tabs of the line at (ptr), then passes pre-tokens from get_ptok to parse_ptok. Y is saved and restored; A, B and X are overwritten.
+.org lexer
+lex:
+ ldx #0 ; X = 0.
+ txa ; A = X = 0.
+ phy #2 ; Save Y (the screen buffer index); restored by the ply at @end.
+ txy ; Y = X = 0.
+ sty.q idx0 ; Clear idx0, the string index.
+ sty.q idx1 ; Clear idx1, the space count (idx1) and tab count (idx1+1).
+ sty.q idx2 ; Clear idx2 (not referenced again inside lex).
+; lda (ptr), y ; Get a character from the line.
+; pha #1 ; Preserve the character.
+; jsr isdigit ; Is this character a digit?
+; pla #1 ; Get the character back.
+@getline:
+ lda #2 ; Get the third byte, of the line table address.
+ lsl #$10 ; Shift it left by 16 bits (2 bytes).
+ ldb #1 ; B = 1 for set_ptr (second pointer); B is also tested as the start-of-line flag at @isstart.
+ lda.w ltok ; A = ltok, the last line. NOTE(review): may discard the value built by the lda/lsl above - confirm what lda.w does to the upper bytes of A.
+ jsr set_ptr ; Set the second pointer to the last line.
+ lda.w (ptr2) ; Get the next line.
+ jsr set_ptr ; Set the second pointer to the next line.
+ sta.w ctok ; Make it the current line. NOTE(review): assumes set_ptr returns with A unchanged - confirm.
+ and #0 ; Reset A.
+@loop:
+ ldy.w idx0 ; Get the string index.
+ lda (ptr), y ; Get a character from the line.
+ jsr isdelm ; Get the delimiter type of the character.
+ cmp #1 ; Are we at the end of the line?
+ beq @end ; Yes, so we're done.
+@spaces:
+ ldy.w idx0 ; Get the string index.
+ inc.w idx0 ; Advance the stored string index; Y still holds the old value.
+ lda (ptr), y ; Get a character from the line.
+ pha #1 ; Preserve the character.
+ jsr isdelm ; Get the delimiter type of the character.
+ and #$10 ; Keep only bit 4 of the result: is this character a space, or tab?
+ pla #1 ; Get the character back. NOTE(review): the beq below needs pla to leave the flags from the and untouched - confirm.
+ beq @isstart ; No, so check for the start of the line.
+ cmp #' ' ; Is this character, a space?
+ beq @incs ; Yes, so increment the starting space count.
+ cmp #'\t' ; No, but is it a tab?
+ beq @inct ; Yes, so increment the starting tab count.
+ jmp @spaces ; Neither, so skip it and keep looping.
+@incs:
+ inc idx1 ; Increment the space count (first byte of idx1).
+ jmp @spaces ; Keep looping.
+@inct:
+ inc idx1+1 ; Increment the tab count (second byte of idx1).
+ jmp @spaces ; Keep looping.
+@isstart:
+ pha #2 ; Preserve the character.
+ lda.w idx1 ; Load both whitespace counters: was there any whitespace?
+ pla #2 ; Get the character back. NOTE(review): the beq below needs pla to leave the flags from the lda untouched - confirm.
+ beq @switch ; No, so start lexing.
+ cpb #1 ; Yes, and is the start-of-line flag in B still 1? NOTE(review): assumes set_ptr and isdelm preserve B - confirm.
+ bne @switch ; No, so start lexing.
+@whtspace:
+ ldy #2 ; Yes, so set the line index to the starting whitespace counters.
+ lda.w idx1 ; Get both indices.
+ sta.w (ptr2), y ; Save them in the line.
+ and #0 ; Reset A.
+ sta.w idx1 ; Reset the second index.
+ deb ; Set the isstart flag to false.
+@switch:
+ jsr get_ptok ; Get the pre-token.
+ jsr parse_ptok ; Parse the pre-token.
+ beq @end ; We got to the end of the string. NOTE(review): parse_ptok ends with and #0 / tab before its handlers rts - confirm this branch is not always taken.
+ jmp @loop ; Keep looping.
+@end:
+ ply #2 ; Get the screen buffer index back (pushed at the top of lex).
+ rts ; End of lex.
+; parse_ptok: jumps to the swtab handler for the pre-token in A. The handler's rts returns to parse_ptok's caller. Y is preserved; A and B are zeroed before the jump.
+parse_ptok:
+ pha #1 ; Save the pre-token while A is used to set up the pointer.
+ ldb #2 ; B = 2 for set_ptr (third pointer).
+ lda.w #swtab ; A = address of the start of the jump table.
+ jsr set_ptr ; Set the third pointer to the jump table.
+ and #0 ; Reset A.
+ pla #1 ; Get the pre-token back.
+ phy #2 ; Preserve Y.
+ lsl #1 ; Multiply the pre-token by two, since each swtab entry is a .word.
+ tay ; Get the index of the jump table.
+ lda.w (ptr3), y ; Get the address to jump to.
+ jsr set_ptr ; Set the third pointer to the case address. NOTE(review): B is not reloaded, so this relies on set_ptr preserving B = 2 - confirm.
+ and #0 ; Reset A.
+ tab ; Reset B (B = A = 0).
+ ply #2 ; Get Y back.
+ jmp (ptr3) ; Jump to the case label.
+ptok_dot:
+ rts ; PTOK_DOT stub: returns straight to parse_ptok's caller.
+ptok_at:
+ rts ; PTOK_AT stub: returns straight to parse_ptok's caller.
+ptok_col:
+ rts ; PTOK_COLON stub: returns straight to parse_ptok's caller.
+ptok_equ:
+ rts ; PTOK_EQU stub: returns straight to parse_ptok's caller.
+ptok_plus:
+ rts ; PTOK_PLUS stub: returns straight to parse_ptok's caller.
+ptok_min:
+ rts ; PTOK_MINUS stub: returns straight to parse_ptok's caller.
+ptok_gt:
+ rts ; PTOK_GT stub: returns straight to parse_ptok's caller.
+ptok_lt:
+ rts ; PTOK_LT stub: returns straight to parse_ptok's caller.
+ptok_lbrk:
+ rts ; PTOK_LBRAK stub: returns straight to parse_ptok's caller.
+ptok_rbrk:
+ rts ; PTOK_RBRAK stub: returns straight to parse_ptok's caller.
+ptok_com:
+ rts ; PTOK_COMMA stub: returns straight to parse_ptok's caller.
+ptok_xr:
+ rts ; PTOK_X stub: returns straight to parse_ptok's caller.
+ptok_yr:
+ rts ; PTOK_Y stub: returns straight to parse_ptok's caller.
+ptok_dqu:
+ rts ; PTOK_DQUOT stub: returns straight to parse_ptok's caller.
+ptok_squ:
+ rts ; PTOK_SQUOT stub: returns straight to parse_ptok's caller.
+ptok_hash:
+ rts ; PTOK_HASH stub: returns straight to parse_ptok's caller.
+ptok_scol:
+ rts ; PTOK_SCOLN stub: returns straight to parse_ptok's caller.
+ptok_dolr:
+ rts ; PTOK_DOLR stub: returns straight to parse_ptok's caller.
+ptok_prcn:
+ rts ; PTOK_PRCNT stub: returns straight to parse_ptok's caller.
+ptok_num:
+ rts ; PTOK_NUM stub: returns straight to parse_ptok's caller.
+ptok_alph:
+ rts ; PTOK_ALPH stub: returns straight to parse_ptok's caller.
+ptok_othr:
+ rts ; PTOK_OTHR stub: returns straight to parse_ptok's caller.
+
+
+; Entry point for utility subroutines.
+utils: