; Lexer, and supporting routines for SuBAsm.

; Enums.

; Directives.
; Numeric IDs for the assembler directives.  ptok_dot counts through the
; directive name table with idx1, and treats that count as the directive ID.
; NOTE(review): the name table itself (`dir`) is defined outside this file;
; its entries presumably appear in this same order -- confirm.
DIR_ORG     =  0	; Origin.
DIR_BYTE    =  1	; Byte  =  8 bits.
DIR_WORD    =  2	; Word  = 16 bits.
DIR_DWORD   =  3	; Dword = 32 bits.
DIR_QWORD   =  4	; Qword = 64 bits.
DIR_INCL    =  5	; Include.

; Tokens.
; Lexeme types.  The pre-token handlers store one of these in lex_type
; before calling make_tok (ptok_dot uses TOK_RS and TOK_DIR, ptok_alph
; uses TOK_MNE).
TOK_DIR     =  0	; Directive.
TOK_LOCAL   =  1	; Local symbol.
TOK_LABEL   =  2	; Label.
TOK_SYM     =  3	; Symbol.
TOK_EXPR    =  4	; Expression.
TOK_CSV     =  5	; Comma separated value.
TOK_STR     =  6	; String.
TOK_CHAR    =  7	; Character.
TOK_IND     =  8	; Indirect addressing.
TOK_IMM     =  9	; Immediate data.
TOK_MNE     = 10	; Opcode/Mnemonic.
TOK_RS      = 11	; Register size prefix.
TOK_COMM    = 12	; Comment.
TOK_HEX     = 13	; Hex value.
TOK_DEC     = 14	; Decimal value.
TOK_BIN     = 15	; Binary value.
TOK_INCL    = 16	; Include file.

; Pre-Tokens.
; Character classes for the first character of a lexeme.  parse_ptok
; doubles the value and uses it as a byte offset into swtab, so each value
; here must match the position of its handler's .word entry in that table.
PTOK_DOT    =  0	; .
PTOK_AT     =  1	; @
PTOK_COLON  =  2	; :
PTOK_EQU    =  3	; =
PTOK_PLUS   =  4	; +
PTOK_MINUS  =  5	; -
PTOK_GT     =  6	; >
PTOK_LT     =  7	; <
PTOK_LBRAK  =  8	; (
PTOK_RBRAK  =  9	; )
PTOK_COMMA  = 10	; ,
PTOK_X      = 11	; x
PTOK_Y      = 12	; y
PTOK_DQUOT  = 13	; "
PTOK_SQUOT  = 14	; '
PTOK_HASH   = 15	; #
PTOK_SCOLN  = 16	; ;
PTOK_DOLR   = 17	; $
PTOK_PRCNT  = 18	; %
PTOK_NUM    = 19	; 0-9
PTOK_ALPH   = 20	; a-z A-Z
PTOK_OTHR   = 21	; Everything else.

; Expressions.
; Expression kinds.  None of the routines in this part of the file
; reference these values yet.
EXPR_PLUS   =  0	; Plus.
EXPR_MINUS  =  1	; Minus.
EXPR_LOW    =  2	; Lower half of address.
EXPR_HIGH   =  3	; Upper half of address.
EXPR_NONE   =  4	; No expression.


; Data.
.org lexer_data
; Jump table for parsing pre-tokens.
; One .word handler address per PTOK_* value, in PTOK_* order.  parse_ptok
; reads the entry at offset (pre-token * 2) and jumps to it, so adding or
; reordering a pre-token means changing this table in the same way.
swtab:
	.word ptok_dot	; PTOK_DOT
	.word ptok_at	; PTOK_AT
	.word ptok_col	; PTOK_COLON
	.word ptok_equ	; PTOK_EQU
	.word ptok_plus	; PTOK_PLUS
	.word ptok_min	; PTOK_MINUS
	.word ptok_gt	; PTOK_GT
	.word ptok_lt	; PTOK_LT
	.word ptok_lbrk	; PTOK_LBRAK
	.word ptok_rbrk	; PTOK_RBRAK
	.word ptok_com	; PTOK_COMMA
	.word ptok_xr	; PTOK_X
	.word ptok_yr	; PTOK_Y
	.word ptok_dqu	; PTOK_DQUOT
	.word ptok_squ	; PTOK_SQUOT
	.word ptok_hash	; PTOK_HASH
	.word ptok_scol	; PTOK_SCOLN
	.word ptok_dolr	; PTOK_DOLR
	.word ptok_prcn	; PTOK_PRCNT
	.word ptok_num	; PTOK_NUM
	.word ptok_alph	; PTOK_ALPH
	.word ptok_othr	; PTOK_OTHR

; Data entry point for utility subroutines.
util_data:


; Program code.
.org lexer
; lex: Split the line at (ptr) into lexemes.
; Clears idx0-idx2 and the isop flag (b), sets up the second pointer from
; ltok and stores the resulting line address in ctok, then loops over the
; text: leading spaces and tabs are counted, and every other character is
; classified by get_ptok and handed to parse_ptok.  The loop ends when
; isdelm returns 1 (end of line).  Y is saved on entry and restored on exit.
; NOTE(review): B is used as the "start of line" flag in @isstart.  That
; only works if set_ptr and isdelm leave B alone -- confirm.
lex:
	ldx #0		; Reset X.
	txa		; Reset A.
	phy #2		; Preserve the screen buffer index.
	txy		; Reset Y.
	sty.q idx0	; Clear the first index (the string index).
	sty.q idx1	; Clear the second index (whitespace counters here, lexeme index in the handlers).
	sty.q idx2	; Clear the third index.
	sty b		; Clear the isop flag.
;	lda (ptr), y	; Get a character from the line.
;	pha #1		; Preserve the character.
;	jsr isdigit	; Is this character a digit?
;	pla #1		; Get the character back.
@getline:
	lda #2		; Get the third byte, of the line table address.
	lsl #$10	; Shift it by 2 bytes.
	ldb #1		; Set the second pointer
	lda.w ltok	; to the last line.
	jsr set_ptr	;
	lda.w (ptr2)	; Get the next line.
	jsr set_ptr	; Set the second pointer to the next line.
	sta.w ctok	; Make it the current line.
	and #0		; Reset A.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm	; Get the delimiter.
	cmp #1		; Are we at the end of the line?
	beq @end	; Yes, so we're done.
@spaces:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha #1		; Preserve the character.
	jsr isdelm	; Get the delimiter.
	and #$10	; Is this character, a space, or tab?
	pla #1		; Get the character back (the branch below still uses the flags from the and).
	beq @isstart	; No, so check for the start of the line.
	inc.w idx0	; Yes, so increment the string index.
	cmp #' '	; Is this character, a space?
	beq @incs	; Yes, so increment the starting space count.
	cmp #'\t'	; No, but is it a tab?
	beq @inct	; Yes, so increment the starting tab count.
	jmp @spaces	; No, so Keep looping.
@incs:
	inc idx1	; Increment the space count (low byte of idx1).
	jmp @spaces	; Keep looping.
@inct:
	inc idx1+1	; Increment the tab count (second byte of idx1).
	jmp @spaces	; Keep looping.
@isstart:
	pha #2		; Preserve the character.
	lda.w idx1	; Was there any whitespace?  (Reads the space and tab counts together.)
	pla #2		; Get the character back (the branch below still uses the flags from the load).
	beq @switch	; No, so start lexing.
	cpb #1		; Yes, and are we at the start of the line?
	bne @switch	; No, so start lexing.
@whtspace:
	ldy #2		; Yes, so set the line index to the starting whitespace counters.
	lda.w idx1	; Get both indices.
	sta.w (ptr2), y	; Save them in the line, at offset 2 from the second pointer.
	and #0		; Reset A.
	sta.w idx1	; Reset the second index.
	deb		; Set the isstart flag to false.
@switch:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character.
	jsr get_ptok	; Get the pre-token.
	jsr parse_ptok	; Parse the pre-token.  The handler is expected to advance idx0.
;	beq @end	; We got to the end of the string.
	jmp @loop	; Keep looping.
@end:
	ply #2		; Get the screen buffer index back.
	rts		; End of lex.


; parse_ptok: Dispatch a pre-token to its handler.
; In:  A = pre-token ID (one of the PTOK_* values).
; The ID is doubled to form a byte offset into swtab, the .word found there
; is loaded into the third pointer, and control passes to it with
; jmp (ptr3).  Because this is a jump and not a call, the handler's rts
; returns directly to parse_ptok's caller.
; On entry to the handler A and B are zero and Y holds its original value.
parse_ptok:
	pha #1		; Preserve the pre-token.
	ldb #2		; Set the third pointer
	lda.w #swtab	; to the start of the jump table.
	jsr set_ptr	;
	and #0		; Reset A.
	pla #1		; Get the pre-token back.
	phy #2		; Preserve Y.
	lsl #1		; Multiply the pre-token by two, to get the jump index.
	tay		; Get the index of the jump table.
	lda.w (ptr3), y	; Get the address to jump to.
	jsr set_ptr	; Set the third pointer to the case address (B is assumed to still be 2 here).
	and #0		; Reset A.
	tab		; Reset B.
	ply #2		; Get Y back.
	jmp (ptr3)	; Jump to the case label.
; ptok_dot: Handle a lexeme that starts with '.'.
; Skips the dot and copies the text after it into the lexeme buffer.  If
; the isop flag (b) is set, the lexeme is treated as a register size suffix
; and its last character is decoded by get_rs.  Otherwise the lexeme is
; compared against each string of the directive table in turn.
; On the directive path idx1 counts the directive ID and idx2 holds the
; byte offset of the current string within the table.  idx1 ends up as the
; matching DIR_* ID, or as DIR_INCL+1 when nothing matched.
; make_tok and set_cmdbuf are called on every path.
ptok_dot:
	ldb #1		; Make init_lex increment the string index, to skip the dot.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #$11	; Stop at whitespace ($10), or at the end of the line (1).
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
@isop:
	lda b		; Has the isop flag been set?
	beq @dir	; No, so check for a directive.
@rs:
	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
	sta lex_type	;
	ldy.w idx1	; Get the lexeme index.
	dey		; Decrement the lexeme index, to get to the last character.
	lda (ptr3), y	; Get the suffix character.
	jsr get_rs	; Get the register size.
	jmp @end	; We are done.
@dir:
	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
	sta lex_type	;
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index, it now counts the directive ID.
	jsr set_lexptr	; Set up the lexeme buffer.
@dir_loop:
	; The end of table check comes before the compare, so that only the
	; strings for DIR_ORG through DIR_INCL are ever read.
	ldb idx1	; Get the directive ID.
	cpb #DIR_INCL+1	; Have we gone past the last directive?
	beq @end	; Yes, so nothing matched, and we're done.
	lda.w #dir	; Get pointer to the start of the directive table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous strings.
	pha #8		; Preserve the directive string pointer.
	jsr strcasecmp	; Is the lexeme buffer, the same as the directive string?
	pla #8		; Get the directive string pointer back.
	beq @found	; Yes, so create a new token.
	inc idx1	; No, so increment the directive ID.
@getlen:
	jsr strlen	; Get the string's length.
	inx		; Add one to the length, for the terminator.
	txa		; Place it in the accumulator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the string offset to the current length
	sta.w idx2	; Save the offset in the third index.
	jmp @dir_loop	; Keep looping.
@found:
	nop		;
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.
; The handlers from ptok_at to ptok_yr are placeholders.  Each one returns
; immediately, without touching idx0 or the lexeme buffer.
; NOTE(review): lex reads the character at idx0 again after every call to
; parse_ptok, so a line that reaches one of these handlers does not appear
; to make any progress until the handler is written.
ptok_at:
	rts		; End of parse_ptok.
ptok_col:
	rts		; End of parse_ptok.
ptok_equ:
	rts		; End of parse_ptok.
ptok_plus:
	rts		; End of parse_ptok.
ptok_min:
	rts		; End of parse_ptok.
ptok_gt:
	rts		; End of parse_ptok.
ptok_lt:
	rts		; End of parse_ptok.
ptok_lbrk:
	rts		; End of parse_ptok.
ptok_rbrk:
	rts		; End of parse_ptok.
ptok_com:
	rts		; End of parse_ptok.
ptok_xr:
	rts		; End of parse_ptok.
ptok_yr:
	rts		; End of parse_ptok.
; ptok_dqu: Handle a double quoted string.
; Skips the opening quote, copies the text into the lexeme buffer, and then
; steps over the closing quote, so that lex does not see it as the start
; of another string.  The copy also stops at the end of the line, so an
; unterminated string can not run past the line.
; No token is created for the string yet.
ptok_dqu:
	ldb #1		; Make init_lex increment the string index, to skip the opening quote.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #5		; Stop at a double quote (4), or at the end of the line (1).
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character that ended the copy.
	jsr isdelm	; Get the delimiter.
	and #4		; Did we stop at the closing quote?
	beq @term	; No, it was the end of the line, so leave it for lex.
	inc.w idx0	; Yes, so step over the closing quote.
@term:
	rts		; End of parse_ptok.
; The handlers from ptok_squ to ptok_num are placeholders.  Each one
; returns immediately, without touching idx0 or the lexeme buffer.
; NOTE(review): lex reads the character at idx0 again after every call to
; parse_ptok, so a line that reaches one of these handlers does not appear
; to make any progress until the handler is written.
ptok_squ:
	rts		; End of parse_ptok.
ptok_hash:
	rts		; End of parse_ptok.
ptok_scol:
	rts		; End of parse_ptok.
ptok_dolr:
	rts		; End of parse_ptok.
ptok_prcn:
	rts		; End of parse_ptok.
ptok_num:
	rts		; End of parse_ptok.
; ptok_alph: Handle a lexeme that starts with a letter.
; Copies the lexeme into the lexeme buffer (the string index is not
; advanced first), then compares it against each entry of the mnemonic
; table.  On a match lex_type becomes TOK_MNE and the isop flag (b) is set.
; Without a match b stays clear and lex_type is not written.
; make_tok and set_cmdbuf are called on both paths.
; idx1 counts the instruction ID, and idx2 holds the byte offset of the
; current entry within the table (instruction ID * 13).
ptok_alph:
	ldb #0		; Do not let init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #1		; Stop at any possible delimiter, except whitespace.
	tba		; Use isdelm2 for the comparison (a non zero A selects it in delmcpy).
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	lda #0		; Reset A.
	sta b		; Clear the isop flag.
@isop:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index, it now counts the instruction ID.
	jsr set_lexptr	; Set up the lexeme buffer.
@isop_loop:
	lda.w #mne	; Get pointer to the start of the instruction table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the offset of the current entry.
	jsr strcasecmp	; Is the lexeme buffer, the same as the mnemonic string?
	beq @found	; Yes, so create a new token.
	ldb idx1	; No, so Get the instruction ID.
	cpb #OPNUM-1	; Have we reached the end of the instruction table?
	beq @end	; Yes, so we're done.
	inc idx1	; No, so increment the instruction ID.
@offset:
	lda #13		; Get the base size of the instruction table.
	clc		; Prepare for a non carrying multiply.
	mul idx1	; Multiply the base offset, by the instruction ID.
	sta.w idx2	; Save the offset in the third index.
	jmp @isop_loop	; Keep looping.
@found:
	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
	sta lex_type	;
	inc b		; Set the isop flag.
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.
; ptok_othr: Placeholder for PTOK_OTHR (everything else).  Returns
; immediately, without touching idx0 or the lexeme buffer.
; NOTE(review): lex reads the character at idx0 again after every call to
; parse_ptok, so a line that reaches this handler does not appear to make
; any progress until the handler is written.
ptok_othr:
	rts		; End of parse_ptok.


; set_lexptr: Aim one of the pointers at the lexeme buffer.
; In:  B = pointer selector, passed through to set_ptr untouched.
; Out: A = 0, B = 0, idx1 = 0.
set_lexptr:
	lda.d #lexeme	; A = address of the lexeme buffer.
	jsr set_ptr	; Store it in the pointer that B selects.
	and #0		; Zero A,
	sta.q idx1	; restart the second index from zero,
	tab		; and zero B as well.
	rts		; Back to the caller.


; set_cmdbuf: Aim the first pointer back at the command buffer.
; Out: A = 0, B = 0.
set_cmdbuf:
	lda.d #cmd_buf	; A = address of the command buffer.
	ldb #0		; B = 0 selects the first pointer.
	jsr set_ptr	; Store the address in that pointer.
	and #0		; Zero A,
	tab		; and zero B too.
	rts		; Back to the caller.


; init_lex: Get ready to copy a new lexeme.
; In:  B = 0 to leave the string index alone, anything else to advance
;      idx0 by one first (used to skip a leading character).
; Clears idx1 and idx2, and makes the lexeme buffer the third pointer.
init_lex:
	cpb #0		; Did the caller ask us to skip a character?
	beq @reset	; No, so leave the string index alone.
	inc.w idx0	; Yes, so step the string index past it.
@reset:
	lda #0		; Zero, for the two stores below.
	sta.q idx2	; Clear the third index.
	sta.q idx1	; Clear the second index.
	ldb #2		; Select the third pointer,
	jsr set_lexptr	; and point it at the lexeme buffer.
	rts		; Back to the caller.


; delmcpy: Copy characters from the line at (ptr) into the lexeme buffer
; at (ptr3), until a delimiter is found, then terminate the lexeme.
; In:  A = 0 to test each character with isdelm, and stop when the result
;          ANDed with B is non zero.
;      A != 0 to test each character with isdelm2, and stop when the
;          result is equal to B.
;      B = delimiter comparison value.
;      idx0 = string index, idx1 = lexeme index.
; Out: idx0 indexes the delimiter that ended the copy, idx1 indexes the
;      terminator in the lexeme buffer, A = 0, Y = idx0.
; Uses the variables a and c as scratch space.
; NOTE(review): there is no separate end of line test here, so the caller
; has to include the end of line in the delimiter value.
delmcpy:
	sta a		; Save the delimiter check flag.
	stb c		; Save the delimiter comparison value.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha #1		; Preserve the character.
	lda a		; Are we calling isdelm2?
	pla #1		; Get the character back (the branch below still uses the flags from the load).
	bne @isdelm2	; Yes, so use isdelm2.
	jsr isdelm	; No, so get the delimiter value from isdelm.
	and c		; Do the delimiter values have any bits in common?
	bne @end	; Yes, so we're done.
	jmp @copy	; No, so start copying the character.
@isdelm2:
	jsr isdelm2	; Get the delimiter value from isdelm2.
	cmp c		; Are both delimiter values, the same?
	beq @end	; Yes, so we're done.
@copy:
	lda (ptr), y	; Get a character from the line.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Copy the character to the lexeme buffer.
	inc.w idx0	; Increment the string index.
	inc.w idx1	; Increment the lexeme index.
	jmp @loop	; Keep looping.
@end:
	; Y still holds the string index here, so get the lexeme index first.
	; Otherwise the terminator lands at the wrong offset, whenever the
	; lexeme does not start at the beginning of the line.
	ldy.w idx1	; Get the lexeme index.
	lda #0		; Terminate the lexeme buffer.
	sta (ptr3), y	;
	ldy.w idx0	; Leave Y as the string index, like before.
	rts		; End of delmcpy.


; get_rs: Decode a register size suffix character.
; In:  A = suffix character, in either case.
; Out: A = 1 for 'w', 2 for 'd', 3 for 'q', and 0 for anything else.
; Also clears the isop flag (the variable b).  The B register is saved and
; restored around the flag write.
get_rs:
	phb #1		; Preserve B.
	ldb #0		; Set the isop flag to false.
	stb b		; The flag is the variable b, so it has to be stored.
	plb #1		; Get B back.
	jsr tolower	; Convert the character to lowercase.
	cmp #'w'	; Is it .w?
	beq @r1		; Yes, so return 1.
	cmp #'d'	; No, but was it .d?
	beq @r2		; Yes, so return 2.
	cmp #'q'	; No, but was it .q?
	beq @r3		; Yes, so return 3.
@r0:
	lda #0		; Return 0.
	rts		; End of get_rs.
@r1:
	lda #1		; Return 1.
	rts		; End of get_rs.
@r2:
	lda #2		; Return 2.
	rts		; End of get_rs.
@r3:
	lda #3		; Return 3.
	rts		; End of get_rs.


; make_tok: Placeholder for token creation.  Called by ptok_dot and
; ptok_alph once lex_type has been decided.  Does nothing yet.
make_tok:
	nop		; Nothing to do yet.
@done:
	rts		; Back to the caller.

; Entry point for utility subroutines.
utils: