; Lexer, and supporting routines for SuBAsm.

; Program code.
; lex: Tokenize the current input line.
;
; Clears the three indices, the token fields and the isop flag, then scans
; the line through (ptr) with idx0 as the string index:
;   - whitespace in front of a token is skipped and counted in
;     t_space/t_tab,
;   - every other character is converted to a pre-token by get_ptok and
;     handed to parse_ptok,
;   - the scan stops once isdelm returns 1 for the current character.
; Pre-tokens accepted by is_altok get extra lookahead at @is_altok, which
; may replace the pre-token with PTOK_ALPH before it is parsed.
;
; Returns with Y holding the value update_ptr left in A, and A cleared.
lex:
	ldx #0		; Reset X.
	txa		; Reset A.
	txy		; Reset Y.
	sty.q idx0	; Clear the first index (the string index).
	sty.q idx1	; Clear the second index.
	sty.q idx2	; Clear the third index.
	sty.d t_id	; Clear the token ID, type, space count, and tab count.
	sty.q t_val	; Clear the token value.
	sty.q t_str	; Clear the token string.
	sty.q t_sym	; Clear the token symbol.
	sty regb	; Clear the isop flag.
;	lda (ptr), y	; Get a character from the line.
;	pha 		; Preserve the character.
;	jsr isdigit	; Is this character a digit?
;	pla 		; Get the character back.
@getline:
	ldb #1		; Set the second pointer
	lda.q lline	; to the last line.
	jsr set_ptr	;
	ldy #ln.next	; Set the index to the next line pointer.
	lda.q (ptr2), y	; Get the next line.
	jsr set_ptr	; Set the second pointer to the next line.
	sta.q cline	; Make it the current line.
	and #0		; Reset A.
	tay		; Reset Y.
			; NOTE(review): the scan below reads through (ptr),
			; not (ptr2) - confirm that this is intended.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm	; Get the delimiter.
	cmp #1		; Are we at the end of the line?
	beq @end	; Yes, so we're done.
@spaces:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha 		; Preserve the character.
	jsr isdelm	; Get the delimiter.
	and #$10	; Is this character, a space, or tab?
	pla 		; Get the character back.
			; NOTE(review): the branch below tests the result of
			; the and, so pla must leave the flags alone - confirm.
	beq @switch	; No, so start lexing.
	inc.w idx0	; Yes, so increment the string index.
	cmp #' '	; Is this character, a space?
	beq @incs	; Yes, so increment the starting space count.
	cmp #'\t'	; No, but is it a tab?
	beq @inct	; Yes, so increment the starting tab count.
	bra @spaces	; No, so keep looping.
@incs:
	inc t_space	; Increment the space count.
	bra @spaces	; Keep looping.
@inct:
	inc t_tab	; Increment the tab count.
	bra @spaces	; Keep looping.
@switch:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character.
	jsr get_ptok	; Get the pre-token.
	pha		; Preserve the pre-token.
	jsr is_altok	; Is this one of the single letter pre-tokens?
	pla		; Get the pre-token back.
	bne @is_altok	; Yes, so check the rest of the pre-token.
@parse:
	jsr parse_ptok	; Parse the pre-token.
;	beq @end	; We got to the end of the string.
	lda lex_type	; Get the lexeme type.
	cmp #TOK_EXPR	; Was this token, an expression?
	beq @inc_idx	; Yes, so increment the index.
	ldy.w idx0	; No, so get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm2	; Is this not a delimiter?
	beq @inc_idx	; Yes, so increment the index.
	bra @loop	; No, so keep looping.
@is_altok:
	sta lex_type	; Save the pre-token in the lexeme type, as a temporary.
	iny		; Increment the offset, to look at the next character.
	cmp #PTOK_S	; Is this pre-token, PTOK_S?
	bne @ptok_p	; No, so check for PTOK_P.
	lda (ptr), y	; Yes, so get the next character after it.
	jsr tolower	; Convert it to lowercase.
	cmp #'p'	; Is the next character 'p'?
	bne @is_altok2	; No, so skip incrementing the offset.
			; A holds the character here, not the pre-token, so
			; it must not be compared against PTOK_P. The
			; pre-token is PTOK_S on this path, so the PTOK_P
			; check can be skipped.
	bra @inc_offset	; Yes, so increment the offset.
@ptok_p:
	cmp #PTOK_P	; Is this pre-token, PTOK_P?
	bne @is_altok2	; No, so skip incrementing the offset.
	lda (ptr), y	; Yes, so get the next character after it.
	jsr tolower	; Convert it to lowercase.
	cmp #'c'	; Is the next character 'c'?
	bne @is_altok2	; No, so skip incrementing the offset.
@inc_offset:
	iny		; Increment the offset.
@is_altok2:
	lda (ptr), y	; Get the character, at the current offset.
	jsr get_ptok	; Get the pre-token of the character.
	cmp #PTOK_P	; Is this pre-token greater than PTOK_P?
	bcc @ptok_num	; No, so check for PTOK_NUM.
	beq @ptok_num	;
			; NOTE(review): @ptok_num is not defined anywhere in
			; this routine as visible here - confirm where these
			; two branches are meant to go.
	cmp #PTOK_B	; Yes, and is this pre-token greater than, or equal to PTOK_B?
	bcs @ptok_al	; Yes, so set the pre-token to PTOK_ALPH.
	lda lex_type	; No, so get the original pre-token back.
	ldy.w idx0	; Get the string index.
	bra @parse	; Go back to parsing the pre-token.
@ptok_al:
	lda #PTOK_ALPH	; Set the pre-token to PTOK_ALPH.
	ldy.w idx0	; Get the string index.
	bra @parse	; Go back to parsing the pre-token.
@inc_idx:
	inc.w idx0	; Increment the string index.
	bra @loop	; Keep looping.
@end:
	jsr update_ptr	; Get the screen buffer index.
	tay		; Save it in Y.
	and #0		; Reset A.
	rts		; End of lex.


; parse_ptok: Dispatch a pre-token to its handler through the swtab jump table.
;
; In:  A = pre-token. It is doubled and used as the index into swtab, and
;      a word sized entry is loaded from there into the third pointer.
; Out: Control transfers to the handler with jmp (ptr3), with A and B
;      cleared, and Y restored to the value it had on entry. The handler is
;      reached with a jump, not a jsr, so the handler's rts returns to the
;      caller of parse_ptok.
parse_ptok:
	pha 		; Preserve the pre-token.
	ldb #2		; Set the third pointer
	lda.w #swtab	; to the start of the jump table.
	jsr set_ptr	;
	and #0		; Reset A.
	pla 		; Get the pre-token back.
	phy.w		; Preserve Y.
	lsl #1		; Multiply the pre-token by two, to get the jump index.
	tay		; Get the index of the jump table.
	lda.w (ptr3), y	; Get the address to jump to.
			; NOTE(review): B is not reloaded before the next
			; call, so it assumes set_ptr left B at 2 - confirm.
	jsr set_ptr	; Set the third pointer to the case address.
	and #0		; Reset A.
	tab		; Reset B.
	ply.w		; Get Y back.
	jmp (ptr3)	; Jump to the case label.
; ptok_dot: Handler for the dot pre-token.
;
; Steps the string index past the current character, and copies the text
; that follows into the lexeme buffer, until delmcpy sees a delimiter
; matching $11. What happens next depends on the isop flag (regb):
;   - set:   the token becomes TOK_RS, and the last character of the lexeme
;            is passed to get_rs.
;   - clear: the lexeme is compared against each string of the dir table
;            with strcaseg. On a match, the token becomes TOK_DIR, with the
;            directive ID (idx1) as the token type.
; All paths finish by calling make_tok, and set_cmdbuf.
ptok_dot:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #$11	; Set the delimiter comparison value to whitespace.
	lda #0		; Set the isesc flag to false.
	pha		; Pass it to delmcpy on the stack.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	pla		; Remove the flag from the stack.
@isop:
	lda regb	; Has the isop flag been set?
	beq @dir	; No, so check for a directive.
@rs:
	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_RS.
	ldy.w idx1	; Get the lexeme index.
	dey		; Decrement the lexeme index.
	lda (ptr3), y	; Get the suffix character.
	jsr get_rs	; Get the register size.
			; NOTE(review): the value returned in A is not stored
			; before @end, where set_cmdbuf clears A - confirm
			; whether it was meant to be saved.
	bra @end	; We are done.
@dir:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index.
	jsr set_lexptr	; Set up the lexeme buffer.
@dir_loop:
	ldb idx1	; Get the directive ID.
	cpb #7		; Have we reached the end of the directive table?
			; NOTE(review): 7 is hard coded - presumably the
			; number of strings in dir; confirm.
	beq @end	; Yes, so we're done.
	lda.w #dir	; No, so get the start of the directive table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous strings.
	pha.q		; Preserve the directive string pointer.
	jsr strcaseg	; Is the lexeme buffer, the same as the directive string?
	pla.q		; Get the directive string pointer back.
	beq @found	; Yes, so create a new token.
	inc idx1	; No, so increment the directive ID.
@getlen:
	jsr strlen	; Get the string's length.
	inc		; Add one to the length, for the terminator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the string offset to the current length
	sta.w idx2	; Save the offset in the third index.
	bra @dir_loop	; Keep looping.
@found:
	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_DIR.
	lda idx1	; Set the token type to the directive ID.
	sta t_type	;
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.
; ptok_at, ptok_col, ptok_equ: Minimal handlers.
; Each one only steps the string index past the current character, and
; returns. No token fields are written, and make_tok is not called.
ptok_at:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_col:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_equ:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
; ptok_plus, ptok_min, ptok_gt, ptok_lt, ptok_pipe: Expression handlers.
; Each entry point loads its expression type into A, and continues at
; ptok_expr (ptok_pipe falls through into it).
;
; ptok_expr: Build a TOK_EXPR token.
; In:  A = expression type, which is saved in t_type.
; Sets both t_id, and lex_type to TOK_EXPR, steps the string index past
; the current character, then calls make_tok, and set_cmdbuf.
ptok_plus:
	lda #EXPR_PLUS	; Set the expression type to EXPR_PLUS.
	bra ptok_expr	; Set up the token.
ptok_min:
	lda #EXPR_MINUS	; Set the expression type to EXPR_MINUS.
	bra ptok_expr	; Set up the token.
ptok_gt:
	lda #EXPR_LOW	; Set the expression type to EXPR_LOW.
	bra ptok_expr	; Set up the token.
ptok_lt:
	lda #EXPR_HIGH	; Set the expression type to EXPR_HIGH.
	bra ptok_expr	; Set up the token.
ptok_pipe:
	lda #EXPR_OR	; Set the expression type to EXPR_OR.
ptok_expr:
	sta t_type	; Set the token type to the expression type.
	lda #TOK_EXPR	; Set the lexeme type to TOK_EXPR.
	sta t_id	; Also set the token ID to TOK_EXPR.
	sta lex_type	;
	inc.w idx0	; Skip the current character.
;	ldb #1		; Make init_lex increment the string index.
;	jsr init_lex	; Initialize the lexeme buffer for copying.
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.
; ptok_lbrk, ptok_rbrk, ptok_com, ptok_br, ptok_xr, ptok_yr, ptok_sp,
; ptok_pc: Minimal handlers.
; Each one only steps the string index past the current character, and
; returns. No token fields are written, and make_tok is not called.
ptok_lbrk:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_rbrk:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_com:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_br:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_xr:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_yr:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_sp:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
ptok_pc:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
; ptok_dqu: Handler for the double quote pre-token.
;
; Steps the string index past the current character, copies the text that
; follows into the lexeme buffer with delmcpy (comparison value 5, escape
; checking requested, isdelm selected by A = 0), then marks the token as
; TOK_DQUOT, and saves the lexeme buffer address in t_str.
; Unlike most of the other handlers, set_cmdbuf is not called at the end.
ptok_dqu:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #5		; Set the delimiter comparison value to a double quote, or EOL.
	lda #1		; Set the isesc flag to true.
	pha		; Pass it to delmcpy on the stack.
	and #0		; Make delmcpy use isdelm.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	pla		; Remove the flag from the stack.
	lda #TOK_DQUOT	; Set the lexeme type to TOK_DQUOT.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_DQUOT.
	lda.d ptr3	; Get the address of the lexeme buffer.
			; NOTE(review): loaded with .d, but stored with .q
			; below - confirm the widths are meant to differ.
	sta.q t_str	; Save it in the token string.
@end:
	jsr make_tok	; Create the token.
	rts		; End of parse_ptok.
; ptok_squ: Handler for the single quote pre-token.
;
; Steps the string index past the current character, copies the text that
; follows into the lexeme buffer with delmcpy (comparison value 9, escape
; checking requested, isdelm selected by A = 0), then marks the token as
; TOK_SQUOT, and saves the lexeme buffer address in t_str.
; Unlike most of the other handlers, set_cmdbuf is not called at the end.
ptok_squ:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #9		; Set the delimiter comparison value to a single quote, or EOL.
	lda #1		; Set the isesc flag to true.
	pha		; Pass it to delmcpy on the stack.
	and #0		; Make delmcpy use isdelm.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	pla		; Remove the flag from the stack.
	lda #TOK_SQUOT	; Set the lexeme type to TOK_SQUOT.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_SQUOT.
	lda.d ptr3	; Get the address of the lexeme buffer.
			; NOTE(review): loaded with .d, but stored with .q
			; below - confirm the widths are meant to differ.
	sta.q t_str	; Save it in the token string.
@end:
	jsr make_tok	; Create the token.
	rts		; End of parse_ptok.
; ptok_hash: Minimal handler.
; Only steps the string index past the current character, and returns.
; No token fields are written, and make_tok is not called.
ptok_hash:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.
; ptok_scol: Handler for the semicolon pre-token.
;
; Steps the string index past the current character, copies the rest of the
; line into the lexeme buffer with delmcpy (comparison value 1, no escape
; checking), then marks the token as TOK_SCOLN, and saves the lexeme buffer
; address in t_str.
; Unlike most of the other handlers, set_cmdbuf is not called at the end.
ptok_scol:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #1		; Set the delimiter to EOL.
	lda #0		; Set the isesc flag to false.
	pha		; Pass it to delmcpy on the stack.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until EOL.
	pla		; Remove the flag from the stack.
	lda #TOK_SCOLN	; Set the lexeme type to TOK_SCOLN.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_SCOLN.
	lda.d ptr3	; Get the address of the lexeme buffer.
			; NOTE(review): loaded with .d, but stored with .q
			; below - confirm the widths are meant to differ.
	sta.q t_str	; Save it in the token string.
@end:
	jsr make_tok	; Create the token.
	rts		; End of parse_ptok.
; ptok_dolr, ptok_prcn, ptok_num: Numeric literal handlers.
; Each entry point sets lex_type, and t_id to its token kind (TOK_HEX,
; TOK_BIN, or TOK_DEC), loads the base into A ($10, 2, or 10), and sets B
; to tell init_lex whether the string index has to be incremented first
; (1 for ptok_dolr, and ptok_prcn, 0 for ptok_num). ptok_num falls through
; into ptok_num2.
;
; ptok_num2: Shared tail of the numeric handlers.
; In:  A = base, B = init_lex increment flag.
; Copies the digits into the lexeme buffer with delmcpy (comparison value
; 3, no escape checking), converts them with strtoullg using the saved
; base, stores the result in t_val, then calls make_tok, and set_cmdbuf.
ptok_dolr:
	lda #TOK_HEX	; Set the lexeme type to TOK_HEX.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_HEX.
	lda #$10	; Set the base to Hexadecimal.
	ldb #1		; Make init_lex increment the string index.
	bra ptok_num2	; Parse the value.
ptok_prcn:
	lda #TOK_BIN	; Set the lexeme type to TOK_BIN.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_BIN.
	lda #2		; Set the base to Binary.
	ldb #1		; Make init_lex increment the string index.
	bra ptok_num2	; Parse the value.
ptok_num:
	lda #TOK_DEC	; Set the lexeme type to TOK_DEC.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_DEC.
	lda #10		; Set the base to Decimal.
	ldb #0		; Do not let init_lex increment the string index.
ptok_num2:
	pha 		; Preserve the base.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #3		; Set the delimiter to both the EOL, and a comma.
	lda #0		; Set the isesc flag to false.
	pha		; Pass it to delmcpy on the stack, above the saved base.
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	pla		; Remove the flag from the stack.
	pla 		; Get the base back.
	jsr strtoullg	; Convert the string into a numeric value.
	sta.q t_val	; Set the token value to the converted value.
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.
; ptok_alph: Handler for the alphabetic pre-token.
;
; Copies the word at the string index into the lexeme buffer (delmcpy with
; comparison value 3, and isdelm2 selected by a non zero A), clears the isop
; flag, then compares the lexeme against each string of the mne table with
; strcaseg. On a match, the token becomes TOK_MNE, with the instruction ID
; (idx1) in t_val, $FF in t_type, and the isop flag set. If no entry
; matches after OPNUM strings, the token fields are left as they were.
; Both cases finish by calling make_tok, and set_cmdbuf.
ptok_alph:
	ldb #0		; Do not let init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #3		; Stop at any possible delimiter.
	lda #0		; Set the isesc flag to false.
	pha		; Pass it to delmcpy on the stack.
	tba		; Use isdelm2 for the comparison (A is now non zero).
	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
	pla		; Remove the flag from the stack.
	lda #0		; Reset A.
	sta regb	; Clear the isop flag.
@isop:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index.
	stb.q idx2	; Reset the third index.
	jsr set_lexptr	; Set up the lexeme buffer.
@isop_loop:
	ldb idx1	; Get the instruction ID.
	cpb #OPNUM	; Have we reached the end of the mnemonic table?
	beq @end	; Yes, so we're done.
	lda.w #mne	; No, so get the start of the mnemonic table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer, by the length of the previous strings.
	pha.q		; Preserve the mnemonic string pointer.
	jsr strcaseg	; Is the lexeme buffer, the same as the mnemonic string?
	pla.q		; Get the mnemonic string pointer back.
	beq @found	; Yes, so create a new token.
	inc idx1	; No, so increment the instruction ID.
@offset:
	jsr strlen	; Get the string's length.
	inc		; Add one to the length, for the terminator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the string offset to the current length
	sta.w idx2	; Save the offset in the third index.
	bra @isop_loop	; Keep looping.
@found:
	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_MNE.
	lda.q idx1	; Get the instruction ID.
	sta.q t_val	; Set the token value to the instruction ID.
	lda #$FF	; Set the token type to -1.
	sta t_type	;
	inc regb	; Set the isop flag.
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; End of parse_ptok.
; ptok_othr: Minimal handler.
; Only steps the string index past the current character, and returns.
; No token fields are written, and make_tok is not called.
ptok_othr:
	inc.w idx0	; Skip the current character.
	rts		; End of parse_ptok.


; set_lexptr: Point one of the pointers at the lexeme buffer.
;
; In:  B = pointer selector, passed through to set_ptr. The callers in this
;      file use 0 for the first pointer, and 2 for the third pointer.
; Out: A = 0, B = 0, and both idx1, and idx2 are cleared.
set_lexptr:
	lda.d #lexeme	; Set the pointer to the lexeme buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	sta.q idx1	; Reset the second index.
	sta.q idx2	; Reset the third index
	rts		; End of set_lexptr.


; set_cmdbuf: Point the first pointer at the command buffer.
;
; Takes no arguments. B is cleared before the call to set_ptr, which the
; callers in this file use to select the first pointer.
; Out: A = 0, B = 0.
set_cmdbuf:
	and #0		; Reset A.
	tab		; Reset B, to select the first pointer.
	lda.d #cmd_buf	; Set the first pointer to the command buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	rts		; End of set_cmdbuf.


; init_lex: Prepare the lexeme buffer for a new lexeme.
;
; In:  B = non zero to increment the string index (idx0) first, zero to
;      leave it alone.
; Makes the lexeme buffer the third pointer (which also clears idx1, and
; idx2 through set_lexptr), then zeroes the buffer, from its start, up to
; the first zero byte that is already there.
; Out: Y is preserved.
init_lex:
	cpb #0		; Do we need to increment the string index?
	beq @init	; No, so skip that step.
@inc_str:
	inc.w idx0	; Yes, so increment the string index.
@init:
	ldb #2		; Make the lexeme buffer, the third pointer.
	jsr set_lexptr	; Set up the lexeme buffer.
	phy.w		; Preserve Y.
	tay		; Reset Y (set_lexptr returns with A cleared).
@loop:
	lda (ptr3), y	; Have we hit the end of the previous lexeme string?
	beq @end	; Yes, so we're done.
	lda #0		; No, so start clearing the character.
	sta (ptr3), y	; Clear the character.
	iny		; Increment the lexeme index.
	bra @loop	; Keep looping.
@end:
	ply.w		; Get Y back.
	rts		; End of init_lex.


; delmcpy: Copy characters from the line, into the lexeme buffer, until a
; delimiter is reached.
;
; In:  A = delimiter check flag: zero uses isdelm, non zero uses isdelm2.
;      B = delimiter comparison value, ANDed with the isdelm/isdelm2 result.
;      One byte pushed by the caller, right before the jsr = do_isesc flag
;      (read here as sp+12). The caller removes it after the call.
; Uses idx0 as the source index into (ptr), and idx1 as the destination
; index into (ptr3). Both are incremented for every character copied.
; A character is copied when the ANDed delimiter value is zero, or when the
; local isesc flag is non zero. Otherwise, the copy ends.
; Out: The lexeme buffer is zero terminated at idx1, Y = idx0, A = 0.
;
; Three bytes are pushed at entry, and dropped again at @end. As used below:
;   sp+1 = local isesc flag (starts at 0), sp+2 = saved B, sp+3 = saved A.
; Inside @loop the character is pushed for a moment, which is why the saved
; A is read as sp+4 there.
; NOTE(review): sp+12 assumes that the return address fills the bytes
; between the three local bytes, and the caller's flag - confirm.
delmcpy:
	pha		; Save the delimiter check flag.
	phb		; Save the delimiter comparison value.
	and #0		; Reset A.
	pha		; Reset the isesc flag.
;	sta rega	; Save the delimiter check flag.
;	stb regc	; Save the delimiter comparison value.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha 		; Preserve the character.
	lda sp+4	; Are we calling isdelm2?
	pla 		; Get the character back.
			; NOTE(review): the branch below tests the flags of
			; the lda, so pla must leave the flags alone - confirm.
	bne @isdelm2	; Yes, so use isdelm2.
	jsr isdelm	; No, so get the delimiter value from isdelm.
@delmchk:
	and sp+2	; Does the delimiter value match the comparison value?
	beq @copy	; No, so copy the character.
@isesc:
	lda sp+1	; Was the isesc flag true?
	beq @end	; No, so we're done.
	bra @copy	; Yes, so copy the character.
@isdelm2:
	jsr isdelm2	; Get the delimiter value from isdelm2.
	bra @delmchk	; Check the delimiter.
@copy:
	lda sp+12	; Was the do_isesc flag set?
	bne @do_isesc	; Yes, so set the isesc flag.
@copy1:
	lda (ptr), y	; Get a character from the line.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Copy the character to the lexeme buffer.
	inc.w idx0	; Increment the string index.
	inc.w idx1	; Increment the lexeme index.
	bra @loop	; Keep looping.
@do_isesc:
	jsr isesc	; Check if this is an escaped character.
	sta sp+1	; Save it in the isesc flag.
	bra @copy1	; Copy the character.
@end:
	pla.w		; Drop two of the three bytes pushed at entry.
	pla		; Drop the third byte pushed at entry.
	and #0		; Reset A.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Terminate the lexeme buffer.
	ldy.w idx0	; Get the string index.
	rts		; End of delmcpy.

;@loop:
;	ldb #0		; Reset the B register.
;	stb regg	; Reset the byte count.
;	ldy.w idx0	; Get the string index.
;	lda.q (ptr), y	; Get eight bytes from the current line.
;@loop1:
;	pha.q		; Save the string buffer.
;	and #$FF	; Get the current byte.
;	pha 		; Preserve the character.
;	lda rega	; Are we calling isdelm2?
;	pla		; Get the character back.
;	bne @isdelm2	; Yes, so use isdelm2.
;	jsr isdelm	; No, so get the delimiter value from isdelm.
;@delmchk:
;	and regc	; Are both delimiter values, the same?
;	pla.q		; Get back the string buffer.
;	bne @end	; Yes, so we're done.
;	bra @copy	; No, so start copying the character.
;@isdelm2:
;	jsr isdelm2	; Get the delimiter value from isdelm2.
;	bra @delmchk	; Check the delimiter.
;@copy:
;	ldy.w idx1	; Get the lexeme index.
;	sta (ptr3), y	; Copy one byte from the screen buffer, to the command buffer.
;	inc.w idx0	; Increment the string index.
;	inc.w idx1	; Increment the lexeme index.
;	lsr #8		; Shift in the next byte.
;	inc regg	; Increment the byte count.
;	ldb regg	; Get back the byte count.
;	cpb #7		; Did we shift in eight bytes?
;	beq @loop	; Yes, so get eight more bytes.
;	bra @loop1	; No, so keep shifting in more bytes.
;@end:
;	ldb #0		; Reset B.
;	ldy.w idx1	; Get the lexeme index.
;	stb (ptr3), y	; Terminate the command buffer.
;@end1:
;	ldy.w idx0	; Get the string index.
;	tba		; Reset A.
;	rts		; End of delmcpy.


; get_rs: Translate a size suffix character into a register size code.
;
; In:  A = suffix character. Its case is folded with tolower.
; Out: A = 1 for 'w', 2 for 'd', 3 for 'q', and 0 for anything else.
get_rs:
	phb 		; Save B on the stack,
	ldb #0		; load B with zero,
	plb 		; and pull the saved value straight back into B.
			; NOTE(review): this sequence does not write regb, so
			; the isop flag is left alone - confirm whether a
			; store to the isop flag was intended here.
	jsr tolower	; Fold the suffix character to lowercase.
	cmp #'w'	; Is the suffix .w?
	beq @size_w	; Yes, so the size code is 1.
	cmp #'d'	; Is the suffix .d?
	beq @size_d	; Yes, so the size code is 2.
	cmp #'q'	; Is the suffix .q?
	beq @size_q	; Yes, so the size code is 3.
@size_none:
	lda #0		; Anything else gives a size code of 0.
	rts		; End of get_rs.
@size_w:
	lda #1		; Size code for .w.
	rts		; End of get_rs.
@size_d:
	lda #2		; Size code for .d.
	rts		; End of get_rs.
@size_q:
	lda #3		; Size code for .q.
	rts		; End of get_rs.


; make_tok: Create a token from the current token fields.
; Currently a stub: it executes a single nop, and returns, without reading
; or writing any of the token fields.
make_tok:
	nop		; Placeholder, no token is built yet.
@end:
	rts		; End of make_tok.