From 4ed07ca38b99abdca750c6612c512f30965f1714 Mon Sep 17 00:00:00 2001
From: mrb0nk500 <b0nk@b0nk.xyz>
Date: Sun, 30 Aug 2020 12:44:21 -0400
Subject: - Did some more work on SuBAsm's lexer.

- Optimized the memory read, and write functions.

- Made the emulator faster, and cleaner in general.
---
 programs/sub-suite/lexer.s | 228 +++++++++++++++++++++++++++++++++------------
 1 file changed, 169 insertions(+), 59 deletions(-)

(limited to 'programs/sub-suite/lexer.s')

diff --git a/programs/sub-suite/lexer.s b/programs/sub-suite/lexer.s
index 3a856b5..72f1db6 100644
--- a/programs/sub-suite/lexer.s
+++ b/programs/sub-suite/lexer.s
@@ -8,6 +8,10 @@ lex:
 	sty.q idx0	; Clear the first index.
 	sty.q idx1	; Clear the second index.
 	sty.q idx2	; Clear the third index.
+	sty.d t_id	; Clear the token ID, type, space count, and tab count.
+	sty.q t_val	; Clear the token value.
+	sty.q t_str	; Clear the token string.
+	sty.q t_sym	; Clear the token symbol.
 	sty regb	; Clear the isop flag.
 ;	lda (ptr), y	; Get a character from the line.
 ;	pha 		; Preserve the character.
@@ -17,11 +21,11 @@ lex:
 	lda #2		; Get the third byte, of the line table address.
 	lsl #$10	; Shift it by 2 bytes.
 	ldb #1		; Set the second pointer
-	lda.w ltok	; to the last line.
+	lda.w lline	; to the last line.
 	jsr set_ptr	;
 	lda.w (ptr2)	; Get the next line.
 	jsr set_ptr	; Set the second pointer to the next line.
-	sta.w ctok	; Make it the current line.
+	sta.w cline	; Make it the current line.
 	and #0		; Reset A.
 @loop:
 	ldy.w idx0	; Get the string index.
@@ -36,7 +40,7 @@ lex:
 	jsr isdelm	; Get the delimiter.
 	and #$10	; Is this character, a space, or tab?
 	pla 		; Get the character back.
-	beq @isstart	; No, so check for the start of the line.
+	beq @switch	; No, so start lexing.
 	inc.w idx0	; Yes, so increment the string index.
 	cmp #' '	; Is this character, a space?
 	beq @incs	; Yes, so increment the starting space count.
@@ -44,31 +48,66 @@ lex:
 	beq @inct	; Yes, so increment the starting tab count.
 	bra @spaces	; No, so Keep looping.
 @incs:
-	inc idx1	; Increment the space count.
+	inc t_space	; Increment the space count.
 	bra @spaces	; Keep looping.
 @inct:
-	inc idx1+1	; Increment the tab count.
+	inc t_tab	; Increment the tab count.
 	bra @spaces	; Keep looping.
-@isstart:
-	pha.w		; Preserve the character.
-	lda.w idx1	; Was there any whitespace?
-	pla.w		; Get the character back.
-	beq @switch	; No, so start lexing.
-	cpb #1		; Yes, and are we at the start of the line?
-	bne @switch	; No, so start lexing.
-@whtspace:
-	ldy #2		; Yes, so set the line index to the starting whitespace counters.
-	lda.w idx1	; Get both indecies.
-	sta.w (ptr2), y	; Save them in the line.
-	and #0		; Reset A.
-	sta.w idx1	; Reset the second index.
-	deb		; Set the isstart flag to false.
 @switch:
 	ldy.w idx0	; Get the string index.
 	lda (ptr), y	; Get the character.
 	jsr get_ptok	; Get the pre-token.
+	pha		; Preserve the pre-token.
+	jsr is_altok	; Is this one of the single letter pre-tokens?
+	pla		; Get the pre-token back.
+	bne @is_altok	; Yes, so check the rest of the pre-token.
+@parse:
 	jsr parse_ptok	; Parse the pre-token.
 ;	beq @end	; We got to the end of the string.
+	lda lex_type	; Get the lexeme type.
+	cmp #TOK_EXPR	; Was this token, an expression?
+	beq @inc_idx	; Yes, so increment the index.
+	ldy.w idx0	; No, so get the string index.
+	lda (ptr), y	; Get a character from the line.
+	jsr isdelm2	; Is this not a delimiter?
+	beq @inc_idx	; Yes, so increment the index.
+	bra @loop	; No, so keep looping.
+@is_altok:
+	sta lex_type	; Save the pre-token in the lexeme type, since A gets reused below.
+	iny		; Increment the offset, to the character after the pre-token.
+	cmp #PTOK_S	; Is this pre-token, PTOK_S?
+	bne @ptok_p	; No, so check for PTOK_P.
+	lda (ptr), y	; Yes, so get the next character after it.
+	jsr tolower	; Convert it to lowercase.
+	cmp #'p'	; Is the next character 'p'?
+	bne @is_altok2	; No, and A is now a character, not a pre-token, so skip the PTOK_P check.
+	bra @inc_offset	; Yes, so increment the offset.
+@ptok_p:
+	cmp #PTOK_P	; Is this pre-token, PTOK_P?
+	bne @is_altok2	; No, so skip incrementing the offset.
+	lda (ptr), y	; Yes, so get the next character after it.
+	jsr tolower	; Convert it to lowercase.
+	cmp #'c'	; Is the next character 'c'?
+	bne @is_altok2	; No, so skip incrementing the offset.
+@inc_offset:
+	iny		; Increment the offset, past the second letter.
+@is_altok2:
+	lda (ptr), y	; Get the character, at the current offset.
+	jsr get_ptok	; Get the pre-token of the character.
+	cmp #PTOK_P	; Is this pre-token greater than PTOK_P?
+	bcc @ptok_num	; No, so check for PTOK_NUM. NOTE(review): confirm @ptok_num is defined in lex.
+	beq @ptok_num	; Equal to PTOK_P, also counts as not greater.
+	cmp #PTOK_B	; Yes, and is this pre-token greater than, or equal to PTOK_B?
+	bcs @ptok_al	; Yes, so set the pre-token to PTOK_ALPH.
+	lda lex_type	; No, so get the original pre-token back.
+	ldy.w idx0	; Get the string index.
+	bra @parse	; Go back to parsing the pre-token.
+@ptok_al:
+	lda #PTOK_ALPH	; Set the pre-token to PTOK_ALPH.
+	ldy.w idx0	; Get the string index.
+	bra @parse	; Go back to parsing the pre-token.
+@inc_idx:
+	inc.w idx0	; Increment the string index.
 	bra @loop	; Keep looping.
 @end:
 	jsr update_ptr	; Get the screen buffer index.
@@ -97,21 +136,23 @@ ptok_dot:
 	ldb #1		; Make init_lex increment the string index.
 	jsr init_lex	; Initialize the lexeme buffer for copying.
 	ldb #$11	; Set the delimiter comparison value to whitespace.
+	lda #0		; Set the isesc flag to false.
+	pha		;
 	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
+	pla		;
 @isop:
 	lda regb	; Has the isop flag been set?
 	beq @dir	; No, so check for a directive.
 @rs:
 	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
 	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_RS.
 	ldy.w idx1	; Get the lexeme index.
 	dey		; Decrement the lexeme index.
 	lda (ptr3), y	; Get the suffix character.
 	jsr get_rs	; Get the register size.
 	bra @end	; We are done.
 @dir:
-	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
-	sta lex_type	;
 	ldb #0		; Make the lexeme buffer, the first pointer.
 	stb.q idx1	; Reset the first index.
 	jsr set_lexptr	; Set up the lexeme buffer.
@@ -135,7 +176,11 @@ ptok_dot:
 	sta.w idx2	; Save the offset in the third index.
 	bra @dir_loop	; Keep looping.
 @found:
-	nop		;
+	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
+	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_DIR.
+	lda idx1	; Set the token type to the directive ID.
+	sta t_type	;
 @end:
 	jsr make_tok	; Create the token.
 	jsr set_cmdbuf	; Set the first pointer to the command buffer.
@@ -156,7 +201,6 @@ ptok_min:
 	lda #EXPR_MINUS	; Set the expresion type to EXPR_MINUS.
 	bra ptok_expr	; Set up the token.
 ptok_gt:
-
 	lda #EXPR_LOW	; Set the expresion type to EXPR_LOW.
 	bra ptok_expr	; Set up the token.
 ptok_lt:
@@ -165,7 +209,9 @@ ptok_lt:
 ptok_pipe:
 	lda #EXPR_OR	; Set the expresion type to EXPR_OR.
 ptok_expr:
+	sta t_type	; Set the token type to the expression type.
 	lda #TOK_EXPR	; Set the lexeme type to TOK_EXPR.
+	sta t_id	; Also set the token ID to TOK_EXPR.
 	sta lex_type	;
 	inc.w idx0	;
 ;	ldb #1		; Make init_lex increment the string index.
@@ -182,6 +228,9 @@ ptok_rbrk:
 ptok_com:
 	inc.w idx0	;
 	rts		; End of parse_ptok.
+ptok_br:
+	inc.w idx0	; Increment the string index.
+	rts		; End of parse_ptok.
 ptok_xr:
 	inc.w idx0	;
 	rts		; End of parse_ptok.
@@ -197,16 +246,36 @@ ptok_pc:
 ptok_dqu:
 	ldb #1		; Make init_lex increment the string index.
 	jsr init_lex	; Initialize the lexeme buffer for copying.
-	ldb #4		; Set the delimiter comparison value to a double quote.
+	ldb #5		; Set the delimiter comparison value to a double quote, or EOL.
+	lda #1		; Set the isesc flag to true.
+	pha		; Pass it to delmcpy on the stack, as the do_isesc flag.
+	and #0		; Make delmcpy use isdelm.
 	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
+	pla		; Pull the do_isesc flag back off the stack.
+	lda #TOK_DQUOT	; Set the lexeme type to TOK_DQUOT.
+	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_DQUOT.
+	lda.d ptr3	; Get the address of the lexeme buffer.
+	sta.q t_str	; Save it in the token string. NOTE(review): loaded as .d, but stored as .q, confirm the widths.
 @end:
+	jsr make_tok	; Create the token.
 	rts		; End of parse_ptok.
 ptok_squ:
 	ldb #1		; Make init_lex increment the string index.
 	jsr init_lex	; Initialize the lexeme buffer for copying.
-	ldb #8		; Set the delimiter comparison value to a single quote.
+	ldb #9		; Set the delimiter comparison value to a single quote, or EOL.
+	lda #1		; Set the isesc flag to true.
+	pha		; Pass it to delmcpy on the stack, as the do_isesc flag.
+	and #0		; Make delmcpy use isdelm.
 	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
+	pla		; Pull the do_isesc flag back off the stack.
+	lda #TOK_SQUOT	; Set the lexeme type to TOK_SQUOT.
+	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_SQUOT.
+	lda.d ptr3	; Get the address of the lexeme buffer.
+	sta.q t_str	; Save it in the token string. NOTE(review): loaded as .d, but stored as .q, confirm the widths.
 @end:
+	jsr make_tok	; Create the token.
 	rts		; End of parse_ptok.
 ptok_hash:
 	inc.w idx0	;
@@ -215,33 +284,49 @@ ptok_scol:
 	ldb #1		; Make init_lex increment the string index.
 	jsr init_lex	; Initialize the lexeme buffer for copying.
 	ldb #1		; Set the delimiter to EOL.
+	lda #0		; Set the isesc flag to false.
+	pha		;
 	jsr delmcpy	; Copy the string, to the lexeme buffer, until EOL.
+	pla		;
+	lda #TOK_SCOLN	; Set the lexeme type to TOK_SCOLN.
+	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_SCOLN.
+	lda.d ptr3	; Get the address of the lexeme buffer.
+	sta.q t_str	; Save it in the token string.
 @end:
+	jsr make_tok	; Create the token.
 	rts		; End of parse_ptok.
 ptok_dolr:
 	lda #TOK_HEX	; Set the lexeme type to TOK_HEX.
 	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_HEX.
 	lda #$10	; Set the base to Hexadecimal.
 	ldb #1		; Make init_lex increment the string index.
 	bra ptok_num2	; Parse the value.
 ptok_prcn:
 	lda #TOK_BIN	; Set the lexeme type to TOK_BIN.
 	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_BIN.
 	lda #2		; Set the base to Binary.
 	ldb #1		; Make init_lex increment the string index.
 	bra ptok_num2	; Parse the value.
 ptok_num:
 	lda #TOK_DEC	; Set the lexeme type to TOK_DEC.
 	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_DEC.
 	lda #10		; Set the base to Decimal.
 	ldb #0		; Do not let init_lex increment the string index.
 ptok_num2:
 	pha 		; Preserve the base.
 	jsr init_lex	; Initialize the lexeme buffer for copying.
 	ldb #3		; Set the delimiter to both the EOL, and a comma.
+	lda #0		; Set the isesc flag to false, and make delmcpy use isdelm.
+	pha		; Pass the isesc flag to delmcpy on the stack, as the do_isesc flag.
 	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
+	pla		; Pull the do_isesc flag back off the stack.
 	pla 		; Get the base back.
 	jsr strtoullg	; Convert the string into a numeric value.
+	sta.q t_val	; Set the token value to the converted value.
 	jsr make_tok	; Create the token.
 	jsr set_cmdbuf	; Set the first pointer to the command buffer.
 	rts		; End of parse_ptok.
@@ -249,17 +334,21 @@ ptok_alph:
 	ldb #0		; Do not let init_lex increment the string index.
 	jsr init_lex	; Initialize the lexeme buffer for copying.
 	ldb #3		; Stop at any possible delimiter.
+	lda #0		; Set the isesc flag to false.
+	pha		;
 	tba		; Use isdelm2 for the comparison.
 	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
+	pla		;
 	lda #0		; Reset A.
 	sta regb	; Clear the isop flag.
 @isop:
 	ldb #0		; Make the lexeme buffer, the first pointer.
 	stb.q idx1	; Reset the second index.
+	stb.q idx2	; Reset the third index.
 	jsr set_lexptr	; Set up the lexeme buffer.
 @isop_loop:
 	ldb idx1	; Get the instruction ID.
-	cpb #OPNUM-1	; Have we reached the end of the mnemonic table?
+	cpb #OPNUM	; Have we reached the end of the mnemonic table?
 	beq @end	; Yes, so we're done.
 	lda.w #mne	; No, so get the start of the mnemonic table.
 	clc		; Prepare for a non carrying add.
@@ -268,7 +357,6 @@ ptok_alph:
 	jsr strcaseg	; Is the lexeme buffer, the same as the mnemonic string?
 	pla.q		; Get the mnemonic string pointer back.
 	beq @found	; Yes, so create a new token.
-	beq @end	; Yes, so we're done.
 	inc idx1	; No, so increment the instruction ID.
 @offset:
 	jsr strlen	; Get the string's length.
@@ -280,6 +368,11 @@ ptok_alph:
 @found:
 	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
 	sta lex_type	;
+	sta t_id	; Also set the token ID to TOK_MNE.
+	lda.q idx1	; Get the instruction ID.
+	sta.q t_val	; Set the token value to the instruction ID.
+	lda #$FF	; Set the token type to -1.
+	sta t_type	;
 	inc regb	; Set the isop flag.
 @end:
 	jsr make_tok	; Create the token.
@@ -333,79 +426,96 @@ init_lex:
 
 
 delmcpy:
-	sta rega	; Save the delimiter check flag.
-	stb regc	; Save the delimiter comparison value.
+	pha		; Save the delimiter check flag (non-zero means use isdelm2).
+	phb		; Save the delimiter comparison value.
+	and #0		; Reset A.
+	pha		; Reset the isesc flag, which is now the top byte of the stack (sp+1).
+;	sta rega	; Save the delimiter check flag.
+;	stb regc	; Save the delimiter comparison value.
 @loop:
-	ldb #0		; Reset the B register.
-	stb regg	; Reset the byte count.
 	ldy.w idx0	; Get the string index.
-	lda.q (ptr), y	; Get eight bytes from the current line.
-@loop1:
-	pha.q		; Save the string buffer.
-	and #$FF	; Get the current byte.
+	lda (ptr), y	; Get a character from the line.
 	pha 		; Preserve the character.
-	lda rega	; Are we calling isdelm2?
-	pla		; Get the character back.
+	lda sp+4	; Are we calling isdelm2? (the check flag is at sp+4, while the character is pushed)
+	pla 		; Get the character back.
 	bne @isdelm2	; Yes, so use isdelm2.
 	jsr isdelm	; No, so get the delimiter value from isdelm.
 @delmchk:
-	and regc	; Are both delimiter values, the same?
-	pla.q		; Get back the string buffer.
-	bne @end	; Yes, so we're done.
-	bra @copy	; No, so start copying the character.
+	and sp+2	; Are both delimiter values, the same? (sp+2 is the saved comparison value)
+	beq @copy	; No, so copy the character.
+@isesc:
+	lda sp+1	; Was the isesc flag true?
+	beq @end	; No, so we're done.
+	bra @copy	; Yes, so copy the character.
 @isdelm2:
 	jsr isdelm2	; Get the delimiter value from isdelm2.
 	bra @delmchk	; Check the delimiter.
 @copy:
+	lda sp+12	; Was the do_isesc flag, that the caller pushed before the jsr, set?
+	bne @do_isesc	; Yes, so set the isesc flag.
+@copy1:
+	lda (ptr), y	; Get the character from the line again, since A was used for the flag check.
 	ldy.w idx1	; Get the lexeme index.
-	sta (ptr3), y	; Copy one byte from the screen buffer, to the command buffer.
+	sta (ptr3), y	; Copy the character to the lexeme buffer.
 	inc.w idx0	; Increment the string index.
 	inc.w idx1	; Increment the lexeme index.
-	lsr #8		; Shift in the next byte.
-	inc regg	; Increment the byte count.
-	ldb regg	; Get back the byte count.
-	cpb #7		; Did we shift in eight bytes?
-	beq @loop	; Yes, so get eight more bytes.
-	bra @loop1	; No, so keep shifting in more bytes.
+	bra @loop	; Keep looping.
+@do_isesc:
+	jsr isesc	; Check if this is an escaped character.
+	sta sp+1	; Save it in the isesc flag.
+	bra @copy1	; Copy the character.
 @end:
-	ldb #0		; Reset B.
+	pla.w		; Pull the isesc flag, and the delimiter comparison value off the stack.
+	pla		; Pull the delimiter check flag off the stack.
+	and #0		; Reset A.
 	ldy.w idx1	; Get the lexeme index.
-	stb (ptr3), y	; Terminate the command buffer.
-@end1:
+	sta (ptr3), y	; Terminate the lexeme buffer.
 	ldy.w idx0	; Get the string index.
-	tba		; Reset A.
 	rts		; End of delmcpy.
 
 ;@loop:
+;	ldb #0		; Reset the B register.
+;	stb regg	; Reset the byte count.
 ;	ldy.w idx0	; Get the string index.
-;	lda (ptr), y	; Get a character from the line.
+;	lda.q (ptr), y	; Get eight bytes from the current line.
+;@loop1:
+;	pha.q		; Save the string buffer.
+;	and #$FF	; Get the current byte.
 ;	pha 		; Preserve the character.
 ;	lda rega	; Are we calling isdelm2?
-;	pla 		; Get the character back.
+;	pla		; Get the character back.
 ;	bne @isdelm2	; Yes, so use isdelm2.
 ;	jsr isdelm	; No, so get the delimiter value from isdelm.
 ;@delmchk:
 ;	and regc	; Are both delimiter values, the same?
+;	pla.q		; Get back the string buffer.
 ;	bne @end	; Yes, so we're done.
 ;	bra @copy	; No, so start copying the character.
 ;@isdelm2:
 ;	jsr isdelm2	; Get the delimiter value from isdelm2.
 ;	bra @delmchk	; Check the delimiter.
 ;@copy:
-;	lda (ptr), y	; Get a character from the line.
 ;	ldy.w idx1	; Get the lexeme index.
-;	sta (ptr3), y	; Copy the character to the lexeme buffer.
+;	sta (ptr3), y	; Copy one byte from the screen buffer, to the command buffer.
 ;	inc.w idx0	; Increment the string index.
 ;	inc.w idx1	; Increment the lexeme index.
-;	bra @loop	; Keep looping.
+;	lsr #8		; Shift in the next byte.
+;	inc regg	; Increment the byte count.
+;	ldb regg	; Get back the byte count.
+;	cpb #7		; Did we shift in eight bytes?
+;	beq @loop	; Yes, so get eight more bytes.
+;	bra @loop1	; No, so keep shifting in more bytes.
 ;@end:
+;	ldb #0		; Reset B.
 ;	ldy.w idx1	; Get the lexeme index.
-;	lda #0		; Terminate the lexeme buffer.
-;	sta (ptr3), y	;
+;	stb (ptr3), y	; Terminate the command buffer.
+;@end1:
 ;	ldy.w idx0	; Get the string index.
+;	tba		; Reset A.
 ;	rts		; End of delmcpy.
 
 
+
 get_rs:
 	phb 		; Preserve B.
 	ldb #0		; Set the isop flag to false.
-- 
cgit v1.2.3-13-gbd6f