From 887802efcdb3b56263069cc6778a8f53ed89d599 Mon Sep 17 00:00:00 2001
From: mrb0nk500 <b0nk@b0nk.xyz>
Date: Mon, 22 Jun 2020 17:56:52 -0400
Subject: Did some more stuff.

- Fixed some bugs in the emulator's assembler.
- Worked on SuBAsm's lexer some more.
- Created a new directory for the SuB suite, and moved
  all of the SuB suite's files in there.
---
 programs/sub-suite/lexer.s | 413 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 413 insertions(+)
 create mode 100644 programs/sub-suite/lexer.s

(limited to 'programs/sub-suite/lexer.s')

diff --git a/programs/sub-suite/lexer.s b/programs/sub-suite/lexer.s
new file mode 100644
index 0000000..315bee4
--- /dev/null
+++ b/programs/sub-suite/lexer.s
@@ -0,0 +1,413 @@
+; Lexer, and supporting routines for SuBAsm.
+
+; Enums.
+
+; Directive IDs (presumably in the same order as the directive string table - TODO confirm).
+DIR_ORG     =  0	; Origin.
+DIR_BYTE    =  1	; Byte  =  8 bits.
+DIR_WORD    =  2	; Word  = 16 bits.
+DIR_DWORD   =  3	; Dword = 32 bits.
+DIR_QWORD   =  4	; Qword = 64 bits.
+DIR_INCL    =  5	; Include.
+
+; Token types (the values stored in lex_type).
+TOK_DIR     =  0	; Directive.
+TOK_LOCAL   =  1	; Local symbol.
+TOK_LABEL   =  2	; Label.
+TOK_SYM     =  3	; Symbol.
+TOK_EXPR    =  4	; Expression.
+TOK_CSV     =  5	; Comma separated value.
+TOK_STR     =  6	; String.
+TOK_CHAR    =  7	; Character.
+TOK_IND     =  8	; Indirect addressing.
+TOK_IMM     =  9	; Immediate data.
+TOK_MNE     = 10	; Opcode/Mnemonic.
+TOK_RS      = 11	; Register size prefix.
+TOK_COMM    = 12	; Comment.
+TOK_HEX     = 13	; Hex value.
+TOK_DEC     = 14	; Decimal value.
+TOK_BIN     = 15	; Binary value.
+TOK_INCL    = 16	; Include file.
+
+; Pre-Tokens (each value is the entry number of its handler in swtab).
+PTOK_DOT    =  0	; .
+PTOK_AT     =  1	; @
+PTOK_COLON  =  2	; :
+PTOK_EQU    =  3	; =
+PTOK_PLUS   =  4	; +
+PTOK_MINUS  =  5	; -
+PTOK_GT     =  6	; >
+PTOK_LT     =  7	; <
+PTOK_LBRAK  =  8	; (
+PTOK_RBRAK  =  9	; )
+PTOK_COMMA  = 10	; ,
+PTOK_X      = 11	; x
+PTOK_Y      = 12	; y
+PTOK_DQUOT  = 13	; "
+PTOK_SQUOT  = 14	; '
+PTOK_HASH   = 15	; #
+PTOK_SCOLN  = 16	; ;
+PTOK_DOLR   = 17	; $
+PTOK_PRCNT  = 18	; %
+PTOK_NUM    = 19	; 0-9
+PTOK_ALPH   = 20	; a-z A-Z
+PTOK_OTHR   = 21	; Everything else.
+
+; Expression types.
+EXPR_PLUS   =  0	; Plus.
+EXPR_MINUS  =  1	; Minus.
+EXPR_LOW    =  2	; Lower half of address.
+EXPR_HIGH   =  3	; Upper half of address.
+EXPR_NONE   =  4	; No expression.
+
+
+; Data.
+.org lexer_data
+; Jump table for parse_ptok: one .word handler address per PTOK_* value, in enum order (index = PTOK_* * 2).
+swtab:
+	.word ptok_dot	; PTOK_DOT
+	.word ptok_at	; PTOK_AT
+	.word ptok_col	; PTOK_COLON
+	.word ptok_equ	; PTOK_EQU
+	.word ptok_plus	; PTOK_PLUS
+	.word ptok_min	; PTOK_MINUS
+	.word ptok_gt	; PTOK_GT
+	.word ptok_lt	; PTOK_LT
+	.word ptok_lbrk	; PTOK_LBRAK
+	.word ptok_rbrk	; PTOK_RBRAK
+	.word ptok_com	; PTOK_COMMA
+	.word ptok_xr	; PTOK_X
+	.word ptok_yr	; PTOK_Y
+	.word ptok_dqu	; PTOK_DQUOT
+	.word ptok_squ	; PTOK_SQUOT
+	.word ptok_hash	; PTOK_HASH
+	.word ptok_scol	; PTOK_SCOLN
+	.word ptok_dolr	; PTOK_DOLR
+	.word ptok_prcn	; PTOK_PRCNT
+	.word ptok_num	; PTOK_NUM
+	.word ptok_alph	; PTOK_ALPH
+	.word ptok_othr	; PTOK_OTHR
+
+; Data entry point for utility subroutines (nothing is placed here within this file).
+util_data:
+
+
+; Program code. lex is the lexer's entry point: it scans the line at (ptr) and hands each pre-token to parse_ptok.
+.org lexer
+lex:
+	ldx #0		; Reset X.
+	txa		; Reset A.
+	phy #2		; Preserve the screen buffer index (restored at @end).
+	txy		; Reset Y.
+	sty.q idx0	; Clear the first index (the string index into the line).
+	sty.q idx1	; Clear the second index (starting space/tab counters here, lexeme index later).
+	sty.q idx2	; Clear the third index.
+	sty b		; Clear the isop flag.
+;	lda (ptr), y	; Get a character from the line.
+;	pha #1		; Preserve the character.
+;	jsr isdigit	; Is this character a digit?
+;	pla #1		; Get the character back.
+@getline:
+	lda #2		; Get the third byte, of the line table address.
+	lsl #$10	; Shift it by 2 bytes.
+	ldb #1		; Set the second pointer (B = 1 also serves as the isstart flag tested at @isstart)
+	lda.w ltok	; to the last line. NOTE(review): this reloads A right after the shift - confirm lda.w keeps the upper bytes.
+	jsr set_ptr	;
+	lda.w (ptr2)	; Get the next line.
+	jsr set_ptr	; Set the second pointer to the next line.
+	sta.w ctok	; Make it the current line.
+	and #0		; Reset A.
+@loop:
+	ldy.w idx0	; Get the string index.
+	lda (ptr), y	; Get a character from the line.
+	jsr isdelm	; Get the delimiter.
+	cmp #1		; Are we at the end of the line?
+	beq @end	; Yes, so we're done.
+@spaces:
+	ldy.w idx0	; Get the string index.
+	lda (ptr), y	; Get a character from the line.
+	pha #1		; Preserve the character.
+	jsr isdelm	; Get the delimiter.
+	and #$10	; Is this character, a space, or tab?
+	pla #1		; Get the character back.
+	beq @isstart	; No, so check for the start of the line. NOTE(review): assumes pla leaves the flags from the and untouched - confirm.
+	inc.w idx0	; Yes, so increment the string index.
+	cmp #' '	; Is this character, a space?
+	beq @incs	; Yes, so increment the starting space count.
+	cmp #'\t'	; No, but is it a tab?
+	beq @inct	; Yes, so increment the starting tab count.
+	jmp @spaces	; No, so Keep looping.
+@incs:
+	inc idx1	; Increment the space count (first byte of idx1).
+	jmp @spaces	; Keep looping.
+@inct:
+	inc idx1+1	; Increment the tab count (second byte of idx1).
+	jmp @spaces	; Keep looping.
+@isstart:
+	pha #2		; Preserve the character.
+	lda.w idx1	; Was there any whitespace?
+	pla #2		; Get the character back.
+	beq @switch	; No, so start lexing.
+	cpb #1		; Yes, and are we at the start of the line? NOTE(review): relies on B surviving the calls made since @getline - confirm.
+	bne @switch	; No, so start lexing.
+@whtspace:
+	ldy #2		; Yes, so set the line index to the starting whitespace counters.
+	lda.w idx1	; Get both indices (space count and tab count).
+	sta.w (ptr2), y	; Save them in the line.
+	and #0		; Reset A.
+	sta.w idx1	; Reset the second index.
+	deb		; Set the isstart flag to false.
+@switch:
+	ldy.w idx0	; Get the string index.
+	lda (ptr), y	; Get the character.
+	jsr get_ptok	; Get the pre-token.
+	jsr parse_ptok	; Parse the pre-token.
+;	beq @end	; We got to the end of the string.
+	jmp @loop	; Keep looping. NOTE(review): handlers that are bare rts stubs do not advance idx0, so unless get_ptok does, this never terminates on those characters.
+@end:
+	ply #2		; Get the screen buffer index back.
+	rts		; End of lex.
+
+
+parse_ptok:
+	pha #1		; Entry: A = pre-token (PTOK_*). Preserve it while the pointer is set up.
+	ldb #2		; Set the third pointer
+	lda.w #swtab	; to the start of the jump table.
+	jsr set_ptr	;
+	and #0		; Reset A.
+	pla #1		; Get the pre-token back.
+	phy #2		; Preserve Y.
+	lsl #1		; Multiply the pre-token by two (swtab entries are .word sized), to get the jump index.
+	tay		; Get the index of the jump table.
+	lda.w (ptr3), y	; Get the address to jump to.
+	jsr set_ptr	; Set the third pointer to the case address. NOTE(review): assumes B is still 2 after the first set_ptr - confirm.
+	and #0		; Reset A.
+	tab		; Reset B.
+	ply #2		; Get Y back.
+	jmp (ptr3)	; Jump (not call) to the case label, so the handler's rts returns to parse_ptok's caller.
+ptok_dot:
+	ldb #1		; Make init_lex increment the string index (steps past the '.').
+	jsr init_lex	; Initialize the lexeme buffer for copying.
+	ldb #$11	; Set the delimiter comparison value to whitespace ($10) or end of line ($01).
+	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter (A is 0 after init_lex, so isdelm is used).
+@isop:
+	lda b		; Has the isop flag been set?
+	beq @dir	; No, so check for a directive.
+@rs:
+	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
+	sta lex_type	;
+	ldy.w idx1	; Get the lexeme index.
+	dey		; Decrement the lexeme index, to point at the last copied character.
+	lda (ptr3), y	; Get the suffix character.
+	jsr get_rs	; Get the register size (returned in A; not stored anywhere in this routine).
+	jmp @end	; We are done.
+@dir:
+	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
+	sta lex_type	;
+	ldb #0		; Make the lexeme buffer, the first pointer.
+	stb.q idx1	; Reset the second index (used below as the directive ID).
+	jsr set_lexptr	; Set up the lexeme buffer.
+@dir_loop:
+	lda.w #dir	; Get pointer to the start of the directive table.
+	clc		; Prepare for a non carrying add.
+	adc.w idx2	; Offset the pointer, by the length of the previous string.
+	pha #8		; Preserve the directive string pointer.
+	jsr strcasecmp	; Is the lexeme buffer, the same as the directive string?
+	pla #8		; Get the directive string pointer back.
+	beq @found	; Yes, so create a new token.
+	ldb idx1	; No, so Get the directive ID.
+	cpb #6		; Have we reached the end of the directive table? NOTE(review): DIR_* only defines IDs 0-5, so one string past DIR_INCL is also compared - confirm the table length.
+	beq @end	; Yes, so we're done.
+	inc idx1	; No, so increment the directive ID.
+@getlen:
+	jsr strlen	; Get the string's length.
+	inx		; Add one to the length (presumably for the terminator - TODO confirm).
+	txa		; Place it in the accumulator.
+	clc		; Prepare for a non carrying add.
+	adc.w idx2	; Add the string offset to the current length
+	sta.w idx2	; Save the offset in the third index.
+	jmp @dir_loop	; Keep looping.
+@found:
+	nop		; Placeholder: nothing is done with the matching directive ID (idx1) yet.
+@end:
+	jsr make_tok	; Create the token (reached on a match, on a miss, and from @rs).
+	jsr set_cmdbuf	; Set the first pointer to the command buffer.
+	rts		; End of parse_ptok.
+ptok_at:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_col:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_equ:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_plus:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_min:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_gt:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_lt:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_lbrk:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_rbrk:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_com:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_xr:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_yr:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_dqu:
+	ldb #1		; Make init_lex increment the string index (steps past the opening quote).
+	jsr init_lex	; Initialize the lexeme buffer for copying.
+	ldb #4		; Set the delimiter comparison value to a double quote.
+	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter (A is 0 after init_lex, so isdelm is used).
+@term:
+	rts		; End of parse_ptok. NOTE(review): no token is made, and idx0 is left on the delimiter that stopped the copy.
+ptok_squ:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_hash:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_scol:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_dolr:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_prcn:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_num:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+ptok_alph:
+	ldb #0		; Do not let init_lex increment the string index (the letter is part of the lexeme).
+	jsr init_lex	; Initialize the lexeme buffer for copying.
+	ldb #1		; Stop at any possible delimiter, except whitespace.
+	tba		; Use isdelm2 for the comparison (a non-zero A selects it in delmcpy).
+	jsr delmcpy	; Copy the string, to the lexeme buffer, until delimiter.
+	lda #0		; Reset A.
+	sta b		; Clear the isop flag.
+@isop:
+	ldb #0		; Make the lexeme buffer, the first pointer.
+	stb.q idx1	; Reset the second index (used below as the instruction ID).
+	jsr set_lexptr	; Set up the lexeme buffer.
+@isop_loop:
+	lda.w #mne	; Get pointer to the start of the instruction table.
+	clc		; Prepare for a non carrying add.
+	adc.w idx2	; Offset the pointer, by the offset of the current instruction entry.
+	jsr strcasecmp	; Is the lexeme buffer, the same as the mnemonic string?
+	beq @found	; Yes, so create a new token.
+	ldb idx1	; No, so Get the instruction ID.
+	cpb #OPNUM-1	; Have we reached the end of the instruction table?
+	beq @end	; Yes, so we're done.
+	inc idx1	; No, so increment the instruction ID.
+@offset:
+	lda #13		; Get the base size of the instruction table (entries are 13 bytes apart).
+	clc		; Prepare for a non carrying multiply.
+	mul idx1	; Multiply the base offset, by the instruction ID.
+	sta.w idx2	; Save the offset in the third index.
+	jmp @isop_loop	; Keep looping.
+@found:
+	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
+	sta lex_type	;
+	inc b		; Set the isop flag.
+@end:
+	jsr make_tok	; Create the token (also reached on a miss, where lex_type keeps its previous value).
+	jsr set_cmdbuf	; Set the first pointer to the command buffer.
+	rts		; End of parse_ptok.
+ptok_othr:
+	rts		; Not implemented yet; returns to parse_ptok's caller.
+
+
+set_lexptr:
+	lda.d #lexeme	; Set the pointer to the lexeme buffer (B is passed through from the caller to set_ptr).
+	jsr set_ptr	; NOTE(review): B presumably picks the pointer (0 = ptr, 2 = ptr3) - confirm in set_ptr.
+	and #0		; Reset A.
+	tab		; Reset B.
+	sta.q idx1	; Reset the second index.
+	rts		; End of set_lexptr.
+
+
+set_cmdbuf:
+	ldb #0		; Set the first pointer
+	lda.d #cmd_buf	; to the command buffer (undoes set_lexptr having been called with B = 0).
+	jsr set_ptr	;
+	and #0		; Reset A.
+	tab		; Reset B.
+	rts		; End of set_cmdbuf.
+
+
+init_lex:
+	cpb #0		; Entry: B != 0 asks for the string index to be advanced first. Do we need to increment it?
+	beq @init	; No, so skip that step.
+@inc_str:
+	inc.w idx0	; Yes, so increment the string index.
+@init:
+	lda #0		; Reset A.
+	sta.q idx1	; Reset the second index (set_lexptr clears it again below).
+	sta.q idx2	; Reset the third index
+	ldb #2		; Make the lexeme buffer, the third pointer.
+	jsr set_lexptr	; Set up the lexeme buffer.
+	rts		; End of init_lex.
+
+
+delmcpy:
+	sta a		; Save the delimiter check flag (entry A: 0 = test with isdelm, non-zero = test with isdelm2).
+	stb c		; Save the delimiter comparison value (entry B).
+@loop:
+	ldy.w idx0	; Get the string index.
+	lda (ptr), y	; Get a character from the line.
+	pha #1		; Preserve the character.
+	lda a		; Are we calling isdelm2?
+	pla #1		; Get the character back.
+	bne @isdelm2	; Yes, so use isdelm2.
+	jsr isdelm	; No, so get the delimiter value from isdelm.
+	and c		; Does the delimiter value share a bit with the comparison value?
+	bne @end	; Yes, so we're done.
+	jmp @copy	; No, so start copying the character.
+@isdelm2:
+	jsr isdelm2	; Get the delimiter value from isdelm2.
+	cmp c		; Are both delimiter values, the same?
+	beq @end	; Yes, so we're done.
+@copy:
+	lda (ptr), y	; Get a character from the line.
+	ldy.w idx1	; Get the lexeme index.
+	sta (ptr3), y	; Copy the character to the lexeme buffer.
+	inc.w idx0	; Increment the string index.
+	inc.w idx1	; Increment the lexeme index.
+	jmp @loop	; Keep looping.
+@end:
+	ldy.w idx1	; Y still holds the string index here, so reload the lexeme index.
+	lda #0		; Terminate the lexeme buffer,
+	sta (ptr3), y	; right after the last copied character.
+	rts		; End of delmcpy.
+
+get_rs:
+	phb #1		; Preserve B. Entry: A = suffix character; returns A = 0 (other), 1 (.w), 2 (.d) or 3 (.q).
+	ldb #0		; Set the isop flag to false:
+	stb b		; store the zero, otherwise the flag is never actually cleared.
+	plb #1		; Get B back.
+	jsr tolower	; Convert the character to lowercase.
+	cmp #'w'	; Is it .w?
+	beq @r1		; Yes, so return 1.
+	cmp #'d'	; No, but was it .d?
+	beq @r2		; Yes, so return 2.
+	cmp #'q'	; No, but was it .q?
+	beq @r3		; Yes, so return 3.
+@r0:
+	lda #0		; Return 0.
+	rts		; End of get_rs.
+@r1:
+	lda #1		; Return 1.
+	rts		; End of get_rs.
+@r2:
+	lda #2		; Return 2.
+	rts		; End of get_rs.
+@r3:
+	lda #3		; Return 3.
+	rts		; End of get_rs.
+
+make_tok:
+	nop		; Placeholder: token creation is not implemented yet.
+@end:
+	rts		; End of make_tok.
+
+; Entry point for utility subroutines.
+utils:
-- 
cgit v1.2.3-13-gbd6f