From 4ed07ca38b99abdca750c6612c512f30965f1714 Mon Sep 17 00:00:00 2001 From: mrb0nk500 Date: Sun, 30 Aug 2020 12:44:21 -0400 Subject: - Did some more work on SuBAsm's lexer. - Optimized the memory read, and write functions. - Made the emulator faster, and cleaner in general. --- programs/sub-suite/lexer.s | 228 +++++++++++++++++++++++++++++++++------------ 1 file changed, 169 insertions(+), 59 deletions(-) (limited to 'programs/sub-suite/lexer.s') diff --git a/programs/sub-suite/lexer.s b/programs/sub-suite/lexer.s index 3a856b5..72f1db6 100644 --- a/programs/sub-suite/lexer.s +++ b/programs/sub-suite/lexer.s @@ -8,6 +8,10 @@ lex: sty.q idx0 ; Clear the first index. sty.q idx1 ; Clear the second index. sty.q idx2 ; Clear the third index. + sty.d t_id ; Clear the token ID, type, space count, and tab count. + sty.q t_val ; Clear the token value. + sty.q t_str ; Clear the token string. + sty.q t_sym ; Clear the token symbol. sty regb ; Clear the isop flag. ; lda (ptr), y ; Get a character from the line. ; pha ; Preserve the character. @@ -17,11 +21,11 @@ lex: lda #2 ; Get the third byte, of the line table address. lsl #$10 ; Shift it by 2 bytes. ldb #1 ; Set the second pointer - lda.w ltok ; to the last line. + lda.w lline ; to the last line. jsr set_ptr ; lda.w (ptr2) ; Get the next line. jsr set_ptr ; Set the second pointer to the next line. - sta.w ctok ; Make it the current line. + sta.w cline ; Make it the current line. and #0 ; Reset A. @loop: ldy.w idx0 ; Get the string index. @@ -36,7 +40,7 @@ lex: jsr isdelm ; Get the delimiter. and #$10 ; Is this character, a space, or tab? pla ; Get the character back. - beq @isstart ; No, so check for the start of the line. + beq @switch ; No, so start lexing. inc.w idx0 ; Yes, so increment the string index. cmp #' ' ; Is this character, a space? beq @incs ; Yes, so increment the starting space count. @@ -44,31 +48,66 @@ lex: beq @inct ; Yes, so increment the starting tab count. bra @spaces ; No, so Keep looping. @incs: - inc idx1 ; Increment the space count. + inc t_space ; Increment the space count. bra @spaces ; Keep looping. @inct: - inc idx1+1 ; Increment the tab count. + inc t_tab ; Increment the tab count. bra @spaces ; Keep looping. -@isstart: - pha.w ; Preserve the character. - lda.w idx1 ; Was there any whitespace? - pla.w ; Get the character back. - beq @switch ; No, so start lexing. - cpb #1 ; Yes, and are we at the start of the line? - bne @switch ; No, so start lexing. -@whtspace: - ldy #2 ; Yes, so set the line index to the starting whitespace counters. - lda.w idx1 ; Get both indecies. - sta.w (ptr2), y ; Save them in the line. - and #0 ; Reset A. - sta.w idx1 ; Reset the second index. - deb ; Set the isstart flag to false. @switch: ldy.w idx0 ; Get the string index. lda (ptr), y ; Get the character. jsr get_ptok ; Get the pre-token. + pha ; Preserve the pre-token. + jsr is_altok ; Is this one of the single letter pre-tokens? + pla ; Get the pre-token back. + bne @is_altok ; Yes, so check the rest of the pre-token. +@parse: jsr parse_ptok ; Parse the pre-token. ; beq @end ; We got to the end of the string. + lda lex_type ; Get the lexeme type. + cmp #TOK_EXPR ; Was this token, an expression? + beq @inc_idx ; Yes, so increment the index. + ldy.w idx0 ; No, so get the string index. + lda (ptr), y ; Get a character from the line. + jsr isdelm2 ; Is this not a delimiter? + beq @inc_idx ; Yes, so increment the index. + bra @loop ; No, so keep looping. +@is_altok: + sta lex_type ; Save the pre-token in the lexeme type. + iny ; Increment the offset. + cmp #PTOK_S ; Is this pre-token, PTOK_S? + bne @ptok_p ; No, so check for PTOK_P. + lda (ptr), y ; Yes, so get the next character after it. + jsr tolower ; Convert it to lowercase. + cmp #'p' ; Is the next character 'p'? + bne @ptok_p ; No, so check for PTOK_P. + bra @inc_offset ; Yes, so increment the offset. +@ptok_p: + cmp #PTOK_P ; Is this pre-token, PTOK_P? + bne @is_altok2 ; No, so skip incrementing the offset. + lda (ptr), y ; Yes, so get the next character after it. + jsr tolower ; Convert it to lowercase. + cmp #'c' ; Is the next character 'c'? + bne @is_altok2 ; No, so skip incrementing the offset. +@inc_offset: + iny ; Increment the offset. +@is_altok2: + lda (ptr), y ; Yes, so get the character, at the current offset. + jsr get_ptok ; Get the pre-token of the character. + cmp #PTOK_P ; Is this pre-token greater than PTOK_P? + bcc @ptok_num ; No, so check for PTOK_NUM. + beq @ptok_num ; + cmp #PTOK_B ; Yes, and is this pre-token greater than, or equal to PTOK_B? + bcs @ptok_al ; Yes, so set the pre-token to PTOK_ALPH. + lda lex_type ; No, so get the original pre-token back. + ldy.w idx0 ; Get the string index. + bra @parse ; Go back to parsing the pre-token. +@ptok_al: + lda #PTOK_ALPH ; Set the pre-token to PTOK_ALPH. + ldy.w idx0 ; Get the string index. + bra @parse ; Go back to parsing the pre-token. +@inc_idx: + inc.w idx0 ; Increment the string index. bra @loop ; Keep looping. @end: jsr update_ptr ; Get the screen buffer index. @@ -97,21 +136,23 @@ ptok_dot: ldb #1 ; Make init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. ldb #$11 ; Set the delimiter comparison value to whitespace. + lda #0 ; Set the isesc flag to false. + pha ; jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter. + pla ; @isop: lda regb ; Has the isop flag been set? beq @dir ; No, so check for a directive. @rs: lda #TOK_RS ; Yes, so set the lexeme type to TOK_RS. sta lex_type ; + sta t_id ; Also set the token ID to TOK_RS. ldy.w idx1 ; Get the lexeme index. dey ; Decrement the lexeme index. lda (ptr3), y ; Get the suffix character. jsr get_rs ; Get the register size. bra @end ; We are done. @dir: - lda #TOK_DIR ; Set the lexeme type to TOK_DIR. - sta lex_type ; ldb #0 ; Make the lexeme buffer, the first pointer. stb.q idx1 ; Reset the first index. jsr set_lexptr ; Set up the lexeme buffer. @@ -135,7 +176,11 @@ ptok_dot: sta.w idx2 ; Save the offset in the third index. bra @dir_loop ; Keep looping. @found: - nop ; + lda #TOK_DIR ; Set the lexeme type to TOK_DIR. + sta lex_type ; + sta t_id ; Also set the token ID to TOK_DIR. + lda idx1 ; Set the token type to the directive ID. + sta t_type ; @end: jsr make_tok ; Create the token. jsr set_cmdbuf ; Set the first pointer to the command buffer. @@ -156,7 +201,6 @@ ptok_min: lda #EXPR_MINUS ; Set the expresion type to EXPR_MINUS. bra ptok_expr ; Set up the token. ptok_gt: - lda #EXPR_LOW ; Set the expresion type to EXPR_LOW. bra ptok_expr ; Set up the token. ptok_lt: @@ -165,7 +209,9 @@ ptok_lt: ptok_pipe: lda #EXPR_OR ; Set the expresion type to EXPR_OR. ptok_expr: + sta t_type ; Set the token type to the expression type. lda #TOK_EXPR ; Set the lexeme type to TOK_EXPR. + sta t_id ; Also set the token ID to TOK_EXPR. sta lex_type ; inc.w idx0 ; ; ldb #1 ; Make init_lex increment the string index. @@ -182,6 +228,9 @@ ptok_rbrk: ptok_com: inc.w idx0 ; rts ; End of parse_ptok. +ptok_br: + inc.w idx0 ; + rts ; End of parse_ptok. ptok_xr: inc.w idx0 ; rts ; End of parse_ptok. @@ -197,16 +246,36 @@ ptok_pc: ptok_dqu: ldb #1 ; Make init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. - ldb #4 ; Set the delimiter comparison value to a double quote. + ldb #5 ; Set the delimiter comparison value to a double quote, or EOL. + lda #1 ; Set the isesc flag to true. + pha ; + and #0 ; Make delmcpy use isdelm. jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter. + pla ; + lda #TOK_DQUOT ; Set the lexeme type to TOK_DQUOT. + sta lex_type ; + sta t_id ; Also set the token ID to TOK_DQUOT. + lda.d ptr3 ; Get the address of the lexeme buffer. + sta.q t_str ; Save it in the token string. @end: + jsr make_tok ; Create the token. rts ; End of parse_ptok. ptok_squ: ldb #1 ; Make init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. - ldb #8 ; Set the delimiter comparison value to a single quote. + ldb #9 ; Set the delimiter comparison value to a single quote, or EOL. + lda #1 ; Set the isesc flag to true. + pha ; + and #0 ; Make delmcpy use isdelm. jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter. + pla ; + lda #TOK_SQUOT ; Set the lexeme type to TOK_SQUOT. + sta lex_type ; + sta t_id ; Also set the token ID to TOK_SQUOT. + lda.d ptr3 ; Get the address of the lexeme buffer. + sta.q t_str ; Save it in the token string. @end: + jsr make_tok ; Create the token. rts ; End of parse_ptok. ptok_hash: inc.w idx0 ; @@ -215,33 +284,49 @@ ptok_scol: ldb #1 ; Make init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. ldb #1 ; Set the delimiter to EOL. + lda #0 ; Set the isesc flag to false. + pha ; jsr delmcpy ; Copy the string, to the lexeme buffer, until EOL. + pla ; + lda #TOK_SCOLN ; Set the lexeme type to TOK_SCOLN. + sta lex_type ; + sta t_id ; Also set the token ID to TOK_SCOLN. + lda.d ptr3 ; Get the address of the lexeme buffer. + sta.q t_str ; Save it in the token string. @end: + jsr make_tok ; Create the token. rts ; End of parse_ptok. ptok_dolr: lda #TOK_HEX ; Set the lexeme type to TOK_HEX. sta lex_type ; + sta t_id ; Also set the token ID to TOK_HEX. lda #$10 ; Set the base to Hexadecimal. ldb #1 ; Make init_lex increment the string index. bra ptok_num2 ; Parse the value. ptok_prcn: lda #TOK_BIN ; Set the lexeme type to TOK_BIN. sta lex_type ; + sta t_id ; Also set the token ID to TOK_BIN. lda #2 ; Set the base to Binary. ldb #1 ; Make init_lex increment the string index. bra ptok_num2 ; Parse the value. ptok_num: lda #TOK_DEC ; Set the lexeme type to TOK_DEC. sta lex_type ; + sta t_id ; Also set the token ID to TOK_DEC. lda #10 ; Set the base to Decimal. ldb #0 ; Do not let init_lex increment the string index. ptok_num2: pha ; Preserve the base. jsr init_lex ; Initialize the lexeme buffer for copying. ldb #3 ; Set the delimiter to both the EOL, and a comma. + lda #0 ; Set the isesc flag to false. + pha ; jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter. + pla ; pla ; Get the base back. jsr strtoullg ; Convert the string into a numeric value. + sta.q t_val ; Set the token value to the converted value. jsr make_tok ; Create the token. jsr set_cmdbuf ; Set the first pointer to the command buffer. rts ; End of parse_ptok. @@ -249,17 +334,21 @@ ptok_alph: ldb #0 ; Do not let init_lex increment the string index. jsr init_lex ; Initialize the lexeme buffer for copying. ldb #3 ; Stop at any possible delimiter. + lda #0 ; Set the isesc flag to false. + pha ; tba ; Use isdelm2 for the comparison. jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter. + pla ; lda #0 ; Reset A. sta regb ; Clear the isop flag. @isop: ldb #0 ; Make the lexeme buffer, the first pointer. stb.q idx1 ; Reset the second index. + stb.q idx2 ; Reset the third index. jsr set_lexptr ; Set up the lexeme buffer. @isop_loop: ldb idx1 ; Get the instruction ID. - cpb #OPNUM-1 ; Have we reached the end of the mnemonic table? + cpb #OPNUM ; Have we reached the end of the mnemonic table? beq @end ; Yes, so we're done. lda.w #mne ; No, so get the start of the mnemonic table. clc ; Prepare for a non carrying add. @@ -268,7 +357,6 @@ ptok_alph: jsr strcaseg ; Is the lexeme buffer, the same as the mnemonic string? pla.q ; Get the mnemonic string pointer back. beq @found ; Yes, so create a new token. - beq @end ; Yes, so we're done. inc idx1 ; No, so increment the instruction ID. @offset: jsr strlen ; Get the string's length. @@ -280,6 +368,11 @@ ptok_alph: @found: lda #TOK_MNE ; Set the lexeme type to TOK_MNE. sta lex_type ; + sta t_id ; Also set the token ID to TOK_MNE. + lda.q idx1 ; Get the instruction ID. + sta.q t_val ; Set the token value to the instruction ID. + lda #$FF ; Set the token type to -1. + sta t_type ; inc regb ; Set the isop flag. @end: jsr make_tok ; Create the token. @@ -333,79 +426,96 @@ init_lex: delmcpy: - sta rega ; Save the delimiter check flag. - stb regc ; Save the delimiter comparison value. + pha ; Save the delimiter check flag. + phb ; Save the delimiter comparison value. + and #0 ; Reset A. + pha ; Reset the isesc flag. +; sta rega ; Save the delimiter check flag. +; stb regc ; Save the delimiter comparison value. @loop: - ldb #0 ; Reset the B register. - stb regg ; Reset the byte count. ldy.w idx0 ; Get the string index. - lda.q (ptr), y ; Get eight bytes from the current line. -@loop1: - pha.q ; Save the string buffer. - and #$FF ; Get the current byte. + lda (ptr), y ; Get a character from the line. pha ; Preserve the character. - lda rega ; Are we calling isdelm2? - pla ; Get the character back. + lda sp+4 ; Are we calling isdelm2? + pla ; Get the character back. bne @isdelm2 ; Yes, so use isdelm2. jsr isdelm ; No, so get the delimiter value from isdelm. @delmchk: - and regc ; Are both delimiter values, the same? - pla.q ; Get back the string buffer. - bne @end ; Yes, so we're done. - bra @copy ; No, so start copying the character. + and sp+2 ; Are both delimiter values, the same? + beq @copy ; No, so copy the character. +@isesc: + lda sp+1 ; Was the isesc flag true? + beq @end ; No, so we're done. + bra @copy ; Yes, so copy the character. @isdelm2: jsr isdelm2 ; Get the delimiter value from isdelm2. bra @delmchk ; Check the delimiter. @copy: + lda sp+12 ; Was the do_isesc flag set? + bne @do_isesc ; Yes, so set the isesc flag. +@copy1: + lda (ptr), y ; Get a character from the line. ldy.w idx1 ; Get the lexeme index. - sta (ptr3), y ; Copy one byte from the screen buffer, to the command buffer. + sta (ptr3), y ; Copy the character to the lexeme buffer. inc.w idx0 ; Increment the string index. inc.w idx1 ; Increment the lexeme index. - lsr #8 ; Shift in the next byte. - inc regg ; Increment the byte count. - ldb regg ; Get back the byte count. - cpb #7 ; Did we shift in eight bytes? - beq @loop ; Yes, so get eight more bytes. - bra @loop1 ; No, so keep shifting in more bytes. + bra @loop ; Keep looping. +@do_isesc: + jsr isesc ; Check if this is an escaped character. + sta sp+1 ; Save it in the isesc flag. + bra @copy1 ; Copy the character. @end: - ldb #0 ; Reset B. + pla.w ; Pull both arguments off the stack. + pla ; Pull the isesc flag off the stack. + and #0 ; Reset A. ldy.w idx1 ; Get the lexeme index. - stb (ptr3), y ; Terminate the command buffer. -@end1: + sta (ptr3), y ; Terminate the lexeme buffer. ldy.w idx0 ; Get the string index. - tba ; Reset A. rts ; End of delmcpy. ;@loop: +; ldb #0 ; Reset the B register. +; stb regg ; Reset the byte count. ; ldy.w idx0 ; Get the string index. -; lda (ptr), y ; Get a character from the line. +; lda.q (ptr), y ; Get eight bytes from the current line. +;@loop1: +; pha.q ; Save the string buffer. +; and #$FF ; Get the current byte. ; pha ; Preserve the character. ; lda rega ; Are we calling isdelm2? -; pla ; Get the character back. +; pla ; Get the character back. ; bne @isdelm2 ; Yes, so use isdelm2. ; jsr isdelm ; No, so get the delimiter value from isdelm. ;@delmchk: ; and regc ; Are both delimiter values, the same? +; pla.q ; Get back the string buffer. ; bne @end ; Yes, so we're done. ; bra @copy ; No, so start copying the character. ;@isdelm2: ; jsr isdelm2 ; Get the delimiter value from isdelm2. ; bra @delmchk ; Check the delimiter. ;@copy: -; lda (ptr), y ; Get a character from the line. ; ldy.w idx1 ; Get the lexeme index. -; sta (ptr3), y ; Copy the character to the lexeme buffer. +; sta (ptr3), y ; Copy one byte from the screen buffer, to the command buffer. ; inc.w idx0 ; Increment the string index. ; inc.w idx1 ; Increment the lexeme index. -; bra @loop ; Keep looping. +; lsr #8 ; Shift in the next byte. +; inc regg ; Increment the byte count. +; ldb regg ; Get back the byte count. +; cpb #7 ; Did we shift in eight bytes? +; beq @loop ; Yes, so get eight more bytes. +; bra @loop1 ; No, so keep shifting in more bytes. ;@end: +; ldb #0 ; Reset B. ; ldy.w idx1 ; Get the lexeme index. -; lda #0 ; Terminate the lexeme buffer. -; sta (ptr3), y ; +; stb (ptr3), y ; Terminate the command buffer. +;@end1: ; ldy.w idx0 ; Get the string index. +; tba ; Reset A. ; rts ; End of delmcpy. + get_rs: phb ; Preserve B. ldb #0 ; Set the isop flag to false. -- cgit v1.2.3-13-gbd6f