; Lexer, and supporting routines for SuBAsm.

; Program code.

; lex
;	Tokenize the current line.
;	Clears the indices, the token fields, and the isop flag (regb), moves
;	the second pointer to the line after lline (saved as cline), then
;	walks the string at (ptr) using idx0 as the string index:
;	  - stops when isdelm returns 1 (end of line),
;	  - skips spaces/tabs, counting them in t_space/t_tab,
;	  - classifies the next character with get_ptok, and dispatches it
;	    through parse_ptok.
;	On return, Y holds the value returned by update_ptr, and A is zero.
lex:
	ldx #0		; Reset X.
	txa		; Reset A.
	txy		; Reset Y.
	sty.q idx0	; Clear the first index.
	sty.q idx1	; Clear the second index.
	sty.q idx2	; Clear the third index.
	sty.d t_id	; Clear the token ID, type, space count, and tab count.
	sty.q t_val	; Clear the token value.
	sty.q t_str	; Clear the token string.
	sty.q t_sym	; Clear the token symbol.
	sty regb	; Clear the isop flag.
;	lda (ptr), y	; Get a character from the line.
;	pha		; Preserve the character.
;	jsr isdigit	; Is this character a digit?
;	pla		; Get the character back.
@getline:
	ldb #1		; Select the second pointer,
	lda.q lline	; and point it at the last line.
	jsr set_ptr	;
	ldy #ln.next	; Index the last line's next-line field.
	lda.q (ptr2), y	; Get the next line.
	jsr set_ptr	; Point the second pointer at the next line.
	sta.q cline	; Make it the current line.
	and #0		; Reset A.
	tay		; Reset Y.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm	; Get its delimiter value.
	cmp #1		; Are we at the end of the line?
	beq @end	; Yes, so we're done.
@spaces:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha		; Preserve the character.
	jsr isdelm	; Get its delimiter value.
	and #$10	; Is this character a space, or a tab?
	pla		; Get the character back (the branch below still tests the AND).
	beq @switch	; No, so start lexing.
	inc.w idx0	; Yes, so step over it.
	cmp #' '	; Is this character a space?
	beq @incs	; Yes, so count it as a leading space.
	cmp #'\t'	; No, but is it a tab?
	beq @inct	; Yes, so count it as a leading tab.
	bra @spaces	; Neither, so keep skipping.
@incs:
	inc t_space	; Increment the space count.
	bra @spaces	; Keep skipping.
@inct:
	inc t_tab	; Increment the tab count.
	bra @spaces	; Keep skipping.
@switch:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get the character.
	jsr get_ptok	; Get the pre-token.
	pha		; Preserve the pre-token.
	jsr is_altok	; Is this one of the single letter pre-tokens?
	pla		; Get the pre-token back.
	bne @is_altok	; Yes, so check the rest of the pre-token.
@parse:
	jsr parse_ptok	; Parse the pre-token.
;	beq @end	; We got to the end of the string.
	; NOTE(review): ptok_expr already advances idx0, so an expression token
	; appears to advance the index twice here - confirm this is intended.
	lda lex_type	; Get the lexeme type.
	cmp #TOK_EXPR	; Was this token an expression?
	beq @inc_idx	; Yes, so increment the index.
	ldy.w idx0	; No, so get the string index.
	lda (ptr), y	; Get a character from the line.
	jsr isdelm2	; Is this not a delimiter?
	beq @inc_idx	; Yes, so increment the index.
	bra @loop	; No, so keep looping.
@is_altok:
	sta lex_type	; Save the pre-token in the lexeme type.
	iny		; Look at the character after it.
	cmp #PTOK_S	; Is this pre-token, PTOK_S?
	bne @ptok_p	; No, so check for PTOK_P.
	lda (ptr), y	; Yes, so get the next character after it.
	jsr tolower	; Convert it to lowercase.
	cmp #'p'	; Is the next character 'p'?
	bne @ptok_p	; No, so check for PTOK_P.
	bra @inc_offset	; Yes, so step over the second letter too.
@ptok_p:
	cmp #PTOK_P	; Is this pre-token, PTOK_P?
	bne @is_altok2	; No, so skip incrementing the offset.
	lda (ptr), y	; Yes, so get the next character after it.
	jsr tolower	; Convert it to lowercase.
	cmp #'c'	; Is the next character 'c'?
	bne @is_altok2	; No, so skip incrementing the offset.
@inc_offset:
	iny		; Step over the second letter.
@is_altok2:
	; NOTE(review): this range test relies on the numeric order of the
	; PTOK_* values, which is not visible in this file - confirm.
	lda (ptr), y	; Get the character at the current offset.
	jsr get_ptok	; Get the pre-token of the character.
	cmp #PTOK_P	; Is this pre-token greater than PTOK_P?
	bcc @ptok_num	; No, so check for PTOK_NUM.
	beq @ptok_num	;
	cmp #PTOK_B	; Yes, and is it greater than, or equal to PTOK_B?
	bcs @ptok_al	; Yes, so set the pre-token to PTOK_ALPH.
@ptok_orig:
	lda lex_type	; No, so get the original pre-token back.
	ldy.w idx0	; Get the string index.
	bra @parse	; Go back to parsing the pre-token.
@ptok_num:
	; This label was referenced above, but never defined inside lex.
	cmp #PTOK_NUM	; Is the following pre-token, PTOK_NUM?
	bne @ptok_orig	; No, so keep the original pre-token.
			; Yes, so fall through, and treat it as PTOK_ALPH.
@ptok_al:
	lda #PTOK_ALPH	; Set the pre-token to PTOK_ALPH.
	ldy.w idx0	; Get the string index.
	bra @parse	; Go back to parsing the pre-token.
@inc_idx:
	inc.w idx0	; Increment the string index.
	bra @loop	; Keep looping.
@end:
	jsr update_ptr	; Get the screen buffer index.
	tay		; Save it in Y.
	and #0		; Reset A.
	rts		; End of lex.


; parse_ptok
;	Dispatch on the pre-token in A.
;	Loads the 16-bit entry at swtab + (pre-token * 2) into the third
;	pointer, clears A and B, restores Y, and jumps through (ptr3) to the
;	matching ptok_* case. The case's rts returns to parse_ptok's caller.
parse_ptok:
	pha		; Preserve the pre-token.
	ldb #2		; Select the third pointer,
	lda.w #swtab	; and point it at the start of the jump table.
	jsr set_ptr	;
	and #0		; Reset A.
	pla		; Get the pre-token back.
	phy.w		; Preserve Y.
	lsl #1		; Multiply the pre-token by two, to get the jump index.
	tay		; Use it to index the jump table.
	lda.w (ptr3), y	; Get the address to jump to.
	jsr set_ptr	; Set the third pointer to the case address.
	and #0		; Reset A.
	tab		; Reset B.
	ply.w		; Get Y back.
	jmp (ptr3)	; Jump to the case label.


; ptok_dot
;	Case for '.'. Copies the text after the dot into the lexeme buffer,
;	up to whitespace. If the isop flag (regb) is set, the lexeme is taken
;	as a register size suffix (TOK_RS). Otherwise it is compared against
;	each string of the dir table, and a TOK_DIR token is set up on a match.
ptok_dot:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #$11	; Set the delimiter comparison value to whitespace.
	lda #0		; Push a false do_isesc argument, and select isdelm.
	pha		;
	jsr delmcpy	; Copy the string to the lexeme buffer, until delimiter.
	pla		; Drop the do_isesc argument.
@isop:
	lda regb	; Has the isop flag been set?
	beq @dir	; No, so check for a directive.
@rs:
	lda #TOK_RS	; Yes, so set the lexeme type to TOK_RS.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_RS.
	ldy.w idx1	; Get the lexeme index.
	dey		; Step back to the last copied character.
	lda (ptr3), y	; Get the suffix character.
	jsr get_rs	; Get the register size.
	; NOTE(review): the size returned in A is not stored anywhere before
	; make_tok is called - confirm whether it belongs in t_type.
	bra @end	; We are done.
@dir:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index.
	jsr set_lexptr	; Set up the lexeme buffer.
@dir_loop:
	ldb idx1	; Get the directive ID.
	cpb #7		; Have we tried all seven (hard-coded) directives?
	beq @end	; Yes, so we're done.
	lda.w #dir	; No, so get the start of the directive table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer by the lengths of the previous strings.
	pha.q		; Preserve the directive string pointer.
	jsr strcaseg	; Is the lexeme buffer the same as the directive string?
	pla.q		; Get the directive string pointer back.
	beq @found	; Yes, so create a new token.
	inc idx1	; No, so increment the directive ID.
@getlen:
	jsr strlen	; Get the string's length.
	inc		; Add one, for the terminator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the current string offset to the length.
	sta.w idx2	; Save the new offset in the third index.
	bra @dir_loop	; Keep looping.
@found:
	lda #TOK_DIR	; Set the lexeme type to TOK_DIR.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_DIR.
	lda idx1	; Set the token type to the directive ID.
	sta t_type	;
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; Return to parse_ptok's caller.


; The cases below only step over the character.
ptok_at:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_col:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_equ:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.


; Expression operator cases.
;	Each entry loads its expression type into A, and shares ptok_expr,
;	which stores it in t_type, marks the token as TOK_EXPR, and steps
;	over the operator character.
ptok_plus:
	lda #EXPR_PLUS	; Set the expression type to EXPR_PLUS.
	bra ptok_expr	; Set up the token.
ptok_min:
	lda #EXPR_MINUS	; Set the expression type to EXPR_MINUS.
	bra ptok_expr	; Set up the token.
ptok_gt:
	lda #EXPR_LOW	; Set the expression type to EXPR_LOW.
	bra ptok_expr	; Set up the token.
ptok_lt:
	lda #EXPR_HIGH	; Set the expression type to EXPR_HIGH.
	bra ptok_expr	; Set up the token.
ptok_pipe:
	lda #EXPR_OR	; Set the expression type to EXPR_OR.
ptok_expr:
	sta t_type	; Set the token type to the expression type.
	lda #TOK_EXPR	; Set the lexeme type to TOK_EXPR.
	sta t_id	; Also set the token ID to TOK_EXPR.
	sta lex_type	;
	inc.w idx0	; Step over the operator.
;	ldb #1		; Make init_lex increment the string index.
;	jsr init_lex	; Initialize the lexeme buffer for copying.
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; Return to parse_ptok's caller.


; The cases below only step over the character.
ptok_lbrk:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_rbrk:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_com:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_br:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_xr:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_yr:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_sp:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.
ptok_pc:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.


; ptok_dqu
;	Case for a double quote. Copies the text after the quote into the
;	lexeme buffer with escape checking enabled, and sets up a TOK_DQUOT
;	token whose t_str is the lexeme buffer address.
ptok_dqu:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #5		; Set the delimiter comparison value to a double quote, or EOL.
	lda #1		; Push a true do_isesc argument.
	pha		;
	and #0		; Make delmcpy use isdelm.
	jsr delmcpy	; Copy the string to the lexeme buffer, until delimiter.
	pla		; Drop the do_isesc argument.
	lda #TOK_DQUOT	; Set the lexeme type to TOK_DQUOT.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_DQUOT.
	lda.d ptr3	; Get the address of the lexeme buffer.
	sta.q t_str	; Save it in the token string.
@end:
	jsr make_tok	; Create the token.
	rts		; Return to parse_ptok's caller.


; ptok_squ
;	Case for a single quote. Same as ptok_dqu, but stops at a single
;	quote, or EOL, and sets up a TOK_SQUOT token.
ptok_squ:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #9		; Set the delimiter comparison value to a single quote, or EOL.
	lda #1		; Push a true do_isesc argument.
	pha		;
	and #0		; Make delmcpy use isdelm.
	jsr delmcpy	; Copy the string to the lexeme buffer, until delimiter.
	pla		; Drop the do_isesc argument.
	lda #TOK_SQUOT	; Set the lexeme type to TOK_SQUOT.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_SQUOT.
	lda.d ptr3	; Get the address of the lexeme buffer.
	sta.q t_str	; Save it in the token string.
@end:
	jsr make_tok	; Create the token.
	rts		; Return to parse_ptok's caller.


ptok_hash:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.


; ptok_scol
;	Case for ';'. Copies everything after the semicolon, up to EOL, into
;	the lexeme buffer, and sets up a TOK_SCOLN token whose t_str is the
;	lexeme buffer address.
ptok_scol:
	ldb #1		; Make init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #1		; Set the delimiter to EOL.
	lda #0		; Push a false do_isesc argument, and select isdelm.
	pha		;
	jsr delmcpy	; Copy the string to the lexeme buffer, until EOL.
	pla		; Drop the do_isesc argument.
	lda #TOK_SCOLN	; Set the lexeme type to TOK_SCOLN.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_SCOLN.
	lda.d ptr3	; Get the address of the lexeme buffer.
	sta.q t_str	; Save it in the token string.
@end:
	jsr make_tok	; Create the token.
	rts		; Return to parse_ptok's caller.


; Numeric cases.
;	Each entry sets the token ID/lexeme type, loads the base into A, and
;	loads B with whether init_lex should step over a prefix character.
;	ptok_num2 then copies the digits into the lexeme buffer, converts them
;	with strtoullg, and stores the result in t_val.
ptok_dolr:
	lda #TOK_HEX	; Set the lexeme type to TOK_HEX.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_HEX.
	lda #$10	; Set the base to Hexadecimal.
	ldb #1		; Make init_lex increment the string index.
	bra ptok_num2	; Parse the value.
ptok_prcn:
	lda #TOK_BIN	; Set the lexeme type to TOK_BIN.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_BIN.
	lda #2		; Set the base to Binary.
	ldb #1		; Make init_lex increment the string index.
	bra ptok_num2	; Parse the value.
ptok_num:
	lda #TOK_DEC	; Set the lexeme type to TOK_DEC.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_DEC.
	lda #10		; Set the base to Decimal.
	ldb #0		; Do not let init_lex increment the string index.
ptok_num2:
	pha		; Preserve the base.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #3		; Set the delimiter to both the EOL, and a comma.
	lda #0		; Push a false do_isesc argument, and select isdelm.
	pha		;
	jsr delmcpy	; Copy the string to the lexeme buffer, until delimiter.
	pla		; Drop the do_isesc argument.
	pla		; Get the base back.
	jsr strtoullg	; Convert the string into a numeric value.
	sta.q t_val	; Set the token value to the converted value.
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; Return to parse_ptok's caller.


; ptok_alph
;	Case for alphabetic text. Copies the word into the lexeme buffer,
;	clears the isop flag, then compares the word against each string of
;	the mne table. On a match it sets up a TOK_MNE token with the
;	instruction ID in t_val, t_type set to $FF, and sets the isop flag.
ptok_alph:
	ldb #0		; Do not let init_lex increment the string index.
	jsr init_lex	; Initialize the lexeme buffer for copying.
	ldb #3		; Stop at any possible delimiter.
	lda #0		; Push a false do_isesc argument.
	pha		;
	tba		; Non-zero A makes delmcpy use isdelm2.
	jsr delmcpy	; Copy the string to the lexeme buffer, until delimiter.
	pla		; Drop the do_isesc argument.
	lda #0		; Reset A.
	sta regb	; Clear the isop flag.
@isop:
	ldb #0		; Make the lexeme buffer, the first pointer.
	stb.q idx1	; Reset the second index.
	stb.q idx2	; Reset the third index.
	jsr set_lexptr	; Set up the lexeme buffer.
@isop_loop:
	ldb idx1	; Get the instruction ID.
	cpb #OPNUM	; Have we reached the end of the mnemonic table?
	beq @end	; Yes, so we're done.
	lda.w #mne	; No, so get the start of the mnemonic table.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Offset the pointer by the lengths of the previous strings.
	pha.q		; Preserve the mnemonic string pointer.
	jsr strcaseg	; Is the lexeme buffer the same as the mnemonic string?
	pla.q		; Get the mnemonic string pointer back.
	beq @found	; Yes, so create a new token.
	inc idx1	; No, so increment the instruction ID.
@offset:
	jsr strlen	; Get the string's length.
	inc		; Add one, for the terminator.
	clc		; Prepare for a non carrying add.
	adc.w idx2	; Add the current string offset to the length.
	sta.w idx2	; Save the new offset in the third index.
	bra @isop_loop	; Keep looping.
@found:
	lda #TOK_MNE	; Set the lexeme type to TOK_MNE.
	sta lex_type	;
	sta t_id	; Also set the token ID to TOK_MNE.
	lda.q idx1	; Get the instruction ID.
	sta.q t_val	; Set the token value to the instruction ID.
	lda #$FF	; Set the token type to -1.
	sta t_type	;
	inc regb	; Set the isop flag.
@end:
	jsr make_tok	; Create the token.
	jsr set_cmdbuf	; Set the first pointer to the command buffer.
	rts		; Return to parse_ptok's caller.


ptok_othr:
	inc.w idx0	; Skip the character.
	rts		; Return to parse_ptok's caller.


; set_lexptr
;	Point the pointer selected by B at the lexeme buffer (via set_ptr),
;	then clear A, B, idx1, and idx2.
set_lexptr:
	lda.d #lexeme	; Set the pointer to the lexeme buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	sta.q idx1	; Reset the second index.
	sta.q idx2	; Reset the third index.
	rts		; End of set_lexptr.


; set_cmdbuf
;	Point the first pointer (B = 0) at the command buffer, and return
;	with A and B cleared.
set_cmdbuf:
	and #0		; Reset A.
	tab		; Reset B, to select the first pointer.
	lda.d #cmd_buf	; Set the pointer to the command buffer.
	jsr set_ptr	;
	and #0		; Reset A.
	tab		; Reset B.
	rts		; End of set_cmdbuf.


; init_lex
;	Prepare the lexeme buffer for a new lexeme.
;	If B is non-zero, idx0 is incremented first (to step over a prefix
;	character). The third pointer is then set to the lexeme buffer, and
;	the previous lexeme is zeroed, up to its first zero byte.
;	Y is preserved.
init_lex:
	cpb #0		; Do we need to increment the string index?
	beq @init	; No, so skip that step.
@inc_str:
	inc.w idx0	; Yes, so increment the string index.
@init:
	ldb #2		; Make the lexeme buffer, the third pointer.
	jsr set_lexptr	; Set up the lexeme buffer (returns with A = 0).
	phy.w		; Preserve Y.
	tay		; Reset Y.
@loop:
	lda (ptr3), y	; Have we hit the end of the previous lexeme string?
	beq @end	; Yes, so we're done.
	lda #0		; No, so clear this character.
	sta (ptr3), y	;
	iny		; Increment the lexeme index.
	bra @loop	; Keep looping.
@end:
	ply.w		; Get Y back.
	rts		; End of init_lex.


; delmcpy
;	Copy characters from (ptr)[idx0] to (ptr3)[idx1], until a delimiter.
;	In:	A	= zero to classify with isdelm, non-zero to use isdelm2.
;		B	= delimiter comparison value (ANDed with the classifier
;			  result, non-zero means delimiter).
;		stack	= one byte pushed by the caller before the jsr, the
;			  do_isesc flag (read below as sp+12).
;	Out:	the lexeme buffer is zero terminated at idx1, idx0/idx1 are
;		advanced, A = 0, and Y = idx0.
;	Local stack bytes, after the three pushes below:
;		sp+1 = isesc flag, sp+2 = comparison value, sp+3 = check flag.
delmcpy:
	pha		; Save the delimiter check flag.
	phb		; Save the delimiter comparison value.
	and #0		; Reset A.
	pha		; Start with the isesc flag cleared.
;	sta rega	; Save the delimiter check flag.
;	stb regc	; Save the delimiter comparison value.
@loop:
	ldy.w idx0	; Get the string index.
	lda (ptr), y	; Get a character from the line.
	pha		; Preserve the character (locals move up by one).
	lda sp+4	; Are we calling isdelm2?
	pla		; Get the character back.
	bne @isdelm2	; Yes, so use isdelm2.
	jsr isdelm	; No, so get the delimiter value from isdelm.
@delmchk:
	and sp+2	; Does it match the delimiter comparison value?
	beq @copy	; No, so copy the character.
@isesc:
	lda sp+1	; Was the isesc flag true?
	beq @end	; No, so we're done.
	bra @copy	; Yes, so copy the delimiter as well.
@isdelm2:
	jsr isdelm2	; Get the delimiter value from isdelm2.
	bra @delmchk	; Check the delimiter.
@copy:
	lda sp+12	; Was the do_isesc flag set by the caller?
	bne @do_isesc	; Yes, so update the isesc flag.
@copy1:
	lda (ptr), y	; Get the character from the line again.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Copy the character to the lexeme buffer.
	inc.w idx0	; Increment the string index.
	inc.w idx1	; Increment the lexeme index.
	bra @loop	; Keep looping.
@do_isesc:
	jsr isesc	; Check if this is an escaped character.
	sta sp+1	; Save the result in the isesc flag.
	bra @copy1	; Copy the character.
@end:
	pla.w		; Pull the isesc flag, and the comparison value.
	pla		; Pull the delimiter check flag.
	and #0		; Reset A.
	ldy.w idx1	; Get the lexeme index.
	sta (ptr3), y	; Terminate the lexeme buffer.
	ldy.w idx0	; Get the string index.
	rts		; End of delmcpy.

; Previous, eight bytes at a time version of delmcpy's loop, kept for
; reference.
;@loop:
;	ldb #0		; Reset the B register.
;	stb regg	; Reset the byte count.
;	ldy.w idx0	; Get the string index.
;	lda.q (ptr), y	; Get eight bytes from the current line.
;@loop1:
;	pha.q		; Save the string buffer.
;	and #$FF	; Get the current byte.
;	pha		; Preserve the character.
;	lda rega	; Are we calling isdelm2?
;	pla		; Get the character back.
;	bne @isdelm2	; Yes, so use isdelm2.
;	jsr isdelm	; No, so get the delimiter value from isdelm.
;@delmchk:
;	and regc	; Are both delimiter values, the same?
;	pla.q		; Get back the string buffer.
;	bne @end	; Yes, so we're done.
;	bra @copy	; No, so start copying the character.
;@isdelm2:
;	jsr isdelm2	; Get the delimiter value from isdelm2.
;	bra @delmchk	; Check the delimiter.
;@copy:
;	ldy.w idx1	; Get the lexeme index.
;	sta (ptr3), y	; Copy one byte from the screen buffer, to the command buffer.
;	inc.w idx0	; Increment the string index.
;	inc.w idx1	; Increment the lexeme index.
;	lsr #8		; Shift in the next byte.
;	inc regg	; Increment the byte count.
;	ldb regg	; Get back the byte count.
;	cpb #7		; Did we shift in eight bytes?
;	beq @loop	; Yes, so get eight more bytes.
;	bra @loop1	; No, so keep shifting in more bytes.
;@end:
;	ldb #0		; Reset B.
;	ldy.w idx1	; Get the lexeme index.
;	stb (ptr3), y	; Terminate the command buffer.
;@end1:
;	ldy.w idx0	; Get the string index.
;	tba		; Reset A.
;	rts		; End of delmcpy.


; get_rs
;	Map a register size suffix character to its size code.
;	In:	A = suffix character (either case).
;	Out:	A = 1 for 'w', 2 for 'd', 3 for 'q', 0 for anything else.
;	B, and the isop flag (regb) are left untouched. The original
;	phb / ldb #0 / plb sequence had no net effect, and was removed.
get_rs:
	jsr tolower	; Convert the character to lowercase.
	cmp #'w'	; Is it .w?
	beq @r1		; Yes, so return 1.
	cmp #'d'	; No, but was it .d?
	beq @r2		; Yes, so return 2.
	cmp #'q'	; No, but was it .q?
	beq @r3		; Yes, so return 3.
@r0:
	lda #0		; Return 0.
	rts		; End of get_rs.
@r1:
	lda #1		; Return 1.
	rts		; End of get_rs.
@r2:
	lda #2		; Return 2.
	rts		; End of get_rs.
@r3:
	lda #3		; Return 3.
	rts		; End of get_rs.


; make_tok
;	Placeholder for token creation. Currently does nothing, and returns.
make_tok:
	nop		;
@end:
	rts		; End of make_tok.