summaryrefslogtreecommitdiff
path: root/programs/sub-suite/lexer.s
diff options
context:
space:
mode:
Diffstat (limited to 'programs/sub-suite/lexer.s')
-rw-r--r--programs/sub-suite/lexer.s228
1 files changed, 169 insertions, 59 deletions
diff --git a/programs/sub-suite/lexer.s b/programs/sub-suite/lexer.s
index 3a856b5..72f1db6 100644
--- a/programs/sub-suite/lexer.s
+++ b/programs/sub-suite/lexer.s
@@ -8,6 +8,10 @@ lex:
sty.q idx0 ; Clear the first index.
sty.q idx1 ; Clear the second index.
sty.q idx2 ; Clear the third index.
+ sty.d t_id ; Clear the token ID, type, space count, and tab count.
+ sty.q t_val ; Clear the token value.
+ sty.q t_str ; Clear the token string.
+ sty.q t_sym ; Clear the token symbol.
sty regb ; Clear the isop flag.
; lda (ptr), y ; Get a character from the line.
; pha ; Preserve the character.
@@ -17,11 +21,11 @@ lex:
lda #2 ; Get the third byte, of the line table address.
lsl #$10 ; Shift it by 2 bytes.
ldb #1 ; Set the second pointer
- lda.w ltok ; to the last line.
+ lda.w lline ; to the last line.
jsr set_ptr ;
lda.w (ptr2) ; Get the next line.
jsr set_ptr ; Set the second pointer to the next line.
- sta.w ctok ; Make it the current line.
+ sta.w cline ; Make it the current line.
and #0 ; Reset A.
@loop:
ldy.w idx0 ; Get the string index.
@@ -36,7 +40,7 @@ lex:
jsr isdelm ; Get the delimiter.
and #$10 ; Is this character, a space, or tab?
pla ; Get the character back.
- beq @isstart ; No, so check for the start of the line.
+ beq @switch ; No, so start lexing.
inc.w idx0 ; Yes, so increment the string index.
cmp #' ' ; Is this character, a space?
beq @incs ; Yes, so increment the starting space count.
@@ -44,31 +48,66 @@ lex:
beq @inct ; Yes, so increment the starting tab count.
bra @spaces ; No, so Keep looping.
@incs:
- inc idx1 ; Increment the space count.
+ inc t_space ; Increment the space count.
bra @spaces ; Keep looping.
@inct:
- inc idx1+1 ; Increment the tab count.
+ inc t_tab ; Increment the tab count.
bra @spaces ; Keep looping.
-@isstart:
- pha.w ; Preserve the character.
- lda.w idx1 ; Was there any whitespace?
- pla.w ; Get the character back.
- beq @switch ; No, so start lexing.
- cpb #1 ; Yes, and are we at the start of the line?
- bne @switch ; No, so start lexing.
-@whtspace:
- ldy #2 ; Yes, so set the line index to the starting whitespace counters.
- lda.w idx1 ; Get both indecies.
- sta.w (ptr2), y ; Save them in the line.
- and #0 ; Reset A.
- sta.w idx1 ; Reset the second index.
- deb ; Set the isstart flag to false.
@switch:
ldy.w idx0 ; Get the string index.
lda (ptr), y ; Get the character.
jsr get_ptok ; Get the pre-token.
+ pha ; Preserve the pre-token.
+ jsr is_altok ; Is this one of the single letter pre-tokens?
+ pla ; Get the pre-token back.
+ bne @is_altok ; Yes, so check the rest of the pre-token.
+@parse:
jsr parse_ptok ; Parse the pre-token.
; beq @end ; We got to the end of the string.
+ lda lex_type ; Get the lexeme type.
+ cmp #TOK_EXPR ; Was this token, an expression?
+ beq @inc_idx ; Yes, so increment the index.
+ ldy.w idx0 ; No, so get the string index.
+ lda (ptr), y ; Get a character from the line.
+ jsr isdelm2 ; Is this not a delimiter?
+ beq @inc_idx ; Yes, so increment the index.
+ bra @loop ; No, so keep looping.
+@is_altok:
+ sta lex_type ; Save the pre-token in the lexeme type.
+ iny ; Increment the offset.
+ cmp #PTOK_S ; Is this pre-token, PTOK_S?
+ bne @ptok_p ; No, so check for PTOK_P.
+ lda (ptr), y ; Yes, so get the next character after it.
+ jsr tolower ; Convert it to lowercase.
+ cmp #'p' ; Is the next character 'p'?
+ bne @ptok_p ; No, so check for PTOK_P.
+ bra @inc_offset ; Yes, so increment the offset.
+@ptok_p:
+ cmp #PTOK_P ; Is this pre-token, PTOK_P?
+ bne @is_altok2 ; No, so skip incrementing the offset.
+ lda (ptr), y ; Yes, so get the next character after it.
+ jsr tolower ; Convert it to lowercase.
+ cmp #'c' ; Is the next character 'c'?
+ bne @is_altok2 ; No, so skip incrementing the offset.
+@inc_offset:
+ iny ; Increment the offset.
+@is_altok2:
+ lda (ptr), y ; Yes, so get the character, at the current offset.
+ jsr get_ptok ; Get the pre-token of the character.
+ cmp #PTOK_P ; Is this pre-token greater than PTOK_P?
+ bcc @ptok_num ; No, so check for PTOK_NUM.
+ beq @ptok_num ;
+ cmp #PTOK_B ; Yes, and is this pre-token greater than, or equal to PTOK_B?
+ bcs @ptok_al ; Yes, so set the pre-token to PTOK_ALPH.
+ lda lex_type ; No, so get the original pre-token back.
+ ldy.w idx0 ; Get the string index.
+ bra @parse ; Go back to parsing the pre-token.
+@ptok_al:
+ lda #PTOK_ALPH ; Set the pre-token to PTOK_ALPH.
+ ldy.w idx0 ; Get the string index.
+ bra @parse ; Go back to parsing the pre-token.
+@inc_idx:
+ inc.w idx0 ; Increment the string index.
bra @loop ; Keep looping.
@end:
jsr update_ptr ; Get the screen buffer index.
@@ -97,21 +136,23 @@ ptok_dot:
ldb #1 ; Make init_lex increment the string index.
jsr init_lex ; Initialize the lexeme buffer for copying.
ldb #$11 ; Set the delimiter comparison value to whitespace.
+ lda #0 ; Set the isesc flag to false.
+ pha ;
jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter.
+ pla ;
@isop:
lda regb ; Has the isop flag been set?
beq @dir ; No, so check for a directive.
@rs:
lda #TOK_RS ; Yes, so set the lexeme type to TOK_RS.
sta lex_type ;
+ sta t_id ; Also set the token ID to TOK_RS.
ldy.w idx1 ; Get the lexeme index.
dey ; Decrement the lexeme index.
lda (ptr3), y ; Get the suffix character.
jsr get_rs ; Get the register size.
bra @end ; We are done.
@dir:
- lda #TOK_DIR ; Set the lexeme type to TOK_DIR.
- sta lex_type ;
ldb #0 ; Make the lexeme buffer, the first pointer.
stb.q idx1 ; Reset the first index.
jsr set_lexptr ; Set up the lexeme buffer.
@@ -135,7 +176,11 @@ ptok_dot:
sta.w idx2 ; Save the offset in the third index.
bra @dir_loop ; Keep looping.
@found:
- nop ;
+ lda #TOK_DIR ; Set the lexeme type to TOK_DIR.
+ sta lex_type ;
+ sta t_id ; Also set the token ID to TOK_DIR.
+ lda idx1 ; Set the token type to the directive ID.
+ sta t_type ;
@end:
jsr make_tok ; Create the token.
jsr set_cmdbuf ; Set the first pointer to the command buffer.
@@ -156,7 +201,6 @@ ptok_min:
lda #EXPR_MINUS ; Set the expresion type to EXPR_MINUS.
bra ptok_expr ; Set up the token.
ptok_gt:
-
lda #EXPR_LOW ; Set the expression type to EXPR_LOW.
bra ptok_expr ; Set up the token.
ptok_lt:
@@ -165,7 +209,9 @@ ptok_lt:
ptok_pipe:
lda #EXPR_OR ; Set the expression type to EXPR_OR, and fall through to ptok_expr.
ptok_expr:
+ sta t_type ; Set the token type to the expression type.
lda #TOK_EXPR ; Set the lexeme type to TOK_EXPR.
+ sta t_id ; Also set the token ID to TOK_EXPR.
sta lex_type ;
inc.w idx0 ;
; ldb #1 ; Make init_lex increment the string index.
@@ -182,6 +228,9 @@ ptok_rbrk:
ptok_com:
inc.w idx0 ; Increment the string index.
rts ; End of parse_ptok.
+ptok_br:
+ inc.w idx0 ; Increment the string index.
+ rts ; End of parse_ptok.
ptok_xr:
inc.w idx0 ; Increment the string index.
rts ; End of parse_ptok.
@@ -197,16 +246,36 @@ ptok_pc:
ptok_dqu:
ldb #1 ; Make init_lex increment the string index.
jsr init_lex ; Initialize the lexeme buffer for copying.
- ldb #4 ; Set the delimiter comparison value to a double quote.
+ ldb #5 ; Set the delimiter comparison value to a double quote, or EOL.
+ lda #1 ; Set the isesc flag to true.
+ pha ; Push the isesc flag onto the stack, for delmcpy.
+ and #0 ; Clear A, to make delmcpy use isdelm.
jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter.
+ pla ; Pull the isesc flag back off the stack.
+ lda #TOK_DQUOT ; Set the lexeme type to TOK_DQUOT.
+ sta lex_type ; Save it in the lexeme type.
+ sta t_id ; Also set the token ID to TOK_DQUOT.
+ lda.d ptr3 ; Get the address of the lexeme buffer.
+ sta.q t_str ; Save it in the token string.
@end:
+ jsr make_tok ; Create the token.
rts ; End of parse_ptok.
ptok_squ:
ldb #1 ; Make init_lex increment the string index.
jsr init_lex ; Initialize the lexeme buffer for copying.
- ldb #8 ; Set the delimiter comparison value to a single quote.
+ ldb #9 ; Set the delimiter comparison value to a single quote, or EOL.
+ lda #1 ; Set the isesc flag to true.
+ pha ; Push the isesc flag onto the stack, for delmcpy.
+ and #0 ; Clear A, to make delmcpy use isdelm.
jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter.
+ pla ; Pull the isesc flag back off the stack.
+ lda #TOK_SQUOT ; Set the lexeme type to TOK_SQUOT.
+ sta lex_type ; Save it in the lexeme type.
+ sta t_id ; Also set the token ID to TOK_SQUOT.
+ lda.d ptr3 ; Get the address of the lexeme buffer.
+ sta.q t_str ; Save it in the token string.
@end:
+ jsr make_tok ; Create the token.
rts ; End of parse_ptok.
ptok_hash:
inc.w idx0 ;
@@ -215,33 +284,49 @@ ptok_scol:
ldb #1 ; Make init_lex increment the string index.
jsr init_lex ; Initialize the lexeme buffer for copying.
ldb #1 ; Set the delimiter to EOL.
+ lda #0 ; Set the isesc flag to false.
+ pha ;
jsr delmcpy ; Copy the string, to the lexeme buffer, until EOL.
+ pla ;
+ lda #TOK_SCOLN ; Set the lexeme type to TOK_SCOLN.
+ sta lex_type ;
+ sta t_id ; Also set the token ID to TOK_SCOLN.
+ lda.d ptr3 ; Get the address of the lexeme buffer.
+ sta.q t_str ; Save it in the token string.
@end:
+ jsr make_tok ; Create the token.
rts ; End of parse_ptok.
ptok_dolr:
lda #TOK_HEX ; Set the lexeme type to TOK_HEX.
sta lex_type ; Save it in the lexeme type.
+ sta t_id ; Also set the token ID to TOK_HEX.
lda #$10 ; Set the base to Hexadecimal.
ldb #1 ; Make init_lex increment the string index.
bra ptok_num2 ; Parse the value, with the base in A, and the init_lex flag in B.
ptok_prcn:
lda #TOK_BIN ; Set the lexeme type to TOK_BIN.
sta lex_type ; Save it in the lexeme type.
+ sta t_id ; Also set the token ID to TOK_BIN.
lda #2 ; Set the base to Binary.
ldb #1 ; Make init_lex increment the string index.
bra ptok_num2 ; Parse the value, with the base in A, and the init_lex flag in B.
ptok_num:
lda #TOK_DEC ; Set the lexeme type to TOK_DEC.
sta lex_type ; Save it in the lexeme type.
+ sta t_id ; Also set the token ID to TOK_DEC.
lda #10 ; Set the base to Decimal.
ldb #0 ; Do not let init_lex increment the string index.
ptok_num2:
pha ; Preserve the base.
jsr init_lex ; Initialize the lexeme buffer for copying.
ldb #3 ; Set the delimiter to both the EOL, and a comma.
+ lda #0 ; Set the isesc flag to false, A being zero also makes delmcpy use isdelm.
+ pha ; Push the isesc flag onto the stack, for delmcpy.
jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter.
+ pla ; Pull the isesc flag back off the stack.
pla ; Get the base back.
jsr strtoullg ; Convert the string into a numeric value.
+ sta.q t_val ; Set the token value to the converted value.
jsr make_tok ; Create the token.
jsr set_cmdbuf ; Set the first pointer to the command buffer.
rts ; End of parse_ptok.
@@ -249,17 +334,21 @@ ptok_alph:
ldb #0 ; Do not let init_lex increment the string index.
jsr init_lex ; Initialize the lexeme buffer for copying.
ldb #3 ; Stop at any possible delimiter.
+ lda #0 ; Set the isesc flag to false.
+ pha ;
tba ; Use isdelm2 for the comparison.
jsr delmcpy ; Copy the string, to the lexeme buffer, until delimiter.
+ pla ;
lda #0 ; Reset A.
sta regb ; Clear the isop flag.
@isop:
ldb #0 ; Make the lexeme buffer, the first pointer.
stb.q idx1 ; Reset the second index.
+ stb.q idx2 ; Reset the third index.
jsr set_lexptr ; Set up the lexeme buffer.
@isop_loop:
ldb idx1 ; Get the instruction ID.
- cpb #OPNUM-1 ; Have we reached the end of the mnemonic table?
+ cpb #OPNUM ; Have we reached the end of the mnemonic table?
beq @end ; Yes, so we're done.
lda.w #mne ; No, so get the start of the mnemonic table.
clc ; Prepare for a non carrying add.
@@ -268,7 +357,6 @@ ptok_alph:
jsr strcaseg ; Is the lexeme buffer, the same as the mnemonic string?
pla.q ; Get the mnemonic string pointer back.
beq @found ; Yes, so create a new token.
- beq @end ; Yes, so we're done.
inc idx1 ; No, so increment the instruction ID.
@offset:
jsr strlen ; Get the string's length.
@@ -280,6 +368,11 @@ ptok_alph:
@found:
lda #TOK_MNE ; Set the lexeme type to TOK_MNE.
sta lex_type ;
+ sta t_id ; Also set the token ID to TOK_MNE.
+ lda.q idx1 ; Get the instruction ID.
+ sta.q t_val ; Set the token value to the instruction ID.
+ lda #$FF ; Set the token type to -1.
+ sta t_type ;
inc regb ; Set the isop flag.
@end:
jsr make_tok ; Create the token.
@@ -333,79 +426,96 @@ init_lex:
delmcpy:
- sta rega ; Save the delimiter check flag.
- stb regc ; Save the delimiter comparison value.
+ pha ; Push the delimiter check flag (A), non-zero selects isdelm2.
+ phb ; Push the delimiter comparison value (B).
+ and #0 ; Reset A.
+ pha ; Push a cleared isesc flag, it is read back as sp+1.
+; sta rega ; Save the delimiter check flag.
+; stb regc ; Save the delimiter comparison value.
@loop:
- ldb #0 ; Reset the B register.
- stb regg ; Reset the byte count.
ldy.w idx0 ; Get the string index.
- lda.q (ptr), y ; Get eight bytes from the current line.
-@loop1:
- pha.q ; Save the string buffer.
- and #$FF ; Get the current byte.
+ lda (ptr), y ; Get a character from the line.
pha ; Preserve the character.
- lda rega ; Are we calling isdelm2?
- pla ; Get the character back.
+ lda sp+4 ; Are we calling isdelm2? The check flag is at sp+4, while the character is pushed.
+ pla ; Get the character back.
bne @isdelm2 ; Yes, so use isdelm2.
jsr isdelm ; No, so get the delimiter value from isdelm.
@delmchk:
- and regc ; Are both delimiter values, the same?
- pla.q ; Get back the string buffer.
- bne @end ; Yes, so we're done.
- bra @copy ; No, so start copying the character.
+ and sp+2 ; Does the delimiter value, share any bits with the comparison value?
+ beq @copy ; No, so copy the character.
+@isesc:
+ lda sp+1 ; Yes, but was the isesc flag true?
+ beq @end ; No, so we're done.
+ bra @copy ; Yes, so copy the delimiter as a normal character.
@isdelm2:
jsr isdelm2 ; Get the delimiter value from isdelm2.
bra @delmchk ; Check the delimiter.
@copy:
+ lda sp+12 ; Was the do_isesc flag set? Presumably the byte the caller pushed before the jsr, TODO confirm the offset.
+ bne @do_isesc ; Yes, so set the isesc flag.
+@copy1:
+ lda (ptr), y ; Get the character from the line again, since A was overwritten.
ldy.w idx1 ; Get the lexeme index.
- sta (ptr3), y ; Copy one byte from the screen buffer, to the command buffer.
+ sta (ptr3), y ; Copy the character to the lexeme buffer.
inc.w idx0 ; Increment the string index.
inc.w idx1 ; Increment the lexeme index.
- lsr #8 ; Shift in the next byte.
- inc regg ; Increment the byte count.
- ldb regg ; Get back the byte count.
- cpb #7 ; Did we shift in eight bytes?
- beq @loop ; Yes, so get eight more bytes.
- bra @loop1 ; No, so keep shifting in more bytes.
+ bra @loop ; Keep looping.
+@do_isesc:
+ jsr isesc ; Check if this is an escaped character. NOTE(review): A holds the do_isesc flag here, not the character, confirm isesc does not need it.
+ sta sp+1 ; Save the result in the isesc flag.
+ bra @copy1 ; Copy the character.
@end:
- ldb #0 ; Reset B.
+ pla.w ; Pull the isesc flag, and the delimiter comparison value off the stack.
+ pla ; Pull the delimiter check flag off the stack.
+ and #0 ; Reset A.
ldy.w idx1 ; Get the lexeme index.
- stb (ptr3), y ; Terminate the command buffer.
-@end1:
+ sta (ptr3), y ; Terminate the lexeme buffer.
ldy.w idx0 ; Get the string index.
- tba ; Reset A.
rts ; End of delmcpy.
;@loop:
+; ldb #0 ; Reset the B register.
+; stb regg ; Reset the byte count.
; ldy.w idx0 ; Get the string index.
-; lda (ptr), y ; Get a character from the line.
+; lda.q (ptr), y ; Get eight bytes from the current line.
+;@loop1:
+; pha.q ; Save the string buffer.
+; and #$FF ; Get the current byte.
; pha ; Preserve the character.
; lda rega ; Are we calling isdelm2?
-; pla ; Get the character back.
+; pla ; Get the character back.
; bne @isdelm2 ; Yes, so use isdelm2.
; jsr isdelm ; No, so get the delimiter value from isdelm.
;@delmchk:
; and regc ; Are both delimiter values, the same?
+; pla.q ; Get back the string buffer.
; bne @end ; Yes, so we're done.
; bra @copy ; No, so start copying the character.
;@isdelm2:
; jsr isdelm2 ; Get the delimiter value from isdelm2.
; bra @delmchk ; Check the delimiter.
;@copy:
-; lda (ptr), y ; Get a character from the line.
; ldy.w idx1 ; Get the lexeme index.
-; sta (ptr3), y ; Copy the character to the lexeme buffer.
+; sta (ptr3), y ; Copy one byte from the screen buffer, to the command buffer.
; inc.w idx0 ; Increment the string index.
; inc.w idx1 ; Increment the lexeme index.
-; bra @loop ; Keep looping.
+; lsr #8 ; Shift in the next byte.
+; inc regg ; Increment the byte count.
+; ldb regg ; Get back the byte count.
+; cpb #7 ; Did we shift in eight bytes?
+; beq @loop ; Yes, so get eight more bytes.
+; bra @loop1 ; No, so keep shifting in more bytes.
;@end:
+; ldb #0 ; Reset B.
; ldy.w idx1 ; Get the lexeme index.
-; lda #0 ; Terminate the lexeme buffer.
-; sta (ptr3), y ;
+; stb (ptr3), y ; Terminate the command buffer.
+;@end1:
; ldy.w idx0 ; Get the string index.
+; tba ; Reset A.
; rts ; End of delmcpy.
+
get_rs:
phb ; Preserve B.
ldb #0 ; Set the isop flag to false.