From a165bb58c50922b5731699c73b74a36d654a72b9 Mon Sep 17 00:00:00 2001 From: ghostly_zsh Date: Fri, 16 Aug 2024 16:52:39 -0500 Subject: [PATCH] fix error with null in file, fix error data in string token, add "but" to errors --- kabel/src/lexer.rs | 146 ++++++++++++++++++++++++-------------------- kabel/src/main.rs | 12 +--- kabel/src/parser.rs | 46 +++++++------- kabel/tmp.kab | 1 + 4 files changed, 105 insertions(+), 100 deletions(-) create mode 100644 kabel/tmp.kab diff --git a/kabel/src/lexer.rs b/kabel/src/lexer.rs index 12b3bf229494898b149cf1772a52efacb0df50b8..b6d68125d97705d1bf9b1321058ba93427696da5 100644 --- a/kabel/src/lexer.rs +++ b/kabel/src/lexer.rs @@ -49,17 +49,27 @@ impl Lexer { output: Vec::new(), } } - pub fn next_token(&mut self) -> bool { - self.read_char(); + let result = self.read_next_token(); + match result { + Ok(b) => b, + Err(e) => { self.errors.push(e); true } + } + } + + fn read_next_token(&mut self) -> Result { + let result = self.read_char(""); + if result.is_err() { + return Ok(false); + } match self.c { '+' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::PlusEqual)); self.start = self.line_current; - } else if self.peek() == '+' { - self.read_char(); + } else if self.peek("").unwrap_or(' ') == '+' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::PlusPlus)); self.start = self.line_current; } else { @@ -68,12 +78,12 @@ impl Lexer { } } '-' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::MinusEqual)); self.start = self.line_current; - } else if self.peek() == '-' { - self.read_char(); + } else if self.peek("").unwrap_or(' ') == '-' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::MinusMinus)); self.start = self.line_current; } else { @@ -82,8 +92,8 @@ impl Lexer { } } '*' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::StarEqual)); self.start = self.line_current; } else { @@ -92,13 +102,13 @@ impl Lexer { } } '/' => { - if self.peek() == '/' { - while self.peek() != '\n' && self.current < self.input.len() { - self.read_char(); + if self.peek("").unwrap_or(' ') == '/' { + while self.peek("").unwrap_or(' ') != '\n' && self.current < self.input.len() { + self.read_char("Kabel broke")?; } self.start = self.line_current; - } else if self.peek() == '=' { - self.read_char(); + } else if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::SlashEqual)); self.start = self.line_current; } else { @@ -107,8 +117,8 @@ impl Lexer { } } '%' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::PercentEqual)); self.start = self.line_current; } else { @@ -161,8 +171,8 @@ impl Lexer { self.start = self.line_current; } '^' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::CaretEqual)); self.start = self.line_current; } else { @@ -171,12 +181,12 @@ impl Lexer { } } '|' => { - if self.peek() == '|' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '|' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::OrOr)); self.start = self.line_current; - } else if self.peek() == '=' { - self.read_char(); + } else if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::OrEqual)); self.start = self.line_current; } else { @@ -185,12 +195,12 @@ impl Lexer { } } '&' => { - if self.peek() == '&' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '&' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::AndAnd)); self.start = self.line_current; - } else if self.peek() == '=' { - self.read_char(); + } else if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::AndEqual)); self.start = self.line_current; } else { @@ -199,8 +209,8 @@ impl Lexer { } } '=' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::EqualEqual)); self.start = self.line_current; } else { @@ -209,8 +219,8 @@ impl Lexer { } } '!' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::BangEqual)); self.start = self.line_current; } else { @@ -219,8 +229,8 @@ impl Lexer { } } '>' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::GreaterEqual)); self.start = self.line_current; } else { @@ -229,8 +239,8 @@ impl Lexer { } } '<' => { - if self.peek() == '=' { - self.read_char(); + if self.peek("").unwrap_or(' ') == '=' { + self.read_char("Kabel broke")?; self.output.push(token!(self, TokenType::LessEqual)); self.start = self.line_current; } else { @@ -240,20 +250,9 @@ impl Lexer { } '"' => { let mut contents = String::new(); - while self.read_char() != '"' { - if self.c == '\0' { - self.errors.push(KabelError::new( - ErrorKind::UnexpectedEof, - "File ended before closing quote".to_string(), - self.line, - self.column, - self.input[self.start..self.current].iter().collect(), - )); - return false; - } + while self.read_char("File ended before closing quote")? != '"' { contents.push(self.c as char); } - self.line_current += contents.len(); self.output.push(token!(self, TokenType::Str(contents))); self.start = self.line_current; } @@ -267,13 +266,12 @@ impl Lexer { ' ' | '\r' | '\t' => { self.start = self.line_current; } - '\0' => return false, c => { if c.is_ascii_alphabetic() || c == '_' { let mut content = (c as char).to_string(); - while self.peek().is_ascii_alphanumeric() || self.c == '_' { + while self.peek("").unwrap_or('%').is_ascii_alphanumeric() || self.c == '_' { content.push(self.c as char); - self.read_char(); + self.read_char("Kabel broke")?; } if self.keywords.contains_key(&content) { self.output @@ -284,16 +282,16 @@ impl Lexer { self.start = self.line_current; } else if c.is_ascii_digit() { let mut number = (c as char).to_string(); - while self.peek().is_ascii_digit() { + while self.peek("").unwrap_or(' ').is_ascii_digit() { number.push(self.c as char); - self.read_char(); + self.read_char("Kabel broke")?; } if self.c == '.' { number.push('.'); - self.read_char(); - while self.peek().is_ascii_digit() { + self.read_char("Kabel broke")?; + while self.peek("").unwrap_or(' ').is_ascii_digit() { number.push(self.c as char); - self.read_char(); + self.read_char("Kabel broke")?; } } // panic = error in this code @@ -306,32 +304,46 @@ impl Lexer { format!("Stray \"{0}\"", c as char), self.line, self.column, - self.input[self.line_current..self.current].iter().collect(), + self.input[self.line_start..self.current].iter().collect(), )); } } } - true + Ok(true) } - pub fn read_char(&mut self) -> char { + pub fn read_char(&mut self, message: &str) -> Result { if self.current >= self.input.len() { - self.c = '\0'; // EOF - return self.c; + /*self.c = '\0'; // EOF + return self.c;*/ + return Err(KabelError::new( + ErrorKind::UnexpectedEof, + message.to_string(), + self.line, + self.column, + self.input[self.line_start..self.current].iter().collect(), + )); } self.c = self.input[self.current]; self.current += 1; self.column += 1; self.line_current += 1; - return self.c; + return Ok(self.c); } - pub fn peek(&mut self) -> char { + pub fn peek(&mut self, message: &str) -> Result { if self.current >= self.input.len() { - self.c = '\0'; // EOF - return self.c; + /*self.c = '\0'; // EOF + return self.c;*/ + return Err(KabelError::new( + ErrorKind::UnexpectedEof, + message.to_string(), + self.line, + self.column, + self.input[self.line_start..self.current].iter().collect(), + )); } self.c = self.input[self.current]; - return self.c; + return Ok(self.c); } } diff --git a/kabel/src/main.rs b/kabel/src/main.rs index f355469e57773cf596cb3ad1fdeb3e46e77feb2f..c0d44a5bc9f2a2d4580c0375d4d612e02e49757d 100644 --- a/kabel/src/main.rs +++ b/kabel/src/main.rs @@ -5,17 +5,9 @@ use std::{env, fs}; use kabel::{debug::{debug_ast, debug_bytecode, debug_stack, debug_token_array}, run_codegen, run_lexer, run_parser, run_semantic_analysis}; fn main() { - /*let args: Vec = env::args().collect(); + let args: Vec = env::args().collect(); let program = - fs::read_to_string(args[1].clone()).unwrap();*/ - - let program = -" -for(var i = 0; i < 5; i++) { - print i; - continue; -} -".to_string(); + fs::read_to_string(args[1].clone()).unwrap(); let mut output = "".to_string(); diff --git a/kabel/src/parser.rs b/kabel/src/parser.rs index 9448606cf2f0293f37250b0c46c745dcd81c3a93..1325c595fbe185caf8ec1d2ac6cbf1994aceb7f6 100644 --- a/kabel/src/parser.rs +++ b/kabel/src/parser.rs @@ -72,7 +72,7 @@ impl Parser { if let TokenType::Ident(name) = ident.token_type { expressions.push(name!(name, ident)); } else { - return Err(unexpected_token!(self, "Expected identifier found {}", ident)); + return Err(unexpected_token!(self, "Expected identifier but found {}", ident)); } if let TokenType::Comma = self.peek()?.token_type { self.read_token()?; @@ -91,15 +91,15 @@ impl Parser { block )); } else { - return Err(unexpected_token!(self, "Expected ) found {}", right_paren)); + return Err(unexpected_token!(self, "Expected ) but found {}", right_paren)); } } else { - return Err(unexpected_token!(self, "Expected ( found {}", left_paren)); + return Err(unexpected_token!(self, "Expected ( but found {}", left_paren)); } } else { return Err(unexpected_token!( self, - "Expected identifier found {}", + "Expected identifier but found {}", ident )); } @@ -124,7 +124,7 @@ impl Parser { semicolon )) } else { - return Err(unexpected_token!(self, "Expected ; found {}", semicolon)); + return Err(unexpected_token!(self, "Expected ; but found {}", semicolon)); } } @@ -152,10 +152,10 @@ impl Parser { block )); } else { - return Err(unexpected_token!(self, "Expected ) found {}", right_paren)); + return Err(unexpected_token!(self, "Expected ) but found {}", right_paren)); } } else { - return Err(unexpected_token!(self, "Expected ( found {}", left_paren)); + return Err(unexpected_token!(self, "Expected ( but found {}", left_paren)); } } @@ -174,7 +174,7 @@ impl Parser { let semicolon = self.read_token()?; if let TokenType::Semicolon = semicolon.token_type {} else { self.current -= 1; - return Err(unexpected_token!(self, "Expected ; found {}", semicolon)); + return Err(unexpected_token!(self, "Expected ; but found {}", semicolon)); } } } @@ -206,13 +206,13 @@ impl Parser { block ));*/ } else { - return Err(unexpected_token!(self, "Expected ) found {}", right_paren)); + return Err(unexpected_token!(self, "Expected ) but found {}", right_paren)); } } else { - return Err(unexpected_token!(self, "Expected ; found {}", semicolon_2)); + return Err(unexpected_token!(self, "Expected ; but found {}", semicolon_2)); } } else { - return Err(unexpected_token!(self, "Expected ( found {}", left_paren)); + return Err(unexpected_token!(self, "Expected ( but found {}", left_paren)); } } fn build_for(for_ident: Token, semicolon_2: Token, expression1: Option, expression2: Option, expression3: Option, block: AST) -> AST { @@ -243,7 +243,7 @@ impl Parser { if let TokenType::Semicolon = semicolon.token_type { Ok(ast_from_token!(ASTType::Break, break_ident, semicolon)) } else { - Err(unexpected_token!(self, "Expected ; found {}", semicolon)) + Err(unexpected_token!(self, "Expected ; but found {}", semicolon)) } } @@ -253,7 +253,7 @@ impl Parser { if let TokenType::Semicolon = semicolon.token_type { Ok(ast_from_token!(ASTType::Continue, continue_ident, semicolon)) } else { - Err(unexpected_token!(self, "Expected ; found {}", semicolon)) + Err(unexpected_token!(self, "Expected ; but found {}", semicolon)) } } @@ -303,10 +303,10 @@ impl Parser { block )); } else { - return Err(unexpected_token!(self, "Expected ) found {}", right_paren)); + return Err(unexpected_token!(self, "Expected ) but found {}", right_paren)); } } else { - return Err(unexpected_token!(self, "Expected ( found {}", left_paren)); + return Err(unexpected_token!(self, "Expected ( but found {}", left_paren)); } } @@ -320,7 +320,7 @@ impl Parser { let right_brace = self.read_token()?; return Ok(ast_from_token!(ASTType::Block(stmts), left_brace, right_brace)); } else { - return Err(unexpected_token!(self, "Expected {{ found {}", left_brace)); + return Err(unexpected_token!(self, "Expected {{ but found {}", left_brace)); } } @@ -339,15 +339,15 @@ impl Parser { semicolon )); } else { - return Err(unexpected_token!(self, "Expected ; found {}", equal)); + return Err(unexpected_token!(self, "Expected ; but found {}", equal)); } } else { - return Err(unexpected_token!(self, "Expected = found {}", equal)); + return Err(unexpected_token!(self, "Expected = but found {}", equal)); } } else { return Err(unexpected_token!( self, - "Expected identifier found {}", + "Expected identifier but found {}", ident )); } @@ -361,7 +361,7 @@ impl Parser { if matches!(semicolon.token_type, TokenType::Semicolon) { Ok(ast_from_token!(ASTType::Print(Box::new(expression)), print_ident, semicolon)) } else { - Err(unexpected_token!(self, "Expected ; found {}", semicolon)) + Err(unexpected_token!(self, "Expected ; but found {}", semicolon)) } } @@ -383,7 +383,7 @@ impl Parser { expression, semicolon)); } else { self.current -= 1; - return Err(unexpected_token!(self, "Expected ; found {}", semicolon)); + return Err(unexpected_token!(self, "Expected ; but found {}", semicolon)); } } @@ -608,7 +608,7 @@ impl Parser { false_expr )); } else { - return Err(unexpected_token!(self, "Expected : found {}", self.token)); + return Err(unexpected_token!(self, "Expected : but found {}", self.token)); } } @@ -864,7 +864,7 @@ impl Parser { right_brace ); } else { - return Err(unexpected_token!(self, "Expected ] found {}", right_brace)); + return Err(unexpected_token!(self, "Expected ] but found {}", right_brace)); } } diff --git a/kabel/tmp.kab b/kabel/tmp.kab new file mode 100644 index 0000000000000000000000000000000000000000..5163081da150e80c7afa70f5a28b8249b58e831d --- /dev/null +++ b/kabel/tmp.kab @@ -0,0 +1 @@ +print f"kabel is not py";