use std::{collections::HashMap, fmt::Display}; use crate::{ error::{ErrorKind, KabelError}, token, token_display, }; pub struct Lexer { input: Vec, start: usize, current: usize, line: usize, line_start: usize, line_current: usize, column: usize, c: char, keywords: HashMap, pub errors: Vec, pub output: Vec, } impl Lexer { pub fn new(input: String) -> Self { let mut keywords = HashMap::new(); keywords.insert("function".to_string(), TokenType::Function); keywords.insert("return".to_string(), TokenType::Return); keywords.insert("loop".to_string(), TokenType::Loop); keywords.insert("while".to_string(), TokenType::While); keywords.insert("for".to_string(), TokenType::For); keywords.insert("break".to_string(), TokenType::Break); keywords.insert("continue".to_string(), TokenType::Continue); keywords.insert("if".to_string(), TokenType::If); keywords.insert("else".to_string(), TokenType::Else); keywords.insert("var".to_string(), TokenType::Var); keywords.insert("true".to_string(), TokenType::True); keywords.insert("false".to_string(), TokenType::False); keywords.insert("print".to_string(), TokenType::Print); Self { input: input.chars().collect(), start: 0, current: 0, line: 0, line_start: 0, line_current: 0, column: 0, c: '\0', keywords, errors: Vec::new(), output: Vec::new(), } } pub fn next_token(&mut self) -> bool { self.read_char(); match self.c { '+' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::PlusEqual)); self.start = self.line_current; } else if self.peek() == '+' { self.read_char(); self.output.push(token!(self, TokenType::PlusPlus)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Plus)); self.start = self.line_current; } } '-' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::MinusEqual)); self.start = self.line_current; } else if self.peek() == '-' { self.read_char(); self.output.push(token!(self, TokenType::MinusMinus)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Minus)); self.start = self.line_current; } } '*' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::StarEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Star)); self.start = self.line_current; } } '/' => { if self.peek() == '/' { while self.peek() != '\n' && self.current < self.input.len() { self.read_char(); } self.start = self.line_current; } else if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::SlashEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Slash)); self.start = self.line_current; } } '%' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::PercentEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Percent)); self.start = self.line_current; } } '(' => { self.output.push(token!(self, TokenType::LeftParen)); self.start = self.line_current; } ')' => { self.output.push(token!(self, TokenType::RightParen)); self.start = self.line_current; } '{' => { self.output.push(token!(self, TokenType::LeftBrace)); self.start = self.line_current; } '}' => { self.output.push(token!(self, TokenType::RightBrace)); self.start = self.line_current; } '[' => { self.output.push(token!(self, TokenType::LeftSquare)); self.start = self.line_current; } ']' => { self.output.push(token!(self, TokenType::RightSquare)); self.start = self.line_current; } '.' => { self.output.push(token!(self, TokenType::Period)); self.start = self.line_current; } ',' => { self.output.push(token!(self, TokenType::Comma)); self.start = self.line_current; } ';' => { self.output.push(token!(self, TokenType::Semicolon)); self.start = self.line_current; } ':' => { self.output.push(token!(self, TokenType::Colon)); self.start = self.line_current; } '?' => { self.output.push(token!(self, TokenType::Question)); self.start = self.line_current; } '^' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::CaretEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Caret)); self.start = self.line_current; } } '|' => { if self.peek() == '|' { self.read_char(); self.output.push(token!(self, TokenType::OrOr)); self.start = self.line_current; } else if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::OrEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Or)); self.start = self.line_current; } } '&' => { if self.peek() == '&' { self.read_char(); self.output.push(token!(self, TokenType::AndAnd)); self.start = self.line_current; } else if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::AndEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::And)); self.start = self.line_current; } } '=' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::EqualEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Equal)); self.start = self.line_current; } } '!' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::BangEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Bang)); self.start = self.line_current; } } '>' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::GreaterEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Greater)); self.start = self.line_current; } } '<' => { if self.peek() == '=' { self.read_char(); self.output.push(token!(self, TokenType::LessEqual)); self.start = self.line_current; } else { self.output.push(token!(self, TokenType::Less)); self.start = self.line_current; } } '"' => { let mut contents = String::new(); while self.read_char() != '"' { if self.c == '\0' { self.errors.push(KabelError::new( ErrorKind::UnexpectedEof, "File ended before closing quote".to_string(), self.line, self.column, self.input[self.start..self.current].iter().collect(), )); return false; } contents.push(self.c as char); } self.line_current += contents.len(); self.output.push(token!(self, TokenType::Str(contents))); self.start = self.line_current; } '\n' => { self.line += 1; self.line_start = self.current; self.start = 0; self.line_current = 0; self.column = 0; } ' ' | '\r' | '\t' => { self.start = self.line_current; } '\0' => return false, c => { if c.is_ascii_alphabetic() || c == '_' { let mut content = (c as char).to_string(); while self.peek().is_ascii_alphanumeric() || self.c == '_' { content.push(self.c as char); self.read_char(); } if self.keywords.contains_key(&content) { self.output .push(token!(self, self.keywords.get(&content).unwrap().clone())); } else { self.output.push(token!(self, TokenType::Ident(content))); } self.start = self.line_current; } else if c.is_ascii_digit() { let mut number = (c as char).to_string(); while self.peek().is_ascii_digit() { number.push(self.c as char); self.read_char(); } if self.c == '.' { number.push('.'); while self.read_char().is_ascii_digit() { number.push(self.c as char); } } // panic = error in this code self.output .push(token!(self, TokenType::Num(number.parse().unwrap()))); self.start = self.line_current; } else { self.errors.push(KabelError::new( ErrorKind::UnexpectedToken, format!("Stray \"{0}\"", c as char), self.line, self.column, self.input[self.line_current..self.current].iter().collect(), )); } } } true } pub fn read_char(&mut self) -> char { if self.current >= self.input.len() { self.c = '\0'; // EOF return self.c; } self.c = self.input[self.current]; self.current += 1; self.column += 1; self.line_current += 1; return self.c; } pub fn peek(&mut self) -> char { if self.current >= self.input.len() { self.c = '\0'; // EOF return self.c; } self.c = self.input[self.current]; return self.c; } } #[derive(Debug, Clone)] pub struct Token { pub token_type: TokenType, pub start_column: usize, pub end_column: usize, pub line: usize, } #[derive(Debug, Clone, PartialEq)] pub enum TokenType { // keywords Function, Return, Loop, While, For, Break, Continue, If, Else, Var, True, False, Print, // characters Star, StarEqual, Slash, SlashEqual, Percent, PercentEqual, Plus, PlusPlus, PlusEqual, Minus, MinusMinus, MinusEqual, LeftParen, RightParen, LeftBrace, RightBrace, LeftSquare, RightSquare, Equal, EqualEqual, Bang, BangEqual, Greater, GreaterEqual, Less, LessEqual, And, AndEqual, AndAnd, Or, OrEqual, OrOr, Caret, CaretEqual, Period, Comma, Semicolon, Colon, Question, Ident(String), Str(String), Num(f32), } impl Display for TokenType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use TokenType::*; token_display!(*self, f, Function, Return, Loop, While, For, Break, Continue, If, Else, Var, True, False, Print, Star, StarEqual, Slash, SlashEqual, Percent, PercentEqual, Plus, PlusPlus, PlusEqual, Minus, MinusMinus, MinusEqual, LeftParen, RightParen, LeftBrace, RightBrace, LeftSquare, RightSquare, Equal, EqualEqual, Bang, BangEqual, Greater, GreaterEqual, Less, LessEqual, And, AndEqual, AndAnd, Or, OrEqual, OrOr, Caret, CaretEqual, Period, Comma, Semicolon, Colon, Question); match *self { Ident(ref name) => { f.write_str("Ident ")?; f.write_str(name)?; } Str(ref value) => { f.write_str("Str ")?; f.write_str(value)?; } Num(ref value) => { f.write_str("Num ")?; f.write_str(&value.to_string())?; } _ => {} } Ok(()) } }