use std::str::from_utf8; use crate::{ error::{ErrorKind, KabelError}, token, }; pub struct Lexer { input: Vec, start: usize, current: usize, line: usize, line_start: usize, column: usize, c: u8, pub errors: Vec, pub output: Vec, } impl Lexer { pub fn new(input: String) -> Self { Self { input: input.as_bytes().to_vec(), start: 0, current: 0, line: 0, line_start: 0, column: 0, c: 0x00, errors: Vec::new(), output: Vec::new(), } } pub fn next_token(&mut self) -> bool { self.read_char(); match self.c { b'+' => { self.output.push(token!(self, TokenType::Plus)); self.start = self.current; } b'-' => { self.output.push(token!(self, TokenType::Minus)); self.start = self.current; } b'*' => { self.output.push(token!(self, TokenType::Star)); self.start = self.current; } b'/' => { if self.peek() == b'/' { while self.peek() != b'\n' && self.current < self.input.len() { self.read_char(); } self.start = self.current; } else { self.output.push(token!(self, TokenType::Slash)); self.start = self.current; } } b'(' => { self.output.push(token!(self, TokenType::LeftParen)); self.start = self.current; } b')' => { self.output.push(token!(self, TokenType::RightParen)); self.start = self.current; } b'{' => { self.output.push(token!(self, TokenType::LeftBrace)); self.start = self.current; } b'}' => { self.output.push(token!(self, TokenType::RightBrace)); self.start = self.current; } b'[' => { self.output.push(token!(self, TokenType::LeftSquare)); self.start = self.current; } b']' => { self.output.push(token!(self, TokenType::RightSquare)); self.start = self.current; } b'.' => { self.output.push(token!(self, TokenType::Period)); self.start = self.current; } b',' => { self.output.push(token!(self, TokenType::Comma)); self.start = self.current; } b';' => { self.output.push(token!(self, TokenType::Semicolon)); self.start = self.current; } b'|' => { if self.peek() == b'|' { self.read_char(); self.output.push(token!(self, TokenType::OrOr)); self.start = self.current; } else { self.output.push(token!(self, TokenType::Or)); self.start = self.current; } } b'&' => { if self.peek() == b'&' { self.read_char(); self.output.push(token!(self, TokenType::AndAnd)); self.start = self.current; } else { self.output.push(token!(self, TokenType::And)); self.start = self.current; } } b'=' => { if self.peek() == b'=' { self.read_char(); self.output.push(token!(self, TokenType::EqualEqual)); self.start = self.current; } else { self.output.push(token!(self, TokenType::Equal)); self.start = self.current; } } b'!' => { if self.peek() == b'=' { self.read_char(); self.output.push(token!(self, TokenType::BangEqual)); self.start = self.current; } else { self.output.push(token!(self, TokenType::Bang)); self.start = self.current; } } b'>' => { if self.peek() == b'=' { self.read_char(); self.output.push(token!(self, TokenType::GreaterEqual)); self.start = self.current; } else { self.output.push(token!(self, TokenType::Greater)); self.start = self.current; } } b'<' => { if self.peek() == b'=' { self.read_char(); self.output.push(token!(self, TokenType::LessEqual)); self.start = self.current; } else { self.output.push(token!(self, TokenType::Less)); self.start = self.current; } } b'"' => { let mut contents = String::new(); while self.read_char() != b'"' { if self.c == 0x05 { self.errors.push(KabelError::new( ErrorKind::UnexpectedEof, "File ended before closing quote".to_string(), self.line, self.column, from_utf8(&self.input[self.start..self.current]) .unwrap() .to_string(), )); return false; } contents.push(self.c as char); } self.start = self.current; self.output.push(token!(self, TokenType::Str(contents))); } b'\n' => { self.line += 1; self.line_start = self.current; self.column = 0; self.start = self.current; } b' ' | b'\r' | b'\t' => { self.start = self.current; } 0x05 => return false, c => { if c.is_ascii_alphabetic() { let mut content = (c as char).to_string(); while self.peek().is_ascii_alphanumeric() || self.c == b'_' { content.push(self.c as char); self.read_char(); } self.output.push(token!(self, TokenType::Ident(content))); self.start = self.current; } else if c.is_ascii_digit() { let mut number = (c as char).to_string(); while self.peek().is_ascii_digit() { number.push(self.c as char); self.read_char(); } if self.c == b'.' { number.push('.'); while self.read_char().is_ascii_digit() { number.push(self.c as char); } } // panic = error in this code self.output .push(token!(self, TokenType::Num(number.parse().unwrap()))); self.start = self.current; } else { self.errors.push(KabelError::new( ErrorKind::UnexpectedToken, format!("Stray \"{0}\"", c as char), self.line, self.column, from_utf8(&self.input[self.line_start..self.current]) .unwrap() .to_string(), )); } } } true } pub fn read_char(&mut self) -> u8 { if self.current >= self.input.len() { self.c = 0x05; // EOF return self.c; } self.c = self.input[self.current]; self.current += 1; self.column += 1; return self.c; } pub fn peek(&mut self) -> u8 { if self.current >= self.input.len() { self.c = 0x05; // EOF return self.c; } self.c = self.input[self.current]; return self.c; } } #[derive(Debug, Clone)] pub struct Token { pub token_type: TokenType, pub column: usize, pub line: usize, pub line_start: usize, pub start: usize, pub end: usize, } #[derive(Debug, Clone, PartialEq)] pub enum TokenType { Star, Slash, Plus, Minus, LeftParen, RightParen, LeftBrace, RightBrace, LeftSquare, RightSquare, Period, Comma, Semicolon, Equal, EqualEqual, Bang, BangEqual, Greater, GreaterEqual, Less, LessEqual, And, AndAnd, Or, OrOr, Ident(String), Str(String), Num(f32), }