use std::str::from_utf8;
use crate::{error::{ErrorKind, KabelError}, token};
/// Byte-oriented lexer state.
///
/// `next_token` consumes bytes from `input`, appending tokens to `output`
/// and diagnostics to `errors`.
pub struct Lexer {
/// Raw bytes of the source text handed to `new`.
input: Vec<u8>,
/// Byte offset marking where the pending token begins; `next_token` moves it
/// up to `current` as input is consumed.
start: usize,
/// Byte offset of the next byte `read_char` will consume.
current: usize,
/// Zero-based line counter, bumped when `next_token` consumes a `\n`.
line: usize,
/// Byte offset just past the most recently consumed `\n` (0 before any).
line_start: usize,
/// Count of bytes consumed by `read_char` since the last newline reset.
column: usize,
/// Most recently read byte; `0x05` is used as the end-of-input marker.
c: u8,
/// Diagnostics collected while lexing.
pub errors: Vec<KabelError>,
/// Tokens produced so far, in source order.
pub output: Vec<Token>,
}
impl Lexer {
pub fn new(input: String) -> Self {
Self {
input: input.as_bytes().to_vec(),
start: 0,
current: 0,
line: 0,
line_start: 0,
column: 0,
c: 0x00,
errors: Vec::new(),
output: Vec::new(),
}
}
pub fn next_token(&mut self) -> bool {
self.read_char();
match self.c {
b'+' => { self.output.push(token!(self, TokenType::Plus)); self.start = self.current; },
b'-' => { self.output.push(token!(self, TokenType::Minus)); self.start = self.current; },
b'*' => { self.output.push(token!(self, TokenType::Star)); self.start = self.current; },
b'/' => { self.output.push(token!(self, TokenType::Slash)); self.start = self.current; },
b'(' => { self.output.push(token!(self, TokenType::LeftParen)); self.start = self.current; },
b')' => { self.output.push(token!(self, TokenType::RightParen)); self.start = self.current; },
b'"' => {
let mut contents = String::new();
while self.read_char() != b'"' {
if self.c == 0x05 {
self.errors.push(KabelError::new(ErrorKind::UnexpectedEof,
"File ended before closing quote".to_string(),
self.line, self.column, from_utf8(&self.input[self.start..self.current]).unwrap().to_string()));
return false;
}
contents.push(self.c as char);
}
self.start = self.current;
self.output.push(token!(self, TokenType::Str(contents)));
}
b'\n' => {
self.line += 1;
self.line_start = self.current;
self.column = 0;
}
b' ' | b'\r' | b'\t' => { self.start = self.current; }
0x05 => return false,
c => {
if c.is_ascii_alphabetic() {
let mut content = (c as char).to_string();
while self.peek().is_ascii_alphanumeric() || self.c == b'_' {
content.push(self.c as char);
self.read_char();
}
self.output.push(token!(self, TokenType::Ident(content)));
} else if c.is_ascii_digit() {
let mut number = (c as char).to_string();
while self.peek().is_ascii_digit() {
number.push(self.c as char);
self.read_char();
}
if self.c == b'.' {
number.push('.');
while self.read_char().is_ascii_digit() {
number.push(self.c as char);
}
}
/*self.current -= 1;
self.column -= 1;*/
// panic = error in this code
self.output.push(token!(self, TokenType::Num(number.parse().unwrap())));
self.start = self.current;
} else {
self.errors.push(KabelError::new(ErrorKind::UnexpectedToken,
format!("Stray \"{0}\"", c as char),
self.line, self.column,
from_utf8(&self.input[self.line_start..self.current]).unwrap().to_string()));
}
}
}
true
}
pub fn read_char(&mut self) -> u8 {
if self.current >= self.input.len() {
self.c = 0x05; // EOF
return self.c;
}
self.c = self.input[self.current];
self.current += 1;
self.column += 1;
return self.c;
}
pub fn peek(&mut self) -> u8 {
if self.current >= self.input.len() {
self.c = 0x05; // EOF
return self.c;
}
self.c = self.input[self.current];
return self.c;
}
}
/// A lexed token together with its source-position bookkeeping.
///
/// Instances are built by the crate's `token!` macro, which is defined
/// outside this file; the field notes below are assumptions to confirm
/// against that macro.
#[derive(Debug, Clone)]
pub struct Token {
/// What kind of token this is, plus any payload it carries.
pub token_type: TokenType,
// presumably the lexer's `column` at the time the token was built — TODO confirm
pub column: usize,
// presumably the lexer's zero-based `line` at the time the token was built — TODO confirm
pub line: usize,
// presumably the byte offset where the token's line begins — TODO confirm
pub line_start: usize,
// presumably the byte offset of the token's first byte — TODO confirm
pub start: usize,
// presumably the byte offset just past the token's last byte — TODO confirm
pub end: usize,
}
/// The kinds of token the lexer can emit.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// Single-byte tokens: `*`, `/`, `+`, `-`, `(`, `)`.
Star, Slash, Plus, Minus, LeftParen, RightParen,
// Payload-carrying tokens: identifier text, the text between a pair of
// double quotes, and a numeric literal parsed as `f32`.
Ident(String), Str(String), Num(f32),
}