use std::str::from_utf8;
use crate::{
error::{ErrorKind, KabelError},
token,
};
/// Byte-oriented lexer that turns source text into a list of [`Token`]s.
///
/// Tokens are appended to `output` and diagnostics to `errors` as
/// `next_token` is called repeatedly.
pub struct Lexer {
/// Raw bytes of the source text being lexed.
input: Vec<u8>,
/// Byte index marking where the lexeme currently being scanned begins;
/// it is moved up to `current` after tokens and skipped whitespace.
start: usize,
/// Byte index of the next unread byte in `input`.
current: usize,
/// Zero-based line counter, incremented each time a `\n` is lexed.
line: usize,
/// Byte index just past the most recently lexed `\n` (0 on the first line).
line_start: usize,
/// Number of bytes consumed since the last lexed `\n`.
column: usize,
/// Byte most recently fetched from `input`; `0x05` is used as the
/// end-of-input marker.
c: u8,
/// Diagnostics collected while lexing.
pub errors: Vec<KabelError>,
/// Tokens produced so far, in source order.
pub output: Vec<Token>,
}
impl Lexer {
pub fn new(input: String) -> Self {
Self {
input: input.as_bytes().to_vec(),
start: 0,
current: 0,
line: 0,
line_start: 0,
column: 0,
c: 0x00,
errors: Vec::new(),
output: Vec::new(),
}
}
pub fn next_token(&mut self) -> bool {
self.read_char();
match self.c {
b'+' => {
self.output.push(token!(self, TokenType::Plus));
self.start = self.current;
}
b'-' => {
self.output.push(token!(self, TokenType::Minus));
self.start = self.current;
}
b'*' => {
self.output.push(token!(self, TokenType::Star));
self.start = self.current;
}
b'/' => {
if self.peek() == b'/' {
while self.peek() != b'\n' && self.current < self.input.len() {
self.read_char();
}
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::Slash));
self.start = self.current;
}
}
b'(' => {
self.output.push(token!(self, TokenType::LeftParen));
self.start = self.current;
}
b')' => {
self.output.push(token!(self, TokenType::RightParen));
self.start = self.current;
}
b'{' => {
self.output.push(token!(self, TokenType::LeftBrace));
self.start = self.current;
}
b'}' => {
self.output.push(token!(self, TokenType::RightBrace));
self.start = self.current;
}
b'.' => {
self.output.push(token!(self, TokenType::Period));
self.start = self.current;
}
b',' => {
self.output.push(token!(self, TokenType::Comma));
self.start = self.current;
}
b';' => {
self.output.push(token!(self, TokenType::Semicolon));
self.start = self.current;
}
b'|' => {
if self.peek() == b'|' {
self.read_char();
self.output.push(token!(self, TokenType::OrOr));
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::Or));
self.start = self.current;
}
}
b'&' => {
if self.peek() == b'&' {
self.read_char();
self.output.push(token!(self, TokenType::AndAnd));
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::And));
self.start = self.current;
}
}
b'=' => {
if self.peek() == b'=' {
self.read_char();
self.output.push(token!(self, TokenType::EqualEqual));
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::Equal));
self.start = self.current;
}
}
b'!' => {
if self.peek() == b'=' {
self.read_char();
self.output.push(token!(self, TokenType::BangEqual));
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::Bang));
self.start = self.current;
}
}
b'>' => {
if self.peek() == b'=' {
self.read_char();
self.output.push(token!(self, TokenType::GreaterEqual));
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::Greater));
self.start = self.current;
}
}
b'<' => {
if self.peek() == b'=' {
self.read_char();
self.output.push(token!(self, TokenType::LessEqual));
self.start = self.current;
} else {
self.output.push(token!(self, TokenType::Less));
self.start = self.current;
}
}
b'"' => {
let mut contents = String::new();
while self.read_char() != b'"' {
if self.c == 0x05 {
self.errors.push(KabelError::new(
ErrorKind::UnexpectedEof,
"File ended before closing quote".to_string(),
self.line,
self.column,
from_utf8(&self.input[self.start..self.current])
.unwrap()
.to_string(),
));
return false;
}
contents.push(self.c as char);
}
self.start = self.current;
self.output.push(token!(self, TokenType::Str(contents)));
}
b'\n' => {
self.line += 1;
self.line_start = self.current;
self.column = 0;
self.start = self.current;
}
b' ' | b'\r' | b'\t' => {
self.start = self.current;
}
0x05 => return false,
c => {
if c.is_ascii_alphabetic() {
let mut content = (c as char).to_string();
while self.peek().is_ascii_alphanumeric() || self.c == b'_' {
content.push(self.c as char);
self.read_char();
}
self.output.push(token!(self, TokenType::Ident(content)));
} else if c.is_ascii_digit() {
let mut number = (c as char).to_string();
while self.peek().is_ascii_digit() {
number.push(self.c as char);
self.read_char();
}
if self.c == b'.' {
number.push('.');
while self.read_char().is_ascii_digit() {
number.push(self.c as char);
}
}
// panic = error in this code
self.output
.push(token!(self, TokenType::Num(number.parse().unwrap())));
self.start = self.current;
} else {
self.errors.push(KabelError::new(
ErrorKind::UnexpectedToken,
format!("Stray \"{0}\"", c as char),
self.line,
self.column,
from_utf8(&self.input[self.line_start..self.current])
.unwrap()
.to_string(),
));
}
}
}
true
}
pub fn read_char(&mut self) -> u8 {
if self.current >= self.input.len() {
self.c = 0x05; // EOF
return self.c;
}
self.c = self.input[self.current];
self.current += 1;
self.column += 1;
return self.c;
}
pub fn peek(&mut self) -> u8 {
if self.current >= self.input.len() {
self.c = 0x05; // EOF
return self.c;
}
self.c = self.input[self.current];
return self.c;
}
}
/// A single lexeme together with position information.
///
/// NOTE(review): values are built by the `token!` macro, which is defined
/// outside this file. The position-field notes below are inferred from the
/// `Lexer` fields of the same name and should be confirmed against the macro.
#[derive(Debug, Clone)]
pub struct Token {
/// Kind of lexeme, including any payload it carries.
pub token_type: TokenType,
/// Presumably the lexer's `column` counter when the token was built — TODO confirm.
pub column: usize,
/// Presumably the lexer's zero-based `line` counter when the token was built — TODO confirm.
pub line: usize,
/// Presumably the byte index at which the token's line begins — TODO confirm.
pub line_start: usize,
/// Presumably the byte index of the token's first byte — TODO confirm.
pub start: usize,
/// Presumably the byte index just past the token's last byte — TODO confirm.
pub end: usize,
}
/// The kinds of token the lexer can produce.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
/// `*`
Star,
/// `/` (a `//` pair starts a line comment instead and yields no token)
Slash,
/// `+`
Plus,
/// `-`
Minus,
/// `(`
LeftParen,
/// `)`
RightParen,
/// `{`
LeftBrace,
/// `}`
RightBrace,
/// `.`
Period,
/// `,`
Comma,
/// `;`
Semicolon,
/// `=`
Equal,
/// `==`
EqualEqual,
/// `!`
Bang,
/// `!=`
BangEqual,
/// `>`
Greater,
/// `>=`
GreaterEqual,
/// `<`
Less,
/// `<=`
LessEqual,
/// `&`
And,
/// `&&`
AndAnd,
/// `|`
Or,
/// `||`
OrOr,
/// Identifier: an ASCII letter followed by ASCII letters, digits or `_`.
Ident(String),
/// Contents of a double-quoted string literal, without the quotes.
Str(String),
/// Numeric literal, parsed as `f32`.
Num(f32),
}