use std::{collections::HashMap, fmt::Display};
use crate::{
error::{ErrorKind, KabelError},
token, token_display,
};
/// Hand-written lexer that turns source text into a flat list of [`Token`]s.
///
/// Drive it by calling `next_token` until it returns `false`; the results
/// accumulate in `output` and any problems in `errors`.
pub struct Lexer {
/// The source text, split into `char`s so it can be indexed by position.
input: Vec<char>,
/// Column (in `line_current` units) where the token being scanned begins.
/// Set to `line_current` after each token or skipped character and to 0 on
/// a newline. Presumably read by the `token!` macro — confirm there.
start: usize,
/// Index into `input` of the next character to be consumed.
current: usize,
/// Line counter; starts at 0 and is incremented for every `'\n'` token.
line: usize,
/// Index into `input` of the first character of the current line; used to
/// slice out the line text attached to errors.
line_start: usize,
/// Number of characters consumed on the current line.
line_current: usize,
/// Column reported in errors; incremented and reset together with
/// `line_current`.
column: usize,
/// Character most recently loaded from `input`.
c: char,
/// Reserved words and the token type each one produces.
keywords: HashMap<String, TokenType>,
/// Errors collected while lexing; lexing continues after an error.
pub errors: Vec<KabelError>,
/// Tokens produced so far, in source order.
pub output: Vec<Token>,
}
impl Lexer {
pub fn new(input: String) -> Self {
let mut keywords = HashMap::new();
keywords.insert("function".to_string(), TokenType::Function);
keywords.insert("return".to_string(), TokenType::Return);
keywords.insert("loop".to_string(), TokenType::Loop);
keywords.insert("while".to_string(), TokenType::While);
keywords.insert("for".to_string(), TokenType::For);
keywords.insert("break".to_string(), TokenType::Break);
keywords.insert("continue".to_string(), TokenType::Continue);
keywords.insert("if".to_string(), TokenType::If);
keywords.insert("else".to_string(), TokenType::Else);
keywords.insert("var".to_string(), TokenType::Var);
keywords.insert("true".to_string(), TokenType::True);
keywords.insert("false".to_string(), TokenType::False);
keywords.insert("print".to_string(), TokenType::Print);
Self {
input: input.chars().collect(),
start: 0,
current: 0,
line: 0,
line_start: 0,
line_current: 0,
column: 0,
c: '\0',
keywords,
errors: Vec::new(),
output: Vec::new(),
}
}
pub fn next_token(&mut self) -> bool {
let result = self.read_next_token();
match result {
Ok(b) => b,
Err(e) => { self.errors.push(e); true }
}
}
fn read_next_token(&mut self) -> Result<bool, KabelError> {
let result = self.read_char("");
if result.is_err() {
return Ok(false);
}
match self.c {
'+' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::PlusEqual));
self.start = self.line_current;
} else if self.peek("").unwrap_or(' ') == '+' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::PlusPlus));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Plus));
self.start = self.line_current;
}
}
'-' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::MinusEqual));
self.start = self.line_current;
} else if self.peek("").unwrap_or(' ') == '-' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::MinusMinus));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Minus));
self.start = self.line_current;
}
}
'*' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::StarEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Star));
self.start = self.line_current;
}
}
'/' => {
if self.peek("").unwrap_or(' ') == '/' {
while self.peek("").unwrap_or(' ') != '\n' && self.current < self.input.len() {
self.read_char("Kabel broke")?;
}
self.start = self.line_current;
} else if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::SlashEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Slash));
self.start = self.line_current;
}
}
'%' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::PercentEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Percent));
self.start = self.line_current;
}
}
'(' => {
self.output.push(token!(self, TokenType::LeftParen));
self.start = self.line_current;
}
')' => {
self.output.push(token!(self, TokenType::RightParen));
self.start = self.line_current;
}
'{' => {
self.output.push(token!(self, TokenType::LeftBrace));
self.start = self.line_current;
}
'}' => {
self.output.push(token!(self, TokenType::RightBrace));
self.start = self.line_current;
}
'[' => {
self.output.push(token!(self, TokenType::LeftSquare));
self.start = self.line_current;
}
']' => {
self.output.push(token!(self, TokenType::RightSquare));
self.start = self.line_current;
}
'.' => {
self.output.push(token!(self, TokenType::Period));
self.start = self.line_current;
}
',' => {
self.output.push(token!(self, TokenType::Comma));
self.start = self.line_current;
}
';' => {
self.output.push(token!(self, TokenType::Semicolon));
self.start = self.line_current;
}
':' => {
self.output.push(token!(self, TokenType::Colon));
self.start = self.line_current;
}
'?' => {
self.output.push(token!(self, TokenType::Question));
self.start = self.line_current;
}
'^' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::CaretEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Caret));
self.start = self.line_current;
}
}
'|' => {
if self.peek("").unwrap_or(' ') == '|' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::OrOr));
self.start = self.line_current;
} else if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::OrEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Or));
self.start = self.line_current;
}
}
'&' => {
if self.peek("").unwrap_or(' ') == '&' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::AndAnd));
self.start = self.line_current;
} else if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::AndEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::And));
self.start = self.line_current;
}
}
'=' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::EqualEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Equal));
self.start = self.line_current;
}
}
'!' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::BangEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Bang));
self.start = self.line_current;
}
}
'>' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::GreaterEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Greater));
self.start = self.line_current;
}
}
'<' => {
if self.peek("").unwrap_or(' ') == '=' {
self.read_char("Kabel broke")?;
self.output.push(token!(self, TokenType::LessEqual));
self.start = self.line_current;
} else {
self.output.push(token!(self, TokenType::Less));
self.start = self.line_current;
}
}
'"' => {
let mut contents = String::new();
while self.read_char("File ended before closing quote")? != '"' {
contents.push(self.c as char);
}
self.output.push(token!(self, TokenType::Str(contents)));
self.start = self.line_current;
}
'\n' => {
self.line += 1;
self.line_start = self.current;
self.start = 0;
self.line_current = 0;
self.column = 0;
}
' ' | '\r' | '\t' => {
self.start = self.line_current;
}
c => {
if c.is_ascii_alphabetic() || c == '_' {
let mut content = (c as char).to_string();
while self.peek("").unwrap_or('%').is_ascii_alphanumeric() || self.c == '_' {
content.push(self.c as char);
self.read_char("Kabel broke")?;
}
if self.keywords.contains_key(&content) {
self.output
.push(token!(self, self.keywords.get(&content).unwrap().clone()));
} else {
self.output.push(token!(self, TokenType::Ident(content)));
}
self.start = self.line_current;
} else if c.is_ascii_digit() {
let mut number = (c as char).to_string();
while self.peek("").unwrap_or(' ').is_ascii_digit() {
number.push(self.c as char);
self.read_char("Kabel broke")?;
}
if self.c == '.' {
number.push('.');
self.read_char("Kabel broke")?;
while self.peek("").unwrap_or(' ').is_ascii_digit() {
number.push(self.c as char);
self.read_char("Kabel broke")?;
}
}
// panic = error in this code
self.output
.push(token!(self, TokenType::Num(number.parse().unwrap())));
self.start = self.line_current;
} else {
self.errors.push(KabelError::new(
ErrorKind::UnexpectedToken,
format!("Stray \"{0}\"", c as char),
None,
self.line,
self.column,
self.input[self.line_start..self.current].iter().collect(),
));
}
}
}
Ok(true)
}
pub fn read_char(&mut self, message: &str) -> Result<char, KabelError> {
if self.current >= self.input.len() {
/*self.c = '\0'; // EOF
return self.c;*/
return Err(KabelError::new(
ErrorKind::UnexpectedEof,
message.to_string(),
None,
self.line,
self.column,
self.input[self.line_start..self.current].iter().collect(),
));
}
self.c = self.input[self.current];
self.current += 1;
self.column += 1;
self.line_current += 1;
return Ok(self.c);
}
pub fn peek(&mut self, message: &str) -> Result<char, KabelError> {
if self.current >= self.input.len() {
/*self.c = '\0'; // EOF
return self.c;*/
return Err(KabelError::new(
ErrorKind::UnexpectedEof,
message.to_string(),
None,
self.line,
self.column,
self.input[self.line_start..self.current].iter().collect(),
));
}
self.c = self.input[self.current];
return Ok(self.c);
}
}
/// A single lexed token together with its position in the source.
///
/// NOTE(review): instances are built by the `token!` macro, which is defined
/// outside this file; the field descriptions below assume it copies the
/// lexer's `start`, `line_current` and `line` values — confirm against the
/// macro.
#[derive(Debug, Clone)]
pub struct Token {
/// What kind of token this is, including any payload.
pub token_type: TokenType,
/// Column where the token starts (presumably the lexer's `start`).
pub start_column: usize,
/// Column where the token ends (presumably the lexer's `line_current`).
pub end_column: usize,
/// Line the token was found on (presumably the lexer's zero-based `line`).
pub line: usize,
}
/// Every kind of token the lexer can produce.
///
/// Variants without a payload are fully described by their name; `Ident`,
/// `Str` and `Num` carry the text or value that was read.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
// keywords (the reserved words registered in `Lexer::new`)
Function,
Return,
Loop,
While,
For,
Break,
Continue,
If,
Else,
Var,
True,
False,
Print,
// operators and punctuation
Star,
StarEqual,
Slash,
SlashEqual,
Percent,
PercentEqual,
Plus,
PlusPlus,
PlusEqual,
Minus,
MinusMinus,
MinusEqual,
LeftParen,
RightParen,
LeftBrace,
RightBrace,
LeftSquare,
RightSquare,
Equal,
EqualEqual,
Bang,
BangEqual,
Greater,
GreaterEqual,
Less,
LessEqual,
And,
AndEqual,
AndAnd,
Or,
OrEqual,
OrOr,
Caret,
CaretEqual,
Period,
Comma,
Semicolon,
Colon,
Question,
// tokens that carry a payload
/// An identifier that is not a keyword; holds its text.
Ident(String),
/// A string literal; holds the raw characters between the quotes.
Str(String),
/// A number literal, parsed from its digits into an `f32`.
Num(f32),
}
impl Display for TokenType {
    /// Writes a human-readable form of the token type.
    ///
    /// Payload-carrying variants are rendered as the variant name, a space,
    /// and the payload (`Ident foo`, `Str hello`, `Num 1.5`). All other
    /// variants are delegated to the `token_display!` macro, which is defined
    /// outside this file (presumably it writes the variant name — confirm).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use TokenType::*;

        token_display!(*self, f, Function, Return, Loop, While, For,
            Break, Continue, If, Else, Var, True, False, Print,
            Star, StarEqual, Slash, SlashEqual, Percent, PercentEqual,
            Plus, PlusPlus, PlusEqual, Minus, MinusMinus, MinusEqual,
            LeftParen, RightParen, LeftBrace, RightBrace,
            LeftSquare, RightSquare, Equal, EqualEqual,
            Bang, BangEqual, Greater, GreaterEqual, Less,
            LessEqual, And, AndEqual, AndAnd, Or, OrEqual, OrOr,
            Caret, CaretEqual, Period, Comma, Semicolon, Colon, Question);

        // Only the variants with a payload still need output here; anything
        // else was already covered by the macro call above.
        match self {
            Ident(name) => write!(f, "Ident {}", name),
            Str(value) => write!(f, "Str {}", value),
            Num(value) => write!(f, "Num {}", value),
            _ => Ok(()),
        }
    }
}