~starkingdoms/starkingdoms

eec3b4853b2d82f08983ae2a3cfa58b37d4d3217 — ghostlyzsh 1 year, 4 months ago 24160e4
convert lexer to use utf-8 character, bitwise operations
3 files changed, 137 insertions(+), 56 deletions(-)

M kabel/grammar.ebnf
M kabel/src/lexer.rs
M kabel/src/parser.rs
M kabel/grammar.ebnf => kabel/grammar.ebnf +5 -1
@@ 26,7 26,11 @@ assignment = ( identifier , "=" , assignment ) | logical_or;

logical_or = logical_and { , "||" , logical_and } ;

logical_and = equality { , "&&" , equality } ;
logical_and = bit_and { , "&&" , bit_and } ;

bit_and = bit_xor { , "&" , bit_xor } ;
bit_xor = bit_or { , "^" , bit_or } ;
bit_or = equality { , "|" , equality } ;

equality = comparison { , ( "==" | "!=" ) , comparison } ;


M kabel/src/lexer.rs => kabel/src/lexer.rs +51 -50
@@ 6,13 6,13 @@ use crate::{
};

pub struct Lexer {
    input: Vec<u8>,
    input: Vec<char>,
    start: usize,
    current: usize,
    line: usize,
    line_start: usize,
    column: usize,
    c: u8,
    c: char,
    pub errors: Vec<KabelError>,
    pub output: Vec<Token>,
}


@@ 20,13 20,13 @@ pub struct Lexer {
impl Lexer {
    pub fn new(input: String) -> Self {
        Self {
            input: input.as_bytes().to_vec(),
            input: input.chars().collect(),
            start: 0,
            current: 0,
            line: 0,
            line_start: 0,
            column: 0,
            c: 0x00,
            c: '\0',
            errors: Vec::new(),
            output: Vec::new(),
        }


@@ 35,21 35,21 @@ impl Lexer {
    pub fn next_token(&mut self) -> bool {
        self.read_char();
        match self.c {
            b'+' => {
            '+' => {
                self.output.push(token!(self, TokenType::Plus));
                self.start = self.current;
            }
            b'-' => {
            '-' => {
                self.output.push(token!(self, TokenType::Minus));
                self.start = self.current;
            }
            b'*' => {
            '*' => {
                self.output.push(token!(self, TokenType::Star));
                self.start = self.current;
            }
            b'/' => {
                if self.peek() == b'/' {
                    while self.peek() != b'\n' && self.current < self.input.len() {
            '/' => {
                if self.peek() == '/' {
                    while self.peek() != '\n' && self.current < self.input.len() {
                        self.read_char();
                    }
                    self.start = self.current;


@@ 58,44 58,48 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'(' => {
            '(' => {
                self.output.push(token!(self, TokenType::LeftParen));
                self.start = self.current;
            }
            b')' => {
            ')' => {
                self.output.push(token!(self, TokenType::RightParen));
                self.start = self.current;
            }
            b'{' => {
            '{' => {
                self.output.push(token!(self, TokenType::LeftBrace));
                self.start = self.current;
            }
            b'}' => {
            '}' => {
                self.output.push(token!(self, TokenType::RightBrace));
                self.start = self.current;
            }
            b'[' => {
            '[' => {
                self.output.push(token!(self, TokenType::LeftSquare));
                self.start = self.current;
            }
            b']' => {
            ']' => {
                self.output.push(token!(self, TokenType::RightSquare));
                self.start = self.current;
            }
            b'.' => {
            '.' => {
                self.output.push(token!(self, TokenType::Period));
                self.start = self.current;
            }
            b',' => {
            ',' => {
                self.output.push(token!(self, TokenType::Comma));
                self.start = self.current;
            }
            b';' => {
            ';' => {
                self.output.push(token!(self, TokenType::Semicolon));
                self.start = self.current;
            }
            b'|' => {
                if self.peek() == b'|' {
            '^' => {
                self.output.push(token!(self, TokenType::Caret));
                self.start = self.current;
            }
            '|' => {
                if self.peek() == '|' {
                    self.read_char();
                    self.output.push(token!(self, TokenType::OrOr));
                    self.start = self.current;


@@ 104,8 108,8 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'&' => {
                if self.peek() == b'&' {
            '&' => {
                if self.peek() == '&' {
                    self.read_char();
                    self.output.push(token!(self, TokenType::AndAnd));
                    self.start = self.current;


@@ 114,8 118,8 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'=' => {
                if self.peek() == b'=' {
            '=' => {
                if self.peek() == '=' {
                    self.read_char();
                    self.output.push(token!(self, TokenType::EqualEqual));
                    self.start = self.current;


@@ 124,8 128,8 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'!' => {
                if self.peek() == b'=' {
            '!' => {
                if self.peek() == '=' {
                    self.read_char();
                    self.output.push(token!(self, TokenType::BangEqual));
                    self.start = self.current;


@@ 134,8 138,8 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'>' => {
                if self.peek() == b'=' {
            '>' => {
                if self.peek() == '=' {
                    self.read_char();
                    self.output.push(token!(self, TokenType::GreaterEqual));
                    self.start = self.current;


@@ 144,8 148,8 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'<' => {
                if self.peek() == b'=' {
            '<' => {
                if self.peek() == '=' {
                    self.read_char();
                    self.output.push(token!(self, TokenType::LessEqual));
                    self.start = self.current;


@@ 154,18 158,16 @@ impl Lexer {
                    self.start = self.current;
                }
            }
            b'"' => {
            '"' => {
                let mut contents = String::new();
                while self.read_char() != b'"' {
                    if self.c == 0x05 {
                while self.read_char() != '"' {
                    if self.c == '\0'{
                        self.errors.push(KabelError::new(
                            ErrorKind::UnexpectedEof,
                            "File ended before closing quote".to_string(),
                            self.line,
                            self.column,
                            from_utf8(&self.input[self.start..self.current])
                                .unwrap()
                                .to_string(),
                            self.input[self.start..self.current].iter().collect(),
                        ));
                        return false;
                    }


@@ 174,20 176,20 @@ impl Lexer {
                self.start = self.current;
                self.output.push(token!(self, TokenType::Str(contents)));
            }
            b'\n' => {
            '\n' => {
                self.line += 1;
                self.line_start = self.current;
                self.column = 0;
                self.start = self.current;
            }
            b' ' | b'\r' | b'\t' => {
            ' ' | '\r' | '\t' => {
                self.start = self.current;
            }
            0x05 => return false,
            '\0' => return false,
            c => {
                if c.is_ascii_alphabetic() {
                if c.is_ascii_alphabetic() || c == '_' {
                    let mut content = (c as char).to_string();
                    while self.peek().is_ascii_alphanumeric() || self.c == b'_' {
                    while self.peek().is_ascii_alphanumeric() || self.c == '_' {
                        content.push(self.c as char);
                        self.read_char();
                    }


@@ 199,7 201,7 @@ impl Lexer {
                        number.push(self.c as char);
                        self.read_char();
                    }
                    if self.c == b'.' {
                    if self.c == '.' {
                        number.push('.');
                        while self.read_char().is_ascii_digit() {
                            number.push(self.c as char);


@@ 215,9 217,7 @@ impl Lexer {
                        format!("Stray \"{0}\"", c as char),
                        self.line,
                        self.column,
                        from_utf8(&self.input[self.line_start..self.current])
                            .unwrap()
                            .to_string(),
                        self.input[self.line_start..self.current].iter().collect(),
                    ));
                }
            }


@@ 225,9 225,9 @@ impl Lexer {
        true
    }

    pub fn read_char(&mut self) -> u8 {
    pub fn read_char(&mut self) -> char{
        if self.current >= self.input.len() {
            self.c = 0x05; // EOF
            self.c = '\0'; // EOF
            return self.c;
        }
        self.c = self.input[self.current];


@@ 235,9 235,9 @@ impl Lexer {
        self.column += 1;
        return self.c;
    }
    pub fn peek(&mut self) -> u8 {
    pub fn peek(&mut self) -> char {
        if self.current >= self.input.len() {
            self.c = 0x05; // EOF
            self.c = '\0'; // EOF
            return self.c;
        }
        self.c = self.input[self.current];


@@ 282,6 282,7 @@ pub enum TokenType {
    AndAnd,
    Or,
    OrOr,
    Caret,

    Ident(String),
    Str(String),

M kabel/src/parser.rs => kabel/src/parser.rs +81 -5
@@ 166,7 166,7 @@ impl Parser {
        }
    }

    pub fn for_statement(&mut self) -> Result<AST , KabelError> {
    pub fn for_statement(&mut self) -> Result<AST, KabelError> {
        let for_ident = self.read_token()?;
        let left_paren = self.read_token()?;
        if let TokenType::LeftParen = left_paren.token_type {


@@ 196,7 196,12 @@ impl Parser {
                    if let TokenType::RightParen = right_paren.token_type {
                        let block = self.block()?;
                        return Ok(AST {
                            ast_type: ASTType::For(Box::new(expression1), Box::new(expression2), Box::new(expression3), Box::new(block.clone())),
                            ast_type: ASTType::For(
                                Box::new(expression1),
                                Box::new(expression2),
                                Box::new(expression3),
                                Box::new(block.clone()),
                            ),
                            start: for_ident.start,
                            end: block.end,
                            line: for_ident.line,


@@ 449,11 454,11 @@ impl Parser {
        Ok(left)
    }
    pub fn logical_and(&mut self) -> Result<AST, KabelError> {
        let mut left = self.equality()?;
        let mut left = self.bit_and()?;

        while self.current < self.input.len() && self.peek()?.token_type == TokenType::AndAnd {
            self.read_token()?;
            let right = self.equality()?;
            let right = self.bit_and()?;
            left = AST {
                ast_type: ASTType::Binary(
                    Box::new(left.clone()),


@@ 469,6 474,69 @@ impl Parser {

        Ok(left)
    }
    /// Parses a left-associative chain of bitwise-AND expressions.
    ///
    /// Reads one `bit_xor` operand, then, for as long as the next token is
    /// `TokenType::And`, consumes that token and folds another `bit_xor`
    /// operand into a `BinOp::BitAnd` binary node. The resulting node runs
    /// from the left operand's `start` to the right operand's `end` and takes
    /// its `line`/`column` from the left operand.
    ///
    /// # Errors
    ///
    /// Propagates any `KabelError` returned by `peek`, `read_token` or
    /// `bit_xor`.
    pub fn bit_and(&mut self) -> Result<AST, KabelError> {
        let mut left = self.bit_xor()?;

        while self.current < self.input.len() && self.peek()?.token_type == TokenType::And {
            self.read_token()?;
            let right = self.bit_xor()?;
            // Read the positions first so both operands can be moved into the
            // node instead of deep-cloning each subtree on every operator.
            // NOTE(review): relies on the position fields being `Copy` —
            // confirm against the `AST` definition.
            let (start, end, line, column) = (left.start, right.end, left.line, left.column);
            left = AST {
                ast_type: ASTType::Binary(Box::new(left), BinOp::BitAnd, Box::new(right)),
                start,
                end,
                line,
                column,
            };
        }

        Ok(left)
    }
    /// Parses a left-associative chain of bitwise-XOR expressions.
    ///
    /// Reads one `bit_or` operand, then, for as long as the next token is
    /// `TokenType::Caret`, consumes that token and folds another `bit_or`
    /// operand into a `BinOp::BitXor` binary node. The resulting node runs
    /// from the left operand's `start` to the right operand's `end` and takes
    /// its `line`/`column` from the left operand.
    ///
    /// # Errors
    ///
    /// Propagates any `KabelError` returned by `peek`, `read_token` or
    /// `bit_or`.
    pub fn bit_xor(&mut self) -> Result<AST, KabelError> {
        let mut left = self.bit_or()?;

        while self.current < self.input.len() && self.peek()?.token_type == TokenType::Caret {
            self.read_token()?;
            let right = self.bit_or()?;
            // Read the positions first so both operands can be moved into the
            // node instead of deep-cloning each subtree on every operator.
            // NOTE(review): relies on the position fields being `Copy` —
            // confirm against the `AST` definition.
            let (start, end, line, column) = (left.start, right.end, left.line, left.column);
            left = AST {
                ast_type: ASTType::Binary(Box::new(left), BinOp::BitXor, Box::new(right)),
                start,
                end,
                line,
                column,
            };
        }

        Ok(left)
    }
    /// Parses a left-associative chain of bitwise-OR expressions.
    ///
    /// Reads one `equality` operand, then, for as long as the next token is
    /// `TokenType::Or`, consumes that token and folds another `equality`
    /// operand into a `BinOp::BitOr` binary node. The resulting node runs
    /// from the left operand's `start` to the right operand's `end` and takes
    /// its `line`/`column` from the left operand.
    ///
    /// # Errors
    ///
    /// Propagates any `KabelError` returned by `peek`, `read_token` or
    /// `equality`.
    pub fn bit_or(&mut self) -> Result<AST, KabelError> {
        let mut left = self.equality()?;

        while self.current < self.input.len() && self.peek()?.token_type == TokenType::Or {
            self.read_token()?;
            let right = self.equality()?;
            // Read the positions first so both operands can be moved into the
            // node instead of deep-cloning each subtree on every operator.
            // NOTE(review): relies on the position fields being `Copy` —
            // confirm against the `AST` definition.
            let (start, end, line, column) = (left.start, right.end, left.line, left.column);
            left = AST {
                ast_type: ASTType::Binary(Box::new(left), BinOp::BitOr, Box::new(right)),
                start,
                end,
                line,
                column,
            };
        }

        Ok(left)
    }
    pub fn equality(&mut self) -> Result<AST, KabelError> {
        let mut left = self.comparison()?;



@@ 917,7 985,12 @@ pub enum ASTType {
    Return(Option<Box<AST>>),               // expression
    Loop(Box<AST>),                         // block
    While(Box<AST>, Box<AST>),              // condition, block
    For(Box<Option<AST>>, Box<Option<AST>>, Box<Option<AST>>, Box<AST>), // expr1, expr2, expr3, block
    For(
        Box<Option<AST>>,
        Box<Option<AST>>,
        Box<Option<AST>>,
        Box<AST>,
    ), // expr1, expr2, expr3, block
    Break,
    Continue,
    If(Box<AST>, Box<AST>, Option<Box<AST>>), // condition, block, else/else if


@@ 958,6 1031,9 @@ pub enum BinOp {
    Le,
    Or,
    And,
    BitAnd,
    BitXor,
    BitOr,
    Assign,
}