From 19c233f62b0606b2cf1c6033d0c7123b15bc95cb Mon Sep 17 00:00:00 2001 From: ghostly_zsh Date: Wed, 7 Aug 2024 18:26:32 -0500 Subject: [PATCH] name resolution and features --- kabel/Cargo.toml | 4 + kabel/grammar.ebnf | 8 +- kabel/src/error.rs | 6 + kabel/src/lib.rs | 74 ++++++++++++ kabel/src/macros.rs | 40 +++++++ kabel/src/parser.rs | 127 +++++++++++--------- kabel/src/semantic_analysis.rs | 211 +++++++++++++++++++++++++++++++++ 7 files changed, 408 insertions(+), 62 deletions(-) create mode 100644 kabel/src/semantic_analysis.rs diff --git a/kabel/Cargo.toml b/kabel/Cargo.toml index 5c5441262463aab31b0a789ff1508b08895cbc9d..b9380fcfd6c7a36a6ef5fa7dd957d3715481811e 100644 --- a/kabel/Cargo.toml +++ b/kabel/Cargo.toml @@ -4,3 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] + +[features] +timer = [] +debug = [] diff --git a/kabel/grammar.ebnf b/kabel/grammar.ebnf index 4356b8a1cac03f5e35327da8a8d076450a970d7d..9959a1ad5950e8da4df495079ed39863768ebf72 100644 --- a/kabel/grammar.ebnf +++ b/kabel/grammar.ebnf @@ -1,14 +1,14 @@ program = { statement } ; statement = function | return | loop | while | for | break | continue - | if | expression_statement ; + | if | block | expression_statement ; -function = "function" , identifier , "(" , { identifier , "," , } ")" , block +function = "function" , identifier , "(" , { identifier , "," , } ")" , block ; return = "return" , expression , ";" ; loop = "loop" , block ; while = "while" , "(" , expression , ")" , block ; -for = "for" , "(" , [ expression ] , ";" , [ expression ] , ";" , [ expression ] , ")" , block +for = "for" , "(" , [ expression ] , ";" , [ expression ] , ";" , [ expression ] , ")" , block ; break = "break" , ";" ; continue = "continue" , ";" ; @@ -45,7 +45,7 @@ factor = unary { , ( "*" | "/" ) , unary } ; unary = ( ( "!" 
| "-" | "++" | "--" ) , unary ) | subscript ; -subscript = primary , { "[" , expression , "]" , } +subscript = primary , { "[" , expression , "]" , } ; primary = identifier | array | member | call | number | incdec | string | group ; diff --git a/kabel/src/error.rs b/kabel/src/error.rs index bdb5c184c3eae61648be954029fb7c8c880b3652..49213aea12a761d15712cfd410ea55f53d71646e 100644 --- a/kabel/src/error.rs +++ b/kabel/src/error.rs @@ -44,6 +44,8 @@ pub enum ErrorKind { UnexpectedCharacter, UnexpectedToken, MissingDelimiter, + OutOfScope, + IncorrectArity, } impl std::fmt::Display for ErrorKind { @@ -54,6 +56,8 @@ impl std::fmt::Display for ErrorKind { UnexpectedCharacter => f.write_str("Unrecognized Charcter"), UnexpectedToken => f.write_str("Unrecognized Token"), MissingDelimiter => f.write_str("Missing delimiter"), + OutOfScope => f.write_str("Out of scope"), + IncorrectArity => f.write_str("Incorrect arity"), } } } @@ -66,6 +70,8 @@ impl From for usize { UnexpectedCharacter => 0x01, UnexpectedToken => 0x02, MissingDelimiter => 0x03, + OutOfScope => 0x04, + IncorrectArity => 0x05, } } } diff --git a/kabel/src/lib.rs b/kabel/src/lib.rs index d0a2499d9796a457498d1c5b73a2f1fee4c0af5f..008c26577b3a76f8fec964110ea0b1a5086141bb 100644 --- a/kabel/src/lib.rs +++ b/kabel/src/lib.rs @@ -1,10 +1,15 @@ +#[cfg(feature = "timer")] +use std::time::Instant; + use lexer::{Lexer, Token}; use parser::{Parser, AST}; +use semantic_analysis::Analyzer; pub mod error; pub mod lexer; pub mod macros; pub mod parser; +pub mod semantic_analysis; pub fn run_lexer(input: String) -> Lexer { let mut lexer = Lexer::new(input); @@ -16,3 +21,72 @@ pub fn run_parser(text: String, input: Vec) -> (AST, Parser) { let mut parser = Parser::new(text, input); (parser.program(), parser) } + +pub fn run_semantic_analysis(text: String, input: AST) -> Analyzer { + let mut analyzer = Analyzer::new(text); + analyzer.visit(input); + analyzer +} + +// TODO: output bytecode +pub fn compile(program: String) -> 
String { + let mut output = "".to_string(); + + #[cfg(feature = "timer")] + let program_instant = Instant::now(); + + #[cfg(feature = "timer")] + let lexer_instant = Instant::now(); + let lexer = run_lexer(program.clone()); + #[cfg(feature = "timer")] + { + let lexer_elapsed = lexer_instant.elapsed(); + println!("lexer took: {:?}", lexer_elapsed); + } + + for error in lexer.errors.clone() { + output += &error.to_string(); + output += "\n"; + //println!("{}", error); + } + #[cfg(feature = "debug")] + println!("{:?}", lexer.output); + if lexer.errors.len() != 0 || lexer.output.len() == 0 { + return output; + } + + #[cfg(feature = "timer")] + let parser_instant = Instant::now(); + let (ast, parser) = run_parser(program.clone(), lexer.output); + #[cfg(feature = "timer")] + { + let parser_elapsed = parser_instant.elapsed(); + println!("parser took: {:?}", parser_elapsed); + } + + #[cfg(feature = "debug")] + println!("{:#?}", ast); + for error in parser.errors.clone() { + output += &error.to_string(); + output += "\n"; + } + if parser.errors.len() != 0 { + return output; + } + #[cfg(feature = "timer")] + let analyzer_instant = Instant::now(); + let analyzer = run_semantic_analysis(program, ast); + for error in analyzer.errors.clone() { + output += &error.to_string(); + output += "\n"; + } + #[cfg(feature = "timer")] + { + let analyzer_elapsed = analyzer_instant.elapsed(); + println!("semantic analysis took: {:?}", analyzer_elapsed); + + let program_elapsed = program_instant.elapsed(); + println!("{:?}", program_elapsed); + } + output +} diff --git a/kabel/src/macros.rs b/kabel/src/macros.rs index e41d7cf6ee8f095d74b2d12119403ac75ed58bc3..2963126d495ec2165ed0193ae9e979c79c7ef3ee 100644 --- a/kabel/src/macros.rs +++ b/kabel/src/macros.rs @@ -17,6 +17,7 @@ macro_rules! 
lit { ($type:ident, $data:expr, $token:expr) => { $crate::parser::AST { ast_type: $crate::parser::ASTType::Lit($crate::parser::Lit::$type($data)), + line_start: $token.line_start, start: $token.start, end: $token.end, line: $token.line, @@ -30,6 +31,7 @@ macro_rules! ast { ($ast_type:expr, $start:expr, $end:expr) => { AST { ast_type: $ast_type, + line_start: $start.line_start, start: $start.start, end: $end.end, line: $start.line, @@ -38,6 +40,19 @@ macro_rules! ast { }; } +#[macro_export] +macro_rules! name { + ($name:expr, $token:expr) => { + Name { + name: $name, + line_start: $token.line_start, + end: $token.end, + line: $token.line, + column: $token.column, + } + }; +} + #[macro_export] macro_rules! unexpected_token { ($self:expr, $message:expr, $token:expr) => { @@ -50,3 +65,28 @@ macro_rules! unexpected_token { ) }; } + +#[macro_export] +macro_rules! out_of_scope { + ($self:expr, $message:expr, $name:expr, $expr:expr) => { + $crate::error::KabelError::new( + $crate::error::ErrorKind::OutOfScope, + format!($message, $name), + $expr.line, + $expr.column, + $self.text[$expr.line_start..$expr.end].to_string(), + ) + }; +} +#[macro_export] +macro_rules! 
out_of_scope_var { + ($self:expr, $message:expr, $name:expr, $expr:expr) => { + $crate::error::KabelError::new( + $crate::error::ErrorKind::OutOfScope, + format!($message, $name.name), + $expr.line, + $name.column, + $self.text[$expr.line_start..$expr.end].to_string(), + ) + }; +} diff --git a/kabel/src/parser.rs b/kabel/src/parser.rs index efd242950360862e7c05c3ad4916fdccaf30040d..a7c630c56c4288ec652735b5c7bb1c8227f86e88 100644 --- a/kabel/src/parser.rs +++ b/kabel/src/parser.rs @@ -2,7 +2,7 @@ use crate::{ ast, error::{ErrorKind, KabelError}, lexer::{Token, TokenType}, - lit, unexpected_token, + lit, unexpected_token, name, }; pub struct Parser { @@ -39,6 +39,7 @@ impl Parser { } AST { ast_type: ASTType::Program(program), + line_start: 0, start: 0, end: 0, line: 0, @@ -56,6 +57,7 @@ impl Parser { TokenType::Break => self.break_statement(), TokenType::Continue => self.continue_statement(), TokenType::If => self.if_statement(), + TokenType::LeftBrace => self.block(), _ => self.expression_statement(), } } @@ -68,7 +70,12 @@ impl Parser { if let TokenType::LeftParen = left_paren.token_type { let mut expressions = Vec::new(); while self.peek()?.token_type != TokenType::RightParen { - expressions.push(self.expression()?); + let ident = self.read_token()?; + if let TokenType::Ident(name) = ident.token_type { + expressions.push(name!(name, ident)); + } else { + return Err(unexpected_token!(self, "Expected identifier found {}", ident)); + } if let TokenType::Comma = self.peek()?.token_type { self.read_token()?; } @@ -78,7 +85,7 @@ impl Parser { let block = self.block()?; return Ok(ast!( ASTType::Function( - Box::new(lit!(Ident, name, ident)), + name!(name, ident), expressions, Box::new(block.clone()) ), @@ -104,19 +111,17 @@ impl Parser { let return_ident = self.read_token()?; if let TokenType::Semicolon = self.peek()?.token_type { let semicolon = self.read_token()?; - return Ok(AST { - ast_type: ASTType::Return(None), - start: return_ident.start, - end: semicolon.end, - 
line: return_ident.line, - column: return_ident.column, - }); + return Ok(ast!( + ASTType::Return(Box::new(None)), + return_ident, + semicolon + )); } let expression = self.expression()?; let semicolon = self.read_token()?; if let TokenType::Semicolon = semicolon.token_type { Ok(ast!( - ASTType::Return(Some(Box::new(expression))), + ASTType::Return(Box::new(Some(expression))), return_ident, semicolon )) @@ -246,7 +251,7 @@ impl Parser { ASTType::If( Box::new(condition), Box::new(block.clone()), - Some(Box::new(else_block.clone())) + Box::new(Some(else_block.clone())) ), if_ident, else_block @@ -259,7 +264,7 @@ impl Parser { ASTType::If( Box::new(condition), Box::new(block.clone()), - Some(Box::new(else_if.clone())) + Box::new(Some(else_if.clone())) ), if_ident, else_if @@ -269,7 +274,7 @@ impl Parser { } } return Ok(ast!( - ASTType::If(Box::new(condition), Box::new(block.clone()), None), + ASTType::If(Box::new(condition), Box::new(block.clone()), Box::new(None)), if_ident, block )); @@ -332,7 +337,7 @@ impl Parser { if let TokenType::Equal = equal.token_type { let expr = self.expression()?; return Ok(ast!( - ASTType::Decl(Box::new(lit!(Ident, name, ident)), Box::new(expr.clone())), + ASTType::Decl(name!(name, ident), Box::new(expr.clone())), var, expr )); @@ -369,9 +374,8 @@ impl Parser { let expr = self.expression()?; if binop.token_type == TokenType::Equal { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name, ident)), - BinOp::Asn, + ASTType::Assign( + name!(name, ident), Box::new(expr.clone()) ), ident, @@ -379,9 +383,8 @@ impl Parser { )); } else if binop.token_type == TokenType::PlusEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -400,9 +403,8 @@ impl Parser { )); } else if binop.token_type == TokenType::MinusEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, 
+ ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -421,9 +423,8 @@ impl Parser { )); } else if binop.token_type == TokenType::StarEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -442,9 +443,8 @@ impl Parser { )); } else if binop.token_type == TokenType::SlashEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -463,9 +463,8 @@ impl Parser { )); } else if binop.token_type == TokenType::PercentEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -484,9 +483,8 @@ impl Parser { )); } else if binop.token_type == TokenType::AndEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -505,9 +503,8 @@ impl Parser { )); } else if binop.token_type == TokenType::CaretEqual { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -526,9 +523,8 @@ impl Parser { )); } else { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new( ast!( ASTType::Binary( @@ -563,7 +559,15 @@ impl Parser { if let TokenType::Colon = self.peek()?.token_type { self.read_token()?; let false_expr = self.expression()?; - return Ok(ast!(ASTType::Ternary(Box::new(condition.clone()), Box::new(true_expr), Box::new(false_expr.clone())), condition, false_expr)); + return Ok(ast!( + ASTType::Ternary( + 
Box::new(condition.clone()), + Box::new(true_expr), + Box::new(false_expr.clone()) + ), + condition, + false_expr + )); } else { return Err(unexpected_token!(self, "Expected : found {}", self.token)); } @@ -932,7 +936,7 @@ impl Parser { let right_paren = self.read_token()?; if let TokenType::Ident(name) = ident.token_type { return Ok(ast!( - ASTType::Call(Box::new(lit!(Ident, name, ident)), expressions), + ASTType::Call(name!(name, ident), expressions), ident, right_paren )); @@ -944,9 +948,8 @@ impl Parser { let oper = self.read_token()?; if oper.token_type == TokenType::PlusPlus { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new(ast!( ASTType::Binary( Box::new(lit!(Ident, name, ident)), @@ -962,9 +965,8 @@ impl Parser { )); } else { return Ok(ast!( - ASTType::Binary( - Box::new(lit!(Ident, name.clone(), ident)), - BinOp::Asn, + ASTType::Assign( + name!(name.clone(), ident), Box::new(ast!( ASTType::Binary( Box::new(lit!(Ident, name, ident)), @@ -998,7 +1000,7 @@ impl Parser { } self.read_token()?; return Ok(ast!( - ASTType::Group(Box::new(expr.clone())), + expr.ast_type, left_paren, right_paren )); @@ -1048,6 +1050,7 @@ impl Parser { #[derive(Debug, Clone)] pub struct AST { pub ast_type: ASTType, + pub line_start: usize, pub start: usize, pub end: usize, pub line: usize, @@ -1059,8 +1062,8 @@ pub enum ASTType { Program(Vec), // statements - Function(Box, Vec, Box), // name, args, block - Return(Option>), // expression + Function(Name, Vec, Box), // name, args, block + Return(Box>), // expression Loop(Box), // block While(Box, Box), // condition, block For( @@ -1071,23 +1074,32 @@ pub enum ASTType { ), // expr1, expr2, expr3, block Break, Continue, - If(Box, Box, Option>), // condition, block, else/else if + If(Box, Box, Box>), // condition, block, else/else if Block(Vec), // statements // expressions - Decl(Box, Box), // identifier, expression + Decl(Name, Box), 
// identifier, expression + Assign(Name, Box), Ternary(Box, Box, Box), Subscript(Box, Box), Binary(Box, BinOp, Box), Unary(UnOp, Box), // primary - Group(Box), Lit(Lit), - Call(Box, Vec), + Call(Name, Vec), Member(Box, Box), } +#[derive(Debug, Clone)] +pub struct Name { + pub name: String, + pub line_start: usize, + pub end: usize, + pub line: usize, + pub column: usize, +} + #[derive(Debug, Clone)] pub enum Lit { Ident(String), @@ -1106,7 +1118,6 @@ pub enum BinOp { BitAnd, BitXor, BitOr, - Asn, Eq, Ne, Gr, diff --git a/kabel/src/semantic_analysis.rs b/kabel/src/semantic_analysis.rs new file mode 100644 index 0000000000000000000000000000000000000000..28dbbdd924a4c36c8e2fae4eff6fa0faac30a87f --- /dev/null +++ b/kabel/src/semantic_analysis.rs @@ -0,0 +1,211 @@ +use std::collections::HashMap; + +use crate::{error::{ErrorKind, KabelError}, out_of_scope, out_of_scope_var, parser::{ASTType, Lit, Name, AST}}; + +pub struct Analyzer { + text: String, + symbol_table: Vec>, + pub errors: Vec, +} + +impl Analyzer { + pub fn new(text: String) -> Self { + Self { + text, + symbol_table: vec![HashMap::new()], + errors: Vec::new(), + } + } + pub fn visit(&mut self, ast: AST) { + use ASTType::*; + match ast.ast_type { + Program(asts) => { + self.visit_program(asts); + } + Function(name, args, block) => { + self.visit_function(name, args, *block); + } + Return(expr) => { + self.visit_return(*expr); + } + Loop(block) => { + self.visit_loop(*block); + } + While(condition, block) => { + self.visit_while(*condition, *block); + } + For(expr1, expr2, expr3, block) => { + self.visit_for(*expr1, *expr2, *expr3, *block); + } + If(condition, block, else_expr) => { + self.visit_if(*condition, *block, *else_expr); + } + Block(stmts) => { + self.visit_block(stmts); + } + Decl(name, expr) => { + self.visit_decl(name, *expr); + } + Assign(ref name, ref expr) => { + self.visit_assign(ast.clone(), name.clone(), *expr.clone()); + } + Ternary(condition, true_expr, false_expr) => { + 
self.visit_ternary(*condition, *true_expr, *false_expr); + } + Subscript(array, index) => { + self.visit_subscript(*array, *index); + } + Binary(left, _oper, right) => { + self.visit_binary(*left, *right); + } + Unary(_oper, right) => { + self.visit_unary(*right); + } + Lit(ref lit) => { + self.visit_lit(ast.clone(), lit.clone()); + } + Call(ref ident, ref args) => { + self.visit_call(ast.clone(), ident.clone(), args.clone()); + } + /*Member(left, right) => { + self.visit_member(*left, *right); + }*/ + _ => {} // not implemented + } + } + pub fn visit_program(&mut self, asts: Vec) { + for ast in asts { + self.visit(ast); + } + } + pub fn visit_function(&mut self, name: Name, args: Vec, block: AST) { + self.symbol_table.last_mut().unwrap().insert(name.name.clone(), Symbol::Function(args.len())); + self.symbol_table.push(HashMap::new()); + for arg in args { + self.symbol_table.last_mut().unwrap().insert(arg.name, Symbol::Var); + } + self.visit(block); + self.symbol_table.pop(); + } + pub fn visit_return(&mut self, expr: Option) { + if let Some(expr) = expr { + self.visit(expr); + } + } + pub fn visit_loop(&mut self, block: AST) { + self.visit(block); + } + pub fn visit_while(&mut self, condition: AST, block: AST) { + self.visit(condition); + self.visit(block); + } + pub fn visit_for(&mut self, expr1: Option, expr2: Option, expr3: Option, block: AST) { + if let Some(expr) = expr1 { + self.visit(expr); + } + if let Some(expr) = expr2 { + self.visit(expr); + } + if let Some(expr) = expr3 { + self.visit(expr); + } + self.visit(block); + } + pub fn visit_if(&mut self, condition: AST, block: AST, else_expr: Option) { + self.visit(condition); + self.visit(block); + if let Some(else_expr) = else_expr { + self.visit(else_expr); + } + } + pub fn visit_block(&mut self, stmts: Vec) { + self.symbol_table.push(HashMap::new()); + for stmt in stmts { + self.visit(stmt); + } + self.symbol_table.pop(); + } + pub fn visit_decl(&mut self, name: Name, expr: AST) { + self.visit(expr); + 
self.symbol_table.last_mut().unwrap().insert(name.name, Symbol::Var); + } + pub fn visit_assign(&mut self, ast: AST, name: Name, expr: AST) { + self.visit(expr.clone()); + if !self.resolve_var(&name.name) { // search every enclosing scope, innermost first, like visit_lit + self.errors.push(out_of_scope_var!(self, "Variable \"{}\" not in scope", name, ast)) + } + } + pub fn visit_ternary(&mut self, condition: AST, true_expr: AST, false_expr: AST) { + self.visit(condition); + self.visit(true_expr); + self.visit(false_expr); + } + pub fn visit_subscript(&mut self, array: AST, index: AST) { + self.visit(array); + self.visit(index); + } + pub fn visit_binary(&mut self, left: AST, right: AST) { + self.visit(left); + self.visit(right); + } + pub fn visit_unary(&mut self, right: AST) { + self.visit(right); + } + pub fn visit_lit(&mut self, ast: AST, lit: Lit) { + match lit { + Lit::Ident(name) => { + if !self.resolve_var(&name) { + self.errors.push(out_of_scope!(self, "Variable \"{}\" not in scope", name, ast)) + } + } + _ => {} + } + } + pub fn visit_call(&mut self, ast: AST, ident: Name, args: Vec<AST>) { + if !self.resolve_function(&ast, &ident.name, args.len()) { + self.errors.push(out_of_scope!(self, "Function \"{}\" not in scope", ident.name, ast)) + } + for arg in args { + self.visit(arg); + } + } + // TODO: make visit_member not throw out of scope errors + /*pub fn visit_member(&mut self, left: AST, right: AST) { + self.visit(left); + self.visit(right); + }*/ + fn resolve_var(&self, name: &String) -> bool { + for scope in self.symbol_table.iter().rev() { + if matches!(scope.get(name), Some(Symbol::Var)) { + return true; + } + } + false + } + fn resolve_function(&mut self, ast: &AST, name: &String, arity: usize) -> bool { + for scope in self.symbol_table.iter().rev() { + if let Some(Symbol::Function(f_arity)) = scope.get(name) { + if *f_arity == arity { + return true; + } else { + self.errors.push( + KabelError::new( + ErrorKind::IncorrectArity, // function was found; only the argument count is wrong + format!("Function {} has {} argument, provided {}", name, 
*f_arity, arity), + ast.line, + ast.column, + self.text[ast.line_start..ast.end].to_string(), + ) + ); + return true; + } + } + } + false + } +} + +pub enum Symbol { + Var, + Function(usize), +}