From f822df5dc1517846b2f16a573558b85f0112b647 Mon Sep 17 00:00:00 2001 From: ghostly_zsh Date: Sun, 11 Aug 2024 17:53:44 -0500 Subject: [PATCH] variables --- kabel/opcodes.txt | 47 ++++--- kabel/src/ast.rs | 91 ++++++++++++ kabel/src/codegen.rs | 33 +++-- kabel/src/debug.rs | 19 ++- kabel/src/extension.rs | 4 + kabel/src/lib.rs | 19 +-- kabel/src/macros.rs | 11 +- kabel/src/main.rs | 12 +- kabel/src/name_resolution.rs | 230 +++++++++++++++++++++++++++++++ kabel/src/opcodes.rs | 95 ++++++------- kabel/src/parser.rs | 150 +++++--------------- kabel/src/semantic_analysis.rs | 222 ----------------------------- kabel/src/vm.rs | 53 ++++--- kabel/test/syntax/assignment.out | 10 +- 14 files changed, 534 insertions(+), 462 deletions(-) create mode 100644 kabel/src/ast.rs create mode 100644 kabel/src/extension.rs create mode 100644 kabel/src/name_resolution.rs delete mode 100644 kabel/src/semantic_analysis.rs diff --git a/kabel/opcodes.txt b/kabel/opcodes.txt index 05fa0a6378b382192099b261c09f00e2958b1af6..ca1359c340707d8f8db2b26f5a65d81e6c6aa604 100644 --- a/kabel/opcodes.txt +++ b/kabel/opcodes.txt @@ -1,25 +1,28 @@ -CONSTANT VAL ; 0x00 -ADD ; 0x01 -SUB ; 0x02 -MUL ; 0x03 -DIV ; 0x04 -MOD ; 0x05 -BITAND ; 0x06 -BITXOR ; 0x07 -BITOR ; 0x08 -EQ ; 0x09 -NE ; 0x0A -GR ; 0x0B -GE ; 0x0C -LS ; 0x0D -LE ; 0x0E -OR ; 0x0F -AND ; 0x10 +CONSTANT VAL ; 0x00 +VAR STACKPTR ; 0x01 -NOT ; 0x11 -NEG ; 0x12 +ADD ; 0x02 +SUB ; 0x03 +MUL ; 0x04 +DIV ; 0x05 +MOD ; 0x06 +BITAND ; 0x07 +BITXOR ; 0x08 +BITOR ; 0x09 +EQ ; 0x0A +NE ; 0x0B +GR ; 0x0C +GE ; 0x0D +LS ; 0x0E +LE ; 0x0F +OR ; 0x10 +AND ; 0x11 -JMP LOC ; 0x13 -IF_NE ELSE ; 0x14 +NOT ; 0x12 +NEG ; 0x13 -PRINT ; 0xFF +JMP LOC ; 0x14 +IF_NE ELSE ; 0x15 + +POP ; 0xFD +PRINT ; 0xFE diff --git a/kabel/src/ast.rs b/kabel/src/ast.rs new file mode 100644 index 0000000000000000000000000000000000000000..d44b6971bf55d90c73f8f1a6377009700d731f50 --- /dev/null +++ b/kabel/src/ast.rs @@ -0,0 +1,91 @@ +use crate::extension::Extension; + +#[derive(Debug, Clone)] +pub struct AST { + pub ast_type: ASTType, + pub extensions: Vec, + pub start_line: usize, + pub end_line: usize, + pub start_column: usize, + pub end_column: usize, +} + +#[derive(Debug, Clone)] +pub enum ASTType { + Program(Vec), + + // statements + Function(Name, Vec, Box), // name, args, block + Return(Box>), // expression + Loop(Box), // block + While(Box, Box), // condition, block + For( + Box>, + Box>, + Box>, + Box, + ), // expr1, expr2, expr3, block + Break, + Continue, + If(Box, Box, Box>), // condition, block, else/else if + Block(Vec), // statements + Decl(Name, Box), // identifier, expression + Expr(Box), // expr + // REMOVE LATER + Print(Box), + + // expressions + Assign(Name, Box), + Ternary(Box, Box, Box), + Subscript(Box, Box), + Binary(Box, BinOp, Box), + Unary(UnOp, Box), + + // primary + Lit(Lit), + Call(Name, Vec), + Member(Box, Box), +} + +#[derive(Debug, Clone)] +pub struct Name { + pub name: String, + pub start_column: usize, + pub end_column: usize, + pub line: usize, +} + +#[derive(Debug, Clone)] +pub enum Lit { + Ident(String), + Num(f32), + Str(String), + Bool(bool), + Array(Vec), +} + +#[derive(Debug, Clone, Copy)] +pub enum BinOp { + Add, + Sub, + Mul, + Div, + Mod, + BitAnd, + BitXor, + BitOr, + Eq, + Ne, + Gr, + Ge, + Ls, + Le, + Or, + And, +} + +#[derive(Debug, Clone, Copy)] +pub enum UnOp { + Not, + Neg, +} diff --git a/kabel/src/codegen.rs b/kabel/src/codegen.rs index 2eb0f5738fd54c13af891e8d2bf4eab9bf4285a3..0610aa47febcc1a16cdd6e5aac5fa92ad6a690df 100644 --- a/kabel/src/codegen.rs +++ b/kabel/src/codegen.rs @@ -1,4 +1,6 @@ -use crate::{codegen_binary, codegen_unary, opcodes::OpCode, parser::{ASTType, BinOp, Lit, UnOp, AST}, vm::{Value, VM}}; +use std::collections::HashMap; + +use crate::{ast::{ASTType, BinOp, Lit, Name, UnOp, AST}, codegen_binary, codegen_unary, extension::Extension, opcodes::OpCode, vm::{Value, VM}}; pub struct Codegen { pub vm: VM @@ -8,11 +10,11 @@ impl Codegen { pub fn new(text: String) -> Self { Codegen { vm: VM::new(Vec::new(), Vec::new(), - Vec::new(), text.lines().map(|s| s.to_string()).collect()), + Vec::new(), HashMap::new(), text.lines().map(|s| s.to_string()).collect()), } } pub fn visit(&mut self, ast: AST) { - use crate::parser::ASTType::*; + use crate::ast::ASTType::*; match ast.ast_type { Program(asts) => { for ast in asts { @@ -25,6 +27,9 @@ impl Codegen { Block(stmts) => { self.visit_block(stmts); } + Decl(ref name, ref expr) => { + self.visit_decl(&ast, name.clone(), *expr.clone()); + } Expr(expr) => { self.visit_expr_stmt(*expr); } @@ -85,6 +90,13 @@ impl Codegen { self.visit(stmt); } } + pub fn visit_decl(&mut self, ast: &AST, name: Name, expr: AST) { + self.visit(expr); + #[allow(irrefutable_let_patterns)] + if let Extension::Resolution(ptr) = ast.extensions[0] { + self.vm.local.insert(name.name, ptr as u8); + } + } pub fn visit_expr_stmt(&mut self, expr: AST) { self.visit(expr); self.vm.chunk.push(OpCode::POP.into()); @@ -100,20 +112,20 @@ impl Codegen { } } pub fn visit_binary(&mut self, left: AST, oper: BinOp, right: AST) { - use crate::parser::BinOp::*; + use crate::ast::BinOp::*; codegen_binary!(self, left, right, oper, Add, ADD, Sub, SUB, Mul, MUL, Div, DIV, Mod, MOD, BitAnd, BITAND, BitXor, BITXOR, BitOr, BITOR, Eq, EQ, Ne, NE, Gr, GR, Ge, GE, Ls, LS, Le, LE, Or, OR, And, AND); } pub fn visit_unary(&mut self, oper: UnOp, right: AST) { - use crate::parser::UnOp::*; + use crate::ast::UnOp::*; codegen_unary!(self, right, oper, Not, NOT, Neg, NEG); } pub fn visit_lit(&mut self, ast: &AST, lit: Lit) { match lit { Lit::Num(value) => { self.vm.pool.push(Value::Num(value)); - self.vm.chunk.push(OpCode::CONSTANT.into()); + self.vm.chunk.push(OpCode::LOAD.into()); self.vm.chunk.push((self.vm.pool.len()-1) as u8); if self.vm.lines.len() == 0 || self.vm.lines.last().unwrap().0 != ast.end_line { self.vm.lines.push((ast.end_line, 2)); @@ -123,7 +135,7 @@ impl Codegen { } Lit::Str(value) => { self.vm.pool.push(Value::Str(value.into())); - self.vm.chunk.push(OpCode::CONSTANT.into()); + self.vm.chunk.push(OpCode::LOAD.into()); self.vm.chunk.push((self.vm.pool.len()-1) as u8); if self.vm.lines.len() == 0 || self.vm.lines.last().unwrap().0 != ast.end_line { self.vm.lines.push((ast.end_line, 2)); @@ -133,7 +145,7 @@ impl Codegen { } Lit::Bool(value) => { self.vm.pool.push(Value::Bool(value)); - self.vm.chunk.push(OpCode::CONSTANT.into()); + self.vm.chunk.push(OpCode::LOAD.into()); self.vm.chunk.push((self.vm.pool.len()-1) as u8); if self.vm.lines.len() == 0 || self.vm.lines.last().unwrap().0 != ast.end_line { self.vm.lines.push((ast.end_line, 2)); @@ -141,6 +153,11 @@ impl Codegen { self.vm.lines.last_mut().unwrap().1 += 2; } } + Lit::Ident(name) => { + self.vm.chunk.push(OpCode::VAR.into()); + let slot = self.vm.local.get(&name).unwrap(); + self.vm.chunk.push(*slot); + } _ => {} } } diff --git a/kabel/src/debug.rs b/kabel/src/debug.rs index ed780a0ed7acd612e4ed47fc0d77d5fb14e5ad2a..b53d00bbf9c1d4b2ae886f789108545b1fd83cd0 100644 --- a/kabel/src/debug.rs +++ b/kabel/src/debug.rs @@ -1,4 +1,4 @@ -use crate::{lexer::Token, parser::AST, push_output, vm::Value}; +use crate::{lexer::Token, ast::AST, push_output, vm::Value}; pub fn debug_token_array(tokens: Vec) -> String { let mut output = "".to_string(); @@ -9,10 +9,10 @@ pub fn debug_token_array(tokens: Vec) -> String { output[..output.len()-1].to_string() } pub fn debug_ast(ast: AST, level: usize) -> String { - use crate::parser::ASTType::*; - use crate::parser::BinOp::*; - use crate::parser::UnOp::*; - use crate::parser::Lit::*; + use crate::ast::ASTType::*; + use crate::ast::BinOp::*; + use crate::ast::UnOp::*; + use crate::ast::Lit::*; let mut output = "".to_string(); match ast.ast_type { Program(asts) => { @@ -207,13 +207,20 @@ pub fn debug_bytecode(code: &Vec) -> String { let mut output = "".to_string(); while ip < code.len() { match code[ip].into() { - CONSTANT => { + LOAD => { output += &ip.to_string(); output += " CONSTANT "; ip += 1; output += &code[ip].to_string(); output += "\n"; } + VAR => { + output += &ip.to_string(); + output += " VAR "; + ip += 1; + output += &code[ip].to_string(); + output += "\n"; + } ADD => { output += &ip.to_string(); output += " ADD\n" } SUB => { output += &ip.to_string(); output += " SUB\n" } MUL => { output += &ip.to_string(); output += " MUL\n" } diff --git a/kabel/src/extension.rs b/kabel/src/extension.rs new file mode 100644 index 0000000000000000000000000000000000000000..34fc51f77aa2c655fbf185fb0013c2b9f60872c9 --- /dev/null +++ b/kabel/src/extension.rs @@ -0,0 +1,4 @@ +#[derive(Debug, Clone)] +pub enum Extension { + Resolution(usize), // pointer to local table +} diff --git a/kabel/src/lib.rs b/kabel/src/lib.rs index ee5b7ddefbe3f386b000276614a2145adf372be3..038a6417f640f2b70ea3e268e7711e8d5a57d5c9 100644 --- a/kabel/src/lib.rs +++ b/kabel/src/lib.rs @@ -1,10 +1,11 @@ #[cfg(feature = "timer")] use std::time::Instant; +use ast::AST; use codegen::Codegen; use lexer::{Lexer, Token}; -use parser::{Parser, AST}; -use semantic_analysis::Analyzer; +use parser::Parser; +use name_resolution::Resolver; pub mod debug; pub mod error; @@ -12,11 +13,13 @@ pub mod runtime_error; pub mod lexer; pub mod macros; pub mod parser; -pub mod semantic_analysis; +pub mod ast; +pub mod name_resolution; pub mod opcodes; pub mod codegen; pub mod vm; pub mod test; +pub mod extension; pub fn run_lexer(input: String) -> Lexer { let mut lexer = Lexer::new(input); @@ -29,10 +32,10 @@ pub fn run_parser(text: String, input: Vec) -> (AST, Parser) { (parser.program(), parser) } -pub fn run_semantic_analysis(text: String, input: AST) -> Analyzer { - let mut analyzer = Analyzer::new(text); - analyzer.visit(input); - analyzer +pub fn run_semantic_analysis(text: String, input: AST) -> (AST, Resolver) { + let mut analyzer = Resolver::new(text); + let ast = analyzer.visit(input); + (ast, analyzer) } pub fn run_codegen(text: String, input: AST) -> Codegen { @@ -90,7 +93,7 @@ pub fn compile(program: String) -> String { } #[cfg(feature = "timer")] let analyzer_instant = Instant::now(); - let analyzer = run_semantic_analysis(program.clone(), ast.clone()); + let (ast, analyzer) = run_semantic_analysis(program.clone(), ast.clone()); for error in analyzer.errors.clone() { output += &error.to_string(); output += "\n"; diff --git a/kabel/src/macros.rs b/kabel/src/macros.rs index 0c6c72aa3244390549d1ce9344e04cbb057fcce3..410560a5d098044e96fce6fefe3ba0cbb0ef7940 100644 --- a/kabel/src/macros.rs +++ b/kabel/src/macros.rs @@ -25,6 +25,7 @@ macro_rules! lit { ($type:ident, $data:expr, $token:expr) => { $crate::parser::AST { ast_type: $crate::parser::ASTType::Lit($crate::parser::Lit::$type($data)), + extensions: Vec::new(), start_line: $token.line, end_line: $token.line, start_column: $token.start_column, @@ -36,8 +37,9 @@ macro_rules! lit { #[macro_export] macro_rules! ast_from_token { ($ast_type:expr, $start:expr, $end:expr) => { - AST { + $crate::parser::AST { ast_type: $ast_type, + extensions: Vec::new(), start_line: $start.line, end_line: $end.line, start_column: $start.start_column, @@ -48,8 +50,9 @@ macro_rules! ast_from_token { #[macro_export] macro_rules! ast_from_token_ast { ($ast_type:expr, $start:expr, $end:expr) => { - AST { + $crate::parser::AST { ast_type: $ast_type, + extensions: Vec::new(), start_line: $start.line, end_line: $end.end_line, start_column: $start.start_column, @@ -62,6 +65,7 @@ macro_rules! ast_from_ast { ($ast_type:expr, $start:expr, $end:expr) => { AST { ast_type: $ast_type, + extensions: Vec::new(), start_line: $start.start_line, end_line: $end.end_line, start_column: $start.start_column, @@ -74,6 +78,7 @@ macro_rules! ast_from_ast_token { ($ast_type:expr, $start:expr, $end:expr) => { AST { ast_type: $ast_type, + extensions: Vec::new(), start_line: $start.start_line, end_line: $end.line, start_column: $start.start_column, @@ -85,7 +90,7 @@ macro_rules! ast_from_ast_token { #[macro_export] macro_rules! name { ($name:expr, $token:expr) => { - Name { + $crate::ast::Name { name: $name, start_column: $token.start_column, end_column: $token.end_column, diff --git a/kabel/src/main.rs b/kabel/src/main.rs index 069f95269c9d1015d65554d4b051b813587262f2..43fe9fb59da5cbaea034c1250736049bb44611ee 100644 --- a/kabel/src/main.rs +++ b/kabel/src/main.rs @@ -10,7 +10,14 @@ fn main() { fs::read_to_string(args[1].clone()).unwrap();*/ let program = -"2+2; +"var a = 2; +var b = 3; +print a+b; +{ + var b = 7; + var c = 4; + print c; +} ".to_string(); let mut output = "".to_string(); @@ -41,7 +48,7 @@ fn main() { output += &debug_ast(ast.clone(), 0); output += "\n\n"; //output += &format!("{:#?}", ast); - let analyzer = run_semantic_analysis(program.to_string(), ast.clone()); + let (ast, analyzer) = run_semantic_analysis(program.to_string(), ast.clone()); for error in analyzer.errors.clone() { output += &error.to_string(); output += "\n"; @@ -50,6 +57,7 @@ fn main() { println!("{}", output); return; } + //output += &format!("{:#?}", ast); let codegen = run_codegen(program, ast); diff --git a/kabel/src/name_resolution.rs b/kabel/src/name_resolution.rs new file mode 100644 index 0000000000000000000000000000000000000000..b6a9355824a266caacfefdad08afcc7f37beef4a --- /dev/null +++ b/kabel/src/name_resolution.rs @@ -0,0 +1,230 @@ +use std::collections::HashMap; + +use crate::{ast::{ASTType, AST}, ast_from_ast, collect_lines, error::{ErrorKind, KabelError}, extension::Extension, out_of_scope, out_of_scope_var}; + +pub struct Resolver { + text: Vec, + symbol_table: Vec>, // (Symbol, reference to locals) + pub locals: Vec, // reference to stack + pub errors: Vec, +} + +impl Resolver { + pub fn new(text: String) -> Self { + Self { + text: text.lines().collect::>().iter().map(|s| s.to_string()).collect(), + symbol_table: vec![HashMap::new()], + locals: Vec::new(), + errors: Vec::new(), + } + } + pub fn visit(&mut self, ast: AST) -> AST { + use ASTType::*; + match ast.ast_type { + Program(asts) => { + let mut program = Vec::new(); + for ast in asts { + let ast = self.visit(ast.clone()); + program.push(ast) + } + AST { + ast_type: ASTType::Program(program), + extensions: Vec::new(), + start_line: 0, + end_line: 0, + start_column: 0, + end_column: 0, + } + } + Function(name, args, block) => { + self.symbol_table.last_mut().unwrap().insert(name.name.clone(), (Symbol::Function(args.len()),0)); + self.symbol_table.push(HashMap::new()); + for arg in args.clone() { + self.symbol_table.last_mut().unwrap().insert(arg.name, (Symbol::Var,0)); + } + let block = self.visit(*block); + self.symbol_table.pop(); + ast_from_ast!(Function(name, args, Box::new(block)), ast, ast) + } + Return(expr) => { + if let Some(expr) = *expr { + let expr = self.visit(expr); + return ast_from_ast!(Return(Box::new(Some(expr))), ast, ast); + } + ast_from_ast!(Return(Box::new(None)), ast, ast) + } + Loop(block) => { + let block = self.visit(*block); + ast_from_ast!(Loop(Box::new(block)), ast, ast) + } + While(condition, block) => { + let condition = self.visit(*condition); + let block = self.visit(*block); + ast_from_ast!(While(Box::new(condition), Box::new(block)), ast, ast) + } + For(expr1, expr2, expr3, block) => { + let mut n_expr1 = None; + let mut n_expr2 = None; + let mut n_expr3 = None; + if let Some(expr) = *expr1 { + n_expr1 = Some(self.visit(expr)); + } + if let Some(expr) = *expr2 { + n_expr2 = Some(self.visit(expr)); + } + if let Some(expr) = *expr3 { + n_expr3 = Some(self.visit(expr)); + } + let block = self.visit(*block); + ast_from_ast!(For(Box::new(n_expr1), Box::new(n_expr2), Box::new(n_expr3), Box::new(block)), ast, ast) + } + If(condition, block, else_expr) => { + let condition = self.visit(*condition); + let block = self.visit(*block); + let mut n_else_expr = None; + if let Some(else_expr) = *else_expr { + n_else_expr = Some(self.visit(else_expr)); + } + ast_from_ast!(If(Box::new(condition), Box::new(block), Box::new(n_else_expr)), ast, ast) + } + Block(stmts) => { + self.symbol_table.push(HashMap::new()); + let mut n_stmts = Vec::new(); + for stmt in stmts { + n_stmts.push(self.visit(stmt)); + } + self.symbol_table.pop(); + ast_from_ast!(Block(n_stmts), ast, ast) + } + Decl(name, expr) => { + let expr = self.visit(*expr); + self.locals.push(0); + self.symbol_table.last_mut().unwrap().insert(name.name.clone(), (Symbol::Var, self.locals.len()-1)); + AST { + ast_type: Decl(name, Box::new(expr)), + extensions: vec![Extension::Resolution(self.locals.len()-1)], + start_line: ast.start_line, + end_line: ast.end_line, + start_column: ast.start_column, + end_column: ast.end_column, + } + } + Expr(expr) => { + let expr = self.visit(*expr); + ast_from_ast!(Expr(Box::new(expr)), ast, ast) + } + // REMOVE LATER + Print(expr) => { + let expr = self.visit(*expr); + ast_from_ast!(Print(Box::new(expr)), ast, ast) + } + Assign(name, expr) => { + let expr = self.visit(*expr); + if !self.symbol_table.last().unwrap().contains_key(&name.name) { + self.errors.push(out_of_scope_var!(self, "Variable \"{}\" not in scope", name, ast)); + } + ast_from_ast!(Assign(name, Box::new(expr)), ast, ast) + } + Ternary(condition, true_expr, false_expr) => { + let condition = self.visit(*condition); + let true_expr = self.visit(*true_expr); + let false_expr = self.visit(*false_expr); + ast_from_ast!(Ternary(Box::new(condition), Box::new(true_expr), Box::new(false_expr)), ast, ast) + } + Subscript(array, index) => { + let array = self.visit(*array); + let index = self.visit(*index); + ast_from_ast!(Subscript(Box::new(array), Box::new(index)), ast, ast) + } + Binary(left, oper, right) => { + let left = self.visit(*left); + let right = self.visit(*right); + ast_from_ast!(Binary(Box::new(left), oper, Box::new(right)), ast, ast) + } + Unary(oper, right) => { + let right = self.visit(*right); + ast_from_ast!(Unary(oper, Box::new(right)), ast, ast) + } + Lit(ref lit) => { + let lit = lit.clone(); + match lit { + crate::ast::Lit::Ident(ref name) => { + let resolution = self.resolve_var(name); + if !resolution.0 { + self.errors.push(out_of_scope!(self, "Variable \"{}\" not in scope", name, ast)) + } else { + return AST { + ast_type: Lit(lit), + extensions: vec![Extension::Resolution(resolution.1)], + start_line: ast.start_line, + end_line: ast.end_line, + start_column: ast.start_column, + end_column: ast.end_column, + }; + } + } + _ => {} + } + ast_from_ast!(Lit(lit), ast, ast) + } + Call(ident, args) => { + if let Err(e) = self.resolve_function(&ident.name, args.len()) { + match e { + (ErrorKind::OutOfScope, _, _) => self.errors.push(out_of_scope!(self, "Function \"{}\" not in scope", ident.name, ast)), + (ErrorKind::IncorrectArity, Some(f_arity), Some(arity)) => { + self.errors.push( + KabelError::new( + ErrorKind::IncorrectArity, + format!("Function {} has {} argument, provided {}", ident.name, f_arity, arity), + ast.start_line, + ast.start_column, + collect_lines!(self.text[ast.start_line-1..ast.end_line-1]), + ) + ); + } + _ => { panic!("Returned invalid ErrorKind from resolve_function") }, + } + } + let mut n_args = Vec::new(); + for arg in args { + n_args.push(self.visit(arg)); + } + ast_from_ast!(Call(ident, n_args), ast, ast) + } + /*Member(left, right) => { + self.visit_member(*left, *right); + }*/ + _ => { panic!("not implemented") } // not implemented + } + } + // TODO: make visit_member not throw out of scope errors + /*pub fn visit_member(&mut self, left: AST, right: AST) { + self.visit(left); + self.visit(right); + }*/ + fn resolve_var(&self, name: &String) -> (bool, usize) { + for scope in self.symbol_table.iter().rev() { + if let Some((Symbol::Var, place)) = scope.get(name) { + return (true, *place); + } + } + (false, 0) + } + fn resolve_function(&mut self, name: &String, arity: usize) -> Result<(), (ErrorKind, Option, Option)>{ + for scope in self.symbol_table.iter().rev() { + if let Some((Symbol::Function(f_arity), _place)) = scope.get(name) { + if *f_arity == arity { + return Ok(()); + } else { + return Err((ErrorKind::IncorrectArity, Some(*f_arity), Some(arity))); + } + } + } + Err((ErrorKind::OutOfScope, None, None)) + } +} + +pub enum Symbol { + Var, + Function(usize), +} diff --git a/kabel/src/opcodes.rs b/kabel/src/opcodes.rs index 36fe3cf202dda9f8250f1d235e6a815eb7767661..727fb060f47d678bbb93a7824b50d8a0d8baacd1 100644 --- a/kabel/src/opcodes.rs +++ b/kabel/src/opcodes.rs @@ -1,7 +1,8 @@ #![allow(non_camel_case_types)] pub enum OpCode { - CONSTANT, + LOAD, + VAR, ADD, SUB, @@ -35,34 +36,35 @@ impl From for u8 { fn from(value: OpCode) -> Self { use OpCode::*; match value { - CONSTANT => 0x00, + LOAD => 0x00, + VAR => 0x01, - ADD => 0x01, - SUB => 0x02, - MUL => 0x03, - DIV => 0x04, - MOD => 0x05, - BITAND => 0x06, - BITXOR => 0x07, - BITOR => 0x08, - EQ => 0x09, - NE => 0x0A, - GR => 0x0B, - GE => 0x0C, - LS => 0x0D, - LE => 0x0E, - OR => 0x0F, - AND => 0x10, + ADD => 0x02, + SUB => 0x03, + MUL => 0x04, + DIV => 0x05, + MOD => 0x06, + BITAND => 0x07, + BITXOR => 0x08, + BITOR => 0x09, + EQ => 0x010, + NE => 0x0A, + GR => 0x0B, + GE => 0x0C, + LS => 0x0D, + LE => 0x0E, + OR => 0x0F, + AND => 0x11, - NOT => 0x11, - NEG => 0x12, + NOT => 0x12, + NEG => 0x13, - JMP => 0x13, - IF_NE => 0x14, + JMP => 0x14, + IF_NE => 0x15, - POP => 0xFD, - PRINT => 0xFE, - ERR => 0xFF + POP => 0xFD, + PRINT => 0xFE, + ERR => 0xFF } } } @@ -70,30 +72,31 @@ impl From for OpCode { fn from(value: u8) -> Self { use OpCode::*; match value { - 0x00 => CONSTANT, + 0x00 => LOAD, + 0x01 => VAR, - 0x01 => ADD, - 0x02 => SUB, - 0x03 => MUL, - 0x04 => DIV, - 0x05 => MOD, - 0x06 => BITAND, - 0x07 => BITXOR, - 0x08 => BITOR, - 0x09 => EQ, - 0x0A => NE, - 0x0B => GR, - 0x0C => GE, - 0x0D => LS, - 0x0E => LE, - 0x0F => OR, - 0x10 => AND, + 0x02 => ADD, + 0x03 => SUB, + 0x04 => MUL, + 0x05 => DIV, + 0x06 => MOD, + 0x07 => BITAND, + 0x08 => BITXOR, + 0x09 => BITOR, + 0x0A => EQ, + 0x0B => NE, + 0x0C => GR, + 0x0D => GE, + 0x0E => LS, + 0x0F => LE, + 0x10 => OR, + 0x11 => AND, - 0x11 => NOT, - 0x12 => NEG, + 0x12 => NOT, + 0x13 => NEG, - 0x13 => JMP, - 0x14 => IF_NE, + 0x14 => JMP, + 0x15 => IF_NE, 0xFD => POP, 0xFE => PRINT, diff --git a/kabel/src/parser.rs b/kabel/src/parser.rs index 77f9940112b432b9f5c56ae2b450f9948ca78c1a..eda624974629f4bb30020ab7f23aa018b2fa8dfd 100644 --- a/kabel/src/parser.rs +++ b/kabel/src/parser.rs @@ -1,6 +1,7 @@ use crate::{ - ast_from_ast, ast_from_ast_token, ast_from_token, ast_from_token_ast, collect_lines, error::{ErrorKind, KabelError}, lexer::{Token, TokenType}, lit, name, unexpected_token + ast_from_ast, ast_from_ast_token, ast_from_token, ast_from_token_ast, collect_lines, error::{ErrorKind, KabelError}, extension::Extension, lexer::{Token, TokenType}, lit, name, unexpected_token }; +use crate::ast::{AST, ASTType, BinOp, UnOp, Lit}; pub struct Parser { input: Vec, @@ -34,6 +35,7 @@ impl Parser { } AST { ast_type: ASTType::Program(program), + extensions: Vec::new(), start_line: 0, end_line: 0, start_column: 0, @@ -52,6 +54,7 @@ impl Parser { TokenType::Continue => self.continue_statement(), TokenType::If => self.if_statement(), TokenType::LeftBrace => self.block(), + TokenType::Var => self.declaration(), TokenType::Print => self.print_statement(), // REMOVE LATER _ => self.expression_statement(), } @@ -296,6 +299,35 @@ impl Parser { } } + pub fn declaration(&mut self) -> Result { + let var = self.read_token()?; + let ident = self.read_token()?; + if let TokenType::Ident(name) = ident.token_type { + let equal = self.read_token()?; + if let TokenType::Equal = equal.token_type { + let expr = self.expression()?; + let semicolon = self.read_token()?; + if let TokenType::Semicolon = semicolon.token_type { + return Ok(ast_from_token!( + ASTType::Decl(name!(name, ident), Box::new(expr.clone())), + var, + semicolon + )); + } else { + return Err(unexpected_token!(self, "Expected ; found {}", equal)); + } + } else { + return Err(unexpected_token!(self, "Expected = found {}", equal)); + } + } else { + return Err(unexpected_token!( + self, + "Expected identifier found {}", + ident + )); + } + } + // REMOVE LATER pub fn print_statement(&mut self) -> Result { let print_ident = self.read_token()?; @@ -331,37 +363,10 @@ impl Parser { } pub fn expression(&mut self) -> Result { - if let TokenType::Var = self.peek()?.token_type { - return self.declaration(); - } let assignment = self.assignment()?; Ok(assignment) } - pub fn declaration(&mut self) -> Result { - let var = self.read_token()?; - let ident = self.read_token()?; - if let TokenType::Ident(name) = ident.token_type { - let equal = self.read_token()?; - if let TokenType::Equal = equal.token_type { - let expr = self.expression()?; - return Ok(ast_from_token_ast!( - ASTType::Decl(name!(name, ident), Box::new(expr.clone())), - var, - expr - )); - } else { - return Err(unexpected_token!(self, "Expected = found {}", equal)); - } - } else { - return Err(unexpected_token!( - self, - "Expected identifier found {}", - ident - )); - } - } - pub fn assignment(&mut self) -> Result { if let TokenType::Ident(name) = self.peek()?.token_type { let ident = self.read_token()?; @@ -1057,92 +1062,3 @@ impl Parser { return Ok(self.input[self.current].clone()); } } - -#[derive(Debug, Clone)] -pub struct AST { - pub ast_type: ASTType, - pub start_line: usize, - pub end_line: usize, - pub start_column: usize, - pub end_column: usize, -} - -#[derive(Debug, Clone)] -pub enum ASTType { - Program(Vec), - - // statements - Function(Name, Vec, Box), // name, args, block - Return(Box>), // expression - Loop(Box), // block - While(Box, Box), // condition, block - For( - Box>, - Box>, - Box>, - Box, - ), // expr1, expr2, expr3, block - Break, - Continue, - If(Box, Box, Box>), // condition, block, else/else if - Block(Vec), // statements - Expr(Box), // expr - // REMOVE LATER - Print(Box), - - // expressions - Decl(Name, Box), // identifier, expression - Assign(Name, Box), - Ternary(Box, Box, Box), - Subscript(Box, Box), - Binary(Box, BinOp, Box), - Unary(UnOp, Box), - - // primary - Lit(Lit), - Call(Name, Vec), - Member(Box, Box), -} - -#[derive(Debug, Clone)] -pub struct Name { - pub name: String, - pub start_column: usize, - pub end_column: usize, - pub line: usize, -} - -#[derive(Debug, Clone)] -pub enum Lit { - Ident(String), - Num(f32), - Str(String), - Bool(bool), - Array(Vec), -} - -#[derive(Debug, Clone, Copy)] -pub enum BinOp { - Add, - Sub, - Mul, - Div, - Mod, - BitAnd, - BitXor, - BitOr, - Eq, - Ne, - Gr, - Ge, - Ls, - Le, - Or, - And, -} - -#[derive(Debug, Clone, Copy)] -pub enum UnOp { - Not, - Neg, -} diff --git a/kabel/src/semantic_analysis.rs b/kabel/src/semantic_analysis.rs deleted file mode 100644 index 5eaf83f0c0c187a8e64d38085d3e77164027acb1..0000000000000000000000000000000000000000 --- a/kabel/src/semantic_analysis.rs +++ /dev/null @@ -1,222 +0,0 @@ -use std::collections::HashMap; - -use crate::{collect_lines, error::{ErrorKind, KabelError}, out_of_scope, out_of_scope_var, parser::{ASTType, Lit, Name, AST}}; - -pub struct Analyzer { - text: Vec, - symbol_table: Vec>, - pub errors: Vec, -} - -impl Analyzer { - pub fn new(text: String) -> Self { - Self { - text: text.lines().collect::>().iter().map(|s| s.to_string()).collect(), - symbol_table: vec![HashMap::new()], - errors: Vec::new(), - } - } - pub fn visit(&mut self, ast: AST) { - use ASTType::*; - match ast.ast_type { - Program(asts) => { - self.visit_program(asts); - } - Function(name, args, block) => { - self.visit_function(name, args, *block); - } - Return(expr) => { - self.visit_return(*expr); - } - Loop(block) => { - self.visit_loop(*block); - } - While(condition, block) => { - self.visit_while(*condition, *block); - } - For(expr1, expr2, expr3, block) => { - self.visit_for(*expr1, *expr2, *expr3, *block); - } - If(condition, block, else_expr) => { - self.visit_if(*condition, *block, *else_expr); - } - Block(stmts) => { - self.visit_block(stmts); - } - Expr(expr) => { - self.visit(*expr); - } - // REMOVE LATER - Print(expr) => { - self.visit_print(*expr); - } - Decl(name, expr) => { - self.visit_decl(name, *expr); - } - Assign(ref name, ref expr) => { - self.visit_assign(ast.clone(), name.clone(), *expr.clone()); - } - Ternary(condition, true_expr, false_expr) => { - self.visit_ternary(*condition, *true_expr, *false_expr); - } - Subscript(array, index) => { - self.visit_subscript(*array, *index); - } - Binary(left, _oper, right) => { - self.visit_binary(*left, *right); - } - Unary(_oper, right) => { - self.visit_unary(*right); - } - Lit(ref lit) => { - self.visit_lit(ast.clone(), lit.clone()); - } - Call(ref ident, ref args) => { - self.visit_call(ast.clone(), ident.clone(), args.clone()); - } - /*Member(left, right) => { - self.visit_member(*left, *right); - }*/ - _ => {} // not implemented - } - } - pub fn visit_program(&mut self, asts: Vec) { - for ast in asts { - self.visit(ast); - } - } - pub fn visit_function(&mut self, name: Name, args: Vec, block: AST) { - self.symbol_table.last_mut().unwrap().insert(name.name.clone(), Symbol::Function(args.len())); - self.symbol_table.push(HashMap::new()); - for arg in args { - self.symbol_table.last_mut().unwrap().insert(arg.name, Symbol::Var); - } - self.visit(block); - self.symbol_table.pop(); - } - pub fn visit_return(&mut self, expr: Option) { - if let Some(expr) = expr { - self.visit(expr); - } - } - pub fn visit_loop(&mut self, block: AST) { - self.visit(block); - } - pub fn visit_while(&mut self, condition: AST, block: AST) { - self.visit(condition); - self.visit(block); - } - pub fn visit_for(&mut self, expr1: Option, expr2: Option, expr3: Option, block: AST) { - if let Some(expr) = expr1 { - self.visit(expr); - } - if let Some(expr) = expr2 { - self.visit(expr); - } - if let Some(expr) = expr3 { - self.visit(expr); - } - self.visit(block); - } - pub fn visit_if(&mut self, condition: AST, block: AST, else_expr: Option) { - self.visit(condition); - self.visit(block); - if let Some(else_expr) = else_expr { - self.visit(else_expr); - } - } - pub fn visit_block(&mut self, stmts: Vec) { - self.symbol_table.push(HashMap::new()); - for stmt in stmts { - self.visit(stmt); - } - self.symbol_table.pop(); - } - // REMOVE LATER - pub fn visit_print(&mut self, expr: AST) { - self.visit(expr); - } - pub fn visit_decl(&mut self, name: Name, expr: AST) { - self.visit(expr); - self.symbol_table.last_mut().unwrap().insert(name.name, Symbol::Var); - } - pub fn visit_assign(&mut self, ast: AST, name: Name, expr: AST) { - self.visit(expr.clone()); - if !self.symbol_table.last().unwrap().contains_key(&name.name) { - self.errors.push(out_of_scope_var!(self, "Variable \"{}\" not in scope", name, ast)) - } - } - pub fn visit_ternary(&mut self, condition: AST, true_expr: AST, false_expr: AST) { - self.visit(condition); - self.visit(true_expr); - self.visit(false_expr); - } - pub fn visit_subscript(&mut self, array: AST, index: AST) { - self.visit(array); - self.visit(index); - } - pub fn visit_binary(&mut self, left: AST, right: AST) { - self.visit(left); - self.visit(right); - } - pub fn visit_unary(&mut self, right: AST) { - self.visit(right); - } - pub fn visit_lit(&mut self, ast: AST, lit: Lit) { - match lit { - Lit::Ident(name) => { - if !self.resolve_var(&name) { - self.errors.push(out_of_scope!(self, "Variable \"{}\" not in scope", name, ast)) - } - } - _ => {} - } - } - pub fn visit_call(&mut self, ast: AST, ident: Name, args: Vec) { - if !self.resolve_function(&ast, &ident.name, args.len()) { - self.errors.push(out_of_scope!(self, "Function \"{}\" not in scope", ident.name, ast)) - } - for arg in args { - self.visit(arg); - } - } - // TODO: make visit_member not throw out of scope errors - /*pub fn visit_member(&mut self, left: AST, right: AST) { - self.visit(left); - self.visit(right); - }*/ - fn resolve_var(&self, name: &String) -> bool { - for scope in self.symbol_table.iter().rev() { - if matches!(scope.get(name), Some(Symbol::Var)) { - return true; - } - } - false - } - fn resolve_function(&mut self, ast: &AST, name: &String, arity: usize) -> bool { - for scope in self.symbol_table.iter().rev() { - if let Some(Symbol::Function(f_arity)) = scope.get(name) { - if *f_arity == arity { - return true; - } else { - self.errors.push( - KabelError::new( - ErrorKind::OutOfScope, - format!("Function {} has {} argument, provided {}", name, *f_arity, arity), - ast.start_line, - ast.start_column, - collect_lines!(self.text[ast.start_line-1..ast.end_line-1]), - ) - ); - return true; - } - } - } - false - } -} - -pub enum Symbol { - Var, - Function(usize), -} diff --git a/kabel/src/vm.rs b/kabel/src/vm.rs index 6b4863bf7474e5c78ece4610cdbcd6b90c4b6e1d..d5939b9d5a81787735294cf4099c8b0a6dfaf415 100644 --- a/kabel/src/vm.rs +++ b/kabel/src/vm.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use crate::{mismatched_types, runtime_error::KabelRuntimeError, vm_boolean_binary, wrong_type}; pub struct VM { @@ -5,17 +7,20 @@ pub struct VM { pub chunk: Vec, pub stack: Vec, pub pool: Vec, + pub local: HashMap, // (name, stack pointer) pub lines: Vec<(usize, usize)>, // line #, repeats number of instructions text: Vec } impl VM { - pub fn new(bytecode: Vec, lines: Vec<(usize, usize)>, pool: Vec, text: String) -> Self { + pub fn new(bytecode: Vec, lines: Vec<(usize, usize)>, pool: Vec, + local: HashMap, text: String) -> Self { Self { ip: 0, chunk: bytecode, stack: Vec::new(), pool, + local, lines, text: text.lines().map(|s| s.to_string()).collect(), } @@ -24,11 +29,15 @@ impl VM { use Value::*; while self.ip < self.chunk.len() { match self.read() { - 0x00 => { // CONSTANT + 0x00 => { // LOAD let byte = self.read() as usize; self.stack.push(self.pool[byte].clone()); } - 0x01 => { // ADD + 0x01 => { // VAR + let ptr = self.read() as usize; + self.stack.push(self.stack[ptr].clone()); + } + 0x02 => { // ADD match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => self.stack.push(Num(v1 + v2)), (Str(v1), Str(v2)) => { @@ -42,7 +51,7 @@ impl VM { } } } - 0x02 => { // SUB + 0x03 => { // SUB match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => self.stack.push(Num(v1 - v2)), (Str(_v1), Str(_v2)) => { @@ -56,7 +65,7 @@ impl VM { } } } - 0x03 => { // MUL + 0x04 => { // MUL match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => self.stack.push(Num(v1 * v2)), (Str(v1), Num(v2)) => { @@ -77,7 +86,7 @@ impl VM { } } } - 0x04 => { // DIV + 0x05 => { // DIV match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => self.stack.push(Num(v1 / v2)), (Str(_v1), Str(_v2)) => { @@ -91,7 +100,7 @@ impl VM { } } } - 0x05 => { // MOD + 0x06 => { // MOD match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => self.stack.push(Num(v1 % v2)), (Str(_v1), Str(_v2)) => { @@ -105,7 +114,7 @@ impl VM { } } } - 0x06 => { // BITAND + 0x07 => { // BITAND match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => { if v1.fract() != 0.0 { @@ -127,7 +136,7 @@ impl VM { } } } - 0x07 => { // BITXOR + 0x08 => { // BITXOR match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => { if v1.fract() != 0.0 { @@ -149,7 +158,7 @@ impl VM { } } } - 0x08 => { // BITOR + 0x09 => { // BITOR match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(v1), Num(v2)) => { if v1.fract() != 0.0 { @@ -172,19 +181,19 @@ impl VM { } } // EQ - 0x09 => vm_boolean_binary!(self, ==), + 0x0a => vm_boolean_binary!(self, ==), // NE - 0x0A => vm_boolean_binary!(self, !=), + 0x0B => vm_boolean_binary!(self, !=), // GR - 0x0B => vm_boolean_binary!(self, >), + 0x0C => vm_boolean_binary!(self, >), // GE - 0x0C => vm_boolean_binary!(self, >=), + 0x0D => vm_boolean_binary!(self, >=), // LS - 0x0D => vm_boolean_binary!(self, <), + 0x0E => vm_boolean_binary!(self, <), // LE - 0x0E => vm_boolean_binary!(self, <=), + 0x0F => vm_boolean_binary!(self, <=), // OR - 0x0F => match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { + 0x10 => match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(_v1), Num(_v2)) => { return Err(wrong_type!(self, "Cannot perform logical OR on numbers")) } @@ -197,7 +206,7 @@ impl VM { } } // AND - 0x10 => match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { + 0x11 => match (self.stack.pop().unwrap(), self.stack.pop().unwrap()) { (Num(_v1), Num(_v2)) => { return Err(wrong_type!(self, "Cannot perform logical AND on numbers")) } @@ -210,7 +219,7 @@ impl VM { } } // NOT - 0x11 => match self.stack.pop().unwrap() { + 0x12 => match self.stack.pop().unwrap() { Num(_v1) => { return Err(wrong_type!(self, "Cannot perform logical NOT on numbers")) } @@ -220,7 +229,7 @@ impl VM { Bool(v1) => self.stack.push(Bool(!v1)), } // NEG - 0x12 => match self.stack.pop().unwrap() { + 0x13 => match self.stack.pop().unwrap() { Num(v1) => self.stack.push(Num(-v1)), Str(_v1) => { return Err(wrong_type!(self, "Cannot negate strings")) @@ -230,12 +239,12 @@ impl VM { } } // JMP - 0x13 => { + 0x14 => { let loc = self.read_u16(); self.ip += loc as usize; } // IF_NE - 0x14 => { + 0x15 => { let condition = self.stack.pop().unwrap(); if let Value::Bool(condition) = condition { if !condition { diff --git a/kabel/test/syntax/assignment.out b/kabel/test/syntax/assignment.out index 44732e1230001d16b49ba0c4f7e325437b821be6..69ba98f1fd3135a93b872c29cfdd8af48ba01736 100644 --- a/kabel/test/syntax/assignment.out +++ b/kabel/test/syntax/assignment.out @@ -1,10 +1,8 @@ Program -| Expr -| | Decl i -| | | Lit 0 +| Decl i +| | Lit 0 | Expr | | Assign i | | | Lit string -| Expr -| | Decl _foo -| | | Lit 3 +| Decl _foo +| | Lit 3