From 750511548b3d67f7d9dc73b6984104feaed7f5b7 Mon Sep 17 00:00:00 2001 From: ikanago <28985004+ikanago@users.noreply.github.com> Date: Thu, 21 May 2020 21:46:08 +0900 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Implement=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + src/lexer.rs | 10 ++--- src/lib.rs | 5 +-- src/main.rs | 9 ++-- src/parser.rs | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/syntax.rs | 8 ++-- 6 files changed, 140 insertions(+), 15 deletions(-) create mode 100644 src/parser.rs diff --git a/.gitignore b/.gitignore index 96ef6c0..eb1d4ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target Cargo.lock +.vscode \ No newline at end of file diff --git a/src/lexer.rs b/src/lexer.rs index 9dd887d..65e570e 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,9 +1,9 @@ use std::collections::HashMap; use std::str::from_utf8; -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum Token { - Number(usize), + Number(u64), Identifier(String), True, False, @@ -17,8 +17,8 @@ pub enum Token { fn reserve_keyword() -> HashMap<String, Token> { let mut keywords = HashMap::new(); - keywords.insert("True".to_string(), Token::True); - keywords.insert("False".to_string(), Token::False); + keywords.insert("true".to_string(), Token::True); + keywords.insert("false".to_string(), Token::False); keywords.insert("if".to_string(), Token::If); keywords.insert("then".to_string(), Token::Then); keywords.insert("else".to_string(), Token::Else); @@ -79,7 +79,7 @@ impl<'a> Lexer<'a> { let end = self.read_many(|b| b"0123456789".contains(&b)); let num = from_utf8(&self.input[start..end]) .unwrap() - .parse::<usize>() + .parse::<u64>() .unwrap(); self.pos = end; Some(Token::Number(num)) diff --git a/src/lib.rs b/src/lib.rs index cc55ba9..6c91fed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,3 @@ pub mod lexer; pub mod syntax; - -pub fn test() { - println!("Hello"); -} +pub mod parser; diff --git 
a/src/main.rs b/src/main.rs index 8b81f6f..c6a528d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,10 @@ use mini_ml::lexer; +use mini_ml::parser; fn main() { - let mut lexer = lexer::Lexer::new("if true then 3 else 4;;"); - let tokens = lexer.lex(); - println!("{:?}", tokens); + let mut lexer = lexer::Lexer::new("if true then if false then 1 else 2 else 4;;"); + let tokens = lexer.lex().unwrap(); + let mut parser = parser::Parser::new(&tokens); + let ast = parser.parse(); + println!("{:?}", ast); } \ No newline at end of file diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..22cfa40 --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,122 @@ +use crate::lexer::Token; +use crate::syntax::{BinOpKind, Expr}; + +#[derive(Debug)] +pub enum ParseError { + Eof, + UnexpectedToken, +} + +#[derive(Clone, Debug)] +pub struct Parser<'a> { + tokens: &'a Vec<Token>, + pos: usize, +} + +impl<'a> Parser<'a> { + pub fn new(tokens: &'a Vec<Token>) -> Self { + Parser { tokens, pos: 0 } + } + + /// Take a look at a next token and return its kind. + fn peek(&self) -> Option<&Token> { + if self.tokens.len() == self.pos { + return None; + } + Some(&self.tokens[self.pos]) + } + + /// Return current token and move `pos` forward. + fn next(&mut self) -> Option<Token> { + if self.tokens.len() == self.pos { + return None; + } + let token = self.tokens[self.pos].clone(); + self.pos += 1; + Some(token) + } + + /// Check if a current token has expected type and proceed to next one. + fn expect_token(&mut self, expected_token: Token) -> Result<(), ParseError> { + self.next().ok_or(ParseError::Eof).and_then(|token| { + if token == expected_token { + Ok(()) + } else { + println!("{:?}", token); + println!("{:?}", expected_token); + Err(ParseError::UnexpectedToken) + } + }) + } + + /// Parse tokens and build AST. 
+ pub fn parse(&mut self) -> Result<Vec<Expr>, ParseError> { + let mut asts = Vec::new(); + loop { + let ast = self.parse_expr()?; + self.expect_token(Token::SemiColon)?; + asts.push(ast); + if self.peek().is_none() { + break; + } + } + Ok(asts) + } + + fn parse_expr(&mut self) -> Result<Expr, ParseError> { + match self.peek() { + Some(&Token::If) => self.parse_if(), + _ => self.parse_add(), + } + } + + fn parse_if(&mut self) -> Result<Expr, ParseError> { + self.expect_token(Token::If)?; + let condition = self.parse_expr()?; + self.expect_token(Token::Then)?; + let then = self.parse_expr()?; + self.expect_token(Token::Else)?; + let els = self.parse_expr()?; + Ok(Expr::If(Box::new(condition), Box::new(then), Box::new(els))) + } + + fn parse_add(&mut self) -> Result<Expr, ParseError> { + let mut lhs = self.parse_mul()?; + loop { + if self.peek() == Some(&Token::Plus) { + self.next(); + let rhs = self.parse_mul()?; + lhs = Expr::Binop(BinOpKind::Plus, Box::new(lhs), Box::new(rhs)); + } else { + break; + } + } + Ok(lhs) + } + + fn parse_mul(&mut self) -> Result<Expr, ParseError> { + let mut lhs = self.parse_primary()?; + loop { + if self.peek() == Some(&Token::Asterisk) { + self.next(); + let rhs = self.parse_primary()?; + lhs = Expr::Binop(BinOpKind::Mult, Box::new(lhs), Box::new(rhs)); + } else { + break; + } + } + Ok(lhs) + } + + fn parse_primary(&mut self) -> Result<Expr, ParseError> { + self.next() + .ok_or(ParseError::Eof) + .and_then(|token| match token { + Token::Number(n) => Ok(Expr::U64(n)), + Token::Identifier(var) => Ok(Expr::Var(var)), + Token::True => Ok(Expr::Bool(true)), + Token::False => Ok(Expr::Bool(false)), + _ => unimplemented!(), + }) + } +} diff --git a/src/syntax.rs b/src/syntax.rs index 2173ba0..6417ed6 100644 --- a/src/syntax.rs +++ b/src/syntax.rs @@ -1,13 +1,15 @@ -enum BinopKind { +#[derive(Debug)] +pub enum BinOpKind { Plus, Mult, } -enum Expr { +#[derive(Debug)] +pub enum Expr { Var(String), U64(u64), Bool(bool), - Binop(BinopKind, Box<Expr>, Box<Expr>), + Binop(BinOpKind, Box<Expr>, Box<Expr>), If(Box<Expr>, Box<Expr>, Box<Expr>), }