use crate::ast::*; #[derive(Debug, PartialEq)] enum Token { LParen, RParen, Lambda, Word(String), Define, Let, } #[derive(Debug)] struct Parser { tokens: Vec, index: usize, } impl Parser { fn expect(&mut self, token: Token) -> Result<(), String> { if self.tokens.get(self.index) == Some(&token) { Ok(()) } else { Err(format!( "Expected {:?}, got {:?}", token, self.tokens.get(self.index) )) } .map(|_| { self.next(); }) } fn expect_symbol(&mut self) -> Result { if let Token::Word(s) = self.tokens.get(self.index).ok_or("Expected a symbol")? { Ok(s.clone()) } else { Err("Expected a symbol".to_string()) } .map(|s| { self.next(); s }) } fn next(&mut self) { self.index += 1; } fn backtrack(&mut self) { self.index -= 1; } } pub fn parse(arg: &str) -> Vec { parse_total(arg) .map_err(|e| panic!("Syntax error: {}", e)) .unwrap() } pub fn parse_total(arg: &str) -> Result, String> { let tokens = tokenize(arg); let mut parser = Parser { tokens, index: 0 }; let mut result = Vec::new(); while parser.index < parser.tokens.len() { let expr = parse_toplevel(&mut parser)?; result.push(expr); } Ok(result) } fn parse_toplevel(parser: &mut Parser) -> Result { parse_definition(parser).or_else(|_| parse_expression(parser)) } fn parse_definition(parser: &mut Parser) -> Result { parser.expect(Token::LParen)?; parser.expect(Token::Define).map_err(|e| { parser.backtrack(); e.to_string() })?; let var = parse_variable(parser)?; let body = parse_expression(parser)?; parser.expect(Token::RParen)?; Ok(Value::Def(var, Box::new(body))) } fn parse_expression(parser: &mut Parser) -> Result { parse_value(parser) .or_else(|_| parse_abstraction(parser)) .or_else(|_| parse_let(parser)) .or_else(|_| parse_application(parser)) } fn parse_abstraction(parser: &mut Parser) -> Result { parser.expect(Token::LParen)?; parser.expect(Token::Lambda).map_err(|e| { parser.backtrack(); e.to_string() })?; let vars = parse_variables(parser)?; let body = parse_expression(parser)?; parser.expect(Token::RParen)?; let result = vars .iter() .rev() .fold(body, |acc, var| Value::Lam(var.clone(), Box::new(acc))); Ok(result) } fn parse_variables(parser: &mut Parser) -> Result, String> { parse_variable(parser) .map(|s| vec![s]) .or_else(|_| parse_variables_list(parser)) } fn parse_variables_list(parser: &mut Parser) -> Result, String> { let mut vars = Vec::new(); parser.expect(Token::LParen)?; while let Ok(var) = parse_variable(parser) { vars.push(var); } parser.expect(Token::RParen)?; Ok(vars) } fn parse_variable(parser: &mut Parser) -> Result { let var = parser.expect_symbol()?; Ok(var) } fn parse_let(parser: &mut Parser) -> Result { parser.expect(Token::LParen)?; parser.expect(Token::Let).map_err(|e| { parser.backtrack(); e.to_string() })?; parser.expect(Token::LParen)?; let var = parse_variable(parser)?; let body = parse_expression(parser)?; parser.expect(Token::RParen)?; let expr = parse_expression(parser)?; parser.expect(Token::RParen)?; Ok(Value::Let(var, Box::new(body), Box::new(expr))) } fn parse_application(parser: &mut Parser) -> Result { parser.expect(Token::LParen)?; let init = parse_expression(parser)?; let mut exprs = Vec::new(); while let Ok(expr) = parse_expression(parser) { exprs.push(expr); } if exprs.is_empty() { return Err("Application needs two values".to_string()); } parser.expect(Token::RParen)?; let app: Value = exprs.iter().fold(init, |acc, expr| { Value::App(Box::new(acc.clone()), Box::new(expr.clone())) }); Ok(app.to_owned()) } fn parse_value(parser: &mut Parser) -> Result { let token = parser.tokens.get(parser.index).ok_or("Expected a value")?; let val = parse_number(token) .or_else(|_| parse_bool(token)) .or_else(|_| parse_symbol(token))?; parser.next(); Ok(val) } fn tokenize(arg: &str) -> Vec { let mut result = Vec::new(); let mut word = String::new(); for c in arg.chars() { match c { '(' => { terminate(&mut result, &mut word); result.push(Token::LParen) } ')' => { terminate(&mut result, &mut word); result.push(Token::RParen) } c if c.is_whitespace() => terminate(&mut result, &mut word), c => word.push(c), } } terminate(&mut result, &mut word); result } fn terminate(result: &mut Vec, word: &mut String) { if !word.is_empty() { let w = word.clone(); if w == "lam" { result.push(Token::Lambda); } else if w == "def" { result.push(Token::Define); } else if w == "let" { result.push(Token::Let); } else { result.push(Token::Word(w)); } word.clear(); } } fn parse_symbol(token: &Token) -> Result { match token { Token::Word(s) => Ok(Value::Sym(s.clone())), _ => Err(format!("Expected a symbol, got {:?}", token)), } } fn parse_bool(token: &Token) -> Result { match token { Token::Word(s) => s .parse::() .map(Value::Bool) .map_err(|e| e.to_string()), _ => Err("Expected a boolean".to_string()), } } fn parse_number(token: &Token) -> Result { match token { Token::Word(s) => s.parse::().map(Value::Num).map_err(|e| e.to_string()), _ => Err("Expected an integer".to_string()), } } #[cfg(test)] mod tests { use super::parse_total; use super::Token::*; use super::Value; use super::Value::*; use super::{parse, tokenize}; use proptest::prelude::*; proptest! { #[test] fn parse_integer_as_number(i in -1000i32..1000) { let result = parse(&i.to_string()); assert_eq!(vec![Num(i)], result); } } #[test] fn parse_truth_values_as_booleans() { assert_eq!(vec![Bool(true)], parse("true")); assert_eq!(vec![Bool(false)], parse("false")); } #[test] fn parse_identifiers_values_as_symbols() { assert_eq!(vec![Sym("foo".to_string())], parse("foo")); } #[test] fn ignores_whitespace() { assert_eq!(vec![Sym("foo".to_string())], parse(" foo \n\r")); assert_eq!(vec![Num(-42)], parse("\n-42")); } #[test] fn tokenize_several_values() { assert_eq!( vec![ Word("42".to_string()), Word("foo".to_string()), Word("true".to_string()) ], tokenize("42 foo \ntrue ") ); } #[test] fn tokenize_string_with_parens() { assert_eq!( vec![ LParen, LParen, RParen, Word("42".to_string()), RParen, Word("true".to_string()), LParen, ], tokenize("( \r() 42) \ntrue( ") ); } #[test] fn tokenize_lambda_symbol() { assert_eq!(vec![Lambda, LParen,], tokenize("lam (")); } #[test] fn parse_application_of_two_values() { assert_eq!( vec![App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))], parse("(foo 42)") ); } #[test] fn reject_application_of_single_value() { assert_eq!( Err("Application needs two values".to_string()), parse_total("(foo )") ); } #[test] fn desugar_application_of_more_than_two_values() { assert_eq!( vec![App( Box::new(App( Box::new(App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))), Box::new(Bool(true)) )), Box::new(Sym("f".to_string())) )], parse("(foo 42 true f)") ); } #[test] fn parse_abstraction() { assert_eq!( vec![Lam( "x".to_string(), Box::new(App( Box::new(Sym("x".to_string())), Box::new(Sym("x".to_string())) )) )], parse("(lam x (x x))") ); } #[test] fn desugar_abstraction_with_several_variables_into_nested_lambdas() { assert_eq!( vec![Lam( "x".to_string(), Box::new(Lam("y".to_string(), Box::new(Sym("y".to_string())))) )], parse("(lam (x y) y)") ); } #[test] fn parse_definition() { assert_eq!( vec![Def("x".to_string(), Box::new(Num(12)))], parse("(def x 12)") ); } #[test] fn parse_multiple_values() { assert_eq!(vec![Sym("foo".to_string()), Num(42)], parse("foo 42")); } #[test] fn parse_let_expressions() { assert_eq!( vec![Value::Let( "x".to_string(), Box::new(Num(12)), Box::new(Sym("x".to_string())) )], parse("(let (x 12) x)") ); } impl Arbitrary for Value { type Parameters = (); type Strategy = BoxedStrategy; fn arbitrary_with(_args: ()) -> Self::Strategy { let identifier = "\\pL(\\pL|\\pN)*"; let leaf = prop_oneof![ any::().prop_map(Num), any::().prop_map(Bool), // see https://unicode.org/reports/tr18/#General_Category_Property for one letter unicode categories identifier.prop_map(Sym), ]; let expr = leaf.prop_recursive(4, 128, 5, move |inner| { prop_oneof![ (inner.clone(), inner.clone()).prop_map(|(l, r)| App(Box::new(l), Box::new(r))), (identifier, inner.clone()).prop_map(|(var, body)| Lam(var, Box::new(body))), (identifier, inner.clone(), inner.clone()).prop_map(|(var, body, expr)| { Value::Let(var, Box::new(body), Box::new(expr)) }), ] }); prop_oneof![ expr.clone(), (identifier, expr).prop_map(|(var, body)| Def(var, Box::new(body))) ] .boxed() } } proptest! { #[test] fn parse_is_inverse_to_display(values in any::>()) { let result : Vec = values.iter().map(|v:&Value| v.to_string()).collect(); assert_eq!(values, result.iter().flat_map(|s| parse(s)).collect::>()); } } }