diff options
Diffstat (limited to 'lambda-calcul/rust/src/parser.rs')
| -rw-r--r-- | lambda-calcul/rust/src/parser.rs | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/lambda-calcul/rust/src/parser.rs b/lambda-calcul/rust/src/parser.rs new file mode 100644 index 0000000..52aad5a --- /dev/null +++ b/lambda-calcul/rust/src/parser.rs @@ -0,0 +1,392 @@ +use crate::ast::*; + +#[derive(Debug, PartialEq)] +enum Token { + LParen, + RParen, + Lambda, + Word(String), + Define, + Let, +} + +#[derive(Debug)] +struct Parser { + tokens: Vec<Token>, + index: usize, +} + +impl Parser { + fn expect(&mut self, token: Token) -> Result<(), String> { + if self.tokens.get(self.index) == Some(&token) { + Ok(()) + } else { + Err(format!( + "Expected {:?}, got {:?}", + token, + self.tokens.get(self.index) + )) + } + .map(|_| { + self.next(); + }) + } + + fn expect_symbol(&mut self) -> Result<String, String> { + if let Token::Word(s) = self.tokens.get(self.index).ok_or("Expected a symbol")? { + Ok(s.clone()) + } else { + Err("Expected a symbol".to_string()) + } + .map(|s| { + self.next(); + s + }) + } + + fn next(&mut self) { + self.index += 1; + } + + fn backtrack(&mut self) { + self.index -= 1; + } +} + +pub fn parse(arg: &str) -> Vec<Value> { + parse_total(arg) + .map_err(|e| panic!("Syntax error: {}", e)) + .unwrap() +} + +pub fn parse_total(arg: &str) -> Result<Vec<Value>, String> { + let tokens = tokenize(arg); + let mut parser = Parser { tokens, index: 0 }; + let mut result = Vec::new(); + while parser.index < parser.tokens.len() { + let expr = parse_toplevel(&mut parser)?; + result.push(expr); + } + Ok(result) +} + +fn parse_toplevel(parser: &mut Parser) -> Result<Value, String> { + parse_definition(parser).or_else(|_| parse_expression(parser)) +} + +fn parse_definition(parser: &mut Parser) -> Result<Value, String> { + parser.expect(Token::LParen)?; + parser.expect(Token::Define).map_err(|e| { + parser.backtrack(); + e.to_string() + })?; + let var = parse_variable(parser)?; + let body = parse_expression(parser)?; + parser.expect(Token::RParen)?; + Ok(Value::Def(var, Box::new(body))) +} + +fn parse_expression(parser: &mut Parser) -> Result<Value, String> { + parse_value(parser) + .or_else(|_| parse_abstraction(parser)) + .or_else(|_| parse_let(parser)) + .or_else(|_| parse_application(parser)) +} + +fn parse_abstraction(parser: &mut Parser) -> Result<Value, String> { + parser.expect(Token::LParen)?; + parser.expect(Token::Lambda).map_err(|e| { + parser.backtrack(); + e.to_string() + })?; + let vars = parse_variables(parser)?; + let body = parse_expression(parser)?; + parser.expect(Token::RParen)?; + let result = vars + .iter() + .rev() + .fold(body, |acc, var| Value::Lam(var.clone(), Box::new(acc))); + Ok(result) +} + +fn parse_variables(parser: &mut Parser) -> Result<Vec<String>, String> { + parse_variable(parser) + .map(|s| vec![s]) + .or_else(|_| parse_variables_list(parser)) +} + +fn parse_variables_list(parser: &mut Parser) -> Result<Vec<String>, String> { + let mut vars = Vec::new(); + parser.expect(Token::LParen)?; + while let Ok(var) = parse_variable(parser) { + vars.push(var); + } + parser.expect(Token::RParen)?; + Ok(vars) +} + +fn parse_variable(parser: &mut Parser) -> Result<String, String> { + let var = parser.expect_symbol()?; + Ok(var) +} + +fn parse_let(parser: &mut Parser) -> Result<Value, String> { + parser.expect(Token::LParen)?; + parser.expect(Token::Let).map_err(|e| { + parser.backtrack(); + e.to_string() + })?; + parser.expect(Token::LParen)?; + let var = parse_variable(parser)?; + let body = parse_expression(parser)?; + parser.expect(Token::RParen)?; + let expr = parse_expression(parser)?; + parser.expect(Token::RParen)?; + Ok(Value::Let(var, Box::new(body), Box::new(expr))) +} + +fn parse_application(parser: &mut Parser) -> Result<Value, String> { + parser.expect(Token::LParen)?; + let init = parse_expression(parser)?; + let mut exprs = Vec::new(); + while let Ok(expr) = parse_expression(parser) { + exprs.push(expr); + } + if exprs.is_empty() { + return Err("Application needs two values".to_string()); + } + parser.expect(Token::RParen)?; + let app: Value = exprs.iter().fold(init, |acc, expr| { + Value::App(Box::new(acc.clone()), Box::new(expr.clone())) + }); + Ok(app.to_owned()) +} + +fn parse_value(parser: &mut Parser) -> Result<Value, String> { + let token = parser.tokens.get(parser.index).ok_or("Expected a value")?; + let val = parse_number(token) + .or_else(|_| parse_bool(token)) + .or_else(|_| parse_symbol(token))?; + parser.next(); + Ok(val) +} + +fn tokenize(arg: &str) -> Vec<Token> { + let mut result = Vec::new(); + let mut word = String::new(); + + for c in arg.chars() { + match c { + '(' => { + terminate(&mut result, &mut word); + result.push(Token::LParen) + } + ')' => { + terminate(&mut result, &mut word); + result.push(Token::RParen) + } + c if c.is_whitespace() => terminate(&mut result, &mut word), + c => word.push(c), + } + } + terminate(&mut result, &mut word); + result +} + +fn terminate(result: &mut Vec<Token>, word: &mut String) { + if !word.is_empty() { + let w = word.clone(); + if w == "lam" { + result.push(Token::Lambda); + } else if w == "def" { + result.push(Token::Define); + } else if w == "let" { + result.push(Token::Let); + } else { + result.push(Token::Word(w)); + } + word.clear(); + } +} + +fn parse_symbol(token: &Token) -> Result<Value, String> { + match token { + Token::Word(s) => Ok(Value::Sym(s.clone())), + _ => Err(format!("Expected a symbol, got {:?}", token)), + } +} + +fn parse_bool(token: &Token) -> Result<Value, String> { + match token { + Token::Word(s) => s + .parse::<bool>() + .map(Value::Bool) + .map_err(|e| e.to_string()), + _ => Err("Expected a boolean".to_string()), + } +} + +fn parse_number(token: &Token) -> Result<Value, String> { + match token { + Token::Word(s) => s.parse::<i32>().map(Value::Num).map_err(|e| e.to_string()), + _ => Err("Expected an integer".to_string()), + } +} + +#[cfg(test)] +mod tests { + use super::parse_total; + + use super::Token::*; + use super::Value; + use super::Value::*; + use super::{parse, tokenize}; + use proptest::prelude::*; + + proptest! { + #[test] + fn parse_integer_as_number(i in -1000i32..1000) { + let result = parse(&i.to_string()); + assert_eq!(vec![Num(i)], result); + } + + } + + #[test] + fn parse_truth_values_as_booleans() { + assert_eq!(vec![Bool(true)], parse("true")); + assert_eq!(vec![Bool(false)], parse("false")); + } + + #[test] + fn parse_identifiers_values_as_symbols() { + assert_eq!(vec![Sym("foo".to_string())], parse("foo")); + } + + #[test] + fn ignores_whitespace() { + assert_eq!(vec![Sym("foo".to_string())], parse(" foo \n\r")); + assert_eq!(vec![Num(-42)], parse("\n-42")); + } + + #[test] + fn tokenize_several_values() { + assert_eq!( + vec![ + Word("42".to_string()), + Word("foo".to_string()), + Word("true".to_string()) + ], + tokenize("42 foo \ntrue ") + ); + } + + #[test] + fn tokenize_string_with_parens() { + assert_eq!( + vec![ + LParen, + LParen, + RParen, + Word("42".to_string()), + RParen, + Word("true".to_string()), + LParen, + ], + tokenize("( \r() 42) \ntrue( ") + ); + } + + #[test] + fn tokenize_lambda_symbol() { + assert_eq!(vec![Lambda, LParen,], tokenize("lam (")); + } + + #[test] + fn parse_application_of_two_values() { + assert_eq!( + vec![App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))], + parse("(foo 42)") + ); + } + + #[test] + fn reject_application_of_single_value() { + assert_eq!( + Err("Application needs two values".to_string()), + parse_total("(foo )") + ); + } + + #[test] + fn desugar_application_of_more_than_two_values() { + assert_eq!( + vec![App( + Box::new(App( + Box::new(App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))), + Box::new(Bool(true)) + )), + Box::new(Sym("f".to_string())) + )], + parse("(foo 42 true f)") + ); + } + + #[test] + fn parse_abstraction() { + assert_eq!( + vec![Lam( + "x".to_string(), + Box::new(App( + Box::new(Sym("x".to_string())), + Box::new(Sym("x".to_string())) + )) + )], + parse("(lam x (x x))") + ); + } + + #[test] + fn desugar_abstraction_with_several_variables_into_nested_lambdas() { + assert_eq!( + vec![Lam( + "x".to_string(), + Box::new(Lam("y".to_string(), Box::new(Sym("y".to_string())))) + )], + parse("(lam (x y) y)") + ); + } + + #[test] + fn parse_definition() { + assert_eq!( + vec![Def("x".to_string(), Box::new(Num(12)))], + parse("(def x 12)") + ); + } + + #[test] + fn parse_multiple_values() { + assert_eq!(vec![Sym("foo".to_string()), Num(42)], parse("foo 42")); + } + + #[test] + fn parse_let_expressions() { + assert_eq!( + vec![Value::Let( + "x".to_string(), + Box::new(Num(12)), + Box::new(Sym("x".to_string())) + )], + parse("(let (x 12) x)") + ); + } + + proptest! { + #[test] + fn parse_is_inverse_to_display(values in any::<Vec<Value>>()) { + let result : Vec<String> = values.iter().map(|v:&Value| v.to_string()).collect(); + assert_eq!(values, result.iter().flat_map(|s| parse(s)).collect::<Vec<Value>>()); + } + } +} |
