diff options
| author | Arnaud Bailly <arnaud.bailly@iohk.io> | 2025-01-25 10:45:41 +0100 |
|---|---|---|
| committer | Arnaud Bailly <arnaud.bailly@iohk.io> | 2025-01-25 10:45:41 +0100 |
| commit | 7752d73216578d5961751b5d0535088d384b4aa6 (patch) | |
| tree | 786e46fe1276e93ade0a48398cd4c9ac13081707 /rust/src/parser.rs | |
| parent | d6f68e919db51d366c8ca3c1509bea12aa81d692 (diff) | |
| download | lambda-nantes-7752d73216578d5961751b5d0535088d384b4aa6.tar.gz | |
Move λ-calcul workshop code to subdirectory
Diffstat (limited to 'rust/src/parser.rs')
| -rw-r--r-- | rust/src/parser.rs | 392 |
1 files changed, 0 insertions, 392 deletions
diff --git a/rust/src/parser.rs b/rust/src/parser.rs deleted file mode 100644 index 52aad5a..0000000 --- a/rust/src/parser.rs +++ /dev/null @@ -1,392 +0,0 @@ -use crate::ast::*; - -#[derive(Debug, PartialEq)] -enum Token { - LParen, - RParen, - Lambda, - Word(String), - Define, - Let, -} - -#[derive(Debug)] -struct Parser { - tokens: Vec<Token>, - index: usize, -} - -impl Parser { - fn expect(&mut self, token: Token) -> Result<(), String> { - if self.tokens.get(self.index) == Some(&token) { - Ok(()) - } else { - Err(format!( - "Expected {:?}, got {:?}", - token, - self.tokens.get(self.index) - )) - } - .map(|_| { - self.next(); - }) - } - - fn expect_symbol(&mut self) -> Result<String, String> { - if let Token::Word(s) = self.tokens.get(self.index).ok_or("Expected a symbol")? { - Ok(s.clone()) - } else { - Err("Expected a symbol".to_string()) - } - .map(|s| { - self.next(); - s - }) - } - - fn next(&mut self) { - self.index += 1; - } - - fn backtrack(&mut self) { - self.index -= 1; - } -} - -pub fn parse(arg: &str) -> Vec<Value> { - parse_total(arg) - .map_err(|e| panic!("Syntax error: {}", e)) - .unwrap() -} - -pub fn parse_total(arg: &str) -> Result<Vec<Value>, String> { - let tokens = tokenize(arg); - let mut parser = Parser { tokens, index: 0 }; - let mut result = Vec::new(); - while parser.index < parser.tokens.len() { - let expr = parse_toplevel(&mut parser)?; - result.push(expr); - } - Ok(result) -} - -fn parse_toplevel(parser: &mut Parser) -> Result<Value, String> { - parse_definition(parser).or_else(|_| parse_expression(parser)) -} - -fn parse_definition(parser: &mut Parser) -> Result<Value, String> { - parser.expect(Token::LParen)?; - parser.expect(Token::Define).map_err(|e| { - parser.backtrack(); - e.to_string() - })?; - let var = parse_variable(parser)?; - let body = parse_expression(parser)?; - parser.expect(Token::RParen)?; - Ok(Value::Def(var, Box::new(body))) -} - -fn parse_expression(parser: &mut Parser) -> Result<Value, String> { - parse_value(parser) - .or_else(|_| parse_abstraction(parser)) - .or_else(|_| parse_let(parser)) - .or_else(|_| parse_application(parser)) -} - -fn parse_abstraction(parser: &mut Parser) -> Result<Value, String> { - parser.expect(Token::LParen)?; - parser.expect(Token::Lambda).map_err(|e| { - parser.backtrack(); - e.to_string() - })?; - let vars = parse_variables(parser)?; - let body = parse_expression(parser)?; - parser.expect(Token::RParen)?; - let result = vars - .iter() - .rev() - .fold(body, |acc, var| Value::Lam(var.clone(), Box::new(acc))); - Ok(result) -} - -fn parse_variables(parser: &mut Parser) -> Result<Vec<String>, String> { - parse_variable(parser) - .map(|s| vec![s]) - .or_else(|_| parse_variables_list(parser)) -} - -fn parse_variables_list(parser: &mut Parser) -> Result<Vec<String>, String> { - let mut vars = Vec::new(); - parser.expect(Token::LParen)?; - while let Ok(var) = parse_variable(parser) { - vars.push(var); - } - parser.expect(Token::RParen)?; - Ok(vars) -} - -fn parse_variable(parser: &mut Parser) -> Result<String, String> { - let var = parser.expect_symbol()?; - Ok(var) -} - -fn parse_let(parser: &mut Parser) -> Result<Value, String> { - parser.expect(Token::LParen)?; - parser.expect(Token::Let).map_err(|e| { - parser.backtrack(); - e.to_string() - })?; - parser.expect(Token::LParen)?; - let var = parse_variable(parser)?; - let body = parse_expression(parser)?; - parser.expect(Token::RParen)?; - let expr = parse_expression(parser)?; - parser.expect(Token::RParen)?; - Ok(Value::Let(var, Box::new(body), Box::new(expr))) -} - -fn parse_application(parser: &mut Parser) -> Result<Value, String> { - parser.expect(Token::LParen)?; - let init = parse_expression(parser)?; - let mut exprs = Vec::new(); - while let Ok(expr) = parse_expression(parser) { - exprs.push(expr); - } - if exprs.is_empty() { - return Err("Application needs two values".to_string()); - } - parser.expect(Token::RParen)?; - let app: Value = exprs.iter().fold(init, |acc, expr| { - Value::App(Box::new(acc.clone()), Box::new(expr.clone())) - }); - Ok(app.to_owned()) -} - -fn parse_value(parser: &mut Parser) -> Result<Value, String> { - let token = parser.tokens.get(parser.index).ok_or("Expected a value")?; - let val = parse_number(token) - .or_else(|_| parse_bool(token)) - .or_else(|_| parse_symbol(token))?; - parser.next(); - Ok(val) -} - -fn tokenize(arg: &str) -> Vec<Token> { - let mut result = Vec::new(); - let mut word = String::new(); - - for c in arg.chars() { - match c { - '(' => { - terminate(&mut result, &mut word); - result.push(Token::LParen) - } - ')' => { - terminate(&mut result, &mut word); - result.push(Token::RParen) - } - c if c.is_whitespace() => terminate(&mut result, &mut word), - c => word.push(c), - } - } - terminate(&mut result, &mut word); - result -} - -fn terminate(result: &mut Vec<Token>, word: &mut String) { - if !word.is_empty() { - let w = word.clone(); - if w == "lam" { - result.push(Token::Lambda); - } else if w == "def" { - result.push(Token::Define); - } else if w == "let" { - result.push(Token::Let); - } else { - result.push(Token::Word(w)); - } - word.clear(); - } -} - -fn parse_symbol(token: &Token) -> Result<Value, String> { - match token { - Token::Word(s) => Ok(Value::Sym(s.clone())), - _ => Err(format!("Expected a symbol, got {:?}", token)), - } -} - -fn parse_bool(token: &Token) -> Result<Value, String> { - match token { - Token::Word(s) => s - .parse::<bool>() - .map(Value::Bool) - .map_err(|e| e.to_string()), - _ => Err("Expected a boolean".to_string()), - } -} - -fn parse_number(token: &Token) -> Result<Value, String> { - match token { - Token::Word(s) => s.parse::<i32>().map(Value::Num).map_err(|e| e.to_string()), - _ => Err("Expected an integer".to_string()), - } -} - -#[cfg(test)] -mod tests { - use super::parse_total; - - use super::Token::*; - use super::Value; - use super::Value::*; - use super::{parse, tokenize}; - use proptest::prelude::*; - - proptest! { - #[test] - fn parse_integer_as_number(i in -1000i32..1000) { - let result = parse(&i.to_string()); - assert_eq!(vec![Num(i)], result); - } - - } - - #[test] - fn parse_truth_values_as_booleans() { - assert_eq!(vec![Bool(true)], parse("true")); - assert_eq!(vec![Bool(false)], parse("false")); - } - - #[test] - fn parse_identifiers_values_as_symbols() { - assert_eq!(vec![Sym("foo".to_string())], parse("foo")); - } - - #[test] - fn ignores_whitespace() { - assert_eq!(vec![Sym("foo".to_string())], parse(" foo \n\r")); - assert_eq!(vec![Num(-42)], parse("\n-42")); - } - - #[test] - fn tokenize_several_values() { - assert_eq!( - vec![ - Word("42".to_string()), - Word("foo".to_string()), - Word("true".to_string()) - ], - tokenize("42 foo \ntrue ") - ); - } - - #[test] - fn tokenize_string_with_parens() { - assert_eq!( - vec![ - LParen, - LParen, - RParen, - Word("42".to_string()), - RParen, - Word("true".to_string()), - LParen, - ], - tokenize("( \r() 42) \ntrue( ") - ); - } - - #[test] - fn tokenize_lambda_symbol() { - assert_eq!(vec![Lambda, LParen,], tokenize("lam (")); - } - - #[test] - fn parse_application_of_two_values() { - assert_eq!( - vec![App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))], - parse("(foo 42)") - ); - } - - #[test] - fn reject_application_of_single_value() { - assert_eq!( - Err("Application needs two values".to_string()), - parse_total("(foo )") - ); - } - - #[test] - fn desugar_application_of_more_than_two_values() { - assert_eq!( - vec![App( - Box::new(App( - Box::new(App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))), - Box::new(Bool(true)) - )), - Box::new(Sym("f".to_string())) - )], - parse("(foo 42 true f)") - ); - } - - #[test] - fn parse_abstraction() { - assert_eq!( - vec![Lam( - "x".to_string(), - Box::new(App( - Box::new(Sym("x".to_string())), - Box::new(Sym("x".to_string())) - )) - )], - parse("(lam x (x x))") - ); - } - - #[test] - fn desugar_abstraction_with_several_variables_into_nested_lambdas() { - assert_eq!( - vec![Lam( - "x".to_string(), - Box::new(Lam("y".to_string(), Box::new(Sym("y".to_string())))) - )], - parse("(lam (x y) y)") - ); - } - - #[test] - fn parse_definition() { - assert_eq!( - vec![Def("x".to_string(), Box::new(Num(12)))], - parse("(def x 12)") - ); - } - - #[test] - fn parse_multiple_values() { - assert_eq!(vec![Sym("foo".to_string()), Num(42)], parse("foo 42")); - } - - #[test] - fn parse_let_expressions() { - assert_eq!( - vec![Value::Let( - "x".to_string(), - Box::new(Num(12)), - Box::new(Sym("x".to_string())) - )], - parse("(let (x 12) x)") - ); - } - - proptest! { - #[test] - fn parse_is_inverse_to_display(values in any::<Vec<Value>>()) { - let result : Vec<String> = values.iter().map(|v:&Value| v.to_string()).collect(); - assert_eq!(values, result.iter().flat_map(|s| parse(s)).collect::<Vec<Value>>()); - } - } -} |
