summaryrefslogtreecommitdiff
path: root/lambda-calcul/rust/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'lambda-calcul/rust/src/parser.rs')
-rw-r--r--lambda-calcul/rust/src/parser.rs392
1 files changed, 392 insertions, 0 deletions
diff --git a/lambda-calcul/rust/src/parser.rs b/lambda-calcul/rust/src/parser.rs
new file mode 100644
index 0000000..52aad5a
--- /dev/null
+++ b/lambda-calcul/rust/src/parser.rs
@@ -0,0 +1,392 @@
+use crate::ast::*;
+
+#[derive(Debug, PartialEq)]
+enum Token {
+ LParen,
+ RParen,
+ Lambda,
+ Word(String),
+ Define,
+ Let,
+}
+
+#[derive(Debug)]
+struct Parser {
+ tokens: Vec<Token>,
+ index: usize,
+}
+
+impl Parser {
+ fn expect(&mut self, token: Token) -> Result<(), String> {
+ if self.tokens.get(self.index) == Some(&token) {
+ Ok(())
+ } else {
+ Err(format!(
+ "Expected {:?}, got {:?}",
+ token,
+ self.tokens.get(self.index)
+ ))
+ }
+ .map(|_| {
+ self.next();
+ })
+ }
+
+ fn expect_symbol(&mut self) -> Result<String, String> {
+ if let Token::Word(s) = self.tokens.get(self.index).ok_or("Expected a symbol")? {
+ Ok(s.clone())
+ } else {
+ Err("Expected a symbol".to_string())
+ }
+ .map(|s| {
+ self.next();
+ s
+ })
+ }
+
+ fn next(&mut self) {
+ self.index += 1;
+ }
+
+ fn backtrack(&mut self) {
+ self.index -= 1;
+ }
+}
+
+pub fn parse(arg: &str) -> Vec<Value> {
+ parse_total(arg)
+ .map_err(|e| panic!("Syntax error: {}", e))
+ .unwrap()
+}
+
+pub fn parse_total(arg: &str) -> Result<Vec<Value>, String> {
+ let tokens = tokenize(arg);
+ let mut parser = Parser { tokens, index: 0 };
+ let mut result = Vec::new();
+ while parser.index < parser.tokens.len() {
+ let expr = parse_toplevel(&mut parser)?;
+ result.push(expr);
+ }
+ Ok(result)
+}
+
+fn parse_toplevel(parser: &mut Parser) -> Result<Value, String> {
+ parse_definition(parser).or_else(|_| parse_expression(parser))
+}
+
+fn parse_definition(parser: &mut Parser) -> Result<Value, String> {
+ parser.expect(Token::LParen)?;
+ parser.expect(Token::Define).map_err(|e| {
+ parser.backtrack();
+ e.to_string()
+ })?;
+ let var = parse_variable(parser)?;
+ let body = parse_expression(parser)?;
+ parser.expect(Token::RParen)?;
+ Ok(Value::Def(var, Box::new(body)))
+}
+
+fn parse_expression(parser: &mut Parser) -> Result<Value, String> {
+ parse_value(parser)
+ .or_else(|_| parse_abstraction(parser))
+ .or_else(|_| parse_let(parser))
+ .or_else(|_| parse_application(parser))
+}
+
+fn parse_abstraction(parser: &mut Parser) -> Result<Value, String> {
+ parser.expect(Token::LParen)?;
+ parser.expect(Token::Lambda).map_err(|e| {
+ parser.backtrack();
+ e.to_string()
+ })?;
+ let vars = parse_variables(parser)?;
+ let body = parse_expression(parser)?;
+ parser.expect(Token::RParen)?;
+ let result = vars
+ .iter()
+ .rev()
+ .fold(body, |acc, var| Value::Lam(var.clone(), Box::new(acc)));
+ Ok(result)
+}
+
+fn parse_variables(parser: &mut Parser) -> Result<Vec<String>, String> {
+ parse_variable(parser)
+ .map(|s| vec![s])
+ .or_else(|_| parse_variables_list(parser))
+}
+
+fn parse_variables_list(parser: &mut Parser) -> Result<Vec<String>, String> {
+ let mut vars = Vec::new();
+ parser.expect(Token::LParen)?;
+ while let Ok(var) = parse_variable(parser) {
+ vars.push(var);
+ }
+ parser.expect(Token::RParen)?;
+ Ok(vars)
+}
+
+fn parse_variable(parser: &mut Parser) -> Result<String, String> {
+ let var = parser.expect_symbol()?;
+ Ok(var)
+}
+
+fn parse_let(parser: &mut Parser) -> Result<Value, String> {
+ parser.expect(Token::LParen)?;
+ parser.expect(Token::Let).map_err(|e| {
+ parser.backtrack();
+ e.to_string()
+ })?;
+ parser.expect(Token::LParen)?;
+ let var = parse_variable(parser)?;
+ let body = parse_expression(parser)?;
+ parser.expect(Token::RParen)?;
+ let expr = parse_expression(parser)?;
+ parser.expect(Token::RParen)?;
+ Ok(Value::Let(var, Box::new(body), Box::new(expr)))
+}
+
+fn parse_application(parser: &mut Parser) -> Result<Value, String> {
+ parser.expect(Token::LParen)?;
+ let init = parse_expression(parser)?;
+ let mut exprs = Vec::new();
+ while let Ok(expr) = parse_expression(parser) {
+ exprs.push(expr);
+ }
+ if exprs.is_empty() {
+ return Err("Application needs two values".to_string());
+ }
+ parser.expect(Token::RParen)?;
+ let app: Value = exprs.iter().fold(init, |acc, expr| {
+ Value::App(Box::new(acc.clone()), Box::new(expr.clone()))
+ });
+ Ok(app.to_owned())
+}
+
+fn parse_value(parser: &mut Parser) -> Result<Value, String> {
+ let token = parser.tokens.get(parser.index).ok_or("Expected a value")?;
+ let val = parse_number(token)
+ .or_else(|_| parse_bool(token))
+ .or_else(|_| parse_symbol(token))?;
+ parser.next();
+ Ok(val)
+}
+
+fn tokenize(arg: &str) -> Vec<Token> {
+ let mut result = Vec::new();
+ let mut word = String::new();
+
+ for c in arg.chars() {
+ match c {
+ '(' => {
+ terminate(&mut result, &mut word);
+ result.push(Token::LParen)
+ }
+ ')' => {
+ terminate(&mut result, &mut word);
+ result.push(Token::RParen)
+ }
+ c if c.is_whitespace() => terminate(&mut result, &mut word),
+ c => word.push(c),
+ }
+ }
+ terminate(&mut result, &mut word);
+ result
+}
+
+fn terminate(result: &mut Vec<Token>, word: &mut String) {
+ if !word.is_empty() {
+ let w = word.clone();
+ if w == "lam" {
+ result.push(Token::Lambda);
+ } else if w == "def" {
+ result.push(Token::Define);
+ } else if w == "let" {
+ result.push(Token::Let);
+ } else {
+ result.push(Token::Word(w));
+ }
+ word.clear();
+ }
+}
+
+fn parse_symbol(token: &Token) -> Result<Value, String> {
+ match token {
+ Token::Word(s) => Ok(Value::Sym(s.clone())),
+ _ => Err(format!("Expected a symbol, got {:?}", token)),
+ }
+}
+
+fn parse_bool(token: &Token) -> Result<Value, String> {
+ match token {
+ Token::Word(s) => s
+ .parse::<bool>()
+ .map(Value::Bool)
+ .map_err(|e| e.to_string()),
+ _ => Err("Expected a boolean".to_string()),
+ }
+}
+
+fn parse_number(token: &Token) -> Result<Value, String> {
+ match token {
+ Token::Word(s) => s.parse::<i32>().map(Value::Num).map_err(|e| e.to_string()),
+ _ => Err("Expected an integer".to_string()),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::parse_total;
+
+ use super::Token::*;
+ use super::Value;
+ use super::Value::*;
+ use super::{parse, tokenize};
+ use proptest::prelude::*;
+
+ proptest! {
+ #[test]
+ fn parse_integer_as_number(i in -1000i32..1000) {
+ let result = parse(&i.to_string());
+ assert_eq!(vec![Num(i)], result);
+ }
+
+ }
+
+ #[test]
+ fn parse_truth_values_as_booleans() {
+ assert_eq!(vec![Bool(true)], parse("true"));
+ assert_eq!(vec![Bool(false)], parse("false"));
+ }
+
+ #[test]
+ fn parse_identifiers_values_as_symbols() {
+ assert_eq!(vec![Sym("foo".to_string())], parse("foo"));
+ }
+
+ #[test]
+ fn ignores_whitespace() {
+ assert_eq!(vec![Sym("foo".to_string())], parse(" foo \n\r"));
+ assert_eq!(vec![Num(-42)], parse("\n-42"));
+ }
+
+ #[test]
+ fn tokenize_several_values() {
+ assert_eq!(
+ vec![
+ Word("42".to_string()),
+ Word("foo".to_string()),
+ Word("true".to_string())
+ ],
+ tokenize("42 foo \ntrue ")
+ );
+ }
+
+ #[test]
+ fn tokenize_string_with_parens() {
+ assert_eq!(
+ vec![
+ LParen,
+ LParen,
+ RParen,
+ Word("42".to_string()),
+ RParen,
+ Word("true".to_string()),
+ LParen,
+ ],
+ tokenize("( \r() 42) \ntrue( ")
+ );
+ }
+
+ #[test]
+ fn tokenize_lambda_symbol() {
+ assert_eq!(vec![Lambda, LParen,], tokenize("lam ("));
+ }
+
+ #[test]
+ fn parse_application_of_two_values() {
+ assert_eq!(
+ vec![App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))],
+ parse("(foo 42)")
+ );
+ }
+
+ #[test]
+ fn reject_application_of_single_value() {
+ assert_eq!(
+ Err("Application needs two values".to_string()),
+ parse_total("(foo )")
+ );
+ }
+
+ #[test]
+ fn desugar_application_of_more_than_two_values() {
+ assert_eq!(
+ vec![App(
+ Box::new(App(
+ Box::new(App(Box::new(Sym("foo".to_string())), Box::new(Num(42)))),
+ Box::new(Bool(true))
+ )),
+ Box::new(Sym("f".to_string()))
+ )],
+ parse("(foo 42 true f)")
+ );
+ }
+
+ #[test]
+ fn parse_abstraction() {
+ assert_eq!(
+ vec![Lam(
+ "x".to_string(),
+ Box::new(App(
+ Box::new(Sym("x".to_string())),
+ Box::new(Sym("x".to_string()))
+ ))
+ )],
+ parse("(lam x (x x))")
+ );
+ }
+
+ #[test]
+ fn desugar_abstraction_with_several_variables_into_nested_lambdas() {
+ assert_eq!(
+ vec![Lam(
+ "x".to_string(),
+ Box::new(Lam("y".to_string(), Box::new(Sym("y".to_string()))))
+ )],
+ parse("(lam (x y) y)")
+ );
+ }
+
+ #[test]
+ fn parse_definition() {
+ assert_eq!(
+ vec![Def("x".to_string(), Box::new(Num(12)))],
+ parse("(def x 12)")
+ );
+ }
+
+ #[test]
+ fn parse_multiple_values() {
+ assert_eq!(vec![Sym("foo".to_string()), Num(42)], parse("foo 42"));
+ }
+
+ #[test]
+ fn parse_let_expressions() {
+ assert_eq!(
+ vec![Value::Let(
+ "x".to_string(),
+ Box::new(Num(12)),
+ Box::new(Sym("x".to_string()))
+ )],
+ parse("(let (x 12) x)")
+ );
+ }
+
+ proptest! {
+ #[test]
+ fn parse_is_inverse_to_display(values in any::<Vec<Value>>()) {
+ let result : Vec<String> = values.iter().map(|v:&Value| v.to_string()).collect();
+ assert_eq!(values, result.iter().flat_map(|s| parse(s)).collect::<Vec<Value>>());
+ }
+ }
+}