use crate::ast::*; #[derive(Debug, PartialEq)] enum Token { LParen, RParen, Word(String), } pub fn parse(arg: &str) -> Value { let mut tokens = tokenize(arg); let mut state = Vec::new(); parse_tokens(&mut tokens, &mut state).unwrap_or_else(|_| panic!("Syntax) error: {}", arg)) } fn parse_tokens(tokens: &mut [Token], state: &mut Vec) -> Result { match tokens { [] => state.pop().ok_or("Empty expression".to_string()), [Token::LParen, rest @ ..] => parse_tokens(rest, state), [Token::RParen, rest @ ..] => { let right = state.pop().ok_or("Unbalanced parentheses".to_string())?; let left = state.pop().ok_or("Unbalanced parentheses".to_string())?; state.push(Value::App(Box::new(left), Box::new(right))); parse_tokens(rest, state) } [Token::Word(s), rest @ ..] => { let value = parse_value(&Token::Word(s.clone()))?; state.push(value); parse_tokens(rest, state) } } } fn parse_value(token: &Token) -> Result { parse_number(token) .or(parse_bool(token)) .or(parse_symbol(token)) } fn tokenize(arg: &str) -> Vec { let mut result = Vec::new(); let mut word = String::new(); for c in arg.chars() { match c { '(' => { terminate(&mut result, &mut word); result.push(Token::LParen) } ')' => { terminate(&mut result, &mut word); result.push(Token::RParen) } c if c.is_whitespace() => terminate(&mut result, &mut word), c => word.push(c), } } terminate(&mut result, &mut word); result } fn terminate(result: &mut Vec, word: &mut String) { if !word.is_empty() { let w = word.clone(); result.push(Token::Word(w)); word.clear(); } } fn parse_symbol(token: &Token) -> Result { match token { Token::Word(s) => Ok(Value::Sym(s.clone())), _ => Err("Expected a symbol".to_string()), } } fn parse_bool(token: &Token) -> Result { match token { Token::Word(s) => s .parse::() .map(Value::Bool) .map_err(|e| e.to_string()), _ => Err("Expected a boolean".to_string()), } } fn parse_number(token: &Token) -> Result { match token { Token::Word(s) => s.parse::().map(Value::Num).map_err(|e| e.to_string()), _ => Err("Expected an integer".to_string()), } } #[cfg(test)] mod tests { use super::Token::*; use super::Value; use super::Value::*; use super::{parse, tokenize}; use proptest::prelude::*; proptest! { #[test] fn parse_integer_as_number(i in -1000i32..1000) { let result = parse(&i.to_string()); assert_eq!(Num(i), result); } } #[test] fn parse_truth_values_as_booleans() { assert_eq!(Bool(true), parse("true")); assert_eq!(Bool(false), parse("false")); } #[test] fn parse_identifiers_values_as_symbols() { assert_eq!(Sym("foo".to_string()), parse("foo")); } #[test] fn ignores_whitespace() { assert_eq!(Sym("foo".to_string()), parse(" foo \n\r")); assert_eq!(Num(-42), parse("\n-42")); } #[test] fn tokenize_several_values() { assert_eq!( vec![ Word("42".to_string()), Word("foo".to_string()), Word("true".to_string()) ], tokenize("42 foo \ntrue ") ); } #[test] fn tokenize_string_with_parens() { assert_eq!( vec![ LParen, LParen, RParen, Word("42".to_string()), RParen, Word("true".to_string()), LParen, ], tokenize("( \r() 42) \ntrue( ") ); } #[test] fn parse_application_of_two_values() { assert_eq!( App(Box::new(Sym("foo".to_string())), Box::new(Num(42))), parse("(foo 42)") ); } impl Arbitrary for Value { type Parameters = (); type Strategy = BoxedStrategy; fn arbitrary_with(_args: ()) -> Self::Strategy { prop_oneof![ any::().prop_map(Num), any::().prop_map(Bool), // see https://unicode.org/reports/tr18/#General_Category_Property for one letter unicode categories "\\pL(\\pL|\\pN)*".prop_map(Sym), ] .boxed() } } proptest! { #[test] fn parse_is_inverse_to_display(values in any::>()) { let result : Vec = values.iter().map(|v:&Value| v.to_string()).collect(); assert_eq!(values, result.iter().map(|s| parse(s)).collect::>()); } } }