From d20acf7166015d8adf5a301ac06820f99ad69190 Mon Sep 17 00:00:00 2001 From: greg Date: Tue, 5 Nov 2019 02:22:11 -0800 Subject: [PATCH] Add tokenization for string literal prefixes --- schala-lang/language/src/parsing.rs | 4 ++-- schala-lang/language/src/tokenizing.rs | 26 +++++++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/schala-lang/language/src/parsing.rs b/schala-lang/language/src/parsing.rs index 673fe8f..0d00048 100644 --- a/schala-lang/language/src/parsing.rs +++ b/schala-lang/language/src/parsing.rs @@ -991,7 +991,7 @@ impl Parser { self.token_handler.next(); Pattern::Literal(PatternLiteral::BoolPattern(false)) }, - StrLiteral(s) => { + StrLiteral { s, .. } => { self.token_handler.next(); Pattern::Literal(PatternLiteral::StringPattern(s)) }, @@ -1140,7 +1140,7 @@ impl Parser { let id = self.id_store.fresh(); Ok(Expression::new(id, BoolLiteral(false))) }, - StrLiteral(s) => { + StrLiteral {s, ..} => { self.token_handler.next(); let id = self.id_store.fresh(); Ok(Expression::new(id, StringLiteral(s.clone()))) diff --git a/schala-lang/language/src/tokenizing.rs b/schala-lang/language/src/tokenizing.rs index 75c54f4..edf5123 100644 --- a/schala-lang/language/src/tokenizing.rs +++ b/schala-lang/language/src/tokenizing.rs @@ -21,7 +21,10 @@ pub enum TokenKind { Operator(Rc<String>), DigitGroup(Rc<String>), HexLiteral(Rc<String>), BinNumberSigil, - StrLiteral(Rc<String>), + StrLiteral { + s: Rc<String>, + prefix: Option<Rc<String>> + }, Identifier(Rc<String>), Keyword(Kw), @@ -37,7 +40,7 @@ impl fmt::Display for TokenKind { &Operator(ref s) => write!(f, "Operator({})", **s), &DigitGroup(ref s) => write!(f, "DigitGroup({})", s), &HexLiteral(ref s) => write!(f, "HexLiteral({})", s), - &StrLiteral(ref s) => write!(f, "StrLiteral({})", s), + &StrLiteral {ref s, .. 
} => write!(f, "StrLiteral({})", s), &Identifier(ref s) => write!(f, "Identifier({})", s), &Error(ref s) => write!(f, "Error({})", s), other => write!(f, "{:?}", other), @@ -163,7 +166,7 @@ pub fn tokenize(input: &str) -> Vec<Token> { '(' => LParen, ')' => RParen, '{' => LCurlyBrace, '}' => RCurlyBrace, '[' => LSquareBracket, ']' => RSquareBracket, - '"' => handle_quote(&mut input), + '"' => handle_quote(&mut input, None), '\\' => Backslash, c if c.is_digit(10) => handle_digit(c, &mut input), c if c.is_alphabetic() || c == '_' => handle_alphabetic(c, &mut input), @@ -191,7 +194,7 @@ fn handle_digit(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> } } -fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind { +fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>, quote_prefix: Option<&str>) -> TokenKind { let mut buf = String::new(); loop { match input.next().map(|(_, _, c)| { c }) { @@ -213,7 +216,7 @@ fn handle_quote(input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind None => return TokenKind::Error(format!("Unclosed string")), } } - TokenKind::StrLiteral(Rc::new(buf)) + TokenKind::StrLiteral { s: Rc::new(buf), prefix: quote_prefix.map(|s| Rc::new(s.to_string())) } } fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>>) -> TokenKind { @@ -225,6 +228,10 @@ fn handle_alphabetic(c: char, input: &mut Peekable<impl Iterator<Item=CharData>> loop { match input.peek().map(|&(_, _, c)| { c }) { + Some(c) if c == '"' => { + input.next(); + return handle_quote(input, Some(&buf)); + }, Some(c) if c.is_alphanumeric() || c == '_' => { input.next(); buf.push(c); @@ -325,4 +332,13 @@ mod schala_tokenizer_tests { let token_kinds: Vec<TokenKind> = tokenize("1 `plus` 2").into_iter().map(move |t| t.kind).collect(); assert_eq!(token_kinds, vec![digit!("1"), op!("plus"), digit!("2")]); } + + #[test] + fn string_literals() { + let token_kinds: Vec<TokenKind> = tokenize(r#""some string""#).into_iter().map(move |t| t.kind).collect(); + assert_eq!(token_kinds, vec![StrLiteral { s: Rc::new("some string".to_string()), prefix: None }]); + + let token_kinds: Vec<TokenKind> = tokenize(r#"b"some 
bytestring""#).into_iter().map(move |t| t.kind).collect(); + assert_eq!(token_kinds, vec![StrLiteral { s: Rc::new("some bytestring".to_string()), prefix: Some(Rc::new("b".to_string())) }]); + } }