From aae665a4e90d1bab6dc3d27f0c26beccba4030cd Mon Sep 17 00:00:00 2001
From: Casey Rodarmor <casey@rodarmor.com>
Date: Wed, 26 Oct 2016 22:04:12 -0700
Subject: [PATCH] Tokenize looks like it's working.

---
 notes        |   1 +
 src/lib.rs   |  55 +++++++++++++++++++++----
 src/tests.rs | 113 ++++++++++++++++++++++++++++++++++-----------------
 3 files changed, 125 insertions(+), 44 deletions(-)

diff --git a/notes b/notes
index 604541c..d51cc88 100644
--- a/notes
+++ b/notes
@@ -77,6 +77,7 @@ notes
 . tokenizing
 . executing
 - make sure there isn't any unused code
+- ask users to contribute their justfiles
 - try to get some users
 . facebook friends

diff --git a/src/lib.rs b/src/lib.rs
index 9cac185..5e89370 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -738,12 +738,12 @@ fn tokenize<'a>(text: &'a str) -> Result<Vec<Token<'a>>, Error<'a>> {
     static ref COMMENT:             Regex = token(r"#([^!].*)?$" );
     static ref STRING:              Regex = token("\"[a-z0-9]\"" );
     static ref EOL:                 Regex = token(r"\n|\r\n"     );
-    static ref INTERPOLATION_END:   Regex = token(r"[{][{]"      );
+    static ref INTERPOLATION_END:   Regex = token(r"[}][}]"      );
     static ref LINE:                Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
     static ref INDENT:              Regex = re(r"^([ \t]*)[^ \t\n\r]"     );
     static ref INTERPOLATION_START: Regex = re(r"^[{][{]"                 );
-    static ref LEADING_TEXT:        Regex = re(r"(?m)(.+?)[{][{]"         );
-    static ref TEXT:                Regex = re(r"(?m)(.+?)$"              );
+    static ref LEADING_TEXT:        Regex = re(r"^(?m)(.+?)[{][{]"        );
+    static ref TEXT:                Regex = re(r"^(?m)(.+)"               );
   }
 
 #[derive(PartialEq)]
@@ -1071,6 +1071,41 @@ impl<'a> Parser<'a> {
       return Err(self.unexpected_token(&token, &[Name, Eol, Eof]));
     }
 
+    enum Piece<'a> {
+      Text{text: Token<'a>},
+      Expression{expression: Expression<'a>},
+    }
+
+    let mut new_lines = vec![];
+
+    if self.accepted(Indent) {
+      while !self.accepted(Dedent) {
+        if let Some(token) = self.expect(Line) {
+          return Err(token.error(ErrorKind::InternalError{
+            message: format!("Expected a dedent but got {}", token.class)
+          }))
+        }
+        let mut pieces = vec![];
+
+        while !self.accepted(Eol) {
+          if let Some(token) = self.accept(Text) {
+            pieces.push(Piece::Text{text: token});
+          } else if let Some(token) = self.expect(InterpolationStart) {
+            return Err(self.unexpected_token(&token, &[Text, InterpolationStart, Eol]));
+          } else {
+            pieces.push(Piece::Expression{expression: try!(self.expression(true))});
+            if let Some(token) = self.expect(InterpolationEnd) {
+              return Err(self.unexpected_token(&token, &[InterpolationEnd]));
+            }
+          }
+        }
+
+        new_lines.push(pieces);
+      }
+    }
+
+    panic!("done!");
+
     let mut lines = vec![];
     let mut line_tokens = vec![];
     let mut shebang = false;
@@ -1176,7 +1211,7 @@ impl<'a> Parser<'a> {
     })
   }
 
-  fn expression(&mut self) -> Result<Expression<'a>, Error<'a>> {
+  fn expression(&mut self, interpolation: bool) -> Result<Expression<'a>, Error<'a>> {
     let first = self.tokens.next().unwrap();
     let lhs = match first.class {
       Name => Expression::Variable{name: first.lexeme, token: first},
@@ -1185,10 +1220,16 @@ impl<'a> Parser<'a> {
     };
 
     if self.accepted(Plus) {
-      let rhs = try!(self.expression());
+      let rhs = try!(self.expression(interpolation));
       Ok(Expression::Concatination{lhs: Box::new(lhs), rhs: Box::new(rhs)})
+    } else if interpolation && self.peek(InterpolationEnd) {
+      Ok(lhs)
     } else if let Some(token) = self.expect_eol() {
-      Err(self.unexpected_token(&token, &[Plus, Eol]))
+      if interpolation {
+        Err(self.unexpected_token(&token, &[Plus, Eol, InterpolationEnd]))
+      } else {
+        Err(self.unexpected_token(&token, &[Plus, Eol]))
+      }
     } else {
       Ok(lhs)
     }
@@ -1210,7 +1251,7 @@ impl<'a> Parser<'a> {
           variable: token.lexeme,
         }));
       }
-        assignments.insert(token.lexeme, try!(self.expression()));
+        assignments.insert(token.lexeme, try!(self.expression(false)));
         assignment_tokens.insert(token.lexeme, token);
       } else {
         if let Some(recipe) = recipes.remove(token.lexeme) {

diff --git a/src/tests.rs b/src/tests.rs
index 73e03e8..12fdf4e 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -12,8 +12,11 @@ fn tokenize_success(text: &str, expected_summary: &str) {
     s += t.lexeme;
     s
   }).collect::<Vec<_>>().join("");
+  let summary = token_summary(&tokens);
+  if summary != expected_summary {
+    panic!("token summary mismatch:\nexpected: {}\ngot:      {}\n", expected_summary, summary);
+  }
   assert_eq!(text, roundtrip);
-  assert_eq!(token_summary(&tokens), expected_summary);
 }
 
 fn tokenize_error(text: &str, expected: Error) {
@@ -32,10 +35,10 @@ fn tokenize_error(text: &str, expected: Error) {
 fn token_summary(tokens: &[Token]) -> String {
   tokens.iter().map(|t| {
     match t.class {
-      super::TokenKind::Line{..}    => "*",
+      super::TokenKind::Line{..}    => "^",
       super::TokenKind::Name        => "N",
      super::TokenKind::Colon       => ":",
-      super::TokenKind::StringToken => "\"",
+      super::TokenKind::StringToken => "'",
       super::TokenKind::Plus        => "+",
       super::TokenKind::Equals      => "=",
       super::TokenKind::Comment{..} => "#",
@@ -50,6 +53,7 @@ fn token_summary(tokens: &[Token]) -> String {
   }).collect::<Vec<_>>().join("")
 }
 
+/*
 fn parse_success(text: &str) -> Justfile {
   match super::parse(text) {
     Ok(justfile) => justfile,
@@ -80,15 +84,61 @@ fn parse_error(text: &str, expected: Error) {
     panic!("Expected {:?} but parse succeeded", expected.kind);
   }
 }
+*/
 
 #[test]
-fn tokenize() {
+fn tokenize_recipe_interpolation_eol() {
+  let text = "foo:
+ {{hello}}
+";
+  tokenize_success(text, "N:$>^{N}$<.");
+}
+
+#[test]
+fn tokenize_recipe_interpolation_eof() {
+  let text = "foo:
+ {{hello}}";
+  tokenize_success(text, "N:$>^{N}<.");
+}
+
+#[test]
+fn tokenize_recipe_complex_interpolation_expression() {
+  let text = "foo:\n {{a + b + \"z\" + blarg}}";
+  tokenize_success(text, "N:$>^{N+N+'+N}<.");
+}
+
+#[test]
+fn tokenize_recipe_multiple_interpolations() {
+  let text = "foo:\n {{a}}0{{b}}1{{c}}";
+  tokenize_success(text, "N:$>^{N}_{N}_{N}<.");
+}
+
+#[test]
+fn tokenize_junk() {
   let text = "bob
 
 hello blah blah blah : a b c #whatever
 ";
   tokenize_success(text, "N$$NNNN:NNN#$.");
+}
 
+#[test]
+fn tokenize_empty_lines() {
+  let text = "
+hello:
+  asdf
+  bsdf
+
+  csdf
+
+  dsdf
+  ";
+
+  tokenize_success(text, "$N:$>^_$^_$$^_$$^_$<.");
+}
+
+#[test]
+fn tokenize_multiple() {
   let text = "
 hello:
   a
@@ -101,15 +151,18 @@ hello:
   b
   c
   d
 
 bob:
   frank
 ";
 
-  tokenize_success(text, "$N:$>*$*$$*$$*$$*$<.");
+  tokenize_success(text, "$N:$>^_$^_$$^_$$^_$$^_$<.");
+}
+
+
+#[test]
+fn tokenize_comment() {
   tokenize_success("a:=#", "N:=#.")
 }
 
-/*
 #[test]
-fn inconsistent_leading_whitespace() {
+fn tokenize_space_then_tab() {
   let text = "a:
  0
  1
@@ -123,7 +176,10 @@ fn inconsistent_leading_whitespace() {
     width: None,
     kind: ErrorKind::InconsistentLeadingWhitespace{expected: " ", found: "\t"},
   });
+}
 
+#[test]
+fn tokenize_tabs_then_tab_space() {
   let text = "a:
 \t\t0
 \t\t 1
@@ -138,7 +194,6 @@ fn inconsistent_leading_whitespace() {
     kind: ErrorKind::InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "},
   });
 }
-*/
 
 #[test]
 fn outer_shebang() {
@@ -166,6 +221,7 @@ fn unknown_start_of_token() {
   });
 }
 
+/*
 #[test]
 fn parse_empty() {
   parse_summary("
@@ -176,7 +232,6 @@ fn parse_empty() {
 ", "");
 }
 
-/*
 #[test]
 fn parse_complex() {
   parse_summary("
@@ -205,7 +260,6 @@ x:
 y:
 z:");
 }
-*/
 
 #[test]
 fn parse_assignments() {
@@ -402,7 +456,6 @@ fn write_or() {
   assert_eq!("1, 2, 3, or 4", super::Or(&[1,2,3,4]).to_string());
 }
 
-/*
 #[test]
 fn run_shebang() {
   // this test exists to make sure that shebang recipes
@@ -429,9 +482,7 @@ a:
     other => panic!("expected an code run error, but got: {}", other),
   }
 }
-*/
 
-/*
 #[test]
 fn run_order() {
   let tmp = tempdir::TempDir::new("run_order").unwrap_or_else(|err| panic!("tmpdir: failed to create temporary directory: {}", err));
@@ -452,7 +503,6 @@ c: b
   super::std::env::set_current_dir(path).expect("failed to set current directory");
   parse_success(text).run(&["a", "d"]).unwrap();
 }
-*/
 
 #[test]
 fn unknown_recipes() {
@@ -462,7 +512,6 @@ fn unknown_recipes() {
   }
 }
 
-/*
 #[test]
 fn code_error() {
   match parse_success("fail:\n @function x { return 100; }; x").run(&["fail"]).unwrap_err() {
@@ -473,9 +522,7 @@ fn code_error() {
     other @ _ => panic!("expected a code run error, but got: {}", other),
   }
 }
-*/
 
-/*
 #[test]
 fn extra_whitespace() {
   // we might want to make extra leading whitespace a line continuation in the future,
@@ -493,14 +540,13 @@ fn extra_whitespace() {
   // extra leading whitespace is okay in a shebang recipe
   parse_success("a:\n #!\n print(1)");
 }
-*/
 
 #[test]
 fn bad_recipe_names() {
   // We are extra strict with names. Although the tokenizer
   // will tokenize anything that matches /[a-zA-Z0-9_-]+/
   // as a name, we throw an error if names do not match
   // /[a-z](-?[a-z])*/. This is to support future expansion
   // of justfile and command line syntax.
   fn bad_name(text: &str, name: &str, index: usize, line: usize, column: usize) {
     parse_error(text, Error {
@@ -525,7 +571,6 @@ fn bad_recipe_names() {
   bad_name("a:\nZ:", "Z", 3, 1, 0);
 }
 
-/*
 #[test]
 fn bad_interpolation_variable_name() {
   let text = "a:\n echo {{hello--hello}}";
@@ -538,9 +583,7 @@ fn bad_interpolation_variable_name() {
     kind: ErrorKind::BadInterpolationVariableName{recipe: "a", text: "hello--hello"}
   });
 }
-*/
 
-/*
 #[test]
 fn unclosed_interpolation_delimiter() {
   let text = "a:\n echo {{";
@@ -553,7 +596,6 @@ fn unclosed_interpolation_delimiter() {
     kind: ErrorKind::UnclosedInterpolationDelimiter,
   });
 }
-*/
 
 #[test]
 fn unknown_expression_variable() {
@@ -570,7 +612,6 @@ fn unknown_expression_variable() {
 
 #[test]
 fn unknown_interpolation_variable() {
-  /*
   let text = "x:\n {{ hello}}";
   parse_error(text, Error {
     text: text,
@@ -580,17 +621,15 @@ fn unknown_interpolation_variable() {
     width: Some(5),
     kind: ErrorKind::UnknownVariable{variable: "hello"},
   });
-  */
-  /*
-  let text = "x:\n echo\n {{ lol }}";
-  parse_error(text, Error {
-    text: text,
-    index: 11,
-    line: 2,
-    column: 2,
-    width: Some(3),
-    kind: ErrorKind::UnknownVariable{variable: "lol"},
-  });
-  */
+  // let text = "x:\n echo\n {{ lol }}";
+  // parse_error(text, Error {
+  //   text: text,
+  //   index: 11,
+  //   line: 2,
+  //   column: 2,
+  //   width: Some(3),
+  //   kind: ErrorKind::UnknownVariable{variable: "lol"},
+  // });
 }
 
+*/
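The piece-splitting idea behind the LEADING_TEXT / INTERPOLATION_START / INTERPOLATION_END regexes above can be seen in isolation in the following sketch. It is not code from this patch: the file name, the input string, and the use of the modern regex 1.x captures API (the patch itself targets the much older 0.1 API) are all assumptions made for illustration.

// sketch.rs -- hypothetical, not part of the commit. Demonstrates the same
// "leading text, then {{expression}}" alternation that tokenize() performs,
// built on the `regex` crate (regex = "1" in Cargo.toml).
use regex::Regex;

fn main() {
    // Same spirit as LEADING_TEXT: the shortest run of text before a "{{".
    let leading_text = Regex::new(r"^(.+?)\{\{").unwrap();
    // One whole interpolation, opening and closing delimiters included.
    let interpolation = Regex::new(r"^\{\{(.+?)\}\}").unwrap();

    // Illustrative input, shaped like the tokenize_recipe_multiple_interpolations test.
    let mut rest = "a{{b}}0{{c}}";
    while !rest.is_empty() {
        if let Some(caps) = interpolation.captures(rest) {
            println!("expression: {}", caps.get(1).unwrap().as_str().trim());
            rest = &rest[caps.get(0).unwrap().end()..];
        } else if let Some(caps) = leading_text.captures(rest) {
            println!("text: {}", caps.get(1).unwrap().as_str());
            rest = &rest[caps.get(1).unwrap().end()..];
        } else {
            // Trailing text after the last "}}", with no further interpolation.
            println!("text: {}", rest);
            break;
        }
    }
}

Run against "a{{b}}0{{c}}", this prints the text/expression pieces in the same alternating order that the parser's Piece::Text / Piece::Expression enum records, matching the "{N}_{N}_{N}" shape in the tokenize_recipe_multiple_interpolations summary.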