Tokenize looks like it's working.

This commit is contained in:
Casey Rodarmor 2016-10-26 22:04:12 -07:00
parent 7a77c910b6
commit aae665a4e9
3 changed files with 125 additions and 44 deletions

1
notes
View File

@ -77,6 +77,7 @@ notes
. tokenizing . tokenizing
. executing . executing
- make sure there isn't any unused code - make sure there isn't any unused code
- ask users to contribute their justfiles
- try to get some users - try to get some users
. facebook friends . facebook friends

View File

@ -738,12 +738,12 @@ fn tokenize<'a>(text: &'a str) -> Result<Vec<Token>, Error> {
static ref COMMENT: Regex = token(r"#([^!].*)?$" ); static ref COMMENT: Regex = token(r"#([^!].*)?$" );
static ref STRING: Regex = token("\"[a-z0-9]\"" ); static ref STRING: Regex = token("\"[a-z0-9]\"" );
static ref EOL: Regex = token(r"\n|\r\n" ); static ref EOL: Regex = token(r"\n|\r\n" );
static ref INTERPOLATION_END: Regex = token(r"[{][{]" ); static ref INTERPOLATION_END: Regex = token(r"[}][}]" );
static ref LINE: Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$"); static ref LINE: Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
static ref INDENT: Regex = re(r"^([ \t]*)[^ \t\n\r]" ); static ref INDENT: Regex = re(r"^([ \t]*)[^ \t\n\r]" );
static ref INTERPOLATION_START: Regex = re(r"^[{][{]" ); static ref INTERPOLATION_START: Regex = re(r"^[{][{]" );
static ref LEADING_TEXT: Regex = re(r"(?m)(.+?)[{][{]" ); static ref LEADING_TEXT: Regex = re(r"^(?m)(.+?)[{][{]" );
static ref TEXT: Regex = re(r"(?m)(.+?)$" ); static ref TEXT: Regex = re(r"^(?m)(.+)" );
} }
#[derive(PartialEq)] #[derive(PartialEq)]
@ -1071,6 +1071,41 @@ impl<'a> Parser<'a> {
return Err(self.unexpected_token(&token, &[Name, Eol, Eof])); return Err(self.unexpected_token(&token, &[Name, Eol, Eof]));
} }
// One fragment of a recipe body line. The loop that follows builds a
// `Vec<Piece>` per line: a `Text` piece for each accepted `Text` token and an
// `Expression` piece for each expression parsed in interpolation mode.
enum Piece<'a> {
// Literal text, kept as the token it was read from.
Text{text: Token<'a>},
// A parsed expression; in this commit it is produced by `self.expression(true)`.
Expression{expression: Expression<'a>},
}
let mut new_lines = vec![];
if self.accepted(Indent) {
while !self.accepted(Dedent) {
if let Some(token) = self.expect(Line) {
return Err(token.error(ErrorKind::InternalError{
message: format!("Expected a dedent but got {}", token.class)
}))
}
let mut pieces = vec![];
while !self.accepted(Eol) {
if let Some(token) = self.accept(Text) {
pieces.push(Piece::Text{text: token});
} else if let Some(token) = self.expect(InterpolationStart) {
return Err(self.unexpected_token(&token, &[Text, InterpolationStart, Eol]));
} else {
pieces.push(Piece::Expression{expression: try!(self.expression(true))});
if let Some(token) = self.expect(InterpolationEnd) {
return Err(self.unexpected_token(&token, &[InterpolationEnd]));
}
}
}
new_lines.push(pieces);
}
}
panic!("done!");
let mut lines = vec![]; let mut lines = vec![];
let mut line_tokens = vec![]; let mut line_tokens = vec![];
let mut shebang = false; let mut shebang = false;
@ -1176,7 +1211,7 @@ impl<'a> Parser<'a> {
}) })
} }
fn expression(&mut self) -> Result<Expression<'a>, Error<'a>> { fn expression(&mut self, interpolation: bool) -> Result<Expression<'a>, Error<'a>> {
let first = self.tokens.next().unwrap(); let first = self.tokens.next().unwrap();
let lhs = match first.class { let lhs = match first.class {
Name => Expression::Variable{name: first.lexeme, token: first}, Name => Expression::Variable{name: first.lexeme, token: first},
@ -1185,10 +1220,16 @@ impl<'a> Parser<'a> {
}; };
if self.accepted(Plus) { if self.accepted(Plus) {
let rhs = try!(self.expression()); let rhs = try!(self.expression(interpolation));
Ok(Expression::Concatination{lhs: Box::new(lhs), rhs: Box::new(rhs)}) Ok(Expression::Concatination{lhs: Box::new(lhs), rhs: Box::new(rhs)})
} else if interpolation && self.peek(InterpolationEnd) {
Ok(lhs)
} else if let Some(token) = self.expect_eol() { } else if let Some(token) = self.expect_eol() {
Err(self.unexpected_token(&token, &[Plus, Eol])) if interpolation {
Err(self.unexpected_token(&token, &[Plus, Eol, InterpolationEnd]))
} else {
Err(self.unexpected_token(&token, &[Plus, Eol]))
}
} else { } else {
Ok(lhs) Ok(lhs)
} }
@ -1210,7 +1251,7 @@ impl<'a> Parser<'a> {
variable: token.lexeme, variable: token.lexeme,
})); }));
} }
assignments.insert(token.lexeme, try!(self.expression())); assignments.insert(token.lexeme, try!(self.expression(false)));
assignment_tokens.insert(token.lexeme, token); assignment_tokens.insert(token.lexeme, token);
} else { } else {
if let Some(recipe) = recipes.remove(token.lexeme) { if let Some(recipe) = recipes.remove(token.lexeme) {

View File

@ -12,8 +12,11 @@ fn tokenize_success(text: &str, expected_summary: &str) {
s += t.lexeme; s += t.lexeme;
s s
}).collect::<Vec<_>>().join(""); }).collect::<Vec<_>>().join("");
let summary = token_summary(&tokens);
if summary != expected_summary {
panic!("token summary mismatch:\nexpected: {}\ngot: {}\n", expected_summary, summary);
}
assert_eq!(text, roundtrip); assert_eq!(text, roundtrip);
assert_eq!(token_summary(&tokens), expected_summary);
} }
fn tokenize_error(text: &str, expected: Error) { fn tokenize_error(text: &str, expected: Error) {
@ -32,10 +35,10 @@ fn tokenize_error(text: &str, expected: Error) {
fn token_summary(tokens: &[Token]) -> String { fn token_summary(tokens: &[Token]) -> String {
tokens.iter().map(|t| { tokens.iter().map(|t| {
match t.class { match t.class {
super::TokenKind::Line{..} => "*", super::TokenKind::Line{..} => "^",
super::TokenKind::Name => "N", super::TokenKind::Name => "N",
super::TokenKind::Colon => ":", super::TokenKind::Colon => ":",
super::TokenKind::StringToken => "\"", super::TokenKind::StringToken => "'",
super::TokenKind::Plus => "+", super::TokenKind::Plus => "+",
super::TokenKind::Equals => "=", super::TokenKind::Equals => "=",
super::TokenKind::Comment{..} => "#", super::TokenKind::Comment{..} => "#",
@ -50,6 +53,7 @@ fn token_summary(tokens: &[Token]) -> String {
}).collect::<Vec<_>>().join("") }).collect::<Vec<_>>().join("")
} }
/*
fn parse_success(text: &str) -> Justfile { fn parse_success(text: &str) -> Justfile {
match super::parse(text) { match super::parse(text) {
Ok(justfile) => justfile, Ok(justfile) => justfile,
@ -80,15 +84,61 @@ fn parse_error(text: &str, expected: Error) {
panic!("Expected {:?} but parse succeeded", expected.kind); panic!("Expected {:?} but parse succeeded", expected.kind);
} }
} }
*/
#[test] #[test]
fn tokenize() { fn tokenize_recipe_interpolation_eol() {
let text = "foo:
{{hello}}
";
tokenize_success(text, "N:$>^{N}$<.");
}
// Tokenizes a recipe whose only body line is a single interpolation and whose
// input ends without a trailing newline.
// In the expected summary `N` is a name, `:` a colon and `^` a line token.
// The other symbols are not defined in the visible part of `token_summary`:
// presumably `{`/`}` are the interpolation delimiters, `>`/`<` indent/dedent,
// `$` end-of-line and `.` end-of-file — TODO confirm.
// NOTE(review): the `>`/`<` in the summary suggest the body line is indented;
// its leading whitespace may have been lost in this rendering — confirm.
#[test]
fn tokenize_recipe_interpolation_eof() {
let text = "foo:
{{hello}}";
tokenize_success(text, "N:$>^{N}<.");
}
// An interpolation may hold a whole concatenation expression: names, `+`
// operators and a string literal must each come out as their own token
// (`N`, `+` and `'` in the summary) between the interpolation delimiters.
#[test]
fn tokenize_recipe_complex_interpolation_expression() {
  tokenize_success(
    "foo:\n {{a + b + \"z\" + blarg}}",
    "N:$>^{N+N+'+N}<.",
  );
}
// Several interpolations on one recipe line, separated by literal characters.
// Each `{{name}}` should summarize as `{N}`; the `_` between them is
// presumably the token for the literal text (`0`, `1`) — TODO confirm against
// `token_summary`.
#[test]
fn tokenize_recipe_multiple_interpolations() {
  let expected_summary = "N:$>^{N}_{N}_{N}<.";
  let input = "foo:\n {{a}}0{{b}}1{{c}}";
  tokenize_success(input, expected_summary);
}
#[test]
fn tokenize_junk() {
let text = "bob let text = "bob
hello blah blah blah : a b c #whatever hello blah blah blah : a b c #whatever
"; ";
tokenize_success(text, "N$$NNNN:NNN#$."); tokenize_success(text, "N$$NNNN:NNN#$.");
}
// Tokenizes a recipe with several body lines and, per the test name, empty
// lines in between.
// In the expected summary `N` is a name, `:` a colon and `^` a line token;
// `_`, `$`, `>`, `<` and `.` are not defined in the visible part of
// `token_summary` (presumably text, end-of-line, indent, dedent and
// end-of-file — TODO confirm).
// NOTE(review): the doubled `$` and the `>`/`<` in the summary suggest the
// literal contains blank lines and indented body lines that this rendering
// does not show — confirm against the real file before editing the literal.
#[test]
fn tokenize_empty_lines() {
let text = "
hello:
asdf
bsdf
csdf
dsdf
";
tokenize_success(text, "$N:$>^_$^_$$^_$$^_$<.");
}
#[test]
fn tokenize_multiple() {
let text = " let text = "
hello: hello:
a a
@ -102,14 +152,17 @@ bob:
frank frank
"; ";
tokenize_success(text, "$N:$>*$*$$*$$*$$<N:$>*$<."); tokenize_success(text, "$N:$>^_$^_$$^_$$^_$$<N:$>^_$<.");
}
#[test]
fn tokenize_comment() {
tokenize_success("a:=#", "N:=#.") tokenize_success("a:=#", "N:=#.")
} }
/*
#[test] #[test]
fn inconsistent_leading_whitespace() { fn tokenize_space_then_tab() {
let text = "a: let text = "a:
0 0
1 1
@ -123,7 +176,10 @@ fn inconsistent_leading_whitespace() {
width: None, width: None,
kind: ErrorKind::InconsistentLeadingWhitespace{expected: " ", found: "\t"}, kind: ErrorKind::InconsistentLeadingWhitespace{expected: " ", found: "\t"},
}); });
}
#[test]
fn tokenize_tabs_then_tab_space() {
let text = "a: let text = "a:
\t\t0 \t\t0
\t\t 1 \t\t 1
@ -138,7 +194,6 @@ fn inconsistent_leading_whitespace() {
kind: ErrorKind::InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "}, kind: ErrorKind::InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "},
}); });
} }
*/
#[test] #[test]
fn outer_shebang() { fn outer_shebang() {
@ -166,6 +221,7 @@ fn unknown_start_of_token() {
}); });
} }
/*
#[test] #[test]
fn parse_empty() { fn parse_empty() {
parse_summary(" parse_summary("
@ -176,7 +232,6 @@ fn parse_empty() {
", ""); ", "");
} }
/*
#[test] #[test]
fn parse_complex() { fn parse_complex() {
parse_summary(" parse_summary("
@ -205,7 +260,6 @@ x:
y: y:
z:"); z:");
} }
*/
#[test] #[test]
fn parse_assignments() { fn parse_assignments() {
@ -402,7 +456,6 @@ fn write_or() {
assert_eq!("1, 2, 3, or 4", super::Or(&[1,2,3,4]).to_string()); assert_eq!("1, 2, 3, or 4", super::Or(&[1,2,3,4]).to_string());
} }
/*
#[test] #[test]
fn run_shebang() { fn run_shebang() {
// this test exists to make sure that shebang recipes // this test exists to make sure that shebang recipes
@ -429,9 +482,7 @@ a:
other => panic!("expected an code run error, but got: {}", other), other => panic!("expected an code run error, but got: {}", other),
} }
} }
*/
/*
#[test] #[test]
fn run_order() { fn run_order() {
let tmp = tempdir::TempDir::new("run_order").unwrap_or_else(|err| panic!("tmpdir: failed to create temporary directory: {}", err)); let tmp = tempdir::TempDir::new("run_order").unwrap_or_else(|err| panic!("tmpdir: failed to create temporary directory: {}", err));
@ -452,7 +503,6 @@ c: b
super::std::env::set_current_dir(path).expect("failed to set current directory"); super::std::env::set_current_dir(path).expect("failed to set current directory");
parse_success(text).run(&["a", "d"]).unwrap(); parse_success(text).run(&["a", "d"]).unwrap();
} }
*/
#[test] #[test]
fn unknown_recipes() { fn unknown_recipes() {
@ -462,7 +512,6 @@ fn unknown_recipes() {
} }
} }
/*
#[test] #[test]
fn code_error() { fn code_error() {
match parse_success("fail:\n @function x { return 100; }; x").run(&["fail"]).unwrap_err() { match parse_success("fail:\n @function x { return 100; }; x").run(&["fail"]).unwrap_err() {
@ -473,9 +522,7 @@ fn code_error() {
other @ _ => panic!("expected a code run error, but got: {}", other), other @ _ => panic!("expected a code run error, but got: {}", other),
} }
} }
*/
/*
#[test] #[test]
fn extra_whitespace() { fn extra_whitespace() {
// we might want to make extra leading whitespace a line continuation in the future, // we might want to make extra leading whitespace a line continuation in the future,
@ -493,14 +540,13 @@ fn extra_whitespace() {
// extra leading whitespace is okay in a shebang recipe // extra leading whitespace is okay in a shebang recipe
parse_success("a:\n #!\n print(1)"); parse_success("a:\n #!\n print(1)");
} }
*/
#[test] #[test]
fn bad_recipe_names() { fn bad_recipe_names() {
// We are extra strict with names. Although the tokenizer // We are extra strict with names. Although the tokenizer
// will tokenize anything that matches /[a-zA-Z0-9_-]+/ // will tokenize anything that matches /[a-zA-Z0-9_-]+/
// as a name, we throw an error if names do not match // as a name, we throw an error if names do not match
// /[a-z](-?[a-z])*/. This is to support future expansion // / [a-z](-?[a-z])* /. This is to support future expansion
// of justfile and command line syntax. // of justfile and command line syntax.
fn bad_name(text: &str, name: &str, index: usize, line: usize, column: usize) { fn bad_name(text: &str, name: &str, index: usize, line: usize, column: usize) {
parse_error(text, Error { parse_error(text, Error {
@ -525,7 +571,6 @@ fn bad_recipe_names() {
bad_name("a:\nZ:", "Z", 3, 1, 0); bad_name("a:\nZ:", "Z", 3, 1, 0);
} }
/*
#[test] #[test]
fn bad_interpolation_variable_name() { fn bad_interpolation_variable_name() {
let text = "a:\n echo {{hello--hello}}"; let text = "a:\n echo {{hello--hello}}";
@ -538,9 +583,7 @@ fn bad_interpolation_variable_name() {
kind: ErrorKind::BadInterpolationVariableName{recipe: "a", text: "hello--hello"} kind: ErrorKind::BadInterpolationVariableName{recipe: "a", text: "hello--hello"}
}); });
} }
*/
/*
#[test] #[test]
fn unclosed_interpolation_delimiter() { fn unclosed_interpolation_delimiter() {
let text = "a:\n echo {{"; let text = "a:\n echo {{";
@ -553,7 +596,6 @@ fn unclosed_interpolation_delimiter() {
kind: ErrorKind::UnclosedInterpolationDelimiter, kind: ErrorKind::UnclosedInterpolationDelimiter,
}); });
} }
*/
#[test] #[test]
fn unknown_expression_variable() { fn unknown_expression_variable() {
@ -570,7 +612,6 @@ fn unknown_expression_variable() {
#[test] #[test]
fn unknown_interpolation_variable() { fn unknown_interpolation_variable() {
/*
let text = "x:\n {{ hello}}"; let text = "x:\n {{ hello}}";
parse_error(text, Error { parse_error(text, Error {
text: text, text: text,
@ -580,17 +621,15 @@ fn unknown_interpolation_variable() {
width: Some(5), width: Some(5),
kind: ErrorKind::UnknownVariable{variable: "hello"}, kind: ErrorKind::UnknownVariable{variable: "hello"},
}); });
*/
/* // let text = "x:\n echo\n {{ lol }}";
let text = "x:\n echo\n {{ lol }}"; // parse_error(text, Error {
parse_error(text, Error { // text: text,
text: text, // index: 11,
index: 11, // line: 2,
line: 2, // column: 2,
column: 2, // width: Some(3),
width: Some(3), // kind: ErrorKind::UnknownVariable{variable: "lol"},
kind: ErrorKind::UnknownVariable{variable: "lol"}, // });
});
*/
} }
*/