From 913bcba5f7562524d354c7ff5a28ebb812423c00 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Sat, 22 Oct 2016 23:18:26 -0700 Subject: [PATCH] Starting to rewrite the parser --- Cargo.lock | 7 + Cargo.toml | 1 + notes | 20 ++ src/lib.rs | 756 ++++++++++++++++----------------------------------- src/tests.rs | 96 ++++++- 5 files changed, 351 insertions(+), 529 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a6cba6c..fdf1424 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,6 +3,7 @@ name = "j" version = "0.2.0" dependencies = [ "clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -49,6 +50,11 @@ dependencies = [ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "lazy_static" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "libc" version = "0.2.16" @@ -163,6 +169,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" "checksum clap 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5fa304b03c49ccbb005784fc26e985b5d2310b1d37f2c311ce90dbcd18ea5fde" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" diff --git a/Cargo.toml b/Cargo.toml index aae9cb4..f03f9e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,4 @@ homepage = "https://github.com/casey/j" regex = "^0.1.77" clap = "^2.0.0" tempdir = "^0.3.5" +lazy_static = "^0.2.1" diff --git a/notes b/notes index b146d79..3096067 100644 --- a/notes +++ b/notes @@ -5,11 +5,31 @@ notes - parse lines into fragments and store in recipe - positional error messages - use clippy +- document everything, including internal stuff +- spam in rust irc chat when done +- use "kind" instead of class +- should i use // comments, since that's what's used in rust? +- vim and emacs syntax hilighting +- gah, maybe I should change it back to 'just' + . makes more sense as a name + . suggest j as alias + . should see if people are using 'j' + . doesn't conflict with autojmp - allow calling recipes in a justfile in a different directory: - ../foo # ../justfile:foo - xyz/foo # xyz/justfile:foo - #![deny(missing_docs)] + // error on tab after space + // error on mixed leading whitespace + // error on inconsistent leading whitespace + // outer shebang + // strict recipe name checking, be lenient in tokenizing + // but strict in parsing + // duplicate recipe name error + // duplicate dependency error + // differentiate shebang and non-shebang recipe + // resolve each recipe after parsing j: - vector of substitutions diff --git a/src/lib.rs b/src/lib.rs index 1ed4baf..49ec7c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,8 @@ #[cfg(test)] mod tests; +#[macro_use] +extern crate lazy_static; extern crate regex; extern crate tempdir; @@ -48,7 +50,7 @@ pub struct Recipe<'a> { line_number: usize, label: &'a str, name: &'a str, - leading_whitespace: &'a str, + // leading_whitespace: &'a str, lines: Vec<&'a str>, // fragments: Vec>>, // variables: BTreeSet<&'a str>, @@ -181,6 +183,7 @@ impl<'a> Recipe<'a> { } } +/* fn resolve<'a>( text: &'a str, recipes: &BTreeMap<&str, Recipe<'a>>, @@ -218,51 +221,58 @@ fn resolve<'a>( stack.pop(); Ok(()) } +*/ -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub struct Error<'a> { - text: &'a str, - line: usize, - kind: ErrorKind<'a> + text: &'a str, + index: usize, + line: usize, + column: usize, + kind: ErrorKind<'a>, } #[derive(Debug, PartialEq)] enum ErrorKind<'a> { - BadRecipeName{name: &'a str}, - CircularDependency{circle: Vec<&'a str>}, - DuplicateDependency{name: &'a str}, - DuplicateArgument{recipe: &'a str, argument: &'a str}, - DuplicateRecipe{first: usize, name: &'a str}, - TabAfterSpace{whitespace: &'a str}, - MixedLeadingWhitespace{whitespace: &'a str}, - ExtraLeadingWhitespace, + // BadRecipeName{name: &'a str}, + // CircularDependency{circle: Vec<&'a str>}, + // DuplicateDependency{name: &'a str}, + // DuplicateArgument{recipe: &'a str, argument: &'a str}, + // DuplicateRecipe{first: usize, name: &'a str}, + // TabAfterSpace{whitespace: &'a str}, + // MixedLeadingWhitespace{whitespace: &'a str}, + // ExtraLeadingWhitespace, InconsistentLeadingWhitespace{expected: &'a str, found: &'a str}, OuterShebang, - NonLeadingShebang{recipe: &'a str}, - UnknownDependency{name: &'a str, unknown: &'a str}, - Unparsable, - UnparsableDependencies, + // NonLeadingShebang{recipe: &'a str}, + // UnknownDependency{name: &'a str, unknown: &'a str}, + // Unparsable, + // UnparsableDependencies, UnknownStartOfToken, + InternalError{message: String}, } -fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>) - -> Error<'a> -{ - Error { - text: text, - line: line, - kind: kind, - } -} +// fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>) +// -> Error<'a> +// { +// Error { +// text: text, +// line: line, +// kind: kind, +// } +// } fn show_whitespace(text: &str) -> String { text.chars().map(|c| match c { '\t' => 't', ' ' => 's', _ => c }).collect() } +/* fn mixed(text: &str) -> bool { !(text.chars().all(|c| c == ' ') || text.chars().all(|c| c == '\t')) } +*/ +/* fn tab_after_space(text: &str) -> bool { let mut space = false; for c in text.chars() { @@ -276,66 +286,70 @@ fn tab_after_space(text: &str) -> bool { } return false; } +*/ impl<'a> Display for Error<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { try!(write!(f, "justfile:{}: ", self.line)); match self.kind { - ErrorKind::BadRecipeName{name} => { - try!(writeln!(f, "recipe name does not match /[a-z](-[a-z]|[a-z])*/: {}", name)); - } - ErrorKind::CircularDependency{ref circle} => { - try!(write!(f, "circular dependency: {}", circle.join(" -> "))); - return Ok(()); - } - ErrorKind::DuplicateArgument{recipe, argument} => { - try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument)); - } - ErrorKind::DuplicateDependency{name} => { - try!(writeln!(f, "duplicate dependency: {}", name)); - } - ErrorKind::DuplicateRecipe{first, name} => { - try!(write!(f, "duplicate recipe: {} appears on lines {} and {}", - name, first, self.line)); - return Ok(()); - } - ErrorKind::TabAfterSpace{whitespace} => { - try!(writeln!(f, "found tab after space: {}", show_whitespace(whitespace))); - } - ErrorKind::MixedLeadingWhitespace{whitespace} => { - try!(writeln!(f, - "inconsistant leading whitespace: recipe started with {}:", - show_whitespace(whitespace) - )); - } - ErrorKind::ExtraLeadingWhitespace => { - try!(writeln!(f, "line has extra leading whitespace")); - } + // ErrorKind::BadRecipeName{name} => { + // try!(writeln!(f, "recipe name does not match /[a-z](-[a-z]|[a-z])*/: {}", name)); + // } + // ErrorKind::CircularDependency{ref circle} => { + // try!(write!(f, "circular dependency: {}", circle.join(" -> "))); + // return Ok(()); + // } + // ErrorKind::DuplicateArgument{recipe, argument} => { + // try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument)); + //} + // ErrorKind::DuplicateDependency{name} => { + // try!(writeln!(f, "duplicate dependency: {}", name)); + // } + // ErrorKind::DuplicateRecipe{first, name} => { + // try!(write!(f, "duplicate recipe: {} appears on lines {} and {}", + // name, first, self.line)); + // return Ok(()); + // } + // ErrorKind::TabAfterSpace{whitespace} => { + // try!(writeln!(f, "found tab after space: {}", show_whitespace(whitespace))); + // } + // ErrorKind::MixedLeadingWhitespace{whitespace} => { + // try!(writeln!(f, + // "inconsistant leading whitespace: recipe started with {}:", + // show_whitespace(whitespace) + // )); + // } + // ErrorKind::ExtraLeadingWhitespace => { + // try!(writeln!(f, "line has extra leading whitespace")); + // } ErrorKind::InconsistentLeadingWhitespace{expected, found} => { try!(writeln!(f, - "inconsistant leading whitespace: recipe started with {} but found line with {}:", + "inconsistant leading whitespace: recipe started with \"{}\" but found line with \"{}\":", show_whitespace(expected), show_whitespace(found) )); } ErrorKind::OuterShebang => { try!(writeln!(f, "a shebang \"#!\" is reserved syntax outside of recipes")) } - ErrorKind::NonLeadingShebang{..} => { - try!(writeln!(f, "a shebang \"#!\" may only appear on the first line of a recipe")) - } - ErrorKind::UnknownDependency{name, unknown} => { - try!(writeln!(f, "recipe {} has unknown dependency {}", name, unknown)); - } - ErrorKind::Unparsable => { - try!(writeln!(f, "could not parse line:")); - } - ErrorKind::UnparsableDependencies => { - try!(writeln!(f, "could not parse dependencies:")); - } + // ErrorKind::NonLeadingShebang{..} => { + // try!(writeln!(f, "a shebang \"#!\" may only appear on the first line of a recipe")) + //} + // ErrorKind::UnknownDependency{name, unknown} => { + // try!(writeln!(f, "recipe {} has unknown dependency {}", name, unknown)); + // } + // ErrorKind::Unparsable => { + // try!(writeln!(f, "could not parse line:")); + // } + // ErrorKind::UnparsableDependencies => { + // try!(writeln!(f, "could not parse dependencies:")); + // } ErrorKind::UnknownStartOfToken => { try!(writeln!(f, "uknown start of token:")); } + ErrorKind::InternalError{ref message} => { + try!(writeln!(f, "internal error, this may indicate a bug in j: {}\n consider filing an issue: https://github.com/casey/j/issues/new", message)); + } } match self.text.lines().nth(self.line) { @@ -454,14 +468,26 @@ impl<'a> Display for RunError<'a> { } struct Token<'a> { - // index: usize, + index: usize, line: usize, - // col: usize, + column: usize, prefix: &'a str, lexeme: &'a str, class: TokenClass, } +impl<'a> Token<'a> { + fn error(&self, text: &'a str, kind: ErrorKind<'a>) -> Error<'a> { + Error { + text: text, + index: self.index, + line: self.line, + column: self.column, + kind: kind, + } + } +} + #[derive(Debug, PartialEq, Clone, Copy)] enum TokenClass { Name, @@ -486,86 +512,78 @@ fn token(pattern: &str) -> Regex { } fn tokenize(text: &str) -> Result, Error> { - let name_re = token(r"[a-z]((_|-)?[a-z0-9])*"); - let colon_re = token(r":" ); - let equals_re = token(r"=" ); - let comment_re = token(r"#([^!].*)?$" ); - //let shebang_re = token(r"#!" ); - let eol_re = token(r"\n|\r\n" ); - let eof_re = token(r"(?-m)$" ); - //let line_re = token(r"[^\n\r]" ); - - //let split_re = re("(?m)$"); - //let body_re = re(r"^(?ms)(.*?$)\s*(^[^ \t\r\n]|(?-m:$))"); - // let dedent_re = re(r"^(?m)\s*(^[^\s]|(?-m:$))"); - - let line_re = re(r"^(?m)[ \t]+[^ \t\n\r].*$"); - - /* - #[derive(PartialEq)] - enum State<'a> { - Normal, // starting state - Colon, // we have seen a colon since the last eol - Recipe, // we are on the line after a colon - Body{indent: &'a str}, // we are in a recipe body + lazy_static! { + static ref EOF: Regex = token(r"(?-m)$" ); + static ref NAME: Regex = token(r"[a-z]((_|-)?[a-z0-9])*"); + static ref COLON: Regex = token(r":" ); + static ref EQUALS: Regex = token(r"=" ); + static ref COMMENT: Regex = token(r"#([^!].*)?$" ); + static ref EOL: Regex = token(r"\n|\r\n" ); + static ref LINE: Regex = re(r"^(?m)[ \t]+[^ \t\n\r].*$"); + static ref INDENT: Regex = re(r"^([ \t]*)[^ \t\n\r]" ); } - */ - - // state is: - // beginning of line or not - // current indent fn indentation(text: &str) -> Option<&str> { - // fix this so it isn't recompiled every time - let indent_re = re(r"^([ \t]*)[^ \t\n\r]"); - indent_re.captures(text).map(|captures| captures.at(1).unwrap()) + INDENT.captures(text).map(|captures| captures.at(1).unwrap()) } - let mut tokens = vec![]; - let mut rest = text; - // let mut index = 0; - let mut line = 0; - let mut col = 0; + let mut tokens = vec![]; + let mut rest = text; + let mut index = 0; + let mut line = 0; + let mut column = 0; let mut indent: Option<&str> = None; - // let mut line = 0; - // let mut col = 0; - // let mut state = State::Normal; - // let mut line_start = true; + + macro_rules! error { + ($kind:expr) => {{ + Err(Error { + text: text, + index: index, + line: line, + column: column, + kind: $kind, + }) + }}; + } + loop { - if col == 0 { + if column == 0 { if let Some(class) = match (indent, indentation(rest)) { - // dedent - (Some(_), Some("")) => { - indent = None; - Some(Dedent) - } + // ignore: was no indentation and there still isn't (None, Some("")) => { None } - // indent + // ignore: current line is blank + (_, None) => { + None + } + // indent: was no indentation, now there is (None, Some(current @ _)) => { // check mixed leading whitespace indent = Some(current); Some(Indent) } - (Some(previous), Some(current @ _)) => { + // dedent: there was indentation and now there isn't + (Some(_), Some("")) => { + indent = None; + Some(Dedent) + } + // was indentation and still is, check if the new indentation matches + (Some(previous), Some(current)) => { if !current.starts_with(previous) { - return Err(error(text, line, - ErrorKind::InconsistentLeadingWhitespace{expected: previous, found: current} - )); + return error!(ErrorKind::InconsistentLeadingWhitespace{ + expected: previous, + found: current + }); } None // check tabs after spaces } - // ignore - _ => { - None - } } { tokens.push(Token { - // index: index, + index: index, line: line, - // col: col, + column: column, prefix: "", lexeme: "", class: class, @@ -574,159 +592,39 @@ fn tokenize(text: &str) -> Result, Error> { } let (prefix, lexeme, class) = - if let (0, Some(indent), Some(captures)) = (col, indent, line_re.captures(rest)) { + if let (0, Some(indent), Some(captures)) = (column, indent, LINE.captures(rest)) { let line = captures.at(0).unwrap(); if !line.starts_with(indent) { - panic!("Line did not start with expected indentation"); + return error!(ErrorKind::InternalError{message: "unexpected indent".to_string()}); } let (prefix, lexeme) = line.split_at(indent.len()); (prefix, lexeme, Line) - } else if let Some(captures) = name_re.captures(rest) { + } else if let Some(captures) = NAME.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Name) - } else if let Some(captures) = eol_re.captures(rest) { + } else if let Some(captures) = EOL.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Eol) - } else if let Some(captures) = eof_re.captures(rest) { + } else if let Some(captures) = EOF.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Eof) - } else if let Some(captures) = colon_re.captures(rest) { + } else if let Some(captures) = COLON.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Colon) - } else if let Some(captures) = equals_re.captures(rest) { + } else if let Some(captures) = EQUALS.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Equals) - } else if let Some(captures) = comment_re.captures(rest) { + } else if let Some(captures) = COMMENT.captures(rest) { (captures.at(1).unwrap(), captures.at(2).unwrap(), Comment) } else { - return Err(if rest.starts_with("#!") { - error(text, line, ErrorKind::OuterShebang) + return if rest.starts_with("#!") { + error!(ErrorKind::OuterShebang) } else { - error(text, line, ErrorKind::UnknownStartOfToken) - }); + error!(ErrorKind::UnknownStartOfToken) + }; }; - - - // let (captures, class) = if let (0, Some(captures)) = line_re.captures(rest) { - - /* - */ - - /* - if state == State::Recipe { - let captures = indent_re.captures(rest).unwrap(); - let indent = captures.at(1).unwrap(); - let text = captures.at(2).unwrap(); - if indent != "" && text != "" { - tokens.push(Token { - index: index, - prefix: "", - lexeme: "", - class: TokenClass::Indent, - }); - state = State::Body{indent: indent}; - } else { - state = State::Normal; - } - } - */ - /* - State::Body{indent: _} => { - if let Some(captures) = body_re.captures(rest) { - let body_text = captures.at(1).unwrap(); - for mut line in split_re.split(body_text) { - if let Some(captures) = line_re.captures(line) { - let len = captures.at(0).unwrap().len(); - tokens.push(Token { - index: index, - prefix: captures.at(1).unwrap(), - lexeme: captures.at(2).unwrap(), - class: TokenClass::Eol, - }); - line = &line[len..]; - } - println!("{:?}", line); - } - - panic!("matched body: {}", captures.at(1).unwrap()); - - - // split the body into lines - // for each line in the body, push a line if nonblank, then an eol - // push a dedent - } - }, - */ - // State::Normal | State::Colon | State::Body{..} => { - /* - let (captures, class) = if let Some(captures) = eol_re.captures(rest) { - (captures, TokenClass::Eol) - } else if let State::Body{indent} = state { - if dedent_re.is_match(rest) { - tokens.push(Token { - index: index, - prefix: "", - lexeme: "", - class: TokenClass::Dedent, - }); - state = State::Normal; - continue - } - - if let Some(captures) = line_re.captures(rest) { - (captures, TokenClass::Line) - } else { - panic!("Failed to match a line"); - } - } else if let Some(captures) = anchor_re.captures(rest) { - (captures, TokenClass::Anchor) - } else if let Some(captures) = name_re.captures(rest) { - (captures, TokenClass::Name) - } else if let Some(captures) = colon_re.captures(rest) { - (captures, TokenClass::Colon) - } else if let Some(captures) = comment_re.captures(rest) { - let text = captures.at(3).unwrap_or(""); - (captures, TokenClass::Comment{text: text}) - } else if let Some(captures) = eof_re.captures(rest) { - (captures, TokenClass::Eof) - } else { - panic!("Did not match a token! Rest: {}", rest); - }; - */ - - // let (captures, class) = if let (true, Some(captures)) = (line_start, - - // let all = captures.at(0).unwrap(); - // let prefix = captures.at(1).unwrap(); - // let lexeme = captures.at(2).unwrap(); - // let len = all.len(); - // let eof = class == TokenClass::Eof; - //assert!(eof || lexeme.len() > 0); - //assert!(all.len() > 0); - //assert!(prefix.len() + lexeme.len() == len); - - /* - if class == TokenClass::Colon { - state = State::Colon; - } else if class == TokenClass::Eol && state == State::Colon { - state = State::Recipe; - } - */ - - - /* - if class == TokenClass::Eol { - row += 1; - col = 0; - } else { - col += len; - } - - let eof = TokenClass::Eof { - } - */ let len = prefix.len() + lexeme.len(); tokens.push(Token { - // index: index, - line: line, - // col: col, + index: index, + line: line, + column: column, prefix: prefix, lexeme: lexeme, class: class, @@ -735,115 +633,44 @@ fn tokenize(text: &str) -> Result, Error> { match tokens.last().unwrap().class { Eol => { line += 1; - col = 0; + column = 0; }, Eof => { break; }, _ => { - col += len; + column += len; } } rest = &rest[len..]; - // index += len; + index += len; } Ok(tokens) } -/* -struct Parser<'a, I> { - tokens: Vec>, - index: usize, -} -*/ - -//impl<'a> Parser<'a> { - /* - fn peek(&mut self) -> TokenClass { - self.tokens[self.index].class - } - - fn advance(&mut self) { - self.index += 1; - } - - fn accept_eol(&mut self) -> bool { - if self.accept(TokenClass::Comment) { - self.expect(TokenClass::Eol); - true - } else - } - */ - - /* - fn accept(&mut self, class: TokenClass) -> bool { - if self.tokens[self.index].class == class { - self.index += 1; - true - } else { - false - } - } - */ - - /* - fn peek(&mut self) -> Option { - self.tokens.get(self.index).map(|t| t.class) - } - - fn file(mut self) -> Result, Error<'a>> { - let recipes = BTreeMap::new(); - - loop { - let ref current = self.tokens[self.index]; - self.index += 1; - - match current.class { - TokenClass::Eof => break, - TokenClass::Comment => continue, - TokenClass::Eol => continue, - TokenClass::Name => { - match self.peek() { - Some(TokenClass::Name) | Some(TokenClass::Colon) => { - panic!("time to parse a recipe"); - } - Some(TokenClass::Equals) => { - panic!("time to parse an assignment"); - } - Some(unexpected @ _) => { - panic!("unexpected token"); - } - None => { - panic!("unexpected end of token stream"); - } - } - } - unexpected @ _ => { - panic!("unexpected token at top level"); - } - } - } - - Ok(Justfile{recipes: recipes}) - } -} -*/ - -// struct Parser<'a, I> where I: std::iter::Iterator> { -// tokens: std::iter::Peekable, -// } - -struct Parser<'i, 't: 'i> { - text: &'t str, - tokens: &'i mut std::iter::Peekable>> +pub fn parse<'a>(text: &'a str) -> Result { + let tokens = try!(tokenize(text)); + let filtered: Vec<_> = tokens.into_iter().filter(|t| t.class != Comment).collect(); + let parser = Parser{ + text: text, + tokens: filtered.into_iter().peekable() + }; + let justfile = try!(parser.file()); + Ok(justfile) } -impl<'i, 't> Parser<'i, 't> { - fn accept(&mut self, class: TokenClass) -> Option<&Token<'t>> { - if self.tokens.peek().unwrap().class == class { - Some(self.tokens.next().unwrap()) +struct Parser<'a> { + text: &'a str, + tokens: std::iter::Peekable>> +} + +impl<'a> Parser<'a> { + /* + fn accept(&mut self, class: TokenClass) -> Option> { + if self.peek(class) { + self.tokens.next() } else { None } @@ -853,30 +680,28 @@ impl<'i, 't> Parser<'i, 't> { self.accept(class).is_some() } + fn peek(&mut self, class: TokenClass) -> bool { + self.tokens.peek().unwrap().class == class + } + */ + + /* + fn expect(&mut self, class: TokenClass) { if !self.accepted(class) { panic!("we fucked"); } } + */ - fn peek(&mut self, class: TokenClass) -> bool { - self.tokens.peek().unwrap().class == class - } + /* - fn accept_eol(&mut self) -> bool { - if self.accepted(Comment) { - if !self.peek(Eof) { self.expect(Eol) }; - true - } else { - self.accepted(Eol) - } - } // fn accept(&mut self) -> Result, Error<'t>> { // match self.peek( // } - fn recipe(&mut self, name: &'t str) -> Result, Error<'t>> { + fn recipe(&mut self, name: &'a str) -> Result, Error<'a>> { let mut arguments = vec![]; loop { if let Some(name_token) = self.accept(Name) { @@ -896,8 +721,9 @@ impl<'i, 't> Parser<'i, 't> { loop { if let Some(name_token) = self.accept(Name) { if dependencies.contains(&name_token.lexeme) { - return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{ - name: name_token.lexeme})); + panic!("duplicate dependency"); + // return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{ + // name: name_token.lexeme})); } dependencies.push(name_token.lexeme); } else { @@ -913,23 +739,51 @@ impl<'i, 't> Parser<'i, 't> { // Ok(Recipe{ // }) } + */ - fn file(mut self) -> Result, Error<'t>> { - let mut recipes = BTreeMap::new(); + fn error(self, token: &Token<'a>, kind: ErrorKind<'a>) -> Error<'a> { + token.error(self.text, kind) + } + fn file(mut self) -> Result, Error<'a>> { + let recipes = BTreeMap::new(); + + loop { + match self.tokens.next() { + Some(token) => match token.class { + Eof => break, + Eol => continue, + _ => return Err(self.error(&token, ErrorKind::InternalError { + message: format!("unhandled token class: {:?}", token.class) + })), + }, + None => return Err(Error { + text: self.text, + index: 0, + line: 0, + column: 0, + kind: ErrorKind::InternalError { + message: "unexpected end of token stream".to_string() + } + }), + } + } + + /* loop { if self.accepted(Eof) { break; } if self.accept_eol() { continue; } match self.tokens.next() { - Some(&Token{class: Name, line, lexeme: name, ..}) => { + Some(Token{class: Name, lexeme: name, ..}) => { if self.accepted(Equals) { panic!("Variable assignment not yet implemented"); } else { if recipes.contains_key(name) { - return Err(error(self.text, line, ErrorKind::DuplicateDependency{ - name: name, - })); + // return Err(error(self.text, line, ErrorKind::DuplicateDependency{ + // name: name, + // })); + panic!("duplicate dep"); } let recipe = try!(self.recipe(name)); recipes.insert(name, recipe); @@ -938,154 +792,14 @@ impl<'i, 't> Parser<'i, 't> { _ => panic!("got something else") }; } + */ - // assert that token.next() == None + if let Some(ref token) = self.tokens.next() { + return Err(self.error(token, ErrorKind::InternalError{ + message: format!("unexpected token remaining after parsing completed: {:?}", token.class) + })) + } Ok(Justfile{recipes: recipes}) } } - - -// impl<'a, I> Parser<'a, I> where I: std::iter::Iterator> { -// fn file(mut self) -> Result, Error<'a>> { -// Ok() -// } -// } - -pub fn parse<'a>(text: &'a str) -> Result { - let tokens = try!(tokenize(text)); - // let parser = Parser{tokens: tokens, index: 0}; - // try!(parser.file()); - - let parser = Parser{text: text, tokens: &mut tokens.iter().peekable()}; - try!(parser.file()); - - let shebang_re = re(r"^\s*#!(.*)$" ); - let comment_re = re(r"^\s*#([^!].*)?$" ); - let command_re = re(r"^(\s+).*$" ); - let blank_re = re(r"^\s*$" ); - let label_re = re(r"^([^#]*):(.*)$" ); - let name_re = re(r"^[a-z](-[a-z]|[a-z])*$"); - let whitespace_re = re(r"\s+" ); - - let mut recipes: BTreeMap<&'a str, Recipe<'a>> = BTreeMap::new(); - let mut current_recipe: Option = None; - for (i, line) in text.lines().enumerate() { - if blank_re.is_match(line) { - continue; - } - - if let Some(mut recipe) = current_recipe { - match command_re.captures(line) { - Some(captures) => { - let leading_whitespace = captures.at(1).unwrap(); - if tab_after_space(leading_whitespace) { - return Err(error(text, i, ErrorKind::TabAfterSpace{ - whitespace: leading_whitespace, - })); - } else if recipe.leading_whitespace == "" { - if mixed(leading_whitespace) { - return Err(error(text, i, ErrorKind::MixedLeadingWhitespace{ - whitespace: leading_whitespace - })); - } - recipe.leading_whitespace = leading_whitespace; - } else if !line.starts_with(recipe.leading_whitespace) { - return Err(error(text, i, ErrorKind::InconsistentLeadingWhitespace{ - expected: recipe.leading_whitespace, - found: leading_whitespace, - })); - } - recipe.lines.push(line.split_at(recipe.leading_whitespace.len()).1); - current_recipe = Some(recipe); - continue; - }, - None => { - recipes.insert(recipe.name, recipe); - current_recipe = None; - }, - } - } - - if comment_re.is_match(line) { - // ignore - } else if shebang_re.is_match(line) { - return Err(error(text, i, ErrorKind::OuterShebang)); - } else if let Some(captures) = label_re.captures(line) { - let name = captures.at(1).unwrap(); - if !name_re.is_match(name) { - return Err(error(text, i, ErrorKind::BadRecipeName { - name: name, - })); - } - if let Some(recipe) = recipes.get(name) { - return Err(error(text, i, ErrorKind::DuplicateRecipe { - first: recipe.line_number, - name: name, - })); - } - - let rest = captures.at(2).unwrap().trim(); - let mut dependencies = vec![]; - for part in whitespace_re.split(rest) { - if name_re.is_match(part) { - if dependencies.contains(&part) { - return Err(error(text, i, ErrorKind::DuplicateDependency{ - name: part, - })); - } - dependencies.push(part); - } else { - return Err(error(text, i, ErrorKind::UnparsableDependencies)); - } - } - - current_recipe = Some(Recipe{ - line_number: i, - label: line, - name: name, - leading_whitespace: "", - lines: vec![], - // fragments: vec![], - // variables: BTreeSet::new(), - // arguments: vec![], - dependencies: dependencies, - shebang: false, - }); - } else { - return Err(error(text, i, ErrorKind::Unparsable)); - } - } - - if let Some(recipe) = current_recipe { - recipes.insert(recipe.name, recipe); - } - - let leading_whitespace_re = re(r"^\s+"); - - for recipe in recipes.values_mut() { - for (i, line) in recipe.lines.iter().enumerate() { - let line_number = recipe.line_number + 1 + i; - if shebang_re.is_match(line) { - if i == 0 { - recipe.shebang = true; - } else { - return Err(error(text, line_number, ErrorKind::NonLeadingShebang{recipe: recipe.name})); - } - } - if !recipe.shebang && leading_whitespace_re.is_match(line) { - return Err(error(text, line_number, ErrorKind::ExtraLeadingWhitespace)); - } - } - } - - let mut resolved = HashSet::new(); - let mut seen = HashSet::new(); - let mut stack = vec![]; - - for (_, ref recipe) in &recipes { - try!(resolve(text, &recipes, &mut resolved, &mut seen, &mut stack, &recipe)); - } - - Ok(Justfile{recipes: recipes}) -} diff --git a/src/tests.rs b/src/tests.rs index e61d557..09d583f 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -37,13 +37,6 @@ fn check_recipe( assert_eq!(recipe.dependencies.iter().cloned().collect::>(), dependencies); } -fn expect_success(text: &str) -> Justfile { - match super::parse(text) { - Ok(justfile) => justfile, - Err(error) => panic!("Expected successful parse but got error {}", error), - } -} - #[test] fn circular_dependency() { expect_error("a: b\nb: a", 1, ErrorKind::CircularDependency{circle: vec!["a", "b", "a"]}); @@ -213,6 +206,8 @@ a: */ +use super::{Token, Error, ErrorKind, Justfile}; + fn tokenize_success(text: &str, expected_summary: &str) { let tokens = super::tokenize(text).unwrap(); let roundtrip = tokens.iter().map(|t| { @@ -225,7 +220,20 @@ fn tokenize_success(text: &str, expected_summary: &str) { assert_eq!(token_summary(&tokens), expected_summary); } -fn token_summary(tokens: &[super::Token]) -> String { +fn tokenize_error(text: &str, expected: Error) { + if let Err(error) = super::tokenize(text) { + assert_eq!(error.text, expected.text); + assert_eq!(error.index, expected.index); + assert_eq!(error.line, expected.line); + assert_eq!(error.column, expected.column); + assert_eq!(error.kind, expected.kind); + assert_eq!(error, expected); + } else { + panic!("tokenize() succeeded but expected: {}\n{}", expected, text); + } +} + +fn token_summary(tokens: &[Token]) -> String { tokens.iter().map(|t| { match t.class { super::TokenClass::Line{..} => "*", @@ -241,6 +249,13 @@ fn token_summary(tokens: &[super::Token]) -> String { }).collect::>().join("") } +fn parse_success(text: &str) -> Justfile { + match super::parse(text) { + Ok(justfile) => justfile, + Err(error) => panic!("Expected successful parse but got error {}", error), + } +} + #[test] fn tokenize() { let text = "bob @@ -263,4 +278,69 @@ bob: "; tokenize_success(text, "$N:$>*$*$$*$$*$$*$."); + + tokenize_success("a:=#", "N:=#.") +} + +#[test] +fn inconsistent_leading_whitespace() { + let text = "a: + 0 + 1 +\t2 +"; + tokenize_error(text, Error { + text: text, + index: 9, + line: 3, + column: 0, + kind: ErrorKind::InconsistentLeadingWhitespace{expected: " ", found: "\t"}, + }); + + let text = "a: +\t\t0 +\t\t 1 +\t 2 +"; + tokenize_error(text, Error { + text: text, + index: 12, + line: 3, + column: 0, + kind: ErrorKind::InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "}, + }); +} + +#[test] +fn outer_shebang() { + let text = "#!/usr/bin/env bash"; + tokenize_error(text, Error { + text: text, + index: 0, + line: 0, + column: 0, + kind: ErrorKind::OuterShebang + }); +} + +#[test] +fn unknown_start_of_token() { + let text = "~"; + tokenize_error(text, Error { + text: text, + index: 0, + line: 0, + column: 0, + kind: ErrorKind::UnknownStartOfToken + }); +} + +#[test] +fn parse() { + parse_success(" + +# hello + + + "); }