Giant fucking mess.

This commit is contained in:
Casey Rodarmor 2016-10-16 18:59:49 -07:00
parent 3f231fb5b0
commit 4c44096718
5 changed files with 690 additions and 5 deletions

34
grammar.txt Normal file
View File

@ -0,0 +1,34 @@
Justfile grammar is a little weird. Because of the freeform
nature of recipe bodies, we don't tokenize them with the
same rules as the rest of the justfile. Instead the
tokenizer will emit an INDENT at the beginning of a recipe
body, one or more LINEs, which match everything after the
INDENT whitespace, and a DEDENT at the end.
Thus the lexer is context sensitive, which is a little
gross.
tokens:
NAME = /[a-z]((_|-)?[a-z0-9])*/
EOL = /\n|\r\n/
COMMENT = /#[^!].*/
COLON = /:/
INDENT = emitted when indentation increases
DEDENT = emitted when indentation decreases
LINE = /.*/ only emitted between INDENT/DEDENT pairs, doesn't include INDENT whitespace
EOF = emitted at the end of input
grammar:
justfile = item* EOF
item = COMMENT
| recipe
| EOL
assignment = NAME EQUALS expression COMMENT? EOL
expression = STRING
recipe = NAME+ COLON NAME* EOL (INDENT LINE+ DEDENT)?

View File

@ -1,6 +1,9 @@
test:
cargo test --lib
cargo run -- quine clean > /dev/null 2> /dev/null
#cargo run -- quine clean > /dev/null 2> /dev/null
backtrace:
RUST_BACKTRACE=1 cargo test --lib
publish:
git push github master

67
notes
View File

@ -1,7 +1,27 @@
notes
-----
polyglot:
- parse arguments and store in recipe
- parse lines into fragments and store in recipe
- positional error messages
j:
- vector of substitutions
point to start, end, and &str which is name of variable
- also add a vector of substitutions
- indent for line continuation
- multiple names for short names are actually kind of nice
- multiple {{}} per line
- single assignment variables
- matched /{{.*?}}.*/ then unmatched /{{.*/
- echo subbed line
- static errors when variables are missing {{}}, even if recipe isn't run
- ignore comment lines
- post to facebook to get beta testers
- j user email list (how to engage users more generally?)
- see if dotbot guy likes it
- advertise on facebook to get users
- get the extracted script and test its structure
- can I add rust docs for the command/binary?
- change name to "a polyglot command runner"
@ -10,7 +30,52 @@ polyglot:
- publish to github and cargo
- spam facebook, reddit
- duplicate argument test
- should duplicate dependency mention recipe?
- get rid of panics
- doc comments on recipes
- in depth usage string with doc comments, args, dependencies
get rid of unused public items
tokenize error returns successfully parsed tokens
tokenize continues after parse error but inserts parse error into token stream
make sure regexes are only compiled once
fix grammar.txt to reflect reality
- create a really long example justfile
. unzip tarball
. update package manager deps
. clean
. update logs (repetitive git flow)
- full documentation
. habit of using clever commands and writing little scripts
. very low friction to write a script (no new file, chmod, add to rcs)
. make list of contributors, include travis
variable setting
variable substitution: {{}}
command line arguments: must be specified in recipe 'a foo bar:'
quote
arguments are subbed in with {{variable_name}}
doesn't conflict with shell syntax
doesn't conflict with jquery
conflicts a little bit with rust, but can be overcome
very common in many template languages
different ways of setting arguments:
- go for something like python, so we can use full python at the top level
- go for something like rust, so we can use rust at the top level
- don't do barewords, we need strings anyways, so parse them
- x = 10
- export x = 10
- export x
wishlist:
- ability to export environment variables
- preludes:
may be nice to allow all recipes in a given language to share
functions, variables, etc. could have a "prelude" recipe

View File

@ -50,10 +50,18 @@ pub struct Recipe<'a> {
name: &'a str,
leading_whitespace: &'a str,
lines: Vec<&'a str>,
fragments: Vec<Vec<Fragment<'a>>>,
variables: BTreeSet<&'a str>,
dependencies: Vec<&'a str>,
arguments: Vec<&'a str>,
shebang: bool,
}
/// One piece of a parsed recipe line: either literal text or a
/// `{{variable}}` substitution.
enum Fragment<'a> {
// Literal text, copied through unchanged.
Text{text: &'a str},
// A `{{name}}` reference — presumably substituted with the variable's
// value when the recipe runs; TODO confirm once substitution is wired up.
Variable{name: &'a str},
}
impl<'a> Display for Recipe<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
try!(writeln!(f, "{}", self.label));
@ -221,6 +229,7 @@ enum ErrorKind<'a> {
BadRecipeName{name: &'a str},
CircularDependency{circle: Vec<&'a str>},
DuplicateDependency{name: &'a str},
DuplicateArgument{recipe: &'a str, argument: &'a str},
DuplicateRecipe{first: usize, name: &'a str},
TabAfterSpace{whitespace: &'a str},
MixedLeadingWhitespace{whitespace: &'a str},
@ -231,6 +240,7 @@ enum ErrorKind<'a> {
UnknownDependency{name: &'a str, unknown: &'a str},
Unparsable,
UnparsableDependencies,
UnknownStartOfToken,
}
fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>)
@ -277,6 +287,9 @@ impl<'a> Display for Error<'a> {
try!(write!(f, "circular dependency: {}", circle.join(" -> ")));
return Ok(());
}
ErrorKind::DuplicateArgument{recipe, argument} => {
try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument));
}
ErrorKind::DuplicateDependency{name} => {
try!(writeln!(f, "duplicate dependency: {}", name));
}
@ -318,6 +331,9 @@ impl<'a> Display for Error<'a> {
ErrorKind::UnparsableDependencies => {
try!(writeln!(f, "could not parse dependencies:"));
}
ErrorKind::UnknownStartOfToken => {
try!(writeln!(f, "uknown start of token:"));
}
}
match self.text.lines().nth(self.line) {
@ -435,7 +451,513 @@ impl<'a> Display for RunError<'a> {
}
}
/// A single lexeme produced by `tokenize`, with enough position
/// information for error reporting and to round-trip the input
/// (concatenating every token's `prefix` + `lexeme` reproduces the text).
struct Token<'a> {
// Byte offset within the tokenized text where `prefix` begins.
index: usize,
// Zero-based line number of the token.
line: usize,
// Zero-based column where `prefix` begins on its line.
col: usize,
// Whitespace preceding the lexeme.
prefix: &'a str,
// The matched token text itself; empty for Indent/Dedent and Eof.
lexeme: &'a str,
class: TokenClass,
}
/// Classification of a `Token`.
///
/// `Indent`, `Line`, and `Dedent` only appear around recipe bodies, which
/// are lexed as raw lines rather than with the normal token rules (see
/// grammar.txt for the full token and grammar description).
#[derive(Debug, PartialEq, Clone, Copy)]
enum TokenClass {
Name,
Colon,
Equals,
Comment,
// A full recipe-body line; emitted only between Indent/Dedent.
Line,
// Emitted with an empty lexeme when leading indentation increases.
Indent,
// Emitted with an empty lexeme when indentation returns to column zero.
Dedent,
Eol,
Eof,
}
use TokenClass::*;
/// Builds an anchored, multi-line regex for one token class.
///
/// The returned regex matches at the start of the remaining input:
/// capture group 1 holds any leading spaces/tabs, capture group 2 holds
/// the text matched by `pattern` itself.
fn token(pattern: &str) -> Regex {
    re(&format!(r"^(?m)([ \t]*)({})", pattern))
}
fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
let name_re = token(r"[a-z]((_|-)?[a-z0-9])*");
let colon_re = token(r":" );
let equals_re = token(r"=" );
let comment_re = token(r"#([^!].*)?$" );
//let shebang_re = token(r"#!" );
let eol_re = token(r"\n|\r\n" );
let eof_re = token(r"(?-m)$" );
//let line_re = token(r"[^\n\r]" );
//let split_re = re("(?m)$");
//let body_re = re(r"^(?ms)(.*?$)\s*(^[^ \t\r\n]|(?-m:$))");
// let dedent_re = re(r"^(?m)\s*(^[^\s]|(?-m:$))");
let line_re = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
/*
#[derive(PartialEq)]
enum State<'a> {
Normal, // starting state
Colon, // we have seen a colon since the last eol
Recipe, // we are on the line after a colon
Body{indent: &'a str}, // we are in a recipe body
}
*/
// state is:
// beginning of line or not
// current indent
fn indentation(text: &str) -> Option<&str> {
// fix this so it isn't recompiled every time
let indent_re = re(r"^([ \t]*)[^ \t\n\r]");
indent_re.captures(text).map(|captures| captures.at(1).unwrap())
}
let mut tokens = vec![];
let mut rest = text;
let mut index = 0;
let mut line = 0;
let mut col = 0;
let mut indent: Option<&str> = None;
// let mut line = 0;
// let mut col = 0;
// let mut state = State::Normal;
// let mut line_start = true;
loop {
if col == 0 {
if let Some(class) = match (indent, indentation(rest)) {
// dedent
(Some(_), Some("")) => {
indent = None;
Some(Dedent)
}
(None, Some("")) => {
None
}
// indent
(None, Some(current @ _)) => {
// check mixed leading whitespace
indent = Some(current);
Some(Indent)
}
(Some(previous), Some(current @ _)) => {
if !current.starts_with(previous) {
return Err(error(text, line,
ErrorKind::InconsistentLeadingWhitespace{expected: previous, found: current}
));
}
None
// check tabs after spaces
}
// ignore
_ => {
None
}
} {
tokens.push(Token {
index: index,
line: line,
col: col,
prefix: "",
lexeme: "",
class: class,
});
}
}
let (prefix, lexeme, class) =
if let (0, Some(indent), Some(captures)) = (col, indent, line_re.captures(rest)) {
let line = captures.at(0).unwrap();
if !line.starts_with(indent) {
panic!("Line did not start with expected indentation");
}
let (prefix, lexeme) = line.split_at(indent.len());
(prefix, lexeme, Line)
} else if let Some(captures) = name_re.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Name)
} else if let Some(captures) = eol_re.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Eol)
} else if let Some(captures) = eof_re.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Eof)
} else if let Some(captures) = colon_re.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Colon)
} else if let Some(captures) = equals_re.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Equals)
} else if let Some(captures) = comment_re.captures(rest) {
(captures.at(1).unwrap(), captures.at(2).unwrap(), Comment)
} else {
return Err(if rest.starts_with("#!") {
error(text, line, ErrorKind::OuterShebang)
} else {
error(text, line, ErrorKind::UnknownStartOfToken)
});
};
// let (captures, class) = if let (0, Some(captures)) = line_re.captures(rest) {
/*
*/
/*
if state == State::Recipe {
let captures = indent_re.captures(rest).unwrap();
let indent = captures.at(1).unwrap();
let text = captures.at(2).unwrap();
if indent != "" && text != "" {
tokens.push(Token {
index: index,
prefix: "",
lexeme: "",
class: TokenClass::Indent,
});
state = State::Body{indent: indent};
} else {
state = State::Normal;
}
}
*/
/*
State::Body{indent: _} => {
if let Some(captures) = body_re.captures(rest) {
let body_text = captures.at(1).unwrap();
for mut line in split_re.split(body_text) {
if let Some(captures) = line_re.captures(line) {
let len = captures.at(0).unwrap().len();
tokens.push(Token {
index: index,
prefix: captures.at(1).unwrap(),
lexeme: captures.at(2).unwrap(),
class: TokenClass::Eol,
});
line = &line[len..];
}
println!("{:?}", line);
}
panic!("matched body: {}", captures.at(1).unwrap());
// split the body into lines
// for each line in the body, push a line if nonblank, then an eol
// push a dedent
}
},
*/
// State::Normal | State::Colon | State::Body{..} => {
/*
let (captures, class) = if let Some(captures) = eol_re.captures(rest) {
(captures, TokenClass::Eol)
} else if let State::Body{indent} = state {
if dedent_re.is_match(rest) {
tokens.push(Token {
index: index,
prefix: "",
lexeme: "",
class: TokenClass::Dedent,
});
state = State::Normal;
continue
}
if let Some(captures) = line_re.captures(rest) {
(captures, TokenClass::Line)
} else {
panic!("Failed to match a line");
}
} else if let Some(captures) = anchor_re.captures(rest) {
(captures, TokenClass::Anchor)
} else if let Some(captures) = name_re.captures(rest) {
(captures, TokenClass::Name)
} else if let Some(captures) = colon_re.captures(rest) {
(captures, TokenClass::Colon)
} else if let Some(captures) = comment_re.captures(rest) {
let text = captures.at(3).unwrap_or("");
(captures, TokenClass::Comment{text: text})
} else if let Some(captures) = eof_re.captures(rest) {
(captures, TokenClass::Eof)
} else {
panic!("Did not match a token! Rest: {}", rest);
};
*/
// let (captures, class) = if let (true, Some(captures)) = (line_start,
// let all = captures.at(0).unwrap();
// let prefix = captures.at(1).unwrap();
// let lexeme = captures.at(2).unwrap();
// let len = all.len();
// let eof = class == TokenClass::Eof;
//assert!(eof || lexeme.len() > 0);
//assert!(all.len() > 0);
//assert!(prefix.len() + lexeme.len() == len);
/*
if class == TokenClass::Colon {
state = State::Colon;
} else if class == TokenClass::Eol && state == State::Colon {
state = State::Recipe;
}
*/
/*
if class == TokenClass::Eol {
row += 1;
col = 0;
} else {
col += len;
}
let eof = TokenClass::Eof {
}
*/
let len = prefix.len() + lexeme.len();
tokens.push(Token {
index: index,
line: line,
col: col,
prefix: prefix,
lexeme: lexeme,
class: class,
});
match tokens.last().unwrap().class {
Eol => {
line += 1;
col = 0;
},
Eof => {
break;
},
_ => {
col += len;
}
}
rest = &rest[len..];
index += len;
}
Ok(tokens)
}
/*
struct Parser<'a, I> {
tokens: Vec<Token<'a>>,
index: usize,
}
*/
//impl<'a> Parser<'a> {
/*
fn peek(&mut self) -> TokenClass {
self.tokens[self.index].class
}
fn advance(&mut self) {
self.index += 1;
}
fn accept_eol(&mut self) -> bool {
if self.accept(TokenClass::Comment) {
self.expect(TokenClass::Eol);
true
} else
}
*/
/*
fn accept(&mut self, class: TokenClass) -> bool {
if self.tokens[self.index].class == class {
self.index += 1;
true
} else {
false
}
}
*/
/*
fn peek(&mut self) -> Option<TokenClass> {
self.tokens.get(self.index).map(|t| t.class)
}
fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
let recipes = BTreeMap::new();
loop {
let ref current = self.tokens[self.index];
self.index += 1;
match current.class {
TokenClass::Eof => break,
TokenClass::Comment => continue,
TokenClass::Eol => continue,
TokenClass::Name => {
match self.peek() {
Some(TokenClass::Name) | Some(TokenClass::Colon) => {
panic!("time to parse a recipe");
}
Some(TokenClass::Equals) => {
panic!("time to parse an assignment");
}
Some(unexpected @ _) => {
panic!("unexpected token");
}
None => {
panic!("unexpected end of token stream");
}
}
}
unexpected @ _ => {
panic!("unexpected token at top level");
}
}
}
Ok(Justfile{recipes: recipes})
}
}
*/
// struct Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
// tokens: std::iter::Peekable<I>,
// }
/// Recursive descent parser over a peekable stream of `Token`s.
/// `'t` is the lifetime of the tokenized source text; `'i` is the
/// lifetime of the borrowed token iterator itself.
struct Parser<'i, 't: 'i> {
// Full source text, kept for positional error reporting.
text: &'t str,
// Borrowed, peekable iterator over the tokens produced by `tokenize`.
tokens: &'i mut std::iter::Peekable<std::slice::Iter<'i, Token<'t>>>
}
// NOTE(review): this impl is clearly work in progress — several paths end
// in panic!() placeholders and `recipe` never returns Ok. Comments below
// describe what the code currently does, with TODOs where it is unfinished.
impl<'i, 't> Parser<'i, 't> {
// If the next token has class `class`, consumes and returns it;
// otherwise leaves the stream untouched and returns None.
// NOTE(review): unwrap() assumes the stream always ends with Eof and is
// never advanced past it — confirm tokenize guarantees this.
fn accept(&mut self, class: TokenClass) -> Option<&Token<'t>> {
if self.tokens.peek().unwrap().class == class {
Some(self.tokens.next().unwrap())
} else {
None
}
}
// Like `accept`, but only reports whether the token was consumed.
fn accepted(&mut self, class: TokenClass) -> bool {
self.accept(class).is_some()
}
// Consumes a token of class `class`, panicking if the next token does
// not match. TODO: replace the placeholder panic with a proper Error.
fn expect(&mut self, class: TokenClass) {
if !self.accepted(class) {
panic!("we fucked");
}
}
// Returns true if the next token has class `class`, without consuming it.
fn peek(&mut self, class: TokenClass) -> bool {
self.tokens.peek().unwrap().class == class
}
// Consumes an end of line: an optional comment followed by Eol (the Eol
// is not required when the comment runs into Eof). Returns whether
// anything was consumed.
fn accept_eol(&mut self) -> bool {
if self.accepted(Comment) {
if !self.peek(Eof) { self.expect(Eol) };
true
} else {
self.accepted(Eol)
}
}
// Parses one recipe, whose leading Name (`name`) has already been
// consumed by `file`: remaining argument Names, a Colon, then dependency
// Names. Rejects duplicate arguments and duplicate dependencies.
fn recipe(&mut self, name: &'t str) -> Result<Recipe<'t>, Error<'t>> {
let mut arguments = vec![];
loop {
if let Some(name_token) = self.accept(Name) {
if arguments.contains(&name_token.lexeme) {
return Err(error(self.text, name_token.line, ErrorKind::DuplicateArgument{
recipe: name, argument: name_token.lexeme}));
}
arguments.push(name_token.lexeme);
} else {
break;
}
}
self.expect(Colon);
let mut dependencies = vec![];
loop {
if let Some(name_token) = self.accept(Name) {
if dependencies.contains(&name_token.lexeme) {
return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{
name: name_token.lexeme}));
}
dependencies.push(name_token.lexeme);
} else {
break;
}
}
// if !self.accept_eol() {
// return Err(error(self.text, i, ErrorKind::UnparsableDependencies));
// }
// TODO: body parsing is unimplemented — this always panics.
panic!("we fucked");
// Ok(Recipe{
// })
}
// Parses a whole justfile: skips blank/comment lines, dispatches each
// leading Name to either an assignment (unimplemented) or a recipe.
fn file(mut self) -> Result<Justfile<'t>, Error<'t>> {
let mut recipes = BTreeMap::new();
loop {
if self.accepted(Eof) { break; }
if self.accept_eol() { continue; }
match self.tokens.next() {
Some(&Token{class: Name, line, lexeme: name, ..}) => {
if self.accepted(Equals) {
panic!("Variable assignment not yet implemented");
} else {
if recipes.contains_key(name) {
// NOTE(review): this is a duplicate *recipe* name, but it is
// reported as DuplicateDependency — DuplicateRecipe{first, name}
// looks like the intended ErrorKind; confirm and fix.
return Err(error(self.text, line, ErrorKind::DuplicateDependency{
name: name,
}));
}
let recipe = try!(self.recipe(name));
recipes.insert(name, recipe);
}
}
// TODO: report an UnexpectedToken error instead of panicking.
_ => panic!("got something else")
};
}
// assert that token.next() == None
Ok(Justfile{recipes: recipes})
}
}
// impl<'a, I> Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
// fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
// Ok()
// }
// }
pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
let tokens = try!(tokenize(text));
// let parser = Parser{tokens: tokens, index: 0};
// try!(parser.file());
let parser = Parser{text: text, tokens: &mut tokens.iter().peekable()};
try!(parser.file());
let shebang_re = re(r"^\s*#!(.*)$" );
let comment_re = re(r"^\s*#([^!].*)?$" );
let command_re = re(r"^(\s+).*$" );
@ -522,6 +1044,9 @@ pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
name: name,
leading_whitespace: "",
lines: vec![],
fragments: vec![],
variables: BTreeSet::new(),
arguments: vec![],
dependencies: dependencies,
shebang: false,
});

View File

@ -11,7 +11,7 @@ fn expect_error(text: &str, line: usize, expected_error_kind: ErrorKind) {
expected_error_kind, line, error.line);
}
if error.kind != expected_error_kind {
panic!("Expected {:?} error but got {:?}", error.kind, expected_error_kind);
panic!("Expected {:?} error but got {:?}", expected_error_kind, error.kind);
}
}
}
@ -62,7 +62,7 @@ fn duplicate_recipe() {
}
#[test]
fn tab_after_paces() {
fn tab_after_spaces() {
expect_error(
"a:\n \tspaces",
1, ErrorKind::TabAfterSpace{whitespace: " \t"}
@ -107,15 +107,20 @@ fn unparsable() {
expect_error("hello", 0, ErrorKind::Unparsable);
}
/*
can we bring this error back?
#[test]
fn unparsable_dependencies() {
expect_error("a: -f", 0, ErrorKind::UnparsableDependencies);
}
*/
/*
we should be able to emit these errors
#[test]
fn bad_recipe_names() {
fn expect_bad_name(text: &str, name: &str) {
expect_error(text, 0, ErrorKind::BadRecipeName{name: name});
expect_error(text, 0, ErrorKind::UnknownStartOfToken{name: name});
}
expect_bad_name("Z:", "Z");
expect_bad_name("a-:", "a-");
@ -123,6 +128,7 @@ fn bad_recipe_names() {
expect_bad_name("a--a:", "a--a");
expect_bad_name("@:", "@");
}
*/
#[test]
fn parse() {
@ -202,3 +208,55 @@ a:
other @ _ => panic!("expected an code run error, but got: {}", other),
}
}
/// Tokenizes `text` and asserts two properties: the tokens' prefixes and
/// lexemes concatenate back into the original input exactly, and the
/// one-character-per-token summary equals `expected_summary`.
fn tokenize_success(text: &str, expected_summary: &str) {
    let tokens = super::tokenize(text).unwrap();
    let mut roundtrip = String::new();
    for t in tokens.iter() {
        roundtrip += t.prefix;
        roundtrip += t.lexeme;
    }
    assert_eq!(text, roundtrip);
    assert_eq!(token_summary(tokens), expected_summary);
}
/// Renders a token stream as a compact string with one character per
/// token, for concise test assertions.
fn token_summary(tokens: Vec<super::Token>) -> String {
    let mut summary = String::new();
    for t in tokens.iter() {
        summary += match t.class {
            super::TokenClass::Line{..}    => "*",
            super::TokenClass::Name        => "N",
            super::TokenClass::Colon       => ":",
            super::TokenClass::Equals      => "=",
            super::TokenClass::Comment{..} => "#",
            super::TokenClass::Indent{..}  => ">",
            super::TokenClass::Dedent      => "<",
            super::TokenClass::Eol         => "$",
            super::TokenClass::Eof         => ".",
        };
    }
    summary
}
#[test]
fn tokenize() {
// NOTE(review): the leading whitespace inside the string literals below
// appears to have been stripped in extraction — the expected summaries
// contain Indent (>) / Line (*) / Dedent (<) tokens, which only appear
// for indented recipe-body lines. Verify the literals against the
// original source before relying on this test.
let text = "bob
hello blah blah blah : a b c #whatever
";
// N = Name, $ = Eol, : = Colon, # = Comment, . = Eof
tokenize_success(text, "N$$NNNN:NNN#$.");
let text = "
hello:
a
b
c
d
bob:
frank
";
// > = Indent, * = Line, < = Dedent
tokenize_success(text, "$N:$>*$*$$*$$*$$<N:$>*$.");
}