Giant fucking mess.

2016-10-16 18:59:49 -07:00 · 2016-10-16 18:59:49 -07:00 · 4c44096718
commit 4c44096718
parent 3f231fb5b0
5 changed files with 690 additions and 5 deletions
--- a/grammar.txt
+++ b/grammar.txt
@ -0,0 +1,34 @@
 Justfile grammar is a little weird. Because of the freeform
 nature of recipe bodies, we don't tokenize them with the
 same rules as the rest of the justfile. Instead the
 tokenizer will emit a INDENT at the beginning of a recipe
 body, one or more LINEs, which match everything after the
 INDENT whitespace, and a DEDENT at the end.
 Thus the lexer is context sensitive, which is a little
 gross.
 tokens:
 NAME    = /[a-z]((_|-)?[a-z0-9])*/
 EOL     = /\n|\r\n/
 COMMENT = /#[^!].*/
 COLON   = /:/
 INDENT  = emitted when indentation increases
 DEDENT  = emitted when indentation decreases
 LINE    = /.*/ only emitted between INDENT/DEDENT pairs, doesn't include INDENT whitespace
 EOF     = emitted at the end of input
 grammar:
 justfile = item* EOF
 item = COMMENT
     | recipe
     | EOL
 assignment = NAME EQUALS expression COMMENT? EOL
 expression = STRING
 recipe = NAME+ COLON NAME* EOL (INDENT LINE+ DEDENT)?
--- a/5
+++ b/5
@ -1,6 +1,9 @@
 test:
 	cargo test --lib
-	cargo run -- quine clean > /dev/null 2> /dev/null
+	#cargo run -- quine clean > /dev/null 2> /dev/null
 backtrace:
 	RUST_BACKTRACE=1 cargo test --lib
 publish:
 	git push github master
--- a/67
+++ b/67
@ -1,7 +1,27 @@
 notes
 -----
-polyglot:
+- parse arguments and store in recipe
 - parse lines into fragments and store in recipe
 - positional error messages
 j: 
 - vector of substitutions
  point to start, end, and &str which is name of variable
 - also add a vector of substitutions
 - indent for line continuation
 - multiple names for short names are actually kind of nice
 - multiple {{}} per line
 - single assignment variables
 - matched /{{.*?}}.*/ then unmatched /{{.*/
 - echo subbed line
 - static errors when variables are missing {{}}, even if recipe isn't run
 - ignore comment lines
 - post to facebook to get beta testers
 - j user email list (how to engage users more generally?)
 - see if dotbot guy likes it
 - advertise on facebook to get users
 - get the extracted script and test its structure
 - can I add rust docs for the command/binary?
 - change name to "a polyglot command runner"
@ -10,7 +30,52 @@ polyglot:
 - publish to github and cargo
 - spam facebook, reddit
 - duplicate argument test
 - should duplicate dependency mention recipe?
 - get rid of panics
 - doc comments on recipes
 - in depth usage string with doc comments, args, dependencies
 get rid of unused public items
 tokenize error returns successfully parsed tokens
 tokenize continues after parse error but inserts parse error into token stream
 make sure regexes are only compiled once
 fix grammar.txt to reflect reality
 - create a really long example justfile
  . unzip tarball
  . update package manager deps
  . clean
  . update logs (repetitive git flow)
 - full documentation
  . habit of using clever commands and writing little scripts
  . very low friction to write a script (no new file, chmod, add to rcs)
  . make list of contributors, include travis
 variable setting
 variable substitution:  {{}}
 command line arguments: must be specified in recipe 'a foo bar:'
 quote
 arguments are subbed in with {{variable_name}}
 doesn't conflict with shell syntax
 doesn't conflict with jquery
 conflicts a little bit with rust, but can be overcome
 very common in many template languages
 different ways of setting arguments:
 - go for something like python, so we can use full python at the top level
 - go for something like rust, so we can use rust at the top level
 - don't do barewords, we need strings anyways, so parse them
 - x = 10
 - export x = 10
 - export x
 wishlist:
 - ability to export environment variables
 - preludes:
  may be nice to allow all recipes in a given langauge to share
  functions, variables, etc. could have a "prelude" recipe
--- a/src/lib.rs
+++ b/src/lib.rs
@ -50,10 +50,18 @@ pub struct Recipe<'a> {
  name:               &'a str,
  leading_whitespace: &'a str,
  lines:              Vec<&'a str>,
  fragments:          Vec<Vec<Fragment<'a>>>,
  variables:          BTreeSet<&'a str>,
  dependencies:       Vec<&'a str>,
  arguments:          Vec<&'a str>,
  shebang:            bool,
 }
 enum Fragment<'a> {
  Text{text: &'a str},
  Variable{name: &'a str},
 }
 impl<'a> Display for Recipe<'a> {
  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
    try!(writeln!(f, "{}", self.label));
@ -221,6 +229,7 @@ enum ErrorKind<'a> {
  BadRecipeName{name: &'a str},
  CircularDependency{circle: Vec<&'a str>},
  DuplicateDependency{name: &'a str},
  DuplicateArgument{recipe: &'a str, argument: &'a str},
  DuplicateRecipe{first: usize, name: &'a str},
  TabAfterSpace{whitespace: &'a str},
  MixedLeadingWhitespace{whitespace: &'a str},
@ -231,6 +240,7 @@ enum ErrorKind<'a> {
  UnknownDependency{name: &'a str, unknown: &'a str},
  Unparsable,
  UnparsableDependencies,
  UnknownStartOfToken,
 }
 fn error<'a>(text: &'a str, line: usize, kind: ErrorKind<'a>) 
@ -277,6 +287,9 @@ impl<'a> Display for Error<'a> {
        try!(write!(f, "circular dependency: {}", circle.join(" -> ")));
        return Ok(());
      }
      ErrorKind::DuplicateArgument{recipe, argument} => {
        try!(writeln!(f, "recipe {} has duplicate argument: {}", recipe, argument));
      }
      ErrorKind::DuplicateDependency{name} => {
        try!(writeln!(f, "duplicate dependency: {}", name));
      }
@ -318,6 +331,9 @@ impl<'a> Display for Error<'a> {
      ErrorKind::UnparsableDependencies => {
        try!(writeln!(f, "could not parse dependencies:"));
      }
      ErrorKind::UnknownStartOfToken => {
        try!(writeln!(f, "uknown start of token:"));
      }
    }
    match self.text.lines().nth(self.line) {
@ -435,7 +451,513 @@ impl<'a> Display for RunError<'a> {
  }
 }
 struct Token<'a> {
  index:  usize,
  line:   usize,
  col:    usize,
  prefix: &'a str,
  lexeme: &'a str,
  class:  TokenClass,
 }
 #[derive(Debug, PartialEq, Clone, Copy)]
 enum TokenClass {
  Name,
  Colon,
  Equals,
  Comment,
  Line,
  Indent,
  Dedent,
  Eol,
  Eof,
 }
 use TokenClass::*;
 fn token(pattern: &str) -> Regex {
  let mut s = String::new();
  s += r"^(?m)([ \t]*)(";
  s += pattern;
  s += ")";
  re(&s)
 }
 fn tokenize(text: &str) -> Result<Vec<Token>, Error> {
  let name_re    = token(r"[a-z]((_|-)?[a-z0-9])*");
  let colon_re   = token(r":"                     );
  let equals_re  = token(r"="                     );
  let comment_re = token(r"#([^!].*)?$"           );
  //let shebang_re = token(r"#!"                    );
  let eol_re     = token(r"\n|\r\n"               );
  let eof_re     = token(r"(?-m)$"                );
  //let line_re    = token(r"[^\n\r]"                );
  //let split_re  = re("(?m)$");
  //let body_re   = re(r"^(?ms)(.*?$)\s*(^[^ \t\r\n]|(?-m:$))");
  // let dedent_re = re(r"^(?m)\s*(^[^\s]|(?-m:$))");
  let line_re = re(r"^(?m)[ \t]+[^ \t\n\r].*$");
  /*
  #[derive(PartialEq)]
  enum State<'a> {
    Normal, // starting state
    Colon,  // we have seen a colon since the last eol
    Recipe, // we are on the line after a colon
    Body{indent: &'a str},   // we are in a recipe body
  }
  */
  // state is:
  //   beginning of line or not
  //   current indent
  fn indentation(text: &str) -> Option<&str> {
    // fix this so it isn't recompiled every time
    let indent_re = re(r"^([ \t]*)[^ \t\n\r]");
    indent_re.captures(text).map(|captures| captures.at(1).unwrap())
  }
  let mut tokens     = vec![];
  let mut rest       = text;
  let mut index      = 0;
  let mut line       = 0;
  let mut col        = 0;
  let mut indent: Option<&str> = None;
  // let mut line   = 0;
  // let mut col    = 0;
  // let mut state  = State::Normal;
  // let mut line_start = true;
  loop {
    if col == 0 {
      if let Some(class) = match (indent, indentation(rest)) {
        // dedent
        (Some(_), Some("")) => {
          indent = None;
          Some(Dedent)
        }
        (None, Some("")) => {
          None
        }
        // indent
        (None, Some(current @ _)) => {
          // check mixed leading whitespace
          indent = Some(current);
          Some(Indent)
        }
        (Some(previous), Some(current @ _)) => {
          if !current.starts_with(previous) {
            return Err(error(text, line, 
              ErrorKind::InconsistentLeadingWhitespace{expected: previous, found: current}
            ));
          }
          None
          // check tabs after spaces
        }
        // ignore
        _ => {
          None
        }
      } {
        tokens.push(Token {
          index:  index,
          line:   line,
          col:    col,
          prefix: "",
          lexeme: "",
          class:  class,
        });
      }
    }
    let (prefix, lexeme, class) = 
    if let (0, Some(indent), Some(captures)) = (col, indent, line_re.captures(rest)) {
      let line = captures.at(0).unwrap();
      if !line.starts_with(indent) {
        panic!("Line did not start with expected indentation");
      }
      let (prefix, lexeme) = line.split_at(indent.len());
      (prefix, lexeme, Line)
    } else if let Some(captures) = name_re.captures(rest) {
      (captures.at(1).unwrap(), captures.at(2).unwrap(), Name)
    } else if let Some(captures) = eol_re.captures(rest) {
      (captures.at(1).unwrap(), captures.at(2).unwrap(), Eol)
    } else if let Some(captures) = eof_re.captures(rest) {
      (captures.at(1).unwrap(), captures.at(2).unwrap(), Eof)
    } else if let Some(captures) = colon_re.captures(rest) {
      (captures.at(1).unwrap(), captures.at(2).unwrap(), Colon)
    } else if let Some(captures) = equals_re.captures(rest) {
      (captures.at(1).unwrap(), captures.at(2).unwrap(), Equals)
    } else if let Some(captures) = comment_re.captures(rest) {
      (captures.at(1).unwrap(), captures.at(2).unwrap(), Comment)
    } else {
      return Err(if rest.starts_with("#!") {
        error(text, line, ErrorKind::OuterShebang)
      } else {
        error(text, line, ErrorKind::UnknownStartOfToken)
      });
    };
    // let (captures, class) = if let (0, Some(captures)) = line_re.captures(rest) {
    /*
    */
    /*
    if state == State::Recipe {
      let captures = indent_re.captures(rest).unwrap();
      let indent = captures.at(1).unwrap();
      let text = captures.at(2).unwrap();
      if indent != "" && text != "" {
        tokens.push(Token {
          index:  index,
          prefix: "",
          lexeme: "",
          class:  TokenClass::Indent,
        });
        state = State::Body{indent: indent};
      } else {
        state = State::Normal;
      }
    }
    */
      /*
      State::Body{indent: _} => {
        if let Some(captures) = body_re.captures(rest) {
          let body_text = captures.at(1).unwrap();
          for mut line in split_re.split(body_text) {
            if let Some(captures) = line_re.captures(line) {
              let len = captures.at(0).unwrap().len();
              tokens.push(Token {
                index:  index,
                prefix: captures.at(1).unwrap(),
                lexeme: captures.at(2).unwrap(),
                class:  TokenClass::Eol,
              });
              line = &line[len..];
            }
            println!("{:?}", line);
          }
          panic!("matched body: {}", captures.at(1).unwrap());
          // split the body into lines
          // for each line in the body, push a line if nonblank, then an eol
          // push a dedent
        }
      },
      */
      // State::Normal | State::Colon | State::Body{..} => {
    /*
    let (captures, class) = if let Some(captures) = eol_re.captures(rest) {
      (captures, TokenClass::Eol)
    } else if let State::Body{indent} = state {
      if dedent_re.is_match(rest) {
        tokens.push(Token {
          index:  index,
          prefix: "",
          lexeme: "",
          class:  TokenClass::Dedent,
        });
        state = State::Normal;
        continue
      }
      if let Some(captures) = line_re.captures(rest) {
        (captures, TokenClass::Line)
      } else {
        panic!("Failed to match a line");
      }
    } else if let Some(captures) = anchor_re.captures(rest) {
      (captures, TokenClass::Anchor)
    } else if let Some(captures) = name_re.captures(rest) {
      (captures, TokenClass::Name)
    } else if let Some(captures) = colon_re.captures(rest) {
      (captures, TokenClass::Colon)
    } else if let Some(captures) = comment_re.captures(rest) {
      let text = captures.at(3).unwrap_or("");
      (captures, TokenClass::Comment{text: text})
    } else if let Some(captures) = eof_re.captures(rest) {
      (captures, TokenClass::Eof)
    } else {
      panic!("Did not match a token! Rest: {}", rest);
    };
    */
    // let (captures, class) = if let (true, Some(captures)) = (line_start, 
    // let all    = captures.at(0).unwrap();
    // let prefix = captures.at(1).unwrap();
    // let lexeme = captures.at(2).unwrap();
    // let len    = all.len();
    // let eof    = class == TokenClass::Eof;
    //assert!(eof || lexeme.len() > 0);
    //assert!(all.len() > 0);
    //assert!(prefix.len() + lexeme.len() == len);
    /*
    if class == TokenClass::Colon {
      state = State::Colon;
    } else if class == TokenClass::Eol && state == State::Colon {
      state = State::Recipe;
    }
    */
    /*
    if class == TokenClass::Eol {
      row += 1;
      col = 0;
    } else {
      col += len;
    }
    let eof = TokenClass::Eof {
    }
    */
    let len = prefix.len() + lexeme.len();
    tokens.push(Token {
      index:  index,
      line:    line,
      col:    col,
      prefix: prefix,
      lexeme: lexeme,
      class:  class,
    });
    match tokens.last().unwrap().class {
      Eol => {
        line += 1;
        col = 0;
      },
      Eof => {
        break;
      },
      _ => {
        col += len;
      }
    }
    rest = &rest[len..];
    index += len;
  }
  Ok(tokens)
 }
 /*
 struct Parser<'a, I> {
  tokens: Vec<Token<'a>>,
  index:  usize,
 }
 */
 //impl<'a> Parser<'a> {
  /*
  fn peek(&mut self) -> TokenClass {
    self.tokens[self.index].class
  }
  fn advance(&mut self) {
    self.index += 1;
  }
  fn accept_eol(&mut self) -> bool {
    if self.accept(TokenClass::Comment) {
      self.expect(TokenClass::Eol);
      true
    } else
  }
  */
  /*
  fn accept(&mut self, class: TokenClass) -> bool {
    if self.tokens[self.index].class == class {
      self.index += 1;
      true
    } else {
      false
    }
  }
  */
  /*
  fn peek(&mut self) -> Option<TokenClass> {
    self.tokens.get(self.index).map(|t| t.class)
  }
  fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
    let recipes = BTreeMap::new();
    loop {
      let ref current = self.tokens[self.index];
      self.index += 1;
      match current.class {
        TokenClass::Eof     => break,
        TokenClass::Comment => continue,
        TokenClass::Eol     => continue,
        TokenClass::Name    => {
          match self.peek() {
            Some(TokenClass::Name) | Some(TokenClass::Colon) => {
              panic!("time to parse a recipe");
            }
            Some(TokenClass::Equals) => {
              panic!("time to parse an assignment");
            }
            Some(unexpected @ _) => {
              panic!("unexpected token");
            }
            None => {
              panic!("unexpected end of token stream");
            }
          }
        }
        unexpected @ _ => {
          panic!("unexpected token at top level");
        }
      }
    }
    Ok(Justfile{recipes: recipes})
  }
 }
 */
 // struct Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
 //   tokens: std::iter::Peekable<I>,
 // }
 struct Parser<'i, 't: 'i> {
  text:   &'t str,
  tokens: &'i mut std::iter::Peekable<std::slice::Iter<'i, Token<'t>>>
 }
 impl<'i, 't> Parser<'i, 't> {
  fn accept(&mut self, class: TokenClass) -> Option<&Token<'t>> {
    if self.tokens.peek().unwrap().class == class {
      Some(self.tokens.next().unwrap())
    } else {
      None
    }
  }
  fn accepted(&mut self, class: TokenClass) -> bool {
    self.accept(class).is_some()
  }
  fn expect(&mut self, class: TokenClass) {
    if !self.accepted(class) {
      panic!("we fucked");
    }
  }
  fn peek(&mut self, class: TokenClass) -> bool {
    self.tokens.peek().unwrap().class == class
  }
  fn accept_eol(&mut self) -> bool {
    if self.accepted(Comment) {
      if !self.peek(Eof) { self.expect(Eol) };
      true
    } else {
      self.accepted(Eol)
    }
  }
  // fn accept(&mut self) -> Result<Token<'t>, Error<'t>> {
  // match self.peek(
  // }
  fn recipe(&mut self, name: &'t str) -> Result<Recipe<'t>, Error<'t>> {
    let mut arguments = vec![];
    loop {
      if let Some(name_token) = self.accept(Name) {
        if arguments.contains(&name_token.lexeme) {
          return Err(error(self.text, name_token.line, ErrorKind::DuplicateArgument{
            recipe: name, argument: name_token.lexeme}));
        }
        arguments.push(name_token.lexeme);
      } else {
        break;
      }
    }
    self.expect(Colon);
    let mut dependencies = vec![];
    loop {
      if let Some(name_token) = self.accept(Name) {
        if dependencies.contains(&name_token.lexeme) {
          return Err(error(self.text, name_token.line, ErrorKind::DuplicateDependency{
            name: name_token.lexeme}));
        }
        dependencies.push(name_token.lexeme);
      } else {
        break;
      }
    }
    // if !self.accept_eol() {
    //   return Err(error(self.text, i, ErrorKind::UnparsableDependencies));
    // }
    panic!("we fucked");
    // Ok(Recipe{
    // })
  }
  fn file(mut self) -> Result<Justfile<'t>, Error<'t>> {
    let mut recipes = BTreeMap::new();
    loop {
      if self.accepted(Eof) { break;    }
      if self.accept_eol()  { continue; }
      match self.tokens.next() {
        Some(&Token{class: Name, line, lexeme: name, ..}) => {
          if self.accepted(Equals) {
            panic!("Variable assignment not yet implemented");
          } else {
            if recipes.contains_key(name) {
              return Err(error(self.text, line, ErrorKind::DuplicateDependency{
                name: name,
              }));
            }
            let recipe = try!(self.recipe(name));
            recipes.insert(name, recipe);
          }
        }
        _ => panic!("got something else")
      };
    }
    // assert that token.next() == None
    Ok(Justfile{recipes: recipes})
  }
 }
 // impl<'a, I> Parser<'a, I> where I: std::iter::Iterator<Item=Token<'a>> {
 //   fn file(mut self) -> Result<Justfile<'a>, Error<'a>> {
 //     Ok()
 //   }
 // }
 pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
  let tokens = try!(tokenize(text));
  // let parser = Parser{tokens: tokens, index: 0};
  // try!(parser.file());
  let parser = Parser{text: text, tokens: &mut tokens.iter().peekable()};
  try!(parser.file());
  let shebang_re    = re(r"^\s*#!(.*)$"           );
  let comment_re    = re(r"^\s*#([^!].*)?$"       );
  let command_re    = re(r"^(\s+).*$"             );
@ -522,6 +1044,9 @@ pub fn parse<'a>(text: &'a str) -> Result<Justfile, Error> {
        name:               name,
        leading_whitespace: "",
        lines:              vec![],
        fragments:          vec![],
        variables:          BTreeSet::new(),
        arguments:          vec![],
        dependencies:       dependencies,
        shebang:            false,
      });
--- a/src/tests.rs
+++ b/src/tests.rs
@ -11,7 +11,7 @@ fn expect_error(text: &str, line: usize, expected_error_kind: ErrorKind) {
               expected_error_kind, line, error.line);
      }
      if error.kind != expected_error_kind {
-        panic!("Expected {:?} error but got {:?}", error.kind, expected_error_kind);
+        panic!("Expected {:?} error but got {:?}", expected_error_kind, error.kind);
      }
    }
  }
@ -62,7 +62,7 @@ fn duplicate_recipe() {
 }
 #[test]
-fn tab_after_paces() {
+fn tab_after_spaces() {
  expect_error(
    "a:\n \tspaces",
    1, ErrorKind::TabAfterSpace{whitespace: " \t"}
@ -107,15 +107,20 @@ fn unparsable() {
  expect_error("hello", 0, ErrorKind::Unparsable);
 }
 /*
   can we bring this error back?
 #[test]
 fn unparsable_dependencies() {
  expect_error("a: -f", 0, ErrorKind::UnparsableDependencies);
 }
 */
 /*
   we should be able to emit these errors
 #[test]
 fn bad_recipe_names() {
  fn expect_bad_name(text: &str, name: &str) {
-    expect_error(text, 0, ErrorKind::BadRecipeName{name: name});
+    expect_error(text, 0, ErrorKind::UnknownStartOfToken{name: name});
  }
  expect_bad_name("Z:", "Z");
  expect_bad_name("a-:", "a-");
@ -123,6 +128,7 @@ fn bad_recipe_names() {
  expect_bad_name("a--a:", "a--a");
  expect_bad_name("@:", "@");
 }
 */
 #[test]
 fn parse() {
@ -202,3 +208,55 @@ a:
    other @ _ => panic!("expected an code run error, but got: {}", other),
  }
 }
 fn tokenize_success(text: &str, expected_summary: &str) {
  let tokens = super::tokenize(text).unwrap();
  let roundtrip = tokens.iter().map(|t| {
    let mut s = String::new();
    s += t.prefix;
    s += t.lexeme;
    s
  }).collect::<Vec<_>>().join("");
  assert_eq!(text, roundtrip);
  assert_eq!(token_summary(tokens), expected_summary);
 }
 fn token_summary(tokens: Vec<super::Token>) -> String {
  tokens.iter().map(|t| {
    match t.class {
      super::TokenClass::Line{..}    => "*",
      super::TokenClass::Name        => "N",
      super::TokenClass::Colon       => ":",
      super::TokenClass::Equals      => "=",
      super::TokenClass::Comment{..} => "#",
      super::TokenClass::Indent{..}  => ">",
      super::TokenClass::Dedent      => "<",
      super::TokenClass::Eol         => "$",
      super::TokenClass::Eof         => ".",
    }
  }).collect::<Vec<_>>().join("")
 }
 #[test]
 fn tokenize() {
  let text = "bob
 hello blah blah blah : a b c #whatever
 ";
  tokenize_success(text, "N$$NNNN:NNN#$.");
  let text = "
 hello:
  a
  b
  c
  d
 bob:
  frank
  ";
  tokenize_success(text, "$N:$>*$*$$*$$*$$<N:$>*$.");
 }