2019-04-11 15:23:14 -07:00
|
|
|
use crate::common::*;
|
2017-11-18 03:36:02 -08:00
|
|
|
|
|
|
|
use CompilationErrorKind::*;
|
2018-12-08 14:29:41 -08:00
|
|
|
use TokenKind::*;
|
2017-11-18 03:36:02 -08:00
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
/// Just language lexer
///
/// `self.next` points to the next character to be lexed, and
/// the text between `self.token_start` and `self.token_end` contains
/// the current token being lexed.
pub(crate) struct Lexer<'a> {
  /// Source text being lexed; token and error values borrow from it
  text: &'a str,
  /// Char iterator over the remainder of `text`
  chars: Chars<'a>,
  /// Tokens produced so far
  tokens: Vec<Token<'a>>,
  /// State stack — the lexer is modal, and the top of the stack is
  /// the current mode
  state: Vec<State<'a>>,
  /// Start position of the current (in-progress) token
  token_start: Position,
  /// End position of the current token, i.e. how far we have lexed
  token_end: Position,
  /// Next character to be lexed, or `None` at end of text
  next: Option<char>,
}
|
|
|
|
|
2017-12-01 02:22:32 -08:00
|
|
|
impl<'a> Lexer<'a> {
|
2019-04-15 22:40:02 -07:00
|
|
|
/// Lex `text`
|
2019-09-21 15:35:03 -07:00
|
|
|
pub(crate) fn lex(text: &str) -> CompilationResult<Vec<Token>> {
|
2019-04-15 22:40:02 -07:00
|
|
|
Lexer::new(text).tokenize()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new Lexer to lex `text`
|
|
|
|
fn new(text: &'a str) -> Lexer<'a> {
|
|
|
|
let mut chars = text.chars();
|
|
|
|
let next = chars.next();
|
|
|
|
|
|
|
|
let start = Position {
|
|
|
|
offset: 0,
|
2017-11-18 03:36:02 -08:00
|
|
|
column: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
line: 0,
|
2017-11-18 03:36:02 -08:00
|
|
|
};
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
Lexer {
|
|
|
|
state: vec![State::Normal],
|
|
|
|
tokens: Vec::new(),
|
|
|
|
token_start: start,
|
|
|
|
token_end: start,
|
|
|
|
chars,
|
|
|
|
next,
|
|
|
|
text,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Advance over the character in `self.next`, updating
/// `self.token_end` accordingly.
///
/// Returns an internal error if called when `self.next` is `None`,
/// since advancing past the end of the text is a lexer bug, not an
/// error in the source being lexed.
fn advance(&mut self) -> CompilationResult<'a, ()> {
  match self.next {
    Some(c) => {
      let len_utf8 = c.len_utf8();

      // Offsets are byte offsets, so advance by the character's
      // UTF-8 encoded length
      self.token_end.offset += len_utf8;

      match c {
        '\n' => {
          // Newline: move to the start of the next line
          self.token_end.column = 0;
          self.token_end.line += 1;
        }
        _ => {
          // Columns are measured in bytes as well, not characters
          self.token_end.column += len_utf8;
        }
      }

      self.next = self.chars.next();

      Ok(())
    }
    None => Err(self.internal_error("Lexer advanced past end of text")),
  }
}
|
|
|
|
|
|
|
|
/// Lexeme of in-progress token
|
|
|
|
fn lexeme(&self) -> &'a str {
|
|
|
|
&self.text[self.token_start.offset..self.token_end.offset]
|
|
|
|
}
|
|
|
|
|
2019-04-18 13:12:38 -07:00
|
|
|
/// Length of current token
|
|
|
|
fn current_token_length(&self) -> usize {
|
|
|
|
self.token_end.offset - self.token_start.offset
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Is next character c?
|
|
|
|
fn next_is(&self, c: char) -> bool {
|
|
|
|
self.next == Some(c)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Is next character ' ' or '\t'?
|
|
|
|
fn next_is_whitespace(&self) -> bool {
|
|
|
|
self.next_is(' ') || self.next_is('\t')
|
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
/// Un-lexed text
|
|
|
|
fn rest(&self) -> &'a str {
|
|
|
|
&self.text[self.token_end.offset..]
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if unlexed text begins with prefix
|
|
|
|
fn rest_starts_with(&self, prefix: &str) -> bool {
|
|
|
|
self.rest().starts_with(prefix)
|
|
|
|
}
|
|
|
|
|
2019-04-18 13:12:38 -07:00
|
|
|
/// Does rest start with "\n" or "\r\n"?
|
|
|
|
fn at_eol(&self) -> bool {
|
|
|
|
self.next_is('\n') || self.rest_starts_with("\r\n")
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Are we at end-of-line or end-of-file?
|
|
|
|
fn at_eol_or_eof(&self) -> bool {
|
|
|
|
self.at_eol() || self.rest().is_empty()
|
2019-04-15 22:40:02 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Get current state
|
|
|
|
fn state(&self) -> CompilationResult<'a, State<'a>> {
|
|
|
|
if self.state.is_empty() {
|
|
|
|
Err(self.internal_error("Lexer state stack empty"))
|
|
|
|
} else {
|
|
|
|
Ok(self.state[self.state.len() - 1])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Pop current state from stack
|
|
|
|
fn pop_state(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
if self.state.pop().is_none() {
|
|
|
|
Err(self.internal_error("Lexer attempted to pop in start state"))
|
|
|
|
} else {
|
|
|
|
Ok(())
|
|
|
|
}
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
/// Create a new token with `kind` whose lexeme
|
|
|
|
/// is between `self.token_start` and `self.token_end`
|
|
|
|
fn token(&mut self, kind: TokenKind) {
|
|
|
|
self.tokens.push(Token {
|
|
|
|
offset: self.token_start.offset,
|
|
|
|
column: self.token_start.column,
|
|
|
|
line: self.token_start.line,
|
|
|
|
text: self.text,
|
|
|
|
length: self.token_end.offset - self.token_start.offset,
|
|
|
|
kind,
|
|
|
|
});
|
|
|
|
|
|
|
|
// Set `token_start` to point after the lexed token
|
|
|
|
self.token_start = self.token_end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create an internal error with `message`
|
|
|
|
fn internal_error(&self, message: impl Into<String>) -> CompilationError<'a> {
|
|
|
|
// Use `self.token_end` as the location of the error
|
|
|
|
CompilationError {
|
|
|
|
text: self.text,
|
|
|
|
offset: self.token_end.offset,
|
|
|
|
line: self.token_end.line,
|
|
|
|
column: self.token_end.column,
|
|
|
|
width: 0,
|
|
|
|
kind: CompilationErrorKind::Internal {
|
|
|
|
message: message.into(),
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a compilation error with `kind`
|
2017-11-18 03:36:02 -08:00
|
|
|
fn error(&self, kind: CompilationErrorKind<'a>) -> CompilationError<'a> {
|
2019-04-15 22:40:02 -07:00
|
|
|
// Use the in-progress token span as the location of the error.
|
|
|
|
|
|
|
|
// The width of the error site to highlight depends on the kind of error:
|
|
|
|
let width = match kind {
|
|
|
|
// highlight ' or "
|
|
|
|
UnterminatedString => 1,
|
|
|
|
// highlight `
|
|
|
|
UnterminatedBacktick => 1,
|
|
|
|
// highlight the full token
|
|
|
|
_ => self.lexeme().len(),
|
|
|
|
};
|
|
|
|
|
2017-11-18 03:36:02 -08:00
|
|
|
CompilationError {
|
2018-12-08 14:29:41 -08:00
|
|
|
text: self.text,
|
2019-04-15 22:40:02 -07:00
|
|
|
offset: self.token_start.offset,
|
|
|
|
line: self.token_start.line,
|
|
|
|
column: self.token_start.column,
|
|
|
|
width,
|
2018-03-05 13:21:35 -08:00
|
|
|
kind,
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
fn unterminated_interpolation_error(
|
|
|
|
&self,
|
|
|
|
interpolation_start: Position,
|
|
|
|
) -> CompilationError<'a> {
|
|
|
|
CompilationError {
|
2018-12-08 14:29:41 -08:00
|
|
|
text: self.text,
|
2019-04-15 22:40:02 -07:00
|
|
|
offset: interpolation_start.offset,
|
|
|
|
line: interpolation_start.line,
|
|
|
|
column: interpolation_start.column,
|
|
|
|
width: 2,
|
|
|
|
kind: UnterminatedInterpolation,
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
/// Consume the text and produce a series of tokens
///
/// Main lexing loop: dispatches on the current state (mode) for each
/// character, then finalizes any open state once the text is exhausted.
fn tokenize(mut self) -> CompilationResult<'a, Vec<Token<'a>>> {
  loop {
    // Column 0 means we are at the start of a line, so handle blank
    // lines and indentation before lexing anything else
    if self.token_start.column == 0 {
      self.lex_line_start()?;
    }

    match self.next {
      Some(first) => match self.state()? {
        State::Normal => self.lex_normal(first)?,
        State::Interpolation {
          interpolation_start,
        } => self.lex_interpolation(interpolation_start, first)?,
        State::Text => self.lex_text()?,
        State::Indented { .. } => self.lex_indented()?,
      },
      // Out of characters
      None => break,
    }
  }

  // An interpolation still open at end of text is unterminated
  if let State::Interpolation {
    interpolation_start,
  } = self.state()?
  {
    return Err(self.unterminated_interpolation_error(interpolation_start));
  }

  // Close any open indented block before the final Eof token
  if let State::Indented { .. } | State::Text = self.state()? {
    self.token(Dedent);
  }

  self.token(Eof);

  Ok(self.tokens)
}
|
|
|
|
|
|
|
|
/// Handle blank lines and indentation
///
/// Called whenever the lexer is at column 0. Emits Whitespace, Indent,
/// and Dedent tokens and pushes/pops `State::Indented` as needed.
fn lex_line_start(&mut self) -> CompilationResult<'a, ()> {
  // Byte index of the first character on this line that is not a
  // space or tab (the whole rest if the line is all blanks)
  let nonblank_index = self
    .rest()
    .char_indices()
    .skip_while(|&(_, c)| c == ' ' || c == '\t')
    .map(|(i, _)| i)
    .next()
    .unwrap_or_else(|| self.rest().len());

  let rest = &self.rest()[nonblank_index..];

  // Handle blank line: only whitespace before EOL or EOF
  if rest.starts_with('\n') || rest.starts_with("\r\n") || rest.is_empty() {
    while self.next_is_whitespace() {
      self.advance()?;
    }

    // Lex a whitespace token if the blank line was nonempty
    if self.current_token_length() > 0 {
      self.token(Whitespace);
    };

    return Ok(());
  }

  // Handle nonblank lines with no leading whitespace: any open
  // indented block ends here
  if !self.next_is_whitespace() {
    if let State::Indented { .. } = self.state()? {
      self.token(Dedent);
      self.pop_state()?;
    }

    return Ok(());
  }

  // Handle continued indentation: we're in an indented block and this
  // line starts with whitespace
  if let State::Indented { indentation } = self.state()? {
    if self.rest_starts_with(indentation) {
      // Advance over each character of the expected indentation
      for _ in indentation.chars() {
        self.advance()?;
      }

      // Indentation matches, lex as whitespace
      self.token(Whitespace);

      return Ok(());
    }

    // Consume whitespace characters, matching or not, up to the length
    // of expected indentation
    for _ in indentation.chars().zip(self.rest().chars()) {
      if self.next_is_whitespace() {
        self.advance()?;
      } else {
        break;
      }
    }

    // We've either advanced over not enough whitespace or mismatching
    // whitespace, so return an error
    return Err(self.error(InconsistentLeadingWhitespace {
      expected: indentation,
      found: self.lexeme(),
    }));
  }

  // Leading whitespace in any state other than Normal is a lexer bug
  if self.state()? != State::Normal {
    return Err(self.internal_error(format!(
      "Lexer::lex_line_start called in unexpected state: {:?}",
      self.state()
    )));
  }

  // Handle new indentation: consume the leading whitespace run
  while self.next_is_whitespace() {
    self.advance()?;
  }

  let indentation = self.lexeme();

  let spaces = indentation.chars().any(|c| c == ' ');
  let tabs = indentation.chars().any(|c| c == '\t');

  // Indentation mixing spaces and tabs is rejected outright
  if spaces && tabs {
    return Err(self.error(MixedLeadingWhitespace {
      whitespace: indentation,
    }));
  }

  self.state.push(State::Indented { indentation });

  self.token(Indent);

  Ok(())
}
|
|
|
|
|
|
|
|
/// Lex token beginning with `start` in normal state
///
/// Dispatches to the appropriate single-token lexing helper based on
/// the first character.
fn lex_normal(&mut self, start: char) -> CompilationResult<'a, ()> {
  match start {
    '@' => self.lex_single(At),
    '=' => self.lex_single(Equals),
    ',' => self.lex_single(Comma),
    ':' => self.lex_colon(),
    '(' => self.lex_single(ParenL),
    ')' => self.lex_single(ParenR),
    '{' => self.lex_brace_l(),
    '}' => self.lex_brace_r(),
    '+' => self.lex_single(Plus),
    '\n' => self.lex_single(Eol),
    '\r' => self.lex_cr_lf(),
    '#' => self.lex_comment(),
    '`' => self.lex_backtick(),
    ' ' | '\t' => self.lex_whitespace(),
    '\'' => self.lex_raw_string(),
    '"' => self.lex_cooked_string(),
    'a'..='z' | 'A'..='Z' | '_' => self.lex_name(),
    _ => {
      // Consume the offending character so the error points at it
      self.advance()?;
      Err(self.error(UnknownStartOfToken))
    }
  }
}
|
|
|
|
|
|
|
|
/// Lex token beginning with `start` in interpolation state
|
|
|
|
fn lex_interpolation(
|
|
|
|
&mut self,
|
|
|
|
interpolation_start: Position,
|
|
|
|
start: char,
|
|
|
|
) -> CompilationResult<'a, ()> {
|
|
|
|
// Check for end of interpolation
|
|
|
|
if self.rest_starts_with("}}") {
|
|
|
|
// Pop interpolation state
|
|
|
|
self.pop_state()?;
|
|
|
|
// Emit interpolation end token
|
|
|
|
self.lex_double(InterpolationEnd)
|
2019-04-18 13:12:38 -07:00
|
|
|
} else if self.at_eol_or_eof() {
|
2019-04-15 22:40:02 -07:00
|
|
|
// Return unterminated interpolation error that highlights the opening {{
|
|
|
|
Err(self.unterminated_interpolation_error(interpolation_start))
|
|
|
|
} else {
|
|
|
|
// Otherwise lex as if we are in normal state
|
|
|
|
self.lex_normal(start)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex token beginning with `start` in text state
///
/// Scans recipe text up to the first terminator (newline, `{{`, or end
/// of file), emits a Text token for what was scanned, then handles the
/// terminator.
fn lex_text(&mut self) -> CompilationResult<'a, ()> {
  // What ended the run of plain text
  enum Terminator {
    Newline,
    NewlineCarriageReturn,
    Interpolation,
    EndOfFile,
  }

  use Terminator::*;

  let terminator = loop {
    if let Some('\n') = self.next {
      break Newline;
    }

    if self.rest_starts_with("\r\n") {
      break NewlineCarriageReturn;
    }

    if self.rest_starts_with("{{") {
      break Interpolation;
    }

    if self.next.is_none() {
      break EndOfFile;
    }

    self.advance()?;
  };

  // emit text token containing text so far
  if self.current_token_length() > 0 {
    self.token(Text);
  }

  match terminator {
    Newline => {
      // Leave text state; the next line starts fresh
      self.state.pop();
      self.lex_single(Eol)
    }
    NewlineCarriageReturn => {
      // Leave text state; the next line starts fresh
      self.state.pop();
      self.lex_double(Eol)
    }
    Interpolation => {
      // Enter interpolation state, remembering where the {{ begins
      // so unterminated interpolations can be reported there
      self.state.push(State::Interpolation {
        interpolation_start: self.token_start,
      });
      self.lex_double(InterpolationStart)
    }
    EndOfFile => self.pop_state(),
  }
}
|
|
|
|
|
|
|
|
/// Lex token beginning with `start` in indented state
|
|
|
|
fn lex_indented(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
self.state.push(State::Text);
|
|
|
|
self.token(Line);
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex a single character token
|
|
|
|
fn lex_single(&mut self, kind: TokenKind) -> CompilationResult<'a, ()> {
|
|
|
|
self.advance()?;
|
|
|
|
self.token(kind);
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex a double character token
|
|
|
|
fn lex_double(&mut self, kind: TokenKind) -> CompilationResult<'a, ()> {
|
|
|
|
self.advance()?;
|
|
|
|
self.advance()?;
|
|
|
|
self.token(kind);
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2019-04-18 11:48:02 -07:00
|
|
|
/// Lex a token starting with ':'
|
|
|
|
fn lex_colon(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
self.advance()?;
|
|
|
|
|
2019-04-18 13:12:38 -07:00
|
|
|
if self.next_is('=') {
|
2019-04-18 11:48:02 -07:00
|
|
|
self.advance()?;
|
|
|
|
self.token(ColonEquals);
|
|
|
|
} else {
|
|
|
|
self.token(Colon);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
/// Lex a token starting with '{'
|
|
|
|
fn lex_brace_l(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
if !self.rest_starts_with("{{") {
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
return Err(self.error(UnknownStartOfToken));
|
|
|
|
}
|
|
|
|
|
|
|
|
self.lex_double(InterpolationStart)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex a token starting with '}'
|
|
|
|
fn lex_brace_r(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
if !self.rest_starts_with("}}") {
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
return Err(self.error(UnknownStartOfToken));
|
|
|
|
}
|
|
|
|
|
|
|
|
self.lex_double(InterpolationEnd)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex a carriage return and line feed
|
|
|
|
fn lex_cr_lf(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
if !self.rest_starts_with("\r\n") {
|
|
|
|
// advance over \r
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
return Err(self.error(UnpairedCarriageReturn));
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
2019-04-15 22:40:02 -07:00
|
|
|
|
|
|
|
self.lex_double(Eol)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex name: [a-zA-Z_][a-zA-Z0-9_]*
|
|
|
|
fn lex_name(&mut self) -> CompilationResult<'a, ()> {
|
2019-04-18 13:12:38 -07:00
|
|
|
while self
|
|
|
|
.next
|
|
|
|
.map(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
|
|
|
|
.unwrap_or(false)
|
2019-04-15 22:40:02 -07:00
|
|
|
{
|
|
|
|
self.advance()?;
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
self.token(Name);
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex comment: #[^\r\n]
|
|
|
|
fn lex_comment(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
// advance over #
|
|
|
|
self.advance()?;
|
|
|
|
|
2019-04-18 13:12:38 -07:00
|
|
|
while !self.at_eol_or_eof() {
|
2019-04-15 22:40:02 -07:00
|
|
|
self.advance()?;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.token(Comment);
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex backtick: `[^\r\n]*`
|
|
|
|
fn lex_backtick(&mut self) -> CompilationResult<'a, ()> {
|
2019-04-18 13:12:38 -07:00
|
|
|
// advance over initial `
|
2019-04-15 22:40:02 -07:00
|
|
|
self.advance()?;
|
|
|
|
|
2019-04-18 13:12:38 -07:00
|
|
|
while !self.next_is('`') {
|
|
|
|
if self.at_eol_or_eof() {
|
2019-04-15 22:40:02 -07:00
|
|
|
return Err(self.error(UnterminatedBacktick));
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
self.advance()?;
|
|
|
|
}
|
2017-11-18 03:36:02 -08:00
|
|
|
|
2019-04-18 13:12:38 -07:00
|
|
|
self.advance()?;
|
2019-04-15 22:40:02 -07:00
|
|
|
self.token(Backtick);
|
2017-11-18 03:36:02 -08:00
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex whitespace: [ \t]+
|
|
|
|
fn lex_whitespace(&mut self) -> CompilationResult<'a, ()> {
|
2019-04-18 13:12:38 -07:00
|
|
|
while self.next_is_whitespace() {
|
2019-04-15 22:40:02 -07:00
|
|
|
self.advance()?
|
|
|
|
}
|
|
|
|
|
|
|
|
self.token(Whitespace);
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex raw string: '[^']*'
|
|
|
|
fn lex_raw_string(&mut self) -> CompilationResult<'a, ()> {
|
|
|
|
// advance over opening '
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
loop {
|
|
|
|
match self.next {
|
|
|
|
Some('\'') => break,
|
|
|
|
None => return Err(self.error(UnterminatedString)),
|
|
|
|
_ => {}
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2019-04-15 22:40:02 -07:00
|
|
|
self.advance()?;
|
|
|
|
}
|
|
|
|
|
|
|
|
// advance over closing '
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
self.token(StringRaw);
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Lex cooked string: "[^"\n\r]*" (also processes escape sequences)
///
/// Unlike raw strings, cooked strings may not contain newlines, and a
/// backslash escapes the following character.
fn lex_cooked_string(&mut self) -> CompilationResult<'a, ()> {
  // advance over opening "
  self.advance()?;

  // true when the previous character was an unescaped backslash
  let mut escape = false;

  loop {
    match self.next {
      // cooked strings must be closed before end of line or file
      Some('\r') | Some('\n') | None => return Err(self.error(UnterminatedString)),
      // an unescaped " terminates the string
      Some('"') if !escape => break,
      // an unescaped backslash escapes the next character
      Some('\\') if !escape => escape = true,
      // any other character (or an escaped one) clears the flag
      _ => escape = false,
    }

    self.advance()?;
  }

  // advance over closing "
  self.advance()?;

  self.token(StringCooked);

  Ok(())
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
2019-04-15 22:40:02 -07:00
|
|
|
mod tests {
|
2017-11-18 03:36:02 -08:00
|
|
|
use super::*;
|
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
use pretty_assertions::assert_eq;
|
2019-04-19 02:17:43 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
macro_rules! test {
|
|
|
|
{
|
|
|
|
name: $name:ident,
|
|
|
|
text: $text:expr,
|
|
|
|
tokens: ($($kind:ident $(: $lexeme:literal)?),* $(,)?)$(,)?
|
|
|
|
} => {
|
2017-11-18 03:36:02 -08:00
|
|
|
#[test]
|
|
|
|
fn $name() {
|
2019-10-17 20:04:54 -07:00
|
|
|
let kinds: &[TokenKind] = &[$($kind,)* Eof];
|
2017-11-18 03:36:02 -08:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
let lexemes: &[&str] = &[$(lexeme!($kind $(, $lexeme)?),)* ""];
|
2017-11-18 03:36:02 -08:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
test($text, kinds, lexemes);
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
2019-10-17 20:04:54 -07:00
|
|
|
}
|
2019-04-15 22:40:02 -07:00
|
|
|
}
|
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
macro_rules! lexeme {
|
|
|
|
{
|
|
|
|
$kind:ident, $lexeme:literal
|
|
|
|
} => {
|
|
|
|
$lexeme
|
|
|
|
};
|
|
|
|
{
|
|
|
|
$kind:ident
|
|
|
|
} => {
|
|
|
|
default_lexeme($kind)
|
|
|
|
}
|
2019-04-15 22:40:02 -07:00
|
|
|
}
|
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
fn test(text: &str, want_kinds: &[TokenKind], want_lexemes: &[&str]) {
|
|
|
|
let text = testing::unindent(text);
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
let have = Lexer::lex(&text).unwrap();
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
let have_kinds = have
|
|
|
|
.iter()
|
|
|
|
.map(|token| token.kind)
|
|
|
|
.collect::<Vec<TokenKind>>();
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
let have_lexemes = have
|
|
|
|
.iter()
|
|
|
|
.map(|token| token.lexeme())
|
|
|
|
.collect::<Vec<&str>>();
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
assert_eq!(have_kinds, want_kinds, "Token kind mismatch");
|
|
|
|
assert_eq!(have_lexemes, want_lexemes, "Token lexeme mismatch");
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
let mut roundtrip = String::new();
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
for lexeme in have_lexemes {
|
|
|
|
roundtrip.push_str(lexeme);
|
|
|
|
}
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
assert_eq!(roundtrip, text, "Roundtrip mismatch");
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
let mut offset = 0;
|
|
|
|
let mut line = 0;
|
|
|
|
let mut column = 0;
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
for token in have {
|
|
|
|
assert_eq!(token.offset, offset);
|
|
|
|
assert_eq!(token.line, line);
|
|
|
|
assert_eq!(token.lexeme().len(), token.length);
|
|
|
|
assert_eq!(token.column, column);
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
for c in token.lexeme().chars() {
|
|
|
|
if c == '\n' {
|
|
|
|
line += 1;
|
|
|
|
column = 0;
|
|
|
|
} else {
|
|
|
|
column += c.len_utf8();
|
|
|
|
}
|
|
|
|
}
|
2019-04-15 22:40:02 -07:00
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
offset += token.length;
|
|
|
|
}
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
fn default_lexeme(kind: TokenKind) -> &'static str {
|
|
|
|
match kind {
|
|
|
|
// Fixed lexemes
|
|
|
|
At => "@",
|
|
|
|
Colon => ":",
|
|
|
|
ColonEquals => ":=",
|
|
|
|
Comma => ",",
|
|
|
|
Eol => "\n",
|
|
|
|
Equals => "=",
|
|
|
|
Indent => " ",
|
|
|
|
InterpolationEnd => "}}",
|
|
|
|
InterpolationStart => "{{",
|
|
|
|
ParenL => "(",
|
|
|
|
ParenR => ")",
|
|
|
|
Plus => "+",
|
|
|
|
Whitespace => " ",
|
|
|
|
|
|
|
|
// Empty lexemes
|
|
|
|
Line | Dedent | Eof => "",
|
|
|
|
|
|
|
|
// Variable lexemes
|
|
|
|
Text | StringCooked | StringRaw | Name | Comment | Backtick => {
|
|
|
|
panic!("Token {:?} has no default lexeme", kind)
|
|
|
|
}
|
|
|
|
}
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2019-10-17 20:04:54 -07:00
|
|
|
test! {
|
|
|
|
name: name_new,
|
|
|
|
text: "foo",
|
|
|
|
tokens: (Name:"foo"),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: comment,
|
|
|
|
text: "# hello",
|
|
|
|
tokens: (Comment:"# hello"),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: backtick,
|
|
|
|
text: "`echo`",
|
|
|
|
tokens: (Backtick:"`echo`"),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: raw_string,
|
|
|
|
text: "'hello'",
|
|
|
|
tokens: (StringRaw:"'hello'"),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: cooked_string,
|
|
|
|
text: "\"hello\"",
|
|
|
|
tokens: (StringCooked:"\"hello\""),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: export_concatination,
|
|
|
|
text: "export foo = 'foo' + 'bar'",
|
|
|
|
tokens: (
|
|
|
|
Name:"export",
|
|
|
|
Whitespace,
|
|
|
|
Name:"foo",
|
|
|
|
Whitespace,
|
|
|
|
Equals,
|
|
|
|
Whitespace,
|
|
|
|
StringRaw:"'foo'",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
StringRaw:"'bar'",
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: export_complex,
|
|
|
|
text: "export foo = ('foo' + 'bar') + `baz`",
|
|
|
|
tokens: (
|
|
|
|
Name:"export",
|
|
|
|
Whitespace,
|
|
|
|
Name:"foo",
|
|
|
|
Whitespace,
|
|
|
|
Equals,
|
|
|
|
Whitespace,
|
|
|
|
ParenL,
|
|
|
|
StringRaw:"'foo'",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
StringRaw:"'bar'",
|
|
|
|
ParenR,
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
Backtick:"`baz`",
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: eol_linefeed,
|
|
|
|
text: "\n",
|
|
|
|
tokens: (Eol),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: eol_carriage_return_linefeed,
|
|
|
|
text: "\r\n",
|
|
|
|
tokens: (Eol:"\r\n"),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: indented_line,
|
|
|
|
text: "foo:\n a",
|
|
|
|
tokens: (Name:"foo", Colon, Eol, Indent:" ", Line, Text:"a", Dedent),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: indented_block,
|
|
|
|
text: "
|
|
|
|
foo:
|
|
|
|
a
|
|
|
|
b
|
|
|
|
c
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"a",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"b",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"c",
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: indented_block_followed_by_item,
|
|
|
|
text: "
|
|
|
|
foo:
|
|
|
|
a
|
|
|
|
b:
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"a",
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"b",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: indented_block_followed_by_blank,
|
|
|
|
text: "
|
|
|
|
foo:
|
|
|
|
a
|
|
|
|
|
|
|
|
b:
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"a",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"b",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: indented_line_containing_unpaired_carriage_return,
|
|
|
|
text: "foo:\n \r \n",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"\r ",
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: indented_blocks,
|
|
|
|
text: "
|
|
|
|
b: a
|
|
|
|
@mv a b
|
|
|
|
|
|
|
|
a:
|
|
|
|
@touch F
|
|
|
|
@touch a
|
|
|
|
|
|
|
|
d: c
|
|
|
|
@rm c
|
|
|
|
|
|
|
|
c: b
|
|
|
|
@mv b c
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"b",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"a",
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@mv a b",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"a",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@touch F",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"@touch a",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"d",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"c",
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@rm c",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"c",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"b",
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@mv b c",
|
|
|
|
Eol,
|
|
|
|
Dedent
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: interpolation_empty,
|
|
|
|
text: "hello:\n echo {{}}",
|
|
|
|
tokens: (
|
|
|
|
Name:"hello",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"echo ",
|
|
|
|
InterpolationStart,
|
|
|
|
InterpolationEnd,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: interpolation_expression,
|
|
|
|
text: "hello:\n echo {{`echo hello` + `echo goodbye`}}",
|
|
|
|
tokens: (
|
|
|
|
Name:"hello",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"echo ",
|
|
|
|
InterpolationStart,
|
|
|
|
Backtick:"`echo hello`",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
Backtick:"`echo goodbye`",
|
|
|
|
InterpolationEnd,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_names,
|
|
|
|
text: "
|
|
|
|
foo
|
|
|
|
bar-bob
|
|
|
|
b-bob_asdfAAAA
|
|
|
|
test123
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Eol,
|
|
|
|
Name:"bar-bob",
|
|
|
|
Eol,
|
|
|
|
Name:"b-bob_asdfAAAA",
|
|
|
|
Eol,
|
|
|
|
Name:"test123",
|
|
|
|
Eol,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_indented_line,
|
|
|
|
text: "foo:\n a",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"a",
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_indented_block,
|
|
|
|
text: "
|
|
|
|
foo:
|
|
|
|
a
|
|
|
|
b
|
|
|
|
c
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"a",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"b",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"c",
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_strings,
|
|
|
|
text: r#"a = "'a'" + '"b"' + "'c'" + '"d"'#echo hello"#,
|
|
|
|
tokens: (
|
|
|
|
Name:"a",
|
|
|
|
Whitespace,
|
|
|
|
Equals,
|
|
|
|
Whitespace,
|
|
|
|
StringCooked:"\"'a'\"",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
StringRaw:"'\"b\"'",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
StringCooked:"\"'c'\"",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
StringRaw:"'\"d\"'",
|
|
|
|
Comment:"#echo hello",
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_recipe_interpolation_eol,
|
|
|
|
text: "
|
|
|
|
foo: # some comment
|
|
|
|
{{hello}}
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Comment:"# some comment",
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"hello",
|
|
|
|
InterpolationEnd,
|
|
|
|
Eol,
|
|
|
|
Dedent
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_recipe_interpolation_eof,
|
|
|
|
text: "foo: # more comments
|
2017-11-18 03:36:02 -08:00
|
|
|
{{hello}}
|
|
|
|
# another comment
|
|
|
|
",
|
2019-10-17 20:04:54 -07:00
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Comment:"# more comments",
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"hello",
|
|
|
|
InterpolationEnd,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Comment:"# another comment",
|
|
|
|
Eol,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_recipe_complex_interpolation_expression,
|
|
|
|
text: "foo: #lol\n {{a + b + \"z\" + blarg}}",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Whitespace:" ",
|
|
|
|
Comment:"#lol",
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"a",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
Name:"b",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
StringCooked:"\"z\"",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
Name:"blarg",
|
|
|
|
InterpolationEnd,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_recipe_multiple_interpolations,
|
|
|
|
text: "foo:,#ok\n {{a}}0{{b}}1{{c}}",
|
|
|
|
tokens: (
|
|
|
|
Name:"foo",
|
|
|
|
Colon,
|
|
|
|
Comma,
|
|
|
|
Comment:"#ok",
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"a",
|
|
|
|
InterpolationEnd,
|
|
|
|
Text:"0",
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"b",
|
|
|
|
InterpolationEnd,
|
|
|
|
Text:"1",
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"c",
|
|
|
|
InterpolationEnd,
|
|
|
|
Dedent,
|
|
|
|
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_junk,
|
|
|
|
text: "
|
|
|
|
bob
|
|
|
|
|
|
|
|
hello blah blah blah : a b c #whatever
|
2017-11-18 03:36:02 -08:00
|
|
|
",
|
2019-10-17 20:04:54 -07:00
|
|
|
tokens: (
|
|
|
|
Name:"bob",
|
|
|
|
Eol,
|
|
|
|
Eol,
|
|
|
|
Name:"hello",
|
|
|
|
Whitespace,
|
|
|
|
Name:"blah",
|
|
|
|
Whitespace,
|
|
|
|
Name:"blah",
|
|
|
|
Whitespace,
|
|
|
|
Name:"blah",
|
|
|
|
Whitespace,
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"a",
|
|
|
|
Whitespace,
|
|
|
|
Name:"b",
|
|
|
|
Whitespace,
|
|
|
|
Name:"c",
|
|
|
|
Whitespace,
|
|
|
|
Comment:"#whatever",
|
|
|
|
Eol,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_empty_lines,
|
|
|
|
text: "
|
|
|
|
|
|
|
|
# this does something
|
|
|
|
hello:
|
|
|
|
asdf
|
|
|
|
bsdf
|
|
|
|
|
|
|
|
csdf
|
|
|
|
|
|
|
|
dsdf # whatever
|
|
|
|
|
|
|
|
# yolo
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Eol,
|
|
|
|
Comment:"# this does something",
|
|
|
|
Eol,
|
|
|
|
Name:"hello",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"asdf",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"bsdf",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"csdf",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"dsdf # whatever",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Comment:"# yolo",
|
|
|
|
Eol,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_comment_before_variable,
|
|
|
|
text: "
|
|
|
|
#
|
|
|
|
A='1'
|
|
|
|
echo:
|
|
|
|
echo {{A}}
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Comment:"#",
|
|
|
|
Eol,
|
|
|
|
Name:"A",
|
|
|
|
Equals,
|
|
|
|
StringRaw:"'1'",
|
|
|
|
Eol,
|
|
|
|
Name:"echo",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"echo ",
|
|
|
|
InterpolationStart,
|
|
|
|
Name:"A",
|
|
|
|
InterpolationEnd,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_interpolation_backticks,
|
|
|
|
text: "hello:\n echo {{`echo hello` + `echo goodbye`}}",
|
|
|
|
tokens: (
|
|
|
|
Name:"hello",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"echo ",
|
|
|
|
InterpolationStart,
|
|
|
|
Backtick:"`echo hello`",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
Backtick:"`echo goodbye`",
|
|
|
|
InterpolationEnd,
|
|
|
|
Dedent
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_empty_interpolation,
|
|
|
|
text: "hello:\n echo {{}}",
|
|
|
|
tokens: (
|
|
|
|
Name:"hello",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"echo ",
|
|
|
|
InterpolationStart,
|
|
|
|
InterpolationEnd,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_assignment_backticks,
|
|
|
|
text: "a = `echo hello` + `echo goodbye`",
|
|
|
|
tokens: (
|
|
|
|
Name:"a",
|
|
|
|
Whitespace,
|
|
|
|
Equals,
|
|
|
|
Whitespace,
|
|
|
|
Backtick:"`echo hello`",
|
|
|
|
Whitespace,
|
|
|
|
Plus,
|
|
|
|
Whitespace,
|
|
|
|
Backtick:"`echo goodbye`",
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_multiple,
|
|
|
|
text: "
|
|
|
|
|
|
|
|
hello:
|
|
|
|
a
|
|
|
|
b
|
|
|
|
|
|
|
|
c
|
|
|
|
|
|
|
|
d
|
|
|
|
|
|
|
|
# hello
|
|
|
|
bob:
|
|
|
|
frank
|
|
|
|
\t
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Eol,
|
|
|
|
Name:"hello",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"a",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"b",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"c",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"d",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Comment:"# hello",
|
|
|
|
Eol,
|
|
|
|
Name:"bob",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"frank",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_comment,
|
|
|
|
text: "a:=#",
|
|
|
|
tokens: (
|
|
|
|
Name:"a",
|
|
|
|
ColonEquals,
|
|
|
|
Comment:"#",
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_comment_with_bang,
|
|
|
|
text: "a:=#foo!",
|
|
|
|
tokens: (
|
|
|
|
Name:"a",
|
|
|
|
ColonEquals,
|
|
|
|
Comment:"#foo!",
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_order,
|
|
|
|
text: "
|
|
|
|
b: a
|
|
|
|
@mv a b
|
|
|
|
|
|
|
|
a:
|
|
|
|
@touch F
|
|
|
|
@touch a
|
|
|
|
|
|
|
|
d: c
|
|
|
|
@rm c
|
|
|
|
|
|
|
|
c: b
|
|
|
|
@mv b c
|
|
|
|
",
|
|
|
|
tokens: (
|
|
|
|
Name:"b",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"a",
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@mv a b",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"a",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@touch F",
|
|
|
|
Eol,
|
|
|
|
Whitespace:" ",
|
|
|
|
Line,
|
|
|
|
Text:"@touch a",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"d",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"c",
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@rm c",
|
|
|
|
Eol,
|
|
|
|
Line,
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"c",
|
|
|
|
Colon,
|
|
|
|
Whitespace,
|
|
|
|
Name:"b",
|
|
|
|
Eol,
|
|
|
|
Indent,
|
|
|
|
Line,
|
|
|
|
Text:"@mv b c",
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: tokenize_parens,
|
|
|
|
text: "((())) )abc(+",
|
|
|
|
tokens: (
|
|
|
|
ParenL,
|
|
|
|
ParenL,
|
|
|
|
ParenL,
|
|
|
|
ParenR,
|
|
|
|
ParenR,
|
|
|
|
ParenR,
|
|
|
|
Whitespace,
|
|
|
|
ParenR,
|
|
|
|
Name:"abc",
|
|
|
|
ParenL,
|
|
|
|
Plus,
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: crlf_newline,
|
|
|
|
text: "#\r\n#asdf\r\n",
|
|
|
|
tokens: (
|
|
|
|
Comment:"#",
|
|
|
|
Eol:"\r\n",
|
|
|
|
Comment:"#asdf",
|
|
|
|
Eol:"\r\n",
|
|
|
|
),
|
|
|
|
}
|
|
|
|
|
|
|
|
test! {
|
|
|
|
name: multiple_recipes,
|
|
|
|
text: "a:\n foo\nb:",
|
|
|
|
tokens: (
|
|
|
|
Name:"a",
|
|
|
|
Colon,
|
|
|
|
Eol,
|
|
|
|
Indent:" ",
|
|
|
|
Line,
|
|
|
|
Text:"foo",
|
|
|
|
Eol,
|
|
|
|
Dedent,
|
|
|
|
Name:"b",
|
|
|
|
Colon,
|
|
|
|
),
|
2019-04-11 23:58:08 -07:00
|
|
|
}
|
|
|
|
|
2017-11-18 03:36:02 -08:00
|
|
|
error_test! {
|
|
|
|
name: tokenize_space_then_tab,
|
|
|
|
input: "a:
|
|
|
|
0
|
|
|
|
1
|
|
|
|
\t2
|
|
|
|
",
|
2019-04-15 22:40:02 -07:00
|
|
|
offset: 9,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 3,
|
|
|
|
column: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
width: 1,
|
2017-11-18 03:36:02 -08:00
|
|
|
kind: InconsistentLeadingWhitespace{expected: " ", found: "\t"},
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
|
|
|
name: tokenize_tabs_then_tab_space,
|
|
|
|
input: "a:
|
|
|
|
\t\t0
|
|
|
|
\t\t 1
|
|
|
|
\t 2
|
|
|
|
",
|
2019-04-15 22:40:02 -07:00
|
|
|
offset: 12,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 3,
|
|
|
|
column: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
width: 2,
|
|
|
|
kind: InconsistentLeadingWhitespace{expected: "\t\t", found: "\t "},
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
2019-04-15 22:40:02 -07:00
|
|
|
name: tokenize_unknown,
|
|
|
|
input: "~",
|
|
|
|
offset: 0,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 0,
|
|
|
|
column: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
width: 1,
|
2017-11-18 03:36:02 -08:00
|
|
|
kind: UnknownStartOfToken,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
2019-04-15 22:40:02 -07:00
|
|
|
name: unterminated_string_with_escapes,
|
|
|
|
input: r#"a = "\n\t\r\"\\"#,
|
|
|
|
offset: 4,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
column: 4,
|
|
|
|
width: 1,
|
2017-11-18 03:36:02 -08:00
|
|
|
kind: UnterminatedString,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
2019-04-15 22:40:02 -07:00
|
|
|
name: unterminated_raw_string,
|
|
|
|
input: "r a='asdf",
|
|
|
|
offset: 4,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
column: 4,
|
|
|
|
width: 1,
|
2017-11-18 03:36:02 -08:00
|
|
|
kind: UnterminatedString,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
2019-04-15 22:40:02 -07:00
|
|
|
name: unterminated_interpolation,
|
|
|
|
input: "foo:\n echo {{
|
|
|
|
",
|
|
|
|
offset: 11,
|
|
|
|
line: 1,
|
|
|
|
column: 6,
|
|
|
|
width: 2,
|
|
|
|
kind: UnterminatedInterpolation,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
|
|
|
name: unterminated_backtick,
|
|
|
|
input: "`echo",
|
|
|
|
offset: 0,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
column: 0,
|
|
|
|
width: 1,
|
|
|
|
kind: UnterminatedBacktick,
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|
|
|
|
|
2017-12-02 12:49:31 -08:00
|
|
|
error_test! {
|
2019-04-15 22:40:02 -07:00
|
|
|
name: unpaired_carriage_return,
|
|
|
|
input: "foo\rbar",
|
|
|
|
offset: 3,
|
|
|
|
line: 0,
|
|
|
|
column: 3,
|
|
|
|
width: 1,
|
|
|
|
kind: UnpairedCarriageReturn,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
|
|
|
name: unknown_start_of_token_ampersand,
|
|
|
|
input: " \r\n&",
|
|
|
|
offset: 3,
|
2017-12-02 12:49:31 -08:00
|
|
|
line: 1,
|
2019-04-15 22:40:02 -07:00
|
|
|
column: 0,
|
|
|
|
width: 1,
|
|
|
|
kind: UnknownStartOfToken,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
|
|
|
name: unknown_start_of_token_tilde,
|
|
|
|
input: "~",
|
|
|
|
offset: 0,
|
|
|
|
line: 0,
|
|
|
|
column: 0,
|
|
|
|
width: 1,
|
|
|
|
kind: UnknownStartOfToken,
|
|
|
|
}
|
|
|
|
|
|
|
|
error_test! {
|
|
|
|
name: unterminated_string,
|
|
|
|
input: r#"a = ""#,
|
|
|
|
offset: 4,
|
|
|
|
line: 0,
|
|
|
|
column: 4,
|
|
|
|
width: 1,
|
|
|
|
kind: UnterminatedString,
|
2017-12-02 12:49:31 -08:00
|
|
|
}
|
|
|
|
|
2017-11-18 03:36:02 -08:00
|
|
|
error_test! {
|
2018-01-05 02:03:58 -08:00
|
|
|
name: mixed_leading_whitespace,
|
|
|
|
input: "a:\n\t echo hello",
|
2019-04-15 22:40:02 -07:00
|
|
|
offset: 3,
|
2017-11-18 03:36:02 -08:00
|
|
|
line: 1,
|
|
|
|
column: 0,
|
2019-04-15 22:40:02 -07:00
|
|
|
width: 2,
|
2017-11-18 03:36:02 -08:00
|
|
|
kind: MixedLeadingWhitespace{whitespace: "\t "},
|
|
|
|
}
|
2019-04-15 22:40:02 -07:00
|
|
|
|
|
|
|
error_test! {
|
|
|
|
name: unclosed_interpolation_delimiter,
|
|
|
|
input: "a:\n echo {{ foo",
|
|
|
|
offset: 9,
|
|
|
|
line: 1,
|
|
|
|
column: 6,
|
|
|
|
width: 2,
|
|
|
|
kind: UnterminatedInterpolation,
|
|
|
|
}
|
2017-11-18 03:36:02 -08:00
|
|
|
}
|