From aa506fa5bd228ed6070de90045b963ffba8319f0 Mon Sep 17 00:00:00 2001 From: Casey Rodarmor Date: Tue, 27 Oct 2020 23:51:17 -0700 Subject: [PATCH] Allow ignore line endings inside delimiters (#717) Modify the lexer to keep track of opening `({[` and closing `]})` delimiters. When the lexer would emit an eol or indent outside of a recipe when there is at least one open delimiter, emit a whitespace token instead. This allows expressions to be split on multiple lines, like so: x := if 'a' == 'b' { 'x' } else { 'y' } This does not work inside of recipe body interpolations, although this restriction might be relaxed in the future. --- src/common.rs | 20 ++--- src/compilation_error.rs | 16 ++++ src/compilation_error_kind.rs | 8 ++ src/delimiter.rs | 24 ++++++ src/lexer.rs | 147 +++++++++++++++++++++++++++------- src/lib.rs | 1 + src/summary.rs | 4 +- tests/delimiters.rs | 104 ++++++++++++++++++++++++ tests/lib.rs | 1 + 9 files changed, 282 insertions(+), 43 deletions(-) create mode 100644 src/delimiter.rs create mode 100644 tests/delimiters.rs diff --git a/src/common.rs b/src/common.rs index 059be63..f0ecb03 100644 --- a/src/common.rs +++ b/src/common.rs @@ -45,16 +45,16 @@ pub(crate) use crate::{ assignment_resolver::AssignmentResolver, binding::Binding, color::Color, compilation_error::CompilationError, compilation_error_kind::CompilationErrorKind, compiler::Compiler, config::Config, config_error::ConfigError, count::Count, - dependency::Dependency, enclosure::Enclosure, evaluator::Evaluator, expression::Expression, - fragment::Fragment, function::Function, function_context::FunctionContext, - interrupt_guard::InterruptGuard, interrupt_handler::InterruptHandler, item::Item, - justfile::Justfile, keyword::Keyword, lexer::Lexer, line::Line, list::List, - load_error::LoadError, module::Module, name::Name, output_error::OutputError, - parameter::Parameter, parameter_kind::ParameterKind, parser::Parser, platform::Platform, - position::Position, positional::Positional, 
recipe::Recipe, recipe_context::RecipeContext, - recipe_resolver::RecipeResolver, runtime_error::RuntimeError, scope::Scope, search::Search, - search_config::SearchConfig, search_error::SearchError, set::Set, setting::Setting, - settings::Settings, shebang::Shebang, show_whitespace::ShowWhitespace, + delimiter::Delimiter, dependency::Dependency, enclosure::Enclosure, evaluator::Evaluator, + expression::Expression, fragment::Fragment, function::Function, + function_context::FunctionContext, interrupt_guard::InterruptGuard, + interrupt_handler::InterruptHandler, item::Item, justfile::Justfile, keyword::Keyword, + lexer::Lexer, line::Line, list::List, load_error::LoadError, module::Module, name::Name, + output_error::OutputError, parameter::Parameter, parameter_kind::ParameterKind, parser::Parser, + platform::Platform, position::Position, positional::Positional, recipe::Recipe, + recipe_context::RecipeContext, recipe_resolver::RecipeResolver, runtime_error::RuntimeError, + scope::Scope, search::Search, search_config::SearchConfig, search_error::SearchError, set::Set, + setting::Setting, settings::Settings, shebang::Shebang, show_whitespace::ShowWhitespace, string_literal::StringLiteral, subcommand::Subcommand, suggestion::Suggestion, table::Table, thunk::Thunk, token::Token, token_kind::TokenKind, unresolved_dependency::UnresolvedDependency, unresolved_recipe::UnresolvedRecipe, use_color::UseColor, variables::Variables, diff --git a/src/compilation_error.rs b/src/compilation_error.rs index c6e7228..1754543 100644 --- a/src/compilation_error.rs +++ b/src/compilation_error.rs @@ -209,6 +209,22 @@ impl Display for CompilationError<'_> { UnknownStartOfToken => { writeln!(f, "Unknown start of token:")?; }, + MismatchedClosingDelimiter { + open, + open_line, + close, + } => { + writeln!( + f, + "Mismatched closing delimiter `{}`. 
(Did you mean to close the `{}` on line {}?)", + close.close(), + open.open(), + open_line.ordinal(), + )?; + }, + UnexpectedClosingDelimiter { close } => { + writeln!(f, "Unexpected closing delimiter `{}`", close.close())?; + }, UnpairedCarriageReturn => { writeln!(f, "Unpaired carriage return")?; }, diff --git a/src/compilation_error_kind.rs b/src/compilation_error_kind.rs index 807fc70..4e2b2cb 100644 --- a/src/compilation_error_kind.rs +++ b/src/compilation_error_kind.rs @@ -97,6 +97,14 @@ pub(crate) enum CompilationErrorKind<'src> { setting: &'src str, }, UnpairedCarriageReturn, + UnexpectedClosingDelimiter { + close: Delimiter, + }, + MismatchedClosingDelimiter { + close: Delimiter, + open: Delimiter, + open_line: usize, + }, UnterminatedInterpolation, UnterminatedString, UnterminatedBacktick, diff --git a/src/delimiter.rs b/src/delimiter.rs new file mode 100644 index 0000000..afc3cf7 --- /dev/null +++ b/src/delimiter.rs @@ -0,0 +1,24 @@ +#[derive(PartialEq, Eq, Debug, Copy, Clone)] +pub(crate) enum Delimiter { + Brace, + Bracket, + Paren, +} + +impl Delimiter { + pub(crate) fn open(self) -> char { + match self { + Self::Brace => '{', + Self::Bracket => '[', + Self::Paren => '(', + } + } + + pub(crate) fn close(self) -> char { + match self { + Self::Brace => '}', + Self::Bracket => ']', + Self::Paren => ')', + } + } +} diff --git a/src/lexer.rs b/src/lexer.rs index 9b763d9..4be599a 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -32,6 +32,8 @@ pub(crate) struct Lexer<'src> { indentation: Vec<&'src str>, /// Current interpolation start token interpolation_start: Option>, + /// Current open delimiters + open_delimiters: Vec<(Delimiter, usize)>, } impl<'src> Lexer<'src> { @@ -59,6 +61,7 @@ impl<'src> Lexer<'src> { recipe_body_pending: false, recipe_body: false, interpolation_start: None, + open_delimiters: Vec::new(), chars, next, src, @@ -431,14 +434,15 @@ impl<'src> Lexer<'src> { self.advance()?; } - let indentation = self.lexeme(); - - 
self.indentation.push(indentation); - - self.token(Indent); - - if self.recipe_body_pending { - self.recipe_body = true; + if self.open_delimiters() { + self.token(Whitespace); + } else { + let indentation = self.lexeme(); + self.indentation.push(indentation); + self.token(Indent); + if self.recipe_body_pending { + self.recipe_body = true; + } } Ok(()) @@ -452,23 +456,24 @@ impl<'src> Lexer<'src> { '!' => self.lex_bang(), '*' => self.lex_single(Asterisk), '@' => self.lex_single(At), - '[' => self.lex_single(BracketL), - ']' => self.lex_single(BracketR), + '[' => self.lex_delimiter(BracketL), + ']' => self.lex_delimiter(BracketR), '=' => self.lex_choice('=', EqualsEquals, Equals), ',' => self.lex_single(Comma), ':' => self.lex_colon(), - '(' => self.lex_single(ParenL), - ')' => self.lex_single(ParenR), - '{' => self.lex_single(BraceL), - '}' => self.lex_single(BraceR), + '(' => self.lex_delimiter(ParenL), + ')' => self.lex_delimiter(ParenR), + '{' => self.lex_delimiter(BraceL), + '}' => self.lex_delimiter(BraceR), '+' => self.lex_single(Plus), - '\n' => self.lex_single(Eol), - '\r' => self.lex_cr_lf(), '#' => self.lex_comment(), '`' => self.lex_backtick(), - ' ' | '\t' => self.lex_whitespace(), - '\'' => self.lex_raw_string(), + ' ' => self.lex_whitespace(), '"' => self.lex_cooked_string(), + '\'' => self.lex_raw_string(), + '\n' => self.lex_eol(), + '\r' => self.lex_eol(), + '\t' => self.lex_whitespace(), _ if Self::is_identifier_start(start) => self.lex_identifier(), _ => { self.advance()?; @@ -589,6 +594,53 @@ impl<'src> Lexer<'src> { Ok(()) } + /// Lex an opening or closing delimiter + fn lex_delimiter(&mut self, kind: TokenKind) -> CompilationResult<'src, ()> { + use Delimiter::*; + + match kind { + BraceL => self.open_delimiter(Brace), + BraceR => self.close_delimiter(Brace)?, + BracketL => self.open_delimiter(Bracket), + BracketR => self.close_delimiter(Bracket)?, + ParenL => self.open_delimiter(Paren), + ParenR => self.close_delimiter(Paren)?, + _ => + 
return Err(self.internal_error(format!( + "Lexer::lex_delimiter called with non-delimiter token: `{}`", + kind, + ))), + } + + // Emit the delimiter token + self.lex_single(kind) + } + + /// Push a delimiter onto the open delimiter stack + fn open_delimiter(&mut self, delimiter: Delimiter) { + self + .open_delimiters + .push((delimiter, self.token_start.line)); + } + + /// Pop a delimiter from the open delimiter stack and error if incorrect type + fn close_delimiter(&mut self, close: Delimiter) -> CompilationResult<'src, ()> { + match self.open_delimiters.pop() { + Some((open, _)) if open == close => Ok(()), + Some((open, open_line)) => Err(self.error(MismatchedClosingDelimiter { + open, + close, + open_line, + })), + None => Err(self.error(UnexpectedClosingDelimiter { close })), + } + } + + /// Return true if there are any unclosed delimiters + fn open_delimiters(&self) -> bool { + !self.open_delimiters.is_empty() + } + /// Lex a token starting with '!' fn lex_bang(&mut self) -> CompilationResult<'src, ()> { self.presume('!')?; @@ -621,14 +673,22 @@ impl<'src> Lexer<'src> { } /// Lex a carriage return and line feed - fn lex_cr_lf(&mut self) -> CompilationResult<'src, ()> { - self.presume('\r')?; - - if !self.accepted('\n')? { - return Err(self.error(UnpairedCarriageReturn)); + fn lex_eol(&mut self) -> CompilationResult<'src, ()> { + if self.accepted('\r')? { + if !self.accepted('\n')? { + return Err(self.error(UnpairedCarriageReturn)); + } + } else { + self.presume('\n')?; } - self.token(Eol); + // Emit an eol if there are no open delimiters, otherwise emit a whitespace + // token. + if self.open_delimiters() { + self.token(Whitespace); + } else { + self.token(Eol); + } Ok(()) } @@ -958,8 +1018,8 @@ mod tests { test! { name: brace_r, - text: "}", - tokens: (BraceR), + text: "{}", + tokens: (BraceL, BraceR), } test! { @@ -970,8 +1030,8 @@ mod tests { test! 
{ name: brace_rrr, - text: "}}}", - tokens: (BraceR, BraceR, BraceR), + text: "{{{}}}", + tokens: (BraceL, BraceL, BraceL, BraceR, BraceR, BraceR), } test! { @@ -1801,7 +1861,7 @@ mod tests { test! { name: tokenize_parens, - text: "((())) )abc(+", + text: "((())) ()abc(+", tokens: ( ParenL, ParenL, @@ -1810,6 +1870,7 @@ mod tests { ParenR, ParenR, Whitespace, + ParenL, ParenR, Identifier:"abc", ParenL, @@ -1846,8 +1907,18 @@ mod tests { test! { name: brackets, - text: "][", - tokens: (BracketR, BracketL), + text: "[][]", + tokens: (BracketL, BracketR, BracketL, BracketR), + } + + test! { + name: open_delimiter_eol, + text: "[\n](\n){\n}", + tokens: ( + BracketL, Whitespace:"\n", BracketR, + ParenL, Whitespace:"\n", ParenR, + BraceL, Whitespace:"\n", BraceR + ), } error! { @@ -2049,6 +2120,20 @@ mod tests { kind: UnexpectedCharacter { expected: '=' }, } + error! { + name: mismatched_closing_brace, + input: "(]", + offset: 1, + line: 0, + column: 1, + width: 0, + kind: MismatchedClosingDelimiter { + open: Delimiter::Paren, + close: Delimiter::Bracket, + open_line: 0, + }, + } + #[test] fn presume_error() { assert_matches!( diff --git a/src/lib.rs b/src/lib.rs index 7b26ac6..4d1d422 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,6 +65,7 @@ mod config; mod config_error; mod count; mod default; +mod delimiter; mod dependency; mod empty; mod enclosure; diff --git a/src/summary.rs b/src/summary.rs index 71d524a..ac372a2 100644 --- a/src/summary.rs +++ b/src/summary.rs @@ -244,8 +244,8 @@ impl Expression { } => Expression::Conditional { lhs: Box::new(Expression::new(lhs)), rhs: Box::new(Expression::new(rhs)), - then: Box::new(Expression::new(lhs)), - otherwise: Box::new(Expression::new(rhs)), + then: Box::new(Expression::new(then)), + otherwise: Box::new(Expression::new(otherwise)), inverted: *inverted, }, StringLiteral { string_literal } => Expression::String { diff --git a/tests/delimiters.rs b/tests/delimiters.rs new file mode 100644 index 0000000..638ca5e --- 
/dev/null +++ b/tests/delimiters.rs @@ -0,0 +1,104 @@ +use crate::common::*; + +test! { + name: mismatched_delimiter, + justfile: "(]", + stderr: " + error: Mismatched closing delimiter `]`. (Did you mean to close the `(` on line 1?) + | + 1 | (] + | ^ + ", + status: EXIT_FAILURE, +} + +test! { + name: unexpected_delimiter, + justfile: "]", + stderr: " + error: Unexpected closing delimiter `]` + | + 1 | ] + | ^ + ", + status: EXIT_FAILURE, +} + +test! { + name: paren_continuation, + justfile: " + x := ( + 'a' + + + 'b' + ) + + foo: + echo {{x}} + ", + stdout: "ab\n", + stderr: "echo ab\n", +} + +test! { + name: brace_continuation, + justfile: " + x := if '' == '' { + 'a' + } else { + 'b' + } + + foo: + echo {{x}} + ", + stdout: "a\n", + stderr: "echo a\n", +} + +test! { + name: bracket_continuation, + justfile: " + set shell := [ + 'sh', + '-cu', + ] + + foo: + echo foo + ", + stdout: "foo\n", + stderr: "echo foo\n", +} + +test! { + name: dependency_continuation, + justfile: " + foo: ( + bar 'bar' + ) + echo foo + + bar x: + echo {{x}} + ", + stdout: "bar\nfoo\n", + stderr: "echo bar\necho foo\n", +} + +test! { + name: no_interpolation_continuation, + justfile: " + foo: + echo {{ ( + 'a' + 'b')}} + ", + stdout: "", + stderr: " + error: Unterminated interpolation + | + 2 | echo {{ ( + | ^^ + ", + status: EXIT_FAILURE, +} diff --git a/tests/lib.rs b/tests/lib.rs index a89f5c6..ea2abbf 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -6,6 +6,7 @@ mod common; mod choose; mod completions; mod conditional; +mod delimiters; mod dotenv; mod edit; mod error_messages;