From 10ea99e95c4a8364114562e31b20f6929d9f9eca Mon Sep 17 00:00:00 2001 From: Greg Shuflin Date: Fri, 12 Nov 2021 02:02:31 -0800 Subject: [PATCH] rewrite source reference to use raw offsets --- schala-lang/language/src/parsing/new.rs | 6 ++--- schala-lang/language/src/schala.rs | 31 ++++++++++++++++++------- schala-lang/language/src/tokenizing.rs | 6 +++++ 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/schala-lang/language/src/parsing/new.rs b/schala-lang/language/src/parsing/new.rs index ad87cb3..e149b46 100644 --- a/schala-lang/language/src/parsing/new.rs +++ b/schala-lang/language/src/parsing/new.rs @@ -30,7 +30,7 @@ peg::parser! { stmt:statement() delimiter()+ { stmt } rule statement() -> Statement = - kind:statement_kind() { Statement { id: Default::default(), location: Default::default(), kind } } + pos:position!() kind:statement_kind() { Statement { id: Default::default(), location: pos.into(), kind } } rule statement_kind() -> StatementKind = __ import:import() { StatementKind::Import(import) } / @@ -382,9 +382,9 @@ peg::parser! { rule condition_guard() -> Option<Expression> = ("if" _ expr:expression() { expr } )? 
- rule expr_or_block() -> Block = block() / ex:expression() { + rule expr_or_block() -> Block = block() / pos:position!() ex:expression() { Statement { - id: Default::default(), location: Default::default(), + id: Default::default(), location: pos.into(), kind: StatementKind::Expression(ex) }.into() } diff --git a/schala-lang/language/src/schala.rs b/schala-lang/language/src/schala.rs index e588bb8..2a5d5c6 100644 --- a/schala-lang/language/src/schala.rs +++ b/schala-lang/language/src/schala.rs @@ -122,24 +122,39 @@ impl<'a> Schala<'a> { /// Represents lines of source code pub(crate) struct SourceReference { - lines: Option<Vec<String>>, + last_source: Option<String>, + /// Offsets in *bytes* (not chars) representing a newline character + newline_offsets: Vec<usize>, } impl SourceReference { fn new() -> SourceReference { - SourceReference { lines: None } + SourceReference { last_source: None, newline_offsets: vec![]} } fn load_new_source(&mut self, source: &str) { - //TODO this is a lot of heap allocations - maybe there's a way to make it more efficient? 
- self.lines = Some(source.lines().map(|s| s.to_string()).collect()); + + for (offset, ch) in source.as_bytes().iter().enumerate() { + if *ch == ('\n' as u8) { + self.newline_offsets.push(offset); + } + } + self.last_source = Some(source.to_string()); } pub fn get_line(&self, line: usize) -> String { - self.lines - .as_ref() - .and_then(|x| x.get(line).map(|s| s.to_string())) - .unwrap_or_else(|| "NO LINE FOUND".to_string()) + //TODO make sure this is utf8-safe + let start_idx = match self.newline_offsets.binary_search(&line) { + Ok(index) | Err(index) => index, + }; + + let last_source = self.last_source.as_ref().unwrap(); + + let start = self.newline_offsets[start_idx]; + let end = self.newline_offsets.get(start_idx + 1).cloned().unwrap_or_else(|| last_source.len()); + + let slice = &last_source.as_bytes()[start..end]; + std::str::from_utf8(slice).unwrap().to_string() } } diff --git a/schala-lang/language/src/tokenizing.rs b/schala-lang/language/src/tokenizing.rs index 08cdef5..9abc3ce 100644 --- a/schala-lang/language/src/tokenizing.rs +++ b/schala-lang/language/src/tokenizing.rs @@ -18,6 +18,12 @@ pub struct Location { pub(crate) offset: usize, } +impl From<usize> for Location { + fn from(offset: usize) -> Self { + Self { offset } + } +} + impl fmt::Display for Location { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.offset)