filter out whitespace at the beginning and end of lines

This commit is contained in:
Grant Lemons
2024-08-29 13:13:33 -06:00
parent b2bb1a3b6a
commit 236c2b13ff

View File

@@ -1,10 +1,11 @@
use harper_core::parsers::{self, Parser, PlainEnglish};
use harper_core::Span;
use harper_core::Token;
use harper_tree_sitter::TreeSitterMasker;
use tree_sitter::Node;
pub struct LatexParser {
inner: parsers::Mask<TreeSitterMasker, PlainEnglish>
inner: parsers::Mask<TreeSitterMasker, PlainEnglish>,
}
impl LatexParser {
@@ -35,14 +36,73 @@ impl Default for LatexParser {
Self {
inner: parsers::Mask::new(
TreeSitterMasker::new(tree_sitter_latex::language(), Self::node_condition),
PlainEnglish
)
PlainEnglish,
),
}
}
}
impl Parser for LatexParser {
fn parse(&mut self, source: &[char]) -> Vec<Token> {
self.inner.parse(source)
let mut tokens = Vec::new();
let mut chars_traversed = 0;
for line in source.split(|c| *c == '\n') {
let mut new_tokens = parse_line(line, &mut self.inner);
new_tokens.push(Token::new(
Span::new_with_len(line.len(), 1),
harper_core::TokenKind::Newline(1),
));
new_tokens
.iter_mut()
.for_each(|t| t.span.push_by(chars_traversed));
chars_traversed += line.len() + 1;
tokens.append(&mut new_tokens);
}
tokens
}
}
fn parse_line(
source: &[char],
parser: &mut parsers::Mask<TreeSitterMasker, PlainEnglish>,
) -> Vec<Token> {
let actual = without_leading(source);
if actual.is_empty() {
return Vec::new();
}
let source = actual.get_content(source);
let mut new_tokens = parser.parse(source);
new_tokens
.iter_mut()
.for_each(|t| t.span.push_by(actual.start));
new_tokens
}
fn without_leading(source: &[char]) -> Span {
// Skip over the comment start characters
let actual_start = source
.iter()
.position(|c| !c.is_whitespace())
.unwrap_or(source.len());
// Chop off the end
let actual_end = source.len()
- source
.iter()
.rev()
.position(|c| !c.is_whitespace())
.unwrap_or(0);
Span::new(actual_start, actual_end)
}