chore(core): rename + document items (#1523)

Elijah Potter
2025-07-10 15:01:30 -06:00
committed by GitHub
parent b325d5dbe2
commit 705331f878
61 changed files with 238 additions and 145 deletions

View File

@@ -1,7 +1,8 @@
use std::{borrow::Cow, io::Read, path::PathBuf};
use harper_core::spell::Dictionary;
use harper_core::{
Dictionary, Document,
Document,
parsers::{MarkdownOptions, PlainEnglish},
};

View File

@@ -1,5 +1,6 @@
#![doc = include_str!("../README.md")]
use harper_core::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary, WordId};
use hashbrown::HashMap;
use std::collections::BTreeMap;
use std::fs::File;
@@ -16,8 +17,7 @@ use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::{Markdown, MarkdownOptions, OrgMode, PlainEnglish};
use harper_core::{
CharStringExt, Dialect, Dictionary, Document, FstDictionary, MergedDictionary,
MutableDictionary, TokenKind, TokenStringExt, WordId, WordMetadata, remove_overlaps,
CharStringExt, Dialect, Document, TokenKind, TokenStringExt, WordMetadata, remove_overlaps,
};
use harper_literate_haskell::LiterateHaskellParser;
use harper_pos_utils::{BrillChunker, BrillTagger};
@@ -45,8 +45,12 @@ enum Args {
count: bool,
/// Restrict linting to only a specific set of rules.
/// If omitted, `harper-cli` will run every rule.
#[arg(short, long, value_delimiter = ',')]
only_lint_with: Option<Vec<String>>,
#[arg(long, value_delimiter = ',')]
ignore: Option<Vec<String>>,
/// Restrict linting to only a specific set of rules.
/// If omitted, `harper-cli` will run every rule.
#[arg(long, value_delimiter = ',')]
only: Option<Vec<String>>,
/// Specify the dialect.
#[arg(short, long, default_value = Dialect::American.to_string())]
dialect: Dialect,
@@ -145,7 +149,8 @@ fn main() -> anyhow::Result<()> {
Args::Lint {
input,
count,
only_lint_with,
ignore,
only,
dialect,
user_dict_path,
file_dict_path,
@@ -176,7 +181,7 @@ fn main() -> anyhow::Result<()> {
let mut linter = LintGroup::new_curated(Arc::new(merged_dict), dialect);
if let Some(rules) = only_lint_with {
if let Some(rules) = only {
linter.set_all_rules_to(Some(false));
for rule in rules {
@@ -184,6 +189,12 @@ fn main() -> anyhow::Result<()> {
}
}
if let Some(rules) = ignore {
for rule in rules {
linter.config.set_rule_enabled(rule, false);
}
}
let mut lints = linter.lint(&doc);
if count {
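
Taken together, the reworked flags split the old `--only-lint-with` into an allow-list and a deny-list: `--only` restricts linting to a comma-separated set of rules, while `--ignore` disables specific rules. A hypothetical invocation (rule names are illustrative): `harper-cli lint README.md --only SpellCheck,RepeatedWords --ignore RepeatedWords --dialect American`.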

View File

@@ -1,11 +1,12 @@
use std::path::Path;
use crate::comment_parsers;
use comment_parsers::{Go, JavaDoc, JsDoc, Solidity, Unit};
use harper_core::Token;
use harper_core::parsers::{self, MarkdownOptions, Parser};
use harper_core::{MutableDictionary, Token};
use harper_core::spell::MutableDictionary;
use tree_sitter::Node;
use crate::comment_parsers;
use crate::masker::CommentMasker;
pub struct CommentParser {

View File

@@ -1,4 +1,5 @@
use harper_core::{Masker, MutableDictionary};
use harper_core::Masker;
use harper_core::spell::MutableDictionary;
use harper_tree_sitter::TreeSitterMasker;
pub struct CommentMasker {

View File

@@ -3,7 +3,8 @@ use std::path::Path;
use harper_comments::CommentParser;
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::MarkdownOptions;
use harper_core::{Dialect, Document, FstDictionary};
use harper_core::spell::FstDictionary;
use harper_core::{Dialect, Document};
/// Creates a unit test checking that the linting of a source file in
/// `language_support_sources` produces the expected number of lints.

View File

@@ -1,6 +1,7 @@
use criterion::{Criterion, criterion_group, criterion_main};
use harper_core::linting::{LintGroup, Linter};
use harper_core::{Dialect, Document, FstDictionary};
use harper_core::spell::FstDictionary;
use harper_core::{Dialect, Document};
use std::hint::black_box;
static ESSAY: &str = include_str!("./essay.md");

View File

@@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize};
use crate::Number;
/// A national or international currency
#[derive(Debug, Is, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Hash)]
pub enum Currency {
// $

View File

@@ -9,10 +9,9 @@ use crate::expr::{Expr, ExprExt, LongestMatchOf, Repeating, SequenceExpr};
use crate::parsers::{Markdown, MarkdownOptions, Parser, PlainEnglish};
use crate::patterns::WordSet;
use crate::punctuation::Punctuation;
use crate::spell::{Dictionary, FstDictionary};
use crate::vec_ext::VecExt;
use crate::{
Dictionary, FatStringToken, FatToken, FstDictionary, Lrc, Token, TokenKind, TokenStringExt,
};
use crate::{FatStringToken, FatToken, Lrc, Token, TokenKind, TokenStringExt};
use crate::{OrdinalSuffix, Span};
/// A document containing some amount of lexed and parsed English text.

View File

@@ -1,7 +1,8 @@
use crate::{Span, Token, expr::Expr};
/// A [`Step`] that consumes a list of expressions and only
/// matches if all the child [`Expr`]s do.
/// An [`Expr`] that matches against tokens if and only if all of its children do.
/// This can be useful for situations where you have multiple expressions that represent a grammatical
/// error, but you need _all_ of them to match to be certain.
///
/// It will return the position of the farthest window.
#[derive(Default)]
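
For context, `All` is composed the way several rules later in this commit compose it; a minimal sketch with illustrative child expressions:

```rust
// Matches at a position only if *every* child expression matches there.
let expr = All::new(vec![
    Box::new(SequenceExpr::aco("then").then_whitespace().then_adjective()),
    Box::new(SequenceExpr::default().then_unless(Word::new("that"))),
]);
```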

View File

@@ -6,13 +6,19 @@ use super::Expr;
/// A map from an [`Expr`] to arbitrary data.
///
/// When used as a [`Expr`] in and of itself, it simply iterates through
/// all contained exprs, returning the first match found.
/// You should not assume this search is deterministic.
/// It has been a common pattern for rule authors to build a list of expressions that match a
/// grammatical error.
/// Then, depending on which expression was matched, a suggestion is chosen from another list.
///
/// If you'd like to use this structure in a [`PatternLinter`](crate::linting::PatternLinter), you may want to provide
/// the map as the search expr, then use a pattern lookup once more to determine
/// the corresponding key.
/// The [`ExprMap`] unifies these two lists into one.
///
/// A great example of this is the [`PronounInflectionBe`](crate::linting::PronounInflectionBe)
/// rule.
/// It builds a list of incorrect `PRONOUN + BE` combinations, alongside their corrections.
///
/// When used as an [`Expr`] in and of itself, it simply iterates through
/// all contained expressions, returning the first match found.
/// You should not assume this search is deterministic.
pub struct ExprMap<T>
where
T: LSend,

View File

@@ -1,6 +1,6 @@
use crate::{Span, Token, expr::Expr};
/// A [`Step`] that returns the farthest offset of the longest match in a list of expressions.
/// An [`Expr`] that returns the farthest offset of the longest match in a list of expressions.
#[derive(Default)]
pub struct LongestMatchOf {
exprs: Vec<Box<dyn Expr>>,

View File

@@ -1,8 +1,8 @@
use std::sync::Arc;
use crate::{CharString, Dictionary, FstDictionary, Span, Token, WordMetadata};
use super::{Expr, SequenceExpr, SpaceOrHyphen};
use crate::spell::{Dictionary, FstDictionary};
use crate::{CharString, Span, Token, WordMetadata};
type PredicateFn = dyn Fn(Option<&WordMetadata>, Option<&WordMetadata>) -> bool + Send + Sync;

View File

@@ -1,3 +1,18 @@
//! An `Expr` is a declarative way to express whether a certain set of tokens fulfills a criterion.
//!
//! For example, if we want to look for the word "that" followed by an adjective, we could build an
//! expression to do so.
//!
//! The actual searching is done by another system (usually a part of the [lint framework](crate::linting::ExprLinter)).
//! It iterates through a document, checking if each index matches the criteria.
//!
//! When supplied a specific position in a token stream, the technical job of an `Expr` is to determine the window of tokens (including the cursor itself) that fulfills whatever criteria the author desires.
//!
//! The goal of the `Expr` initiative is to make rules easier to _read_ as well as to write.
//! Gone are the days of trying to manually parse the logic of someone else's Rust code.
//!
//! See also: [`SequenceExpr`].
mod all;
mod anchor_end;
mod anchor_start;
@@ -44,10 +59,6 @@ pub use word_expr_group::WordExprGroup;
use crate::{Document, LSend, Span, Token};
/// A common problem in Harper is that we need to identify tokens that fulfil certain criterion.
/// An `Expr` is a way to express whether a certain set of tokens fulfil that criteria.
/// When supplied a specific position in a token stream, the job of an `Expr` is to determine the window of tokens (including the cursor itself) that fulfils whatever criteria the author desires.
/// It is then the job of another system to identify portions of documents that fulfil this criteria.
pub trait Expr: LSend {
fn run(&self, cursor: usize, tokens: &[Token], source: &[char]) -> Option<Span>;
}
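
To make the module doc concrete, here is a sketch of the "that" followed by an adjective example, using combinators that appear elsewhere in this commit (the `Document` accessors `get_tokens`/`get_source` are assumptions):

```rust
// Build the expression once...
let expr = SequenceExpr::aco("that")
    .then_whitespace()
    .then_adjective();

// ...then ask it for the matching window at a given cursor position.
// Returns `Some(Span)` covering the matched tokens, or `None`.
let window = expr.run(cursor, doc.get_tokens(), doc.get_source());
```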

View File

@@ -16,18 +16,21 @@ pub struct SequenceExpr {
macro_rules! gen_then_from_is {
($quality:ident) => {
paste! {
#[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
pub fn [< then_$quality >] (self) -> Self{
self.then(|tok: &Token, _source: &[char]| {
tok.kind.[< is_$quality >]()
})
}
#[doc = concat!("Adds a step matching one or more consecutive tokens where [`TokenKind::is_", stringify!($quality), "()`] returns true.")]
pub fn [< then_one_or_more_$quality s >] (self) -> Self{
self.then_one_or_more(Box::new(|tok: &Token, _source: &[char]| {
tok.kind.[< is_$quality >]()
}))
}
#[doc = concat!("Adds a step matching a token where [`TokenKind::is_", stringify!($quality), "()`] returns false.")]
pub fn [< then_anything_but_$quality >] (self) -> Self{
self.then(|tok: &Token, _source: &[char]| {
if tok.kind.[< is_$quality >](){
@@ -71,6 +74,7 @@ impl Expr for SequenceExpr {
}
impl SequenceExpr {
/// Push an [expression](Expr) to the operation list.
pub fn then(mut self, expr: impl Expr + 'static) -> Self {
self.exprs.push(Box::new(expr));
self
@@ -83,11 +87,13 @@ impl SequenceExpr {
}
/// Appends the steps in `other` onto the end of `self`.
pub fn then_expr(mut self, mut other: Self) -> Self {
/// This is more efficient than [`Self::then`] because it avoids pointer redirection.
pub fn then_seq(mut self, mut other: Self) -> Self {
self.exprs.append(&mut other.exprs);
self
}
/// Push an [`IndefiniteArticle`] to the end of the operation list.
pub fn then_indefinite_article(self) -> Self {
self.then(IndefiniteArticle::default())
}
@@ -102,6 +108,7 @@ impl SequenceExpr {
Self::any_capitalization_of(word)
}
/// Construct a new sequence with a [`Word`] at the beginning of the operation list.
pub fn any_capitalization_of(word: &'static str) -> Self {
Self::default().then_any_capitalization_of(word)
}
@@ -141,16 +148,27 @@ impl SequenceExpr {
}
/// Create a new condition that will step one token forward if met.
pub fn if_not_then_step_one(self, condition: impl Expr + 'static) -> Self {
/// If the condition is _not_ met, the whole expression returns `None`.
///
/// This can be used to build out exceptions to other rules.
///
/// See [`UnlessStep`] for more info.
pub fn then_unless(self, condition: impl Expr + 'static) -> Self {
self.then(UnlessStep::new(condition, |_tok: &Token, _src: &[char]| {
true
}))
}
/// Match any single token.
///
/// Shorthand for [`Self::then_anything`].
pub fn t_any(self) -> Self {
self.then_anything()
}
/// Match any single token.
///
/// See [`AnyPattern`] for more info.
pub fn then_anything(self) -> Self {
self.then(AnyPattern)
}
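
The builders above chain naturally; this mirrors how the `TheHowWhy` rule later in this commit uses them:

```rust
// Match "the how", unless it is followed by whitespace and "to".
let the_how = SequenceExpr::default()
    .t_aco("the")
    .then_whitespace()
    .t_aco("how")
    .then_unless(SequenceExpr::default().then_whitespace().t_aco("to"));
```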

View File

@@ -61,8 +61,9 @@ mod tests {
use quickcheck_macros::quickcheck;
use super::IgnoredLints;
use crate::spell::FstDictionary;
use crate::{
Dialect, Document, FstDictionary,
Dialect, Document,
linting::{LintGroup, Linter},
};

View File

@@ -1,4 +1,7 @@
use crate::{Dictionary, Document, Token, TokenKind};
//! This module implements rudimentary, dictionary-based English language detection.
use crate::spell::Dictionary;
use crate::{Document, Token, TokenKind};
/// Check if the contents of the document are likely intended to represent
/// English.
@@ -51,7 +54,8 @@ pub fn is_likely_english(toks: &[Token], source: &[char], dict: &impl Dictionary
#[cfg(test)]
mod tests {
use super::is_doc_likely_english;
use crate::{Document, FstDictionary};
use crate::Document;
use crate::spell::FstDictionary;
fn assert_not_english(source: &'static str) {
let dict = FstDictionary::curated();
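
A sketch of how the detector is queried (the `is_likely_english` parameter list is visible in the hunk above; the `Document` constructor name is an assumption):

```rust
let dict = FstDictionary::curated();
let doc = Document::new_plain_english_curated("This is plainly English text.");
// Rudimentary and dictionary-based: treat the answer as a heuristic, not a guarantee.
let likely = is_likely_english(doc.get_tokens(), doc.get_source(), &dict);
```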

View File

@@ -41,7 +41,6 @@ pub use mask::{Mask, Masker};
pub use number::{Number, OrdinalSuffix};
pub use punctuation::{Punctuation, Quote};
pub use span::Span;
pub use spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary, WordId};
pub use sync::{LSend, Lrc};
pub use title_case::{make_title_case, make_title_case_str};
pub use token::Token;
@@ -85,8 +84,9 @@ pub fn remove_overlaps(lints: &mut Vec<Lint>) {
#[cfg(test)]
mod tests {
use crate::spell::FstDictionary;
use crate::{
Dialect, Document, FstDictionary,
Dialect, Document,
linting::{LintGroup, Linter},
remove_overlaps,
};

View File

@@ -48,7 +48,7 @@ impl DiscourseMarkers {
expr: SequenceExpr::default()
.then(phrases_expr)
.t_ws()
.if_not_then_step_one(UPOSSet::new(&[UPOS::ADJ, UPOS::ADV, UPOS::ADP])),
.then_unless(UPOSSet::new(&[UPOS::ADJ, UPOS::ADV, UPOS::ADP])),
}
}

View File

@@ -43,7 +43,7 @@ impl Default for HowTo {
})),
);
pattern.add(SequenceExpr::default().if_not_then_step_one(exceptions));
pattern.add(SequenceExpr::default().then_unless(exceptions));
Self {
expr: Box::new(pattern),

View File

@@ -1,6 +1,6 @@
use crate::{Dictionary, Document, Span, TokenStringExt};
use super::{Lint, LintKind, Linter, Suggestion};
use crate::spell::Dictionary;
use crate::{Document, Span, TokenStringExt};
pub struct InflectedVerbAfterTo<T>
where
@@ -129,10 +129,8 @@ impl<T: Dictionary> Linter for InflectedVerbAfterTo<T> {
#[cfg(test)]
mod tests {
use super::InflectedVerbAfterTo;
use crate::{
FstDictionary,
linting::tests::{assert_lint_count, assert_suggestion_result},
};
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
use crate::spell::FstDictionary;
#[test]
fn dont_flag_to_check_both_verb_and_noun() {

View File

@@ -31,7 +31,7 @@ impl Default for ItsContraction {
.then_anything()
.then(WordSet::new(&["own", "intended"]));
let inverted = SequenceExpr::default().if_not_then_step_one(exceptions);
let inverted = SequenceExpr::default().then_unless(exceptions);
let expr =
All::new(vec![Box::new(positive), Box::new(inverted)]).or(SequenceExpr::aco("its")

View File

@@ -14,7 +14,7 @@ impl Default for Likewise {
expr.add(SequenceExpr::aco("like").then_whitespace().t_aco("wise"));
expr.add(
SequenceExpr::default().if_not_then_step_one(
SequenceExpr::default().then_unless(
SequenceExpr::default()
.then_anything()
.then_whitespace()

View File

@@ -107,8 +107,8 @@ use super::{ExprLinter, Lint};
use crate::linting::dashes::Dashes;
use crate::linting::open_compounds::OpenCompounds;
use crate::linting::{closed_compounds, initialisms, phrase_corrections};
use crate::spell::{Dictionary, MutableDictionary};
use crate::{CharString, Dialect, Document, TokenStringExt};
use crate::{Dictionary, MutableDictionary};
fn ser_ordered<S>(map: &HashMap<String, Option<bool>>, ser: S) -> Result<S::Ok, S::Error>
where
@@ -567,9 +567,9 @@ impl Linter for LintGroup {
mod tests {
use std::sync::Arc;
use crate::{Dialect, Document, FstDictionary, MutableDictionary, linting::Linter};
use super::LintGroup;
use crate::spell::{FstDictionary, MutableDictionary};
use crate::{Dialect, Document, linting::Linter};
#[test]
fn can_get_all_descriptions() {

View File

@@ -2,9 +2,9 @@ use std::sync::Arc;
use itertools::Itertools;
use crate::{CharString, Dictionary, Document, FstDictionary, Span};
use super::{Lint, LintKind, Linter, Suggestion};
use crate::spell::{Dictionary, FstDictionary};
use crate::{CharString, Document, Span};
pub struct MergeWords {
dict: Arc<FstDictionary>,

View File

@@ -215,7 +215,7 @@ use crate::{Document, LSend, render_markdown};
/// A __stateless__ rule that searches documents for grammatical errors.
///
/// Commonly implemented via [`PatternLinter`].
/// Commonly implemented via [`ExprLinter`].
///
/// See also: [`LintGroup`].
pub trait Linter: LSend {
@@ -247,7 +247,8 @@ pub mod tests {
use hashbrown::HashSet;
use super::Linter;
use crate::{Document, FstDictionary, parsers::PlainEnglish};
use crate::spell::FstDictionary;
use crate::{Document, parsers::PlainEnglish};
#[track_caller]
pub fn assert_no_lints(text: &str, mut linter: impl Linter) {
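
For orientation, the tests throughout this commit drive a linter the same way; a minimal sketch (the `Document` constructor name is an assumption, the rest is grounded in this diff):

```rust
let mut linter = LintGroup::new_curated(FstDictionary::curated(), Dialect::American);
let doc = Document::new_markdown_default_curated("An example sentence.");
// `Linter` is stateless: each call searches the document afresh.
let lints = linter.lint(&doc);
```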

View File

@@ -1,7 +1,8 @@
use std::sync::Arc;
use super::{Lint, LintKind, Linter, Suggestion};
use crate::{CharStringExt, Dictionary, Document, FstDictionary, Span, TokenKind, TokenStringExt};
use crate::spell::{Dictionary, FstDictionary};
use crate::{CharStringExt, Document, Span, TokenKind, TokenStringExt};
/// Detect phrasal verbs written as compound nouns.
pub struct PhrasalVerbAsCompoundNoun {

View File

@@ -1,14 +1,13 @@
use harper_brill::UPOS;
use crate::Dictionary;
use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::Token;
use crate::expr::All;
use crate::expr::Expr;
use crate::expr::SequenceExpr;
use crate::patterns::UPOSSet;
use crate::patterns::WordSet;
use super::{ExprLinter, Lint, LintKind, Suggestion};
use crate::spell::Dictionary;
pub struct PossessiveNoun<D> {
expr: Box<dyn Expr>,
@@ -40,9 +39,9 @@ where
let exceptions = SequenceExpr::default()
.t_any()
.t_any()
.if_not_then_step_one(WordSet::new(&["flags", "checks", "catches", "you"]))
.then_unless(WordSet::new(&["flags", "checks", "catches", "you"]))
.t_any()
.if_not_then_step_one(WordSet::new(&["form", "go"]));
.then_unless(WordSet::new(&["form", "go"]));
Self {
expr: Box::new(All::new(vec![
@@ -104,10 +103,9 @@ where
mod tests {
use std::sync::Arc;
use crate::FstDictionary;
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
use super::PossessiveNoun;
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
use crate::spell::FstDictionary;
fn test_linter() -> PossessiveNoun<Arc<FstDictionary>> {
PossessiveNoun::new(FstDictionary::curated())

View File

@@ -4,8 +4,9 @@ use serde::{Deserialize, Serialize};
use super::{ExprLinter, LintGroup};
use super::{Lint, LintKind, Suggestion};
use crate::Document;
use crate::parsers::PlainEnglish;
use crate::{Dictionary, Document};
use crate::spell::Dictionary;
use crate::{Token, TokenStringExt};
use std::sync::Arc;
@@ -137,12 +138,9 @@ pub fn lint_group(dictionary: Arc<impl Dictionary + 'static>) -> LintGroup {
#[cfg(test)]
mod tests {
use crate::{
FstDictionary,
linting::tests::{assert_lint_count, assert_suggestion_result},
};
use super::lint_group;
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
use crate::spell::FstDictionary;
#[test]
fn americas_lowercase() {

View File

@@ -1,7 +1,8 @@
use super::Suggestion;
use super::{Lint, LintKind, Linter};
use crate::document::Document;
use crate::{Dictionary, Token, TokenKind, TokenStringExt};
use crate::spell::Dictionary;
use crate::{Token, TokenKind, TokenStringExt};
pub struct SentenceCapitalization<T>
where
@@ -117,10 +118,9 @@ fn is_full_sentence(toks: &[Token]) -> bool {
#[cfg(test)]
mod tests {
use crate::FstDictionary;
use super::super::tests::assert_lint_count;
use super::SentenceCapitalization;
use crate::spell::FstDictionary;
#[test]
fn catches_basic() {

View File

@@ -6,8 +6,8 @@ use smallvec::ToSmallVec;
use super::Suggestion;
use super::{Lint, LintKind, Linter};
use crate::document::Document;
use crate::spell::suggest_correct_spelling;
use crate::{CharString, CharStringExt, Dialect, Dictionary, TokenStringExt};
use crate::spell::{Dictionary, suggest_correct_spelling};
use crate::{CharString, CharStringExt, Dialect, TokenStringExt};
pub struct SpellCheck<T>
where
@@ -129,15 +129,15 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
#[cfg(test)]
mod tests {
use super::SpellCheck;
use crate::spell::FstDictionary;
use crate::{
Dialect, FstDictionary,
Dialect,
linting::tests::{
assert_lint_count, assert_suggestion_result, assert_top3_suggestion_result,
},
};
use super::SpellCheck;
// Capitalization tests
#[test]

View File

@@ -18,13 +18,13 @@ impl Default for TheHowWhy {
.t_aco("the")
.then_whitespace()
.t_aco("how")
.if_not_then_step_one(SequenceExpr::default().then_whitespace().t_aco("to"));
.then_unless(SequenceExpr::default().then_whitespace().t_aco("to"));
let the_who = SequenceExpr::default()
.t_aco("the")
.then_whitespace()
.t_aco("who")
.if_not_then_step_one(
.then_unless(
SequenceExpr::default()
.then_whitespace()
.t_aco("'s")

View File

@@ -29,7 +29,7 @@ impl ThenThan {
.t_ws()
.t_aco("then")
.t_ws()
.if_not_then_step_one(Word::new("that")),
.then_unless(Word::new("that")),
),
// Positive form of adjective following "more" or "less"
Box::new(
@@ -42,7 +42,7 @@ impl ThenThan {
.t_ws()
.t_aco("then")
.t_ws()
.if_not_then_step_one(Word::new("that")),
.then_unless(Word::new("that")),
),
])),
// Exceptions to the rule.

View File

@@ -43,7 +43,7 @@ impl UseGenitive {
// Add a prelude to remove false-positives.
let full_pattern = SequenceExpr::default()
.if_not_then_step_one(LongestMatchOf::new(vec![
.then_unless(LongestMatchOf::new(vec![
Box::new(SequenceExpr::default().t_aco("is")),
Box::new(SequenceExpr::default().t_aco("were")),
Box::new(SequenceExpr::default().then_adjective()),

View File

@@ -28,7 +28,7 @@ impl Default for WayTooAdjective {
let expr = All::new(vec![
Box::new(base),
Box::new(SequenceExpr::default().if_not_then_step_one(exceptions)),
Box::new(SequenceExpr::default().then_unless(exceptions)),
]);
Self {

View File

@@ -4,11 +4,16 @@ use is_macro::Is;
use ordered_float::OrderedFloat;
use serde::{Deserialize, Serialize};
/// Represents a written number.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq, Eq, Hash, PartialOrd)]
pub struct Number {
/// The actual value of the number
pub value: OrderedFloat<f64>,
/// The ordinal suffix, if any (like the 1__st__ element).
pub suffix: Option<OrdinalSuffix>,
/// What base it is in (hex vs. decimal, for example).
pub radix: u32,
/// The level of precision the number is formatted with.
pub precision: usize,
}
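
Because every field is public, a literal shows the semantics directly (values are illustrative):

```rust
use ordered_float::OrderedFloat;

// The hexadecimal literal 0x1F: value 31, no ordinal suffix, base 16.
let n = Number {
    value: OrderedFloat(31.0),
    suffix: None,
    radix: 16,
    precision: 0,
};
```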

View File

@@ -5,7 +5,8 @@ use itertools::Itertools;
use super::Parser;
use crate::expr::{ExprExt, SequenceExpr};
use crate::{Dictionary, Lrc, Span, Token, TokenKind, VecExt};
use crate::spell::Dictionary;
use crate::{Lrc, Span, Token, TokenKind, VecExt};
/// A parser that wraps any other parser to collapse token strings that match
/// the pattern `word_word` or `word-word`.
@@ -60,13 +61,13 @@ impl Parser for CollapseIdentifiers {
#[cfg(test)]
mod tests {
use super::*;
use crate::spell::{FstDictionary, MergedDictionary, MutableDictionary};
use crate::{
FstDictionary, MergedDictionary, MutableDictionary, WordMetadata,
WordMetadata,
parsers::{PlainEnglish, StrParser},
};
use super::*;
#[test]
fn matches_kebab() {
let source: Vec<_> = "kebab-case".chars().collect();

View File

@@ -1,6 +1,6 @@
use crate::{Dictionary, language_detection::is_likely_english};
use super::{Parser, Token, TokenStringExt};
use crate::language_detection::is_likely_english;
use crate::spell::Dictionary;
/// A parser that wraps another, using heuristics to quickly redact paragraphs of a document that aren't
/// intended to be English text.
@@ -36,9 +36,9 @@ impl<D: Dictionary> Parser for IsolateEnglish<D> {
#[cfg(test)]
mod tests {
use crate::{Document, FstDictionary, TokenStringExt, parsers::PlainEnglish};
use super::IsolateEnglish;
use crate::spell::FstDictionary;
use crate::{Document, TokenStringExt, parsers::PlainEnglish};
/// Assert that the provided text contains _no_ chunks of valid English
fn assert_no_english(text: &str) {

View File

@@ -1,3 +1,5 @@
//! Adds support for parsing various programming and markup languages through a unified trait: [`Parser`].
mod collapse_identifiers;
mod isolate_english;
mod markdown;

View File

@@ -1,9 +1,9 @@
//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners.
//! [`Pattern`]s are one of the more powerful ways to query text inside Harper, especially for beginners. They are a simplified abstraction over [`Expr`](crate::expr::Expr).
//!
//! Through the [`PatternLinter`](crate::linting::PatternLinter) trait, they make it much easier to
//! Through the [`ExprLinter`](crate::linting::ExprLinter) trait, they make it much easier to
//! build Harper [rules](crate::linting::Linter).
//!
//! See the page about [`SequencePattern`] for a concrete example of their use.
//! See the page about [`SequenceExpr`](crate::expr::SequenceExpr) for a concrete example of their use.
use crate::{Document, LSend, Span, Token};

View File

@@ -219,8 +219,7 @@ mod tests {
use itertools::Itertools;
use crate::CharStringExt;
use crate::Dictionary;
use crate::WordId;
use crate::spell::{Dictionary, WordId};
use super::FstDictionary;

View File

@@ -1,3 +1,6 @@
//! Contains the relevant code for performing dictionary lookups and spellchecking (i.e. fuzzy
//! dictionary lookups).
use crate::{CharString, CharStringExt, WordMetadata};
pub use self::dictionary::Dictionary;
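
To illustrate what "fuzzy dictionary lookups" means in practice, a sketch using the `suggest_correct_spelling` helper imported elsewhere in this commit; its exact parameter list is an assumption, not taken from this diff:

```rust
let dict = FstDictionary::curated();
let misspelled: Vec<char> = "hullo".chars().collect();
// Hypothetical argument order: word, result limit, max edit distance, dictionary.
let suggestions = suggest_correct_spelling(&misspelled, 3, 2, &dict);
```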

View File

@@ -252,7 +252,7 @@ mod tests {
use hashbrown::HashSet;
use itertools::Itertools;
use crate::{Dictionary, MutableDictionary};
use crate::spell::{Dictionary, MutableDictionary};
#[test]
fn curated_contains_no_duplicates() {

View File

@@ -12,7 +12,8 @@ use super::expansion::{
Expansion, HumanReadableExpansion,
};
use super::word_list::MarkedWord;
use crate::{CharString, Span, WordId, WordMetadata};
use crate::spell::WordId;
use crate::{CharString, Span, WordMetadata};
#[derive(Debug, Clone)]
pub struct AttributeList {

View File

@@ -6,7 +6,8 @@ use crate::TokenKind;
use hashbrown::HashSet;
use lazy_static::lazy_static;
use crate::{CharStringExt, Dictionary, Document, TokenStringExt, parsers::Parser};
use crate::spell::Dictionary;
use crate::{CharStringExt, Document, TokenStringExt, parsers::Parser};
/// A helper function for [`make_title_case`] that uses Strings instead of char buffers.
pub fn make_title_case_str(source: &str, parser: &impl Parser, dict: &impl Dictionary) -> String {
@@ -105,15 +106,12 @@ fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary)
#[cfg(test)]
mod tests {
use quickcheck::TestResult;
use quickcheck_macros::quickcheck;
use super::make_title_case_str;
use crate::{
FstDictionary,
parsers::{Markdown, PlainEnglish},
};
use crate::parsers::{Markdown, PlainEnglish};
use crate::spell::FstDictionary;
#[test]
fn normal() {

View File

@@ -2,9 +2,12 @@ use serde::{Deserialize, Serialize};
use crate::{FatToken, Span, TokenKind};
/// Represents a semantic, parsed component of a [`Document`](crate::Document).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub struct Token {
/// The characters the token represents.
pub span: Span,
/// The parsed value.
pub kind: TokenKind,
}

View File

@@ -4,9 +4,17 @@ use serde::{Deserialize, Serialize};
use crate::{Number, Punctuation, Quote, TokenKind::Word, WordMetadata};
/// Generate wrapper code to pass a function call to the inner [`WordMetadata`],
/// if the token is indeed a word, while also emitting method-level documentation.
macro_rules! delegate_to_metadata {
($($method:ident),* $(,)?) => {
$(
#[doc = concat!(
"Delegates to [`WordMetadata::",
stringify!($method),
"`] when this token is a word.\n\n",
"Returns `false` if the token is not a word."
)]
pub fn $method(&self) -> bool {
let Word(Some(metadata)) = self else {
return false;
@@ -17,6 +25,10 @@ macro_rules! delegate_to_metadata {
};
}
/// The parsed value of a [`Token`](crate::Token).
/// Has a variety of queries available.
/// If there is a query missing, it may be easy to implement by just calling the
/// `delegate_to_metadata` macro.
#[derive(Debug, Is, Clone, Serialize, Deserialize, Default, PartialOrd, Hash, Eq, PartialEq)]
#[serde(tag = "kind", content = "value")]
pub enum TokenKind {
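
Rules query tokens through these delegated methods; for example (assuming `adjective` is among the delegated qualities, as the `SequenceExpr` macro earlier in this commit suggests):

```rust
// Delegates to `WordMetadata::is_adjective` for words; false for anything else.
if tok.kind.is_adjective() {
    // ...
}
```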

View File

@@ -8,7 +8,8 @@ use strum_macros::{Display, EnumCount, EnumString, VariantArray};
use std::convert::TryFrom;
use crate::{Document, TokenKind, TokenStringExt, WordId};
use crate::spell::WordId;
use crate::{Document, TokenKind, TokenStringExt};
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Hash)]
pub struct WordMetadata {
@@ -626,6 +627,7 @@ impl PronounData {
}
}
/// Additional metadata for determiners
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct DeterminerData {
pub is_demonstrative: Option<bool>,
@@ -642,9 +644,9 @@ impl DeterminerData {
}
}
// Degree is a property of adjectives: positive is not inflected
// Comparative is inflected with -er or comes after the word "more"
// Superlative is inflected with -est or comes after the word "most"
/// Degree is a property of adjectives: positive is not inflected,
/// comparative is inflected with -er or comes after the word "more",
/// superlative is inflected with -est or comes after the word "most".
#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Is, Hash)]
pub enum Degree {
Positive,
@@ -652,9 +654,9 @@ pub enum Degree {
Superlative,
}
// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
// In old grammars words like the articles and determiners are classified as adjectives but behave differently.
/// Some adjectives are not comparable so don't have -er or -est forms and can't be used with "more" or "most".
/// Some adjectives can only be used "attributively" (before a noun); some only predicatively (after "is" etc.).
/// In old grammars, words like the articles and determiners are classified as adjectives but behave differently.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdjectiveData {
pub degree: Option<Degree>,
@@ -669,9 +671,9 @@ impl AdjectiveData {
}
}
// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly
// other adverbs (time, place, etc) should probably not be considered adverbs for Harper's purposes
/// Adverb can be a "junk drawer" category for words which don't fit the other major categories.
/// The typical adverbs are "adverbs of manner", those derived from adjectives in -ly;
/// other adverbs (time, place, etc.) should probably not be considered adverbs for Harper's purposes.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Hash, Default)]
pub struct AdverbData {}
@@ -693,6 +695,10 @@ impl ConjunctionData {
}
/// A regional dialect.
///
/// Note: these have bit-shifted values so that they can ergonomically integrate with
/// `DialectFlags`. Each value here must have a unique bit index inside
/// `DialectsUnderlyingType`.
#[derive(
Debug,
Clone,
@@ -709,9 +715,6 @@ impl ConjunctionData {
VariantArray,
)]
pub enum Dialect {
// Note: these have bit-shifted values so that they can ergonomically integrate with
// `DialectFlags`. Each value here must have a unique bit index inside
// `DialectsUnderlyingType`.
American = 1 << 0,
Canadian = 1 << 1,
Australian = 1 << 2,
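
A worked view of the bit layout described in the note above (the underlying integer type here is illustrative):

```rust
// Each dialect occupies a unique bit, so dialects can be OR'ed into a flag set.
assert_eq!(Dialect::American as u8, 0b001);
assert_eq!(Dialect::Canadian as u8, 0b010);
assert_eq!(Dialect::American as u8 | Dialect::Canadian as u8, 0b011);
```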
@@ -881,7 +884,8 @@ impl Default for DialectFlags {
#[cfg(test)]
mod tests {
use crate::{Dictionary, FstDictionary, WordMetadata};
use crate::WordMetadata;
use crate::spell::{Dictionary, FstDictionary};
// Helper function to get word metadata from the curated dictionary
fn md(word: &str) -> WordMetadata {

View File

@@ -9,8 +9,9 @@
//! Note: This test will fail if the snapshot files are not up to date. This
//! ensures that CI will fail if linters change their behavior.
use harper_core::spell::FstDictionary;
use harper_core::{
Dialect, Document, FstDictionary,
Dialect, Document,
linting::{LintGroup, Linter},
};

View File

@@ -64,7 +64,8 @@
//! - All other token kinds are denoted by their variant name.
use std::borrow::Cow;
use harper_core::{Degree, Dialect, Document, FstDictionary, TokenKind, WordMetadata};
use harper_core::spell::FstDictionary;
use harper_core::{Degree, Dialect, Document, TokenKind, WordMetadata};
mod snapshot;

View File

@@ -1,6 +1,7 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::OrgMode;
use harper_core::{Dialect, Document, FstDictionary};
use harper_core::spell::FstDictionary;
use harper_core::{Dialect, Document};
/// Creates a unit test checking that the linting of a Markdown document (in
/// `tests_sources`) produces the expected number of lints.

View File

@@ -1,5 +1,6 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::{Dialect, Document, FstDictionary};
use harper_core::spell::FstDictionary;
use harper_core::{Dialect, Document};
/// Creates a unit test checking that the linting of a Markdown document (in
/// `tests_sources`) produces the expected number of lints.

View File

@@ -1,10 +1,11 @@
use harper_comments::CommentParser;
use harper_core::{
Lrc, Masker, MutableDictionary, Token,
Lrc, Masker, Token,
parsers::{Markdown, MarkdownOptions, Mask, Parser},
};
mod masker;
use harper_core::spell::MutableDictionary;
use itertools::Itertools;
use masker::LiterateHaskellMasker;

View File

@@ -1,6 +1,7 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::parsers::MarkdownOptions;
use harper_core::{Dialect, Document, FstDictionary};
use harper_core::spell::FstDictionary;
use harper_core::{Dialect, Document};
use harper_literate_haskell::LiterateHaskellParser;
/// Creates a unit test checking that the linting of a Markdown document (in

View File

@@ -4,6 +4,12 @@ use std::io::{BufWriter, Write};
use std::path::PathBuf;
use std::sync::Arc;
use crate::config::Config;
use crate::dictionary_io::{load_dict, save_dict};
use crate::document_state::DocumentState;
use crate::git_commit_parser::GitCommitParser;
use crate::ignored_lints_io::{load_ignored_lints, save_ignored_lints};
use crate::io_utils::fileify_path;
use anyhow::{Context, Result, anyhow};
use futures::future::join;
use harper_comments::CommentParser;
@@ -11,12 +17,11 @@ use harper_core::linting::{LintGroup, LintGroupConfig};
use harper_core::parsers::{
CollapseIdentifiers, IsolateEnglish, Markdown, OrgMode, Parser, PlainEnglish,
};
use harper_core::{
Dialect, Dictionary, Document, FstDictionary, IgnoredLints, MergedDictionary,
MutableDictionary, WordMetadata,
};
use harper_core::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary};
use harper_core::{Dialect, Document, IgnoredLints, WordMetadata};
use harper_html::HtmlParser;
use harper_literate_haskell::LiterateHaskellParser;
use harper_stats::{Record, Stats};
use harper_typst::Typst;
use serde_json::Value;
use tokio::sync::{Mutex, RwLock};
@@ -35,14 +40,6 @@ use tower_lsp_server::lsp_types::{
use tower_lsp_server::{Client, LanguageServer, UriExt};
use tracing::{error, info, warn};
use crate::config::Config;
use crate::dictionary_io::{load_dict, save_dict};
use crate::document_state::DocumentState;
use crate::git_commit_parser::GitCommitParser;
use crate::ignored_lints_io::{load_ignored_lints, save_ignored_lints};
use crate::io_utils::fileify_path;
use harper_stats::{Record, Stats};
/// Return harper-ls version
pub fn ls_version() -> &'static str {
env!("CARGO_PKG_VERSION")

View File

@@ -1,7 +1,8 @@
use itertools::Itertools;
use std::path::Path;
use harper_core::{Dictionary, MutableDictionary, WordMetadata};
use harper_core::WordMetadata;
use harper_core::spell::{Dictionary, MutableDictionary};
use tokio::fs::{self, File};
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, BufReader, BufWriter, Result};
@@ -64,6 +65,7 @@ async fn dict_from_word_list(mut r: impl AsyncRead + Unpin) -> Result<MutableDic
#[cfg(test)]
mod tests {
use super::*;
use harper_core::spell::MutableDictionary;
use std::io::Cursor;
const TEST_UNSORTED_WORDS: [&str; 10] = [

View File

@@ -2,9 +2,8 @@ use crate::config::{CodeActionConfig, DiagnosticSeverity};
use crate::diagnostics::{lint_to_code_actions, lints_to_diagnostics};
use crate::pos_conv::range_to_span;
use harper_core::linting::{Lint, LintGroup, Linter};
use harper_core::{
Document, IgnoredLints, MergedDictionary, MutableDictionary, TokenKind, remove_overlaps,
};
use harper_core::spell::{MergedDictionary, MutableDictionary};
use harper_core::{Document, IgnoredLints, TokenKind, remove_overlaps};
use harper_core::{Lrc, Token};
use tower_lsp_server::lsp_types::{CodeActionOrCommand, Command, Diagnostic, Range, Uri};

View File

@@ -1,6 +1,7 @@
use std::collections::HashSet;
use harper_core::{Mask, Masker, MutableDictionary, Span, WordMetadata};
use harper_core::spell::MutableDictionary;
use harper_core::{Mask, Masker, Span, WordMetadata};
use tree_sitter::{Language, Node, Tree, TreeCursor};
/// A Harper [`Masker`] that wraps a given tree-sitter language and a condition,

View File

@@ -1,5 +1,6 @@
use harper_core::linting::{LintGroup, Linter};
use harper_core::{Dialect, Document, FstDictionary};
use harper_core::spell::FstDictionary;
use harper_core::{Dialect, Document};
use harper_typst::Typst;
/// Creates a unit test checking that the linting of a document in

View File

@@ -8,8 +8,8 @@ use harper_core::language_detection::is_doc_likely_english;
use harper_core::linting::{LintGroup, Linter as _};
use harper_core::parsers::{IsolateEnglish, Markdown, Parser, PlainEnglish};
use harper_core::{
CharString, Dictionary, Document, FstDictionary, IgnoredLints, LintContext, Lrc,
MergedDictionary, MutableDictionary, WordMetadata, remove_overlaps,
CharString, Document, IgnoredLints, LintContext, Lrc, WordMetadata, remove_overlaps,
spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary},
};
use harper_stats::{Record, RecordKind, Stats};
use serde::{Deserialize, Serialize};

View File

@@ -12,6 +12,8 @@ This guide will go through one easy way to add a complex rule to Harper.
The lofty goal is for this to be doable by someone with little to no Rust experience.
You should, however, be able to figure out how to use Git.
While this guide should be enough to get started, [others](https://elijahpotter.dev/articles/writing_a_grammatical_rule_for_harper) have been written.
## Fork the Harper Monorepo
Before you can open a pull request or modify any code, you need a mutable copy of our monorepo.
@@ -123,7 +125,7 @@ insert_struct_rule!(AdjectiveOfA, true);
insert_expr_rule!(BackInTheDay, true);
insert_struct_rule!(WordPressDotcom, true);
insert_expr_rule!(OutOfDate, true);
// [svp! df:+] insert_expr_rule!(MyRule, true);
// [svp! df:+]insert_expr_rule!(MyRule, true);
```
If you use an `ExprLinter`, use `insert_expr_rule` to take advantage of Harper's aggressive caching.

View File

@@ -203,6 +203,10 @@ export default defineConfig({
},
],
},
{
title: 'Rust Reference',
to: 'https://docs.rs/harper-core/latest/harper_core/',
},
{
title: 'Rules',
to: '/docs/rules',